Diffstat (limited to 'contrib/llvm-project/clang/lib/CodeGen/CGOpenMPRuntime.cpp')
-rw-r--r--  contrib/llvm-project/clang/lib/CodeGen/CGOpenMPRuntime.cpp  1843
1 file changed, 1296 insertions, 547 deletions
diff --git a/contrib/llvm-project/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/contrib/llvm-project/clang/lib/CodeGen/CGOpenMPRuntime.cpp
index 14e0cba62b23..57cc2d60e2af 100644
--- a/contrib/llvm-project/clang/lib/CodeGen/CGOpenMPRuntime.cpp
+++ b/contrib/llvm-project/clang/lib/CodeGen/CGOpenMPRuntime.cpp
@@ -180,7 +180,7 @@ public:
UntiedCodeGen(CGF);
CodeGenFunction::JumpDest CurPoint =
CGF.getJumpDestInCurrentScope(".untied.next.");
- CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
+ CGF.EmitBranch(CGF.ReturnBlock.getBlock());
CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
CGF.Builder.GetInsertBlock());
@@ -1404,6 +1404,19 @@ void CGOpenMPRuntime::clearLocThreadIdInsertPt(CodeGenFunction &CGF) {
}
}
+static StringRef getIdentStringFromSourceLocation(CodeGenFunction &CGF,
+ SourceLocation Loc,
+ SmallString<128> &Buffer) {
+ llvm::raw_svector_ostream OS(Buffer);
+ // Build debug location
+ PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
+ OS << ";" << PLoc.getFilename() << ";";
+ if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
+ OS << FD->getQualifiedNameAsString();
+ OS << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
+ return OS.str();
+}
+
llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
SourceLocation Loc,
unsigned Flags) {
@@ -1430,6 +1443,16 @@ llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
SourceLocation Loc) {
assert(CGF.CurFn && "No function in current CodeGenFunction.");
+ // If the OpenMPIRBuilder is used, we need to use it for all thread id calls,
+ // as the clang invariants used below might be broken.
+ if (CGM.getLangOpts().OpenMPIRBuilder) {
+ SmallString<128> Buffer;
+ OMPBuilder.updateToLocation(CGF.Builder.saveIP());
+ auto *SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(
+ getIdentStringFromSourceLocation(CGF, Loc, Buffer));
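+ // getOrCreateThreadID is expected to emit (or reuse) a call to
+ // __kmpc_global_thread_num keyed on the ident built from this string.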
+ return OMPBuilder.getOrCreateThreadID(
+ OMPBuilder.getOrCreateIdent(SrcLocStr));
+ }
llvm::Value *ThreadID = nullptr;
// Check whether we've already cached a load of the thread id in this
@@ -1503,6 +1526,7 @@ void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) {
FunctionUDMMap.erase(I);
}
LastprivateConditionalToTypes.erase(CGF.CurFn);
+ FunctionToUntiedTaskStackMap.erase(CGF.CurFn);
}
llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() {
@@ -2074,6 +2098,14 @@ void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
OutlinedFnArgs.push_back(ThreadIDAddr.getPointer());
OutlinedFnArgs.push_back(ZeroAddrBound.getPointer());
OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
+
+ // Ensure we do not inline the function. This is trivially true for the ones
+ // passed to __kmpc_fork_call but the ones called in serialized regions
+ // could be inlined. This is not perfect, but it is closer to the invariant
+ // we want, namely, every data environment starts with a new function.
+ // TODO: We should pass the if condition to the runtime function and do the
+ // handling there. Much cleaner code.
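+ // E.g., for a hypothetical '#pragma omp parallel if(0)' the outlined
+ // function is called directly below instead of through __kmpc_fork_call,
+ // and without this attribute it could be inlined into the caller.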
+ OutlinedFn->addFnAttr(llvm::Attribute::NoInline);
RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);
// __kmpc_end_serialized_parallel(&Loc, GTid);
@@ -2236,7 +2268,7 @@ void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
if (!CGF.HaveInsertPoint())
return;
if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
- OMPBuilder.CreateTaskyield(CGF.Builder);
+ OMPBuilder.createTaskyield(CGF.Builder);
} else {
// Build call __kmpc_omp_taskyield(loc, thread_id, 0);
llvm::Value *Args[] = {
@@ -2491,7 +2523,7 @@ void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
auto *OMPRegionInfo =
dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo);
if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
- CGF.Builder.restoreIP(OMPBuilder.CreateBarrier(
+ CGF.Builder.restoreIP(OMPBuilder.createBarrier(
CGF.Builder, Kind, ForceSimpleCall, EmitChecks));
return;
}
@@ -2853,7 +2885,7 @@ void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF,
void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>,
SourceLocation Loc, llvm::AtomicOrdering AO) {
if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
- OMPBuilder.CreateFlush(CGF.Builder);
+ OMPBuilder.createFlush(CGF.Builder);
} else {
if (!CGF.HaveInsertPoint())
return;
@@ -2917,20 +2949,23 @@ void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
// If we are emitting code for a target, the entry is already initialized,
// only has to be registered.
if (CGM.getLangOpts().OpenMPIsDevice) {
- if (!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum)) {
- unsigned DiagID = CGM.getDiags().getCustomDiagID(
- DiagnosticsEngine::Error,
- "Unable to find target region on line '%0' in the device code.");
- CGM.getDiags().Report(DiagID) << LineNum;
- return;
- }
+ // This could happen if the device compilation is invoked standalone.
+ if (!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum))
+ initializeTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum,
+ OffloadingEntriesNum);
auto &Entry =
OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum];
- assert(Entry.isValid() && "Entry not initialized!");
Entry.setAddress(Addr);
Entry.setID(ID);
Entry.setFlags(Flags);
} else {
+ if (Flags ==
+ OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion &&
+ hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum,
+ /*IgnoreAddressId*/ true))
+ return;
+ assert(!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum) &&
+ "Target region entry already registered!");
OffloadEntryInfoTargetRegion Entry(OffloadingEntriesNum, Addr, ID, Flags);
OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = Entry;
++OffloadingEntriesNum;
@@ -2938,8 +2973,8 @@ void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
}
bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::hasTargetRegionEntryInfo(
- unsigned DeviceID, unsigned FileID, StringRef ParentName,
- unsigned LineNum) const {
+ unsigned DeviceID, unsigned FileID, StringRef ParentName, unsigned LineNum,
+ bool IgnoreAddressId) const {
auto PerDevice = OffloadEntriesTargetRegion.find(DeviceID);
if (PerDevice == OffloadEntriesTargetRegion.end())
return false;
@@ -2953,7 +2988,8 @@ bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::hasTargetRegionEntryInfo(
if (PerLine == PerParentName->second.end())
return false;
// Fail if this entry is already registered.
- if (PerLine->second.getAddress() || PerLine->second.getID())
+ if (!IgnoreAddressId &&
+ (PerLine->second.getAddress() || PerLine->second.getID()))
return false;
return true;
}
@@ -2985,9 +3021,10 @@ void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
OMPTargetGlobalVarEntryKind Flags,
llvm::GlobalValue::LinkageTypes Linkage) {
if (CGM.getLangOpts().OpenMPIsDevice) {
+ // This could happen if the device compilation is invoked standalone.
+ if (!hasDeviceGlobalVarEntryInfo(VarName))
+ initializeDeviceGlobalVarEntryInfo(VarName, Flags, OffloadingEntriesNum);
auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
- assert(Entry.isValid() && Entry.getFlags() == Flags &&
- "Entry not initialized!");
assert((!Entry.getAddress() || Entry.getAddress() == Addr) &&
"Resetting with the new address.");
if (Entry.getAddress() && hasDeviceGlobalVarEntryInfo(VarName)) {
@@ -3043,11 +3080,12 @@ void CGOpenMPRuntime::createOffloadEntry(
llvm::GlobalValue::InternalLinkage, StrPtrInit, StringName);
Str->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
- llvm::Constant *Data[] = {llvm::ConstantExpr::getBitCast(ID, CGM.VoidPtrTy),
- llvm::ConstantExpr::getBitCast(Str, CGM.Int8PtrTy),
- llvm::ConstantInt::get(CGM.SizeTy, Size),
- llvm::ConstantInt::get(CGM.Int32Ty, Flags),
- llvm::ConstantInt::get(CGM.Int32Ty, 0)};
+ llvm::Constant *Data[] = {
+ llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast(ID, CGM.VoidPtrTy),
+ llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast(Str, CGM.Int8PtrTy),
+ llvm::ConstantInt::get(CGM.SizeTy, Size),
+ llvm::ConstantInt::get(CGM.Int32Ty, Flags),
+ llvm::ConstantInt::get(CGM.Int32Ty, 0)};
std::string EntryName = getName({"omp_offloading", "entry", ""});
llvm::GlobalVariable *Entry = createGlobalStruct(
CGM, getTgtOffloadEntryQTy(), /*IsConstant=*/true, Data,
@@ -3347,14 +3385,29 @@ struct PrivateHelpersTy {
const VarDecl *PrivateCopy, const VarDecl *PrivateElemInit)
: OriginalRef(OriginalRef), Original(Original), PrivateCopy(PrivateCopy),
PrivateElemInit(PrivateElemInit) {}
+ PrivateHelpersTy(const VarDecl *Original) : Original(Original) {}
const Expr *OriginalRef = nullptr;
const VarDecl *Original = nullptr;
const VarDecl *PrivateCopy = nullptr;
const VarDecl *PrivateElemInit = nullptr;
+ bool isLocalPrivate() const {
+ return !OriginalRef && !PrivateCopy && !PrivateElemInit;
+ }
};
typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy;
} // anonymous namespace
+static bool isAllocatableDecl(const VarDecl *VD) {
+ const VarDecl *CVD = VD->getCanonicalDecl();
+ if (!CVD->hasAttr<OMPAllocateDeclAttr>())
+ return false;
+ const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
+ // Use the default allocation.
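+ // (E.g., a hypothetical '#pragma omp allocate(X) allocator(omp_high_bw_mem_alloc)'
+ // makes X allocatable here, while a default or null allocator with no
+ // allocator expression does not.)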
+ return !((AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc ||
+ AA->getAllocatorType() == OMPAllocateDeclAttr::OMPNullMemAlloc) &&
+ !AA->getAllocator());
+}
+
static RecordDecl *
createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) {
if (!Privates.empty()) {
@@ -3367,6 +3420,14 @@ createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) {
for (const auto &Pair : Privates) {
const VarDecl *VD = Pair.second.Original;
QualType Type = VD->getType().getNonReferenceType();
+ // If the private variable is a local variable with lvalue ref type,
+ // allocate the pointer instead of the pointee type.
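+ // (E.g., a local 'int &Ref' is stored as an 'int *' field in the privates
+ // record; an allocatable local likewise gets an extra level of pointer.)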
+ if (Pair.second.isLocalPrivate()) {
+ if (VD->getType()->isLValueReferenceType())
+ Type = C.getPointerType(Type);
+ if (isAllocatableDecl(VD))
+ Type = C.getPointerType(Type);
+ }
FieldDecl *FD = addFieldToRecordDecl(C, RD, Type);
if (VD->hasAttrs()) {
for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()),
@@ -3620,10 +3681,7 @@ static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM,
/// \endcode
static llvm::Value *
emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc,
- ArrayRef<const Expr *> PrivateVars,
- ArrayRef<const Expr *> FirstprivateVars,
- ArrayRef<const Expr *> LastprivateVars,
- QualType PrivatesQTy,
+ const OMPTaskDataTy &Data, QualType PrivatesQTy,
ArrayRef<PrivateDataTy> Privates) {
ASTContext &C = CGM.getContext();
FunctionArgList Args;
@@ -3632,9 +3690,9 @@ emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc,
C.getPointerType(PrivatesQTy).withConst().withRestrict(),
ImplicitParamDecl::Other);
Args.push_back(&TaskPrivatesArg);
- llvm::DenseMap<const VarDecl *, unsigned> PrivateVarsPos;
+ llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, unsigned> PrivateVarsPos;
unsigned Counter = 1;
- for (const Expr *E : PrivateVars) {
+ for (const Expr *E : Data.PrivateVars) {
Args.push_back(ImplicitParamDecl::Create(
C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
C.getPointerType(C.getPointerType(E->getType()))
@@ -3645,7 +3703,7 @@ emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc,
PrivateVarsPos[VD] = Counter;
++Counter;
}
- for (const Expr *E : FirstprivateVars) {
+ for (const Expr *E : Data.FirstprivateVars) {
Args.push_back(ImplicitParamDecl::Create(
C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
C.getPointerType(C.getPointerType(E->getType()))
@@ -3656,7 +3714,7 @@ emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc,
PrivateVarsPos[VD] = Counter;
++Counter;
}
- for (const Expr *E : LastprivateVars) {
+ for (const Expr *E : Data.LastprivateVars) {
Args.push_back(ImplicitParamDecl::Create(
C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
C.getPointerType(C.getPointerType(E->getType()))
@@ -3667,6 +3725,19 @@ emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc,
PrivateVarsPos[VD] = Counter;
++Counter;
}
+ for (const VarDecl *VD : Data.PrivateLocals) {
+ QualType Ty = VD->getType().getNonReferenceType();
+ if (VD->getType()->isLValueReferenceType())
+ Ty = C.getPointerType(Ty);
+ if (isAllocatableDecl(VD))
+ Ty = C.getPointerType(Ty);
+ Args.push_back(ImplicitParamDecl::Create(
+ C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
+ C.getPointerType(C.getPointerType(Ty)).withConst().withRestrict(),
+ ImplicitParamDecl::Other));
+ PrivateVarsPos[VD] = Counter;
+ ++Counter;
+ }
const auto &TaskPrivatesMapFnInfo =
CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
llvm::FunctionType *TaskPrivatesMapTy =
@@ -3727,9 +3798,9 @@ static void emitPrivatesInit(CodeGenFunction &CGF,
bool IsTargetTask =
isOpenMPTargetDataManagementDirective(D.getDirectiveKind()) ||
isOpenMPTargetExecutionDirective(D.getDirectiveKind());
- // For target-based directives skip 3 firstprivate arrays BasePointersArray,
- // PointersArray and SizesArray. The original variables for these arrays are
- // not captured and we get their addresses explicitly.
+ // For target-based directives skip 4 firstprivate arrays BasePointersArray,
+ // PointersArray, SizesArray, and MappersArray. The original variables for
+ // these arrays are not captured and we get their addresses explicitly.
if ((!IsTargetTask && !Data.FirstprivateVars.empty() && ForDup) ||
(IsTargetTask && KmpTaskSharedsPtr.isValid())) {
SrcBase = CGF.MakeAddrLValue(
@@ -3739,6 +3810,11 @@ static void emitPrivatesInit(CodeGenFunction &CGF,
}
FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin();
for (const PrivateDataTy &Pair : Privates) {
+ // Do not initialize private locals.
+ if (Pair.second.isLocalPrivate()) {
+ ++FI;
+ continue;
+ }
const VarDecl *VD = Pair.second.PrivateCopy;
const Expr *Init = VD->getAnyInitializer();
if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) &&
@@ -3747,7 +3823,7 @@ static void emitPrivatesInit(CodeGenFunction &CGF,
if (const VarDecl *Elem = Pair.second.PrivateElemInit) {
const VarDecl *OriginalVD = Pair.second.Original;
// Check if the variable is the target-based BasePointersArray,
- // PointersArray or SizesArray.
+ // PointersArray, SizesArray, or MappersArray.
LValue SharedRefLValue;
QualType Type = PrivateLValue.getType();
const FieldDecl *SharedField = CapturesInfo.lookup(OriginalVD);
@@ -3829,6 +3905,8 @@ static bool checkInitIsRequired(CodeGenFunction &CGF,
ArrayRef<PrivateDataTy> Privates) {
bool InitRequired = false;
for (const PrivateDataTy &Pair : Privates) {
+ if (Pair.second.isLocalPrivate())
+ continue;
const VarDecl *VD = Pair.second.PrivateCopy;
const Expr *Init = VD->getAnyInitializer();
InitRequired = InitRequired || (Init && isa<CXXConstructExpr>(Init) &&
@@ -3922,16 +4000,16 @@ emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc,
/// Checks if destructor function is required to be generated.
/// \return true if cleanups are required, false otherwise.
static bool
-checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD) {
- bool NeedsCleanup = false;
- auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
- const auto *PrivateRD = cast<RecordDecl>(FI->getType()->getAsTagDecl());
- for (const FieldDecl *FD : PrivateRD->fields()) {
- NeedsCleanup = NeedsCleanup || FD->getType().isDestructedType();
- if (NeedsCleanup)
- break;
+checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD,
+ ArrayRef<PrivateDataTy> Privates) {
+ for (const PrivateDataTy &P : Privates) {
+ if (P.second.isLocalPrivate())
+ continue;
+ QualType Ty = P.second.Original->getType().getNonReferenceType();
+ if (Ty.isDestructedType())
+ return true;
}
- return NeedsCleanup;
+ return false;
}
namespace {
@@ -4101,9 +4179,16 @@ CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
/*PrivateElemInit=*/nullptr));
++I;
}
- llvm::stable_sort(Privates, [](PrivateDataTy L, PrivateDataTy R) {
- return L.first > R.first;
- });
+ for (const VarDecl *VD : Data.PrivateLocals) {
+ if (isAllocatableDecl(VD))
+ Privates.emplace_back(CGM.getPointerAlign(), PrivateHelpersTy(VD));
+ else
+ Privates.emplace_back(C.getDeclAlign(VD), PrivateHelpersTy(VD));
+ }
+ llvm::stable_sort(Privates,
+ [](const PrivateDataTy &L, const PrivateDataTy &R) {
+ return L.first > R.first;
+ });
QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
// Build type kmp_routine_entry_t (if not built yet).
emitKmpRoutineEntryT(KmpInt32Ty);
@@ -4145,9 +4230,8 @@ CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
std::next(TaskFunction->arg_begin(), 3)->getType();
if (!Privates.empty()) {
auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
- TaskPrivatesMap = emitTaskPrivateMappingFunction(
- CGM, Loc, Data.PrivateVars, Data.FirstprivateVars, Data.LastprivateVars,
- FI->getType(), Privates);
+ TaskPrivatesMap =
+ emitTaskPrivateMappingFunction(CGM, Loc, Data, FI->getType(), Privates);
TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
TaskPrivatesMap, TaskPrivatesMapTy);
} else {
@@ -4177,7 +4261,8 @@ CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
unsigned Flags = Data.Tied ? TiedFlag : 0;
bool NeedsCleanup = false;
if (!Privates.empty()) {
- NeedsCleanup = checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD);
+ NeedsCleanup =
+ checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD, Privates);
if (NeedsCleanup)
Flags = Flags | DestructorsFlag;
}
@@ -6108,7 +6193,7 @@ void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
return;
if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
- OMPBuilder.CreateTaskwait(CGF.Builder);
+ OMPBuilder.createTaskwait(CGF.Builder);
} else {
// Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
// global_tid);
@@ -6391,6 +6476,8 @@ void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper(
OutlinedFnID = llvm::ConstantExpr::getBitCast(OutlinedFn, CGM.Int8PtrTy);
OutlinedFn->setLinkage(llvm::GlobalValue::WeakAnyLinkage);
OutlinedFn->setDSOLocal(false);
+ if (CGM.getTriple().isAMDGCN())
+ OutlinedFn->setCallingConv(llvm::CallingConv::AMDGPU_KERNEL);
} else {
std::string Name = getName({EntryFnName, "region_id"});
OutlinedFnID = new llvm::GlobalVariable(
@@ -6954,6 +7041,13 @@ public:
/// Close is a hint to the runtime to allocate memory close to
/// the target device.
OMP_MAP_CLOSE = 0x400,
+ /// 0x800 is reserved for compatibility with XLC.
+ /// Produce a runtime error if the data is not already allocated.
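+ /// For example, hypothetical clauses such as 'map(present, to: x)' or
+ /// 'target update to(present: x)' request this bit (see getMapTypeBits).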
+ OMP_MAP_PRESENT = 0x1000,
+ /// Signal that the runtime library should use args as an array of
+ /// descriptor_dim pointers and use args_size as dims. Used when we have
+ /// non-contiguous list items in a target update directive.
+ OMP_MAP_NON_CONTIG = 0x100000000000,
/// The 16 MSBs of the flags indicate whether the entry is member of some
/// struct/class.
OMP_MAP_MEMBER_OF = 0xffff000000000000,
@@ -6969,6 +7063,23 @@ public:
return Offset;
}
+ /// Class that holds debugging information for a data mapping to be passed to
+ /// the runtime library.
+ class MappingExprInfo {
+ /// The variable declaration used for the data mapping.
+ const ValueDecl *MapDecl = nullptr;
+ /// The original expression used in the map clause, or null if there is
+ /// none.
+ const Expr *MapExpr = nullptr;
+
+ public:
+ MappingExprInfo(const ValueDecl *MapDecl, const Expr *MapExpr = nullptr)
+ : MapDecl(MapDecl), MapExpr(MapExpr) {}
+
+ const ValueDecl *getMapDecl() const { return MapDecl; }
+ const Expr *getMapExpr() const { return MapExpr; }
+ };
+
/// Class that associates information with a base pointer to be passed to the
/// runtime library.
class BasePointerInfo {
@@ -6986,9 +7097,52 @@ public:
void setDevicePtrDecl(const ValueDecl *D) { DevPtrDecl = D; }
};
+ using MapExprsArrayTy = SmallVector<MappingExprInfo, 4>;
using MapBaseValuesArrayTy = SmallVector<BasePointerInfo, 4>;
using MapValuesArrayTy = SmallVector<llvm::Value *, 4>;
using MapFlagsArrayTy = SmallVector<OpenMPOffloadMappingFlags, 4>;
+ using MapMappersArrayTy = SmallVector<const ValueDecl *, 4>;
+ using MapDimArrayTy = SmallVector<uint64_t, 4>;
+ using MapNonContiguousArrayTy = SmallVector<MapValuesArrayTy, 4>;
+
+ /// This structure contains combined information generated for mappable
+ /// clauses, including base pointers, pointers, sizes, map types, user-defined
+ /// mappers, and non-contiguous information.
+ struct MapCombinedInfoTy {
+ struct StructNonContiguousInfo {
+ bool IsNonContiguous = false;
+ MapDimArrayTy Dims;
+ MapNonContiguousArrayTy Offsets;
+ MapNonContiguousArrayTy Counts;
+ MapNonContiguousArrayTy Strides;
+ };
+ MapExprsArrayTy Exprs;
+ MapBaseValuesArrayTy BasePointers;
+ MapValuesArrayTy Pointers;
+ MapValuesArrayTy Sizes;
+ MapFlagsArrayTy Types;
+ MapMappersArrayTy Mappers;
+ StructNonContiguousInfo NonContigInfo;
+
+ /// Append arrays in \a CurInfo.
+ void append(MapCombinedInfoTy &CurInfo) {
+ Exprs.append(CurInfo.Exprs.begin(), CurInfo.Exprs.end());
+ BasePointers.append(CurInfo.BasePointers.begin(),
+ CurInfo.BasePointers.end());
+ Pointers.append(CurInfo.Pointers.begin(), CurInfo.Pointers.end());
+ Sizes.append(CurInfo.Sizes.begin(), CurInfo.Sizes.end());
+ Types.append(CurInfo.Types.begin(), CurInfo.Types.end());
+ Mappers.append(CurInfo.Mappers.begin(), CurInfo.Mappers.end());
+ NonContigInfo.Dims.append(CurInfo.NonContigInfo.Dims.begin(),
+ CurInfo.NonContigInfo.Dims.end());
+ NonContigInfo.Offsets.append(CurInfo.NonContigInfo.Offsets.begin(),
+ CurInfo.NonContigInfo.Offsets.end());
+ NonContigInfo.Counts.append(CurInfo.NonContigInfo.Counts.begin(),
+ CurInfo.NonContigInfo.Counts.end());
+ NonContigInfo.Strides.append(CurInfo.NonContigInfo.Strides.begin(),
+ CurInfo.NonContigInfo.Strides.end());
+ }
+ };
/// Map between a struct and its lowest & highest elements which have been
/// mapped.
@@ -7000,6 +7154,7 @@ public:
std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> HighestElem = {
0, Address::invalid()};
Address Base = Address::invalid();
+ bool IsArraySection = false;
};
private:
@@ -7008,19 +7163,26 @@ private:
OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
OpenMPMapClauseKind MapType = OMPC_MAP_unknown;
ArrayRef<OpenMPMapModifierKind> MapModifiers;
+ ArrayRef<OpenMPMotionModifierKind> MotionModifiers;
bool ReturnDevicePointer = false;
bool IsImplicit = false;
+ const ValueDecl *Mapper = nullptr;
+ const Expr *VarRef = nullptr;
bool ForDeviceAddr = false;
MapInfo() = default;
MapInfo(
OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
OpenMPMapClauseKind MapType,
- ArrayRef<OpenMPMapModifierKind> MapModifiers, bool ReturnDevicePointer,
- bool IsImplicit, bool ForDeviceAddr = false)
+ ArrayRef<OpenMPMapModifierKind> MapModifiers,
+ ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
+ bool ReturnDevicePointer, bool IsImplicit,
+ const ValueDecl *Mapper = nullptr, const Expr *VarRef = nullptr,
+ bool ForDeviceAddr = false)
: Components(Components), MapType(MapType), MapModifiers(MapModifiers),
+ MotionModifiers(MotionModifiers),
ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit),
- ForDeviceAddr(ForDeviceAddr) {}
+ Mapper(Mapper), VarRef(VarRef), ForDeviceAddr(ForDeviceAddr) {}
};
/// If use_device_ptr or use_device_addr is used on a decl which is a struct
@@ -7138,7 +7300,8 @@ private:
/// expression.
OpenMPOffloadMappingFlags getMapTypeBits(
OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
- bool IsImplicit, bool AddPtrFlag, bool AddIsTargetParamFlag) const {
+ ArrayRef<OpenMPMotionModifierKind> MotionModifiers, bool IsImplicit,
+ bool AddPtrFlag, bool AddIsTargetParamFlag, bool IsNonContiguous) const {
OpenMPOffloadMappingFlags Bits =
IsImplicit ? OMP_MAP_IMPLICIT : OMP_MAP_NONE;
switch (MapType) {
@@ -7174,6 +7337,14 @@ private:
if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_close)
!= MapModifiers.end())
Bits |= OMP_MAP_CLOSE;
+ if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_present)
+ != MapModifiers.end())
+ Bits |= OMP_MAP_PRESENT;
+ if (llvm::find(MotionModifiers, OMPC_MOTION_MODIFIER_present)
+ != MotionModifiers.end())
+ Bits |= OMP_MAP_PRESENT;
+ if (IsNonContiguous)
+ Bits |= OMP_MAP_NON_CONTIG;
return Bits;
}
@@ -7216,17 +7387,19 @@ private:
return ConstLength.getSExtValue() != 1;
}
- /// Generate the base pointers, section pointers, sizes and map type
- /// bits for the provided map type, map modifier, and expression components.
+ /// Generate the base pointers, section pointers, sizes, map type bits, and
+ /// user-defined mappers (all included in \a CombinedInfo) for the provided
+ /// map type, map or motion modifiers, and expression components.
/// \a IsFirstComponent should be set to true if the provided set of
/// components is the first associated with a capture.
void generateInfoForComponentList(
OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
+ ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
- MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
- MapValuesArrayTy &Sizes, MapFlagsArrayTy &Types,
- StructRangeInfoTy &PartialStruct, bool IsFirstComponentList,
- bool IsImplicit, bool ForDeviceAddr = false,
+ MapCombinedInfoTy &CombinedInfo, StructRangeInfoTy &PartialStruct,
+ bool IsFirstComponentList, bool IsImplicit,
+ const ValueDecl *Mapper = nullptr, bool ForDeviceAddr = false,
+ const ValueDecl *BaseDecl = nullptr, const Expr *MapExpr = nullptr,
ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
OverlappedElements = llvm::None) const {
// The following summarizes what has to be generated for each map and the
@@ -7449,11 +7622,11 @@ private:
// can be associated with the combined storage if shared memory mode is
// active or the base declaration is not global variable.
const auto *VD = dyn_cast<VarDecl>(I->getAssociatedDeclaration());
- if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
+ if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
!VD || VD->hasLocalStorage())
BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
else
- FirstPointerInComplexData = IsCaptureFirstInfo;
+ FirstPointerInComplexData = true;
++I;
}
}
@@ -7483,6 +7656,12 @@ private:
// whether we are dealing with a member of a declared struct.
const MemberExpr *EncounteredME = nullptr;
+ // Track the total number of dimensions. Start from one for the dummy
+ // dimension.
+ uint64_t DimSize = 1;
+
+ bool IsNonContiguous = CombinedInfo.NonContigInfo.IsNonContiguous;
+
for (; I != CE; ++I) {
// If the current component is member of a struct (parent struct) mark it.
if (!EncounteredME) {
@@ -7512,9 +7691,18 @@ private:
// becomes the base address for the following components.
// A final array section, is one whose length can't be proved to be one.
+ // If the map item is non-contiguous, then we don't treat any array section
+ // as a final array section.
bool IsFinalArraySection =
+ !IsNonContiguous &&
isFinalArraySectionExpression(I->getAssociatedExpression());
+ // If we have a declaration for the mapping, use it; otherwise use
+ // the base declaration of the map clause.
+ const ValueDecl *MapDecl = (I->getAssociatedDeclaration())
+ ? I->getAssociatedDeclaration()
+ : BaseDecl;
+
// Get information on whether the element is a pointer. Have to do a
// special treatment for array sections given that they are built-in
// types.
@@ -7530,7 +7718,10 @@ private:
.getCanonicalType()
->isAnyPointerType()) ||
I->getAssociatedExpression()->getType()->isAnyPointerType();
- bool IsNonDerefPointer = IsPointer && !UO && !BO;
+ bool IsNonDerefPointer = IsPointer && !UO && !BO && !IsNonContiguous;
+
+ if (OASE)
+ ++DimSize;
if (Next == CE || IsNonDerefPointer || IsFinalArraySection) {
// If this is not the last component, we expect the pointer to be
@@ -7539,6 +7730,7 @@ private:
isa<MemberExpr>(Next->getAssociatedExpression()) ||
isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) ||
isa<OMPArraySectionExpr>(Next->getAssociatedExpression()) ||
+ isa<OMPArrayShapingExpr>(Next->getAssociatedExpression()) ||
isa<UnaryOperator>(Next->getAssociatedExpression()) ||
isa<BinaryOperator>(Next->getAssociatedExpression())) &&
"Unexpected expression");
@@ -7584,9 +7776,9 @@ private:
// Emit data for non-overlapped data.
OpenMPOffloadMappingFlags Flags =
OMP_MAP_MEMBER_OF |
- getMapTypeBits(MapType, MapModifiers, IsImplicit,
+ getMapTypeBits(MapType, MapModifiers, MotionModifiers, IsImplicit,
/*AddPtrFlag=*/false,
- /*AddIsTargetParamFlag=*/false);
+ /*AddIsTargetParamFlag=*/false, IsNonContiguous);
LB = BP;
llvm::Value *Size = nullptr;
// Do bitcopy of all non-overlapped structure elements.
@@ -7605,40 +7797,57 @@ private:
break;
}
}
- BasePointers.push_back(BP.getPointer());
- Pointers.push_back(LB.getPointer());
- Sizes.push_back(CGF.Builder.CreateIntCast(Size, CGF.Int64Ty,
- /*isSigned=*/true));
- Types.push_back(Flags);
+ assert(Size && "Failed to determine structure size");
+ CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
+ CombinedInfo.BasePointers.push_back(BP.getPointer());
+ CombinedInfo.Pointers.push_back(LB.getPointer());
+ CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
+ Size, CGF.Int64Ty, /*isSigned=*/true));
+ CombinedInfo.Types.push_back(Flags);
+ CombinedInfo.Mappers.push_back(nullptr);
+ CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
+ : 1);
LB = CGF.Builder.CreateConstGEP(ComponentLB, 1);
}
- BasePointers.push_back(BP.getPointer());
- Pointers.push_back(LB.getPointer());
+ CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
+ CombinedInfo.BasePointers.push_back(BP.getPointer());
+ CombinedInfo.Pointers.push_back(LB.getPointer());
Size = CGF.Builder.CreatePtrDiff(
CGF.EmitCastToVoidPtr(
CGF.Builder.CreateConstGEP(HB, 1).getPointer()),
CGF.EmitCastToVoidPtr(LB.getPointer()));
- Sizes.push_back(
+ CombinedInfo.Sizes.push_back(
CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
- Types.push_back(Flags);
+ CombinedInfo.Types.push_back(Flags);
+ CombinedInfo.Mappers.push_back(nullptr);
+ CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
+ : 1);
break;
}
llvm::Value *Size = getExprTypeSize(I->getAssociatedExpression());
- if (!IsMemberPointerOrAddr) {
- BasePointers.push_back(BP.getPointer());
- Pointers.push_back(LB.getPointer());
- Sizes.push_back(
+ if (!IsMemberPointerOrAddr ||
+ (Next == CE && MapType != OMPC_MAP_unknown)) {
+ CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
+ CombinedInfo.BasePointers.push_back(BP.getPointer());
+ CombinedInfo.Pointers.push_back(LB.getPointer());
+ CombinedInfo.Sizes.push_back(
CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
+ CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
+ : 1);
+
+ // If Mapper is valid, the last component inherits the mapper.
+ bool HasMapper = Mapper && Next == CE;
+ CombinedInfo.Mappers.push_back(HasMapper ? Mapper : nullptr);
// We need to add a pointer flag for each map that comes from the
// same expression except for the first one. We also need to signal
// this map is the first one that relates with the current capture
// (there is a set of entries for each capture).
- OpenMPOffloadMappingFlags Flags =
- getMapTypeBits(MapType, MapModifiers, IsImplicit,
- !IsExpressionFirstInfo || RequiresReference ||
- FirstPointerInComplexData,
- IsCaptureFirstInfo && !RequiresReference);
+ OpenMPOffloadMappingFlags Flags = getMapTypeBits(
+ MapType, MapModifiers, MotionModifiers, IsImplicit,
+ !IsExpressionFirstInfo || RequiresReference ||
+ FirstPointerInComplexData,
+ IsCaptureFirstInfo && !RequiresReference, IsNonContiguous);
if (!IsExpressionFirstInfo) {
// If we have a PTR_AND_OBJ pair where the OBJ is a pointer as well,
@@ -7657,7 +7866,7 @@ private:
}
}
- Types.push_back(Flags);
+ CombinedInfo.Types.push_back(Flags);
}
// If we have encountered a member expression so far, keep track of the
@@ -7686,6 +7895,10 @@ private:
}
}
+ // Need to emit combined struct for array sections.
+ if (IsFinalArraySection || IsNonContiguous)
+ PartialStruct.IsArraySection = true;
+
// If we have a final array section, we are done with this expression.
if (IsFinalArraySection)
break;
@@ -7697,8 +7910,191 @@ private:
IsExpressionFirstInfo = false;
IsCaptureFirstInfo = false;
FirstPointerInComplexData = false;
+ } else if (FirstPointerInComplexData) {
+ QualType Ty = Components.rbegin()
+ ->getAssociatedDeclaration()
+ ->getType()
+ .getNonReferenceType();
+ BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
+ FirstPointerInComplexData = false;
+ }
+ }
+
+ if (!IsNonContiguous)
+ return;
+
+ const ASTContext &Context = CGF.getContext();
+
+ // To support strides in array sections, we need to initialize the first
+ // dimension size as 1, the first offset as 0, and the first count as 1.
+ MapValuesArrayTy CurOffsets = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 0)};
+ MapValuesArrayTy CurCounts = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)};
+ MapValuesArrayTy CurStrides;
+ MapValuesArrayTy DimSizes{llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)};
+ uint64_t ElementTypeSize;
+
+ // Collect Size information for each dimension and get the element size as
+ // the first Stride. For example, for `int arr[10][10]`, the DimSizes
+ // should be [10, 10] and the first stride is 4 bytes.
+ for (const OMPClauseMappableExprCommon::MappableComponent &Component :
+ Components) {
+ const Expr *AssocExpr = Component.getAssociatedExpression();
+ const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);
+
+ if (!OASE)
+ continue;
+
+ QualType Ty = OMPArraySectionExpr::getBaseOriginalType(OASE->getBase());
+ auto *CAT = Context.getAsConstantArrayType(Ty);
+ auto *VAT = Context.getAsVariableArrayType(Ty);
+
+ // We need all the dimension sizes except for the last dimension.
+ assert((VAT || CAT || &Component == &*Components.begin()) &&
+ "Should be either ConstantArray or VariableArray if not the "
+ "first Component");
+
+ // Get element size if CurStrides is empty.
+ if (CurStrides.empty()) {
+ const Type *ElementType = nullptr;
+ if (CAT)
+ ElementType = CAT->getElementType().getTypePtr();
+ else if (VAT)
+ ElementType = VAT->getElementType().getTypePtr();
+ else
+ assert(&Component == &*Components.begin() &&
+ "Only expect pointer (non CAT or VAT) when this is the "
+ "first Component");
+ // If ElementType is null, then it means the base is a pointer
+ // (neither CAT nor VAT) and we'll attempt to get ElementType again
+ // for next iteration.
+ if (ElementType) {
+ // For the case of having a pointer as the base, we need to remove one
+ // level of indirection.
+ if (&Component != &*Components.begin())
+ ElementType = ElementType->getPointeeOrArrayElementType();
+ ElementTypeSize =
+ Context.getTypeSizeInChars(ElementType).getQuantity();
+ CurStrides.push_back(
+ llvm::ConstantInt::get(CGF.Int64Ty, ElementTypeSize));
+ }
+ }
+ // Get dimension value except for the last dimension since we don't need
+ // it.
+ if (DimSizes.size() < Components.size() - 1) {
+ if (CAT)
+ DimSizes.push_back(llvm::ConstantInt::get(
+ CGF.Int64Ty, CAT->getSize().getZExtValue()));
+ else if (VAT)
+ DimSizes.push_back(CGF.Builder.CreateIntCast(
+ CGF.EmitScalarExpr(VAT->getSizeExpr()), CGF.Int64Ty,
+ /*IsSigned=*/false));
+ }
+ }
+
+ // Skip the dummy dimension since we already have its information.
+ auto DI = DimSizes.begin() + 1;
+ // Product of dimension.
+ llvm::Value *DimProd =
+ llvm::ConstantInt::get(CGF.CGM.Int64Ty, ElementTypeSize);
+
+ // Collect info for non-contiguous data. Note that offset, count, and stride
+ // are only meaningful for array sections, so we insert a null for anything
+ // other than an array section.
+ // Also, the sizes of offsets, counts, and strides are not the same as those
+ // of pointers, base_pointers, sizes, or dims. Instead, they match the number
+ // of non-contiguous declarations in the target update to/from clause.
+ for (const OMPClauseMappableExprCommon::MappableComponent &Component :
+ Components) {
+ const Expr *AssocExpr = Component.getAssociatedExpression();
+
+ if (const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr)) {
+ llvm::Value *Offset = CGF.Builder.CreateIntCast(
+ CGF.EmitScalarExpr(AE->getIdx()), CGF.Int64Ty,
+ /*isSigned=*/false);
+ CurOffsets.push_back(Offset);
+ CurCounts.push_back(llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/1));
+ CurStrides.push_back(CurStrides.back());
+ continue;
+ }
+
+ const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);
+
+ if (!OASE)
+ continue;
+
+ // Offset
+ const Expr *OffsetExpr = OASE->getLowerBound();
+ llvm::Value *Offset = nullptr;
+ if (!OffsetExpr) {
+ // If offset is absent, then we just set it to zero.
+ Offset = llvm::ConstantInt::get(CGF.Int64Ty, 0);
+ } else {
+ Offset = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(OffsetExpr),
+ CGF.Int64Ty,
+ /*isSigned=*/false);
+ }
+ CurOffsets.push_back(Offset);
+
+ // Count
+ const Expr *CountExpr = OASE->getLength();
+ llvm::Value *Count = nullptr;
+ if (!CountExpr) {
+ // In Clang, once a higher dimension is an array section, we construct all
+ // the lower dimensions as array sections; however, for a case like
+ // arr[0:2][2], Clang constructs the inner dimension as an array section,
+ // but it is actually not in array-section form according to the spec.
+ if (!OASE->getColonLocFirst().isValid() &&
+ !OASE->getColonLocSecond().isValid()) {
+ Count = llvm::ConstantInt::get(CGF.Int64Ty, 1);
+ } else {
+ // OpenMP 5.0, 2.1.5 Array Sections, Description.
+ // When the length is absent it defaults to ⌈(size −
+ // lower-bound)/stride⌉, where size is the size of the array
+ // dimension.
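+ // E.g., for 'int a[5]' and a hypothetical section 'a[1::2]', the length
+ // defaults to (5 - 1) / 2 = 2 elements.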
+ const Expr *StrideExpr = OASE->getStride();
+ llvm::Value *Stride =
+ StrideExpr
+ ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr),
+ CGF.Int64Ty, /*isSigned=*/false)
+ : nullptr;
+ if (Stride)
+ Count = CGF.Builder.CreateUDiv(
+ CGF.Builder.CreateNUWSub(*DI, Offset), Stride);
+ else
+ Count = CGF.Builder.CreateNUWSub(*DI, Offset);
+ }
+ } else {
+ Count = CGF.EmitScalarExpr(CountExpr);
}
+ Count = CGF.Builder.CreateIntCast(Count, CGF.Int64Ty, /*isSigned=*/false);
+ CurCounts.push_back(Count);
+
+ // Stride_n' = Stride_n * (D_0 * D_1 ... * D_n-1) * Unit size
+ // Take `int arr[5][5][5]` and `arr[0:2:2][1:2:1][0:2:2]` as an example:
+ // Offset Count Stride
+ // D0 0 1 4 (int) <- dummy dimension
+ // D1 0 2 8 (2 * (1) * 4)
+ // D2 1 2 20 (1 * (1 * 5) * 4)
+ // D3 0 2 200 (2 * (1 * 5 * 4) * 4)
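+ // E.g., the table above corresponds to a hypothetical directive like
+ // '#pragma omp target update to(arr[0:2:2][1:2:1][0:2:2])'.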
+ const Expr *StrideExpr = OASE->getStride();
+ llvm::Value *Stride =
+ StrideExpr
+ ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr),
+ CGF.Int64Ty, /*isSigned=*/false)
+ : nullptr;
+ DimProd = CGF.Builder.CreateNUWMul(DimProd, *(DI - 1));
+ if (Stride)
+ CurStrides.push_back(CGF.Builder.CreateNUWMul(DimProd, Stride));
+ else
+ CurStrides.push_back(DimProd);
+ if (DI != DimSizes.end())
+ ++DI;
}
+
+ CombinedInfo.NonContigInfo.Offsets.push_back(CurOffsets);
+ CombinedInfo.NonContigInfo.Counts.push_back(CurCounts);
+ CombinedInfo.NonContigInfo.Strides.push_back(CurStrides);
}
/// Return the adjusted map modifiers if the declaration a capture refers to
@@ -7831,7 +8227,7 @@ public:
// Extract device pointer clause information.
for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>())
for (auto L : C->component_lists())
- DevPointersMap[L.first].push_back(L.second);
+ DevPointersMap[std::get<0>(L)].push_back(std::get<1>(L));
}
/// Constructor for the declare mapper directive.
@@ -7841,15 +8237,23 @@ public:
/// Generate code for the combined entry if we have a partially mapped struct
/// and take care of the mapping flags of the arguments corresponding to
/// individual struct members.
- void emitCombinedEntry(MapBaseValuesArrayTy &BasePointers,
- MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes,
- MapFlagsArrayTy &Types, MapFlagsArrayTy &CurTypes,
- const StructRangeInfoTy &PartialStruct) const {
+ void emitCombinedEntry(MapCombinedInfoTy &CombinedInfo,
+ MapFlagsArrayTy &CurTypes,
+ const StructRangeInfoTy &PartialStruct,
+ const ValueDecl *VD = nullptr,
+ bool NotTargetParams = true) const {
+ if (CurTypes.size() == 1 &&
+ ((CurTypes.back() & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF) &&
+ !PartialStruct.IsArraySection)
+ return;
+ CombinedInfo.Exprs.push_back(VD);
// Base is the base of the struct
- BasePointers.push_back(PartialStruct.Base.getPointer());
+ CombinedInfo.BasePointers.push_back(PartialStruct.Base.getPointer());
// Pointer is the address of the lowest element
llvm::Value *LB = PartialStruct.LowestElem.second.getPointer();
- Pointers.push_back(LB);
+ CombinedInfo.Pointers.push_back(LB);
+ // There should not be a mapper for a combined entry.
+ CombinedInfo.Mappers.push_back(nullptr);
// Size is (addr of {highest+1} element) - (addr of lowest element)
llvm::Value *HB = PartialStruct.HighestElem.second.getPointer();
llvm::Value *HAddr = CGF.Builder.CreateConstGEP1_32(HB, /*Idx0=*/1);
@@ -7858,28 +8262,39 @@ public:
llvm::Value *Diff = CGF.Builder.CreatePtrDiff(CHAddr, CLAddr);
llvm::Value *Size = CGF.Builder.CreateIntCast(Diff, CGF.Int64Ty,
/*isSigned=*/false);
- Sizes.push_back(Size);
- // Map type is always TARGET_PARAM
- Types.push_back(OMP_MAP_TARGET_PARAM);
- // Remove TARGET_PARAM flag from the first element
- (*CurTypes.begin()) &= ~OMP_MAP_TARGET_PARAM;
+ CombinedInfo.Sizes.push_back(Size);
+ // Map type is always TARGET_PARAM, if generate info for captures.
+ CombinedInfo.Types.push_back(NotTargetParams ? OMP_MAP_NONE
+ : OMP_MAP_TARGET_PARAM);
+ // If any element has the present modifier, then make sure the runtime
+ // doesn't attempt to allocate the struct.
+ if (CurTypes.end() !=
+ llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) {
+ return Type & OMP_MAP_PRESENT;
+ }))
+ CombinedInfo.Types.back() |= OMP_MAP_PRESENT;
+ // Remove TARGET_PARAM flag from the first element if any.
+ if (!CurTypes.empty())
+ CurTypes.front() &= ~OMP_MAP_TARGET_PARAM;
// All other current entries will be MEMBER_OF the combined entry
// (except for PTR_AND_OBJ entries which do not have a placeholder value
// 0xFFFF in the MEMBER_OF field).
OpenMPOffloadMappingFlags MemberOfFlag =
- getMemberOfFlag(BasePointers.size() - 1);
+ getMemberOfFlag(CombinedInfo.BasePointers.size() - 1);
for (auto &M : CurTypes)
setCorrectMemberOfFlag(M, MemberOfFlag);
}
- /// Generate all the base pointers, section pointers, sizes and map
- /// types for the extracted mappable expressions. Also, for each item that
- /// relates with a device pointer, a pair of the relevant declaration and
- /// index where it occurs is appended to the device pointers info array.
- void generateAllInfo(MapBaseValuesArrayTy &BasePointers,
- MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes,
- MapFlagsArrayTy &Types) const {
+ /// Generate all the base pointers, section pointers, sizes, map types, and
+ /// mappers for the extracted mappable expressions (all included in \a
+ /// CombinedInfo). Also, for each item that relates with a device pointer, a
+ /// pair of the relevant declaration and index where it occurs is appended to
+ /// the device pointers info array.
+ void generateAllInfo(
+ MapCombinedInfoTy &CombinedInfo,
+ const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet =
+ llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const {
// We have to process the component lists that relate with the same
// declaration in a single chunk so that we can generate the map flags
// correctly. Therefore, we organize all lists in a map.
@@ -7888,36 +8303,56 @@ public:
// Helper function to fill the information map for the different supported
// clauses.
auto &&InfoGen =
- [&Info](const ValueDecl *D,
- OMPClauseMappableExprCommon::MappableExprComponentListRef L,
- OpenMPMapClauseKind MapType,
- ArrayRef<OpenMPMapModifierKind> MapModifiers,
- bool ReturnDevicePointer, bool IsImplicit,
- bool ForDeviceAddr = false) {
+ [&Info, &SkipVarSet](
+ const ValueDecl *D,
+ OMPClauseMappableExprCommon::MappableExprComponentListRef L,
+ OpenMPMapClauseKind MapType,
+ ArrayRef<OpenMPMapModifierKind> MapModifiers,
+ ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
+ bool ReturnDevicePointer, bool IsImplicit, const ValueDecl *Mapper,
+ const Expr *VarRef = nullptr, bool ForDeviceAddr = false) {
const ValueDecl *VD =
D ? cast<ValueDecl>(D->getCanonicalDecl()) : nullptr;
- Info[VD].emplace_back(L, MapType, MapModifiers, ReturnDevicePointer,
- IsImplicit, ForDeviceAddr);
+ if (SkipVarSet.count(VD))
+ return;
+ Info[VD].emplace_back(L, MapType, MapModifiers, MotionModifiers,
+ ReturnDevicePointer, IsImplicit, Mapper, VarRef,
+ ForDeviceAddr);
};
assert(CurDir.is<const OMPExecutableDirective *>() &&
"Expect a executable directive");
const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
- for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>())
+ for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) {
+ const auto *EI = C->getVarRefs().begin();
for (const auto L : C->component_lists()) {
- InfoGen(L.first, L.second, C->getMapType(), C->getMapTypeModifiers(),
- /*ReturnDevicePointer=*/false, C->isImplicit());
+ // The expression is not correct if the mapping is implicit.
+ const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;
+ InfoGen(std::get<0>(L), std::get<1>(L), C->getMapType(),
+ C->getMapTypeModifiers(), llvm::None,
+ /*ReturnDevicePointer=*/false, C->isImplicit(), std::get<2>(L),
+ E);
+ ++EI;
}
- for (const auto *C : CurExecDir->getClausesOfKind<OMPToClause>())
+ }
+ for (const auto *C : CurExecDir->getClausesOfKind<OMPToClause>()) {
+ const auto *EI = C->getVarRefs().begin();
for (const auto L : C->component_lists()) {
- InfoGen(L.first, L.second, OMPC_MAP_to, llvm::None,
- /*ReturnDevicePointer=*/false, C->isImplicit());
+ InfoGen(std::get<0>(L), std::get<1>(L), OMPC_MAP_to, llvm::None,
+ C->getMotionModifiers(), /*ReturnDevicePointer=*/false,
+ C->isImplicit(), std::get<2>(L), *EI);
+ ++EI;
}
- for (const auto *C : CurExecDir->getClausesOfKind<OMPFromClause>())
+ }
+ for (const auto *C : CurExecDir->getClausesOfKind<OMPFromClause>()) {
+ const auto *EI = C->getVarRefs().begin();
for (const auto L : C->component_lists()) {
- InfoGen(L.first, L.second, OMPC_MAP_from, llvm::None,
- /*ReturnDevicePointer=*/false, C->isImplicit());
+ InfoGen(std::get<0>(L), std::get<1>(L), OMPC_MAP_from, llvm::None,
+ C->getMotionModifiers(), /*ReturnDevicePointer=*/false,
+ C->isImplicit(), std::get<2>(L), *EI);
+ ++EI;
}
+ }
// Look at the use_device_ptr clause information and mark the existing map
// entries as such. If there is no map information for an entry in the
@@ -7927,18 +8362,18 @@ public:
// emission of that entry until the whole struct has been processed.
llvm::MapVector<const ValueDecl *, SmallVector<DeferredDevicePtrEntryTy, 4>>
DeferredInfo;
- MapBaseValuesArrayTy UseDevicePtrBasePointers;
- MapValuesArrayTy UseDevicePtrPointers;
- MapValuesArrayTy UseDevicePtrSizes;
- MapFlagsArrayTy UseDevicePtrTypes;
+ MapCombinedInfoTy UseDevicePtrCombinedInfo;
for (const auto *C :
CurExecDir->getClausesOfKind<OMPUseDevicePtrClause>()) {
for (const auto L : C->component_lists()) {
- assert(!L.second.empty() && "Not expecting empty list of components!");
- const ValueDecl *VD = L.second.back().getAssociatedDeclaration();
+ OMPClauseMappableExprCommon::MappableExprComponentListRef Components =
+ std::get<1>(L);
+ assert(!Components.empty() &&
+ "Not expecting empty list of components!");
+ const ValueDecl *VD = Components.back().getAssociatedDeclaration();
VD = cast<ValueDecl>(VD->getCanonicalDecl());
- const Expr *IE = L.second.back().getAssociatedExpression();
+ const Expr *IE = Components.back().getAssociatedExpression();
// If the first component is a member expression, we have to look into
// 'this', which maps to null in the map of map information. Otherwise
// look directly for the information.
@@ -7982,18 +8417,19 @@ public:
// Nonetheless, generateInfoForComponentList must be called to take
// the pointer into account for the calculation of the range of the
// partial struct.
- InfoGen(nullptr, L.second, OMPC_MAP_unknown, llvm::None,
- /*ReturnDevicePointer=*/false, C->isImplicit());
+ InfoGen(nullptr, Components, OMPC_MAP_unknown, llvm::None, llvm::None,
+ /*ReturnDevicePointer=*/false, C->isImplicit(), nullptr);
DeferredInfo[nullptr].emplace_back(IE, VD, /*ForDeviceAddr=*/false);
} else {
llvm::Value *Ptr =
CGF.EmitLoadOfScalar(CGF.EmitLValue(IE), IE->getExprLoc());
- UseDevicePtrBasePointers.emplace_back(Ptr, VD);
- UseDevicePtrPointers.push_back(Ptr);
- UseDevicePtrSizes.push_back(
+ UseDevicePtrCombinedInfo.Exprs.push_back(VD);
+ UseDevicePtrCombinedInfo.BasePointers.emplace_back(Ptr, VD);
+ UseDevicePtrCombinedInfo.Pointers.push_back(Ptr);
+ UseDevicePtrCombinedInfo.Sizes.push_back(
llvm::Constant::getNullValue(CGF.Int64Ty));
- UseDevicePtrTypes.push_back(OMP_MAP_RETURN_PARAM |
- OMP_MAP_TARGET_PARAM);
+ UseDevicePtrCombinedInfo.Types.push_back(OMP_MAP_RETURN_PARAM);
+ UseDevicePtrCombinedInfo.Mappers.push_back(nullptr);
}
}
}
@@ -8008,12 +8444,13 @@ public:
for (const auto *C :
CurExecDir->getClausesOfKind<OMPUseDeviceAddrClause>()) {
for (const auto L : C->component_lists()) {
- assert(!L.second.empty() && "Not expecting empty list of components!");
- const ValueDecl *VD = L.second.back().getAssociatedDeclaration();
+ assert(!std::get<1>(L).empty() &&
+ "Not expecting empty list of components!");
+ const ValueDecl *VD = std::get<1>(L).back().getAssociatedDeclaration();
if (!Processed.insert(VD).second)
continue;
VD = cast<ValueDecl>(VD->getCanonicalDecl());
- const Expr *IE = L.second.back().getAssociatedExpression();
+ const Expr *IE = std::get<1>(L).back().getAssociatedExpression();
// If the first component is a member expression, we have to look into
// 'this', which maps to null in the map of map information. Otherwise
// look directly for the information.
@@ -8044,9 +8481,9 @@ public:
// Nonetheless, generateInfoForComponentList must be called to take
// the pointer into account for the calculation of the range of the
// partial struct.
- InfoGen(nullptr, L.second, OMPC_MAP_unknown, llvm::None,
- /*ReturnDevicePointer=*/false, C->isImplicit(),
- /*ForDeviceAddr=*/true);
+ InfoGen(nullptr, std::get<1>(L), OMPC_MAP_unknown, llvm::None,
+ llvm::None, /*ReturnDevicePointer=*/false, C->isImplicit(),
+ nullptr, nullptr, /*ForDeviceAddr=*/true);
DeferredInfo[nullptr].emplace_back(IE, VD, /*ForDeviceAddr=*/true);
} else {
llvm::Value *Ptr;
@@ -8054,26 +8491,23 @@ public:
Ptr = CGF.EmitLValue(IE).getPointer(CGF);
else
Ptr = CGF.EmitScalarExpr(IE);
- UseDevicePtrBasePointers.emplace_back(Ptr, VD);
- UseDevicePtrPointers.push_back(Ptr);
- UseDevicePtrSizes.push_back(
+ CombinedInfo.Exprs.push_back(VD);
+ CombinedInfo.BasePointers.emplace_back(Ptr, VD);
+ CombinedInfo.Pointers.push_back(Ptr);
+ CombinedInfo.Sizes.push_back(
llvm::Constant::getNullValue(CGF.Int64Ty));
- UseDevicePtrTypes.push_back(OMP_MAP_RETURN_PARAM |
- OMP_MAP_TARGET_PARAM);
+ CombinedInfo.Types.push_back(OMP_MAP_RETURN_PARAM);
+ CombinedInfo.Mappers.push_back(nullptr);
}
}
}
for (const auto &M : Info) {
- // We need to know when we generate information for the first component
- // associated with a capture, because the mapping flags depend on it.
- bool IsFirstComponentList = true;
+ // Underlying variable declaration used in the map clause.
+ const ValueDecl *VD = std::get<0>(M);
- // Temporary versions of arrays
- MapBaseValuesArrayTy CurBasePointers;
- MapValuesArrayTy CurPointers;
- MapValuesArrayTy CurSizes;
- MapFlagsArrayTy CurTypes;
+ // Temporary generated information.
+ MapCombinedInfoTy CurInfo;
StructRangeInfoTy PartialStruct;
for (const MapInfo &L : M.second) {
@@ -8081,16 +8515,18 @@ public:
"Not expecting declaration with no component lists.");
// Remember the current base pointer index.
- unsigned CurrentBasePointersIdx = CurBasePointers.size();
+ unsigned CurrentBasePointersIdx = CurInfo.BasePointers.size();
+ CurInfo.NonContigInfo.IsNonContiguous =
+ L.Components.back().isNonContiguous();
generateInfoForComponentList(
- L.MapType, L.MapModifiers, L.Components, CurBasePointers,
- CurPointers, CurSizes, CurTypes, PartialStruct,
- IsFirstComponentList, L.IsImplicit, L.ForDeviceAddr);
+ L.MapType, L.MapModifiers, L.MotionModifiers, L.Components, CurInfo,
+ PartialStruct, /*IsFirstComponentList=*/false, L.IsImplicit,
+ L.Mapper, L.ForDeviceAddr, VD, L.VarRef);
// If this entry relates with a device pointer, set the relevant
// declaration and add the 'return pointer' flag.
if (L.ReturnDevicePointer) {
- assert(CurBasePointers.size() > CurrentBasePointersIdx &&
+ assert(CurInfo.BasePointers.size() > CurrentBasePointersIdx &&
"Unexpected number of mapped base pointers.");
const ValueDecl *RelevantVD =
@@ -8098,10 +8534,10 @@ public:
assert(RelevantVD &&
"No relevant declaration related with device pointer??");
- CurBasePointers[CurrentBasePointersIdx].setDevicePtrDecl(RelevantVD);
- CurTypes[CurrentBasePointersIdx] |= OMP_MAP_RETURN_PARAM;
+ CurInfo.BasePointers[CurrentBasePointersIdx].setDevicePtrDecl(
+ RelevantVD);
+ CurInfo.Types[CurrentBasePointersIdx] |= OMP_MAP_RETURN_PARAM;
}
- IsFirstComponentList = false;
}
// Append any pending zero-length pointers which are struct members and
@@ -8120,7 +8556,7 @@ public:
// Entry is RETURN_PARAM. Also, set the placeholder value
// MEMBER_OF=FFFF so that the entry is later updated with the
// correct value of MEMBER_OF.
- CurTypes.push_back(OMP_MAP_RETURN_PARAM | OMP_MAP_MEMBER_OF);
+ CurInfo.Types.push_back(OMP_MAP_RETURN_PARAM | OMP_MAP_MEMBER_OF);
} else {
BasePtr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
Ptr = this->CGF.EmitLoadOfScalar(this->CGF.EmitLValue(L.IE),
@@ -8128,41 +8564,34 @@ public:
// Entry is PTR_AND_OBJ and RETURN_PARAM. Also, set the placeholder
// value MEMBER_OF=FFFF so that the entry is later updated with the
// correct value of MEMBER_OF.
- CurTypes.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_RETURN_PARAM |
- OMP_MAP_MEMBER_OF);
+ CurInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_RETURN_PARAM |
+ OMP_MAP_MEMBER_OF);
}
- CurBasePointers.emplace_back(BasePtr, L.VD);
- CurPointers.push_back(Ptr);
- CurSizes.push_back(llvm::Constant::getNullValue(this->CGF.Int64Ty));
+ CurInfo.Exprs.push_back(L.VD);
+ CurInfo.BasePointers.emplace_back(BasePtr, L.VD);
+ CurInfo.Pointers.push_back(Ptr);
+ CurInfo.Sizes.push_back(
+ llvm::Constant::getNullValue(this->CGF.Int64Ty));
+ CurInfo.Mappers.push_back(nullptr);
}
}
// If there is an entry in PartialStruct it means we have a struct with
// individual members mapped. Emit an extra combined entry.
if (PartialStruct.Base.isValid())
- emitCombinedEntry(BasePointers, Pointers, Sizes, Types, CurTypes,
- PartialStruct);
+ emitCombinedEntry(CombinedInfo, CurInfo.Types, PartialStruct, VD);
// We need to append the results of this capture to what we already have.
- BasePointers.append(CurBasePointers.begin(), CurBasePointers.end());
- Pointers.append(CurPointers.begin(), CurPointers.end());
- Sizes.append(CurSizes.begin(), CurSizes.end());
- Types.append(CurTypes.begin(), CurTypes.end());
+ CombinedInfo.append(CurInfo);
}
// Append data for use_device_ptr clauses.
- BasePointers.append(UseDevicePtrBasePointers.begin(),
- UseDevicePtrBasePointers.end());
- Pointers.append(UseDevicePtrPointers.begin(), UseDevicePtrPointers.end());
- Sizes.append(UseDevicePtrSizes.begin(), UseDevicePtrSizes.end());
- Types.append(UseDevicePtrTypes.begin(), UseDevicePtrTypes.end());
- }
-
- /// Generate all the base pointers, section pointers, sizes and map types for
- /// the extracted map clauses of user-defined mapper.
- void generateAllInfoForMapper(MapBaseValuesArrayTy &BasePointers,
- MapValuesArrayTy &Pointers,
- MapValuesArrayTy &Sizes,
- MapFlagsArrayTy &Types) const {
+ CombinedInfo.append(UseDevicePtrCombinedInfo);
+ }
+
+ /// Generate all the base pointers, section pointers, sizes, map types, and
+ /// mappers for the extracted map clauses of user-defined mapper (all included
+ /// in \a CombinedInfo).
+ void generateAllInfoForMapper(MapCombinedInfoTy &CombinedInfo) const {
assert(CurDir.is<const OMPDeclareMapperDecl *>() &&
"Expect a declare mapper directive");
const auto *CurMapperDir = CurDir.get<const OMPDeclareMapperDecl *>();
@@ -8171,25 +8600,22 @@ public:
// correctly. Therefore, we organize all lists in a map.
llvm::MapVector<const ValueDecl *, SmallVector<MapInfo, 8>> Info;
- // Helper function to fill the information map for the different supported
- // clauses.
- auto &&InfoGen = [&Info](
- const ValueDecl *D,
- OMPClauseMappableExprCommon::MappableExprComponentListRef L,
- OpenMPMapClauseKind MapType,
- ArrayRef<OpenMPMapModifierKind> MapModifiers,
- bool ReturnDevicePointer, bool IsImplicit) {
- const ValueDecl *VD =
- D ? cast<ValueDecl>(D->getCanonicalDecl()) : nullptr;
- Info[VD].emplace_back(L, MapType, MapModifiers, ReturnDevicePointer,
- IsImplicit);
- };
-
+ // Fill the information map for map clauses.
for (const auto *C : CurMapperDir->clauselists()) {
const auto *MC = cast<OMPMapClause>(C);
+ const auto *EI = MC->getVarRefs().begin();
for (const auto L : MC->component_lists()) {
- InfoGen(L.first, L.second, MC->getMapType(), MC->getMapTypeModifiers(),
- /*ReturnDevicePointer=*/false, MC->isImplicit());
+ // The expression is not correct if the mapping is implicit.
+ const Expr *E = (MC->getMapLoc().isValid()) ? *EI : nullptr;
+ const ValueDecl *VD =
+ std::get<0>(L) ? cast<ValueDecl>(std::get<0>(L)->getCanonicalDecl())
+ : nullptr;
+ // Get the corresponding user-defined mapper.
+ Info[VD].emplace_back(std::get<1>(L), MC->getMapType(),
+ MC->getMapTypeModifiers(), llvm::None,
+ /*ReturnDevicePointer=*/false, MC->isImplicit(),
+ std::get<2>(L), E);
+ ++EI;
}
}
@@ -8198,42 +8624,38 @@ public:
// associated with a capture, because the mapping flags depend on it.
bool IsFirstComponentList = true;
- // Temporary versions of arrays
- MapBaseValuesArrayTy CurBasePointers;
- MapValuesArrayTy CurPointers;
- MapValuesArrayTy CurSizes;
- MapFlagsArrayTy CurTypes;
+ // Underlying variable declaration used in the map clause.
+ const ValueDecl *VD = std::get<0>(M);
+
+ // Temporary generated information.
+ MapCombinedInfoTy CurInfo;
StructRangeInfoTy PartialStruct;
for (const MapInfo &L : M.second) {
assert(!L.Components.empty() &&
"Not expecting declaration with no component lists.");
generateInfoForComponentList(
- L.MapType, L.MapModifiers, L.Components, CurBasePointers,
- CurPointers, CurSizes, CurTypes, PartialStruct,
- IsFirstComponentList, L.IsImplicit, L.ForDeviceAddr);
+ L.MapType, L.MapModifiers, L.MotionModifiers, L.Components, CurInfo,
+ PartialStruct, IsFirstComponentList, L.IsImplicit, L.Mapper,
+ L.ForDeviceAddr, VD, L.VarRef);
IsFirstComponentList = false;
}
// If there is an entry in PartialStruct it means we have a struct with
// individual members mapped. Emit an extra combined entry.
- if (PartialStruct.Base.isValid())
- emitCombinedEntry(BasePointers, Pointers, Sizes, Types, CurTypes,
- PartialStruct);
+ if (PartialStruct.Base.isValid()) {
+ CurInfo.NonContigInfo.Dims.push_back(0);
+ emitCombinedEntry(CombinedInfo, CurInfo.Types, PartialStruct, VD);
+ }
// We need to append the results of this capture to what we already have.
- BasePointers.append(CurBasePointers.begin(), CurBasePointers.end());
- Pointers.append(CurPointers.begin(), CurPointers.end());
- Sizes.append(CurSizes.begin(), CurSizes.end());
- Types.append(CurTypes.begin(), CurTypes.end());
+ CombinedInfo.append(CurInfo);
}
}
/// Emit capture info for lambdas for variables captured by reference.
void generateInfoForLambdaCaptures(
- const ValueDecl *VD, llvm::Value *Arg, MapBaseValuesArrayTy &BasePointers,
- MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes,
- MapFlagsArrayTy &Types,
+ const ValueDecl *VD, llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers) const {
const auto *RD = VD->getType()
.getCanonicalType()
@@ -8253,13 +8675,15 @@ public:
LValue ThisLValVal = CGF.EmitLValueForField(VDLVal, ThisCapture);
LambdaPointers.try_emplace(ThisLVal.getPointer(CGF),
VDLVal.getPointer(CGF));
- BasePointers.push_back(ThisLVal.getPointer(CGF));
- Pointers.push_back(ThisLValVal.getPointer(CGF));
- Sizes.push_back(
+ CombinedInfo.Exprs.push_back(VD);
+ CombinedInfo.BasePointers.push_back(ThisLVal.getPointer(CGF));
+ CombinedInfo.Pointers.push_back(ThisLValVal.getPointer(CGF));
+ CombinedInfo.Sizes.push_back(
CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy),
CGF.Int64Ty, /*isSigned=*/true));
- Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
- OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
+ CombinedInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
+ OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
+ CombinedInfo.Mappers.push_back(nullptr);
}
for (const LambdaCapture &LC : RD->captures()) {
if (!LC.capturesVariable())
@@ -8274,9 +8698,10 @@ public:
LValue VarLValVal = CGF.EmitLValueForField(VDLVal, It->second);
LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
VDLVal.getPointer(CGF));
- BasePointers.push_back(VarLVal.getPointer(CGF));
- Pointers.push_back(VarLValVal.getPointer(CGF));
- Sizes.push_back(CGF.Builder.CreateIntCast(
+ CombinedInfo.Exprs.push_back(VD);
+ CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
+ CombinedInfo.Pointers.push_back(VarLValVal.getPointer(CGF));
+ CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
CGF.getTypeSize(
VD->getType().getCanonicalType().getNonReferenceType()),
CGF.Int64Ty, /*isSigned=*/true));
@@ -8284,12 +8709,14 @@ public:
RValue VarRVal = CGF.EmitLoadOfLValue(VarLVal, RD->getLocation());
LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
VDLVal.getPointer(CGF));
- BasePointers.push_back(VarLVal.getPointer(CGF));
- Pointers.push_back(VarRVal.getScalarVal());
- Sizes.push_back(llvm::ConstantInt::get(CGF.Int64Ty, 0));
+ CombinedInfo.Exprs.push_back(VD);
+ CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
+ CombinedInfo.Pointers.push_back(VarRVal.getScalarVal());
+ CombinedInfo.Sizes.push_back(llvm::ConstantInt::get(CGF.Int64Ty, 0));
}
- Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
- OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
+ CombinedInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
+ OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
+ CombinedInfo.Mappers.push_back(nullptr);
}
}
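As a point of reference, a hypothetical lambda-capture scenario (not part of the patch) that this helper covers: the lambda object is the mapped capture, and each by-reference capture gets a PTR_AND_OBJ | MEMBER_OF | IMPLICIT entry pointing back into it.

  void run(int *buf, int n) {
    auto body = [&buf, &n]() { buf[0] = n; };
    #pragma omp target // 'body' is captured; its reference captures are remapped
    body();
  }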
@@ -8322,13 +8749,10 @@ public:
}
}
- /// Generate the base pointers, section pointers, sizes and map types
- /// associated to a given capture.
+ /// Generate the base pointers, section pointers, sizes, map types, and
+ /// mappers associated with a given capture (all included in \a CombinedInfo).
void generateInfoForCapture(const CapturedStmt::Capture *Cap,
- llvm::Value *Arg,
- MapBaseValuesArrayTy &BasePointers,
- MapValuesArrayTy &Pointers,
- MapValuesArrayTy &Sizes, MapFlagsArrayTy &Types,
+ llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
StructRangeInfoTy &PartialStruct) const {
assert(!Cap->capturesVariableArrayType() &&
"Not expecting to generate map info for a variable array type!");
@@ -8342,31 +8766,42 @@ public:
// pass the pointer by value. If it is a reference to a declaration, we just
// pass its value.
if (DevPointersMap.count(VD)) {
- BasePointers.emplace_back(Arg, VD);
- Pointers.push_back(Arg);
- Sizes.push_back(
- CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy),
- CGF.Int64Ty, /*isSigned=*/true));
- Types.push_back(OMP_MAP_LITERAL | OMP_MAP_TARGET_PARAM);
+ CombinedInfo.Exprs.push_back(VD);
+ CombinedInfo.BasePointers.emplace_back(Arg, VD);
+ CombinedInfo.Pointers.push_back(Arg);
+ CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
+ CGF.getTypeSize(CGF.getContext().VoidPtrTy), CGF.Int64Ty,
+ /*isSigned=*/true));
+ CombinedInfo.Types.push_back(
+ (Cap->capturesVariable() ? OMP_MAP_TO : OMP_MAP_LITERAL) |
+ OMP_MAP_TARGET_PARAM);
+ CombinedInfo.Mappers.push_back(nullptr);
return;
}
using MapData =
std::tuple<OMPClauseMappableExprCommon::MappableExprComponentListRef,
- OpenMPMapClauseKind, ArrayRef<OpenMPMapModifierKind>, bool>;
+ OpenMPMapClauseKind, ArrayRef<OpenMPMapModifierKind>, bool,
+ const ValueDecl *, const Expr *>;
SmallVector<MapData, 4> DeclComponentLists;
assert(CurDir.is<const OMPExecutableDirective *>() &&
"Expect a executable directive");
const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) {
+ const auto *EI = C->getVarRefs().begin();
for (const auto L : C->decl_component_lists(VD)) {
- assert(L.first == VD &&
- "We got information for the wrong declaration??");
- assert(!L.second.empty() &&
+ const ValueDecl *VDecl, *Mapper;
+ // The expression is not correct if the mapping is implicit.
+ const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;
+ OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
+ std::tie(VDecl, Components, Mapper) = L;
+ assert(VDecl == VD && "We got information for the wrong declaration??");
+ assert(!Components.empty() &&
"Not expecting declaration with no component lists.");
- DeclComponentLists.emplace_back(L.second, C->getMapType(),
+ DeclComponentLists.emplace_back(Components, C->getMapType(),
C->getMapTypeModifiers(),
- C->isImplicit());
+ C->isImplicit(), Mapper, E);
+ ++EI;
}
}
@@ -8383,11 +8818,15 @@ public:
OpenMPMapClauseKind MapType;
ArrayRef<OpenMPMapModifierKind> MapModifiers;
bool IsImplicit;
- std::tie(Components, MapType, MapModifiers, IsImplicit) = L;
+ const ValueDecl *Mapper;
+ const Expr *VarRef;
+ std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
+ L;
++Count;
for (const MapData &L1 : makeArrayRef(DeclComponentLists).slice(Count)) {
OMPClauseMappableExprCommon::MappableExprComponentListRef Components1;
- std::tie(Components1, MapType, MapModifiers, IsImplicit) = L1;
+ std::tie(Components1, MapType, MapModifiers, IsImplicit, Mapper,
+ VarRef) = L1;
auto CI = Components.rbegin();
auto CE = Components.rend();
auto SI = Components1.rbegin();
@@ -8473,14 +8912,17 @@ public:
OpenMPMapClauseKind MapType;
ArrayRef<OpenMPMapModifierKind> MapModifiers;
bool IsImplicit;
- std::tie(Components, MapType, MapModifiers, IsImplicit) = L;
+ const ValueDecl *Mapper;
+ const Expr *VarRef;
+ std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
+ L;
ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
OverlappedComponents = Pair.getSecond();
bool IsFirstComponentList = true;
generateInfoForComponentList(
- MapType, MapModifiers, Components, BasePointers, Pointers, Sizes,
- Types, PartialStruct, IsFirstComponentList, IsImplicit,
- /*ForDeviceAddr=*/false, OverlappedComponents);
+ MapType, MapModifiers, llvm::None, Components, CombinedInfo,
+ PartialStruct, IsFirstComponentList, IsImplicit, Mapper,
+ /*ForDeviceAddr=*/false, VD, VarRef, OverlappedComponents);
}
// Go through other elements without overlapped elements.
bool IsFirstComponentList = OverlappedData.empty();
@@ -8489,86 +8931,54 @@ public:
OpenMPMapClauseKind MapType;
ArrayRef<OpenMPMapModifierKind> MapModifiers;
bool IsImplicit;
- std::tie(Components, MapType, MapModifiers, IsImplicit) = L;
+ const ValueDecl *Mapper;
+ const Expr *VarRef;
+ std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
+ L;
auto It = OverlappedData.find(&L);
if (It == OverlappedData.end())
- generateInfoForComponentList(MapType, MapModifiers, Components,
- BasePointers, Pointers, Sizes, Types,
- PartialStruct, IsFirstComponentList,
- IsImplicit);
+ generateInfoForComponentList(MapType, MapModifiers, llvm::None,
+ Components, CombinedInfo, PartialStruct,
+ IsFirstComponentList, IsImplicit, Mapper,
+ /*ForDeviceAddr=*/false, VD, VarRef);
IsFirstComponentList = false;
}
}
- /// Generate the base pointers, section pointers, sizes and map types
- /// associated with the declare target link variables.
- void generateInfoForDeclareTargetLink(MapBaseValuesArrayTy &BasePointers,
- MapValuesArrayTy &Pointers,
- MapValuesArrayTy &Sizes,
- MapFlagsArrayTy &Types) const {
- assert(CurDir.is<const OMPExecutableDirective *>() &&
- "Expect a executable directive");
- const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
- // Map other list items in the map clause which are not captured variables
- // but "declare target link" global variables.
- for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) {
- for (const auto L : C->component_lists()) {
- if (!L.first)
- continue;
- const auto *VD = dyn_cast<VarDecl>(L.first);
- if (!VD)
- continue;
- llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
- OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
- if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
- !Res || *Res != OMPDeclareTargetDeclAttr::MT_Link)
- continue;
- StructRangeInfoTy PartialStruct;
- generateInfoForComponentList(
- C->getMapType(), C->getMapTypeModifiers(), L.second, BasePointers,
- Pointers, Sizes, Types, PartialStruct,
- /*IsFirstComponentList=*/true, C->isImplicit());
- assert(!PartialStruct.Base.isValid() &&
- "No partial structs for declare target link expected.");
- }
- }
- }
-
/// Generate the default map information for a given capture \a CI,
/// record field declaration \a RI and captured value \a CV.
void generateDefaultMapInfo(const CapturedStmt::Capture &CI,
const FieldDecl &RI, llvm::Value *CV,
- MapBaseValuesArrayTy &CurBasePointers,
- MapValuesArrayTy &CurPointers,
- MapValuesArrayTy &CurSizes,
- MapFlagsArrayTy &CurMapTypes) const {
+ MapCombinedInfoTy &CombinedInfo) const {
bool IsImplicit = true;
// Do the default mapping.
if (CI.capturesThis()) {
- CurBasePointers.push_back(CV);
- CurPointers.push_back(CV);
+ CombinedInfo.Exprs.push_back(nullptr);
+ CombinedInfo.BasePointers.push_back(CV);
+ CombinedInfo.Pointers.push_back(CV);
const auto *PtrTy = cast<PointerType>(RI.getType().getTypePtr());
- CurSizes.push_back(
+ CombinedInfo.Sizes.push_back(
CGF.Builder.CreateIntCast(CGF.getTypeSize(PtrTy->getPointeeType()),
CGF.Int64Ty, /*isSigned=*/true));
// Default map type.
- CurMapTypes.push_back(OMP_MAP_TO | OMP_MAP_FROM);
+ CombinedInfo.Types.push_back(OMP_MAP_TO | OMP_MAP_FROM);
} else if (CI.capturesVariableByCopy()) {
- CurBasePointers.push_back(CV);
- CurPointers.push_back(CV);
+ const VarDecl *VD = CI.getCapturedVar();
+ CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
+ CombinedInfo.BasePointers.push_back(CV);
+ CombinedInfo.Pointers.push_back(CV);
if (!RI.getType()->isAnyPointerType()) {
// We have to signal to the runtime captures passed by value that are
// not pointers.
- CurMapTypes.push_back(OMP_MAP_LITERAL);
- CurSizes.push_back(CGF.Builder.CreateIntCast(
+ CombinedInfo.Types.push_back(OMP_MAP_LITERAL);
+ CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
CGF.getTypeSize(RI.getType()), CGF.Int64Ty, /*isSigned=*/true));
} else {
// Pointers are implicitly mapped with a zero size and no flags
// (other than first map that is added for all implicit maps).
- CurMapTypes.push_back(OMP_MAP_NONE);
- CurSizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
+ CombinedInfo.Types.push_back(OMP_MAP_NONE);
+ CombinedInfo.Sizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
}
- const VarDecl *VD = CI.getCapturedVar();
auto I = FirstPrivateDecls.find(VD);
if (I != FirstPrivateDecls.end())
IsImplicit = I->getSecond();
@@ -8576,12 +8986,12 @@ public:
assert(CI.capturesVariable() && "Expected captured reference.");
const auto *PtrTy = cast<ReferenceType>(RI.getType().getTypePtr());
QualType ElementType = PtrTy->getPointeeType();
- CurSizes.push_back(CGF.Builder.CreateIntCast(
+ CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
CGF.getTypeSize(ElementType), CGF.Int64Ty, /*isSigned=*/true));
// The default map type for a scalar/complex type is 'to' because by
// default the value doesn't have to be retrieved. For an aggregate
// type, the default is 'tofrom'.
- CurMapTypes.push_back(getMapModifiersForPrivateClauses(CI));
+ CombinedInfo.Types.push_back(getMapModifiersForPrivateClauses(CI));
const VarDecl *VD = CI.getCapturedVar();
auto I = FirstPrivateDecls.find(VD);
if (I != FirstPrivateDecls.end() &&
@@ -8592,56 +9002,157 @@ public:
CGF.Builder.CreateMemCpy(
CGF.MakeNaturalAlignAddrLValue(Addr, ElementType).getAddress(CGF),
Address(CV, CGF.getContext().getTypeAlignInChars(ElementType)),
- CurSizes.back(), /*IsVolatile=*/false);
+ CombinedInfo.Sizes.back(), /*IsVolatile=*/false);
// Use new global variable as the base pointers.
- CurBasePointers.push_back(Addr);
- CurPointers.push_back(Addr);
+ CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
+ CombinedInfo.BasePointers.push_back(Addr);
+ CombinedInfo.Pointers.push_back(Addr);
} else {
- CurBasePointers.push_back(CV);
+ CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
+ CombinedInfo.BasePointers.push_back(CV);
if (I != FirstPrivateDecls.end() && ElementType->isAnyPointerType()) {
Address PtrAddr = CGF.EmitLoadOfReference(CGF.MakeAddrLValue(
CV, ElementType, CGF.getContext().getDeclAlign(VD),
AlignmentSource::Decl));
- CurPointers.push_back(PtrAddr.getPointer());
+ CombinedInfo.Pointers.push_back(PtrAddr.getPointer());
} else {
- CurPointers.push_back(CV);
+ CombinedInfo.Pointers.push_back(CV);
}
}
if (I != FirstPrivateDecls.end())
IsImplicit = I->getSecond();
}
// Every default map produces a single argument which is a target parameter.
- CurMapTypes.back() |= OMP_MAP_TARGET_PARAM;
+ CombinedInfo.Types.back() |= OMP_MAP_TARGET_PARAM;
// Add flag stating this is an implicit map.
if (IsImplicit)
- CurMapTypes.back() |= OMP_MAP_IMPLICIT;
+ CombinedInfo.Types.back() |= OMP_MAP_IMPLICIT;
+
+ // No user-defined mapper for default mapping.
+ CombinedInfo.Mappers.push_back(nullptr);
}
};
} // anonymous namespace
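For context, a minimal sketch (illustrative only) of the implicit captures the default mapping above applies to: an aggregate captured by reference defaults to 'tofrom', a non-pointer scalar captured by value is passed as a literal with its size, and both entries also receive OMP_MAP_TARGET_PARAM and OMP_MAP_IMPLICIT.

  struct S { double v[4]; };
  void kernel(S &s, int n) {
    #pragma omp target // 's' and 'n' are mapped implicitly
    { s.v[0] = n; }
  }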
+static void emitNonContiguousDescriptor(
+ CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo,
+ CGOpenMPRuntime::TargetDataInfo &Info) {
+ CodeGenModule &CGM = CGF.CGM;
+ MappableExprsHandler::MapCombinedInfoTy::StructNonContiguousInfo
+ &NonContigInfo = CombinedInfo.NonContigInfo;
+
+ // Build an array of struct descriptor_dim and then assign it to
+ // offload_args.
+ //
+ // struct descriptor_dim {
+ // uint64_t offset;
+ // uint64_t count;
+ // uint64_t stride
+ // };
+ ASTContext &C = CGF.getContext();
+ QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
+ RecordDecl *RD;
+ RD = C.buildImplicitRecord("descriptor_dim");
+ RD->startDefinition();
+ addFieldToRecordDecl(C, RD, Int64Ty);
+ addFieldToRecordDecl(C, RD, Int64Ty);
+ addFieldToRecordDecl(C, RD, Int64Ty);
+ RD->completeDefinition();
+ QualType DimTy = C.getRecordType(RD);
+
+ enum { OffsetFD = 0, CountFD, StrideFD };
+ // We need two index variables here since the size of "Dims" is the same as
+ // the size of Components; however, the sizes of the offset, count, and
+ // stride lists match the number of base declarations that are non-contiguous.
+ for (unsigned I = 0, L = 0, E = NonContigInfo.Dims.size(); I < E; ++I) {
+ // Skip emitting IR if the dimension size is 1, since it cannot be
+ // non-contiguous.
+ if (NonContigInfo.Dims[I] == 1)
+ continue;
+ llvm::APInt Size(/*numBits=*/32, NonContigInfo.Dims[I]);
+ QualType ArrayTy =
+ C.getConstantArrayType(DimTy, Size, nullptr, ArrayType::Normal, 0);
+ Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims");
+ for (unsigned II = 0, EE = NonContigInfo.Dims[I]; II < EE; ++II) {
+ unsigned RevIdx = EE - II - 1;
+ LValue DimsLVal = CGF.MakeAddrLValue(
+ CGF.Builder.CreateConstArrayGEP(DimsAddr, II), DimTy);
+ // Offset
+ LValue OffsetLVal = CGF.EmitLValueForField(
+ DimsLVal, *std::next(RD->field_begin(), OffsetFD));
+ CGF.EmitStoreOfScalar(NonContigInfo.Offsets[L][RevIdx], OffsetLVal);
+ // Count
+ LValue CountLVal = CGF.EmitLValueForField(
+ DimsLVal, *std::next(RD->field_begin(), CountFD));
+ CGF.EmitStoreOfScalar(NonContigInfo.Counts[L][RevIdx], CountLVal);
+ // Stride
+ LValue StrideLVal = CGF.EmitLValueForField(
+ DimsLVal, *std::next(RD->field_begin(), StrideFD));
+ CGF.EmitStoreOfScalar(NonContigInfo.Strides[L][RevIdx], StrideLVal);
+ }
+ // args[I] = &dims
+ Address DAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
+ DimsAddr, CGM.Int8PtrTy);
+ llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32(
+ llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
+ Info.PointersArray, 0, I);
+ Address PAddr(P, CGF.getPointerAlign());
+ CGF.Builder.CreateStore(DAddr.getPointer(), PAddr);
+ ++L;
+ }
+}
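As an illustration (assumed usage, not part of this patch), a strided target update of the kind this descriptor path supports: each dimension of the section contributes an {offset, count, stride} triple, stored in reverse order per the RevIdx computation above, while one-dimensional entries are skipped because they cannot be non-contiguous.

  void update_strided(double (*a)[16], int n) {
    #pragma omp target update to(a[0:n:2][0:8:2])
  }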
+
+/// Emit a string constant containing the names of the values mapped to the
+/// offloading runtime library.
+llvm::Constant *
+emitMappingInformation(CodeGenFunction &CGF, llvm::OpenMPIRBuilder &OMPBuilder,
+ MappableExprsHandler::MappingExprInfo &MapExprs) {
+ llvm::Constant *SrcLocStr;
+ if (!MapExprs.getMapDecl()) {
+ SrcLocStr = OMPBuilder.getOrCreateDefaultSrcLocStr();
+ } else {
+ std::string ExprName = "";
+ if (MapExprs.getMapExpr()) {
+ PrintingPolicy P(CGF.getContext().getLangOpts());
+ llvm::raw_string_ostream OS(ExprName);
+ MapExprs.getMapExpr()->printPretty(OS, nullptr, P);
+ OS.flush();
+ } else {
+ ExprName = MapExprs.getMapDecl()->getNameAsString();
+ }
+
+ SourceLocation Loc = MapExprs.getMapDecl()->getLocation();
+ PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
+ const char *FileName = PLoc.getFilename();
+ unsigned Line = PLoc.getLine();
+ unsigned Column = PLoc.getColumn();
+ SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(FileName, ExprName.c_str(),
+ Line, Column);
+ }
+
+ return SrcLocStr;
+}
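A purely illustrative example of the names this helper records when debug information is requested (file, line, and column come from the presumed location of the mapped declaration); the entry below would be identified by the pretty-printed section expression "a[0:n]".

  void copy_section(double *a, int n) {
    #pragma omp target map(tofrom : a[0:n])
    { a[0] += 1.0; }
  }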
+
/// Emit the arrays used to pass the captures and map information to the
/// offloading runtime library. If there is no map or capture information,
/// return nullptr by reference.
-static void
-emitOffloadingArrays(CodeGenFunction &CGF,
- MappableExprsHandler::MapBaseValuesArrayTy &BasePointers,
- MappableExprsHandler::MapValuesArrayTy &Pointers,
- MappableExprsHandler::MapValuesArrayTy &Sizes,
- MappableExprsHandler::MapFlagsArrayTy &MapTypes,
- CGOpenMPRuntime::TargetDataInfo &Info) {
+static void emitOffloadingArrays(
+ CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo,
+ CGOpenMPRuntime::TargetDataInfo &Info, llvm::OpenMPIRBuilder &OMPBuilder,
+ bool IsNonContiguous = false) {
CodeGenModule &CGM = CGF.CGM;
ASTContext &Ctx = CGF.getContext();
// Reset the array information.
Info.clearArrayInfo();
- Info.NumberOfPtrs = BasePointers.size();
+ Info.NumberOfPtrs = CombinedInfo.BasePointers.size();
if (Info.NumberOfPtrs) {
// Detect if we have any capture size requiring runtime evaluation of the
// size so that a constant array could be eventually used.
bool hasRuntimeEvaluationCaptureSize = false;
- for (llvm::Value *S : Sizes)
+ for (llvm::Value *S : CombinedInfo.Sizes)
if (!isa<llvm::Constant>(S)) {
hasRuntimeEvaluationCaptureSize = true;
break;
@@ -8656,6 +9167,9 @@ emitOffloadingArrays(CodeGenFunction &CGF,
CGF.CreateMemTemp(PointerArrayType, ".offload_baseptrs").getPointer();
Info.PointersArray =
CGF.CreateMemTemp(PointerArrayType, ".offload_ptrs").getPointer();
+ Address MappersArray =
+ CGF.CreateMemTemp(PointerArrayType, ".offload_mappers");
+ Info.MappersArray = MappersArray.getPointer();
// If we don't have any VLA types or other types that require runtime
// evaluation, we can use a constant array for the map sizes, otherwise we
@@ -8672,8 +9186,15 @@ emitOffloadingArrays(CodeGenFunction &CGF,
// We expect all the sizes to be constant, so we collect them to create
// a constant array.
SmallVector<llvm::Constant *, 16> ConstSizes;
- for (llvm::Value *S : Sizes)
- ConstSizes.push_back(cast<llvm::Constant>(S));
+ for (unsigned I = 0, E = CombinedInfo.Sizes.size(); I < E; ++I) {
+ if (IsNonContiguous &&
+ (CombinedInfo.Types[I] & MappableExprsHandler::OMP_MAP_NON_CONTIG)) {
+ ConstSizes.push_back(llvm::ConstantInt::get(
+ CGF.Int64Ty, CombinedInfo.NonContigInfo.Dims[I]));
+ } else {
+ ConstSizes.push_back(cast<llvm::Constant>(CombinedInfo.Sizes[I]));
+ }
+ }
auto *SizesArrayInit = llvm::ConstantArray::get(
llvm::ArrayType::get(CGM.Int64Ty, ConstSizes.size()), ConstSizes);
@@ -8688,8 +9209,8 @@ emitOffloadingArrays(CodeGenFunction &CGF,
// The map types are always constant so we don't need to generate code to
// fill arrays. Instead, we create an array constant.
- SmallVector<uint64_t, 4> Mapping(MapTypes.size(), 0);
- llvm::copy(MapTypes, Mapping.begin());
+ SmallVector<uint64_t, 4> Mapping(CombinedInfo.Types.size(), 0);
+ llvm::copy(CombinedInfo.Types, Mapping.begin());
llvm::Constant *MapTypesArrayInit =
llvm::ConstantDataArray::get(CGF.Builder.getContext(), Mapping);
std::string MaptypesName =
@@ -8701,8 +9222,57 @@ emitOffloadingArrays(CodeGenFunction &CGF,
MapTypesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
Info.MapTypesArray = MapTypesArrayGbl;
+ // The map names are only built if debug information is requested.
+ if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo) {
+ Info.MapNamesArray = llvm::Constant::getNullValue(
+ llvm::Type::getInt8Ty(CGF.Builder.getContext())->getPointerTo());
+ } else {
+ auto fillInfoMap = [&](MappableExprsHandler::MappingExprInfo &MapExpr) {
+ return emitMappingInformation(CGF, OMPBuilder, MapExpr);
+ };
+ SmallVector<llvm::Constant *, 4> InfoMap(CombinedInfo.Exprs.size());
+ llvm::transform(CombinedInfo.Exprs, InfoMap.begin(), fillInfoMap);
+
+ llvm::Constant *MapNamesArrayInit = llvm::ConstantArray::get(
+ llvm::ArrayType::get(
+ llvm::Type::getInt8Ty(CGF.Builder.getContext())->getPointerTo(),
+ CombinedInfo.Exprs.size()),
+ InfoMap);
+ auto *MapNamesArrayGbl = new llvm::GlobalVariable(
+ CGM.getModule(), MapNamesArrayInit->getType(),
+ /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
+ MapNamesArrayInit,
+ CGM.getOpenMPRuntime().getName({"offload_mapnames"}));
+ Info.MapNamesArray = MapNamesArrayGbl;
+ }
+
+ // If there's a present map type modifier, it must not be applied to the end
+ // of a region, so generate a separate map type array in that case.
+ if (Info.separateBeginEndCalls()) {
+ bool EndMapTypesDiffer = false;
+ for (uint64_t &Type : Mapping) {
+ if (Type & MappableExprsHandler::OMP_MAP_PRESENT) {
+ Type &= ~MappableExprsHandler::OMP_MAP_PRESENT;
+ EndMapTypesDiffer = true;
+ }
+ }
+ if (EndMapTypesDiffer) {
+ MapTypesArrayInit =
+ llvm::ConstantDataArray::get(CGF.Builder.getContext(), Mapping);
+ MaptypesName = CGM.getOpenMPRuntime().getName({"offload_maptypes"});
+ MapTypesArrayGbl = new llvm::GlobalVariable(
+ CGM.getModule(), MapTypesArrayInit->getType(),
+ /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
+ MapTypesArrayInit, MaptypesName);
+ MapTypesArrayGbl->setUnnamedAddr(
+ llvm::GlobalValue::UnnamedAddr::Global);
+ Info.MapTypesArrayEnd = MapTypesArrayGbl;
+ }
+ }
+
for (unsigned I = 0; I < Info.NumberOfPtrs; ++I) {
- llvm::Value *BPVal = *BasePointers[I];
+ llvm::Value *BPVal = *CombinedInfo.BasePointers[I];
llvm::Value *BP = CGF.Builder.CreateConstInBoundsGEP2_32(
llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
Info.BasePointersArray, 0, I);
@@ -8712,10 +9282,11 @@ emitOffloadingArrays(CodeGenFunction &CGF,
CGF.Builder.CreateStore(BPVal, BPAddr);
if (Info.requiresDevicePointerInfo())
- if (const ValueDecl *DevVD = BasePointers[I].getDevicePtrDecl())
+ if (const ValueDecl *DevVD =
+ CombinedInfo.BasePointers[I].getDevicePtrDecl())
Info.CaptureDeviceAddrMap.try_emplace(DevVD, BPAddr);
- llvm::Value *PVal = Pointers[I];
+ llvm::Value *PVal = CombinedInfo.Pointers[I];
llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32(
llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
Info.PointersArray, 0, I);
@@ -8731,20 +9302,53 @@ emitOffloadingArrays(CodeGenFunction &CGF,
/*Idx0=*/0,
/*Idx1=*/I);
Address SAddr(S, Ctx.getTypeAlignInChars(Int64Ty));
- CGF.Builder.CreateStore(
- CGF.Builder.CreateIntCast(Sizes[I], CGM.Int64Ty, /*isSigned=*/true),
- SAddr);
+ CGF.Builder.CreateStore(CGF.Builder.CreateIntCast(CombinedInfo.Sizes[I],
+ CGM.Int64Ty,
+ /*isSigned=*/true),
+ SAddr);
+ }
+
+ // Fill up the mapper array.
+ llvm::Value *MFunc = llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
+ if (CombinedInfo.Mappers[I]) {
+ MFunc = CGM.getOpenMPRuntime().getOrCreateUserDefinedMapperFunc(
+ cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I]));
+ MFunc = CGF.Builder.CreatePointerCast(MFunc, CGM.VoidPtrTy);
+ Info.HasMapper = true;
}
+ Address MAddr = CGF.Builder.CreateConstArrayGEP(MappersArray, I);
+ CGF.Builder.CreateStore(MFunc, MAddr);
}
}
+
+ if (!IsNonContiguous || CombinedInfo.NonContigInfo.Offsets.empty() ||
+ Info.NumberOfPtrs == 0)
+ return;
+
+ emitNonContiguousDescriptor(CGF, CombinedInfo, Info);
}
+namespace {
+/// Additional arguments for the emitOffloadingArraysArgument function.
+struct ArgumentsOptions {
+ bool ForEndCall = false;
+ ArgumentsOptions() = default;
+ ArgumentsOptions(bool ForEndCall) : ForEndCall(ForEndCall) {}
+};
+} // namespace
+
/// Emit the arguments to be passed to the runtime library based on the
-/// arrays of pointers, sizes and map types.
+/// arrays of base pointers, pointers, sizes, map types, and mappers. If
+/// ForEndCall, emit map types to be passed for the end of the region instead of
+/// the beginning.
static void emitOffloadingArraysArgument(
CodeGenFunction &CGF, llvm::Value *&BasePointersArrayArg,
llvm::Value *&PointersArrayArg, llvm::Value *&SizesArrayArg,
- llvm::Value *&MapTypesArrayArg, CGOpenMPRuntime::TargetDataInfo &Info) {
+ llvm::Value *&MapTypesArrayArg, llvm::Value *&MapNamesArrayArg,
+ llvm::Value *&MappersArrayArg, CGOpenMPRuntime::TargetDataInfo &Info,
+ const ArgumentsOptions &Options = ArgumentsOptions()) {
+ assert((!Options.ForEndCall || Info.separateBeginEndCalls()) &&
+ "expected region end call to runtime only when end call is separate");
CodeGenModule &CGM = CGF.CGM;
if (Info.NumberOfPtrs) {
BasePointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
@@ -8761,15 +9365,36 @@ static void emitOffloadingArraysArgument(
/*Idx0=*/0, /*Idx1=*/0);
MapTypesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs),
- Info.MapTypesArray,
+ Options.ForEndCall && Info.MapTypesArrayEnd ? Info.MapTypesArrayEnd
+ : Info.MapTypesArray,
/*Idx0=*/0,
/*Idx1=*/0);
+
+ // Only emit the map names array argument if debug information is
+ // requested.
+ if (CGF.CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo)
+ MapNamesArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
+ else
+ MapNamesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
+ llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
+ Info.MapNamesArray,
+ /*Idx0=*/0,
+ /*Idx1=*/0);
+ // If there is no user-defined mapper, set the mapper array to nullptr to
+ // avoid an unnecessary data privatization.
+ if (!Info.HasMapper)
+ MappersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
+ else
+ MappersArrayArg =
+ CGF.Builder.CreatePointerCast(Info.MappersArray, CGM.VoidPtrPtrTy);
} else {
BasePointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
PointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
SizesArrayArg = llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo());
MapTypesArrayArg =
llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo());
+ MapNamesArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
+ MappersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
}
}
@@ -8885,7 +9510,8 @@ getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) {
/// \code
/// void .omp_mapper.<type_name>.<mapper_id>.(void *rt_mapper_handle,
/// void *base, void *begin,
-/// int64_t size, int64_t type) {
+/// int64_t size, int64_t type,
+/// void *name = nullptr) {
/// // Allocate space for an array section first.
/// if (size > 1 && !maptype.IsDelete)
/// __tgt_push_mapper_component(rt_mapper_handle, base, begin,
@@ -8896,10 +9522,11 @@ getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) {
/// for (auto c : all_components) {
/// if (c.hasMapper())
/// (*c.Mapper())(rt_mapper_handle, c.arg_base, c.arg_begin, c.arg_size,
-/// c.arg_type);
+/// c.arg_type, c.arg_name);
/// else
/// __tgt_push_mapper_component(rt_mapper_handle, c.arg_base,
-/// c.arg_begin, c.arg_size, c.arg_type);
+/// c.arg_begin, c.arg_size, c.arg_type,
+/// c.arg_name);
/// }
/// }
/// // Delete the array section.
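For orientation, a hypothetical declare mapper (not taken from this patch) for which such an .omp_mapper. function is emitted; per the pseudocode above, the generated routine walks each component and forwards base, begin, size, type, and now the name argument either to a nested mapper or to __tgt_push_mapper_component.

  struct vec { int len; double *data; };
  #pragma omp declare mapper(id : vec v) map(v, v.data[0 : v.len])

  void scale(vec &v) {
    #pragma omp target map(mapper(id), tofrom : v)
    for (int i = 0; i < v.len; ++i)
      v.data[i] *= 2.0;
  }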
@@ -8932,12 +9559,15 @@ void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D,
ImplicitParamDecl::Other);
ImplicitParamDecl TypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
ImplicitParamDecl::Other);
+ ImplicitParamDecl NameArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
+ ImplicitParamDecl::Other);
FunctionArgList Args;
Args.push_back(&HandleArg);
Args.push_back(&BaseArg);
Args.push_back(&BeginArg);
Args.push_back(&SizeArg);
Args.push_back(&TypeArg);
+ Args.push_back(&NameArg);
const CGFunctionInfo &FnInfo =
CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
@@ -8956,6 +9586,9 @@ void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D,
llvm::Value *Size = MapperCGF.EmitLoadOfScalar(
MapperCGF.GetAddrOfLocalVar(&SizeArg), /*Volatile=*/false,
C.getPointerType(Int64Ty), Loc);
+ // Convert the size in bytes into the number of array elements.
+ Size = MapperCGF.Builder.CreateExactUDiv(
+ Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
llvm::Value *PtrBegin = MapperCGF.Builder.CreateBitCast(
MapperCGF.GetAddrOfLocalVar(&BeginArg).getPointer(),
CGM.getTypes().ConvertTypeForMem(C.getPointerType(PtrTy)));
@@ -8994,6 +9627,7 @@ void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D,
// Emit the loop body block.
MapperCGF.EmitBlock(BodyBB);
+ llvm::BasicBlock *LastBB = BodyBB;
llvm::PHINode *PtrPHI = MapperCGF.Builder.CreatePHI(
PtrBegin->getType(), 2, "omp.arraymap.ptrcurrent");
PtrPHI->addIncoming(PtrBegin, EntryBB);
@@ -9011,12 +9645,9 @@ void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D,
(void)Scope.Privatize();
// Get map clause information. Fill up the arrays with all mapped variables.
- MappableExprsHandler::MapBaseValuesArrayTy BasePointers;
- MappableExprsHandler::MapValuesArrayTy Pointers;
- MappableExprsHandler::MapValuesArrayTy Sizes;
- MappableExprsHandler::MapFlagsArrayTy MapTypes;
+ MappableExprsHandler::MapCombinedInfoTy Info;
MappableExprsHandler MEHandler(*D, MapperCGF);
- MEHandler.generateAllInfoForMapper(BasePointers, Pointers, Sizes, MapTypes);
+ MEHandler.generateAllInfoForMapper(Info);
// Call the runtime API __tgt_mapper_num_components to get the number of
// pre-existing components.
@@ -9030,17 +9661,21 @@ void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D,
MapperCGF.Builder.getInt64(MappableExprsHandler::getFlagMemberOffset()));
// Fill up the runtime mapper handle for all components.
- for (unsigned I = 0; I < BasePointers.size(); ++I) {
+ for (unsigned I = 0; I < Info.BasePointers.size(); ++I) {
llvm::Value *CurBaseArg = MapperCGF.Builder.CreateBitCast(
- *BasePointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
+ *Info.BasePointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
llvm::Value *CurBeginArg = MapperCGF.Builder.CreateBitCast(
- Pointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
- llvm::Value *CurSizeArg = Sizes[I];
+ Info.Pointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
+ llvm::Value *CurSizeArg = Info.Sizes[I];
+ llvm::Value *CurNameArg =
+ (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo)
+ ? llvm::ConstantPointerNull::get(CGM.VoidPtrTy)
+ : emitMappingInformation(MapperCGF, OMPBuilder, Info.Exprs[I]);
// Extract the MEMBER_OF field from the map type.
llvm::BasicBlock *MemberBB = MapperCGF.createBasicBlock("omp.member");
MapperCGF.EmitBlock(MemberBB);
- llvm::Value *OriMapType = MapperCGF.Builder.getInt64(MapTypes[I]);
+ llvm::Value *OriMapType = MapperCGF.Builder.getInt64(Info.Types[I]);
llvm::Value *Member = MapperCGF.Builder.CreateAnd(
OriMapType,
MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_MEMBER_OF));
@@ -9116,6 +9751,7 @@ void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D,
MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_TO));
// In case of tofrom, do nothing.
MapperCGF.EmitBlock(EndBB);
+ LastBB = EndBB;
llvm::PHINode *CurMapType =
MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.maptype");
CurMapType->addIncoming(AllocMapType, AllocBB);
@@ -9123,23 +9759,29 @@ void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D,
CurMapType->addIncoming(FromMapType, FromBB);
CurMapType->addIncoming(MemberMapType, ToElseBB);
- // TODO: call the corresponding mapper function if a user-defined mapper is
- // associated with this map clause.
- // Call the runtime API __tgt_push_mapper_component to fill up the runtime
- // data structure.
- llvm::Value *OffloadingArgs[] = {Handle, CurBaseArg, CurBeginArg,
- CurSizeArg, CurMapType};
- MapperCGF.EmitRuntimeCall(
- OMPBuilder.getOrCreateRuntimeFunction(
- CGM.getModule(), OMPRTL___tgt_push_mapper_component),
- OffloadingArgs);
+ llvm::Value *OffloadingArgs[] = {Handle, CurBaseArg, CurBeginArg,
+ CurSizeArg, CurMapType, CurNameArg};
+ if (Info.Mappers[I]) {
+ // Call the corresponding mapper function.
+ llvm::Function *MapperFunc = getOrCreateUserDefinedMapperFunc(
+ cast<OMPDeclareMapperDecl>(Info.Mappers[I]));
+ assert(MapperFunc && "Expect a valid mapper function is available.");
+ MapperCGF.EmitNounwindRuntimeCall(MapperFunc, OffloadingArgs);
+ } else {
+ // Call the runtime API __tgt_push_mapper_component to fill up the runtime
+ // data structure.
+ MapperCGF.EmitRuntimeCall(
+ OMPBuilder.getOrCreateRuntimeFunction(
+ CGM.getModule(), OMPRTL___tgt_push_mapper_component),
+ OffloadingArgs);
+ }
}
// Update the pointer to point to the next element that needs to be mapped,
// and check whether we have mapped all elements.
llvm::Value *PtrNext = MapperCGF.Builder.CreateConstGEP1_32(
PtrPHI, /*Idx0=*/1, "omp.arraymap.next");
- PtrPHI->addIncoming(PtrNext, BodyBB);
+ PtrPHI->addIncoming(PtrNext, LastBB);
llvm::Value *IsDone =
MapperCGF.Builder.CreateICmpEQ(PtrNext, PtrEnd, "omp.arraymap.isdone");
llvm::BasicBlock *ExitBB = MapperCGF.createBasicBlock("omp.arraymap.exit");
@@ -9208,15 +9850,27 @@ void CGOpenMPRuntime::emitUDMapperArrayInitOrDel(
MapType,
MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO |
MappableExprsHandler::OMP_MAP_FROM)));
+ llvm::Value *MapNameArg = llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
+
// Call the runtime API __tgt_push_mapper_component to fill up the runtime
// data structure.
- llvm::Value *OffloadingArgs[] = {Handle, Base, Begin, ArraySize, MapTypeArg};
+ llvm::Value *OffloadingArgs[] = {Handle, Base, Begin,
+ ArraySize, MapTypeArg, MapNameArg};
MapperCGF.EmitRuntimeCall(
OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
OMPRTL___tgt_push_mapper_component),
OffloadingArgs);
}
+llvm::Function *CGOpenMPRuntime::getOrCreateUserDefinedMapperFunc(
+ const OMPDeclareMapperDecl *D) {
+ auto I = UDMMap.find(D);
+ if (I != UDMMap.end())
+ return I->second;
+ emitUserDefinedMapper(D);
+ return UDMMap.lookup(D);
+}
+
void CGOpenMPRuntime::emitTargetNumIterationsCall(
CodeGenFunction &CGF, const OMPExecutableDirective &D,
llvm::Value *DeviceID,
@@ -9231,10 +9885,11 @@ void CGOpenMPRuntime::emitTargetNumIterationsCall(
if (!TD)
return;
const auto *LD = cast<OMPLoopDirective>(TD);
- auto &&CodeGen = [LD, DeviceID, SizeEmitter, this](CodeGenFunction &CGF,
- PrePostActionTy &) {
+ auto &&CodeGen = [LD, DeviceID, SizeEmitter, &D, this](CodeGenFunction &CGF,
+ PrePostActionTy &) {
if (llvm::Value *NumIterations = SizeEmitter(CGF, *LD)) {
- llvm::Value *Args[] = {DeviceID, NumIterations};
+ llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());
+ llvm::Value *Args[] = {RTLoc, DeviceID, NumIterations};
CGF.EmitRuntimeCall(
OMPBuilder.getOrCreateRuntimeFunction(
CGM.getModule(), OMPRTL___kmpc_push_target_tripcount),
@@ -9256,7 +9911,8 @@ void CGOpenMPRuntime::emitTargetCall(
assert(OutlinedFn && "Invalid outlined function!");
- const bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>();
+ const bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() ||
+ D.hasClausesOfKind<OMPNowaitClause>();
llvm::SmallVector<llvm::Value *, 16> CapturedVars;
const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
auto &&ArgsCodegen = [&CS, &CapturedVars](CodeGenFunction &CGF,
@@ -9267,9 +9923,11 @@ void CGOpenMPRuntime::emitTargetCall(
CodeGenFunction::OMPTargetDataInfo InputInfo;
llvm::Value *MapTypesArray = nullptr;
+ llvm::Value *MapNamesArray = nullptr;
// Fill up the pointer arrays and transfer execution to the device.
auto &&ThenGen = [this, Device, OutlinedFn, OutlinedFnID, &D, &InputInfo,
- &MapTypesArray, &CS, RequiresOuterTask, &CapturedVars,
+ &MapTypesArray, &MapNamesArray, &CS, RequiresOuterTask,
+ &CapturedVars,
SizeEmitter](CodeGenFunction &CGF, PrePostActionTy &) {
if (Device.getInt() == OMPC_DEVICE_ancestor) {
// Reverse offloading is not supported, so just execute on the host.
@@ -9316,6 +9974,9 @@ void CGOpenMPRuntime::emitTargetCall(
llvm::Value *NumTeams = emitNumTeamsForTargetDirective(CGF, D);
llvm::Value *NumThreads = emitNumThreadsForTargetDirective(CGF, D);
+ // Source location for the ident struct
+ llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());
+
// Emit tripcount for the target loop-based directive.
emitTargetNumIterationsCall(CGF, D, DeviceID, SizeEmitter);
@@ -9355,32 +10016,39 @@ void CGOpenMPRuntime::emitTargetCall(
// passed to the runtime library - a 32-bit integer with the value zero.
assert(NumThreads && "Thread limit expression should be available along "
"with number of teams.");
- llvm::Value *OffloadingArgs[] = {DeviceID,
+ llvm::Value *OffloadingArgs[] = {RTLoc,
+ DeviceID,
OutlinedFnID,
PointerNum,
InputInfo.BasePointersArray.getPointer(),
InputInfo.PointersArray.getPointer(),
InputInfo.SizesArray.getPointer(),
MapTypesArray,
+ MapNamesArray,
+ InputInfo.MappersArray.getPointer(),
NumTeams,
NumThreads};
Return = CGF.EmitRuntimeCall(
OMPBuilder.getOrCreateRuntimeFunction(
- CGM.getModule(), HasNowait ? OMPRTL___tgt_target_teams_nowait
- : OMPRTL___tgt_target_teams),
+ CGM.getModule(), HasNowait
+ ? OMPRTL___tgt_target_teams_nowait_mapper
+ : OMPRTL___tgt_target_teams_mapper),
OffloadingArgs);
} else {
- llvm::Value *OffloadingArgs[] = {DeviceID,
+ llvm::Value *OffloadingArgs[] = {RTLoc,
+ DeviceID,
OutlinedFnID,
PointerNum,
InputInfo.BasePointersArray.getPointer(),
InputInfo.PointersArray.getPointer(),
InputInfo.SizesArray.getPointer(),
- MapTypesArray};
+ MapTypesArray,
+ MapNamesArray,
+ InputInfo.MappersArray.getPointer()};
Return = CGF.EmitRuntimeCall(
OMPBuilder.getOrCreateRuntimeFunction(
- CGM.getModule(),
- HasNowait ? OMPRTL___tgt_target_nowait : OMPRTL___tgt_target),
+ CGM.getModule(), HasNowait ? OMPRTL___tgt_target_nowait_mapper
+ : OMPRTL___tgt_target_mapper),
OffloadingArgs);
}
@@ -9415,96 +10083,96 @@ void CGOpenMPRuntime::emitTargetCall(
};
auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
- &CapturedVars, RequiresOuterTask,
+ &MapNamesArray, &CapturedVars, RequiresOuterTask,
&CS](CodeGenFunction &CGF, PrePostActionTy &) {
// Fill up the arrays with all the captured variables.
- MappableExprsHandler::MapBaseValuesArrayTy BasePointers;
- MappableExprsHandler::MapValuesArrayTy Pointers;
- MappableExprsHandler::MapValuesArrayTy Sizes;
- MappableExprsHandler::MapFlagsArrayTy MapTypes;
+ MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
// Get mappable expression information.
MappableExprsHandler MEHandler(D, CGF);
llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers;
+ llvm::DenseSet<CanonicalDeclPtr<const Decl>> MappedVarSet;
auto RI = CS.getCapturedRecordDecl()->field_begin();
auto CV = CapturedVars.begin();
for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(),
CE = CS.capture_end();
CI != CE; ++CI, ++RI, ++CV) {
- MappableExprsHandler::MapBaseValuesArrayTy CurBasePointers;
- MappableExprsHandler::MapValuesArrayTy CurPointers;
- MappableExprsHandler::MapValuesArrayTy CurSizes;
- MappableExprsHandler::MapFlagsArrayTy CurMapTypes;
+ MappableExprsHandler::MapCombinedInfoTy CurInfo;
MappableExprsHandler::StructRangeInfoTy PartialStruct;
// VLA sizes are passed to the outlined region by copy and do not have map
// information associated.
if (CI->capturesVariableArrayType()) {
- CurBasePointers.push_back(*CV);
- CurPointers.push_back(*CV);
- CurSizes.push_back(CGF.Builder.CreateIntCast(
+ CurInfo.Exprs.push_back(nullptr);
+ CurInfo.BasePointers.push_back(*CV);
+ CurInfo.Pointers.push_back(*CV);
+ CurInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
CGF.getTypeSize(RI->getType()), CGF.Int64Ty, /*isSigned=*/true));
// Copy to the device as an argument. No need to retrieve it.
- CurMapTypes.push_back(MappableExprsHandler::OMP_MAP_LITERAL |
- MappableExprsHandler::OMP_MAP_TARGET_PARAM |
- MappableExprsHandler::OMP_MAP_IMPLICIT);
+ CurInfo.Types.push_back(MappableExprsHandler::OMP_MAP_LITERAL |
+ MappableExprsHandler::OMP_MAP_TARGET_PARAM |
+ MappableExprsHandler::OMP_MAP_IMPLICIT);
+ CurInfo.Mappers.push_back(nullptr);
} else {
// If we have any information in the map clause, we use it, otherwise we
// just do a default mapping.
- MEHandler.generateInfoForCapture(CI, *CV, CurBasePointers, CurPointers,
- CurSizes, CurMapTypes, PartialStruct);
- if (CurBasePointers.empty())
- MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurBasePointers,
- CurPointers, CurSizes, CurMapTypes);
+ MEHandler.generateInfoForCapture(CI, *CV, CurInfo, PartialStruct);
+ if (!CI->capturesThis())
+ MappedVarSet.insert(CI->getCapturedVar());
+ else
+ MappedVarSet.insert(nullptr);
+ if (CurInfo.BasePointers.empty() && !PartialStruct.Base.isValid())
+ MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurInfo);
// Generate correct mapping for variables captured by reference in
// lambdas.
if (CI->capturesVariable())
- MEHandler.generateInfoForLambdaCaptures(
- CI->getCapturedVar(), *CV, CurBasePointers, CurPointers, CurSizes,
- CurMapTypes, LambdaPointers);
+ MEHandler.generateInfoForLambdaCaptures(CI->getCapturedVar(), *CV,
+ CurInfo, LambdaPointers);
}
// We expect to have at least an element of information for this capture.
- assert(!CurBasePointers.empty() &&
+ assert((!CurInfo.BasePointers.empty() || PartialStruct.Base.isValid()) &&
"Non-existing map pointer for capture!");
- assert(CurBasePointers.size() == CurPointers.size() &&
- CurBasePointers.size() == CurSizes.size() &&
- CurBasePointers.size() == CurMapTypes.size() &&
+ assert(CurInfo.BasePointers.size() == CurInfo.Pointers.size() &&
+ CurInfo.BasePointers.size() == CurInfo.Sizes.size() &&
+ CurInfo.BasePointers.size() == CurInfo.Types.size() &&
+ CurInfo.BasePointers.size() == CurInfo.Mappers.size() &&
"Inconsistent map information sizes!");
// If there is an entry in PartialStruct it means we have a struct with
// individual members mapped. Emit an extra combined entry.
if (PartialStruct.Base.isValid())
- MEHandler.emitCombinedEntry(BasePointers, Pointers, Sizes, MapTypes,
- CurMapTypes, PartialStruct);
+ MEHandler.emitCombinedEntry(CombinedInfo, CurInfo.Types, PartialStruct,
+ nullptr, /*NoTargetParam=*/false);
// We need to append the results of this capture to what we already have.
- BasePointers.append(CurBasePointers.begin(), CurBasePointers.end());
- Pointers.append(CurPointers.begin(), CurPointers.end());
- Sizes.append(CurSizes.begin(), CurSizes.end());
- MapTypes.append(CurMapTypes.begin(), CurMapTypes.end());
+ CombinedInfo.append(CurInfo);
}
// Adjust MEMBER_OF flags for the lambdas captures.
- MEHandler.adjustMemberOfForLambdaCaptures(LambdaPointers, BasePointers,
- Pointers, MapTypes);
- // Map other list items in the map clause which are not captured variables
- // but "declare target link" global variables.
- MEHandler.generateInfoForDeclareTargetLink(BasePointers, Pointers, Sizes,
- MapTypes);
+ MEHandler.adjustMemberOfForLambdaCaptures(
+ LambdaPointers, CombinedInfo.BasePointers, CombinedInfo.Pointers,
+ CombinedInfo.Types);
+ // Map any list items in a map clause that were not captures because they
+ // weren't referenced within the construct.
+ MEHandler.generateAllInfo(CombinedInfo, MappedVarSet);
TargetDataInfo Info;
// Fill up the arrays and create the arguments.
- emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info);
- emitOffloadingArraysArgument(CGF, Info.BasePointersArray,
- Info.PointersArray, Info.SizesArray,
- Info.MapTypesArray, Info);
+ emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder);
+ emitOffloadingArraysArgument(
+ CGF, Info.BasePointersArray, Info.PointersArray, Info.SizesArray,
+ Info.MapTypesArray, Info.MapNamesArray, Info.MappersArray, Info,
+ {/*ForEndCall=*/false});
+
InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
InputInfo.BasePointersArray =
Address(Info.BasePointersArray, CGM.getPointerAlign());
InputInfo.PointersArray =
Address(Info.PointersArray, CGM.getPointerAlign());
InputInfo.SizesArray = Address(Info.SizesArray, CGM.getPointerAlign());
+ InputInfo.MappersArray = Address(Info.MappersArray, CGM.getPointerAlign());
MapTypesArray = Info.MapTypesArray;
+ MapNamesArray = Info.MapNamesArray;
if (RequiresOuterTask)
CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
else
@@ -9673,8 +10341,7 @@ void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
if (!E->hasAssociatedStmt() || !E->getAssociatedStmt())
return;
- scanForTargetRegionsFunctions(
- E->getInnermostCapturedStmt()->getCapturedStmt(), ParentName);
+ scanForTargetRegionsFunctions(E->getRawStmt(), ParentName);
return;
}
@@ -10092,24 +10759,25 @@ void CGOpenMPRuntime::emitTargetDataCalls(
auto &&BeginThenGen = [this, &D, Device, &Info,
&CodeGen](CodeGenFunction &CGF, PrePostActionTy &) {
// Fill up the arrays with all the mapped variables.
- MappableExprsHandler::MapBaseValuesArrayTy BasePointers;
- MappableExprsHandler::MapValuesArrayTy Pointers;
- MappableExprsHandler::MapValuesArrayTy Sizes;
- MappableExprsHandler::MapFlagsArrayTy MapTypes;
+ MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
// Get map clause information.
- MappableExprsHandler MCHandler(D, CGF);
- MCHandler.generateAllInfo(BasePointers, Pointers, Sizes, MapTypes);
+ MappableExprsHandler MEHandler(D, CGF);
+ MEHandler.generateAllInfo(CombinedInfo);
// Fill up the arrays and create the arguments.
- emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info);
+ emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder,
+ /*IsNonContiguous=*/true);
llvm::Value *BasePointersArrayArg = nullptr;
llvm::Value *PointersArrayArg = nullptr;
llvm::Value *SizesArrayArg = nullptr;
llvm::Value *MapTypesArrayArg = nullptr;
+ llvm::Value *MapNamesArrayArg = nullptr;
+ llvm::Value *MappersArrayArg = nullptr;
emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
- SizesArrayArg, MapTypesArrayArg, Info);
+ SizesArrayArg, MapTypesArrayArg,
+ MapNamesArrayArg, MappersArrayArg, Info);
// Emit device ID if any.
llvm::Value *DeviceID = nullptr;
@@ -10122,13 +10790,23 @@ void CGOpenMPRuntime::emitTargetDataCalls(
// Emit the number of elements in the offloading arrays.
llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);
+ // Source location for the ident struct
+ llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());
- llvm::Value *OffloadingArgs[] = {
- DeviceID, PointerNum, BasePointersArrayArg,
- PointersArrayArg, SizesArrayArg, MapTypesArrayArg};
- CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
- CGM.getModule(), OMPRTL___tgt_target_data_begin),
- OffloadingArgs);
+ llvm::Value *OffloadingArgs[] = {RTLoc,
+ DeviceID,
+ PointerNum,
+ BasePointersArrayArg,
+ PointersArrayArg,
+ SizesArrayArg,
+ MapTypesArrayArg,
+ MapNamesArrayArg,
+ MappersArrayArg};
+ CGF.EmitRuntimeCall(
+ OMPBuilder.getOrCreateRuntimeFunction(
+ CGM.getModule(), OMPRTL___tgt_target_data_begin_mapper),
+ OffloadingArgs);
// If device pointer privatization is required, emit the body of the region
// here. It will have to be duplicated: with and without privatization.
@@ -10137,16 +10815,20 @@ void CGOpenMPRuntime::emitTargetDataCalls(
};
// Generate code for the closing of the data region.
- auto &&EndThenGen = [this, Device, &Info](CodeGenFunction &CGF,
- PrePostActionTy &) {
+ auto &&EndThenGen = [this, Device, &Info, &D](CodeGenFunction &CGF,
+ PrePostActionTy &) {
assert(Info.isValid() && "Invalid data environment closing arguments.");
llvm::Value *BasePointersArrayArg = nullptr;
llvm::Value *PointersArrayArg = nullptr;
llvm::Value *SizesArrayArg = nullptr;
llvm::Value *MapTypesArrayArg = nullptr;
+ llvm::Value *MapNamesArrayArg = nullptr;
+ llvm::Value *MappersArrayArg = nullptr;
emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
- SizesArrayArg, MapTypesArrayArg, Info);
+ SizesArrayArg, MapTypesArrayArg,
+ MapNamesArrayArg, MappersArrayArg, Info,
+ {/*ForEndCall=*/true});
// Emit device ID if any.
llvm::Value *DeviceID = nullptr;
@@ -10160,12 +10842,22 @@ void CGOpenMPRuntime::emitTargetDataCalls(
// Emit the number of elements in the offloading arrays.
llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);
- llvm::Value *OffloadingArgs[] = {
- DeviceID, PointerNum, BasePointersArrayArg,
- PointersArrayArg, SizesArrayArg, MapTypesArrayArg};
- CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
- CGM.getModule(), OMPRTL___tgt_target_data_end),
- OffloadingArgs);
+ // Source location for the ident struct
+ llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());
+
+ llvm::Value *OffloadingArgs[] = {RTLoc,
+ DeviceID,
+ PointerNum,
+ BasePointersArrayArg,
+ PointersArrayArg,
+ SizesArrayArg,
+ MapTypesArrayArg,
+ MapNamesArrayArg,
+ MappersArrayArg};
+ CGF.EmitRuntimeCall(
+ OMPBuilder.getOrCreateRuntimeFunction(
+ CGM.getModule(), OMPRTL___tgt_target_data_end_mapper),
+ OffloadingArgs);
};
// If we need device pointer privatization, we need to emit the body of the
@@ -10218,9 +10910,10 @@ void CGOpenMPRuntime::emitTargetDataStandAloneCall(
CodeGenFunction::OMPTargetDataInfo InputInfo;
llvm::Value *MapTypesArray = nullptr;
+ llvm::Value *MapNamesArray = nullptr;
// Generate the code for the opening of the data environment.
- auto &&ThenGen = [this, &D, Device, &InputInfo,
- &MapTypesArray](CodeGenFunction &CGF, PrePostActionTy &) {
+ auto &&ThenGen = [this, &D, Device, &InputInfo, &MapTypesArray,
+ &MapNamesArray](CodeGenFunction &CGF, PrePostActionTy &) {
// Emit device ID if any.
llvm::Value *DeviceID = nullptr;
if (Device) {
@@ -10234,29 +10927,35 @@ void CGOpenMPRuntime::emitTargetDataStandAloneCall(
llvm::Constant *PointerNum =
CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);
- llvm::Value *OffloadingArgs[] = {DeviceID,
+ // Source location for the ident struct
+ llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());
+
+ llvm::Value *OffloadingArgs[] = {RTLoc,
+ DeviceID,
PointerNum,
InputInfo.BasePointersArray.getPointer(),
InputInfo.PointersArray.getPointer(),
InputInfo.SizesArray.getPointer(),
- MapTypesArray};
+ MapTypesArray,
+ MapNamesArray,
+ InputInfo.MappersArray.getPointer()};
- // Select the right runtime function call for each expected standalone
+ // Select the right runtime function call for each standalone
// directive.
const bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
RuntimeFunction RTLFn;
switch (D.getDirectiveKind()) {
case OMPD_target_enter_data:
- RTLFn = HasNowait ? OMPRTL___tgt_target_data_begin_nowait
- : OMPRTL___tgt_target_data_begin;
+ RTLFn = HasNowait ? OMPRTL___tgt_target_data_begin_nowait_mapper
+ : OMPRTL___tgt_target_data_begin_mapper;
break;
case OMPD_target_exit_data:
- RTLFn = HasNowait ? OMPRTL___tgt_target_data_end_nowait
- : OMPRTL___tgt_target_data_end;
+ RTLFn = HasNowait ? OMPRTL___tgt_target_data_end_nowait_mapper
+ : OMPRTL___tgt_target_data_end_mapper;
break;
case OMPD_target_update:
- RTLFn = HasNowait ? OMPRTL___tgt_target_data_update_nowait
- : OMPRTL___tgt_target_data_update;
+ RTLFn = HasNowait ? OMPRTL___tgt_target_data_update_nowait_mapper
+ : OMPRTL___tgt_target_data_update_mapper;
break;
case OMPD_parallel:
case OMPD_for:
@@ -10330,24 +11029,26 @@ void CGOpenMPRuntime::emitTargetDataStandAloneCall(
OffloadingArgs);
};
- auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray](
- CodeGenFunction &CGF, PrePostActionTy &) {
+ auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
+ &MapNamesArray](CodeGenFunction &CGF,
+ PrePostActionTy &) {
// Fill up the arrays with all the mapped variables.
- MappableExprsHandler::MapBaseValuesArrayTy BasePointers;
- MappableExprsHandler::MapValuesArrayTy Pointers;
- MappableExprsHandler::MapValuesArrayTy Sizes;
- MappableExprsHandler::MapFlagsArrayTy MapTypes;
+ MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
// Get map clause information.
MappableExprsHandler MEHandler(D, CGF);
- MEHandler.generateAllInfo(BasePointers, Pointers, Sizes, MapTypes);
+ MEHandler.generateAllInfo(CombinedInfo);
TargetDataInfo Info;
// Fill up the arrays and create the arguments.
- emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info);
- emitOffloadingArraysArgument(CGF, Info.BasePointersArray,
- Info.PointersArray, Info.SizesArray,
- Info.MapTypesArray, Info);
+ emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder,
+ /*IsNonContiguous=*/true);
+ bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() ||
+ D.hasClausesOfKind<OMPNowaitClause>();
+ emitOffloadingArraysArgument(
+ CGF, Info.BasePointersArray, Info.PointersArray, Info.SizesArray,
+ Info.MapTypesArray, Info.MapNamesArray, Info.MappersArray, Info,
+ {/*ForEndTask=*/false});
InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
InputInfo.BasePointersArray =
Address(Info.BasePointersArray, CGM.getPointerAlign());
@@ -10355,8 +11056,10 @@ void CGOpenMPRuntime::emitTargetDataStandAloneCall(
Address(Info.PointersArray, CGM.getPointerAlign());
InputInfo.SizesArray =
Address(Info.SizesArray, CGM.getPointerAlign());
+ InputInfo.MappersArray = Address(Info.MappersArray, CGM.getPointerAlign());
MapTypesArray = Info.MapTypesArray;
- if (D.hasClausesOfKind<OMPDependClause>())
+ MapNamesArray = Info.MapNamesArray;
+ if (RequiresOuterTask)
CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
else
emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
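As a usage illustration (hypothetical user code): a depend or nowait clause on the standalone directive makes RequiresOuterTask true above, so the data motion is routed through EmitOMPTargetTaskBasedDirective instead of being emitted inline:

   // Either clause below forces the task-based path for this directive.
   void stage_in(float *a, int n) {
   #pragma omp target enter data map(to: a[0:n]) depend(out: a[0]) nowait
   }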
@@ -11095,87 +11798,115 @@ Address CGOpenMPRuntime::getParameterAddress(CodeGenFunction &CGF,
return CGF.GetAddrOfLocalVar(NativeParam);
}
-namespace {
-/// Cleanup action for allocate support.
-class OMPAllocateCleanupTy final : public EHScopeStack::Cleanup {
-public:
- static const int CleanupArgs = 3;
-
-private:
- llvm::FunctionCallee RTLFn;
- llvm::Value *Args[CleanupArgs];
-
-public:
- OMPAllocateCleanupTy(llvm::FunctionCallee RTLFn,
- ArrayRef<llvm::Value *> CallArgs)
- : RTLFn(RTLFn) {
- assert(CallArgs.size() == CleanupArgs &&
- "Size of arguments does not match.");
- std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args));
- }
- void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
- if (!CGF.HaveInsertPoint())
- return;
- CGF.EmitRuntimeCall(RTLFn, Args);
- }
-};
-} // namespace
-
Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF,
const VarDecl *VD) {
if (!VD)
return Address::invalid();
+ Address UntiedAddr = Address::invalid();
+ Address UntiedRealAddr = Address::invalid();
+ auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn);
+ if (It != FunctionToUntiedTaskStackMap.end()) {
+ const UntiedLocalVarsAddressesMap &UntiedData =
+ UntiedLocalVarsStack[It->second];
+ auto I = UntiedData.find(VD);
+ if (I != UntiedData.end()) {
+ UntiedAddr = I->second.first;
+ UntiedRealAddr = I->second.second;
+ }
+ }
const VarDecl *CVD = VD->getCanonicalDecl();
- if (!CVD->hasAttr<OMPAllocateDeclAttr>())
- return Address::invalid();
- const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
- // Use the default allocation.
- if ((AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc ||
- AA->getAllocatorType() == OMPAllocateDeclAttr::OMPNullMemAlloc) &&
- !AA->getAllocator())
- return Address::invalid();
- llvm::Value *Size;
- CharUnits Align = CGM.getContext().getDeclAlign(CVD);
- if (CVD->getType()->isVariablyModifiedType()) {
- Size = CGF.getTypeSize(CVD->getType());
- // Align the size: ((size + align - 1) / align) * align
- Size = CGF.Builder.CreateNUWAdd(
- Size, CGM.getSize(Align - CharUnits::fromQuantity(1)));
- Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align));
- Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align));
- } else {
- CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType());
- Size = CGM.getSize(Sz.alignTo(Align));
+ if (CVD->hasAttr<OMPAllocateDeclAttr>()) {
+ // Use the default allocation.
+ if (!isAllocatableDecl(VD))
+ return UntiedAddr;
+ llvm::Value *Size;
+ CharUnits Align = CGM.getContext().getDeclAlign(CVD);
+ if (CVD->getType()->isVariablyModifiedType()) {
+ Size = CGF.getTypeSize(CVD->getType());
+ // Align the size: ((size + align - 1) / align) * align
+ Size = CGF.Builder.CreateNUWAdd(
+ Size, CGM.getSize(Align - CharUnits::fromQuantity(1)));
+ Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align));
+ Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align));
+ } else {
+ CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType());
+ Size = CGM.getSize(Sz.alignTo(Align));
+ }
+ llvm::Value *ThreadID = getThreadID(CGF, CVD->getBeginLoc());
+ const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
+ assert(AA->getAllocator() &&
+ "Expected allocator expression for non-default allocator.");
+ llvm::Value *Allocator = CGF.EmitScalarExpr(AA->getAllocator());
+    // According to the standard, the original allocator type is an enum
+    // (integer). Convert to pointer type, if required.
+ Allocator = CGF.EmitScalarConversion(
+ Allocator, AA->getAllocator()->getType(), CGF.getContext().VoidPtrTy,
+ AA->getAllocator()->getExprLoc());
+ llvm::Value *Args[] = {ThreadID, Size, Allocator};
+
+ llvm::Value *Addr =
+ CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
+ CGM.getModule(), OMPRTL___kmpc_alloc),
+ Args, getName({CVD->getName(), ".void.addr"}));
+ llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
+ CGM.getModule(), OMPRTL___kmpc_free);
+ QualType Ty = CGM.getContext().getPointerType(CVD->getType());
+ Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
+ Addr, CGF.ConvertTypeForMem(Ty), getName({CVD->getName(), ".addr"}));
+ if (UntiedAddr.isValid())
+ CGF.EmitStoreOfScalar(Addr, UntiedAddr, /*Volatile=*/false, Ty);
+
+ // Cleanup action for allocate support.
+ class OMPAllocateCleanupTy final : public EHScopeStack::Cleanup {
+ llvm::FunctionCallee RTLFn;
+ unsigned LocEncoding;
+ Address Addr;
+ const Expr *Allocator;
+
+ public:
+ OMPAllocateCleanupTy(llvm::FunctionCallee RTLFn, unsigned LocEncoding,
+ Address Addr, const Expr *Allocator)
+ : RTLFn(RTLFn), LocEncoding(LocEncoding), Addr(Addr),
+ Allocator(Allocator) {}
+ void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
+ if (!CGF.HaveInsertPoint())
+ return;
+ llvm::Value *Args[3];
+ Args[0] = CGF.CGM.getOpenMPRuntime().getThreadID(
+ CGF, SourceLocation::getFromRawEncoding(LocEncoding));
+ Args[1] = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
+ Addr.getPointer(), CGF.VoidPtrTy);
+ llvm::Value *AllocVal = CGF.EmitScalarExpr(Allocator);
+        // According to the standard, the original allocator type is an enum
+        // (integer). Convert to pointer type, if required.
+ AllocVal = CGF.EmitScalarConversion(AllocVal, Allocator->getType(),
+ CGF.getContext().VoidPtrTy,
+ Allocator->getExprLoc());
+ Args[2] = AllocVal;
+
+ CGF.EmitRuntimeCall(RTLFn, Args);
+ }
+ };
+ Address VDAddr =
+ UntiedRealAddr.isValid() ? UntiedRealAddr : Address(Addr, Align);
+ CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(
+ NormalAndEHCleanup, FiniRTLFn, CVD->getLocation().getRawEncoding(),
+ VDAddr, AA->getAllocator());
+ if (UntiedRealAddr.isValid())
+ if (auto *Region =
+ dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
+ Region->emitUntiedSwitch(CGF);
+ return VDAddr;
}
- llvm::Value *ThreadID = getThreadID(CGF, CVD->getBeginLoc());
- assert(AA->getAllocator() &&
- "Expected allocator expression for non-default allocator.");
- llvm::Value *Allocator = CGF.EmitScalarExpr(AA->getAllocator());
- // According to the standard, the original allocator type is a enum (integer).
- // Convert to pointer type, if required.
- if (Allocator->getType()->isIntegerTy())
- Allocator = CGF.Builder.CreateIntToPtr(Allocator, CGM.VoidPtrTy);
- else if (Allocator->getType()->isPointerTy())
- Allocator = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Allocator,
- CGM.VoidPtrTy);
- llvm::Value *Args[] = {ThreadID, Size, Allocator};
-
- llvm::Value *Addr =
- CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
- CGM.getModule(), OMPRTL___kmpc_alloc),
- Args, getName({CVD->getName(), ".void.addr"}));
- llvm::Value *FiniArgs[OMPAllocateCleanupTy::CleanupArgs] = {ThreadID, Addr,
- Allocator};
- llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
- CGM.getModule(), OMPRTL___kmpc_free);
+ return UntiedAddr;
+}
- CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(NormalAndEHCleanup, FiniRTLFn,
- llvm::makeArrayRef(FiniArgs));
- Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
- Addr,
- CGF.ConvertTypeForMem(CGM.getContext().getPointerType(CVD->getType())),
- getName({CVD->getName(), ".addr"}));
- return Address(Addr, Align);
+bool CGOpenMPRuntime::isLocalVarInUntiedTask(CodeGenFunction &CGF,
+ const VarDecl *VD) const {
+ auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn);
+ if (It == FunctionToUntiedTaskStackMap.end())
+ return false;
+ return UntiedLocalVarsStack[It->second].count(VD) > 0;
}
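To make the allocate-clause path above concrete, a minimal sketch of the call pairing it emits, assuming a hypothetical variable with a non-default allocator (gtid, al, my_alloc and the x_* names are illustrative; only __kmpc_alloc and __kmpc_free are the runtime entry points requested above):

   // Sketch: for  double x;  with  #pragma omp allocate(x) allocator(my_alloc)
   // gtid : thread id obtained via getThreadID at the variable's location
   // al   : the allocator expression, converted to void* as in the code above
   void *x_void = __kmpc_alloc(gtid, /*Size=*/8, /*Allocator=*/al); // x.void.addr
   double *x_addr = (double *)x_void;                               // x.addr
   // ... uses of x go through *x_addr ...
   __kmpc_free(gtid, x_void, al);   // registered as a NormalAndEHCleanup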
CGOpenMPRuntime::NontemporalDeclsRAII::NontemporalDeclsRAII(
@@ -11210,6 +11941,24 @@ CGOpenMPRuntime::NontemporalDeclsRAII::~NontemporalDeclsRAII() {
CGM.getOpenMPRuntime().NontemporalDeclsStack.pop_back();
}
+CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::UntiedTaskLocalDeclsRAII(
+ CodeGenFunction &CGF,
+ const llvm::DenseMap<CanonicalDeclPtr<const VarDecl>,
+ std::pair<Address, Address>> &LocalVars)
+ : CGM(CGF.CGM), NeedToPush(!LocalVars.empty()) {
+ if (!NeedToPush)
+ return;
+ CGM.getOpenMPRuntime().FunctionToUntiedTaskStackMap.try_emplace(
+ CGF.CurFn, CGM.getOpenMPRuntime().UntiedLocalVarsStack.size());
+ CGM.getOpenMPRuntime().UntiedLocalVarsStack.push_back(LocalVars);
+}
+
+CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::~UntiedTaskLocalDeclsRAII() {
+ if (!NeedToPush)
+ return;
+ CGM.getOpenMPRuntime().UntiedLocalVarsStack.pop_back();
+}
+
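For context on the RAII pair above: it maintains UntiedLocalVarsStack so that locals of an untied task keep a stable address across task scheduling points, since the task body may be split into several task-part functions. A hypothetical source pattern that needs this bookkeeping:

   #pragma omp task untied
   {
     int tmp = compute();   // 'compute' and 'use' are placeholders
   #pragma omp taskyield    // scheduling point: the task may resume in a
                            // different task part, so 'tmp' must persist
     use(tmp);
   }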
bool CGOpenMPRuntime::isNontemporalDecl(const ValueDecl *VD) const {
assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");