diff options
Diffstat (limited to 'contrib/llvm-project/clang/lib/CodeGen/CGOpenMPRuntime.cpp')
-rw-r--r-- | contrib/llvm-project/clang/lib/CodeGen/CGOpenMPRuntime.cpp | 6409 |
1 files changed, 2971 insertions, 3438 deletions
diff --git a/contrib/llvm-project/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/contrib/llvm-project/clang/lib/CodeGen/CGOpenMPRuntime.cpp index caa5291ff6fa..a6a87ec88ee8 100644 --- a/contrib/llvm-project/clang/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/contrib/llvm-project/clang/lib/CodeGen/CGOpenMPRuntime.cpp @@ -11,10 +11,13 @@ //===----------------------------------------------------------------------===// #include "CGOpenMPRuntime.h" +#include "ABIInfoImpl.h" #include "CGCXXABI.h" #include "CGCleanup.h" #include "CGRecordLayout.h" #include "CodeGenFunction.h" +#include "TargetInfo.h" +#include "clang/AST/APValue.h" #include "clang/AST/Attr.h" #include "clang/AST/Decl.h" #include "clang/AST/OpenMPClause.h" @@ -27,17 +30,22 @@ #include "clang/CodeGen/ConstantInitBuilder.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/SetOperations.h" +#include "llvm/ADT/SmallBitVector.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringExtras.h" #include "llvm/Bitcode/BitcodeReader.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/GlobalValue.h" +#include "llvm/IR/InstrTypes.h" #include "llvm/IR/Value.h" #include "llvm/Support/AtomicOrdering.h" #include "llvm/Support/Format.h" #include "llvm/Support/raw_ostream.h" #include <cassert> +#include <cstdint> #include <numeric> +#include <optional> using namespace clang; using namespace CodeGen; @@ -367,8 +375,7 @@ public: /*RefersToEnclosingVariableOrCapture=*/false, VD->getType().getNonReferenceType(), VK_LValue, C.getLocation()); - PrivScope.addPrivate( - VD, [&CGF, &DRE]() { return CGF.EmitLValue(&DRE).getAddress(CGF); }); + PrivScope.addPrivate(VD, CGF.EmitLValue(&DRE).getAddress()); } (void)PrivScope.Privatize(); } @@ -406,7 +413,7 @@ private: /// RAII for emitting code of OpenMP constructs. 
class InlinedOpenMPRegionRAII { CodeGenFunction &CGF; - llvm::DenseMap<const VarDecl *, FieldDecl *> LambdaCaptureFields; + llvm::DenseMap<const ValueDecl *, FieldDecl *> LambdaCaptureFields; FieldDecl *LambdaThisCaptureField = nullptr; const CodeGen::CGBlockInfo *BlockInfo = nullptr; bool NoInheritance = false; @@ -448,7 +455,7 @@ public: /// Values for bit flags used in the ident_t to describe the fields. /// All enumeric elements are named and described in accordance with the code -/// from https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h +/// from https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h enum OpenMPLocationFlags : unsigned { /// Use trampoline for internal microtask. OMP_IDENT_IMD = 0x01, @@ -475,35 +482,9 @@ enum OpenMPLocationFlags : unsigned { LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE) }; -namespace { -LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE(); -/// Values for bit flags for marking which requires clauses have been used. -enum OpenMPOffloadingRequiresDirFlags : int64_t { - /// flag undefined. - OMP_REQ_UNDEFINED = 0x000, - /// no requires clause present. - OMP_REQ_NONE = 0x001, - /// reverse_offload clause. - OMP_REQ_REVERSE_OFFLOAD = 0x002, - /// unified_address clause. - OMP_REQ_UNIFIED_ADDRESS = 0x004, - /// unified_shared_memory clause. - OMP_REQ_UNIFIED_SHARED_MEMORY = 0x008, - /// dynamic_allocators clause. - OMP_REQ_DYNAMIC_ALLOCATORS = 0x010, - LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_REQ_DYNAMIC_ALLOCATORS) -}; - -enum OpenMPOffloadingReservedDeviceIDs { - /// Device ID if the device was not defined, runtime should get it - /// from environment variables in the spec. - OMP_DEVICEID_UNDEF = -1, -}; -} // anonymous namespace - /// Describes ident structure that describes a source location. 
/// All descriptions are taken from -/// https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h +/// https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h /// Original structure: /// typedef struct ident { /// kmp_int32 reserved_1; /**< might be used in Fortran; @@ -631,10 +612,8 @@ static void emitInitWithReductionInitializer(CodeGenFunction &CGF, const auto *RHSDRE = cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr()); CodeGenFunction::OMPPrivateScope PrivateScope(CGF); - PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()), - [=]() { return Private; }); - PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()), - [=]() { return Original; }); + PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()), Private); + PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()), Original); (void)PrivateScope.Privatize(); RValue Func = RValue::get(Reduction.second); CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func); @@ -645,7 +624,7 @@ static void emitInitWithReductionInitializer(CodeGenFunction &CGF, auto *GV = new llvm::GlobalVariable( CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage, Init, Name); - LValue LV = CGF.MakeNaturalAlignAddrLValue(GV, Ty); + LValue LV = CGF.MakeNaturalAlignRawAddrLValue(GV, Ty); RValue InitRVal; switch (CGF.getEvaluationKind(Ty)) { case TEK_Scalar: @@ -655,11 +634,15 @@ static void emitInitWithReductionInitializer(CodeGenFunction &CGF, InitRVal = RValue::getComplex(CGF.EmitLoadOfComplex(LV, DRD->getLocation())); break; - case TEK_Aggregate: - InitRVal = RValue::getAggregate(LV.getAddress(CGF)); - break; + case TEK_Aggregate: { + OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_LValue); + CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, LV); + CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(), + /*IsInitializer=*/false); + return; + } } - OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_RValue); + OpaqueValueExpr OVE(DRD->getLocation(), 
Ty, VK_PRValue); CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal); CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(), /*IsInitializer=*/false); @@ -682,18 +665,16 @@ static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr, // Drill down to the base element type on both arrays. const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe(); llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr); - DestAddr = - CGF.Builder.CreateElementBitCast(DestAddr, DestAddr.getElementType()); if (DRD) - SrcAddr = - CGF.Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType()); + SrcAddr = SrcAddr.withElementType(DestAddr.getElementType()); llvm::Value *SrcBegin = nullptr; if (DRD) - SrcBegin = SrcAddr.getPointer(); - llvm::Value *DestBegin = DestAddr.getPointer(); + SrcBegin = SrcAddr.emitRawPointer(CGF); + llvm::Value *DestBegin = DestAddr.emitRawPointer(CGF); // Cast from pointer to array type to pointer to single element. - llvm::Value *DestEnd = CGF.Builder.CreateGEP(DestBegin, NumElements); + llvm::Value *DestEnd = + CGF.Builder.CreateGEP(DestAddr.getElementType(), DestBegin, NumElements); // The basic structure here is a while-do loop. 
llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arrayinit.body"); llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arrayinit.done"); @@ -714,14 +695,14 @@ static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr, "omp.arraycpy.srcElementPast"); SrcElementPHI->addIncoming(SrcBegin, EntryBB); SrcElementCurrent = - Address(SrcElementPHI, + Address(SrcElementPHI, SrcAddr.getElementType(), SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize)); } llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI( DestBegin->getType(), 2, "omp.arraycpy.destElementPast"); DestElementPHI->addIncoming(DestBegin, EntryBB); Address DestElementCurrent = - Address(DestElementPHI, + Address(DestElementPHI, DestAddr.getElementType(), DestAddr.getAlignment().alignmentOfArrayElement(ElementSize)); // Emit copy. @@ -738,13 +719,15 @@ static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr, if (DRD) { // Shift the address forward by one element. llvm::Value *SrcElementNext = CGF.Builder.CreateConstGEP1_32( - SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element"); + SrcAddr.getElementType(), SrcElementPHI, /*Idx0=*/1, + "omp.arraycpy.dest.element"); SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock()); } // Shift the address forward by one element. llvm::Value *DestElementNext = CGF.Builder.CreateConstGEP1_32( - DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element"); + DestAddr.getElementType(), DestElementPHI, /*Idx0=*/1, + "omp.arraycpy.dest.element"); // Check whether we've reached the end. 
llvm::Value *Done = CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done"); @@ -761,13 +744,13 @@ LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) { LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF, const Expr *E) { - if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E)) - return CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false); + if (const auto *OASE = dyn_cast<ArraySectionExpr>(E)) + return CGF.EmitArraySectionExpr(OASE, /*IsLowerBound=*/false); return LValue(); } void ReductionCodeGen::emitAggregateInitialization( - CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal, + CodeGenFunction &CGF, unsigned N, Address PrivateAddr, Address SharedAddr, const OMPDeclareReductionDecl *DRD) { // Emit VarDecl with copy init for arrays. // Get the address of the original variable captured in current @@ -780,7 +763,7 @@ void ReductionCodeGen::emitAggregateInitialization( EmitDeclareReductionInit, EmitDeclareReductionInit ? 
ClausesData[N].ReductionOp : PrivateVD->getInit(), - DRD, SharedLVal.getAddress(CGF)); + DRD, SharedAddr); } ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds, @@ -818,10 +801,8 @@ void ReductionCodeGen::emitSharedOrigLValue(CodeGenFunction &CGF, unsigned N) { } void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) { - const auto *PrivateVD = - cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); - QualType PrivateType = PrivateVD->getType(); - bool AsArraySection = isa<OMPArraySectionExpr>(ClausesData[N].Ref); + QualType PrivateType = getPrivateType(N); + bool AsArraySection = isa<ArraySectionExpr>(ClausesData[N].Ref); if (!PrivateType->isVariablyModifiedType()) { Sizes.emplace_back( CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType()), @@ -830,12 +811,11 @@ void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) { } llvm::Value *Size; llvm::Value *SizeInChars; - auto *ElemType = - cast<llvm::PointerType>(OrigAddresses[N].first.getPointer(CGF)->getType()) - ->getElementType(); + auto *ElemType = OrigAddresses[N].first.getAddress().getElementType(); auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType); if (AsArraySection) { - Size = CGF.Builder.CreatePtrDiff(OrigAddresses[N].second.getPointer(CGF), + Size = CGF.Builder.CreatePtrDiff(ElemType, + OrigAddresses[N].second.getPointer(CGF), OrigAddresses[N].first.getPointer(CGF)); Size = CGF.Builder.CreateNUWAdd( Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1)); @@ -856,9 +836,7 @@ void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) { void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N, llvm::Value *Size) { - const auto *PrivateVD = - cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); - QualType PrivateType = PrivateVD->getType(); + QualType PrivateType = getPrivateType(N); if (!PrivateType->isVariablyModifiedType()) { assert(!Size && !Sizes[N].second && 
"Size should be nullptr for non-variably modified reduction " @@ -874,31 +852,22 @@ void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N, } void ReductionCodeGen::emitInitialization( - CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal, + CodeGenFunction &CGF, unsigned N, Address PrivateAddr, Address SharedAddr, llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) { assert(SharedAddresses.size() > N && "No variable was generated"); const auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); const OMPDeclareReductionDecl *DRD = getReductionInit(ClausesData[N].ReductionOp); - QualType PrivateType = PrivateVD->getType(); - PrivateAddr = CGF.Builder.CreateElementBitCast( - PrivateAddr, CGF.ConvertTypeForMem(PrivateType)); - QualType SharedType = SharedAddresses[N].first.getType(); - SharedLVal = CGF.MakeAddrLValue( - CGF.Builder.CreateElementBitCast(SharedLVal.getAddress(CGF), - CGF.ConvertTypeForMem(SharedType)), - SharedType, SharedAddresses[N].first.getBaseInfo(), - CGF.CGM.getTBAAInfoForSubobject(SharedAddresses[N].first, SharedType)); if (CGF.getContext().getAsArrayType(PrivateVD->getType())) { if (DRD && DRD->getInitializer()) (void)DefaultInit(CGF); - emitAggregateInitialization(CGF, N, PrivateAddr, SharedLVal, DRD); + emitAggregateInitialization(CGF, N, PrivateAddr, SharedAddr, DRD); } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) { (void)DefaultInit(CGF); + QualType SharedType = SharedAddresses[N].first.getType(); emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp, - PrivateAddr, SharedLVal.getAddress(CGF), - SharedLVal.getType()); + PrivateAddr, SharedAddr, SharedType); } else if (!DefaultInit(CGF) && PrivateVD->hasInit() && !CGF.isTrivialInitializer(PrivateVD->getInit())) { CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr, @@ -908,22 +877,18 @@ void ReductionCodeGen::emitInitialization( } bool ReductionCodeGen::needCleanups(unsigned N) 
{ - const auto *PrivateVD = - cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); - QualType PrivateType = PrivateVD->getType(); + QualType PrivateType = getPrivateType(N); QualType::DestructionKind DTorKind = PrivateType.isDestructedType(); return DTorKind != QualType::DK_none; } void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N, Address PrivateAddr) { - const auto *PrivateVD = - cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); - QualType PrivateType = PrivateVD->getType(); + QualType PrivateType = getPrivateType(N); QualType::DestructionKind DTorKind = PrivateType.isDestructedType(); if (needCleanups(N)) { - PrivateAddr = CGF.Builder.CreateElementBitCast( - PrivateAddr, CGF.ConvertTypeForMem(PrivateType)); + PrivateAddr = + PrivateAddr.withElementType(CGF.ConvertTypeForMem(PrivateType)); CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType); } } @@ -934,24 +899,22 @@ static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy, while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) && !CGF.getContext().hasSameType(BaseTy, ElTy)) { if (const auto *PtrTy = BaseTy->getAs<PointerType>()) { - BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(CGF), PtrTy); + BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(), PtrTy); } else { - LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(CGF), BaseTy); + LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(), BaseTy); BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal); } BaseTy = BaseTy->getPointeeType(); } return CGF.MakeAddrLValue( - CGF.Builder.CreateElementBitCast(BaseLV.getAddress(CGF), - CGF.ConvertTypeForMem(ElTy)), + BaseLV.getAddress().withElementType(CGF.ConvertTypeForMem(ElTy)), BaseLV.getType(), BaseLV.getBaseInfo(), CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType())); } static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy, - llvm::Type *BaseLVType, CharUnits BaseLVAlignment, - llvm::Value 
*Addr) { - Address Tmp = Address::invalid(); + Address OriginalBaseAddress, llvm::Value *Addr) { + RawAddress Tmp = RawAddress::invalid(); Address TopTmp = Address::invalid(); Address MostTopTmp = Address::invalid(); BaseTy = BaseTy.getNonReferenceType(); @@ -965,22 +928,24 @@ static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy, TopTmp = Tmp; BaseTy = BaseTy->getPointeeType(); } - llvm::Type *Ty = BaseLVType; - if (Tmp.isValid()) - Ty = Tmp.getElementType(); - Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Addr, Ty); + if (Tmp.isValid()) { + Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( + Addr, Tmp.getElementType()); CGF.Builder.CreateStore(Addr, Tmp); return MostTopTmp; } - return Address(Addr, BaseLVAlignment); + + Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( + Addr, OriginalBaseAddress.getType()); + return OriginalBaseAddress.withPointer(Addr, NotKnownNonNull); } static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) { const VarDecl *OrigVD = nullptr; - if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(Ref)) { + if (const auto *OASE = dyn_cast<ArraySectionExpr>(Ref)) { const Expr *Base = OASE->getBase()->IgnoreParenImpCasts(); - while (const auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base)) + while (const auto *TempOASE = dyn_cast<ArraySectionExpr>(Base)) Base = TempOASE->getBase()->IgnoreParenImpCasts(); while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base)) Base = TempASE->getBase()->IgnoreParenImpCasts(); @@ -1005,17 +970,18 @@ Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N, LValue BaseLValue = loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(), OriginalBaseLValue); + Address SharedAddr = SharedAddresses[N].first.getAddress(); llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff( - BaseLValue.getPointer(CGF), SharedAddresses[N].first.getPointer(CGF)); + SharedAddr.getElementType(), BaseLValue.getPointer(CGF), + 
SharedAddr.emitRawPointer(CGF)); llvm::Value *PrivatePointer = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( - PrivateAddr.getPointer(), - SharedAddresses[N].first.getAddress(CGF).getType()); - llvm::Value *Ptr = CGF.Builder.CreateGEP(PrivatePointer, Adjustment); + PrivateAddr.emitRawPointer(CGF), SharedAddr.getType()); + llvm::Value *Ptr = CGF.Builder.CreateGEP( + SharedAddr.getElementType(), PrivatePointer, Adjustment); return castToBase(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(), - OriginalBaseLValue.getAddress(CGF).getType(), - OriginalBaseLValue.getAlignment(), Ptr); + OriginalBaseLValue.getAddress(), Ptr); } BaseDecls.emplace_back( cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl())); @@ -1034,7 +1000,7 @@ LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) { getThreadIDVariable()->getType()->castAs<PointerType>()); } -void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt * /*S*/) { +void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt *S) { if (!CGF.HaveInsertPoint()) return; // 1.2.2 OpenMP Language Terminology @@ -1043,6 +1009,8 @@ void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt * /*S*/) { // The point of exit cannot be a branch out of the structured block. // longjmp() and throw() must not violate the entry/exit criteria. 
CGF.EHStack.pushTerminate(); + if (S) + CGF.incrementProfileCounter(S); CodeGen(CGF); CGF.EHStack.popTerminate(); } @@ -1065,15 +1033,26 @@ static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC, return Field; } -CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM, StringRef FirstSeparator, - StringRef Separator) - : CGM(CGM), FirstSeparator(FirstSeparator), Separator(Separator), - OMPBuilder(CGM.getModule()), OffloadEntriesInfoManager(CGM) { +CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM) + : CGM(CGM), OMPBuilder(CGM.getModule()) { KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8); - - // Initialize Types used in OpenMPIRBuilder from OMPKinds.def + llvm::OpenMPIRBuilderConfig Config( + CGM.getLangOpts().OpenMPIsTargetDevice, isGPU(), + CGM.getLangOpts().OpenMPOffloadMandatory, + /*HasRequiresReverseOffload*/ false, /*HasRequiresUnifiedAddress*/ false, + hasRequiresUnifiedSharedMemory(), /*HasRequiresDynamicAllocators*/ false); OMPBuilder.initialize(); - loadOffloadInfoMetadata(); + OMPBuilder.loadOffloadInfoMetadata(CGM.getLangOpts().OpenMPIsTargetDevice + ? CGM.getLangOpts().OMPHostIRFile + : StringRef{}); + OMPBuilder.setConfig(Config); + + // The user forces the compiler to behave as if omp requires + // unified_shared_memory was given. 
+ if (CGM.getLangOpts().OpenMPForceUSM) { + HasRequiresUnifiedSharedMemory = true; + OMPBuilder.Config.setHasRequiresUnifiedSharedMemory(true); + } } void CGOpenMPRuntime::clear() { @@ -1092,14 +1071,7 @@ void CGOpenMPRuntime::clear() { } std::string CGOpenMPRuntime::getName(ArrayRef<StringRef> Parts) const { - SmallString<128> Buffer; - llvm::raw_svector_ostream OS(Buffer); - StringRef Sep = FirstSeparator; - for (StringRef Part : Parts) { - OS << Sep << Part; - Sep = Separator; - } - return std::string(OS.str()); + return OMPBuilder.createPlatformSpecificName(Parts); } static llvm::Function * @@ -1111,9 +1083,9 @@ emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty, QualType PtrTy = C.getPointerType(Ty).withRestrict(); FunctionArgList Args; ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(), - /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other); + /*Id=*/nullptr, PtrTy, ImplicitParamKind::Other); ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(), - /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other); + /*Id=*/nullptr, PtrTy, ImplicitParamKind::Other); Args.push_back(&OmpOutParm); Args.push_back(&OmpInParm); const CGFunctionInfo &FnInfo = @@ -1136,15 +1108,13 @@ emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty, Out->getLocation()); CodeGenFunction::OMPPrivateScope Scope(CGF); Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm); - Scope.addPrivate(In, [&CGF, AddrIn, PtrTy]() { - return CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>()) - .getAddress(CGF); - }); + Scope.addPrivate( + In, CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>()) + .getAddress()); Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm); - Scope.addPrivate(Out, [&CGF, AddrOut, PtrTy]() { - return CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>()) - .getAddress(CGF); - }); + Scope.addPrivate( + Out, CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>()) + .getAddress()); (void)Scope.Privatize(); if 
(!IsCombiner && Out->hasInit() && !CGF.isTrivialInitializer(Out->getInit())) { @@ -1172,7 +1142,7 @@ void CGOpenMPRuntime::emitUserDefinedReduction( if (const Expr *Init = D->getInitializer()) { Initializer = emitCombinerOrInitializer( CGM, D->getType(), - D->getInitializerKind() == OMPDeclareReductionDecl::CallInit ? Init + D->getInitializerKind() == OMPDeclareReductionInitKind::Call ? Init : nullptr, cast<VarDecl>(cast<DeclRefExpr>(D->getInitOrig())->getDecl()), cast<VarDecl>(cast<DeclRefExpr>(D->getInitPriv())->getDecl()), @@ -1199,7 +1169,7 @@ namespace { // Builder if one is present. struct PushAndPopStackRAII { PushAndPopStackRAII(llvm::OpenMPIRBuilder *OMPBuilder, CodeGenFunction &CGF, - bool HasCancel) + bool HasCancel, llvm::omp::Directive Kind) : OMPBuilder(OMPBuilder) { if (!OMPBuilder) return; @@ -1228,8 +1198,7 @@ struct PushAndPopStackRAII { // TODO: Remove this once we emit parallel regions through the // OpenMPIRBuilder as it can do this setup internally. - llvm::OpenMPIRBuilder::FinalizationInfo FI( - {FiniCB, OMPD_parallel, HasCancel}); + llvm::OpenMPIRBuilder::FinalizationInfo FI({FiniCB, Kind, HasCancel}); OMPBuilder->pushFinalizationCB(std::move(FI)); } ~PushAndPopStackRAII() { @@ -1270,27 +1239,45 @@ static llvm::Function *emitParallelOrTeamsOutlinedFunction( // TODO: Temporarily inform the OpenMPIRBuilder, if any, about the new // parallel region to make cancellation barriers work properly. 
llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder(); - PushAndPopStackRAII PSR(&OMPBuilder, CGF, HasCancel); + PushAndPopStackRAII PSR(&OMPBuilder, CGF, HasCancel, InnermostKind); CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind, HasCancel, OutlinedHelperName); CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); return CGF.GenerateOpenMPCapturedStmtFunction(*CS, D.getBeginLoc()); } +std::string CGOpenMPRuntime::getOutlinedHelperName(StringRef Name) const { + std::string Suffix = getName({"omp_outlined"}); + return (Name + Suffix).str(); +} + +std::string CGOpenMPRuntime::getOutlinedHelperName(CodeGenFunction &CGF) const { + return getOutlinedHelperName(CGF.CurFn->getName()); +} + +std::string CGOpenMPRuntime::getReductionFuncName(StringRef Name) const { + std::string Suffix = getName({"omp", "reduction", "reduction_func"}); + return (Name + Suffix).str(); +} + llvm::Function *CGOpenMPRuntime::emitParallelOutlinedFunction( - const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, - OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { + CodeGenFunction &CGF, const OMPExecutableDirective &D, + const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, + const RegionCodeGenTy &CodeGen) { const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel); return emitParallelOrTeamsOutlinedFunction( - CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen); + CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(CGF), + CodeGen); } llvm::Function *CGOpenMPRuntime::emitTeamsOutlinedFunction( - const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, - OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { + CodeGenFunction &CGF, const OMPExecutableDirective &D, + const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, + const RegionCodeGenTy &CodeGen) { const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams); return 
emitParallelOrTeamsOutlinedFunction( - CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen); + CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(CGF), + CodeGen); } llvm::Function *CGOpenMPRuntime::emitTaskOutlinedFunction( @@ -1340,51 +1327,6 @@ llvm::Function *CGOpenMPRuntime::emitTaskOutlinedFunction( return Res; } -static void buildStructValue(ConstantStructBuilder &Fields, CodeGenModule &CGM, - const RecordDecl *RD, const CGRecordLayout &RL, - ArrayRef<llvm::Constant *> Data) { - llvm::StructType *StructTy = RL.getLLVMType(); - unsigned PrevIdx = 0; - ConstantInitBuilder CIBuilder(CGM); - auto DI = Data.begin(); - for (const FieldDecl *FD : RD->fields()) { - unsigned Idx = RL.getLLVMFieldNo(FD); - // Fill the alignment. - for (unsigned I = PrevIdx; I < Idx; ++I) - Fields.add(llvm::Constant::getNullValue(StructTy->getElementType(I))); - PrevIdx = Idx + 1; - Fields.add(*DI); - ++DI; - } -} - -template <class... As> -static llvm::GlobalVariable * -createGlobalStruct(CodeGenModule &CGM, QualType Ty, bool IsConstant, - ArrayRef<llvm::Constant *> Data, const Twine &Name, - As &&... 
Args) { - const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl()); - const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD); - ConstantInitBuilder CIBuilder(CGM); - ConstantStructBuilder Fields = CIBuilder.beginStruct(RL.getLLVMType()); - buildStructValue(Fields, CGM, RD, RL, Data); - return Fields.finishAndCreateGlobal( - Name, CGM.getContext().getAlignOfGlobalVarInChars(Ty), IsConstant, - std::forward<As>(Args)...); -} - -template <typename T> -static void -createConstantGlobalStructAndAddToParent(CodeGenModule &CGM, QualType Ty, - ArrayRef<llvm::Constant *> Data, - T &Parent) { - const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl()); - const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD); - ConstantStructBuilder Fields = Parent.beginStruct(RL.getLLVMType()); - buildStructValue(Fields, CGM, RD, RL, Data); - Fields.finishAndAddTo(Parent); -} - void CGOpenMPRuntime::setLocThreadIdInsertPt(CodeGenFunction &CGF, bool AtCurrentPoint) { auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); @@ -1425,25 +1367,27 @@ static StringRef getIdentStringFromSourceLocation(CodeGenFunction &CGF, llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF, SourceLocation Loc, - unsigned Flags) { + unsigned Flags, bool EmitLoc) { + uint32_t SrcLocStrSize; llvm::Constant *SrcLocStr; - if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo || + if ((!EmitLoc && CGM.getCodeGenOpts().getDebugInfo() == + llvm::codegenoptions::NoDebugInfo) || Loc.isInvalid()) { - SrcLocStr = OMPBuilder.getOrCreateDefaultSrcLocStr(); + SrcLocStr = OMPBuilder.getOrCreateDefaultSrcLocStr(SrcLocStrSize); } else { - std::string FunctionName = ""; + std::string FunctionName; if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl)) FunctionName = FD->getQualifiedNameAsString(); PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc); const char *FileName = PLoc.getFilename(); unsigned Line = PLoc.getLine(); unsigned Column = 
PLoc.getColumn(); - SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(FunctionName.c_str(), FileName, - Line, Column); + SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(FunctionName, FileName, Line, + Column, SrcLocStrSize); } unsigned Reserved2Flags = getDefaultLocationReserved2Flags(); - return OMPBuilder.getOrCreateIdent(SrcLocStr, llvm::omp::IdentFlag(Flags), - Reserved2Flags); + return OMPBuilder.getOrCreateIdent( + SrcLocStr, SrcLocStrSize, llvm::omp::IdentFlag(Flags), Reserved2Flags); } llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF, @@ -1454,10 +1398,11 @@ llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF, if (CGM.getLangOpts().OpenMPIRBuilder) { SmallString<128> Buffer; OMPBuilder.updateToLocation(CGF.Builder.saveIP()); + uint32_t SrcLocStrSize; auto *SrcLocStr = OMPBuilder.getOrCreateSrcLocStr( - getIdentStringFromSourceLocation(CGF, Loc, Buffer)); + getIdentStringFromSourceLocation(CGF, Loc, Buffer), SrcLocStrSize); return OMPBuilder.getOrCreateThreadID( - OMPBuilder.getOrCreateIdent(SrcLocStr)); + OMPBuilder.getOrCreateIdent(SrcLocStr, SrcLocStrSize)); } llvm::Value *ThreadID = nullptr; @@ -1505,6 +1450,7 @@ llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF, setLocThreadIdInsertPt(CGF); CGBuilderTy::InsertPointGuard IPG(CGF.Builder); CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt); + auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc); llvm::CallInst *Call = CGF.Builder.CreateCall( OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), OMPRTL___kmpc_global_thread_num), @@ -1549,157 +1495,94 @@ llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() { return llvm::PointerType::getUnqual(Kmpc_MicroTy); } -llvm::FunctionCallee -CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize, bool IVSigned) { - assert((IVSize == 32 || IVSize == 64) && - "IV size is not compatible with the omp runtime"); - StringRef Name = IVSize == 32 ? (IVSigned ? 
"__kmpc_for_static_init_4" - : "__kmpc_for_static_init_4u") - : (IVSigned ? "__kmpc_for_static_init_8" - : "__kmpc_for_static_init_8u"); - llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty; - auto *PtrTy = llvm::PointerType::getUnqual(ITy); - llvm::Type *TypeParams[] = { - getIdentTyPointerTy(), // loc - CGM.Int32Ty, // tid - CGM.Int32Ty, // schedtype - llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter - PtrTy, // p_lower - PtrTy, // p_upper - PtrTy, // p_stride - ITy, // incr - ITy // chunk - }; - auto *FnTy = - llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); - return CGM.CreateRuntimeFunction(FnTy, Name); -} - -llvm::FunctionCallee -CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize, bool IVSigned) { - assert((IVSize == 32 || IVSize == 64) && - "IV size is not compatible with the omp runtime"); - StringRef Name = - IVSize == 32 - ? (IVSigned ? "__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u") - : (IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u"); - llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty; - llvm::Type *TypeParams[] = { getIdentTyPointerTy(), // loc - CGM.Int32Ty, // tid - CGM.Int32Ty, // schedtype - ITy, // lower - ITy, // upper - ITy, // stride - ITy // chunk - }; - auto *FnTy = - llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); - return CGM.CreateRuntimeFunction(FnTy, Name); -} - -llvm::FunctionCallee -CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize, bool IVSigned) { - assert((IVSize == 32 || IVSize == 64) && - "IV size is not compatible with the omp runtime"); - StringRef Name = - IVSize == 32 - ? (IVSigned ? "__kmpc_dispatch_fini_4" : "__kmpc_dispatch_fini_4u") - : (IVSigned ? 
"__kmpc_dispatch_fini_8" : "__kmpc_dispatch_fini_8u"); - llvm::Type *TypeParams[] = { - getIdentTyPointerTy(), // loc - CGM.Int32Ty, // tid - }; - auto *FnTy = - llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); - return CGM.CreateRuntimeFunction(FnTy, Name); -} - -llvm::FunctionCallee -CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize, bool IVSigned) { - assert((IVSize == 32 || IVSize == 64) && - "IV size is not compatible with the omp runtime"); - StringRef Name = - IVSize == 32 - ? (IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u") - : (IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u"); - llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty; - auto *PtrTy = llvm::PointerType::getUnqual(ITy); - llvm::Type *TypeParams[] = { - getIdentTyPointerTy(), // loc - CGM.Int32Ty, // tid - llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter - PtrTy, // p_lower - PtrTy, // p_upper - PtrTy // p_stride - }; - auto *FnTy = - llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); - return CGM.CreateRuntimeFunction(FnTy, Name); +llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseKind +convertDeviceClause(const VarDecl *VD) { + std::optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy = + OMPDeclareTargetDeclAttr::getDeviceType(VD); + if (!DevTy) + return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseNone; + + switch ((int)*DevTy) { // Avoid -Wcovered-switch-default + case OMPDeclareTargetDeclAttr::DT_Host: + return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseHost; + break; + case OMPDeclareTargetDeclAttr::DT_NoHost: + return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseNoHost; + break; + case OMPDeclareTargetDeclAttr::DT_Any: + return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseAny; + break; + default: + return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseNone; + break; + } } -/// Obtain information that uniquely identifies a target entry. 
This -/// consists of the file and device IDs as well as line number associated with -/// the relevant entry source location. -static void getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc, - unsigned &DeviceID, unsigned &FileID, - unsigned &LineNum) { - SourceManager &SM = C.getSourceManager(); +llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind +convertCaptureClause(const VarDecl *VD) { + std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> MapType = + OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); + if (!MapType) + return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryNone; + switch ((int)*MapType) { // Avoid -Wcovered-switch-default + case OMPDeclareTargetDeclAttr::MapTypeTy::MT_To: + return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryTo; + break; + case OMPDeclareTargetDeclAttr::MapTypeTy::MT_Enter: + return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryEnter; + break; + case OMPDeclareTargetDeclAttr::MapTypeTy::MT_Link: + return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryLink; + break; + default: + return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryNone; + break; + } +} - // The loc should be always valid and have a file ID (the user cannot use - // #pragma directives in macros) +static llvm::TargetRegionEntryInfo getEntryInfoFromPresumedLoc( + CodeGenModule &CGM, llvm::OpenMPIRBuilder &OMPBuilder, + SourceLocation BeginLoc, llvm::StringRef ParentName = "") { - assert(Loc.isValid() && "Source location is expected to be always valid."); + auto FileInfoCallBack = [&]() { + SourceManager &SM = CGM.getContext().getSourceManager(); + PresumedLoc PLoc = SM.getPresumedLoc(BeginLoc); - PresumedLoc PLoc = SM.getPresumedLoc(Loc); - assert(PLoc.isValid() && "Source location is expected to be always valid."); + llvm::sys::fs::UniqueID ID; + if (llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID)) { + PLoc = SM.getPresumedLoc(BeginLoc, /*UseLineDirectives=*/false); + } - 
llvm::sys::fs::UniqueID ID; - if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID)) - SM.getDiagnostics().Report(diag::err_cannot_open_file) - << PLoc.getFilename() << EC.message(); + return std::pair<std::string, uint64_t>(PLoc.getFilename(), PLoc.getLine()); + }; - DeviceID = ID.getDevice(); - FileID = ID.getFile(); - LineNum = PLoc.getLine(); + return OMPBuilder.getTargetEntryUniqueInfo(FileInfoCallBack, ParentName); } -Address CGOpenMPRuntime::getAddrOfDeclareTargetVar(const VarDecl *VD) { - if (CGM.getLangOpts().OpenMPSimd) - return Address::invalid(); - llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = - OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); - if (Res && (*Res == OMPDeclareTargetDeclAttr::MT_Link || - (*Res == OMPDeclareTargetDeclAttr::MT_To && - HasRequiresUnifiedSharedMemory))) { - SmallString<64> PtrName; - { - llvm::raw_svector_ostream OS(PtrName); - OS << CGM.getMangledName(GlobalDecl(VD)); - if (!VD->isExternallyVisible()) { - unsigned DeviceID, FileID, Line; - getTargetEntryUniqueInfo(CGM.getContext(), - VD->getCanonicalDecl()->getBeginLoc(), - DeviceID, FileID, Line); - OS << llvm::format("_%x", FileID); - } - OS << "_decl_tgt_ref_ptr"; - } - llvm::Value *Ptr = CGM.getModule().getNamedValue(PtrName); - if (!Ptr) { - QualType PtrTy = CGM.getContext().getPointerType(VD->getType()); - Ptr = getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(PtrTy), - PtrName); +ConstantAddress CGOpenMPRuntime::getAddrOfDeclareTargetVar(const VarDecl *VD) { + auto AddrOfGlobal = [&VD, this]() { return CGM.GetAddrOfGlobal(VD); }; - auto *GV = cast<llvm::GlobalVariable>(Ptr); - GV->setLinkage(llvm::GlobalValue::WeakAnyLinkage); + auto LinkageForVariable = [&VD, this]() { + return CGM.getLLVMLinkageVarDefinition(VD); + }; - if (!CGM.getLangOpts().OpenMPIsDevice) - GV->setInitializer(CGM.GetAddrOfGlobal(VD)); - registerTargetGlobalVariable(VD, cast<llvm::Constant>(Ptr)); - } - return Address(Ptr, 
CGM.getContext().getDeclAlign(VD)); - } - return Address::invalid(); + std::vector<llvm::GlobalVariable *> GeneratedRefs; + + llvm::Type *LlvmPtrTy = CGM.getTypes().ConvertTypeForMem( + CGM.getContext().getPointerType(VD->getType())); + llvm::Constant *addr = OMPBuilder.getAddrOfDeclareTargetVar( + convertCaptureClause(VD), convertDeviceClause(VD), + VD->hasDefinition(CGM.getContext()) == VarDecl::DeclarationOnly, + VD->isExternallyVisible(), + getEntryInfoFromPresumedLoc(CGM, OMPBuilder, + VD->getCanonicalDecl()->getBeginLoc()), + CGM.getMangledName(VD), GeneratedRefs, CGM.getLangOpts().OpenMPSimd, + CGM.getLangOpts().OMPTargetTriples, LlvmPtrTy, AddrOfGlobal, + LinkageForVariable); + + if (!addr) + return ConstantAddress::invalid(); + return ConstantAddress(addr, LlvmPtrTy, CGM.getContext().getDeclAlign(VD)); } llvm::Constant * @@ -1708,8 +1591,8 @@ CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) { !CGM.getContext().getTargetInfo().isTLSSupported()); // Lookup the entry, lazily creating it if necessary. 
std::string Suffix = getName({"cache", ""}); - return getOrCreateInternalVariable( - CGM.Int8PtrPtrTy, Twine(CGM.getMangledName(VD)).concat(Suffix)); + return OMPBuilder.getOrCreateInternalVariable( + CGM.Int8PtrPtrTy, Twine(CGM.getMangledName(VD)).concat(Suffix).str()); } Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF, @@ -1721,16 +1604,17 @@ Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF, return VDAddr; llvm::Type *VarTy = VDAddr.getElementType(); - llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), - CGF.Builder.CreatePointerCast(VDAddr.getPointer(), - CGM.Int8PtrTy), - CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)), - getOrCreateThreadPrivateCache(VD)}; - return Address(CGF.EmitRuntimeCall( - OMPBuilder.getOrCreateRuntimeFunction( - CGM.getModule(), OMPRTL___kmpc_threadprivate_cached), - Args), - VDAddr.getAlignment()); + llvm::Value *Args[] = { + emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), + CGF.Builder.CreatePointerCast(VDAddr.emitRawPointer(CGF), CGM.Int8PtrTy), + CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)), + getOrCreateThreadPrivateCache(VD)}; + return Address( + CGF.EmitRuntimeCall( + OMPBuilder.getOrCreateRuntimeFunction( + CGM.getModule(), OMPRTL___kmpc_threadprivate_cached), + Args), + CGF.Int8Ty, VDAddr.getAlignment()); } void CGOpenMPRuntime::emitThreadPrivateVarInit( @@ -1745,7 +1629,8 @@ void CGOpenMPRuntime::emitThreadPrivateVarInit( // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor) // to register constructor/destructor for variable. 
llvm::Value *Args[] = { - OMPLoc, CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.VoidPtrTy), + OMPLoc, + CGF.Builder.CreatePointerCast(VDAddr.emitRawPointer(CGF), CGM.VoidPtrTy), Ctor, CopyCtor, Dtor}; CGF.EmitRuntimeCall( OMPBuilder.getOrCreateRuntimeFunction( @@ -1773,7 +1658,7 @@ llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition( FunctionArgList Args; ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc, /*Id=*/nullptr, CGM.getContext().VoidPtrTy, - ImplicitParamDecl::Other); + ImplicitParamKind::Other); Args.push_back(&Dst); const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration( @@ -1787,9 +1672,8 @@ llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition( llvm::Value *ArgVal = CtorCGF.EmitLoadOfScalar( CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation()); - Address Arg = Address(ArgVal, VDAddr.getAlignment()); - Arg = CtorCGF.Builder.CreateElementBitCast( - Arg, CtorCGF.ConvertTypeForMem(ASTTy)); + Address Arg(ArgVal, CtorCGF.ConvertTypeForMem(ASTTy), + VDAddr.getAlignment()); CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(), /*IsInitializer=*/true); ArgVal = CtorCGF.EmitLoadOfScalar( @@ -1806,7 +1690,7 @@ llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition( FunctionArgList Args; ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc, /*Id=*/nullptr, CGM.getContext().VoidPtrTy, - ImplicitParamDecl::Other); + ImplicitParamKind::Other); Args.push_back(&Dst); const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration( @@ -1823,9 +1707,10 @@ llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition( llvm::Value *ArgVal = DtorCGF.EmitLoadOfScalar( DtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation()); - DtorCGF.emitDestroy(Address(ArgVal, VDAddr.getAlignment()), ASTTy, - DtorCGF.getDestroyer(ASTTy.isDestructedType()), - DtorCGF.needsEHCleanup(ASTTy.isDestructedType())); + 
DtorCGF.emitDestroy( + Address(ArgVal, DtorCGF.Int8Ty, VDAddr.getAlignment()), ASTTy, + DtorCGF.getDestroyer(ASTTy.isDestructedType()), + DtorCGF.needsEHCleanup(ASTTy.isDestructedType())); DtorCGF.FinishFunction(); Dtor = Fn; } @@ -1873,119 +1758,39 @@ llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition( return nullptr; } -bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD, - llvm::GlobalVariable *Addr, - bool PerformInit) { - if (CGM.getLangOpts().OMPTargetTriples.empty() && - !CGM.getLangOpts().OpenMPIsDevice) - return false; - Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = - OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); - if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link || - (*Res == OMPDeclareTargetDeclAttr::MT_To && - HasRequiresUnifiedSharedMemory)) - return CGM.getLangOpts().OpenMPIsDevice; - VD = VD->getDefinition(CGM.getContext()); - assert(VD && "Unknown VarDecl"); - - if (!DeclareTargetWithDefinition.insert(CGM.getMangledName(VD)).second) - return CGM.getLangOpts().OpenMPIsDevice; - - QualType ASTTy = VD->getType(); - SourceLocation Loc = VD->getCanonicalDecl()->getBeginLoc(); - - // Produce the unique prefix to identify the new target regions. We use - // the source location of the variable declaration which we know to not - // conflict with any target region. 
- unsigned DeviceID; - unsigned FileID; - unsigned Line; - getTargetEntryUniqueInfo(CGM.getContext(), Loc, DeviceID, FileID, Line); - SmallString<128> Buffer, Out; - { - llvm::raw_svector_ostream OS(Buffer); - OS << "__omp_offloading_" << llvm::format("_%x", DeviceID) - << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line; - } - - const Expr *Init = VD->getAnyInitializer(); - if (CGM.getLangOpts().CPlusPlus && PerformInit) { - llvm::Constant *Ctor; - llvm::Constant *ID; - if (CGM.getLangOpts().OpenMPIsDevice) { - // Generate function that re-emits the declaration's initializer into - // the threadprivate copy of the variable VD - CodeGenFunction CtorCGF(CGM); +void CGOpenMPRuntime::emitDeclareTargetFunction(const FunctionDecl *FD, + llvm::GlobalValue *GV) { + std::optional<OMPDeclareTargetDeclAttr *> ActiveAttr = + OMPDeclareTargetDeclAttr::getActiveAttr(FD); - const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction(); - llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); - llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction( - FTy, Twine(Buffer, "_ctor"), FI, Loc); - auto NL = ApplyDebugLocation::CreateEmpty(CtorCGF); - CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, - FunctionArgList(), Loc, Loc); - auto AL = ApplyDebugLocation::CreateArtificial(CtorCGF); - CtorCGF.EmitAnyExprToMem(Init, - Address(Addr, CGM.getContext().getDeclAlign(VD)), - Init->getType().getQualifiers(), - /*IsInitializer=*/true); - CtorCGF.FinishFunction(); - Ctor = Fn; - ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy); - CGM.addUsedGlobal(cast<llvm::GlobalValue>(Ctor)); - } else { - Ctor = new llvm::GlobalVariable( - CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true, - llvm::GlobalValue::PrivateLinkage, - llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_ctor")); - ID = Ctor; - } - - // Register the information for the entry associated with the constructor. 
- Out.clear(); - OffloadEntriesInfoManager.registerTargetRegionEntryInfo( - DeviceID, FileID, Twine(Buffer, "_ctor").toStringRef(Out), Line, Ctor, - ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryCtor); - } - if (VD->getType().isDestructedType() != QualType::DK_none) { - llvm::Constant *Dtor; - llvm::Constant *ID; - if (CGM.getLangOpts().OpenMPIsDevice) { - // Generate function that emits destructor call for the threadprivate - // copy of the variable VD - CodeGenFunction DtorCGF(CGM); + // We only need to handle active 'indirect' declare target functions. + if (!ActiveAttr || !(*ActiveAttr)->getIndirect()) + return; - const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction(); - llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); - llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction( - FTy, Twine(Buffer, "_dtor"), FI, Loc); - auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF); - DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, - FunctionArgList(), Loc, Loc); - // Create a scope with an artificial location for the body of this - // function. - auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF); - DtorCGF.emitDestroy(Address(Addr, CGM.getContext().getDeclAlign(VD)), - ASTTy, DtorCGF.getDestroyer(ASTTy.isDestructedType()), - DtorCGF.needsEHCleanup(ASTTy.isDestructedType())); - DtorCGF.FinishFunction(); - Dtor = Fn; - ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy); - CGM.addUsedGlobal(cast<llvm::GlobalValue>(Dtor)); - } else { - Dtor = new llvm::GlobalVariable( - CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true, - llvm::GlobalValue::PrivateLinkage, - llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_dtor")); - ID = Dtor; - } - // Register the information for the entry associated with the destructor. 
- Out.clear(); - OffloadEntriesInfoManager.registerTargetRegionEntryInfo( - DeviceID, FileID, Twine(Buffer, "_dtor").toStringRef(Out), Line, Dtor, - ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryDtor); + // Get a mangled name to store the new device global in. + llvm::TargetRegionEntryInfo EntryInfo = getEntryInfoFromPresumedLoc( + CGM, OMPBuilder, FD->getCanonicalDecl()->getBeginLoc(), FD->getName()); + SmallString<128> Name; + OMPBuilder.OffloadInfoManager.getTargetRegionEntryFnName(Name, EntryInfo); + + // We need to generate a new global to hold the address of the indirectly + // called device function. Doing this allows us to keep the visibility and + // linkage of the associated function unchanged while allowing the runtime to + // access its value. + llvm::GlobalValue *Addr = GV; + if (CGM.getLangOpts().OpenMPIsTargetDevice) { + Addr = new llvm::GlobalVariable( + CGM.getModule(), CGM.VoidPtrTy, + /*isConstant=*/true, llvm::GlobalValue::ExternalLinkage, GV, Name, + nullptr, llvm::GlobalValue::NotThreadLocal, + CGM.getModule().getDataLayout().getDefaultGlobalsAddressSpace()); + Addr->setVisibility(llvm::GlobalValue::ProtectedVisibility); } - return CGM.getLangOpts().OpenMPIsDevice; + + OMPBuilder.OffloadInfoManager.registerDeviceGlobalVarEntryInfo( + Name, Addr, CGM.GetTargetTypeStoreSize(CGM.VoidPtrTy).getQuantity(), + llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryIndirect, + llvm::GlobalValue::WeakODRLinkage); } Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF, @@ -1993,12 +1798,13 @@ Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF, StringRef Name) { std::string Suffix = getName({"artificial", ""}); llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType); - llvm::Value *GAddr = - getOrCreateInternalVariable(VarLVType, Twine(Name).concat(Suffix)); + llvm::GlobalVariable *GAddr = OMPBuilder.getOrCreateInternalVariable( + VarLVType, Twine(Name).concat(Suffix).str()); if 
(CGM.getLangOpts().OpenMP && CGM.getLangOpts().OpenMPUseTLS && CGM.getTarget().isTLSSupported()) { - cast<llvm::GlobalVariable>(GAddr)->setThreadLocal(/*Val=*/true); - return Address(GAddr, CGM.getContext().getTypeAlignInChars(VarType)); + GAddr->setThreadLocal(/*Val=*/true); + return Address(GAddr, GAddr->getValueType(), + CGM.getContext().getTypeAlignInChars(VarType)); } std::string CacheSuffix = getName({"cache", ""}); llvm::Value *Args[] = { @@ -2007,8 +1813,9 @@ Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF, CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(GAddr, CGM.VoidPtrTy), CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy, /*isSigned=*/false), - getOrCreateInternalVariable( - CGM.VoidPtrPtrTy, Twine(Name).concat(Suffix).concat(CacheSuffix))}; + OMPBuilder.getOrCreateInternalVariable( + CGM.VoidPtrPtrTy, + Twine(Name).concat(Suffix).concat(CacheSuffix).str())}; return Address( CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( CGF.EmitRuntimeCall( @@ -2016,7 +1823,7 @@ Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF, CGM.getModule(), OMPRTL___kmpc_threadprivate_cached), Args), VarLVType->getPointerTo(/*AddrSpace=*/0)), - CGM.getContext().getTypeAlignInChars(VarType)); + VarLVType, CGM.getContext().getTypeAlignInChars(VarType)); } void CGOpenMPRuntime::emitIfClause(CodeGenFunction &CGF, const Expr *Cond, @@ -2061,7 +1868,8 @@ void CGOpenMPRuntime::emitIfClause(CodeGenFunction &CGF, const Expr *Cond, void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc, llvm::Function *OutlinedFn, ArrayRef<llvm::Value *> CapturedVars, - const Expr *IfCond) { + const Expr *IfCond, + llvm::Value *NumThreads) { if (!CGF.HaveInsertPoint()) return; llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc); @@ -2095,22 +1903,23 @@ void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc, // OutlinedFn(&GTid, &zero_bound, CapturedStruct); Address ThreadIDAddr = 
RT.emitThreadIDAddress(CGF, Loc); - Address ZeroAddrBound = + RawAddress ZeroAddrBound = CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty, /*Name=*/".bound.zero.addr"); - CGF.InitTempAlloca(ZeroAddrBound, CGF.Builder.getInt32(/*C*/ 0)); + CGF.Builder.CreateStore(CGF.Builder.getInt32(/*C*/ 0), ZeroAddrBound); llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs; // ThreadId for serialized parallels is 0. - OutlinedFnArgs.push_back(ThreadIDAddr.getPointer()); + OutlinedFnArgs.push_back(ThreadIDAddr.emitRawPointer(CGF)); OutlinedFnArgs.push_back(ZeroAddrBound.getPointer()); OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end()); // Ensure we do not inline the function. This is trivially true for the ones - // passed to __kmpc_fork_call but the ones calles in serialized regions + // passed to __kmpc_fork_call but the ones called in serialized regions // could be inlined. This is not a perfect but it is closer to the invariant // we want, namely, every data environment starts with a new function. // TODO: We should pass the if condition to the runtime function and do the // handling there. Much cleaner code. 
+ OutlinedFn->removeFnAttr(llvm::Attribute::AlwaysInline); OutlinedFn->addFnAttr(llvm::Attribute::NoInline); RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs); @@ -2139,7 +1948,7 @@ Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF, if (auto *OMPRegionInfo = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) if (OMPRegionInfo->getThreadIDVariable()) - return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress(CGF); + return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress(); llvm::Value *ThreadID = getThreadID(CGF, Loc); QualType Int32Ty = @@ -2151,30 +1960,10 @@ Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF, return ThreadIDTemp; } -llvm::Constant *CGOpenMPRuntime::getOrCreateInternalVariable( - llvm::Type *Ty, const llvm::Twine &Name, unsigned AddressSpace) { - SmallString<256> Buffer; - llvm::raw_svector_ostream Out(Buffer); - Out << Name; - StringRef RuntimeName = Out.str(); - auto &Elem = *InternalVars.try_emplace(RuntimeName, nullptr).first; - if (Elem.second) { - assert(Elem.second->getType()->getPointerElementType() == Ty && - "OMP internal variable has different type than requested"); - return &*Elem.second; - } - - return Elem.second = new llvm::GlobalVariable( - CGM.getModule(), Ty, /*IsConstant*/ false, - llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(Ty), - Elem.first(), /*InsertBefore=*/nullptr, - llvm::GlobalValue::NotThreadLocal, AddressSpace); -} - llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) { std::string Prefix = Twine("gomp_critical_user_", CriticalName).str(); std::string Name = getName({Prefix, "var"}); - return getOrCreateInternalVariable(KmpCriticalNameTy, Name); + return OMPBuilder.getOrCreateInternalVariable(KmpCriticalNameTy, Name); } namespace { @@ -2269,6 +2058,35 @@ void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF, Action.Done(CGF); } +void CGOpenMPRuntime::emitMaskedRegion(CodeGenFunction &CGF, + const 
RegionCodeGenTy &MaskedOpGen, + SourceLocation Loc, const Expr *Filter) { + if (!CGF.HaveInsertPoint()) + return; + // if(__kmpc_masked(ident_t *, gtid, filter)) { + // MaskedOpGen(); + // __kmpc_end_masked(ident_t *, gtid); + // } + // Prepare arguments and build a call to __kmpc_masked + llvm::Value *FilterVal = Filter + ? CGF.EmitScalarExpr(Filter, CGF.Int32Ty) + : llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/0); + llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), + FilterVal}; + llvm::Value *ArgsEnd[] = {emitUpdateLocation(CGF, Loc), + getThreadID(CGF, Loc)}; + CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction( + CGM.getModule(), OMPRTL___kmpc_masked), + Args, + OMPBuilder.getOrCreateRuntimeFunction( + CGM.getModule(), OMPRTL___kmpc_end_masked), + ArgsEnd, + /*Conditional=*/true); + MaskedOpGen.setAction(Action); + emitInlinedDirective(CGF, OMPD_masked, MaskedOpGen); + Action.Done(CGF); +} + void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF, SourceLocation Loc) { if (!CGF.HaveInsertPoint()) @@ -2317,14 +2135,15 @@ static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array, Address PtrAddr = CGF.Builder.CreateConstArrayGEP(Array, Index); llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr); - Address Addr = Address(Ptr, CGF.getContext().getDeclAlign(Var)); - Addr = CGF.Builder.CreateElementBitCast( - Addr, CGF.ConvertTypeForMem(Var->getType())); - return Addr; + llvm::Type *ElemTy = CGF.ConvertTypeForMem(Var->getType()); + return Address( + CGF.Builder.CreateBitCast( + Ptr, ElemTy->getPointerTo(Ptr->getType()->getPointerAddressSpace())), + ElemTy, CGF.getContext().getDeclAlign(Var)); } static llvm::Value *emitCopyprivateCopyFunction( - CodeGenModule &CGM, llvm::Type *ArgsType, + CodeGenModule &CGM, llvm::Type *ArgsElemType, ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs, ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps, SourceLocation Loc) { @@ -2332,9 +2151,9 
@@ static llvm::Value *emitCopyprivateCopyFunction( // void copy_func(void *LHSArg, void *RHSArg); FunctionArgList Args; ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, - ImplicitParamDecl::Other); + ImplicitParamKind::Other); ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, - ImplicitParamDecl::Other); + ImplicitParamKind::Other); Args.push_back(&LHSArg); Args.push_back(&RHSArg); const auto &CGFI = @@ -2351,11 +2170,13 @@ static llvm::Value *emitCopyprivateCopyFunction( // Dest = (void*[n])(LHSArg); // Src = (void*[n])(RHSArg); Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( - CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)), - ArgsType), CGF.getPointerAlign()); + CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)), + ArgsElemType->getPointerTo()), + ArgsElemType, CGF.getPointerAlign()); Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( - CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)), - ArgsType), CGF.getPointerAlign()); + CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)), + ArgsElemType->getPointerTo()), + ArgsElemType, CGF.getPointerAlign()); // *(Type0*)Dst[0] = *(Type0*)Src[0]; // *(Type1*)Dst[1] = *(Type1*)Src[1]; // ... @@ -2428,7 +2249,7 @@ void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF, if (DidIt.isValid()) { llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size()); QualType CopyprivateArrayTy = C.getConstantArrayType( - C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal, + C.VoidPtrTy, ArraySize, nullptr, ArraySizeModifier::Normal, /*IndexTypeQuals=*/0); // Create a list of all private variables for copyprivate. Address CopyprivateList = @@ -2444,18 +2265,17 @@ void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF, // Build function that copies private values from single region to all other // threads in the corresponding parallel region. 
llvm::Value *CpyFn = emitCopyprivateCopyFunction( - CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy)->getPointerTo(), - CopyprivateVars, SrcExprs, DstExprs, AssignmentOps, Loc); + CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy), CopyprivateVars, + SrcExprs, DstExprs, AssignmentOps, Loc); llvm::Value *BufSize = CGF.getTypeSize(CopyprivateArrayTy); - Address CL = - CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(CopyprivateList, - CGF.VoidPtrTy); + Address CL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( + CopyprivateList, CGF.VoidPtrTy, CGF.Int8Ty); llvm::Value *DidItVal = CGF.Builder.CreateLoad(DidIt); llvm::Value *Args[] = { emitUpdateLocation(CGF, Loc), // ident_t *<loc> getThreadID(CGF, Loc), // i32 <gtid> BufSize, // size_t <buf_size> - CL.getPointer(), // void *<copyprivate list> + CL.emitRawPointer(CGF), // void *<copyprivate list> CpyFn, // void (*) (void *, void *) <copy_func> DidItVal // i32 did_it }; @@ -2572,6 +2392,22 @@ void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc, Args); } +void CGOpenMPRuntime::emitErrorCall(CodeGenFunction &CGF, SourceLocation Loc, + Expr *ME, bool IsFatal) { + llvm::Value *MVL = + ME ? CGF.EmitStringLiteralLValue(cast<StringLiteral>(ME)).getPointer(CGF) + : llvm::ConstantPointerNull::get(CGF.VoidPtrTy); + // Build call void __kmpc_error(ident_t *loc, int severity, const char + // *message) + llvm::Value *Args[] = { + emitUpdateLocation(CGF, Loc, /*Flags=*/0, /*GenLoc=*/true), + llvm::ConstantInt::get(CGM.Int32Ty, IsFatal ? 2 : 1), + CGF.Builder.CreatePointerCast(MVL, CGM.Int8PtrTy)}; + CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( + CGM.getModule(), OMPRTL___kmpc_error), + Args); +} + /// Map the OpenMP loop schedule to the runtime enumeration. 
static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind, bool Chunked, bool Ordered) { @@ -2715,7 +2551,17 @@ void CGOpenMPRuntime::emitForDispatchInit( CGF.Builder.getIntN(IVSize, 1), // Stride Chunk // Chunk }; - CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args); + CGF.EmitRuntimeCall(OMPBuilder.createDispatchInitFunction(IVSize, IVSigned), + Args); +} + +void CGOpenMPRuntime::emitForDispatchDeinit(CodeGenFunction &CGF, + SourceLocation Loc) { + if (!CGF.HaveInsertPoint()) + return; + // Call __kmpc_dispatch_deinit(ident_t *loc, kmp_int32 tid); + llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; + CGF.EmitRuntimeCall(OMPBuilder.createDispatchDeinitFunction(), Args); } static void emitForStaticInitCall( @@ -2757,10 +2603,10 @@ static void emitForStaticInitCall( ThreadId, CGF.Builder.getInt32(addMonoNonMonoModifier(CGF.CGM, Schedule, M1, M2)), // Schedule type - Values.IL.getPointer(), // &isLastIter - Values.LB.getPointer(), // &LB - Values.UB.getPointer(), // &UB - Values.ST.getPointer(), // &Stride + Values.IL.emitRawPointer(CGF), // &isLastIter + Values.LB.emitRawPointer(CGF), // &LB + Values.UB.emitRawPointer(CGF), // &UB + Values.ST.emitRawPointer(CGF), // &Stride CGF.Builder.getIntN(Values.IVSize, 1), // Incr Chunk // Chunk }; @@ -2774,7 +2620,7 @@ void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF, const StaticRTInput &Values) { OpenMPSchedType ScheduleNum = getRuntimeSchedule( ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered); - assert(isOpenMPWorksharingDirective(DKind) && + assert((isOpenMPWorksharingDirective(DKind) || (DKind == OMPD_loop)) && "Expected loop-based or sections-based directive."); llvm::Value *UpdatedLocation = emitUpdateLocation(CGF, Loc, isOpenMPLoopDirective(DKind) @@ -2782,7 +2628,8 @@ void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF, : OMP_IDENT_WORK_SECTIONS); llvm::Value *ThreadId = getThreadID(CGF, Loc); 
llvm::FunctionCallee StaticInitFunction = - createForStaticInitFunction(Values.IVSize, Values.IVSigned); + OMPBuilder.createForStaticInitFunction(Values.IVSize, Values.IVSigned, + false); auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc); emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction, ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values); @@ -2797,8 +2644,13 @@ void CGOpenMPRuntime::emitDistributeStaticInit( llvm::Value *UpdatedLocation = emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE); llvm::Value *ThreadId = getThreadID(CGF, Loc); - llvm::FunctionCallee StaticInitFunction = - createForStaticInitFunction(Values.IVSize, Values.IVSigned); + llvm::FunctionCallee StaticInitFunction; + bool isGPUDistribute = + CGM.getLangOpts().OpenMPIsTargetDevice && + (CGM.getTriple().isAMDGCN() || CGM.getTriple().isNVPTX()); + StaticInitFunction = OMPBuilder.createForStaticInitFunction( + Values.IVSize, Values.IVSigned, isGPUDistribute); + emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction, ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown, OMPC_SCHEDULE_MODIFIER_unknown, Values); @@ -2807,21 +2659,33 @@ void CGOpenMPRuntime::emitDistributeStaticInit( void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind) { + assert((DKind == OMPD_distribute || DKind == OMPD_for || + DKind == OMPD_sections) && + "Expected distribute, for, or sections directive kind"); if (!CGF.HaveInsertPoint()) return; // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid); llvm::Value *Args[] = { emitUpdateLocation(CGF, Loc, - isOpenMPDistributeDirective(DKind) + isOpenMPDistributeDirective(DKind) || + (DKind == OMPD_target_teams_loop) ? OMP_IDENT_WORK_DISTRIBUTE - : isOpenMPLoopDirective(DKind) - ? OMP_IDENT_WORK_LOOP - : OMP_IDENT_WORK_SECTIONS), + : isOpenMPLoopDirective(DKind) + ? 
OMP_IDENT_WORK_LOOP + : OMP_IDENT_WORK_SECTIONS), getThreadID(CGF, Loc)}; auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc); - CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( - CGM.getModule(), OMPRTL___kmpc_for_static_fini), - Args); + if (isOpenMPDistributeDirective(DKind) && + CGM.getLangOpts().OpenMPIsTargetDevice && + (CGM.getTriple().isAMDGCN() || CGM.getTriple().isNVPTX())) + CGF.EmitRuntimeCall( + OMPBuilder.getOrCreateRuntimeFunction( + CGM.getModule(), OMPRTL___kmpc_distribute_static_fini), + Args); + else + CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( + CGM.getModule(), OMPRTL___kmpc_for_static_fini), + Args); } void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF, @@ -2832,7 +2696,8 @@ void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF, return; // Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid); llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; - CGF.EmitRuntimeCall(createDispatchFiniFunction(IVSize, IVSigned), Args); + CGF.EmitRuntimeCall(OMPBuilder.createDispatchFiniFunction(IVSize, IVSigned), + Args); } llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF, @@ -2845,15 +2710,14 @@ llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF, // kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper, // kmp_int[32|64] *p_stride); llvm::Value *Args[] = { - emitUpdateLocation(CGF, Loc), - getThreadID(CGF, Loc), - IL.getPointer(), // &isLastIter - LB.getPointer(), // &Lower - UB.getPointer(), // &Upper - ST.getPointer() // &Stride + emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), + IL.emitRawPointer(CGF), // &isLastIter + LB.emitRawPointer(CGF), // &Lower + UB.emitRawPointer(CGF), // &Upper + ST.emitRawPointer(CGF) // &Stride }; - llvm::Value *Call = - CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args); + llvm::Value *Call = CGF.EmitRuntimeCall( + OMPBuilder.createDispatchNextFunction(IVSize, IVSigned), 
Args); return CGF.EmitScalarConversion( Call, CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/1), CGF.getContext().BoolTy, Loc); @@ -2928,420 +2792,54 @@ enum KmpTaskTFields { }; } // anonymous namespace -bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::empty() const { - return OffloadEntriesTargetRegion.empty() && - OffloadEntriesDeviceGlobalVar.empty(); -} - -/// Initialize target region entry. -void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: - initializeTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID, - StringRef ParentName, unsigned LineNum, - unsigned Order) { - assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is " - "only required for the device " - "code generation."); - OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = - OffloadEntryInfoTargetRegion(Order, /*Addr=*/nullptr, /*ID=*/nullptr, - OMPTargetRegionEntryTargetRegion); - ++OffloadingEntriesNum; -} - -void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: - registerTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID, - StringRef ParentName, unsigned LineNum, - llvm::Constant *Addr, llvm::Constant *ID, - OMPTargetRegionEntryKind Flags) { - // If we are emitting code for a target, the entry is already initialized, - // only has to be registered. - if (CGM.getLangOpts().OpenMPIsDevice) { - // This could happen if the device compilation is invoked standalone. 
- if (!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum)) - initializeTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum, - OffloadingEntriesNum); - auto &Entry = - OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum]; - Entry.setAddress(Addr); - Entry.setID(ID); - Entry.setFlags(Flags); - } else { - if (Flags == - OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion && - hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum, - /*IgnoreAddressId*/ true)) - return; - assert(!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum) && - "Target region entry already registered!"); - OffloadEntryInfoTargetRegion Entry(OffloadingEntriesNum, Addr, ID, Flags); - OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = Entry; - ++OffloadingEntriesNum; - } -} - -bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::hasTargetRegionEntryInfo( - unsigned DeviceID, unsigned FileID, StringRef ParentName, unsigned LineNum, - bool IgnoreAddressId) const { - auto PerDevice = OffloadEntriesTargetRegion.find(DeviceID); - if (PerDevice == OffloadEntriesTargetRegion.end()) - return false; - auto PerFile = PerDevice->second.find(FileID); - if (PerFile == PerDevice->second.end()) - return false; - auto PerParentName = PerFile->second.find(ParentName); - if (PerParentName == PerFile->second.end()) - return false; - auto PerLine = PerParentName->second.find(LineNum); - if (PerLine == PerParentName->second.end()) - return false; - // Fail if this entry is already registered. - if (!IgnoreAddressId && - (PerLine->second.getAddress() || PerLine->second.getID())) - return false; - return true; -} - -void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::actOnTargetRegionEntriesInfo( - const OffloadTargetRegionEntryInfoActTy &Action) { - // Scan all target region entries and perform the provided action. 
- for (const auto &D : OffloadEntriesTargetRegion) - for (const auto &F : D.second) - for (const auto &P : F.second) - for (const auto &L : P.second) - Action(D.first, F.first, P.first(), L.first, L.second); -} - -void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: - initializeDeviceGlobalVarEntryInfo(StringRef Name, - OMPTargetGlobalVarEntryKind Flags, - unsigned Order) { - assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is " - "only required for the device " - "code generation."); - OffloadEntriesDeviceGlobalVar.try_emplace(Name, Order, Flags); - ++OffloadingEntriesNum; -} - -void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: - registerDeviceGlobalVarEntryInfo(StringRef VarName, llvm::Constant *Addr, - CharUnits VarSize, - OMPTargetGlobalVarEntryKind Flags, - llvm::GlobalValue::LinkageTypes Linkage) { - if (CGM.getLangOpts().OpenMPIsDevice) { - // This could happen if the device compilation is invoked standalone. - if (!hasDeviceGlobalVarEntryInfo(VarName)) - initializeDeviceGlobalVarEntryInfo(VarName, Flags, OffloadingEntriesNum); - auto &Entry = OffloadEntriesDeviceGlobalVar[VarName]; - assert((!Entry.getAddress() || Entry.getAddress() == Addr) && - "Resetting with the new address."); - if (Entry.getAddress() && hasDeviceGlobalVarEntryInfo(VarName)) { - if (Entry.getVarSize().isZero()) { - Entry.setVarSize(VarSize); - Entry.setLinkage(Linkage); - } - return; - } - Entry.setVarSize(VarSize); - Entry.setLinkage(Linkage); - Entry.setAddress(Addr); - } else { - if (hasDeviceGlobalVarEntryInfo(VarName)) { - auto &Entry = OffloadEntriesDeviceGlobalVar[VarName]; - assert(Entry.isValid() && Entry.getFlags() == Flags && - "Entry not initialized!"); - assert((!Entry.getAddress() || Entry.getAddress() == Addr) && - "Resetting with the new address."); - if (Entry.getVarSize().isZero()) { - Entry.setVarSize(VarSize); - Entry.setLinkage(Linkage); - } - return; - } - OffloadEntriesDeviceGlobalVar.try_emplace( - VarName, OffloadingEntriesNum, Addr, 
VarSize, Flags, Linkage); - ++OffloadingEntriesNum; - } -} - -void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: - actOnDeviceGlobalVarEntriesInfo( - const OffloadDeviceGlobalVarEntryInfoActTy &Action) { - // Scan all target region entries and perform the provided action. - for (const auto &E : OffloadEntriesDeviceGlobalVar) - Action(E.getKey(), E.getValue()); -} - -void CGOpenMPRuntime::createOffloadEntry( - llvm::Constant *ID, llvm::Constant *Addr, uint64_t Size, int32_t Flags, - llvm::GlobalValue::LinkageTypes Linkage) { - StringRef Name = Addr->getName(); - llvm::Module &M = CGM.getModule(); - llvm::LLVMContext &C = M.getContext(); - - // Create constant string with the name. - llvm::Constant *StrPtrInit = llvm::ConstantDataArray::getString(C, Name); - - std::string StringName = getName({"omp_offloading", "entry_name"}); - auto *Str = new llvm::GlobalVariable( - M, StrPtrInit->getType(), /*isConstant=*/true, - llvm::GlobalValue::InternalLinkage, StrPtrInit, StringName); - Str->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); - - llvm::Constant *Data[] = { - llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast(ID, CGM.VoidPtrTy), - llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast(Str, CGM.Int8PtrTy), - llvm::ConstantInt::get(CGM.SizeTy, Size), - llvm::ConstantInt::get(CGM.Int32Ty, Flags), - llvm::ConstantInt::get(CGM.Int32Ty, 0)}; - std::string EntryName = getName({"omp_offloading", "entry", ""}); - llvm::GlobalVariable *Entry = createGlobalStruct( - CGM, getTgtOffloadEntryQTy(), /*IsConstant=*/true, Data, - Twine(EntryName).concat(Name), llvm::GlobalValue::WeakAnyLinkage); - - // The entry has to be created in the section the linker expects it to be. - Entry->setSection("omp_offloading_entries"); -} - void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() { - // Emit the offloading entries and metadata so that the device codegen side - // can easily figure out what to emit. 
The produced metadata looks like - // this: - // - // !omp_offload.info = !{!1, ...} - // - // Right now we only generate metadata for function that contain target - // regions. - // If we are in simd mode or there are no entries, we don't need to do // anything. - if (CGM.getLangOpts().OpenMPSimd || OffloadEntriesInfoManager.empty()) + if (CGM.getLangOpts().OpenMPSimd || OMPBuilder.OffloadInfoManager.empty()) return; - llvm::Module &M = CGM.getModule(); - llvm::LLVMContext &C = M.getContext(); - SmallVector<std::tuple<const OffloadEntriesInfoManagerTy::OffloadEntryInfo *, - SourceLocation, StringRef>, - 16> - OrderedEntries(OffloadEntriesInfoManager.size()); - llvm::SmallVector<StringRef, 16> ParentFunctions( - OffloadEntriesInfoManager.size()); - - // Auxiliary methods to create metadata values and strings. - auto &&GetMDInt = [this](unsigned V) { - return llvm::ConstantAsMetadata::get( - llvm::ConstantInt::get(CGM.Int32Ty, V)); - }; - - auto &&GetMDString = [&C](StringRef V) { return llvm::MDString::get(C, V); }; - - // Create the offloading info metadata node. - llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("omp_offload.info"); - - // Create function that emits metadata for each target region entry; - auto &&TargetRegionMetadataEmitter = - [this, &C, MD, &OrderedEntries, &ParentFunctions, &GetMDInt, - &GetMDString]( - unsigned DeviceID, unsigned FileID, StringRef ParentName, - unsigned Line, - const OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion &E) { - // Generate metadata for target regions. Each entry of this metadata - // contains: - // - Entry 0 -> Kind of this type of metadata (0). - // - Entry 1 -> Device ID of the file where the entry was identified. - // - Entry 2 -> File ID of the file where the entry was identified. - // - Entry 3 -> Mangled name of the function where the entry was - // identified. - // - Entry 4 -> Line in the file where the entry was identified. - // - Entry 5 -> Order the entry was created. 
- // The first element of the metadata node is the kind. - llvm::Metadata *Ops[] = {GetMDInt(E.getKind()), GetMDInt(DeviceID), - GetMDInt(FileID), GetMDString(ParentName), - GetMDInt(Line), GetMDInt(E.getOrder())}; - - SourceLocation Loc; - for (auto I = CGM.getContext().getSourceManager().fileinfo_begin(), - E = CGM.getContext().getSourceManager().fileinfo_end(); - I != E; ++I) { - if (I->getFirst()->getUniqueID().getDevice() == DeviceID && - I->getFirst()->getUniqueID().getFile() == FileID) { - Loc = CGM.getContext().getSourceManager().translateFileLineCol( - I->getFirst(), Line, 1); - break; - } - } - // Save this entry in the right position of the ordered entries array. - OrderedEntries[E.getOrder()] = std::make_tuple(&E, Loc, ParentName); - ParentFunctions[E.getOrder()] = ParentName; - - // Add metadata to the named metadata node. - MD->addOperand(llvm::MDNode::get(C, Ops)); - }; - - OffloadEntriesInfoManager.actOnTargetRegionEntriesInfo( - TargetRegionMetadataEmitter); - - // Create function that emits metadata for each device global variable entry; - auto &&DeviceGlobalVarMetadataEmitter = - [&C, &OrderedEntries, &GetMDInt, &GetMDString, - MD](StringRef MangledName, - const OffloadEntriesInfoManagerTy::OffloadEntryInfoDeviceGlobalVar - &E) { - // Generate metadata for global variables. Each entry of this metadata - // contains: - // - Entry 0 -> Kind of this type of metadata (1). - // - Entry 1 -> Mangled name of the variable. - // - Entry 2 -> Declare target kind. - // - Entry 3 -> Order the entry was created. - // The first element of the metadata node is the kind. - llvm::Metadata *Ops[] = { - GetMDInt(E.getKind()), GetMDString(MangledName), - GetMDInt(E.getFlags()), GetMDInt(E.getOrder())}; - - // Save this entry in the right position of the ordered entries array. - OrderedEntries[E.getOrder()] = - std::make_tuple(&E, SourceLocation(), MangledName); - - // Add metadata to the named metadata node. 
- MD->addOperand(llvm::MDNode::get(C, Ops)); - }; - - OffloadEntriesInfoManager.actOnDeviceGlobalVarEntriesInfo( - DeviceGlobalVarMetadataEmitter); - - for (const auto &E : OrderedEntries) { - assert(std::get<0>(E) && "All ordered entries must exist!"); - if (const auto *CE = - dyn_cast<OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion>( - std::get<0>(E))) { - if (!CE->getID() || !CE->getAddress()) { - // Do not blame the entry if the parent funtion is not emitted. - StringRef FnName = ParentFunctions[CE->getOrder()]; - if (!CGM.GetGlobalValue(FnName)) - continue; - unsigned DiagID = CGM.getDiags().getCustomDiagID( - DiagnosticsEngine::Error, - "Offloading entry for target region in %0 is incorrect: either the " - "address or the ID is invalid."); - CGM.getDiags().Report(std::get<1>(E), DiagID) << FnName; - continue; - } - createOffloadEntry(CE->getID(), CE->getAddress(), /*Size=*/0, - CE->getFlags(), llvm::GlobalValue::WeakAnyLinkage); - } else if (const auto *CE = dyn_cast<OffloadEntriesInfoManagerTy:: - OffloadEntryInfoDeviceGlobalVar>( - std::get<0>(E))) { - OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags = - static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>( - CE->getFlags()); - switch (Flags) { - case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo: { - if (CGM.getLangOpts().OpenMPIsDevice && - CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory()) - continue; - if (!CE->getAddress()) { - unsigned DiagID = CGM.getDiags().getCustomDiagID( - DiagnosticsEngine::Error, "Offloading entry for declare target " - "variable %0 is incorrect: the " - "address is invalid."); - CGM.getDiags().Report(std::get<1>(E), DiagID) << std::get<2>(E); - continue; - } - // The vaiable has no definition - no need to add the entry. 
- if (CE->getVarSize().isZero()) - continue; - break; - } - case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink: - assert(((CGM.getLangOpts().OpenMPIsDevice && !CE->getAddress()) || - (!CGM.getLangOpts().OpenMPIsDevice && CE->getAddress())) && - "Declaret target link address is set."); - if (CGM.getLangOpts().OpenMPIsDevice) - continue; - if (!CE->getAddress()) { - unsigned DiagID = CGM.getDiags().getCustomDiagID( - DiagnosticsEngine::Error, - "Offloading entry for declare target variable is incorrect: the " - "address is invalid."); - CGM.getDiags().Report(DiagID); - continue; + llvm::OpenMPIRBuilder::EmitMetadataErrorReportFunctionTy &&ErrorReportFn = + [this](llvm::OpenMPIRBuilder::EmitMetadataErrorKind Kind, + const llvm::TargetRegionEntryInfo &EntryInfo) -> void { + SourceLocation Loc; + if (Kind != llvm::OpenMPIRBuilder::EMIT_MD_GLOBAL_VAR_LINK_ERROR) { + for (auto I = CGM.getContext().getSourceManager().fileinfo_begin(), + E = CGM.getContext().getSourceManager().fileinfo_end(); + I != E; ++I) { + if (I->getFirst().getUniqueID().getDevice() == EntryInfo.DeviceID && + I->getFirst().getUniqueID().getFile() == EntryInfo.FileID) { + Loc = CGM.getContext().getSourceManager().translateFileLineCol( + I->getFirst(), EntryInfo.Line, 1); + break; } - break; } - createOffloadEntry(CE->getAddress(), CE->getAddress(), - CE->getVarSize().getQuantity(), Flags, - CE->getLinkage()); - } else { - llvm_unreachable("Unsupported entry kind."); } - } -} - -/// Loads all the offload entries information from the host IR -/// metadata. -void CGOpenMPRuntime::loadOffloadInfoMetadata() { - // If we are in target mode, load the metadata from the host IR. This code has - // to match the metadaata creation in createOffloadEntriesAndInfoMetadata(). 
- - if (!CGM.getLangOpts().OpenMPIsDevice) - return; - - if (CGM.getLangOpts().OMPHostIRFile.empty()) - return; - - auto Buf = llvm::MemoryBuffer::getFile(CGM.getLangOpts().OMPHostIRFile); - if (auto EC = Buf.getError()) { - CGM.getDiags().Report(diag::err_cannot_open_file) - << CGM.getLangOpts().OMPHostIRFile << EC.message(); - return; - } - - llvm::LLVMContext C; - auto ME = expectedToErrorOrAndEmitErrors( - C, llvm::parseBitcodeFile(Buf.get()->getMemBufferRef(), C)); - - if (auto EC = ME.getError()) { - unsigned DiagID = CGM.getDiags().getCustomDiagID( - DiagnosticsEngine::Error, "Unable to parse host IR file '%0':'%1'"); - CGM.getDiags().Report(DiagID) - << CGM.getLangOpts().OMPHostIRFile << EC.message(); - return; - } - - llvm::NamedMDNode *MD = ME.get()->getNamedMetadata("omp_offload.info"); - if (!MD) - return; - - for (llvm::MDNode *MN : MD->operands()) { - auto &&GetMDInt = [MN](unsigned Idx) { - auto *V = cast<llvm::ConstantAsMetadata>(MN->getOperand(Idx)); - return cast<llvm::ConstantInt>(V->getValue())->getZExtValue(); - }; - - auto &&GetMDString = [MN](unsigned Idx) { - auto *V = cast<llvm::MDString>(MN->getOperand(Idx)); - return V->getString(); - }; - - switch (GetMDInt(0)) { - default: - llvm_unreachable("Unexpected metadata!"); - break; - case OffloadEntriesInfoManagerTy::OffloadEntryInfo:: - OffloadingEntryInfoTargetRegion: - OffloadEntriesInfoManager.initializeTargetRegionEntryInfo( - /*DeviceID=*/GetMDInt(1), /*FileID=*/GetMDInt(2), - /*ParentName=*/GetMDString(3), /*Line=*/GetMDInt(4), - /*Order=*/GetMDInt(5)); - break; - case OffloadEntriesInfoManagerTy::OffloadEntryInfo:: - OffloadingEntryInfoDeviceGlobalVar: - OffloadEntriesInfoManager.initializeDeviceGlobalVarEntryInfo( - /*MangledName=*/GetMDString(1), - static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>( - /*Flags=*/GetMDInt(2)), - /*Order=*/GetMDInt(3)); - break; + switch (Kind) { + case llvm::OpenMPIRBuilder::EMIT_MD_TARGET_REGION_ERROR: { + unsigned DiagID = 
CGM.getDiags().getCustomDiagID( + DiagnosticsEngine::Error, "Offloading entry for target region in " + "%0 is incorrect: either the " + "address or the ID is invalid."); + CGM.getDiags().Report(Loc, DiagID) << EntryInfo.ParentName; + } break; + case llvm::OpenMPIRBuilder::EMIT_MD_DECLARE_TARGET_ERROR: { + unsigned DiagID = CGM.getDiags().getCustomDiagID( + DiagnosticsEngine::Error, "Offloading entry for declare target " + "variable %0 is incorrect: the " + "address is invalid."); + CGM.getDiags().Report(Loc, DiagID) << EntryInfo.ParentName; + } break; + case llvm::OpenMPIRBuilder::EMIT_MD_GLOBAL_VAR_LINK_ERROR: { + unsigned DiagID = CGM.getDiags().getCustomDiagID( + DiagnosticsEngine::Error, + "Offloading entry for declare target variable is incorrect: the " + "address is invalid."); + CGM.getDiags().Report(DiagID); + } break; } - } + }; + + OMPBuilder.createOffloadEntriesAndInfoMetadata(ErrorReportFn); } void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) { @@ -3356,35 +2854,6 @@ void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) { } } -QualType CGOpenMPRuntime::getTgtOffloadEntryQTy() { - // Make sure the type of the entry is already created. This is the type we - // have to create: - // struct __tgt_offload_entry{ - // void *addr; // Pointer to the offload entry info. - // // (function or global) - // char *name; // Name of the function or global. - // size_t size; // Size of the entry info (0 if it a function). - // int32_t flags; // Flags associated with the entry, e.g. 'link'. - // int32_t reserved; // Reserved, to use by the runtime library. 
- // }; - if (TgtOffloadEntryQTy.isNull()) { - ASTContext &C = CGM.getContext(); - RecordDecl *RD = C.buildImplicitRecord("__tgt_offload_entry"); - RD->startDefinition(); - addFieldToRecordDecl(C, RD, C.VoidPtrTy); - addFieldToRecordDecl(C, RD, C.getPointerType(C.CharTy)); - addFieldToRecordDecl(C, RD, C.getSizeType()); - addFieldToRecordDecl( - C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true)); - addFieldToRecordDecl( - C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true)); - RD->completeDefinition(); - RD->addAttr(PackedAttr::CreateImplicit(C)); - TgtOffloadEntryQTy = C.getRecordType(RD); - } - return TgtOffloadEntryQTy; -} - namespace { struct PrivateHelpersTy { PrivateHelpersTy(const Expr *OriginalRef, const VarDecl *Original, @@ -3409,8 +2878,7 @@ static bool isAllocatableDecl(const VarDecl *VD) { return false; const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>(); // Use the default allocation. - return !((AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc || - AA->getAllocatorType() == OMPAllocateDeclAttr::OMPNullMemAlloc) && + return !(AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc && !AA->getAllocator()); } @@ -3466,7 +2934,7 @@ createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind, // kmp_int32 liter; // void * reductions; // }; - RecordDecl *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TTK_Union); + RecordDecl *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TagTypeKind::Union); UD->startDefinition(); addFieldToRecordDecl(C, UD, KmpInt32Ty); addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy); @@ -3532,10 +3000,10 @@ emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc, ASTContext &C = CGM.getContext(); FunctionArgList Args; ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty, - ImplicitParamDecl::Other); + ImplicitParamKind::Other); ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 
KmpTaskTWithPrivatesPtrQTy.withRestrict(), - ImplicitParamDecl::Other); + ImplicitParamKind::Other); Args.push_back(&GtidArg); Args.push_back(&TaskTypeArg); const auto &TaskEntryFnInfo = @@ -3586,12 +3054,12 @@ emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc, PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); } - llvm::Value *CommonArgs[] = {GtidParam, PartidParam, PrivatesParam, - TaskPrivatesMap, - CGF.Builder - .CreatePointerBitCastOrAddrSpaceCast( - TDBase.getAddress(CGF), CGF.VoidPtrTy) - .getPointer()}; + llvm::Value *CommonArgs[] = { + GtidParam, PartidParam, PrivatesParam, TaskPrivatesMap, + CGF.Builder + .CreatePointerBitCastOrAddrSpaceCast(TDBase.getAddress(), + CGF.VoidPtrTy, CGF.Int8Ty) + .emitRawPointer(CGF)}; SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs), std::end(CommonArgs)); if (isOpenMPTaskLoopDirective(Kind)) { @@ -3634,10 +3102,10 @@ static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM, ASTContext &C = CGM.getContext(); FunctionArgList Args; ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty, - ImplicitParamDecl::Other); + ImplicitParamKind::Other); ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpTaskTWithPrivatesPtrQTy.withRestrict(), - ImplicitParamDecl::Other); + ImplicitParamKind::Other); Args.push_back(&GtidArg); Args.push_back(&TaskTypeArg); const auto &DestructorFnInfo = @@ -3668,7 +3136,7 @@ static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM, if (QualType::DestructionKind DtorKind = Field->getType().isDestructedType()) { LValue FieldLValue = CGF.EmitLValueForField(Base, Field); - CGF.pushDestroy(DtorKind, FieldLValue.getAddress(CGF), Field->getType()); + CGF.pushDestroy(DtorKind, FieldLValue.getAddress(), Field->getType()); } } CGF.FinishFunction(); @@ -3694,7 +3162,7 @@ emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc, ImplicitParamDecl TaskPrivatesArg( C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, 
C.getPointerType(PrivatesQTy).withConst().withRestrict(), - ImplicitParamDecl::Other); + ImplicitParamKind::Other); Args.push_back(&TaskPrivatesArg); llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, unsigned> PrivateVarsPos; unsigned Counter = 1; @@ -3704,7 +3172,7 @@ emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc, C.getPointerType(C.getPointerType(E->getType())) .withConst() .withRestrict(), - ImplicitParamDecl::Other)); + ImplicitParamKind::Other)); const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); PrivateVarsPos[VD] = Counter; ++Counter; @@ -3715,7 +3183,7 @@ emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc, C.getPointerType(C.getPointerType(E->getType())) .withConst() .withRestrict(), - ImplicitParamDecl::Other)); + ImplicitParamKind::Other)); const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); PrivateVarsPos[VD] = Counter; ++Counter; @@ -3726,7 +3194,7 @@ emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc, C.getPointerType(C.getPointerType(E->getType())) .withConst() .withRestrict(), - ImplicitParamDecl::Other)); + ImplicitParamKind::Other)); const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); PrivateVarsPos[VD] = Counter; ++Counter; @@ -3740,7 +3208,7 @@ emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc, Args.push_back(ImplicitParamDecl::Create( C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.getPointerType(C.getPointerType(Ty)).withConst().withRestrict(), - ImplicitParamDecl::Other)); + ImplicitParamKind::Other)); PrivateVarsPos[VD] = Counter; ++Counter; } @@ -3776,7 +3244,7 @@ emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc, LValue RefLVal = CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType()); LValue RefLoadLVal = CGF.EmitLoadOfPointerLValue( - RefLVal.getAddress(CGF), RefLVal.getType()->castAs<PointerType>()); + RefLVal.getAddress(), RefLVal.getType()->castAs<PointerType>()); 
CGF.EmitStoreOfScalar(FieldLVal.getPointer(CGF), RefLoadLVal); ++Counter; } @@ -3811,7 +3279,8 @@ static void emitPrivatesInit(CodeGenFunction &CGF, (IsTargetTask && KmpTaskSharedsPtr.isValid())) { SrcBase = CGF.MakeAddrLValue( CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( - KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy)), + KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy), + CGF.ConvertTypeForMem(SharedsTy)), SharedsTy); } FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin(); @@ -3847,13 +3316,13 @@ static void emitPrivatesInit(CodeGenFunction &CGF, } else if (ForDup) { SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField); SharedRefLValue = CGF.MakeAddrLValue( - Address(SharedRefLValue.getPointer(CGF), - C.getDeclAlign(OriginalVD)), + SharedRefLValue.getAddress().withAlignment( + C.getDeclAlign(OriginalVD)), SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl), SharedRefLValue.getTBAAInfo()); } else if (CGF.LambdaCaptureFields.count( Pair.second.Original->getCanonicalDecl()) > 0 || - dyn_cast_or_null<BlockDecl>(CGF.CurCodeDecl)) { + isa_and_nonnull<BlockDecl>(CGF.CurCodeDecl)) { SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef); } else { // Processing for implicitly captured variables. @@ -3871,14 +3340,12 @@ static void emitPrivatesInit(CodeGenFunction &CGF, // Initialize firstprivate array using element-by-element // initialization. CGF.EmitOMPAggregateAssign( - PrivateLValue.getAddress(CGF), SharedRefLValue.getAddress(CGF), - Type, + PrivateLValue.getAddress(), SharedRefLValue.getAddress(), Type, [&CGF, Elem, Init, &CapturesInfo](Address DestElement, Address SrcElement) { // Clean up any temporaries needed by the initialization. CodeGenFunction::OMPPrivateScope InitScope(CGF); - InitScope.addPrivate( - Elem, [SrcElement]() -> Address { return SrcElement; }); + InitScope.addPrivate(Elem, SrcElement); (void)InitScope.Privatize(); // Emit initialization for single element. 
CodeGenFunction::CGCapturedStmtRAII CapInfoRAII( @@ -3890,9 +3357,7 @@ static void emitPrivatesInit(CodeGenFunction &CGF, } } else { CodeGenFunction::OMPPrivateScope InitScope(CGF); - InitScope.addPrivate(Elem, [SharedRefLValue, &CGF]() -> Address { - return SharedRefLValue.getAddress(CGF); - }); + InitScope.addPrivate(Elem, SharedRefLValue.getAddress()); (void)InitScope.Privatize(); CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo); CGF.EmitExprAsInit(Init, VD, PrivateLValue, @@ -3915,7 +3380,7 @@ static bool checkInitIsRequired(CodeGenFunction &CGF, continue; const VarDecl *VD = Pair.second.PrivateCopy; const Expr *Init = VD->getAnyInitializer(); - InitRequired = InitRequired || (Init && isa<CXXConstructExpr>(Init) && + InitRequired = InitRequired || (isa_and_nonnull<CXXConstructExpr>(Init) && !CGF.isTrivialInitializer(Init)); if (InitRequired) break; @@ -3946,12 +3411,12 @@ emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc, FunctionArgList Args; ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpTaskTWithPrivatesPtrQTy, - ImplicitParamDecl::Other); + ImplicitParamKind::Other); ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpTaskTWithPrivatesPtrQTy, - ImplicitParamDecl::Other); + ImplicitParamKind::Other); ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy, - ImplicitParamDecl::Other); + ImplicitParamKind::Other); Args.push_back(&DstArg); Args.push_back(&SrcArg); Args.push_back(&LastprivArg); @@ -3995,7 +3460,7 @@ emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc, Base, *std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds)), Loc), - CGM.getNaturalTypeAlignment(SharedsTy)); + CGF.Int8Ty, CGM.getNaturalTypeAlignment(SharedsTy)); } emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD, SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true); @@ -4038,14 +3503,11 @@ public: for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) { 
Uppers.push_back(CGF.EmitScalarExpr(E->getHelper(I).Upper)); const auto *VD = cast<VarDecl>(E->getIteratorDecl(I)); - addPrivate(VD, [&CGF, VD]() { - return CGF.CreateMemTemp(VD->getType(), VD->getName()); - }); + addPrivate(VD, CGF.CreateMemTemp(VD->getType(), VD->getName())); const OMPIteratorHelperData &HelperData = E->getHelper(I); - addPrivate(HelperData.CounterVD, [&CGF, &HelperData]() { - return CGF.CreateMemTemp(HelperData.CounterVD->getType(), - "counter.addr"); - }); + addPrivate( + HelperData.CounterVD, + CGF.CreateMemTemp(HelperData.CounterVD->getType(), "counter.addr")); } Privatize(); @@ -4056,7 +3518,7 @@ public: HelperData.CounterVD->getType()); // Counter = 0; CGF.EmitStoreOfScalar( - llvm::ConstantInt::get(CLVal.getAddress(CGF).getElementType(), 0), + llvm::ConstantInt::get(CLVal.getAddress().getElementType(), 0), CLVal); CodeGenFunction::JumpDest &ContDest = ContDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.cont")); @@ -4118,11 +3580,12 @@ getPointerAndSize(CodeGenFunction &CGF, const Expr *E) { SizeVal = CGF.Builder.CreateNUWMul(SizeVal, Sz); } } else if (const auto *ASE = - dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) { - LValue UpAddrLVal = - CGF.EmitOMPArraySectionExpr(ASE, /*IsLowerBound=*/false); - llvm::Value *UpAddr = - CGF.Builder.CreateConstGEP1_32(UpAddrLVal.getPointer(CGF), /*Idx0=*/1); + dyn_cast<ArraySectionExpr>(E->IgnoreParenImpCasts())) { + LValue UpAddrLVal = CGF.EmitArraySectionExpr(ASE, /*IsLowerBound=*/false); + Address UpAddrAddress = UpAddrLVal.getAddress(); + llvm::Value *UpAddr = CGF.Builder.CreateConstGEP1_32( + UpAddrAddress.getElementType(), UpAddrAddress.emitRawPointer(CGF), + /*Idx0=*/1); llvm::Value *LowIntPtr = CGF.Builder.CreatePtrToInt(Addr, CGF.SizeTy); llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGF.SizeTy); SizeVal = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr); @@ -4255,7 +3718,7 @@ CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc, // kmp_int32 flags, 
size_t sizeof_kmp_task_t, size_t sizeof_shareds, // kmp_routine_entry_t *task_entry); // Task flags. Format is taken from - // https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h, + // https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h, // description of kmp_tasking_flags struct. enum { TiedFlag = 0x1, @@ -4361,18 +3824,18 @@ CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc, if (NumOfElements) { NumOfElements = CGF.Builder.CreateNUWAdd( llvm::ConstantInt::get(CGF.SizeTy, NumAffinities), NumOfElements); - OpaqueValueExpr OVE( + auto *OVE = new (C) OpaqueValueExpr( Loc, C.getIntTypeForBitwidth(C.getTypeSize(C.getSizeType()), /*Signed=*/0), - VK_RValue); - CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, + VK_PRValue); + CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, OVE, RValue::get(NumOfElements)); - KmpTaskAffinityInfoArrayTy = - C.getVariableArrayType(KmpTaskAffinityInfoTy, &OVE, ArrayType::Normal, - /*IndexTypeQuals=*/0, SourceRange(Loc, Loc)); + KmpTaskAffinityInfoArrayTy = C.getVariableArrayType( + KmpTaskAffinityInfoTy, OVE, ArraySizeModifier::Normal, + /*IndexTypeQuals=*/0, SourceRange(Loc, Loc)); // Properly emit variable-sized array. 
auto *PD = ImplicitParamDecl::Create(C, KmpTaskAffinityInfoArrayTy, - ImplicitParamDecl::Other); + ImplicitParamKind::Other); CGF.EmitVarDecl(*PD); AffinitiesArray = CGF.GetAddrOfLocalVar(PD); NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty, @@ -4381,7 +3844,7 @@ CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc, KmpTaskAffinityInfoArrayTy = C.getConstantArrayType( KmpTaskAffinityInfoTy, llvm::APInt(C.getTypeSize(C.getSizeType()), NumAffinities), nullptr, - ArrayType::Normal, /*IndexTypeQuals=*/0); + ArraySizeModifier::Normal, /*IndexTypeQuals=*/0); AffinitiesArray = CGF.CreateMemTemp(KmpTaskAffinityInfoArrayTy, ".affs.arr.addr"); AffinitiesArray = CGF.Builder.CreateConstArrayGEP(AffinitiesArray, 0); @@ -4436,10 +3899,9 @@ CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc, llvm::Value *Size; std::tie(Addr, Size) = getPointerAndSize(CGF, E); llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc()); - LValue Base = CGF.MakeAddrLValue( - Address(CGF.Builder.CreateGEP(AffinitiesArray.getPointer(), Idx), - AffinitiesArray.getAlignment()), - KmpTaskAffinityInfoTy); + LValue Base = + CGF.MakeAddrLValue(CGF.Builder.CreateGEP(CGF, AffinitiesArray, Idx), + KmpTaskAffinityInfoTy); // affs[i].base_addr = &<Affinities[i].second>; LValue BaseAddrLVal = CGF.EmitLValueForField( Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr)); @@ -4460,7 +3922,7 @@ CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc, llvm::Value *LocRef = emitUpdateLocation(CGF, Loc); llvm::Value *GTid = getThreadID(CGF, Loc); llvm::Value *AffinListPtr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( - AffinitiesArray.getPointer(), CGM.VoidPtrTy); + AffinitiesArray.emitRawPointer(CGF), CGM.VoidPtrTy); // FIXME: Emit the function and ignore its result for now unless the // runtime function is properly implemented. 
(void)CGF.EmitRuntimeCall( @@ -4471,21 +3933,21 @@ CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc, llvm::Value *NewTaskNewTaskTTy = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( NewTask, KmpTaskTWithPrivatesPtrTy); - LValue Base = CGF.MakeNaturalAlignAddrLValue(NewTaskNewTaskTTy, - KmpTaskTWithPrivatesQTy); + LValue Base = CGF.MakeNaturalAlignRawAddrLValue(NewTaskNewTaskTTy, + KmpTaskTWithPrivatesQTy); LValue TDBase = CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin()); // Fill the data in the resulting kmp_task_t record. // Copy shareds if there are any. Address KmpTaskSharedsPtr = Address::invalid(); if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) { - KmpTaskSharedsPtr = - Address(CGF.EmitLoadOfScalar( - CGF.EmitLValueForField( - TDBase, *std::next(KmpTaskTQTyRD->field_begin(), - KmpTaskTShareds)), - Loc), - CGM.getNaturalTypeAlignment(SharedsTy)); + KmpTaskSharedsPtr = Address( + CGF.EmitLoadOfScalar( + CGF.EmitLValueForField( + TDBase, + *std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds)), + Loc), + CGF.Int8Ty, CGM.getNaturalTypeAlignment(SharedsTy)); LValue Dest = CGF.MakeAddrLValue(KmpTaskSharedsPtr, SharedsTy); LValue Src = CGF.MakeAddrLValue(Shareds, SharedsTy); CGF.EmitAggregateCopy(Dest, Src, SharedsTy, AggValueSlot::DoesNotOverlap); @@ -4537,35 +3999,31 @@ CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc, return Result; } -namespace { -/// Dependence kind for RTL. -enum RTLDependenceKindTy { - DepIn = 0x01, - DepInOut = 0x3, - DepMutexInOutSet = 0x4 -}; -/// Fields ids in kmp_depend_info record. -enum RTLDependInfoFieldsTy { BaseAddr, Len, Flags }; -} // namespace - /// Translates internal dependency kind into the runtime kind. 
static RTLDependenceKindTy translateDependencyKind(OpenMPDependClauseKind K) { RTLDependenceKindTy DepKind; switch (K) { case OMPC_DEPEND_in: - DepKind = DepIn; + DepKind = RTLDependenceKindTy::DepIn; break; // Out and InOut dependencies must use the same code. case OMPC_DEPEND_out: case OMPC_DEPEND_inout: - DepKind = DepInOut; + DepKind = RTLDependenceKindTy::DepInOut; break; case OMPC_DEPEND_mutexinoutset: - DepKind = DepMutexInOutSet; + DepKind = RTLDependenceKindTy::DepMutexInOutSet; + break; + case OMPC_DEPEND_inoutset: + DepKind = RTLDependenceKindTy::DepInOutSet; + break; + case OMPC_DEPEND_outallmemory: + DepKind = RTLDependenceKindTy::DepOmpAllMem; break; case OMPC_DEPEND_source: case OMPC_DEPEND_sink: case OMPC_DEPEND_depobj: + case OMPC_DEPEND_inoutallmemory: case OMPC_DEPEND_unknown: llvm_unreachable("Unknown task dependence type"); } @@ -4595,23 +4053,21 @@ CGOpenMPRuntime::getDepobjElements(CodeGenFunction &CGF, LValue DepobjLVal, getDependTypes(C, KmpDependInfoTy, FlagsTy); RecordDecl *KmpDependInfoRD = cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl()); - LValue Base = CGF.EmitLoadOfPointerLValue( - DepobjLVal.getAddress(CGF), - C.getPointerType(C.VoidPtrTy).castAs<PointerType>()); QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy); - Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( - Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy)); - Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(), - Base.getTBAAInfo()); - llvm::Value *DepObjAddr = CGF.Builder.CreateGEP( - Addr.getPointer(), + LValue Base = CGF.EmitLoadOfPointerLValue( + DepobjLVal.getAddress().withElementType( + CGF.ConvertTypeForMem(KmpDependInfoPtrTy)), + KmpDependInfoPtrTy->castAs<PointerType>()); + Address DepObjAddr = CGF.Builder.CreateGEP( + CGF, Base.getAddress(), llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true)); LValue NumDepsBase = CGF.MakeAddrLValue( - Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy, - 
Base.getBaseInfo(), Base.getTBAAInfo()); + DepObjAddr, KmpDependInfoTy, Base.getBaseInfo(), Base.getTBAAInfo()); // NumDeps = deps[i].base_addr; LValue BaseAddrLVal = CGF.EmitLValueForField( - NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr)); + NumDepsBase, + *std::next(KmpDependInfoRD->field_begin(), + static_cast<unsigned int>(RTLDependInfoFields::BaseAddr))); llvm::Value *NumDeps = CGF.EmitLoadOfScalar(BaseAddrLVal, Loc); return std::make_pair(NumDeps, Base); } @@ -4635,34 +4091,46 @@ static void emitDependData(CodeGenFunction &CGF, QualType &KmpDependInfoTy, for (const Expr *E : Data.DepExprs) { llvm::Value *Addr; llvm::Value *Size; - std::tie(Addr, Size) = getPointerAndSize(CGF, E); + + // The expression will be a nullptr in the 'omp_all_memory' case. + if (E) { + std::tie(Addr, Size) = getPointerAndSize(CGF, E); + Addr = CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy); + } else { + Addr = llvm::ConstantInt::get(CGF.IntPtrTy, 0); + Size = llvm::ConstantInt::get(CGF.SizeTy, 0); + } LValue Base; if (unsigned *P = Pos.dyn_cast<unsigned *>()) { Base = CGF.MakeAddrLValue( CGF.Builder.CreateConstGEP(DependenciesArray, *P), KmpDependInfoTy); } else { + assert(E && "Expected a non-null expression"); LValue &PosLVal = *Pos.get<LValue *>(); llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc()); Base = CGF.MakeAddrLValue( - Address(CGF.Builder.CreateGEP(DependenciesArray.getPointer(), Idx), - DependenciesArray.getAlignment()), - KmpDependInfoTy); + CGF.Builder.CreateGEP(CGF, DependenciesArray, Idx), KmpDependInfoTy); } // deps[i].base_addr = &<Dependencies[i].second>; LValue BaseAddrLVal = CGF.EmitLValueForField( - Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr)); - CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy), - BaseAddrLVal); + Base, + *std::next(KmpDependInfoRD->field_begin(), + static_cast<unsigned int>(RTLDependInfoFields::BaseAddr))); + CGF.EmitStoreOfScalar(Addr, BaseAddrLVal); // deps[i].len = 
sizeof(<Dependencies[i].second>); LValue LenLVal = CGF.EmitLValueForField( - Base, *std::next(KmpDependInfoRD->field_begin(), Len)); + Base, *std::next(KmpDependInfoRD->field_begin(), + static_cast<unsigned int>(RTLDependInfoFields::Len))); CGF.EmitStoreOfScalar(Size, LenLVal); // deps[i].flags = <Dependencies[i].first>; RTLDependenceKindTy DepKind = translateDependencyKind(Data.DepKind); LValue FlagsLVal = CGF.EmitLValueForField( - Base, *std::next(KmpDependInfoRD->field_begin(), Flags)); - CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind), - FlagsLVal); + Base, + *std::next(KmpDependInfoRD->field_begin(), + static_cast<unsigned int>(RTLDependInfoFields::Flags))); + CGF.EmitStoreOfScalar( + llvm::ConstantInt::get(LLVMFlagsTy, static_cast<unsigned int>(DepKind)), + FlagsLVal); if (unsigned *P = Pos.dyn_cast<unsigned *>()) { ++(*P); } else { @@ -4675,50 +4143,30 @@ static void emitDependData(CodeGenFunction &CGF, QualType &KmpDependInfoTy, } } -static SmallVector<llvm::Value *, 4> -emitDepobjElementsSizes(CodeGenFunction &CGF, QualType &KmpDependInfoTy, - const OMPTaskDataTy::DependData &Data) { +SmallVector<llvm::Value *, 4> CGOpenMPRuntime::emitDepobjElementsSizes( + CodeGenFunction &CGF, QualType &KmpDependInfoTy, + const OMPTaskDataTy::DependData &Data) { assert(Data.DepKind == OMPC_DEPEND_depobj && - "Expected depobj dependecy kind."); + "Expected depobj dependency kind."); SmallVector<llvm::Value *, 4> Sizes; SmallVector<LValue, 4> SizeLVals; ASTContext &C = CGF.getContext(); - QualType FlagsTy; - getDependTypes(C, KmpDependInfoTy, FlagsTy); - RecordDecl *KmpDependInfoRD = - cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl()); - QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy); - llvm::Type *KmpDependInfoPtrT = CGF.ConvertTypeForMem(KmpDependInfoPtrTy); { OMPIteratorGeneratorScope IteratorScope( CGF, cast_or_null<OMPIteratorExpr>( Data.IteratorExpr ? 
Data.IteratorExpr->IgnoreParenImpCasts() : nullptr)); for (const Expr *E : Data.DepExprs) { + llvm::Value *NumDeps; + LValue Base; LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts()); - LValue Base = CGF.EmitLoadOfPointerLValue( - DepobjLVal.getAddress(CGF), - C.getPointerType(C.VoidPtrTy).castAs<PointerType>()); - Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( - Base.getAddress(CGF), KmpDependInfoPtrT); - Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(), - Base.getTBAAInfo()); - llvm::Value *DepObjAddr = CGF.Builder.CreateGEP( - Addr.getPointer(), - llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true)); - LValue NumDepsBase = CGF.MakeAddrLValue( - Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy, - Base.getBaseInfo(), Base.getTBAAInfo()); - // NumDeps = deps[i].base_addr; - LValue BaseAddrLVal = CGF.EmitLValueForField( - NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr)); - llvm::Value *NumDeps = - CGF.EmitLoadOfScalar(BaseAddrLVal, E->getExprLoc()); + std::tie(NumDeps, Base) = + getDepobjElements(CGF, DepobjLVal, E->getExprLoc()); LValue NumLVal = CGF.MakeAddrLValue( CGF.CreateMemTemp(C.getUIntPtrType(), "depobj.size.addr"), C.getUIntPtrType()); - CGF.InitTempAlloca(NumLVal.getAddress(CGF), - llvm::ConstantInt::get(CGF.IntPtrTy, 0)); + CGF.Builder.CreateStore(llvm::ConstantInt::get(CGF.IntPtrTy, 0), + NumLVal.getAddress()); llvm::Value *PrevVal = CGF.EmitLoadOfScalar(NumLVal, E->getExprLoc()); llvm::Value *Add = CGF.Builder.CreateNUWAdd(PrevVal, NumDeps); CGF.EmitStoreOfScalar(Add, NumLVal); @@ -4733,19 +4181,13 @@ emitDepobjElementsSizes(CodeGenFunction &CGF, QualType &KmpDependInfoTy, return Sizes; } -static void emitDepobjElements(CodeGenFunction &CGF, QualType &KmpDependInfoTy, - LValue PosLVal, - const OMPTaskDataTy::DependData &Data, - Address DependenciesArray) { +void CGOpenMPRuntime::emitDepobjElements(CodeGenFunction &CGF, + QualType &KmpDependInfoTy, + LValue PosLVal, + const 
OMPTaskDataTy::DependData &Data, + Address DependenciesArray) { assert(Data.DepKind == OMPC_DEPEND_depobj && - "Expected depobj dependecy kind."); - ASTContext &C = CGF.getContext(); - QualType FlagsTy; - getDependTypes(C, KmpDependInfoTy, FlagsTy); - RecordDecl *KmpDependInfoRD = - cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl()); - QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy); - llvm::Type *KmpDependInfoPtrT = CGF.ConvertTypeForMem(KmpDependInfoPtrTy); + "Expected depobj dependency kind."); llvm::Value *ElSize = CGF.getTypeSize(KmpDependInfoTy); { OMPIteratorGeneratorScope IteratorScope( @@ -4754,37 +4196,19 @@ static void emitDepobjElements(CodeGenFunction &CGF, QualType &KmpDependInfoTy, : nullptr)); for (unsigned I = 0, End = Data.DepExprs.size(); I < End; ++I) { const Expr *E = Data.DepExprs[I]; + llvm::Value *NumDeps; + LValue Base; LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts()); - LValue Base = CGF.EmitLoadOfPointerLValue( - DepobjLVal.getAddress(CGF), - C.getPointerType(C.VoidPtrTy).castAs<PointerType>()); - Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( - Base.getAddress(CGF), KmpDependInfoPtrT); - Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(), - Base.getTBAAInfo()); - - // Get number of elements in a single depobj. - llvm::Value *DepObjAddr = CGF.Builder.CreateGEP( - Addr.getPointer(), - llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true)); - LValue NumDepsBase = CGF.MakeAddrLValue( - Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy, - Base.getBaseInfo(), Base.getTBAAInfo()); - // NumDeps = deps[i].base_addr; - LValue BaseAddrLVal = CGF.EmitLValueForField( - NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr)); - llvm::Value *NumDeps = - CGF.EmitLoadOfScalar(BaseAddrLVal, E->getExprLoc()); + std::tie(NumDeps, Base) = + getDepobjElements(CGF, DepobjLVal, E->getExprLoc()); // memcopy dependency data. 
llvm::Value *Size = CGF.Builder.CreateNUWMul( ElSize, CGF.Builder.CreateIntCast(NumDeps, CGF.SizeTy, /*isSigned=*/false)); llvm::Value *Pos = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc()); - Address DepAddr = - Address(CGF.Builder.CreateGEP(DependenciesArray.getPointer(), Pos), - DependenciesArray.getAlignment()); - CGF.Builder.CreateMemCpy(DepAddr, Base.getAddress(CGF), Size); + Address DepAddr = CGF.Builder.CreateGEP(CGF, DependenciesArray, Pos); + CGF.Builder.CreateMemCpy(DepAddr, Base.getAddress(), Size); // Increase pos. // pos += size; @@ -4818,8 +4242,9 @@ std::pair<llvm::Value *, Address> CGOpenMPRuntime::emitDependClause( bool HasRegularWithIterators = false; llvm::Value *NumOfDepobjElements = llvm::ConstantInt::get(CGF.IntPtrTy, 0); llvm::Value *NumOfRegularWithIterators = - llvm::ConstantInt::get(CGF.IntPtrTy, 1); - // Calculate number of depobj dependecies and regular deps with the iterators. + llvm::ConstantInt::get(CGF.IntPtrTy, 0); + // Calculate number of depobj dependencies and regular deps with the + // iterators. for (const OMPTaskDataTy::DependData &D : Dependencies) { if (D.DepKind == OMPC_DEPEND_depobj) { SmallVector<llvm::Value *, 4> Sizes = @@ -4832,13 +4257,20 @@ std::pair<llvm::Value *, Address> CGOpenMPRuntime::emitDependClause( continue; } // Include number of iterations, if any. 
+ if (const auto *IE = cast_or_null<OMPIteratorExpr>(D.IteratorExpr)) { + llvm::Value *ClauseIteratorSpace = + llvm::ConstantInt::get(CGF.IntPtrTy, 1); for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) { llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper); Sz = CGF.Builder.CreateIntCast(Sz, CGF.IntPtrTy, /*isSigned=*/false); - NumOfRegularWithIterators = - CGF.Builder.CreateNUWMul(NumOfRegularWithIterators, Sz); + ClauseIteratorSpace = CGF.Builder.CreateNUWMul(Sz, ClauseIteratorSpace); } + llvm::Value *NumClauseDeps = CGF.Builder.CreateNUWMul( + ClauseIteratorSpace, + llvm::ConstantInt::get(CGF.IntPtrTy, D.DepExprs.size())); + NumOfRegularWithIterators = + CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumClauseDeps); HasRegularWithIterators = true; continue; } @@ -4856,18 +4288,18 @@ std::pair<llvm::Value *, Address> CGOpenMPRuntime::emitDependClause( NumOfElements = CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumOfElements); } - OpaqueValueExpr OVE(Loc, - C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0), - VK_RValue); - CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, + auto *OVE = new (C) OpaqueValueExpr( + Loc, C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0), + VK_PRValue); + CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, OVE, RValue::get(NumOfElements)); KmpDependInfoArrayTy = - C.getVariableArrayType(KmpDependInfoTy, &OVE, ArrayType::Normal, + C.getVariableArrayType(KmpDependInfoTy, OVE, ArraySizeModifier::Normal, /*IndexTypeQuals=*/0, SourceRange(Loc, Loc)); // CGF.EmitVariablyModifiedType(KmpDependInfoArrayTy); // Properly emit variable-sized array. 
auto *PD = ImplicitParamDecl::Create(C, KmpDependInfoArrayTy, - ImplicitParamDecl::Other); + ImplicitParamKind::Other); CGF.EmitVarDecl(*PD); DependenciesArray = CGF.GetAddrOfLocalVar(PD); NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty, @@ -4875,7 +4307,7 @@ std::pair<llvm::Value *, Address> CGOpenMPRuntime::emitDependClause( } else { KmpDependInfoArrayTy = C.getConstantArrayType( KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies), nullptr, - ArrayType::Normal, /*IndexTypeQuals=*/0); + ArraySizeModifier::Normal, /*IndexTypeQuals=*/0); DependenciesArray = CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr"); DependenciesArray = CGF.Builder.CreateConstArrayGEP(DependenciesArray, 0); @@ -4890,7 +4322,7 @@ std::pair<llvm::Value *, Address> CGOpenMPRuntime::emitDependClause( emitDependData(CGF, KmpDependInfoTy, &Pos, Dependencies[I], DependenciesArray); } - // Copy regular dependecies with iterators. + // Copy regular dependencies with iterators. LValue PosLVal = CGF.MakeAddrLValue( CGF.CreateMemTemp(C.getSizeType(), "dep.counter.addr"), C.getSizeType()); CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal); @@ -4911,7 +4343,7 @@ std::pair<llvm::Value *, Address> CGOpenMPRuntime::emitDependClause( } } DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( - DependenciesArray, CGF.VoidPtrTy); + DependenciesArray, CGF.VoidPtrTy, CGF.Int8Ty); return std::make_pair(NumOfElements, DependenciesArray); } @@ -4955,7 +4387,7 @@ Address CGOpenMPRuntime::emitDepobjDependClause( } else { QualType KmpDependInfoArrayTy = C.getConstantArrayType( KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies + 1), - nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0); + nullptr, ArraySizeModifier::Normal, /*IndexTypeQuals=*/0); CharUnits Sz = C.getTypeSizeInChars(KmpDependInfoArrayTy); Size = CGM.getSize(Sz.alignTo(Align)); NumDepsVal = llvm::ConstantInt::get(CGF.IntPtrTy, NumDependencies); @@ -4970,14 +4402,17 @@ 
Address CGOpenMPRuntime::emitDepobjDependClause( CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( CGM.getModule(), OMPRTL___kmpc_alloc), Args, ".dep.arr.addr"); + llvm::Type *KmpDependInfoLlvmTy = CGF.ConvertTypeForMem(KmpDependInfoTy); Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( - Addr, CGF.ConvertTypeForMem(KmpDependInfoTy)->getPointerTo()); - DependenciesArray = Address(Addr, Align); + Addr, KmpDependInfoLlvmTy->getPointerTo()); + DependenciesArray = Address(Addr, KmpDependInfoLlvmTy, Align); // Write number of elements in the first element of array for depobj. LValue Base = CGF.MakeAddrLValue(DependenciesArray, KmpDependInfoTy); // deps[i].base_addr = NumDependencies; LValue BaseAddrLVal = CGF.EmitLValueForField( - Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr)); + Base, + *std::next(KmpDependInfoRD->field_begin(), + static_cast<unsigned int>(RTLDependInfoFields::BaseAddr))); CGF.EmitStoreOfScalar(NumDepsVal, BaseAddrLVal); llvm::PointerUnion<unsigned *, LValue *> Pos; unsigned Idx = 1; @@ -4994,7 +4429,8 @@ Address CGOpenMPRuntime::emitDepobjDependClause( } emitDependData(CGF, KmpDependInfoTy, Pos, Dependencies, DependenciesArray); DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( - CGF.Builder.CreateConstGEP(DependenciesArray, 1), CGF.VoidPtrTy); + CGF.Builder.CreateConstGEP(DependenciesArray, 1), CGF.VoidPtrTy, + CGF.Int8Ty); return DependenciesArray; } @@ -5003,14 +4439,14 @@ void CGOpenMPRuntime::emitDestroyClause(CodeGenFunction &CGF, LValue DepobjLVal, ASTContext &C = CGM.getContext(); QualType FlagsTy; getDependTypes(C, KmpDependInfoTy, FlagsTy); - LValue Base = CGF.EmitLoadOfPointerLValue( - DepobjLVal.getAddress(CGF), - C.getPointerType(C.VoidPtrTy).castAs<PointerType>()); + LValue Base = CGF.EmitLoadOfPointerLValue(DepobjLVal.getAddress(), + C.VoidPtrTy.castAs<PointerType>()); QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy); Address Addr = 
CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( - Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy)); + Base.getAddress(), CGF.ConvertTypeForMem(KmpDependInfoPtrTy), + CGF.ConvertTypeForMem(KmpDependInfoTy)); llvm::Value *DepObjAddr = CGF.Builder.CreateGEP( - Addr.getPointer(), + Addr.getElementType(), Addr.emitRawPointer(CGF), llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true)); DepObjAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(DepObjAddr, CGF.VoidPtrTy); @@ -5038,9 +4474,10 @@ void CGOpenMPRuntime::emitUpdateClause(CodeGenFunction &CGF, LValue DepobjLVal, LValue Base; std::tie(NumDeps, Base) = getDepobjElements(CGF, DepobjLVal, Loc); - Address Begin = Base.getAddress(CGF); + Address Begin = Base.getAddress(); // Cast from pointer to array type to pointer to single element. - llvm::Value *End = CGF.Builder.CreateGEP(Begin.getPointer(), NumDeps); + llvm::Value *End = CGF.Builder.CreateGEP(Begin.getElementType(), + Begin.emitRawPointer(CGF), NumDeps); // The basic structure here is a while-do loop. 
llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.body"); llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.done"); @@ -5048,24 +4485,26 @@ void CGOpenMPRuntime::emitUpdateClause(CodeGenFunction &CGF, LValue DepobjLVal, CGF.EmitBlock(BodyBB); llvm::PHINode *ElementPHI = CGF.Builder.CreatePHI(Begin.getType(), 2, "omp.elementPast"); - ElementPHI->addIncoming(Begin.getPointer(), EntryBB); - Begin = Address(ElementPHI, Begin.getAlignment()); + ElementPHI->addIncoming(Begin.emitRawPointer(CGF), EntryBB); + Begin = Begin.withPointer(ElementPHI, KnownNonNull); Base = CGF.MakeAddrLValue(Begin, KmpDependInfoTy, Base.getBaseInfo(), Base.getTBAAInfo()); // deps[i].flags = NewDepKind; RTLDependenceKindTy DepKind = translateDependencyKind(NewDepKind); LValue FlagsLVal = CGF.EmitLValueForField( - Base, *std::next(KmpDependInfoRD->field_begin(), Flags)); - CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind), - FlagsLVal); + Base, *std::next(KmpDependInfoRD->field_begin(), + static_cast<unsigned int>(RTLDependInfoFields::Flags))); + CGF.EmitStoreOfScalar( + llvm::ConstantInt::get(LLVMFlagsTy, static_cast<unsigned int>(DepKind)), + FlagsLVal); // Shift the address forward by one element. - Address ElementNext = - CGF.Builder.CreateConstGEP(Begin, /*Index=*/1, "omp.elementNext"); - ElementPHI->addIncoming(ElementNext.getPointer(), - CGF.Builder.GetInsertBlock()); + llvm::Value *ElementNext = + CGF.Builder.CreateConstGEP(Begin, /*Index=*/1, "omp.elementNext") + .emitRawPointer(CGF); + ElementPHI->addIncoming(ElementNext, CGF.Builder.GetInsertBlock()); llvm::Value *IsEmpty = - CGF.Builder.CreateICmpEQ(ElementNext.getPointer(), End, "omp.isempty"); + CGF.Builder.CreateICmpEQ(ElementNext, End, "omp.isempty"); CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB); // Done. 
CGF.EmitBlock(DoneBB, /*IsFinished=*/true); @@ -5108,7 +4547,7 @@ void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc, DepTaskArgs[1] = ThreadID; DepTaskArgs[2] = NewTask; DepTaskArgs[3] = NumOfElements; - DepTaskArgs[4] = DependenciesArray.getPointer(); + DepTaskArgs[4] = DependenciesArray.emitRawPointer(CGF); DepTaskArgs[5] = CGF.Builder.getInt32(0); DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); } @@ -5135,14 +4574,16 @@ void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc, Region->emitUntiedSwitch(CGF); }; - llvm::Value *DepWaitTaskArgs[6]; + llvm::Value *DepWaitTaskArgs[7]; if (!Data.Dependences.empty()) { DepWaitTaskArgs[0] = UpLoc; DepWaitTaskArgs[1] = ThreadID; DepWaitTaskArgs[2] = NumOfElements; - DepWaitTaskArgs[3] = DependenciesArray.getPointer(); + DepWaitTaskArgs[3] = DependenciesArray.emitRawPointer(CGF); DepWaitTaskArgs[4] = CGF.Builder.getInt32(0); DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); + DepWaitTaskArgs[6] = + llvm::ConstantInt::get(CGF.Int32Ty, Data.HasNowaitClause); } auto &M = CGM.getModule(); auto &&ElseCodeGen = [this, &M, &TaskArgs, ThreadID, NewTaskNewTaskTTy, @@ -5154,9 +4595,9 @@ void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc, // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info // is specified. 
if (!Data.Dependences.empty()) - CGF.EmitRuntimeCall( - OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_wait_deps), - DepWaitTaskArgs); + CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( + M, OMPRTL___kmpc_omp_taskwait_deps_51), + DepWaitTaskArgs); // Call proxy_task_entry(gtid, new_task); auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy, Loc](CodeGenFunction &CGF, PrePostActionTy &Action) { @@ -5219,24 +4660,21 @@ void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc, *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound)); const auto *LBVar = cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl()); - CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(CGF), - LBLVal.getQuals(), + CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(), LBLVal.getQuals(), /*IsInitializer=*/true); LValue UBLVal = CGF.EmitLValueForField( Result.TDBase, *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound)); const auto *UBVar = cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl()); - CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(CGF), - UBLVal.getQuals(), + CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(), UBLVal.getQuals(), /*IsInitializer=*/true); LValue StLVal = CGF.EmitLValueForField( Result.TDBase, *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride)); const auto *StVar = cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl()); - CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(CGF), - StLVal.getQuals(), + CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(), StLVal.getQuals(), /*IsInitializer=*/true); // Store reductions address. 
LValue RedLVal = CGF.EmitLValueForField( @@ -5245,7 +4683,7 @@ void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc, if (Data.Reductions) { CGF.EmitStoreOfScalar(Data.Reductions, RedLVal); } else { - CGF.EmitNullInitialization(RedLVal.getAddress(CGF), + CGF.EmitNullInitialization(RedLVal.getAddress(), CGF.getContext().VoidPtrTy); } enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 }; @@ -5300,10 +4738,11 @@ static void EmitOMPAggregateReduction( const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe(); llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr); - llvm::Value *RHSBegin = RHSAddr.getPointer(); - llvm::Value *LHSBegin = LHSAddr.getPointer(); + llvm::Value *RHSBegin = RHSAddr.emitRawPointer(CGF); + llvm::Value *LHSBegin = LHSAddr.emitRawPointer(CGF); // Cast from pointer to array type to pointer to single element. - llvm::Value *LHSEnd = CGF.Builder.CreateGEP(LHSBegin, NumElements); + llvm::Value *LHSEnd = + CGF.Builder.CreateGEP(LHSAddr.getElementType(), LHSBegin, NumElements); // The basic structure here is a while-do loop. 
llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arraycpy.body"); llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arraycpy.done"); @@ -5320,30 +4759,32 @@ static void EmitOMPAggregateReduction( llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI( RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast"); RHSElementPHI->addIncoming(RHSBegin, EntryBB); - Address RHSElementCurrent = - Address(RHSElementPHI, - RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize)); + Address RHSElementCurrent( + RHSElementPHI, RHSAddr.getElementType(), + RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize)); llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI( LHSBegin->getType(), 2, "omp.arraycpy.destElementPast"); LHSElementPHI->addIncoming(LHSBegin, EntryBB); - Address LHSElementCurrent = - Address(LHSElementPHI, - LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize)); + Address LHSElementCurrent( + LHSElementPHI, LHSAddr.getElementType(), + LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize)); // Emit copy. CodeGenFunction::OMPPrivateScope Scope(CGF); - Scope.addPrivate(LHSVar, [=]() { return LHSElementCurrent; }); - Scope.addPrivate(RHSVar, [=]() { return RHSElementCurrent; }); + Scope.addPrivate(LHSVar, LHSElementCurrent); + Scope.addPrivate(RHSVar, RHSElementCurrent); Scope.Privatize(); RedOpGen(CGF, XExpr, EExpr, UpExpr); Scope.ForceCleanup(); // Shift the address forward by one element. llvm::Value *LHSElementNext = CGF.Builder.CreateConstGEP1_32( - LHSElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element"); + LHSAddr.getElementType(), LHSElementPHI, /*Idx0=*/1, + "omp.arraycpy.dest.element"); llvm::Value *RHSElementNext = CGF.Builder.CreateConstGEP1_32( - RHSElementPHI, /*Idx0=*/1, "omp.arraycpy.src.element"); + RHSAddr.getElementType(), RHSElementPHI, /*Idx0=*/1, + "omp.arraycpy.src.element"); // Check whether we've reached the end. 
llvm::Value *Done = CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done"); @@ -5377,22 +4818,22 @@ static void emitReductionCombiner(CodeGenFunction &CGF, } llvm::Function *CGOpenMPRuntime::emitReductionFunction( - SourceLocation Loc, llvm::Type *ArgsType, ArrayRef<const Expr *> Privates, - ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs, - ArrayRef<const Expr *> ReductionOps) { + StringRef ReducerName, SourceLocation Loc, llvm::Type *ArgsElemType, + ArrayRef<const Expr *> Privates, ArrayRef<const Expr *> LHSExprs, + ArrayRef<const Expr *> RHSExprs, ArrayRef<const Expr *> ReductionOps) { ASTContext &C = CGM.getContext(); // void reduction_func(void *LHSArg, void *RHSArg); FunctionArgList Args; ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, - ImplicitParamDecl::Other); + ImplicitParamKind::Other); ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, - ImplicitParamDecl::Other); + ImplicitParamKind::Other); Args.push_back(&LHSArg); Args.push_back(&RHSArg); const auto &CGFI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); - std::string Name = getName({"omp", "reduction", "reduction_func"}); + std::string Name = getReductionFuncName(ReducerName); auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI), llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule()); @@ -5404,29 +4845,27 @@ llvm::Function *CGOpenMPRuntime::emitReductionFunction( // Dst = (void*[n])(LHSArg); // Src = (void*[n])(RHSArg); Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( - CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)), - ArgsType), CGF.getPointerAlign()); + CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)), + ArgsElemType->getPointerTo()), + ArgsElemType, CGF.getPointerAlign()); Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( - CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)), - ArgsType), CGF.getPointerAlign()); + 
CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)), + ArgsElemType->getPointerTo()), + ArgsElemType, CGF.getPointerAlign()); // ... // *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]); // ... CodeGenFunction::OMPPrivateScope Scope(CGF); - auto IPriv = Privates.begin(); + const auto *IPriv = Privates.begin(); unsigned Idx = 0; for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) { const auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl()); - Scope.addPrivate(RHSVar, [&CGF, RHS, Idx, RHSVar]() { - return emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar); - }); + Scope.addPrivate(RHSVar, emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar)); const auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl()); - Scope.addPrivate(LHSVar, [&CGF, LHS, Idx, LHSVar]() { - return emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar); - }); + Scope.addPrivate(LHSVar, emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar)); QualType PrivTy = (*IPriv)->getType(); if (PrivTy->isVariablyModifiedType()) { // Get array size and emit VLA type. @@ -5443,8 +4882,8 @@ llvm::Function *CGOpenMPRuntime::emitReductionFunction( } Scope.Privatize(); IPriv = Privates.begin(); - auto ILHS = LHSExprs.begin(); - auto IRHS = RHSExprs.begin(); + const auto *ILHS = LHSExprs.begin(); + const auto *IRHS = RHSExprs.begin(); for (const Expr *E : ReductionOps) { if ((*IPriv)->getType()->isArrayType()) { // Emit reduction for array section. 
@@ -5539,9 +4978,9 @@ void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc, if (SimpleReduction) { CodeGenFunction::RunCleanupsScope Scope(CGF); - auto IPriv = Privates.begin(); - auto ILHS = LHSExprs.begin(); - auto IRHS = RHSExprs.begin(); + const auto *IPriv = Privates.begin(); + const auto *ILHS = LHSExprs.begin(); + const auto *IRHS = RHSExprs.begin(); for (const Expr *E : ReductionOps) { emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS), cast<DeclRefExpr>(*IRHS)); @@ -5561,12 +5000,12 @@ void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc, ++Size; } llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size); - QualType ReductionArrayTy = - C.getConstantArrayType(C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal, - /*IndexTypeQuals=*/0); - Address ReductionList = + QualType ReductionArrayTy = C.getConstantArrayType( + C.VoidPtrTy, ArraySize, nullptr, ArraySizeModifier::Normal, + /*IndexTypeQuals=*/0); + RawAddress ReductionList = CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list"); - auto IPriv = Privates.begin(); + const auto *IPriv = Privates.begin(); unsigned Idx = 0; for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) { Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx); @@ -5590,8 +5029,8 @@ void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc, // 2. Emit reduce_func(). llvm::Function *ReductionFn = emitReductionFunction( - Loc, CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo(), Privates, - LHSExprs, RHSExprs, ReductionOps); + CGF.CurFn->getName(), Loc, CGF.ConvertTypeForMem(ReductionArrayTy), + Privates, LHSExprs, RHSExprs, ReductionOps); // 3. 
Create static kmp_critical_name lock = { 0 }; std::string Name = getName({"reduction"}); @@ -5643,9 +5082,9 @@ void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc, auto &&CodeGen = [Privates, LHSExprs, RHSExprs, ReductionOps]( CodeGenFunction &CGF, PrePostActionTy &Action) { CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime(); - auto IPriv = Privates.begin(); - auto ILHS = LHSExprs.begin(); - auto IRHS = RHSExprs.begin(); + const auto *IPriv = Privates.begin(); + const auto *ILHS = LHSExprs.begin(); + const auto *IRHS = RHSExprs.begin(); for (const Expr *E : ReductionOps) { RT.emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS), cast<DeclRefExpr>(*IRHS)); @@ -5656,7 +5095,7 @@ void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc, }; RegionCodeGenTy RCG(CodeGen); CommonActionTy Action( - nullptr, llvm::None, + nullptr, std::nullopt, OMPBuilder.getOrCreateRuntimeFunction( CGM.getModule(), WithNowait ? OMPRTL___kmpc_end_reduce_nowait : OMPRTL___kmpc_end_reduce), @@ -5677,9 +5116,9 @@ void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc, auto &&AtomicCodeGen = [Loc, Privates, LHSExprs, RHSExprs, ReductionOps]( CodeGenFunction &CGF, PrePostActionTy &Action) { - auto ILHS = LHSExprs.begin(); - auto IRHS = RHSExprs.begin(); - auto IPriv = Privates.begin(); + const auto *ILHS = LHSExprs.begin(); + const auto *IRHS = RHSExprs.begin(); + const auto *IPriv = Privates.begin(); for (const Expr *E : ReductionOps) { const Expr *XExpr = nullptr; const Expr *EExpr = nullptr; @@ -5721,14 +5160,11 @@ void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc, llvm::AtomicOrdering::Monotonic, Loc, [&CGF, UpExpr, VD, Loc](RValue XRValue) { CodeGenFunction::OMPPrivateScope PrivateScope(CGF); - PrivateScope.addPrivate( - VD, [&CGF, VD, XRValue, Loc]() { - Address LHSTemp = CGF.CreateMemTemp(VD->getType()); - CGF.emitOMPSimpleStore( - CGF.MakeAddrLValue(LHSTemp, VD->getType()), 
XRValue, - VD->getType().getNonReferenceType(), Loc); - return LHSTemp; - }); + Address LHSTemp = CGF.CreateMemTemp(VD->getType()); + CGF.emitOMPSimpleStore( + CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue, + VD->getType().getNonReferenceType(), Loc); + PrivateScope.addPrivate(VD, LHSTemp); (void)PrivateScope.Privatize(); return CGF.EmitAnyExpr(UpExpr); }); @@ -5781,7 +5217,7 @@ void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc, ThreadId, // i32 <gtid> Lock // kmp_critical_name *&<lock> }; - CommonActionTy Action(nullptr, llvm::None, + CommonActionTy Action(nullptr, std::nullopt, OMPBuilder.getOrCreateRuntimeFunction( CGM.getModule(), OMPRTL___kmpc_end_reduce), EndArgs); @@ -5829,9 +5265,9 @@ static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM, VoidPtrTy.addRestrict(); FunctionArgList Args; ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy, - ImplicitParamDecl::Other); + ImplicitParamKind::Other); ImplicitParamDecl ParamOrig(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy, - ImplicitParamDecl::Other); + ImplicitParamKind::Other); Args.emplace_back(&Param); Args.emplace_back(&ParamOrig); const auto &FnInfo = @@ -5844,9 +5280,11 @@ static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM, Fn->setDoesNotRecurse(); CodeGenFunction CGF(CGM); CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc); + QualType PrivateType = RCG.getPrivateType(N); Address PrivateAddr = CGF.EmitLoadOfPointer( - CGF.GetAddrOfLocalVar(&Param), - C.getPointerType(C.VoidPtrTy).castAs<PointerType>()); + CGF.GetAddrOfLocalVar(&Param).withElementType( + CGF.ConvertTypeForMem(PrivateType)->getPointerTo()), + C.getPointerType(PrivateType)->castAs<PointerType>()); llvm::Value *Size = nullptr; // If the size of the reduction item is non-constant, load it from global // threadprivate variable. 
@@ -5858,25 +5296,20 @@ static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM, CGM.getContext().getSizeType(), Loc); } RCG.emitAggregateType(CGF, N, Size); - LValue OrigLVal; + Address OrigAddr = Address::invalid(); // If initializer uses initializer from declare reduction construct, emit a // pointer to the address of the original reduction item (reuired by reduction // initializer) if (RCG.usesReductionInitializer(N)) { Address SharedAddr = CGF.GetAddrOfLocalVar(&ParamOrig); - SharedAddr = CGF.EmitLoadOfPointer( + OrigAddr = CGF.EmitLoadOfPointer( SharedAddr, CGM.getContext().VoidPtrTy.castAs<PointerType>()->getTypePtr()); - OrigLVal = CGF.MakeAddrLValue(SharedAddr, CGM.getContext().VoidPtrTy); - } else { - OrigLVal = CGF.MakeNaturalAlignAddrLValue( - llvm::ConstantPointerNull::get(CGM.VoidPtrTy), - CGM.getContext().VoidPtrTy); } // Emit the initializer: // %0 = bitcast void* %arg to <type>* // store <type> <init>, <type>* %0 - RCG.emitInitialization(CGF, N, PrivateAddr, OrigLVal, + RCG.emitInitialization(CGF, N, PrivateAddr, OrigAddr, [](CodeGenFunction &) { return false; }); CGF.FinishFunction(); return Fn; @@ -5903,9 +5336,9 @@ static llvm::Value *emitReduceCombFunction(CodeGenModule &CGM, const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(RHS)->getDecl()); FunctionArgList Args; ImplicitParamDecl ParamInOut(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, - C.VoidPtrTy, ImplicitParamDecl::Other); + C.VoidPtrTy, ImplicitParamKind::Other); ImplicitParamDecl ParamIn(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, - ImplicitParamDecl::Other); + ImplicitParamKind::Other); Args.emplace_back(&ParamInOut); Args.emplace_back(&ParamIn); const auto &FnInfo = @@ -5933,22 +5366,21 @@ static llvm::Value *emitReduceCombFunction(CodeGenModule &CGM, // %lhs = bitcast void* %arg0 to <type>* // %rhs = bitcast void* %arg1 to <type>* CodeGenFunction::OMPPrivateScope PrivateScope(CGF); - PrivateScope.addPrivate(LHSVD, [&C, &CGF, &ParamInOut, LHSVD]() { - // Pull out the 
pointer to the variable. - Address PtrAddr = CGF.EmitLoadOfPointer( - CGF.GetAddrOfLocalVar(&ParamInOut), - C.getPointerType(C.VoidPtrTy).castAs<PointerType>()); - return CGF.Builder.CreateElementBitCast( - PtrAddr, CGF.ConvertTypeForMem(LHSVD->getType())); - }); - PrivateScope.addPrivate(RHSVD, [&C, &CGF, &ParamIn, RHSVD]() { - // Pull out the pointer to the variable. - Address PtrAddr = CGF.EmitLoadOfPointer( - CGF.GetAddrOfLocalVar(&ParamIn), - C.getPointerType(C.VoidPtrTy).castAs<PointerType>()); - return CGF.Builder.CreateElementBitCast( - PtrAddr, CGF.ConvertTypeForMem(RHSVD->getType())); - }); + PrivateScope.addPrivate( + LHSVD, + // Pull out the pointer to the variable. + CGF.EmitLoadOfPointer( + CGF.GetAddrOfLocalVar(&ParamInOut) + .withElementType( + CGF.ConvertTypeForMem(LHSVD->getType())->getPointerTo()), + C.getPointerType(LHSVD->getType())->castAs<PointerType>())); + PrivateScope.addPrivate( + RHSVD, + // Pull out the pointer to the variable. + CGF.EmitLoadOfPointer( + CGF.GetAddrOfLocalVar(&ParamIn).withElementType( + CGF.ConvertTypeForMem(RHSVD->getType())->getPointerTo()), + C.getPointerType(RHSVD->getType())->castAs<PointerType>())); PrivateScope.Privatize(); // Emit the combiner body: // %2 = <ReductionOp>(<type> *%lhs, <type> *%rhs) @@ -5976,7 +5408,7 @@ static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM, ASTContext &C = CGM.getContext(); FunctionArgList Args; ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, - ImplicitParamDecl::Other); + ImplicitParamKind::Other); Args.emplace_back(&Param); const auto &FnInfo = CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); @@ -5989,8 +5421,7 @@ static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM, CodeGenFunction CGF(CGM); CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc); Address PrivateAddr = CGF.EmitLoadOfPointer( - CGF.GetAddrOfLocalVar(&Param), - C.getPointerType(C.VoidPtrTy).castAs<PointerType>()); + 
CGF.GetAddrOfLocalVar(&Param), C.VoidPtrTy.castAs<PointerType>()); llvm::Value *Size = nullptr; // If the size of the reduction item is non-constant, load it from global // threadprivate variable. @@ -6040,10 +5471,11 @@ llvm::Value *CGOpenMPRuntime::emitTaskReductionInit( QualType RDType = C.getRecordType(RD); unsigned Size = Data.ReductionVars.size(); llvm::APInt ArraySize(/*numBits=*/64, Size); - QualType ArrayRDType = C.getConstantArrayType( - RDType, ArraySize, nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0); + QualType ArrayRDType = + C.getConstantArrayType(RDType, ArraySize, nullptr, + ArraySizeModifier::Normal, /*IndexTypeQuals=*/0); // kmp_task_red_input_t .rd_input.[Size]; - Address TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input."); + RawAddress TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input."); ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionOrigs, Data.ReductionCopies, Data.ReductionOps); for (unsigned Cnt = 0; Cnt < Size; ++Cnt) { @@ -6051,21 +5483,19 @@ llvm::Value *CGOpenMPRuntime::emitTaskReductionInit( llvm::Value *Idxs[] = {llvm::ConstantInt::get(CGM.SizeTy, /*V=*/0), llvm::ConstantInt::get(CGM.SizeTy, Cnt)}; llvm::Value *GEP = CGF.EmitCheckedInBoundsGEP( - TaskRedInput.getPointer(), Idxs, + TaskRedInput.getElementType(), TaskRedInput.getPointer(), Idxs, /*SignedIndices=*/false, /*IsSubtraction=*/false, Loc, ".rd_input.gep."); - LValue ElemLVal = CGF.MakeNaturalAlignAddrLValue(GEP, RDType); + LValue ElemLVal = CGF.MakeNaturalAlignRawAddrLValue(GEP, RDType); // ElemLVal.reduce_shar = &Shareds[Cnt]; LValue SharedLVal = CGF.EmitLValueForField(ElemLVal, SharedFD); RCG.emitSharedOrigLValue(CGF, Cnt); - llvm::Value *CastedShared = - CGF.EmitCastToVoidPtr(RCG.getSharedLValue(Cnt).getPointer(CGF)); - CGF.EmitStoreOfScalar(CastedShared, SharedLVal); + llvm::Value *Shared = RCG.getSharedLValue(Cnt).getPointer(CGF); + CGF.EmitStoreOfScalar(Shared, SharedLVal); // ElemLVal.reduce_orig = &Origs[Cnt]; LValue OrigLVal = 
CGF.EmitLValueForField(ElemLVal, OrigFD); - llvm::Value *CastedOrig = - CGF.EmitCastToVoidPtr(RCG.getOrigLValue(Cnt).getPointer(CGF)); - CGF.EmitStoreOfScalar(CastedOrig, OrigLVal); + llvm::Value *Orig = RCG.getOrigLValue(Cnt).getPointer(CGF); + CGF.EmitStoreOfScalar(Orig, OrigLVal); RCG.emitAggregateType(CGF, Cnt); llvm::Value *SizeValInChars; llvm::Value *SizeVal; @@ -6082,21 +5512,19 @@ llvm::Value *CGOpenMPRuntime::emitTaskReductionInit( CGF.EmitStoreOfScalar(SizeValInChars, SizeLVal); // ElemLVal.reduce_init = init; LValue InitLVal = CGF.EmitLValueForField(ElemLVal, InitFD); - llvm::Value *InitAddr = - CGF.EmitCastToVoidPtr(emitReduceInitFunction(CGM, Loc, RCG, Cnt)); + llvm::Value *InitAddr = emitReduceInitFunction(CGM, Loc, RCG, Cnt); CGF.EmitStoreOfScalar(InitAddr, InitLVal); // ElemLVal.reduce_fini = fini; LValue FiniLVal = CGF.EmitLValueForField(ElemLVal, FiniFD); llvm::Value *Fini = emitReduceFiniFunction(CGM, Loc, RCG, Cnt); - llvm::Value *FiniAddr = Fini - ? CGF.EmitCastToVoidPtr(Fini) - : llvm::ConstantPointerNull::get(CGM.VoidPtrTy); + llvm::Value *FiniAddr = + Fini ? 
Fini : llvm::ConstantPointerNull::get(CGM.VoidPtrTy); CGF.EmitStoreOfScalar(FiniAddr, FiniLVal); // ElemLVal.reduce_comb = comb; LValue CombLVal = CGF.EmitLValueForField(ElemLVal, CombFD); - llvm::Value *CombAddr = CGF.EmitCastToVoidPtr(emitReduceCombFunction( + llvm::Value *CombAddr = emitReduceCombFunction( CGM, Loc, RCG, Cnt, Data.ReductionOps[Cnt], LHSExprs[Cnt], - RHSExprs[Cnt], Data.ReductionCopies[Cnt])); + RHSExprs[Cnt], Data.ReductionCopies[Cnt]); CGF.EmitStoreOfScalar(CombAddr, CombLVal); // ElemLVal.flags = 0; LValue FlagsLVal = CGF.EmitLValueForField(ElemLVal, FlagsFD); @@ -6105,8 +5533,7 @@ llvm::Value *CGOpenMPRuntime::emitTaskReductionInit( llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1, /*isSigned=*/true), FlagsLVal); } else - CGF.EmitNullInitialization(FlagsLVal.getAddress(CGF), - FlagsLVal.getType()); + CGF.EmitNullInitialization(FlagsLVal.getAddress(), FlagsLVal.getType()); } if (Data.IsReductionWithTaskMod) { // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int @@ -6190,24 +5617,56 @@ Address CGOpenMPRuntime::getTaskReductionItem(CodeGenFunction &CGF, OMPBuilder.getOrCreateRuntimeFunction( CGM.getModule(), OMPRTL___kmpc_task_reduction_get_th_data), Args), - SharedLVal.getAlignment()); + CGF.Int8Ty, SharedLVal.getAlignment()); } -void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF, - SourceLocation Loc) { +void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF, SourceLocation Loc, + const OMPTaskDataTy &Data) { if (!CGF.HaveInsertPoint()) return; - if (CGF.CGM.getLangOpts().OpenMPIRBuilder) { + if (CGF.CGM.getLangOpts().OpenMPIRBuilder && Data.Dependences.empty()) { + // TODO: Need to support taskwait with dependences in the OpenMPIRBuilder. OMPBuilder.createTaskwait(CGF.Builder); } else { - // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32 - // global_tid); - llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; - // Ignore return result until untied tasks are supported. 
- CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( - CGM.getModule(), OMPRTL___kmpc_omp_taskwait), - Args); + llvm::Value *ThreadID = getThreadID(CGF, Loc); + llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc); + auto &M = CGM.getModule(); + Address DependenciesArray = Address::invalid(); + llvm::Value *NumOfElements; + std::tie(NumOfElements, DependenciesArray) = + emitDependClause(CGF, Data.Dependences, Loc); + if (!Data.Dependences.empty()) { + llvm::Value *DepWaitTaskArgs[7]; + DepWaitTaskArgs[0] = UpLoc; + DepWaitTaskArgs[1] = ThreadID; + DepWaitTaskArgs[2] = NumOfElements; + DepWaitTaskArgs[3] = DependenciesArray.emitRawPointer(CGF); + DepWaitTaskArgs[4] = CGF.Builder.getInt32(0); + DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); + DepWaitTaskArgs[6] = + llvm::ConstantInt::get(CGF.Int32Ty, Data.HasNowaitClause); + + CodeGenFunction::RunCleanupsScope LocalScope(CGF); + + // Build void __kmpc_omp_taskwait_deps_51(ident_t *, kmp_int32 gtid, + // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 + // ndeps_noalias, kmp_depend_info_t *noalias_dep_list, + // kmp_int32 has_no_wait); if dependence info is specified. + CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( + M, OMPRTL___kmpc_omp_taskwait_deps_51), + DepWaitTaskArgs); + + } else { + + // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32 + // global_tid); + llvm::Value *Args[] = {UpLoc, ThreadID}; + // Ignore return result until untied tasks are supported. 
+ CGF.EmitRuntimeCall( + OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_taskwait), + Args); + } } if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) @@ -6222,7 +5681,8 @@ void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF, return; InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel, InnerKind != OMPD_critical && - InnerKind != OMPD_master); + InnerKind != OMPD_master && + InnerKind != OMPD_masked); CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr); } @@ -6272,6 +5732,7 @@ void CGOpenMPRuntime::emitCancellationPointCall( CGM.getModule(), OMPRTL___kmpc_cancellationpoint), Args); // if (__kmpc_cancellationpoint()) { + // call i32 @__kmpc_cancel_barrier( // for parallel cancellation only // exit from construct; // } llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit"); @@ -6279,6 +5740,8 @@ void CGOpenMPRuntime::emitCancellationPointCall( llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result); CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB); CGF.EmitBlock(ExitBB); + if (CancelRegion == OMPD_parallel) + emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false); // exit from construct; CodeGenFunction::JumpDest CancelDest = CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind()); @@ -6308,6 +5771,7 @@ void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc, llvm::Value *Result = CGF.EmitRuntimeCall( OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_cancel), Args); // if (__kmpc_cancel()) { + // call i32 @__kmpc_cancel_barrier( // for parallel cancellation only // exit from construct; // } llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit"); @@ -6315,6 +5779,8 @@ void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc, llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result); CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB); CGF.EmitBlock(ExitBB); + if (CancelRegion == OMPD_parallel) + RT.emitBarrierCall(CGF, Loc, OMPD_unknown, 
/*EmitChecks=*/false); // exit from construct; CodeGenFunction::JumpDest CancelDest = CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind()); @@ -6363,7 +5829,7 @@ void CGOpenMPRuntime::emitTargetOutlinedFunction( const OMPExecutableDirective &D, StringRef ParentName, llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) { - assert(!ParentName.empty() && "Invalid target region parent name!"); + assert(!ParentName.empty() && "Invalid target entry parent name!"); HasEmittedTargetRegion = true; SmallVector<std::pair<const Expr *, const Expr *>, 4> Allocators; for (const auto *C : D.getClausesOfKind<OMPUsesAllocatorsClause>()) { @@ -6394,19 +5860,18 @@ void CGOpenMPRuntime::emitUsesAllocatorsInit(CodeGenFunction &CGF, .getLimitedValue()); LValue AllocatorTraitsLVal = CGF.EmitLValue(AllocatorTraits); Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( - AllocatorTraitsLVal.getAddress(CGF), CGF.VoidPtrPtrTy); + AllocatorTraitsLVal.getAddress(), CGF.VoidPtrPtrTy, CGF.VoidPtrTy); AllocatorTraitsLVal = CGF.MakeAddrLValue(Addr, CGF.getContext().VoidPtrTy, AllocatorTraitsLVal.getBaseInfo(), AllocatorTraitsLVal.getTBAAInfo()); - llvm::Value *Traits = - CGF.EmitLoadOfScalar(AllocatorTraitsLVal, AllocatorTraits->getExprLoc()); + llvm::Value *Traits = Addr.emitRawPointer(CGF); llvm::Value *AllocatorVal = CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( CGM.getModule(), OMPRTL___kmpc_init_allocator), {ThreadId, MemSpaceHandle, NumTraits, Traits}); // Store to allocator. 
- CGF.EmitVarDecl(*cast<VarDecl>( + CGF.EmitAutoVarAlloca(*cast<VarDecl>( cast<DeclRefExpr>(Allocator->IgnoreParenImpCasts())->getDecl())); LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts()); AllocatorVal = @@ -6431,73 +5896,74 @@ void CGOpenMPRuntime::emitUsesAllocatorsFini(CodeGenFunction &CGF, {ThreadId, AllocatorVal}); } +void CGOpenMPRuntime::computeMinAndMaxThreadsAndTeams( + const OMPExecutableDirective &D, CodeGenFunction &CGF, + int32_t &MinThreadsVal, int32_t &MaxThreadsVal, int32_t &MinTeamsVal, + int32_t &MaxTeamsVal) { + + getNumTeamsExprForTargetDirective(CGF, D, MinTeamsVal, MaxTeamsVal); + getNumThreadsExprForTargetDirective(CGF, D, MaxThreadsVal, + /*UpperBoundOnly=*/true); + + for (auto *C : D.getClausesOfKind<OMPXAttributeClause>()) { + for (auto *A : C->getAttrs()) { + int32_t AttrMinThreadsVal = 1, AttrMaxThreadsVal = -1; + int32_t AttrMinBlocksVal = 1, AttrMaxBlocksVal = -1; + if (auto *Attr = dyn_cast<CUDALaunchBoundsAttr>(A)) + CGM.handleCUDALaunchBoundsAttr(nullptr, Attr, &AttrMaxThreadsVal, + &AttrMinBlocksVal, &AttrMaxBlocksVal); + else if (auto *Attr = dyn_cast<AMDGPUFlatWorkGroupSizeAttr>(A)) + CGM.handleAMDGPUFlatWorkGroupSizeAttr( + nullptr, Attr, /*ReqdWGS=*/nullptr, &AttrMinThreadsVal, + &AttrMaxThreadsVal); + else + continue; + + MinThreadsVal = std::max(MinThreadsVal, AttrMinThreadsVal); + if (AttrMaxThreadsVal > 0) + MaxThreadsVal = MaxThreadsVal > 0 + ? std::min(MaxThreadsVal, AttrMaxThreadsVal) + : AttrMaxThreadsVal; + MinTeamsVal = std::max(MinTeamsVal, AttrMinBlocksVal); + if (AttrMaxBlocksVal > 0) + MaxTeamsVal = MaxTeamsVal > 0 ? 
std::min(MaxTeamsVal, AttrMaxBlocksVal) + : AttrMaxBlocksVal; + } + } +} + void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper( const OMPExecutableDirective &D, StringRef ParentName, llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) { - // Create a unique name for the entry function using the source location - // information of the current target region. The name will be something like: - // - // __omp_offloading_DD_FFFF_PP_lBB - // - // where DD_FFFF is an ID unique to the file (device and file IDs), PP is the - // mangled name of the function that encloses the target region and BB is the - // line number of the target region. - - unsigned DeviceID; - unsigned FileID; - unsigned Line; - getTargetEntryUniqueInfo(CGM.getContext(), D.getBeginLoc(), DeviceID, FileID, - Line); - SmallString<64> EntryFnName; - { - llvm::raw_svector_ostream OS(EntryFnName); - OS << "__omp_offloading" << llvm::format("_%x", DeviceID) - << llvm::format("_%x_", FileID) << ParentName << "_l" << Line; - } - const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target); + llvm::TargetRegionEntryInfo EntryInfo = + getEntryInfoFromPresumedLoc(CGM, OMPBuilder, D.getBeginLoc(), ParentName); CodeGenFunction CGF(CGM, true); - CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName); - CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); + llvm::OpenMPIRBuilder::FunctionGenCallback &&GenerateOutlinedFunction = + [&CGF, &D, &CodeGen](StringRef EntryFnName) { + const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target); + + CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName); + CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); + return CGF.GenerateOpenMPCapturedStmtFunction(CS, D.getBeginLoc()); + }; - OutlinedFn = CGF.GenerateOpenMPCapturedStmtFunction(CS, D.getBeginLoc()); + OMPBuilder.emitTargetRegionFunction(EntryInfo, GenerateOutlinedFunction, + IsOffloadEntry, OutlinedFn, OutlinedFnID); - // If this target 
outline function is not an offload entry, we don't need to - // register it. - if (!IsOffloadEntry) + if (!OutlinedFn) return; - // The target region ID is used by the runtime library to identify the current - // target region, so it only has to be unique and not necessarily point to - // anything. It could be the pointer to the outlined function that implements - // the target region, but we aren't using that so that the compiler doesn't - // need to keep that, and could therefore inline the host function if proven - // worthwhile during optimization. In the other hand, if emitting code for the - // device, the ID has to be the function address so that it can retrieved from - // the offloading entry and launched by the runtime library. We also mark the - // outlined function to have external linkage in case we are emitting code for - // the device, because these functions will be entry points to the device. - - if (CGM.getLangOpts().OpenMPIsDevice) { - OutlinedFnID = llvm::ConstantExpr::getBitCast(OutlinedFn, CGM.Int8PtrTy); - OutlinedFn->setLinkage(llvm::GlobalValue::WeakAnyLinkage); - OutlinedFn->setDSOLocal(false); - if (CGM.getTriple().isAMDGCN()) - OutlinedFn->setCallingConv(llvm::CallingConv::AMDGPU_KERNEL); - } else { - std::string Name = getName({EntryFnName, "region_id"}); - OutlinedFnID = new llvm::GlobalVariable( - CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true, - llvm::GlobalValue::WeakAnyLinkage, - llvm::Constant::getNullValue(CGM.Int8Ty), Name); - } + CGM.getTargetCodeGenInfo().setTargetAttributes(nullptr, OutlinedFn, CGM); - // Register the information for the entry associated with this target region. 
- OffloadEntriesInfoManager.registerTargetRegionEntryInfo( - DeviceID, FileID, ParentName, Line, OutlinedFn, OutlinedFnID, - OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion); + for (auto *C : D.getClausesOfKind<OMPXAttributeClause>()) { + for (auto *A : C->getAttrs()) { + if (auto *Attr = dyn_cast<AMDGPUWavesPerEUAttr>(A)) + CGM.handleAMDGPUWavesPerEUAttr(OutlinedFn, Attr); + } + } } /// Checks if the expression is constant or does not have non-trivial function @@ -6526,7 +5992,7 @@ const Stmt *CGOpenMPRuntime::getSingleCompoundChild(ASTContext &Ctx, continue; // Analyze declarations. if (const auto *DS = dyn_cast<DeclStmt>(S)) { - if (llvm::all_of(DS->decls(), [&Ctx](const Decl *D) { + if (llvm::all_of(DS->decls(), [](const Decl *D) { if (isa<EmptyDecl>(D) || isa<DeclContext>(D) || isa<TypeDecl>(D) || isa<PragmaCommentDecl>(D) || isa<PragmaDetectMismatchDecl>(D) || isa<UsingDecl>(D) || @@ -6537,10 +6003,7 @@ const Stmt *CGOpenMPRuntime::getSingleCompoundChild(ASTContext &Ctx, const auto *VD = dyn_cast<VarDecl>(D); if (!VD) return false; - return VD->isConstexpr() || - ((VD->getType().isTrivialType(Ctx) || - VD->getType()->isReferenceType()) && - (!VD->hasInit() || isTrivial(Ctx, VD->getInit()))); + return VD->hasGlobalStorage() || !VD->isUsed(); })) continue; } @@ -6555,24 +6018,13 @@ const Stmt *CGOpenMPRuntime::getSingleCompoundChild(ASTContext &Ctx, return Child; } -/// Emit the number of teams for a target directive. Inspect the num_teams -/// clause associated with a teams construct combined or closely nested -/// with the target directive. -/// -/// Emit a team of size one for directives such as 'target parallel' that -/// have no associated teams construct. -/// -/// Otherwise, return nullptr. 
-static llvm::Value * -emitNumTeamsForTargetDirective(CodeGenFunction &CGF, - const OMPExecutableDirective &D) { - assert(!CGF.getLangOpts().OpenMPIsDevice && - "Clauses associated with the teams directive expected to be emitted " - "only for the host!"); +const Expr *CGOpenMPRuntime::getNumTeamsExprForTargetDirective( + CodeGenFunction &CGF, const OMPExecutableDirective &D, int32_t &MinTeamsVal, + int32_t &MaxTeamsVal) { + OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind(); assert(isOpenMPTargetExecutionDirective(DirectiveKind) && "Expected target-based executable directive."); - CGBuilderTy &Bld = CGF.Builder; switch (DirectiveKind) { case OMPD_target: { const auto *CS = D.getInnermostCapturedStmt(); @@ -6584,50 +6036,57 @@ emitNumTeamsForTargetDirective(CodeGenFunction &CGF, dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) { if (isOpenMPTeamsDirective(NestedDir->getDirectiveKind())) { if (NestedDir->hasClausesOfKind<OMPNumTeamsClause>()) { - CGOpenMPInnerExprInfo CGInfo(CGF, *CS); - CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); const Expr *NumTeams = NestedDir->getSingleClause<OMPNumTeamsClause>()->getNumTeams(); - llvm::Value *NumTeamsVal = - CGF.EmitScalarExpr(NumTeams, - /*IgnoreResultAssign*/ true); - return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty, - /*isSigned=*/true); + if (NumTeams->isIntegerConstantExpr(CGF.getContext())) + if (auto Constant = + NumTeams->getIntegerConstantExpr(CGF.getContext())) + MinTeamsVal = MaxTeamsVal = Constant->getExtValue(); + return NumTeams; } - return Bld.getInt32(0); + MinTeamsVal = MaxTeamsVal = 0; + return nullptr; } if (isOpenMPParallelDirective(NestedDir->getDirectiveKind()) || - isOpenMPSimdDirective(NestedDir->getDirectiveKind())) - return Bld.getInt32(1); - return Bld.getInt32(0); + isOpenMPSimdDirective(NestedDir->getDirectiveKind())) { + MinTeamsVal = MaxTeamsVal = 1; + return nullptr; + } + MinTeamsVal = MaxTeamsVal = 1; + return nullptr; } + // A value of -1 is used to check if we 
need to emit no teams region + MinTeamsVal = MaxTeamsVal = -1; return nullptr; } + case OMPD_target_teams_loop: case OMPD_target_teams: case OMPD_target_teams_distribute: case OMPD_target_teams_distribute_simd: case OMPD_target_teams_distribute_parallel_for: case OMPD_target_teams_distribute_parallel_for_simd: { if (D.hasClausesOfKind<OMPNumTeamsClause>()) { - CodeGenFunction::RunCleanupsScope NumTeamsScope(CGF); const Expr *NumTeams = D.getSingleClause<OMPNumTeamsClause>()->getNumTeams(); - llvm::Value *NumTeamsVal = - CGF.EmitScalarExpr(NumTeams, - /*IgnoreResultAssign*/ true); - return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty, - /*isSigned=*/true); + if (NumTeams->isIntegerConstantExpr(CGF.getContext())) + if (auto Constant = NumTeams->getIntegerConstantExpr(CGF.getContext())) + MinTeamsVal = MaxTeamsVal = Constant->getExtValue(); + return NumTeams; } - return Bld.getInt32(0); + MinTeamsVal = MaxTeamsVal = 0; + return nullptr; } case OMPD_target_parallel: case OMPD_target_parallel_for: case OMPD_target_parallel_for_simd: + case OMPD_target_parallel_loop: case OMPD_target_simd: - return Bld.getInt32(1); + MinTeamsVal = MaxTeamsVal = 1; + return nullptr; case OMPD_parallel: case OMPD_for: case OMPD_parallel_for: + case OMPD_parallel_loop: case OMPD_parallel_master: case OMPD_parallel_sections: case OMPD_for_simd: @@ -6639,6 +6098,8 @@ emitNumTeamsForTargetDirective(CodeGenFunction &CGF, case OMPD_allocate: case OMPD_task: case OMPD_simd: + case OMPD_tile: + case OMPD_unroll: case OMPD_sections: case OMPD_section: case OMPD_single: @@ -6680,6 +6141,7 @@ emitNumTeamsForTargetDirective(CodeGenFunction &CGF, case OMPD_parallel_master_taskloop: case OMPD_parallel_master_taskloop_simd: case OMPD_requires: + case OMPD_metadirective: case OMPD_unknown: break; default: @@ -6688,150 +6150,211 @@ emitNumTeamsForTargetDirective(CodeGenFunction &CGF, llvm_unreachable("Unexpected directive kind."); } -static llvm::Value *getNumThreads(CodeGenFunction &CGF, const CapturedStmt 
*CS, - llvm::Value *DefaultThreadLimitVal) { +llvm::Value *CGOpenMPRuntime::emitNumTeamsForTargetDirective( + CodeGenFunction &CGF, const OMPExecutableDirective &D) { + assert(!CGF.getLangOpts().OpenMPIsTargetDevice && + "Clauses associated with the teams directive expected to be emitted " + "only for the host!"); + CGBuilderTy &Bld = CGF.Builder; + int32_t MinNT = -1, MaxNT = -1; + const Expr *NumTeams = + getNumTeamsExprForTargetDirective(CGF, D, MinNT, MaxNT); + if (NumTeams != nullptr) { + OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind(); + + switch (DirectiveKind) { + case OMPD_target: { + const auto *CS = D.getInnermostCapturedStmt(); + CGOpenMPInnerExprInfo CGInfo(CGF, *CS); + CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); + llvm::Value *NumTeamsVal = CGF.EmitScalarExpr(NumTeams, + /*IgnoreResultAssign*/ true); + return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty, + /*isSigned=*/true); + } + case OMPD_target_teams: + case OMPD_target_teams_distribute: + case OMPD_target_teams_distribute_simd: + case OMPD_target_teams_distribute_parallel_for: + case OMPD_target_teams_distribute_parallel_for_simd: { + CodeGenFunction::RunCleanupsScope NumTeamsScope(CGF); + llvm::Value *NumTeamsVal = CGF.EmitScalarExpr(NumTeams, + /*IgnoreResultAssign*/ true); + return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty, + /*isSigned=*/true); + } + default: + break; + } + } + + assert(MinNT == MaxNT && "Num threads ranges require handling here."); + return llvm::ConstantInt::get(CGF.Int32Ty, MinNT); +} + +/// Check for a num threads constant value (stored in \p DefaultVal), or +/// expression (stored in \p E). If the value is conditional (via an if-clause), +/// store the condition in \p CondVal. If \p E, and \p CondVal respectively, are +/// nullptr, no expression evaluation is perfomed. 
+static void getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS, + const Expr **E, int32_t &UpperBound, + bool UpperBoundOnly, llvm::Value **CondVal) { const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild( CGF.getContext(), CS->getCapturedStmt()); - if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) { - if (isOpenMPParallelDirective(Dir->getDirectiveKind())) { - llvm::Value *NumThreads = nullptr; - llvm::Value *CondVal = nullptr; - // Handle if clause. If if clause present, the number of threads is - // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1. - if (Dir->hasClausesOfKind<OMPIfClause>()) { - CGOpenMPInnerExprInfo CGInfo(CGF, *CS); - CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); - const OMPIfClause *IfClause = nullptr; - for (const auto *C : Dir->getClausesOfKind<OMPIfClause>()) { - if (C->getNameModifier() == OMPD_unknown || - C->getNameModifier() == OMPD_parallel) { - IfClause = C; - break; - } + const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child); + if (!Dir) + return; + + if (isOpenMPParallelDirective(Dir->getDirectiveKind())) { + // Handle if clause. If if clause present, the number of threads is + // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1. 
+ if (CondVal && Dir->hasClausesOfKind<OMPIfClause>()) { + CGOpenMPInnerExprInfo CGInfo(CGF, *CS); + CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); + const OMPIfClause *IfClause = nullptr; + for (const auto *C : Dir->getClausesOfKind<OMPIfClause>()) { + if (C->getNameModifier() == OMPD_unknown || + C->getNameModifier() == OMPD_parallel) { + IfClause = C; + break; } - if (IfClause) { - const Expr *Cond = IfClause->getCondition(); - bool Result; - if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) { - if (!Result) - return CGF.Builder.getInt32(1); - } else { - CodeGenFunction::LexicalScope Scope(CGF, Cond->getSourceRange()); - if (const auto *PreInit = - cast_or_null<DeclStmt>(IfClause->getPreInitStmt())) { - for (const auto *I : PreInit->decls()) { - if (!I->hasAttr<OMPCaptureNoInitAttr>()) { - CGF.EmitVarDecl(cast<VarDecl>(*I)); - } else { - CodeGenFunction::AutoVarEmission Emission = - CGF.EmitAutoVarAlloca(cast<VarDecl>(*I)); - CGF.EmitAutoVarCleanups(Emission); - } + } + if (IfClause) { + const Expr *CondExpr = IfClause->getCondition(); + bool Result; + if (CondExpr->EvaluateAsBooleanCondition(Result, CGF.getContext())) { + if (!Result) { + UpperBound = 1; + return; + } + } else { + CodeGenFunction::LexicalScope Scope(CGF, CondExpr->getSourceRange()); + if (const auto *PreInit = + cast_or_null<DeclStmt>(IfClause->getPreInitStmt())) { + for (const auto *I : PreInit->decls()) { + if (!I->hasAttr<OMPCaptureNoInitAttr>()) { + CGF.EmitVarDecl(cast<VarDecl>(*I)); + } else { + CodeGenFunction::AutoVarEmission Emission = + CGF.EmitAutoVarAlloca(cast<VarDecl>(*I)); + CGF.EmitAutoVarCleanups(Emission); } } - CondVal = CGF.EvaluateExprAsBool(Cond); + *CondVal = CGF.EvaluateExprAsBool(CondExpr); } } } - // Check the value of num_threads clause iff if clause was not specified - // or is not evaluated to false. 
- if (Dir->hasClausesOfKind<OMPNumThreadsClause>()) { - CGOpenMPInnerExprInfo CGInfo(CGF, *CS); - CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); - const auto *NumThreadsClause = - Dir->getSingleClause<OMPNumThreadsClause>(); - CodeGenFunction::LexicalScope Scope( - CGF, NumThreadsClause->getNumThreads()->getSourceRange()); - if (const auto *PreInit = - cast_or_null<DeclStmt>(NumThreadsClause->getPreInitStmt())) { - for (const auto *I : PreInit->decls()) { - if (!I->hasAttr<OMPCaptureNoInitAttr>()) { - CGF.EmitVarDecl(cast<VarDecl>(*I)); - } else { - CodeGenFunction::AutoVarEmission Emission = - CGF.EmitAutoVarAlloca(cast<VarDecl>(*I)); - CGF.EmitAutoVarCleanups(Emission); - } + } + // Check the value of num_threads clause iff if clause was not specified + // or is not evaluated to false. + if (Dir->hasClausesOfKind<OMPNumThreadsClause>()) { + CGOpenMPInnerExprInfo CGInfo(CGF, *CS); + CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); + const auto *NumThreadsClause = + Dir->getSingleClause<OMPNumThreadsClause>(); + const Expr *NTExpr = NumThreadsClause->getNumThreads(); + if (NTExpr->isIntegerConstantExpr(CGF.getContext())) + if (auto Constant = NTExpr->getIntegerConstantExpr(CGF.getContext())) + UpperBound = + UpperBound + ? Constant->getZExtValue() + : std::min(UpperBound, + static_cast<int32_t>(Constant->getZExtValue())); + // If we haven't found a upper bound, remember we saw a thread limiting + // clause. 
+ if (UpperBound == -1) + UpperBound = 0; + if (!E) + return; + CodeGenFunction::LexicalScope Scope(CGF, NTExpr->getSourceRange()); + if (const auto *PreInit = + cast_or_null<DeclStmt>(NumThreadsClause->getPreInitStmt())) { + for (const auto *I : PreInit->decls()) { + if (!I->hasAttr<OMPCaptureNoInitAttr>()) { + CGF.EmitVarDecl(cast<VarDecl>(*I)); + } else { + CodeGenFunction::AutoVarEmission Emission = + CGF.EmitAutoVarAlloca(cast<VarDecl>(*I)); + CGF.EmitAutoVarCleanups(Emission); } } - NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads()); - NumThreads = CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, - /*isSigned=*/false); - if (DefaultThreadLimitVal) - NumThreads = CGF.Builder.CreateSelect( - CGF.Builder.CreateICmpULT(DefaultThreadLimitVal, NumThreads), - DefaultThreadLimitVal, NumThreads); - } else { - NumThreads = DefaultThreadLimitVal ? DefaultThreadLimitVal - : CGF.Builder.getInt32(0); - } - // Process condition of the if clause. - if (CondVal) { - NumThreads = CGF.Builder.CreateSelect(CondVal, NumThreads, - CGF.Builder.getInt32(1)); } - return NumThreads; + *E = NTExpr; } - if (isOpenMPSimdDirective(Dir->getDirectiveKind())) - return CGF.Builder.getInt32(1); - return DefaultThreadLimitVal; + return; } - return DefaultThreadLimitVal ? DefaultThreadLimitVal - : CGF.Builder.getInt32(0); + if (isOpenMPSimdDirective(Dir->getDirectiveKind())) + UpperBound = 1; } -/// Emit the number of threads for a target directive. Inspect the -/// thread_limit clause associated with a teams construct combined or closely -/// nested with the target directive. -/// -/// Emit the num_threads clause for directives such as 'target parallel' that -/// have no associated teams construct. -/// -/// Otherwise, return nullptr. 
-static llvm::Value * -emitNumThreadsForTargetDirective(CodeGenFunction &CGF, - const OMPExecutableDirective &D) { - assert(!CGF.getLangOpts().OpenMPIsDevice && +const Expr *CGOpenMPRuntime::getNumThreadsExprForTargetDirective( + CodeGenFunction &CGF, const OMPExecutableDirective &D, int32_t &UpperBound, + bool UpperBoundOnly, llvm::Value **CondVal, const Expr **ThreadLimitExpr) { + assert((!CGF.getLangOpts().OpenMPIsTargetDevice || UpperBoundOnly) && "Clauses associated with the teams directive expected to be emitted " "only for the host!"); OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind(); assert(isOpenMPTargetExecutionDirective(DirectiveKind) && "Expected target-based executable directive."); - CGBuilderTy &Bld = CGF.Builder; - llvm::Value *ThreadLimitVal = nullptr; - llvm::Value *NumThreadsVal = nullptr; + + const Expr *NT = nullptr; + const Expr **NTPtr = UpperBoundOnly ? nullptr : &NT; + + auto CheckForConstExpr = [&](const Expr *E, const Expr **EPtr) { + if (E->isIntegerConstantExpr(CGF.getContext())) { + if (auto Constant = E->getIntegerConstantExpr(CGF.getContext())) + UpperBound = UpperBound ? Constant->getZExtValue() + : std::min(UpperBound, + int32_t(Constant->getZExtValue())); + } + // If we haven't found a upper bound, remember we saw a thread limiting + // clause. + if (UpperBound == -1) + UpperBound = 0; + if (EPtr) + *EPtr = E; + }; + + auto ReturnSequential = [&]() { + UpperBound = 1; + return NT; + }; + switch (DirectiveKind) { case OMPD_target: { const CapturedStmt *CS = D.getInnermostCapturedStmt(); - if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal)) - return NumThreads; + getNumThreads(CGF, CS, NTPtr, UpperBound, UpperBoundOnly, CondVal); const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild( CGF.getContext(), CS->getCapturedStmt()); + // TODO: The standard is not clear how to resolve two thread limit clauses, + // let's pick the teams one if it's present, otherwise the target one. 
+ const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>(); if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) { - if (Dir->hasClausesOfKind<OMPThreadLimitClause>()) { - CGOpenMPInnerExprInfo CGInfo(CGF, *CS); - CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); - const auto *ThreadLimitClause = - Dir->getSingleClause<OMPThreadLimitClause>(); - CodeGenFunction::LexicalScope Scope( - CGF, ThreadLimitClause->getThreadLimit()->getSourceRange()); - if (const auto *PreInit = - cast_or_null<DeclStmt>(ThreadLimitClause->getPreInitStmt())) { - for (const auto *I : PreInit->decls()) { - if (!I->hasAttr<OMPCaptureNoInitAttr>()) { - CGF.EmitVarDecl(cast<VarDecl>(*I)); - } else { - CodeGenFunction::AutoVarEmission Emission = - CGF.EmitAutoVarAlloca(cast<VarDecl>(*I)); - CGF.EmitAutoVarCleanups(Emission); + if (const auto *TLC = Dir->getSingleClause<OMPThreadLimitClause>()) { + ThreadLimitClause = TLC; + if (ThreadLimitExpr) { + CGOpenMPInnerExprInfo CGInfo(CGF, *CS); + CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); + CodeGenFunction::LexicalScope Scope( + CGF, ThreadLimitClause->getThreadLimit()->getSourceRange()); + if (const auto *PreInit = + cast_or_null<DeclStmt>(ThreadLimitClause->getPreInitStmt())) { + for (const auto *I : PreInit->decls()) { + if (!I->hasAttr<OMPCaptureNoInitAttr>()) { + CGF.EmitVarDecl(cast<VarDecl>(*I)); + } else { + CodeGenFunction::AutoVarEmission Emission = + CGF.EmitAutoVarAlloca(cast<VarDecl>(*I)); + CGF.EmitAutoVarCleanups(Emission); + } } } } - llvm::Value *ThreadLimit = CGF.EmitScalarExpr( - ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true); - ThreadLimitVal = - Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false); } + } + if (ThreadLimitClause) + CheckForConstExpr(ThreadLimitClause->getThreadLimit(), ThreadLimitExpr); + if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) { if (isOpenMPTeamsDirective(Dir->getDirectiveKind()) && 
!isOpenMPDistributeDirective(Dir->getDirectiveKind())) { CS = Dir->getInnermostCapturedStmt(); @@ -6839,59 +6362,49 @@ emitNumThreadsForTargetDirective(CodeGenFunction &CGF, CGF.getContext(), CS->getCapturedStmt()); Dir = dyn_cast_or_null<OMPExecutableDirective>(Child); } - if (Dir && isOpenMPDistributeDirective(Dir->getDirectiveKind()) && - !isOpenMPSimdDirective(Dir->getDirectiveKind())) { + if (Dir && isOpenMPParallelDirective(Dir->getDirectiveKind())) { CS = Dir->getInnermostCapturedStmt(); - if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal)) - return NumThreads; - } - if (Dir && isOpenMPSimdDirective(Dir->getDirectiveKind())) - return Bld.getInt32(1); + getNumThreads(CGF, CS, NTPtr, UpperBound, UpperBoundOnly, CondVal); + } else if (Dir && isOpenMPSimdDirective(Dir->getDirectiveKind())) + return ReturnSequential(); } - return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0); + return NT; } case OMPD_target_teams: { if (D.hasClausesOfKind<OMPThreadLimitClause>()) { CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF); const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>(); - llvm::Value *ThreadLimit = CGF.EmitScalarExpr( - ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true); - ThreadLimitVal = - Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false); + CheckForConstExpr(ThreadLimitClause->getThreadLimit(), ThreadLimitExpr); } const CapturedStmt *CS = D.getInnermostCapturedStmt(); - if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal)) - return NumThreads; + getNumThreads(CGF, CS, NTPtr, UpperBound, UpperBoundOnly, CondVal); const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild( CGF.getContext(), CS->getCapturedStmt()); if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) { if (Dir->getDirectiveKind() == OMPD_distribute) { CS = Dir->getInnermostCapturedStmt(); - if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal)) - return NumThreads; + 
getNumThreads(CGF, CS, NTPtr, UpperBound, UpperBoundOnly, CondVal); } } - return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0); + return NT; } case OMPD_target_teams_distribute: if (D.hasClausesOfKind<OMPThreadLimitClause>()) { CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF); const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>(); - llvm::Value *ThreadLimit = CGF.EmitScalarExpr( - ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true); - ThreadLimitVal = - Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false); + CheckForConstExpr(ThreadLimitClause->getThreadLimit(), ThreadLimitExpr); } - return getNumThreads(CGF, D.getInnermostCapturedStmt(), ThreadLimitVal); + getNumThreads(CGF, D.getInnermostCapturedStmt(), NTPtr, UpperBound, + UpperBoundOnly, CondVal); + return NT; + case OMPD_target_teams_loop: + case OMPD_target_parallel_loop: case OMPD_target_parallel: case OMPD_target_parallel_for: case OMPD_target_parallel_for_simd: case OMPD_target_teams_distribute_parallel_for: case OMPD_target_teams_distribute_parallel_for_simd: { - llvm::Value *CondVal = nullptr; - // Handle if clause. If if clause present, the number of threads is - // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1. 
- if (D.hasClausesOfKind<OMPIfClause>()) { + if (CondVal && D.hasClausesOfKind<OMPIfClause>()) { const OMPIfClause *IfClause = nullptr; for (const auto *C : D.getClausesOfKind<OMPIfClause>()) { if (C->getNameModifier() == OMPD_unknown || @@ -6905,106 +6418,92 @@ emitNumThreadsForTargetDirective(CodeGenFunction &CGF, bool Result; if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) { if (!Result) - return Bld.getInt32(1); + return ReturnSequential(); } else { CodeGenFunction::RunCleanupsScope Scope(CGF); - CondVal = CGF.EvaluateExprAsBool(Cond); + *CondVal = CGF.EvaluateExprAsBool(Cond); } } } if (D.hasClausesOfKind<OMPThreadLimitClause>()) { CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF); const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>(); - llvm::Value *ThreadLimit = CGF.EmitScalarExpr( - ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true); - ThreadLimitVal = - Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false); + CheckForConstExpr(ThreadLimitClause->getThreadLimit(), ThreadLimitExpr); } if (D.hasClausesOfKind<OMPNumThreadsClause>()) { CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF); const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>(); - llvm::Value *NumThreads = CGF.EmitScalarExpr( - NumThreadsClause->getNumThreads(), /*IgnoreResultAssign=*/true); - NumThreadsVal = - Bld.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned=*/false); - ThreadLimitVal = ThreadLimitVal - ? 
Bld.CreateSelect(Bld.CreateICmpULT(NumThreadsVal, - ThreadLimitVal), - NumThreadsVal, ThreadLimitVal) - : NumThreadsVal; - } - if (!ThreadLimitVal) - ThreadLimitVal = Bld.getInt32(0); - if (CondVal) - return Bld.CreateSelect(CondVal, ThreadLimitVal, Bld.getInt32(1)); - return ThreadLimitVal; + CheckForConstExpr(NumThreadsClause->getNumThreads(), nullptr); + return NumThreadsClause->getNumThreads(); + } + return NT; } case OMPD_target_teams_distribute_simd: case OMPD_target_simd: - return Bld.getInt32(1); - case OMPD_parallel: - case OMPD_for: - case OMPD_parallel_for: - case OMPD_parallel_master: - case OMPD_parallel_sections: - case OMPD_for_simd: - case OMPD_parallel_for_simd: - case OMPD_cancel: - case OMPD_cancellation_point: - case OMPD_ordered: - case OMPD_threadprivate: - case OMPD_allocate: - case OMPD_task: - case OMPD_simd: - case OMPD_sections: - case OMPD_section: - case OMPD_single: - case OMPD_master: - case OMPD_critical: - case OMPD_taskyield: - case OMPD_barrier: - case OMPD_taskwait: - case OMPD_taskgroup: - case OMPD_atomic: - case OMPD_flush: - case OMPD_depobj: - case OMPD_scan: - case OMPD_teams: - case OMPD_target_data: - case OMPD_target_exit_data: - case OMPD_target_enter_data: - case OMPD_distribute: - case OMPD_distribute_simd: - case OMPD_distribute_parallel_for: - case OMPD_distribute_parallel_for_simd: - case OMPD_teams_distribute: - case OMPD_teams_distribute_simd: - case OMPD_teams_distribute_parallel_for: - case OMPD_teams_distribute_parallel_for_simd: - case OMPD_target_update: - case OMPD_declare_simd: - case OMPD_declare_variant: - case OMPD_begin_declare_variant: - case OMPD_end_declare_variant: - case OMPD_declare_target: - case OMPD_end_declare_target: - case OMPD_declare_reduction: - case OMPD_declare_mapper: - case OMPD_taskloop: - case OMPD_taskloop_simd: - case OMPD_master_taskloop: - case OMPD_master_taskloop_simd: - case OMPD_parallel_master_taskloop: - case OMPD_parallel_master_taskloop_simd: - case OMPD_requires: - 
case OMPD_unknown: - break; + return ReturnSequential(); default: break; } llvm_unreachable("Unsupported directive kind."); } +llvm::Value *CGOpenMPRuntime::emitNumThreadsForTargetDirective( + CodeGenFunction &CGF, const OMPExecutableDirective &D) { + llvm::Value *NumThreadsVal = nullptr; + llvm::Value *CondVal = nullptr; + llvm::Value *ThreadLimitVal = nullptr; + const Expr *ThreadLimitExpr = nullptr; + int32_t UpperBound = -1; + + const Expr *NT = getNumThreadsExprForTargetDirective( + CGF, D, UpperBound, /* UpperBoundOnly */ false, &CondVal, + &ThreadLimitExpr); + + // Thread limit expressions are used below, emit them. + if (ThreadLimitExpr) { + ThreadLimitVal = + CGF.EmitScalarExpr(ThreadLimitExpr, /*IgnoreResultAssign=*/true); + ThreadLimitVal = CGF.Builder.CreateIntCast(ThreadLimitVal, CGF.Int32Ty, + /*isSigned=*/false); + } + + // Generate the num teams expression. + if (UpperBound == 1) { + NumThreadsVal = CGF.Builder.getInt32(UpperBound); + } else if (NT) { + NumThreadsVal = CGF.EmitScalarExpr(NT, /*IgnoreResultAssign=*/true); + NumThreadsVal = CGF.Builder.CreateIntCast(NumThreadsVal, CGF.Int32Ty, + /*isSigned=*/false); + } else if (ThreadLimitVal) { + // If we do not have a num threads value but a thread limit, replace the + // former with the latter. We know handled the thread limit expression. + NumThreadsVal = ThreadLimitVal; + ThreadLimitVal = nullptr; + } else { + // Default to "0" which means runtime choice. + assert(!ThreadLimitVal && "Default not applicable with thread limit value"); + NumThreadsVal = CGF.Builder.getInt32(0); + } + + // Handle if clause. If if clause present, the number of threads is + // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1. + if (CondVal) { + CodeGenFunction::RunCleanupsScope Scope(CGF); + NumThreadsVal = CGF.Builder.CreateSelect(CondVal, NumThreadsVal, + CGF.Builder.getInt32(1)); + } + + // If the thread limit and num teams expression were present, take the + // minimum. 
+ if (ThreadLimitVal) { + NumThreadsVal = CGF.Builder.CreateSelect( + CGF.Builder.CreateICmpULT(ThreadLimitVal, NumThreadsVal), + ThreadLimitVal, NumThreadsVal); + } + + return NumThreadsVal; +} + namespace { LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE(); @@ -7014,59 +6513,13 @@ LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE(); // code for that information. class MappableExprsHandler { public: - /// Values for bit flags used to specify the mapping type for - /// offloading. - enum OpenMPOffloadMappingFlags : uint64_t { - /// No flags - OMP_MAP_NONE = 0x0, - /// Allocate memory on the device and move data from host to device. - OMP_MAP_TO = 0x01, - /// Allocate memory on the device and move data from device to host. - OMP_MAP_FROM = 0x02, - /// Always perform the requested mapping action on the element, even - /// if it was already mapped before. - OMP_MAP_ALWAYS = 0x04, - /// Delete the element from the device environment, ignoring the - /// current reference count associated with the element. - OMP_MAP_DELETE = 0x08, - /// The element being mapped is a pointer-pointee pair; both the - /// pointer and the pointee should be mapped. - OMP_MAP_PTR_AND_OBJ = 0x10, - /// This flags signals that the base address of an entry should be - /// passed to the target kernel as an argument. - OMP_MAP_TARGET_PARAM = 0x20, - /// Signal that the runtime library has to return the device pointer - /// in the current position for the data being mapped. Used when we have the - /// use_device_ptr or use_device_addr clause. - OMP_MAP_RETURN_PARAM = 0x40, - /// This flag signals that the reference being passed is a pointer to - /// private data. - OMP_MAP_PRIVATE = 0x80, - /// Pass the element to the device by value. - OMP_MAP_LITERAL = 0x100, - /// Implicit map - OMP_MAP_IMPLICIT = 0x200, - /// Close is a hint to the runtime to allocate memory close to - /// the target device. - OMP_MAP_CLOSE = 0x400, - /// 0x800 is reserved for compatibility with XLC. 
- /// Produce a runtime error if the data is not already allocated. - OMP_MAP_PRESENT = 0x1000, - /// Signal that the runtime library should use args as an array of - /// descriptor_dim pointers and use args_size as dims. Used when we have - /// non-contiguous list items in target update directive - OMP_MAP_NON_CONTIG = 0x100000000000, - /// The 16 MSBs of the flags indicate whether the entry is member of some - /// struct/class. - OMP_MAP_MEMBER_OF = 0xffff000000000000, - LLVM_MARK_AS_BITMASK_ENUM(/* LargestFlag = */ OMP_MAP_MEMBER_OF), - }; - /// Get the offset of the OMP_MAP_MEMBER_OF field. static unsigned getFlagMemberOffset() { unsigned Offset = 0; - for (uint64_t Remain = OMP_MAP_MEMBER_OF; !(Remain & 1); - Remain = Remain >> 1) + for (uint64_t Remain = + static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>( + OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF); + !(Remain & 1); Remain = Remain >> 1) Offset++; return Offset; } @@ -7088,67 +6541,31 @@ public: const Expr *getMapExpr() const { return MapExpr; } }; - /// Class that associates information with a base pointer to be passed to the - /// runtime library. - class BasePointerInfo { - /// The base pointer. - llvm::Value *Ptr = nullptr; - /// The base declaration that refers to this device pointer, or null if - /// there is none. 
- const ValueDecl *DevPtrDecl = nullptr; - - public: - BasePointerInfo(llvm::Value *Ptr, const ValueDecl *DevPtrDecl = nullptr) - : Ptr(Ptr), DevPtrDecl(DevPtrDecl) {} - llvm::Value *operator*() const { return Ptr; } - const ValueDecl *getDevicePtrDecl() const { return DevPtrDecl; } - void setDevicePtrDecl(const ValueDecl *D) { DevPtrDecl = D; } - }; - + using DeviceInfoTy = llvm::OpenMPIRBuilder::DeviceInfoTy; + using MapBaseValuesArrayTy = llvm::OpenMPIRBuilder::MapValuesArrayTy; + using MapValuesArrayTy = llvm::OpenMPIRBuilder::MapValuesArrayTy; + using MapFlagsArrayTy = llvm::OpenMPIRBuilder::MapFlagsArrayTy; + using MapDimArrayTy = llvm::OpenMPIRBuilder::MapDimArrayTy; + using MapNonContiguousArrayTy = + llvm::OpenMPIRBuilder::MapNonContiguousArrayTy; using MapExprsArrayTy = SmallVector<MappingExprInfo, 4>; - using MapBaseValuesArrayTy = SmallVector<BasePointerInfo, 4>; - using MapValuesArrayTy = SmallVector<llvm::Value *, 4>; - using MapFlagsArrayTy = SmallVector<OpenMPOffloadMappingFlags, 4>; - using MapMappersArrayTy = SmallVector<const ValueDecl *, 4>; - using MapDimArrayTy = SmallVector<uint64_t, 4>; - using MapNonContiguousArrayTy = SmallVector<MapValuesArrayTy, 4>; + using MapValueDeclsArrayTy = SmallVector<const ValueDecl *, 4>; /// This structure contains combined information generated for mappable /// clauses, including base pointers, pointers, sizes, map types, user-defined /// mappers, and non-contiguous information. 
- struct MapCombinedInfoTy { - struct StructNonContiguousInfo { - bool IsNonContiguous = false; - MapDimArrayTy Dims; - MapNonContiguousArrayTy Offsets; - MapNonContiguousArrayTy Counts; - MapNonContiguousArrayTy Strides; - }; + struct MapCombinedInfoTy : llvm::OpenMPIRBuilder::MapInfosTy { MapExprsArrayTy Exprs; - MapBaseValuesArrayTy BasePointers; - MapValuesArrayTy Pointers; - MapValuesArrayTy Sizes; - MapFlagsArrayTy Types; - MapMappersArrayTy Mappers; - StructNonContiguousInfo NonContigInfo; + MapValueDeclsArrayTy Mappers; + MapValueDeclsArrayTy DevicePtrDecls; /// Append arrays in \a CurInfo. void append(MapCombinedInfoTy &CurInfo) { Exprs.append(CurInfo.Exprs.begin(), CurInfo.Exprs.end()); - BasePointers.append(CurInfo.BasePointers.begin(), - CurInfo.BasePointers.end()); - Pointers.append(CurInfo.Pointers.begin(), CurInfo.Pointers.end()); - Sizes.append(CurInfo.Sizes.begin(), CurInfo.Sizes.end()); - Types.append(CurInfo.Types.begin(), CurInfo.Types.end()); + DevicePtrDecls.append(CurInfo.DevicePtrDecls.begin(), + CurInfo.DevicePtrDecls.end()); Mappers.append(CurInfo.Mappers.begin(), CurInfo.Mappers.end()); - NonContigInfo.Dims.append(CurInfo.NonContigInfo.Dims.begin(), - CurInfo.NonContigInfo.Dims.end()); - NonContigInfo.Offsets.append(CurInfo.NonContigInfo.Offsets.begin(), - CurInfo.NonContigInfo.Offsets.end()); - NonContigInfo.Counts.append(CurInfo.NonContigInfo.Counts.begin(), - CurInfo.NonContigInfo.Counts.end()); - NonContigInfo.Strides.append(CurInfo.NonContigInfo.Strides.begin(), - CurInfo.NonContigInfo.Strides.end()); + llvm::OpenMPIRBuilder::MapInfosTy::append(CurInfo); } }; @@ -7157,12 +6574,15 @@ public: /// [ValueDecl *] --> {LE(FieldIndex, Pointer), /// HE(FieldIndex, Pointer)} struct StructRangeInfoTy { + MapCombinedInfoTy PreliminaryMapData; std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> LowestElem = { 0, Address::invalid()}; std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> HighestElem = { 0, Address::invalid()}; Address Base = 
Address::invalid(); + Address LB = Address::invalid(); bool IsArraySection = false; + bool HasCompleteRecord = false; }; private: @@ -7227,6 +6647,16 @@ private: SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>> DevPointersMap; + /// Map between device addr declarations and their expression components. + /// The key value for declarations in 'this' is null. + llvm::DenseMap< + const ValueDecl *, + SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>> + HasDevAddrsMap; + + /// Map between lambda declarations and their map type. + llvm::DenseMap<const ValueDecl *, const OMPMapClause *> LambdasMap; + llvm::Value *getExprTypeSize(const Expr *E) const { QualType ExprTy = E->getType().getCanonicalType(); @@ -7251,8 +6681,8 @@ private: // Given that an array section is considered a built-in type, we need to // do the calculation based on the length of the section instead of relying // on CGF.getTypeSize(E->getType()). - if (const auto *OAE = dyn_cast<OMPArraySectionExpr>(E)) { - QualType BaseTy = OMPArraySectionExpr::getBaseOriginalType( + if (const auto *OAE = dyn_cast<ArraySectionExpr>(E)) { + QualType BaseTy = ArraySectionExpr::getBaseOriginalType( OAE->getBase()->IgnoreParenImpCasts()) .getCanonicalType(); @@ -7311,7 +6741,8 @@ private: ArrayRef<OpenMPMotionModifierKind> MotionModifiers, bool IsImplicit, bool AddPtrFlag, bool AddIsTargetParamFlag, bool IsNonContiguous) const { OpenMPOffloadMappingFlags Bits = - IsImplicit ? OMP_MAP_IMPLICIT : OMP_MAP_NONE; + IsImplicit ? OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT + : OpenMPOffloadMappingFlags::OMP_MAP_NONE; switch (MapType) { case OMPC_MAP_alloc: case OMPC_MAP_release: @@ -7321,45 +6752,43 @@ private: // type modifiers. 
break; case OMPC_MAP_to: - Bits |= OMP_MAP_TO; + Bits |= OpenMPOffloadMappingFlags::OMP_MAP_TO; break; case OMPC_MAP_from: - Bits |= OMP_MAP_FROM; + Bits |= OpenMPOffloadMappingFlags::OMP_MAP_FROM; break; case OMPC_MAP_tofrom: - Bits |= OMP_MAP_TO | OMP_MAP_FROM; + Bits |= OpenMPOffloadMappingFlags::OMP_MAP_TO | + OpenMPOffloadMappingFlags::OMP_MAP_FROM; break; case OMPC_MAP_delete: - Bits |= OMP_MAP_DELETE; + Bits |= OpenMPOffloadMappingFlags::OMP_MAP_DELETE; break; case OMPC_MAP_unknown: llvm_unreachable("Unexpected map type!"); } if (AddPtrFlag) - Bits |= OMP_MAP_PTR_AND_OBJ; + Bits |= OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ; if (AddIsTargetParamFlag) - Bits |= OMP_MAP_TARGET_PARAM; - if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_always) - != MapModifiers.end()) - Bits |= OMP_MAP_ALWAYS; - if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_close) - != MapModifiers.end()) - Bits |= OMP_MAP_CLOSE; - if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_present) - != MapModifiers.end()) - Bits |= OMP_MAP_PRESENT; - if (llvm::find(MotionModifiers, OMPC_MOTION_MODIFIER_present) - != MotionModifiers.end()) - Bits |= OMP_MAP_PRESENT; + Bits |= OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM; + if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_always)) + Bits |= OpenMPOffloadMappingFlags::OMP_MAP_ALWAYS; + if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_close)) + Bits |= OpenMPOffloadMappingFlags::OMP_MAP_CLOSE; + if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_present) || + llvm::is_contained(MotionModifiers, OMPC_MOTION_MODIFIER_present)) + Bits |= OpenMPOffloadMappingFlags::OMP_MAP_PRESENT; + if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_ompx_hold)) + Bits |= OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD; if (IsNonContiguous) - Bits |= OMP_MAP_NON_CONTIG; + Bits |= OpenMPOffloadMappingFlags::OMP_MAP_NON_CONTIG; return Bits; } /// Return true if the provided expression is a final array section. 
A /// final array section, is one whose length can't be proved to be one. bool isFinalArraySectionExpression(const Expr *E) const { - const auto *OASE = dyn_cast<OMPArraySectionExpr>(E); + const auto *OASE = dyn_cast<ArraySectionExpr>(E); // It is not an array section and therefore not a unity-size one. if (!OASE) @@ -7375,11 +6804,11 @@ private: // for this dimension. Also, we should always expect a length if the // base type is pointer. if (!Length) { - QualType BaseQTy = OMPArraySectionExpr::getBaseOriginalType( + QualType BaseQTy = ArraySectionExpr::getBaseOriginalType( OASE->getBase()->IgnoreParenImpCasts()) .getCanonicalType(); if (const auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr())) - return ATy->getSize().getSExtValue() != 1; + return ATy->getSExtSize() != 1; // If we don't have a constant dimension length, we have to consider // the current section as having any size, so it is not necessarily // unitary. If it happen to be unity size, that's user fault. @@ -7404,12 +6833,15 @@ private: OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers, ArrayRef<OpenMPMotionModifierKind> MotionModifiers, OMPClauseMappableExprCommon::MappableExprComponentListRef Components, - MapCombinedInfoTy &CombinedInfo, StructRangeInfoTy &PartialStruct, - bool IsFirstComponentList, bool IsImplicit, + MapCombinedInfoTy &CombinedInfo, + MapCombinedInfoTy &StructBaseCombinedInfo, + StructRangeInfoTy &PartialStruct, bool IsFirstComponentList, + bool IsImplicit, bool GenerateAllInfoForClauses, const ValueDecl *Mapper = nullptr, bool ForDeviceAddr = false, const ValueDecl *BaseDecl = nullptr, const Expr *MapExpr = nullptr, ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef> - OverlappedElements = llvm::None) const { + OverlappedElements = std::nullopt, + bool AreBothBasePtrAndPteeMapped = false) const { // The following summarizes what has to be generated for each map and the // types below. 
The generated information is expressed in this order: // base pointer, section pointer, size, flags @@ -7418,6 +6850,7 @@ private: // double d; // int i[100]; // float *p; + // int **a = &i; // // struct S1 { // int i; @@ -7429,6 +6862,7 @@ private: // S1 s; // double *p; // struct S2 *ps; + // int &ref; // } // S2 s; // S2 *ps; @@ -7450,6 +6884,14 @@ private: // in unified shared memory mode or for local pointers // p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM // + // map((*a)[0:3]) + // &(*a), &(*a), sizeof(pointer), TARGET_PARAM | TO | FROM + // &(*a), &(*a)[0], 3*sizeof(int), PTR_AND_OBJ | TO | FROM + // + // map(**a) + // &(*a), &(*a), sizeof(pointer), TARGET_PARAM | TO | FROM + // &(*a), &(**a), sizeof(int), PTR_AND_OBJ | TO | FROM + // // map(s) // &s, &s, sizeof(S2), TARGET_PARAM | TO | FROM // @@ -7472,6 +6914,14 @@ private: // optimizes this entry out, same in the examples below) // (***) map the pointee (map: to) // + // map(to: s.ref) + // &s, &(s.ref), sizeof(int*), TARGET_PARAM (*) + // &s, &(s.ref), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | TO (***) + // (*) alloc space for struct members, only this is a target parameter + // (**) map the pointer (nothing to be mapped in this example) (the compiler + // optimizes this entry out, same in the examples below) + // (***) map the pointee (map: to) + // // map(s.ps) // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM // @@ -7567,6 +7017,10 @@ private: // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(4) | PTR_AND_OBJ | TO // (*) the struct this entry pertains to is the 4th element in the list // of arguments, hence MEMBER_OF(4) + // + // map(p, p[:100]) + // ===> map(p[:100]) + // &p, &p[0], 100*sizeof(float), TARGET_PARAM | PTR_AND_OBJ | TO | FROM // Track if the map information being generated is the first for a capture. 
bool IsCaptureFirstInfo = IsFirstComponentList; @@ -7587,9 +7041,11 @@ private: Address BP = Address::invalid(); const Expr *AssocExpr = I->getAssociatedExpression(); const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr); - const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr); + const auto *OASE = dyn_cast<ArraySectionExpr>(AssocExpr); const auto *OAShE = dyn_cast<OMPArrayShapingExpr>(AssocExpr); + if (AreBothBasePtrAndPteeMapped && std::next(I) == CE) + return; if (isa<MemberExpr>(AssocExpr)) { // The base is the 'this' pointer. The content of the pointer is going // to be the base of the field being mapped. @@ -7597,22 +7053,24 @@ private: } else if ((AE && isa<CXXThisExpr>(AE->getBase()->IgnoreParenImpCasts())) || (OASE && isa<CXXThisExpr>(OASE->getBase()->IgnoreParenImpCasts()))) { - BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF); + BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(); } else if (OAShE && isa<CXXThisExpr>(OAShE->getBase()->IgnoreParenCasts())) { BP = Address( CGF.EmitScalarExpr(OAShE->getBase()), + CGF.ConvertTypeForMem(OAShE->getBase()->getType()->getPointeeType()), CGF.getContext().getTypeAlignInChars(OAShE->getBase()->getType())); } else { // The base is the reference to the variable. // BP = &Var. 
- BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF); + BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(); if (const auto *VD = dyn_cast_or_null<VarDecl>(I->getAssociatedDeclaration())) { - if (llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = + if (std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) { if ((*Res == OMPDeclareTargetDeclAttr::MT_Link) || - (*Res == OMPDeclareTargetDeclAttr::MT_To && + ((*Res == OMPDeclareTargetDeclAttr::MT_To || + *Res == OMPDeclareTargetDeclAttr::MT_Enter) && CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())) { RequiresReference = true; BP = CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD); @@ -7630,8 +7088,9 @@ private: // can be associated with the combined storage if shared memory mode is // active or the base declaration is not global variable. const auto *VD = dyn_cast<VarDecl>(I->getAssociatedDeclaration()); - if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() || - !VD || VD->hasLocalStorage()) + if (!AreBothBasePtrAndPteeMapped && + (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() || + !VD || VD->hasLocalStorage())) BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>()); else FirstPointerInComplexData = true; @@ -7669,6 +7128,26 @@ private: uint64_t DimSize = 1; bool IsNonContiguous = CombinedInfo.NonContigInfo.IsNonContiguous; + bool IsPrevMemberReference = false; + + // We need to check if we will be encountering any MEs. If we do not + // encounter any ME expression it means we will be mapping the whole struct. + // In that case we need to skip adding an entry for the struct to the + // CombinedInfo list and instead add an entry to the StructBaseCombinedInfo + // list only when generating all info for clauses. 
+ bool IsMappingWholeStruct = true; + if (!GenerateAllInfoForClauses) { + IsMappingWholeStruct = false; + } else { + for (auto TempI = I; TempI != CE; ++TempI) { + const MemberExpr *PossibleME = + dyn_cast<MemberExpr>(TempI->getAssociatedExpression()); + if (PossibleME) { + IsMappingWholeStruct = false; + break; + } + } + } for (; I != CE; ++I) { // If the current component is member of a struct (parent struct) mark it. @@ -7710,84 +7189,128 @@ private: const ValueDecl *MapDecl = (I->getAssociatedDeclaration()) ? I->getAssociatedDeclaration() : BaseDecl; + MapExpr = (I->getAssociatedExpression()) ? I->getAssociatedExpression() + : MapExpr; // Get information on whether the element is a pointer. Have to do a // special treatment for array sections given that they are built-in // types. const auto *OASE = - dyn_cast<OMPArraySectionExpr>(I->getAssociatedExpression()); + dyn_cast<ArraySectionExpr>(I->getAssociatedExpression()); const auto *OAShE = dyn_cast<OMPArrayShapingExpr>(I->getAssociatedExpression()); const auto *UO = dyn_cast<UnaryOperator>(I->getAssociatedExpression()); const auto *BO = dyn_cast<BinaryOperator>(I->getAssociatedExpression()); bool IsPointer = OAShE || - (OASE && OMPArraySectionExpr::getBaseOriginalType(OASE) + (OASE && ArraySectionExpr::getBaseOriginalType(OASE) .getCanonicalType() ->isAnyPointerType()) || I->getAssociatedExpression()->getType()->isAnyPointerType(); - bool IsNonDerefPointer = IsPointer && !UO && !BO && !IsNonContiguous; + bool IsMemberReference = isa<MemberExpr>(I->getAssociatedExpression()) && + MapDecl && + MapDecl->getType()->isLValueReferenceType(); + bool IsNonDerefPointer = IsPointer && + !(UO && UO->getOpcode() != UO_Deref) && !BO && + !IsNonContiguous; if (OASE) ++DimSize; - if (Next == CE || IsNonDerefPointer || IsFinalArraySection) { + if (Next == CE || IsMemberReference || IsNonDerefPointer || + IsFinalArraySection) { // If this is not the last component, we expect the pointer to be // associated with an array 
expression or member expression. assert((Next == CE || isa<MemberExpr>(Next->getAssociatedExpression()) || isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) || - isa<OMPArraySectionExpr>(Next->getAssociatedExpression()) || + isa<ArraySectionExpr>(Next->getAssociatedExpression()) || isa<OMPArrayShapingExpr>(Next->getAssociatedExpression()) || isa<UnaryOperator>(Next->getAssociatedExpression()) || isa<BinaryOperator>(Next->getAssociatedExpression())) && "Unexpected expression"); Address LB = Address::invalid(); + Address LowestElem = Address::invalid(); + auto &&EmitMemberExprBase = [](CodeGenFunction &CGF, + const MemberExpr *E) { + const Expr *BaseExpr = E->getBase(); + // If this is s.x, emit s as an lvalue. If it is s->x, emit s as a + // scalar. + LValue BaseLV; + if (E->isArrow()) { + LValueBaseInfo BaseInfo; + TBAAAccessInfo TBAAInfo; + Address Addr = + CGF.EmitPointerWithAlignment(BaseExpr, &BaseInfo, &TBAAInfo); + QualType PtrTy = BaseExpr->getType()->getPointeeType(); + BaseLV = CGF.MakeAddrLValue(Addr, PtrTy, BaseInfo, TBAAInfo); + } else { + BaseLV = CGF.EmitOMPSharedLValue(BaseExpr); + } + return BaseLV; + }; if (OAShE) { - LB = Address(CGF.EmitScalarExpr(OAShE->getBase()), - CGF.getContext().getTypeAlignInChars( - OAShE->getBase()->getType())); + LowestElem = LB = + Address(CGF.EmitScalarExpr(OAShE->getBase()), + CGF.ConvertTypeForMem( + OAShE->getBase()->getType()->getPointeeType()), + CGF.getContext().getTypeAlignInChars( + OAShE->getBase()->getType())); + } else if (IsMemberReference) { + const auto *ME = cast<MemberExpr>(I->getAssociatedExpression()); + LValue BaseLVal = EmitMemberExprBase(CGF, ME); + LowestElem = CGF.EmitLValueForFieldInitialization( + BaseLVal, cast<FieldDecl>(MapDecl)) + .getAddress(); + LB = CGF.EmitLoadOfReferenceLValue(LowestElem, MapDecl->getType()) + .getAddress(); } else { - LB = CGF.EmitOMPSharedLValue(I->getAssociatedExpression()) - .getAddress(CGF); + LowestElem = LB = + 
CGF.EmitOMPSharedLValue(I->getAssociatedExpression()) + .getAddress(); } // If this component is a pointer inside the base struct then we don't // need to create any entry for it - it will be combined with the object // it is pointing to into a single PTR_AND_OBJ entry. bool IsMemberPointerOrAddr = - (IsPointer || ForDeviceAddr) && EncounteredME && - (dyn_cast<MemberExpr>(I->getAssociatedExpression()) == - EncounteredME); - if (!OverlappedElements.empty()) { + EncounteredME && + (((IsPointer || ForDeviceAddr) && + I->getAssociatedExpression() == EncounteredME) || + (IsPrevMemberReference && !IsPointer) || + (IsMemberReference && Next != CE && + !Next->getAssociatedExpression()->getType()->isPointerType())); + if (!OverlappedElements.empty() && Next == CE) { // Handle base element with the info for overlapped elements. assert(!PartialStruct.Base.isValid() && "The base element is set."); - assert(Next == CE && - "Expected last element for the overlapped elements."); assert(!IsPointer && "Unexpected base element with the pointer type."); // Mark the whole struct as the struct that requires allocation on the // device. - PartialStruct.LowestElem = {0, LB}; + PartialStruct.LowestElem = {0, LowestElem}; CharUnits TypeSize = CGF.getContext().getTypeSizeInChars( I->getAssociatedExpression()->getType()); Address HB = CGF.Builder.CreateConstGEP( - CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(LB, - CGF.VoidPtrTy), + CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( + LowestElem, CGF.VoidPtrTy, CGF.Int8Ty), TypeSize.getQuantity() - 1); PartialStruct.HighestElem = { std::numeric_limits<decltype( PartialStruct.HighestElem.first)>::max(), HB}; PartialStruct.Base = BP; + PartialStruct.LB = LB; + assert( + PartialStruct.PreliminaryMapData.BasePointers.empty() && + "Overlapped elements must be used only once for the variable."); + std::swap(PartialStruct.PreliminaryMapData, CombinedInfo); // Emit data for non-overlapped data. 
OpenMPOffloadMappingFlags Flags = - OMP_MAP_MEMBER_OF | + OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF | getMapTypeBits(MapType, MapModifiers, MotionModifiers, IsImplicit, /*AddPtrFlag=*/false, /*AddIsTargetParamFlag=*/false, IsNonContiguous); - LB = BP; llvm::Value *Size = nullptr; // Do bitcopy of all non-overlapped structure elements. for (OMPClauseMappableExprCommon::MappableExprComponentListRef @@ -7795,20 +7318,33 @@ private: Address ComponentLB = Address::invalid(); for (const OMPClauseMappableExprCommon::MappableComponent &MC : Component) { - if (MC.getAssociatedDeclaration()) { - ComponentLB = - CGF.EmitOMPSharedLValue(MC.getAssociatedExpression()) - .getAddress(CGF); - Size = CGF.Builder.CreatePtrDiff( - CGF.EmitCastToVoidPtr(ComponentLB.getPointer()), - CGF.EmitCastToVoidPtr(LB.getPointer())); + if (const ValueDecl *VD = MC.getAssociatedDeclaration()) { + const auto *FD = dyn_cast<FieldDecl>(VD); + if (FD && FD->getType()->isLValueReferenceType()) { + const auto *ME = + cast<MemberExpr>(MC.getAssociatedExpression()); + LValue BaseLVal = EmitMemberExprBase(CGF, ME); + ComponentLB = + CGF.EmitLValueForFieldInitialization(BaseLVal, FD) + .getAddress(); + } else { + ComponentLB = + CGF.EmitOMPSharedLValue(MC.getAssociatedExpression()) + .getAddress(); + } + llvm::Value *ComponentLBPtr = ComponentLB.emitRawPointer(CGF); + llvm::Value *LBPtr = LB.emitRawPointer(CGF); + Size = CGF.Builder.CreatePtrDiff(CGF.Int8Ty, ComponentLBPtr, + LBPtr); break; } } assert(Size && "Failed to determine structure size"); CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr); - CombinedInfo.BasePointers.push_back(BP.getPointer()); - CombinedInfo.Pointers.push_back(LB.getPointer()); + CombinedInfo.BasePointers.push_back(BP.emitRawPointer(CGF)); + CombinedInfo.DevicePtrDecls.push_back(nullptr); + CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None); + CombinedInfo.Pointers.push_back(LB.emitRawPointer(CGF)); CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast( Size, CGF.Int64Ty, 
/*isSigned=*/true)); CombinedInfo.Types.push_back(Flags); @@ -7818,12 +7354,14 @@ private: LB = CGF.Builder.CreateConstGEP(ComponentLB, 1); } CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr); - CombinedInfo.BasePointers.push_back(BP.getPointer()); - CombinedInfo.Pointers.push_back(LB.getPointer()); + CombinedInfo.BasePointers.push_back(BP.emitRawPointer(CGF)); + CombinedInfo.DevicePtrDecls.push_back(nullptr); + CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None); + CombinedInfo.Pointers.push_back(LB.emitRawPointer(CGF)); + llvm::Value *LBPtr = LB.emitRawPointer(CGF); Size = CGF.Builder.CreatePtrDiff( - CGF.EmitCastToVoidPtr( - CGF.Builder.CreateConstGEP(HB, 1).getPointer()), - CGF.EmitCastToVoidPtr(LB.getPointer())); + CGF.Int8Ty, CGF.Builder.CreateConstGEP(HB, 1).emitRawPointer(CGF), + LBPtr); CombinedInfo.Sizes.push_back( CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true)); CombinedInfo.Types.push_back(Flags); @@ -7833,48 +7371,79 @@ private: break; } llvm::Value *Size = getExprTypeSize(I->getAssociatedExpression()); + // Skip adding an entry in the CurInfo of this combined entry if the + // whole struct is currently being mapped. The struct needs to be added + // in the first position before any data internal to the struct is being + // mapped. if (!IsMemberPointerOrAddr || (Next == CE && MapType != OMPC_MAP_unknown)) { - CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr); - CombinedInfo.BasePointers.push_back(BP.getPointer()); - CombinedInfo.Pointers.push_back(LB.getPointer()); - CombinedInfo.Sizes.push_back( - CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true)); - CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? 
DimSize - : 1); + if (!IsMappingWholeStruct) { + CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr); + CombinedInfo.BasePointers.push_back(BP.emitRawPointer(CGF)); + CombinedInfo.DevicePtrDecls.push_back(nullptr); + CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None); + CombinedInfo.Pointers.push_back(LB.emitRawPointer(CGF)); + CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast( + Size, CGF.Int64Ty, /*isSigned=*/true)); + CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize + : 1); + } else { + StructBaseCombinedInfo.Exprs.emplace_back(MapDecl, MapExpr); + StructBaseCombinedInfo.BasePointers.push_back( + BP.emitRawPointer(CGF)); + StructBaseCombinedInfo.DevicePtrDecls.push_back(nullptr); + StructBaseCombinedInfo.DevicePointers.push_back(DeviceInfoTy::None); + StructBaseCombinedInfo.Pointers.push_back(LB.emitRawPointer(CGF)); + StructBaseCombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast( + Size, CGF.Int64Ty, /*isSigned=*/true)); + StructBaseCombinedInfo.NonContigInfo.Dims.push_back( + IsNonContiguous ? DimSize : 1); + } // If Mapper is valid, the last component inherits the mapper. bool HasMapper = Mapper && Next == CE; - CombinedInfo.Mappers.push_back(HasMapper ? Mapper : nullptr); + if (!IsMappingWholeStruct) + CombinedInfo.Mappers.push_back(HasMapper ? Mapper : nullptr); + else + StructBaseCombinedInfo.Mappers.push_back(HasMapper ? Mapper + : nullptr); // We need to add a pointer flag for each map that comes from the // same expression except for the first one. We also need to signal // this map is the first one that relates with the current capture // (there is a set of entries for each capture). 
- OpenMPOffloadMappingFlags Flags = getMapTypeBits( - MapType, MapModifiers, MotionModifiers, IsImplicit, - !IsExpressionFirstInfo || RequiresReference || - FirstPointerInComplexData, - IsCaptureFirstInfo && !RequiresReference, IsNonContiguous); + OpenMPOffloadMappingFlags Flags = + getMapTypeBits(MapType, MapModifiers, MotionModifiers, IsImplicit, + !IsExpressionFirstInfo || RequiresReference || + FirstPointerInComplexData || IsMemberReference, + AreBothBasePtrAndPteeMapped || + (IsCaptureFirstInfo && !RequiresReference), + IsNonContiguous); - if (!IsExpressionFirstInfo) { + if (!IsExpressionFirstInfo || IsMemberReference) { // If we have a PTR_AND_OBJ pair where the OBJ is a pointer as well, // then we reset the TO/FROM/ALWAYS/DELETE/CLOSE flags. - if (IsPointer) - Flags &= ~(OMP_MAP_TO | OMP_MAP_FROM | OMP_MAP_ALWAYS | - OMP_MAP_DELETE | OMP_MAP_CLOSE); + if (IsPointer || (IsMemberReference && Next != CE)) + Flags &= ~(OpenMPOffloadMappingFlags::OMP_MAP_TO | + OpenMPOffloadMappingFlags::OMP_MAP_FROM | + OpenMPOffloadMappingFlags::OMP_MAP_ALWAYS | + OpenMPOffloadMappingFlags::OMP_MAP_DELETE | + OpenMPOffloadMappingFlags::OMP_MAP_CLOSE); if (ShouldBeMemberOf) { // Set placeholder value MEMBER_OF=FFFF to indicate that the flag // should be later updated with the correct value of MEMBER_OF. - Flags |= OMP_MAP_MEMBER_OF; + Flags |= OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF; // From now on, all subsequent PTR_AND_OBJ entries should not be // marked as MEMBER_OF. 
ShouldBeMemberOf = false; } } - CombinedInfo.Types.push_back(Flags); + if (!IsMappingWholeStruct) + CombinedInfo.Types.push_back(Flags); + else + StructBaseCombinedInfo.Types.push_back(Flags); } // If we have encountered a member expression so far, keep track of the @@ -7886,20 +7455,28 @@ private: // Update info about the lowest and highest elements for this struct if (!PartialStruct.Base.isValid()) { - PartialStruct.LowestElem = {FieldIndex, LB}; + PartialStruct.LowestElem = {FieldIndex, LowestElem}; if (IsFinalArraySection) { Address HB = - CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false) - .getAddress(CGF); + CGF.EmitArraySectionExpr(OASE, /*IsLowerBound=*/false) + .getAddress(); PartialStruct.HighestElem = {FieldIndex, HB}; } else { - PartialStruct.HighestElem = {FieldIndex, LB}; + PartialStruct.HighestElem = {FieldIndex, LowestElem}; } PartialStruct.Base = BP; + PartialStruct.LB = BP; } else if (FieldIndex < PartialStruct.LowestElem.first) { - PartialStruct.LowestElem = {FieldIndex, LB}; + PartialStruct.LowestElem = {FieldIndex, LowestElem}; } else if (FieldIndex > PartialStruct.HighestElem.first) { - PartialStruct.HighestElem = {FieldIndex, LB}; + if (IsFinalArraySection) { + Address HB = + CGF.EmitArraySectionExpr(OASE, /*IsLowerBound=*/false) + .getAddress(); + PartialStruct.HighestElem = {FieldIndex, HB}; + } else { + PartialStruct.HighestElem = {FieldIndex, LowestElem}; + } } } @@ -7913,11 +7490,12 @@ private: // The pointer becomes the base for the next element. if (Next != CE) - BP = LB; + BP = IsMemberReference ? LowestElem : LB; IsExpressionFirstInfo = false; IsCaptureFirstInfo = false; FirstPointerInComplexData = false; + IsPrevMemberReference = IsMemberReference; } else if (FirstPointerInComplexData) { QualType Ty = Components.rbegin() ->getAssociatedDeclaration() @@ -7927,6 +7505,10 @@ private: FirstPointerInComplexData = false; } } + // If ran into the whole component - allocate the space for the whole + // record. 
+ if (!EncounteredME) + PartialStruct.HasCompleteRecord = true; if (!IsNonContiguous) return; @@ -7947,12 +7529,12 @@ private: for (const OMPClauseMappableExprCommon::MappableComponent &Component : Components) { const Expr *AssocExpr = Component.getAssociatedExpression(); - const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr); + const auto *OASE = dyn_cast<ArraySectionExpr>(AssocExpr); if (!OASE) continue; - QualType Ty = OMPArraySectionExpr::getBaseOriginalType(OASE->getBase()); + QualType Ty = ArraySectionExpr::getBaseOriginalType(OASE->getBase()); auto *CAT = Context.getAsConstantArrayType(Ty); auto *VAT = Context.getAsVariableArrayType(Ty); @@ -7990,8 +7572,8 @@ private: // it. if (DimSizes.size() < Components.size() - 1) { if (CAT) - DimSizes.push_back(llvm::ConstantInt::get( - CGF.Int64Ty, CAT->getSize().getZExtValue())); + DimSizes.push_back( + llvm::ConstantInt::get(CGF.Int64Ty, CAT->getZExtSize())); else if (VAT) DimSizes.push_back(CGF.Builder.CreateIntCast( CGF.EmitScalarExpr(VAT->getSizeExpr()), CGF.Int64Ty, @@ -8000,7 +7582,7 @@ private: } // Skip the dummy dimension since we have already have its information. - auto DI = DimSizes.begin() + 1; + auto *DI = DimSizes.begin() + 1; // Product of dimension. llvm::Value *DimProd = llvm::ConstantInt::get(CGF.CGM.Int64Ty, ElementTypeSize); @@ -8026,7 +7608,7 @@ private: continue; } - const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr); + const auto *OASE = dyn_cast<ArraySectionExpr>(AssocExpr); if (!OASE) continue; @@ -8108,7 +7690,7 @@ private: /// Return the adjusted map modifiers if the declaration a capture refers to /// appears in a first-private clause. This is expected to be used only with /// directives that start with 'target'. 
- MappableExprsHandler::OpenMPOffloadMappingFlags + OpenMPOffloadMappingFlags getMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap) const { assert(Cap.capturesVariable() && "Expected capture by reference only!"); @@ -8116,39 +7698,23 @@ private: // 'private ptr' and 'map to' flag. Return the right flags if the captured // declaration is known as first-private in this handler. if (FirstPrivateDecls.count(Cap.getCapturedVar())) { - if (Cap.getCapturedVar()->getType().isConstant(CGF.getContext()) && - Cap.getCaptureKind() == CapturedStmt::VCK_ByRef) - return MappableExprsHandler::OMP_MAP_ALWAYS | - MappableExprsHandler::OMP_MAP_TO; if (Cap.getCapturedVar()->getType()->isAnyPointerType()) - return MappableExprsHandler::OMP_MAP_TO | - MappableExprsHandler::OMP_MAP_PTR_AND_OBJ; - return MappableExprsHandler::OMP_MAP_PRIVATE | - MappableExprsHandler::OMP_MAP_TO; - } - return MappableExprsHandler::OMP_MAP_TO | - MappableExprsHandler::OMP_MAP_FROM; - } - - static OpenMPOffloadMappingFlags getMemberOfFlag(unsigned Position) { - // Rotate by getFlagMemberOffset() bits. - return static_cast<OpenMPOffloadMappingFlags>(((uint64_t)Position + 1) - << getFlagMemberOffset()); - } - - static void setCorrectMemberOfFlag(OpenMPOffloadMappingFlags &Flags, - OpenMPOffloadMappingFlags MemberOfFlag) { - // If the entry is PTR_AND_OBJ but has not been marked with the special - // placeholder value 0xFFFF in the MEMBER_OF field, then it should not be - // marked as MEMBER_OF. - if ((Flags & OMP_MAP_PTR_AND_OBJ) && - ((Flags & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF)) - return; - - // Reset the placeholder value to prepare the flag for the assignment of the - // proper MEMBER_OF value. 
- Flags &= ~OMP_MAP_MEMBER_OF; - Flags |= MemberOfFlag; + return OpenMPOffloadMappingFlags::OMP_MAP_TO | + OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ; + return OpenMPOffloadMappingFlags::OMP_MAP_PRIVATE | + OpenMPOffloadMappingFlags::OMP_MAP_TO; + } + auto I = LambdasMap.find(Cap.getCapturedVar()->getCanonicalDecl()); + if (I != LambdasMap.end()) + // for map(to: lambda): using user specified map type. + return getMapTypeBits( + I->getSecond()->getMapType(), I->getSecond()->getMapTypeModifiers(), + /*MotionModifiers=*/std::nullopt, I->getSecond()->isImplicit(), + /*AddPtrFlag=*/false, + /*AddIsTargetParamFlag=*/false, + /*isNonContiguous=*/false); + return OpenMPOffloadMappingFlags::OMP_MAP_TO | + OpenMPOffloadMappingFlags::OMP_MAP_FROM; } void getPlainLayout(const CXXRecordDecl *RD, @@ -8168,12 +7734,15 @@ private: for (const auto &I : RD->bases()) { if (I.isVirtual()) continue; - const auto *Base = I.getType()->getAsCXXRecordDecl(); + + QualType BaseTy = I.getType(); + const auto *Base = BaseTy->getAsCXXRecordDecl(); // Ignore empty bases. - if (Base->isEmpty() || CGF.getContext() - .getASTRecordLayout(Base) - .getNonVirtualSize() - .isZero()) + if (isEmptyRecordForLayout(CGF.getContext(), BaseTy) || + CGF.getContext() + .getASTRecordLayout(Base) + .getNonVirtualSize() + .isZero()) continue; unsigned FieldIndex = RL.getNonVirtualBaseLLVMFieldNo(Base); @@ -8181,10 +7750,12 @@ private: } // Fill in virtual bases. for (const auto &I : RD->vbases()) { - const auto *Base = I.getType()->getAsCXXRecordDecl(); + QualType BaseTy = I.getType(); // Ignore empty bases. - if (Base->isEmpty()) + if (isEmptyRecordForLayout(CGF.getContext(), BaseTy)) continue; + + const auto *Base = BaseTy->getAsCXXRecordDecl(); unsigned FieldIndex = RL.getVirtualBaseIndex(Base); if (RecordLayout[FieldIndex]) continue; @@ -8195,7 +7766,8 @@ private: for (const auto *Field : RD->fields()) { // Fill in non-bitfields. (Bitfields always use a zero pattern, which we // will fill in later.) 
- if (!Field->isBitField() && !Field->isZeroSize(CGF.getContext())) { + if (!Field->isBitField() && + !isEmptyFieldForLayout(CGF.getContext(), Field)) { unsigned FieldIndex = RL.getLLVMFieldNo(Field); RecordLayout[FieldIndex] = Field; } @@ -8211,169 +7783,224 @@ private: } } -public: - MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF) - : CurDir(&Dir), CGF(CGF) { - // Extract firstprivate clause information. - for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>()) - for (const auto *D : C->varlists()) - FirstPrivateDecls.try_emplace( - cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl()), C->isImplicit()); - // Extract implicit firstprivates from uses_allocators clauses. - for (const auto *C : Dir.getClausesOfKind<OMPUsesAllocatorsClause>()) { - for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) { - OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I); - if (const auto *DRE = dyn_cast_or_null<DeclRefExpr>(D.AllocatorTraits)) - FirstPrivateDecls.try_emplace(cast<VarDecl>(DRE->getDecl()), - /*Implicit=*/true); - else if (const auto *VD = dyn_cast<VarDecl>( - cast<DeclRefExpr>(D.Allocator->IgnoreParenImpCasts()) - ->getDecl())) - FirstPrivateDecls.try_emplace(VD, /*Implicit=*/true); - } - } - // Extract device pointer clause information. - for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>()) - for (auto L : C->component_lists()) - DevPointersMap[std::get<0>(L)].push_back(std::get<1>(L)); - } - - /// Constructor for the declare mapper directive. - MappableExprsHandler(const OMPDeclareMapperDecl &Dir, CodeGenFunction &CGF) - : CurDir(&Dir), CGF(CGF) {} - - /// Generate code for the combined entry if we have a partially mapped struct - /// and take care of the mapping flags of the arguments corresponding to - /// individual struct members. 
- void emitCombinedEntry(MapCombinedInfoTy &CombinedInfo, - MapFlagsArrayTy &CurTypes, - const StructRangeInfoTy &PartialStruct, - const ValueDecl *VD = nullptr, - bool NotTargetParams = true) const { - if (CurTypes.size() == 1 && - ((CurTypes.back() & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF) && - !PartialStruct.IsArraySection) - return; - CombinedInfo.Exprs.push_back(VD); - // Base is the base of the struct - CombinedInfo.BasePointers.push_back(PartialStruct.Base.getPointer()); - // Pointer is the address of the lowest element - llvm::Value *LB = PartialStruct.LowestElem.second.getPointer(); - CombinedInfo.Pointers.push_back(LB); - // There should not be a mapper for a combined entry. - CombinedInfo.Mappers.push_back(nullptr); - // Size is (addr of {highest+1} element) - (addr of lowest element) - llvm::Value *HB = PartialStruct.HighestElem.second.getPointer(); - llvm::Value *HAddr = CGF.Builder.CreateConstGEP1_32(HB, /*Idx0=*/1); - llvm::Value *CLAddr = CGF.Builder.CreatePointerCast(LB, CGF.VoidPtrTy); - llvm::Value *CHAddr = CGF.Builder.CreatePointerCast(HAddr, CGF.VoidPtrTy); - llvm::Value *Diff = CGF.Builder.CreatePtrDiff(CHAddr, CLAddr); - llvm::Value *Size = CGF.Builder.CreateIntCast(Diff, CGF.Int64Ty, - /*isSigned=*/false); - CombinedInfo.Sizes.push_back(Size); - // Map type is always TARGET_PARAM, if generate info for captures. - CombinedInfo.Types.push_back(NotTargetParams ? OMP_MAP_NONE - : OMP_MAP_TARGET_PARAM); - // If any element has the present modifier, then make sure the runtime - // doesn't attempt to allocate the struct. - if (CurTypes.end() != - llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) { - return Type & OMP_MAP_PRESENT; - })) - CombinedInfo.Types.back() |= OMP_MAP_PRESENT; - // Remove TARGET_PARAM flag from the first element if any. 
- if (!CurTypes.empty()) - CurTypes.front() &= ~OMP_MAP_TARGET_PARAM; - - // All other current entries will be MEMBER_OF the combined entry - // (except for PTR_AND_OBJ entries which do not have a placeholder value - // 0xFFFF in the MEMBER_OF field). - OpenMPOffloadMappingFlags MemberOfFlag = - getMemberOfFlag(CombinedInfo.BasePointers.size() - 1); - for (auto &M : CurTypes) - setCorrectMemberOfFlag(M, MemberOfFlag); - } - /// Generate all the base pointers, section pointers, sizes, map types, and /// mappers for the extracted mappable expressions (all included in \a /// CombinedInfo). Also, for each item that relates with a device pointer, a /// pair of the relevant declaration and index where it occurs is appended to /// the device pointers info array. - void generateAllInfo( - MapCombinedInfoTy &CombinedInfo, + void generateAllInfoForClauses( + ArrayRef<const OMPClause *> Clauses, MapCombinedInfoTy &CombinedInfo, + llvm::OpenMPIRBuilder &OMPBuilder, const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet = llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const { // We have to process the component lists that relate with the same // declaration in a single chunk so that we can generate the map flags // correctly. Therefore, we organize all lists in a map. - llvm::MapVector<const ValueDecl *, SmallVector<MapInfo, 8>> Info; + enum MapKind { Present, Allocs, Other, Total }; + llvm::MapVector<CanonicalDeclPtr<const Decl>, + SmallVector<SmallVector<MapInfo, 8>, 4>> + Info; // Helper function to fill the information map for the different supported // clauses. 
auto &&InfoGen = [&Info, &SkipVarSet]( - const ValueDecl *D, + const ValueDecl *D, MapKind Kind, OMPClauseMappableExprCommon::MappableExprComponentListRef L, OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers, ArrayRef<OpenMPMotionModifierKind> MotionModifiers, bool ReturnDevicePointer, bool IsImplicit, const ValueDecl *Mapper, const Expr *VarRef = nullptr, bool ForDeviceAddr = false) { - const ValueDecl *VD = - D ? cast<ValueDecl>(D->getCanonicalDecl()) : nullptr; - if (SkipVarSet.count(VD)) + if (SkipVarSet.contains(D)) return; - Info[VD].emplace_back(L, MapType, MapModifiers, MotionModifiers, - ReturnDevicePointer, IsImplicit, Mapper, VarRef, - ForDeviceAddr); + auto It = Info.find(D); + if (It == Info.end()) + It = Info + .insert(std::make_pair( + D, SmallVector<SmallVector<MapInfo, 8>, 4>(Total))) + .first; + It->second[Kind].emplace_back( + L, MapType, MapModifiers, MotionModifiers, ReturnDevicePointer, + IsImplicit, Mapper, VarRef, ForDeviceAddr); }; - assert(CurDir.is<const OMPExecutableDirective *>() && - "Expect a executable directive"); - const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>(); - for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) { + for (const auto *Cl : Clauses) { + const auto *C = dyn_cast<OMPMapClause>(Cl); + if (!C) + continue; + MapKind Kind = Other; + if (llvm::is_contained(C->getMapTypeModifiers(), + OMPC_MAP_MODIFIER_present)) + Kind = Present; + else if (C->getMapType() == OMPC_MAP_alloc) + Kind = Allocs; const auto *EI = C->getVarRefs().begin(); for (const auto L : C->component_lists()) { - // The Expression is not correct if the mapping is implicit const Expr *E = (C->getMapLoc().isValid()) ? 
*EI : nullptr; - InfoGen(std::get<0>(L), std::get<1>(L), C->getMapType(), - C->getMapTypeModifiers(), llvm::None, + InfoGen(std::get<0>(L), Kind, std::get<1>(L), C->getMapType(), + C->getMapTypeModifiers(), std::nullopt, /*ReturnDevicePointer=*/false, C->isImplicit(), std::get<2>(L), E); ++EI; } } - for (const auto *C : CurExecDir->getClausesOfKind<OMPToClause>()) { + for (const auto *Cl : Clauses) { + const auto *C = dyn_cast<OMPToClause>(Cl); + if (!C) + continue; + MapKind Kind = Other; + if (llvm::is_contained(C->getMotionModifiers(), + OMPC_MOTION_MODIFIER_present)) + Kind = Present; const auto *EI = C->getVarRefs().begin(); for (const auto L : C->component_lists()) { - InfoGen(std::get<0>(L), std::get<1>(L), OMPC_MAP_to, llvm::None, + InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_to, std::nullopt, C->getMotionModifiers(), /*ReturnDevicePointer=*/false, C->isImplicit(), std::get<2>(L), *EI); ++EI; } } - for (const auto *C : CurExecDir->getClausesOfKind<OMPFromClause>()) { + for (const auto *Cl : Clauses) { + const auto *C = dyn_cast<OMPFromClause>(Cl); + if (!C) + continue; + MapKind Kind = Other; + if (llvm::is_contained(C->getMotionModifiers(), + OMPC_MOTION_MODIFIER_present)) + Kind = Present; const auto *EI = C->getVarRefs().begin(); for (const auto L : C->component_lists()) { - InfoGen(std::get<0>(L), std::get<1>(L), OMPC_MAP_from, llvm::None, - C->getMotionModifiers(), /*ReturnDevicePointer=*/false, - C->isImplicit(), std::get<2>(L), *EI); + InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_from, + std::nullopt, C->getMotionModifiers(), + /*ReturnDevicePointer=*/false, C->isImplicit(), std::get<2>(L), + *EI); ++EI; } } + // Look at the use_device_ptr and use_device_addr clauses information and + // mark the existing map entries as such. If there is no map information for + // an entry in the use_device_ptr and use_device_addr list, we create one + // with map type 'alloc' and zero size section. 
It is the user fault if that + // was not mapped before. If there is no map information and the pointer is + // a struct member, then we defer the emission of that entry until the whole + // struct has been processed. + llvm::MapVector<CanonicalDeclPtr<const Decl>, + SmallVector<DeferredDevicePtrEntryTy, 4>> + DeferredInfo; + MapCombinedInfoTy UseDeviceDataCombinedInfo; + + auto &&UseDeviceDataCombinedInfoGen = + [&UseDeviceDataCombinedInfo](const ValueDecl *VD, llvm::Value *Ptr, + CodeGenFunction &CGF, bool IsDevAddr) { + UseDeviceDataCombinedInfo.Exprs.push_back(VD); + UseDeviceDataCombinedInfo.BasePointers.emplace_back(Ptr); + UseDeviceDataCombinedInfo.DevicePtrDecls.emplace_back(VD); + UseDeviceDataCombinedInfo.DevicePointers.emplace_back( + IsDevAddr ? DeviceInfoTy::Address : DeviceInfoTy::Pointer); + UseDeviceDataCombinedInfo.Pointers.push_back(Ptr); + UseDeviceDataCombinedInfo.Sizes.push_back( + llvm::Constant::getNullValue(CGF.Int64Ty)); + UseDeviceDataCombinedInfo.Types.push_back( + OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM); + UseDeviceDataCombinedInfo.Mappers.push_back(nullptr); + }; + + auto &&MapInfoGen = + [&DeferredInfo, &UseDeviceDataCombinedInfoGen, + &InfoGen](CodeGenFunction &CGF, const Expr *IE, const ValueDecl *VD, + OMPClauseMappableExprCommon::MappableExprComponentListRef + Components, + bool IsImplicit, bool IsDevAddr) { + // We didn't find any match in our map information - generate a zero + // size array section - if the pointer is a struct member we defer + // this action until the whole struct has been processed. + if (isa<MemberExpr>(IE)) { + // Insert the pointer into Info to be processed by + // generateInfoForComponentList. Because it is a member pointer + // without a pointee, no entry will be generated for it, therefore + // we need to generate one after the whole struct has been + // processed. 
Nonetheless, generateInfoForComponentList must be + // called to take the pointer into account for the calculation of + // the range of the partial struct. + InfoGen(nullptr, Other, Components, OMPC_MAP_unknown, std::nullopt, + std::nullopt, /*ReturnDevicePointer=*/false, IsImplicit, + nullptr, nullptr, IsDevAddr); + DeferredInfo[nullptr].emplace_back(IE, VD, IsDevAddr); + } else { + llvm::Value *Ptr; + if (IsDevAddr) { + if (IE->isGLValue()) + Ptr = CGF.EmitLValue(IE).getPointer(CGF); + else + Ptr = CGF.EmitScalarExpr(IE); + } else { + Ptr = CGF.EmitLoadOfScalar(CGF.EmitLValue(IE), IE->getExprLoc()); + } + UseDeviceDataCombinedInfoGen(VD, Ptr, CGF, IsDevAddr); + } + }; + + auto &&IsMapInfoExist = [&Info](CodeGenFunction &CGF, const ValueDecl *VD, + const Expr *IE, bool IsDevAddr) -> bool { + // We potentially have map information for this declaration already. + // Look for the first set of components that refer to it. If found, + // return true. + // If the first component is a member expression, we have to look into + // 'this', which maps to null in the map of map information. Otherwise + // look directly for the information. + auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD); + if (It != Info.end()) { + bool Found = false; + for (auto &Data : It->second) { + auto *CI = llvm::find_if(Data, [VD](const MapInfo &MI) { + return MI.Components.back().getAssociatedDeclaration() == VD; + }); + // If we found a map entry, signal that the pointer has to be + // returned and move on to the next declaration. Exclude cases where + // the base pointer is mapped as array subscript, array section or + // array shaping. The base address is passed as a pointer to base in + // this case and cannot be used as a base for use_device_ptr list + // item. 
+ if (CI != Data.end()) { + if (IsDevAddr) { + CI->ForDeviceAddr = IsDevAddr; + CI->ReturnDevicePointer = true; + Found = true; + break; + } else { + auto PrevCI = std::next(CI->Components.rbegin()); + const auto *VarD = dyn_cast<VarDecl>(VD); + if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() || + isa<MemberExpr>(IE) || + !VD->getType().getNonReferenceType()->isPointerType() || + PrevCI == CI->Components.rend() || + isa<MemberExpr>(PrevCI->getAssociatedExpression()) || !VarD || + VarD->hasLocalStorage()) { + CI->ForDeviceAddr = IsDevAddr; + CI->ReturnDevicePointer = true; + Found = true; + break; + } + } + } + } + return Found; + } + return false; + }; + // Look at the use_device_ptr clause information and mark the existing map // entries as such. If there is no map information for an entry in the // use_device_ptr list, we create one with map type 'alloc' and zero size // section. It is the user fault if that was not mapped before. If there is // no map information and the pointer is a struct member, then we defer the // emission of that entry until the whole struct has been processed. - llvm::MapVector<const ValueDecl *, SmallVector<DeferredDevicePtrEntryTy, 4>> - DeferredInfo; - MapCombinedInfoTy UseDevicePtrCombinedInfo; - - for (const auto *C : - CurExecDir->getClausesOfKind<OMPUseDevicePtrClause>()) { + for (const auto *Cl : Clauses) { + const auto *C = dyn_cast<OMPUseDevicePtrClause>(Cl); + if (!C) + continue; for (const auto L : C->component_lists()) { OMPClauseMappableExprCommon::MappableExprComponentListRef Components = std::get<1>(L); @@ -8382,76 +8009,21 @@ public: const ValueDecl *VD = Components.back().getAssociatedDeclaration(); VD = cast<ValueDecl>(VD->getCanonicalDecl()); const Expr *IE = Components.back().getAssociatedExpression(); - // If the first component is a member expression, we have to look into - // 'this', which maps to null in the map of map information. Otherwise - // look directly for the information. 
- auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD); - - // We potentially have map information for this declaration already. - // Look for the first set of components that refer to it. - if (It != Info.end()) { - auto *CI = llvm::find_if(It->second, [VD](const MapInfo &MI) { - return MI.Components.back().getAssociatedDeclaration() == VD; - }); - // If we found a map entry, signal that the pointer has to be returned - // and move on to the next declaration. - // Exclude cases where the base pointer is mapped as array subscript, - // array section or array shaping. The base address is passed as a - // pointer to base in this case and cannot be used as a base for - // use_device_ptr list item. - if (CI != It->second.end()) { - auto PrevCI = std::next(CI->Components.rbegin()); - const auto *VarD = dyn_cast<VarDecl>(VD); - if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() || - isa<MemberExpr>(IE) || - !VD->getType().getNonReferenceType()->isPointerType() || - PrevCI == CI->Components.rend() || - isa<MemberExpr>(PrevCI->getAssociatedExpression()) || !VarD || - VarD->hasLocalStorage()) { - CI->ReturnDevicePointer = true; - continue; - } - } - } - - // We didn't find any match in our map information - generate a zero - // size array section - if the pointer is a struct member we defer this - // action until the whole struct has been processed. - if (isa<MemberExpr>(IE)) { - // Insert the pointer into Info to be processed by - // generateInfoForComponentList. Because it is a member pointer - // without a pointee, no entry will be generated for it, therefore - // we need to generate one after the whole struct has been processed. - // Nonetheless, generateInfoForComponentList must be called to take - // the pointer into account for the calculation of the range of the - // partial struct. 
- InfoGen(nullptr, Components, OMPC_MAP_unknown, llvm::None, llvm::None, - /*ReturnDevicePointer=*/false, C->isImplicit(), nullptr); - DeferredInfo[nullptr].emplace_back(IE, VD, /*ForDeviceAddr=*/false); - } else { - llvm::Value *Ptr = - CGF.EmitLoadOfScalar(CGF.EmitLValue(IE), IE->getExprLoc()); - UseDevicePtrCombinedInfo.Exprs.push_back(VD); - UseDevicePtrCombinedInfo.BasePointers.emplace_back(Ptr, VD); - UseDevicePtrCombinedInfo.Pointers.push_back(Ptr); - UseDevicePtrCombinedInfo.Sizes.push_back( - llvm::Constant::getNullValue(CGF.Int64Ty)); - UseDevicePtrCombinedInfo.Types.push_back(OMP_MAP_RETURN_PARAM); - UseDevicePtrCombinedInfo.Mappers.push_back(nullptr); - } + if (IsMapInfoExist(CGF, VD, IE, /*IsDevAddr=*/false)) + continue; + MapInfoGen(CGF, IE, VD, Components, C->isImplicit(), + /*IsDevAddr=*/false); } } - // Look at the use_device_addr clause information and mark the existing map - // entries as such. If there is no map information for an entry in the - // use_device_addr list, we create one with map type 'alloc' and zero size - // section. It is the user fault if that was not mapped before. If there is - // no map information and the pointer is a struct member, then we defer the - // emission of that entry until the whole struct has been processed. 
llvm::SmallDenseSet<CanonicalDeclPtr<const Decl>, 4> Processed; - for (const auto *C : - CurExecDir->getClausesOfKind<OMPUseDeviceAddrClause>()) { + for (const auto *Cl : Clauses) { + const auto *C = dyn_cast<OMPUseDeviceAddrClause>(Cl); + if (!C) + continue; for (const auto L : C->component_lists()) { + OMPClauseMappableExprCommon::MappableExprComponentListRef Components = + std::get<1>(L); assert(!std::get<1>(L).empty() && "Not expecting empty list of components!"); const ValueDecl *VD = std::get<1>(L).back().getAssociatedDeclaration(); @@ -8459,98 +8031,100 @@ public: continue; VD = cast<ValueDecl>(VD->getCanonicalDecl()); const Expr *IE = std::get<1>(L).back().getAssociatedExpression(); - // If the first component is a member expression, we have to look into - // 'this', which maps to null in the map of map information. Otherwise - // look directly for the information. - auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD); - - // We potentially have map information for this declaration already. - // Look for the first set of components that refer to it. - if (It != Info.end()) { - auto *CI = llvm::find_if(It->second, [VD](const MapInfo &MI) { - return MI.Components.back().getAssociatedDeclaration() == VD; - }); - // If we found a map entry, signal that the pointer has to be returned - // and move on to the next declaration. - if (CI != It->second.end()) { - CI->ReturnDevicePointer = true; - continue; - } - } - - // We didn't find any match in our map information - generate a zero - // size array section - if the pointer is a struct member we defer this - // action until the whole struct has been processed. - if (isa<MemberExpr>(IE)) { - // Insert the pointer into Info to be processed by - // generateInfoForComponentList. Because it is a member pointer - // without a pointee, no entry will be generated for it, therefore - // we need to generate one after the whole struct has been processed. 
- // Nonetheless, generateInfoForComponentList must be called to take - // the pointer into account for the calculation of the range of the - // partial struct. - InfoGen(nullptr, std::get<1>(L), OMPC_MAP_unknown, llvm::None, - llvm::None, /*ReturnDevicePointer=*/false, C->isImplicit(), - nullptr, nullptr, /*ForDeviceAddr=*/true); - DeferredInfo[nullptr].emplace_back(IE, VD, /*ForDeviceAddr=*/true); - } else { - llvm::Value *Ptr; - if (IE->isGLValue()) - Ptr = CGF.EmitLValue(IE).getPointer(CGF); - else - Ptr = CGF.EmitScalarExpr(IE); - CombinedInfo.Exprs.push_back(VD); - CombinedInfo.BasePointers.emplace_back(Ptr, VD); - CombinedInfo.Pointers.push_back(Ptr); - CombinedInfo.Sizes.push_back( - llvm::Constant::getNullValue(CGF.Int64Ty)); - CombinedInfo.Types.push_back(OMP_MAP_RETURN_PARAM); - CombinedInfo.Mappers.push_back(nullptr); - } + if (IsMapInfoExist(CGF, VD, IE, /*IsDevAddr=*/true)) + continue; + MapInfoGen(CGF, IE, VD, Components, C->isImplicit(), + /*IsDevAddr=*/true); } } - for (const auto &M : Info) { - // Underlying variable declaration used in the map clause. - const ValueDecl *VD = std::get<0>(M); - - // Temporary generated information. - MapCombinedInfoTy CurInfo; + for (const auto &Data : Info) { StructRangeInfoTy PartialStruct; - - for (const MapInfo &L : M.second) { - assert(!L.Components.empty() && - "Not expecting declaration with no component lists."); - - // Remember the current base pointer index. - unsigned CurrentBasePointersIdx = CurInfo.BasePointers.size(); - CurInfo.NonContigInfo.IsNonContiguous = - L.Components.back().isNonContiguous(); - generateInfoForComponentList( - L.MapType, L.MapModifiers, L.MotionModifiers, L.Components, CurInfo, - PartialStruct, /*IsFirstComponentList=*/false, L.IsImplicit, - L.Mapper, L.ForDeviceAddr, VD, L.VarRef); - - // If this entry relates with a device pointer, set the relevant - // declaration and add the 'return pointer' flag. 
- if (L.ReturnDevicePointer) { - assert(CurInfo.BasePointers.size() > CurrentBasePointersIdx && - "Unexpected number of mapped base pointers."); - - const ValueDecl *RelevantVD = - L.Components.back().getAssociatedDeclaration(); - assert(RelevantVD && - "No relevant declaration related with device pointer??"); - - CurInfo.BasePointers[CurrentBasePointersIdx].setDevicePtrDecl( - RelevantVD); - CurInfo.Types[CurrentBasePointersIdx] |= OMP_MAP_RETURN_PARAM; + // Current struct information: + MapCombinedInfoTy CurInfo; + // Current struct base information: + MapCombinedInfoTy StructBaseCurInfo; + const Decl *D = Data.first; + const ValueDecl *VD = cast_or_null<ValueDecl>(D); + bool HasMapBasePtr = false; + bool HasMapArraySec = false; + if (VD && VD->getType()->isAnyPointerType()) { + for (const auto &M : Data.second) { + HasMapBasePtr = any_of(M, [](const MapInfo &L) { + return isa_and_present<DeclRefExpr>(L.VarRef); + }); + HasMapArraySec = any_of(M, [](const MapInfo &L) { + return isa_and_present<ArraySectionExpr, ArraySubscriptExpr>( + L.VarRef); + }); + if (HasMapBasePtr && HasMapArraySec) + break; + } + } + for (const auto &M : Data.second) { + for (const MapInfo &L : M) { + assert(!L.Components.empty() && + "Not expecting declaration with no component lists."); + + // Remember the current base pointer index. 
+ unsigned CurrentBasePointersIdx = CurInfo.BasePointers.size(); + unsigned StructBasePointersIdx = + StructBaseCurInfo.BasePointers.size(); + CurInfo.NonContigInfo.IsNonContiguous = + L.Components.back().isNonContiguous(); + generateInfoForComponentList( + L.MapType, L.MapModifiers, L.MotionModifiers, L.Components, + CurInfo, StructBaseCurInfo, PartialStruct, + /*IsFirstComponentList=*/false, L.IsImplicit, + /*GenerateAllInfoForClauses*/ true, L.Mapper, L.ForDeviceAddr, VD, + L.VarRef, /*OverlappedElements*/ std::nullopt, + HasMapBasePtr && HasMapArraySec); + + // If this entry relates to a device pointer, set the relevant + // declaration and add the 'return pointer' flag. + if (L.ReturnDevicePointer) { + // Check whether a value was added to either CurInfo or + // StructBaseCurInfo and error if no value was added to either of + // them: + assert((CurrentBasePointersIdx < CurInfo.BasePointers.size() || + StructBasePointersIdx < + StructBaseCurInfo.BasePointers.size()) && + "Unexpected number of mapped base pointers."); + + // Choose a base pointer index which is always valid: + const ValueDecl *RelevantVD = + L.Components.back().getAssociatedDeclaration(); + assert(RelevantVD && + "No relevant declaration related with device pointer??"); + + // If StructBaseCurInfo has been updated this iteration then work on + // the first new entry added to it i.e. make sure that when multiple + // values are added to any of the lists, the first value added is + // being modified by the assignments below (not the last value + // added). + if (StructBasePointersIdx < StructBaseCurInfo.BasePointers.size()) { + StructBaseCurInfo.DevicePtrDecls[StructBasePointersIdx] = + RelevantVD; + StructBaseCurInfo.DevicePointers[StructBasePointersIdx] = + L.ForDeviceAddr ? 
DeviceInfoTy::Address + : DeviceInfoTy::Pointer; + StructBaseCurInfo.Types[StructBasePointersIdx] |= + OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM; + } else { + CurInfo.DevicePtrDecls[CurrentBasePointersIdx] = RelevantVD; + CurInfo.DevicePointers[CurrentBasePointersIdx] = + L.ForDeviceAddr ? DeviceInfoTy::Address + : DeviceInfoTy::Pointer; + CurInfo.Types[CurrentBasePointersIdx] |= + OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM; + } + } } } // Append any pending zero-length pointers which are struct members and // used with use_device_ptr or use_device_addr. - auto CI = DeferredInfo.find(M.first); + auto CI = DeferredInfo.find(Data.first); if (CI != DeferredInfo.end()) { for (const DeferredDevicePtrEntryTy &L : CI->second) { llvm::Value *BasePtr; @@ -8564,19 +8138,26 @@ public: // Entry is RETURN_PARAM. Also, set the placeholder value // MEMBER_OF=FFFF so that the entry is later updated with the // correct value of MEMBER_OF. - CurInfo.Types.push_back(OMP_MAP_RETURN_PARAM | OMP_MAP_MEMBER_OF); + CurInfo.Types.push_back( + OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM | + OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF); } else { BasePtr = this->CGF.EmitLValue(L.IE).getPointer(CGF); Ptr = this->CGF.EmitLoadOfScalar(this->CGF.EmitLValue(L.IE), L.IE->getExprLoc()); - // Entry is PTR_AND_OBJ and RETURN_PARAM. Also, set the placeholder - // value MEMBER_OF=FFFF so that the entry is later updated with the - // correct value of MEMBER_OF. - CurInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_RETURN_PARAM | - OMP_MAP_MEMBER_OF); + // Entry is PTR_AND_OBJ and RETURN_PARAM. Also, set the + // placeholder value MEMBER_OF=FFFF so that the entry is later + // updated with the correct value of MEMBER_OF. 
+ CurInfo.Types.push_back( + OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ | + OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM | + OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF); } CurInfo.Exprs.push_back(L.VD); - CurInfo.BasePointers.emplace_back(BasePtr, L.VD); + CurInfo.BasePointers.emplace_back(BasePtr); + CurInfo.DevicePtrDecls.emplace_back(L.VD); + CurInfo.DevicePointers.emplace_back( + L.ForDeviceAddr ? DeviceInfoTy::Address : DeviceInfoTy::Pointer); CurInfo.Pointers.push_back(Ptr); CurInfo.Sizes.push_back( llvm::Constant::getNullValue(this->CGF.Int64Ty)); @@ -8584,97 +8165,217 @@ public: } } + // Unify entries in one list making sure the struct mapping precedes the + // individual fields: + MapCombinedInfoTy UnionCurInfo; + UnionCurInfo.append(StructBaseCurInfo); + UnionCurInfo.append(CurInfo); + // If there is an entry in PartialStruct it means we have a struct with // individual members mapped. Emit an extra combined entry. - if (PartialStruct.Base.isValid()) - emitCombinedEntry(CombinedInfo, CurInfo.Types, PartialStruct, VD); + if (PartialStruct.Base.isValid()) { + UnionCurInfo.NonContigInfo.Dims.push_back(0); + // Emit a combined entry: + emitCombinedEntry(CombinedInfo, UnionCurInfo.Types, PartialStruct, + /*IsMapThis*/ !VD, OMPBuilder, VD); + } // We need to append the results of this capture to what we already have. - CombinedInfo.append(CurInfo); + CombinedInfo.append(UnionCurInfo); } // Append data for use_device_ptr clauses. - CombinedInfo.append(UseDevicePtrCombinedInfo); + CombinedInfo.append(UseDeviceDataCombinedInfo); } - /// Generate all the base pointers, section pointers, sizes, map types, and - /// mappers for the extracted map clauses of user-defined mapper (all included - /// in \a CombinedInfo). 
- void generateAllInfoForMapper(MapCombinedInfoTy &CombinedInfo) const { - assert(CurDir.is<const OMPDeclareMapperDecl *>() && - "Expect a declare mapper directive"); - const auto *CurMapperDir = CurDir.get<const OMPDeclareMapperDecl *>(); - // We have to process the component lists that relate with the same - // declaration in a single chunk so that we can generate the map flags - // correctly. Therefore, we organize all lists in a map. - llvm::MapVector<const ValueDecl *, SmallVector<MapInfo, 8>> Info; - - // Fill the information map for map clauses. - for (const auto *C : CurMapperDir->clauselists()) { - const auto *MC = cast<OMPMapClause>(C); - const auto *EI = MC->getVarRefs().begin(); - for (const auto L : MC->component_lists()) { - // The Expression is not correct if the mapping is implicit - const Expr *E = (MC->getMapLoc().isValid()) ? *EI : nullptr; - const ValueDecl *VD = - std::get<0>(L) ? cast<ValueDecl>(std::get<0>(L)->getCanonicalDecl()) - : nullptr; - // Get the corresponding user-defined mapper. - Info[VD].emplace_back(std::get<1>(L), MC->getMapType(), - MC->getMapTypeModifiers(), llvm::None, - /*ReturnDevicePointer=*/false, MC->isImplicit(), - std::get<2>(L), E); - ++EI; +public: + MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF) + : CurDir(&Dir), CGF(CGF) { + // Extract firstprivate clause information. + for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>()) + for (const auto *D : C->varlists()) + FirstPrivateDecls.try_emplace( + cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl()), C->isImplicit()); + // Extract implicit firstprivates from uses_allocators clauses. 
+ for (const auto *C : Dir.getClausesOfKind<OMPUsesAllocatorsClause>()) { + for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) { + OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I); + if (const auto *DRE = dyn_cast_or_null<DeclRefExpr>(D.AllocatorTraits)) + FirstPrivateDecls.try_emplace(cast<VarDecl>(DRE->getDecl()), + /*Implicit=*/true); + else if (const auto *VD = dyn_cast<VarDecl>( + cast<DeclRefExpr>(D.Allocator->IgnoreParenImpCasts()) + ->getDecl())) + FirstPrivateDecls.try_emplace(VD, /*Implicit=*/true); } } + // Extract device pointer clause information. + for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>()) + for (auto L : C->component_lists()) + DevPointersMap[std::get<0>(L)].push_back(std::get<1>(L)); + // Extract device addr clause information. + for (const auto *C : Dir.getClausesOfKind<OMPHasDeviceAddrClause>()) + for (auto L : C->component_lists()) + HasDevAddrsMap[std::get<0>(L)].push_back(std::get<1>(L)); + // Extract map information. + for (const auto *C : Dir.getClausesOfKind<OMPMapClause>()) { + if (C->getMapType() != OMPC_MAP_to) + continue; + for (auto L : C->component_lists()) { + const ValueDecl *VD = std::get<0>(L); + const auto *RD = VD ? VD->getType() + .getCanonicalType() + .getNonReferenceType() + ->getAsCXXRecordDecl() + : nullptr; + if (RD && RD->isLambda()) + LambdasMap.try_emplace(std::get<0>(L), C); + } + } + } - for (const auto &M : Info) { - // We need to know when we generate information for the first component - // associated with a capture, because the mapping flags depend on it. - bool IsFirstComponentList = true; - - // Underlying variable declaration used in the map clause. - const ValueDecl *VD = std::get<0>(M); + /// Constructor for the declare mapper directive. + MappableExprsHandler(const OMPDeclareMapperDecl &Dir, CodeGenFunction &CGF) + : CurDir(&Dir), CGF(CGF) {} - // Temporary generated information. 
- MapCombinedInfoTy CurInfo; - StructRangeInfoTy PartialStruct; + /// Generate code for the combined entry if we have a partially mapped struct + /// and take care of the mapping flags of the arguments corresponding to + /// individual struct members. + void emitCombinedEntry(MapCombinedInfoTy &CombinedInfo, + MapFlagsArrayTy &CurTypes, + const StructRangeInfoTy &PartialStruct, bool IsMapThis, + llvm::OpenMPIRBuilder &OMPBuilder, + const ValueDecl *VD = nullptr, + bool NotTargetParams = true) const { + if (CurTypes.size() == 1 && + ((CurTypes.back() & OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF) != + OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF) && + !PartialStruct.IsArraySection) + return; + Address LBAddr = PartialStruct.LowestElem.second; + Address HBAddr = PartialStruct.HighestElem.second; + if (PartialStruct.HasCompleteRecord) { + LBAddr = PartialStruct.LB; + HBAddr = PartialStruct.LB; + } + CombinedInfo.Exprs.push_back(VD); + // Base is the base of the struct + CombinedInfo.BasePointers.push_back(PartialStruct.Base.emitRawPointer(CGF)); + CombinedInfo.DevicePtrDecls.push_back(nullptr); + CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None); + // Pointer is the address of the lowest element + llvm::Value *LB = LBAddr.emitRawPointer(CGF); + const CXXMethodDecl *MD = + CGF.CurFuncDecl ? dyn_cast<CXXMethodDecl>(CGF.CurFuncDecl) : nullptr; + const CXXRecordDecl *RD = MD ? MD->getParent() : nullptr; + bool HasBaseClass = RD && IsMapThis ? RD->getNumBases() > 0 : false; + // There should not be a mapper for a combined entry. + if (HasBaseClass) { + // OpenMP 5.2 148:21: + // If the target construct is within a class non-static member function, + // and a variable is an accessible data member of the object for which the + // non-static data member function is invoked, the variable is treated as + // if the this[:1] expression had appeared in a map clause with a map-type + // of tofrom. 
+ // Emit this[:1] + CombinedInfo.Pointers.push_back(PartialStruct.Base.emitRawPointer(CGF)); + QualType Ty = MD->getFunctionObjectParameterType(); + llvm::Value *Size = + CGF.Builder.CreateIntCast(CGF.getTypeSize(Ty), CGF.Int64Ty, + /*isSigned=*/true); + CombinedInfo.Sizes.push_back(Size); + } else { + CombinedInfo.Pointers.push_back(LB); + // Size is (addr of {highest+1} element) - (addr of lowest element) + llvm::Value *HB = HBAddr.emitRawPointer(CGF); + llvm::Value *HAddr = CGF.Builder.CreateConstGEP1_32( + HBAddr.getElementType(), HB, /*Idx0=*/1); + llvm::Value *CLAddr = CGF.Builder.CreatePointerCast(LB, CGF.VoidPtrTy); + llvm::Value *CHAddr = CGF.Builder.CreatePointerCast(HAddr, CGF.VoidPtrTy); + llvm::Value *Diff = CGF.Builder.CreatePtrDiff(CGF.Int8Ty, CHAddr, CLAddr); + llvm::Value *Size = CGF.Builder.CreateIntCast(Diff, CGF.Int64Ty, + /*isSigned=*/false); + CombinedInfo.Sizes.push_back(Size); + } + CombinedInfo.Mappers.push_back(nullptr); + // Map type is always TARGET_PARAM, if generate info for captures. + CombinedInfo.Types.push_back( + NotTargetParams ? OpenMPOffloadMappingFlags::OMP_MAP_NONE + : OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM); + // If any element has the present modifier, then make sure the runtime + // doesn't attempt to allocate the struct. + if (CurTypes.end() != + llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) { + return static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>( + Type & OpenMPOffloadMappingFlags::OMP_MAP_PRESENT); + })) + CombinedInfo.Types.back() |= OpenMPOffloadMappingFlags::OMP_MAP_PRESENT; + // Remove TARGET_PARAM flag from the first element + (*CurTypes.begin()) &= ~OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM; + // If any element has the ompx_hold modifier, then make sure the runtime + // uses the hold reference count for the struct as a whole so that it won't + // be unmapped by an extra dynamic reference count decrement. 
Add it to all + // elements as well so the runtime knows which reference count to check + // when determining whether it's time for device-to-host transfers of + // individual elements. + if (CurTypes.end() != + llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) { + return static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>( + Type & OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD); + })) { + CombinedInfo.Types.back() |= OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD; + for (auto &M : CurTypes) + M |= OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD; + } - for (const MapInfo &L : M.second) { - assert(!L.Components.empty() && - "Not expecting declaration with no component lists."); - generateInfoForComponentList( - L.MapType, L.MapModifiers, L.MotionModifiers, L.Components, CurInfo, - PartialStruct, IsFirstComponentList, L.IsImplicit, L.Mapper, - L.ForDeviceAddr, VD, L.VarRef); - IsFirstComponentList = false; - } + // All other current entries will be MEMBER_OF the combined entry + // (except for PTR_AND_OBJ entries which do not have a placeholder value + // 0xFFFF in the MEMBER_OF field). + OpenMPOffloadMappingFlags MemberOfFlag = + OMPBuilder.getMemberOfFlag(CombinedInfo.BasePointers.size() - 1); + for (auto &M : CurTypes) + OMPBuilder.setCorrectMemberOfFlag(M, MemberOfFlag); + } - // If there is an entry in PartialStruct it means we have a struct with - // individual members mapped. Emit an extra combined entry. - if (PartialStruct.Base.isValid()) { - CurInfo.NonContigInfo.Dims.push_back(0); - emitCombinedEntry(CombinedInfo, CurInfo.Types, PartialStruct, VD); - } + /// Generate all the base pointers, section pointers, sizes, map types, and + /// mappers for the extracted mappable expressions (all included in \a + /// CombinedInfo). Also, for each item that relates with a device pointer, a + /// pair of the relevant declaration and index where it occurs is appended to + /// the device pointers info array. 
+ void generateAllInfo( + MapCombinedInfoTy &CombinedInfo, llvm::OpenMPIRBuilder &OMPBuilder, + const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet = + llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const { + assert(CurDir.is<const OMPExecutableDirective *>() && + "Expect a executable directive"); + const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>(); + generateAllInfoForClauses(CurExecDir->clauses(), CombinedInfo, OMPBuilder, + SkipVarSet); + } - // We need to append the results of this capture to what we already have. - CombinedInfo.append(CurInfo); - } + /// Generate all the base pointers, section pointers, sizes, map types, and + /// mappers for the extracted map clauses of user-defined mapper (all included + /// in \a CombinedInfo). + void generateAllInfoForMapper(MapCombinedInfoTy &CombinedInfo, + llvm::OpenMPIRBuilder &OMPBuilder) const { + assert(CurDir.is<const OMPDeclareMapperDecl *>() && + "Expect a declare mapper directive"); + const auto *CurMapperDir = CurDir.get<const OMPDeclareMapperDecl *>(); + generateAllInfoForClauses(CurMapperDir->clauses(), CombinedInfo, + OMPBuilder); } /// Emit capture info for lambdas for variables captured by reference. 
void generateInfoForLambdaCaptures( const ValueDecl *VD, llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo, llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers) const { - const auto *RD = VD->getType() - .getCanonicalType() - .getNonReferenceType() - ->getAsCXXRecordDecl(); + QualType VDType = VD->getType().getCanonicalType().getNonReferenceType(); + const auto *RD = VDType->getAsCXXRecordDecl(); if (!RD || !RD->isLambda()) return; - Address VDAddr = Address(Arg, CGF.getContext().getDeclAlign(VD)); - LValue VDLVal = CGF.MakeAddrLValue( - VDAddr, VD->getType().getCanonicalType().getNonReferenceType()); - llvm::DenseMap<const VarDecl *, FieldDecl *> Captures; + Address VDAddr(Arg, CGF.ConvertTypeForMem(VDType), + CGF.getContext().getDeclAlign(VD)); + LValue VDLVal = CGF.MakeAddrLValue(VDAddr, VDType); + llvm::DenseMap<const ValueDecl *, FieldDecl *> Captures; FieldDecl *ThisCapture = nullptr; RD->getCaptureFields(Captures, ThisCapture); if (ThisCapture) { @@ -8685,18 +8386,23 @@ public: VDLVal.getPointer(CGF)); CombinedInfo.Exprs.push_back(VD); CombinedInfo.BasePointers.push_back(ThisLVal.getPointer(CGF)); + CombinedInfo.DevicePtrDecls.push_back(nullptr); + CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None); CombinedInfo.Pointers.push_back(ThisLValVal.getPointer(CGF)); CombinedInfo.Sizes.push_back( CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy), CGF.Int64Ty, /*isSigned=*/true)); - CombinedInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL | - OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT); + CombinedInfo.Types.push_back( + OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ | + OpenMPOffloadMappingFlags::OMP_MAP_LITERAL | + OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF | + OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT); CombinedInfo.Mappers.push_back(nullptr); } for (const LambdaCapture &LC : RD->captures()) { if (!LC.capturesVariable()) continue; - const VarDecl *VD = LC.getCapturedVar(); + const VarDecl *VD = 
cast<VarDecl>(LC.getCapturedVar()); if (LC.getCaptureKind() != LCK_ByRef && !VD->getType()->isPointerType()) continue; auto It = Captures.find(VD); @@ -8708,6 +8414,8 @@ public: VDLVal.getPointer(CGF)); CombinedInfo.Exprs.push_back(VD); CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF)); + CombinedInfo.DevicePtrDecls.push_back(nullptr); + CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None); CombinedInfo.Pointers.push_back(VarLValVal.getPointer(CGF)); CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast( CGF.getTypeSize( @@ -8719,26 +8427,34 @@ public: VDLVal.getPointer(CGF)); CombinedInfo.Exprs.push_back(VD); CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF)); + CombinedInfo.DevicePtrDecls.push_back(nullptr); + CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None); CombinedInfo.Pointers.push_back(VarRVal.getScalarVal()); CombinedInfo.Sizes.push_back(llvm::ConstantInt::get(CGF.Int64Ty, 0)); } - CombinedInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL | - OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT); + CombinedInfo.Types.push_back( + OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ | + OpenMPOffloadMappingFlags::OMP_MAP_LITERAL | + OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF | + OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT); CombinedInfo.Mappers.push_back(nullptr); } } /// Set correct indices for lambdas captures. void adjustMemberOfForLambdaCaptures( + llvm::OpenMPIRBuilder &OMPBuilder, const llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers, MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers, MapFlagsArrayTy &Types) const { for (unsigned I = 0, E = Types.size(); I < E; ++I) { // Set correct member_of idx for all implicit lambda captures. 
- if (Types[I] != (OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL | - OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT)) + if (Types[I] != (OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ | + OpenMPOffloadMappingFlags::OMP_MAP_LITERAL | + OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF | + OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT)) continue; - llvm::Value *BasePtr = LambdaPointers.lookup(*BasePointers[I]); + llvm::Value *BasePtr = LambdaPointers.lookup(BasePointers[I]); assert(BasePtr && "Unable to find base lambda address."); int TgtIdx = -1; for (unsigned J = I; J > 0; --J) { @@ -8752,8 +8468,9 @@ public: // All other current entries will be MEMBER_OF the combined entry // (except for PTR_AND_OBJ entries which do not have a placeholder value // 0xFFFF in the MEMBER_OF field). - OpenMPOffloadMappingFlags MemberOfFlag = getMemberOfFlag(TgtIdx); - setCorrectMemberOfFlag(Types[I], MemberOfFlag); + OpenMPOffloadMappingFlags MemberOfFlag = + OMPBuilder.getMemberOfFlag(TgtIdx); + OMPBuilder.setCorrectMemberOfFlag(Types[I], MemberOfFlag); } } @@ -8770,19 +8487,26 @@ public: ? nullptr : Cap->getCapturedVar()->getCanonicalDecl(); + // for map(to: lambda): skip here, processing it in + // generateDefaultMapInfo + if (LambdasMap.count(VD)) + return; + // If this declaration appears in a is_device_ptr clause we just have to // pass the pointer by value. If it is a reference to a declaration, we just // pass its value. - if (DevPointersMap.count(VD)) { + if (VD && (DevPointersMap.count(VD) || HasDevAddrsMap.count(VD))) { CombinedInfo.Exprs.push_back(VD); - CombinedInfo.BasePointers.emplace_back(Arg, VD); + CombinedInfo.BasePointers.emplace_back(Arg); + CombinedInfo.DevicePtrDecls.emplace_back(VD); + CombinedInfo.DevicePointers.emplace_back(DeviceInfoTy::Pointer); CombinedInfo.Pointers.push_back(Arg); CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast( CGF.getTypeSize(CGF.getContext().VoidPtrTy), CGF.Int64Ty, /*isSigned=*/true)); CombinedInfo.Types.push_back( - (Cap->capturesVariable() ? 
OMP_MAP_TO : OMP_MAP_LITERAL) | - OMP_MAP_TARGET_PARAM); + OpenMPOffloadMappingFlags::OMP_MAP_LITERAL | + OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM); CombinedInfo.Mappers.push_back(nullptr); return; } @@ -8792,9 +8516,26 @@ public: OpenMPMapClauseKind, ArrayRef<OpenMPMapModifierKind>, bool, const ValueDecl *, const Expr *>; SmallVector<MapData, 4> DeclComponentLists; + // For member fields list in is_device_ptr, store it in + // DeclComponentLists for generating components info. + static const OpenMPMapModifierKind Unknown = OMPC_MAP_MODIFIER_unknown; + auto It = DevPointersMap.find(VD); + if (It != DevPointersMap.end()) + for (const auto &MCL : It->second) + DeclComponentLists.emplace_back(MCL, OMPC_MAP_to, Unknown, + /*IsImpicit = */ true, nullptr, + nullptr); + auto I = HasDevAddrsMap.find(VD); + if (I != HasDevAddrsMap.end()) + for (const auto &MCL : I->second) + DeclComponentLists.emplace_back(MCL, OMPC_MAP_tofrom, Unknown, + /*IsImpicit = */ true, nullptr, + nullptr); assert(CurDir.is<const OMPExecutableDirective *>() && "Expect a executable directive"); const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>(); + bool HasMapBasePtr = false; + bool HasMapArraySec = false; for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) { const auto *EI = C->getVarRefs().begin(); for (const auto L : C->decl_component_lists(VD)) { @@ -8806,12 +8547,31 @@ public: assert(VDecl == VD && "We got information for the wrong declaration??"); assert(!Components.empty() && "Not expecting declaration with no component lists."); + if (VD && E && VD->getType()->isAnyPointerType() && isa<DeclRefExpr>(E)) + HasMapBasePtr = true; + if (VD && E && VD->getType()->isAnyPointerType() && + (isa<ArraySectionExpr>(E) || isa<ArraySubscriptExpr>(E))) + HasMapArraySec = true; DeclComponentLists.emplace_back(Components, C->getMapType(), C->getMapTypeModifiers(), C->isImplicit(), Mapper, E); ++EI; } } + llvm::stable_sort(DeclComponentLists, [](const MapData &LHS, + 
const MapData &RHS) { + ArrayRef<OpenMPMapModifierKind> MapModifiers = std::get<2>(LHS); + OpenMPMapClauseKind MapType = std::get<1>(RHS); + bool HasPresent = + llvm::is_contained(MapModifiers, clang::OMPC_MAP_MODIFIER_present); + bool HasAllocs = MapType == OMPC_MAP_alloc; + MapModifiers = std::get<2>(RHS); + MapType = std::get<1>(LHS); + bool HasPresentR = + llvm::is_contained(MapModifiers, clang::OMPC_MAP_MODIFIER_present); + bool HasAllocsR = MapType == OMPC_MAP_alloc; + return (HasPresent && !HasPresentR) || (HasAllocs && !HasAllocsR); + }); // Find overlapping elements (including the offset from the base element). llvm::SmallDenseMap< @@ -8831,7 +8591,7 @@ public: std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) = L; ++Count; - for (const MapData &L1 : makeArrayRef(DeclComponentLists).slice(Count)) { + for (const MapData &L1 : ArrayRef(DeclComponentLists).slice(Count)) { OMPClauseMappableExprCommon::MappableExprComponentListRef Components1; std::tie(Components1, MapType, MapModifiers, IsImplicit, Mapper, VarRef) = L1; @@ -8847,11 +8607,28 @@ public: if (CI->getAssociatedDeclaration() != SI->getAssociatedDeclaration()) break; } - // Found overlapping if, at least for one component, reached the head of - // the components list. + // Found overlapping if, at least for one component, reached the head + // of the components list. if (CI == CE || SI == SE) { - assert((CI != CE || SI != SE) && - "Unexpected full match of the mapping components."); + // Ignore it if it is the same component. + if (CI == CE && SI == SE) + continue; + const auto It = (SI == SE) ? CI : SI; + // If one component is a pointer and another one is a kind of + // dereference of this pointer (array subscript, section, dereference, + // etc.), it is not an overlapping. + // Same, if one component is a base and another component is a + // dereferenced pointer memberexpr with the same base. 
+ if (!isa<MemberExpr>(It->getAssociatedExpression()) || + (std::prev(It)->getAssociatedDeclaration() && + std::prev(It) + ->getAssociatedDeclaration() + ->getType() + ->isPointerType()) || + (It->getAssociatedDeclaration() && + It->getAssociatedDeclaration()->getType()->isPointerType() && + std::next(It) != CE && std::next(It) != SE)) + continue; const MapData &BaseData = CI == CE ? L : L1; OMPClauseMappableExprCommon::MappableExprComponentListRef SubData = SI == SE ? Components : Components1; @@ -8863,16 +8640,22 @@ public: // Sort the overlapped elements for each item. llvm::SmallVector<const FieldDecl *, 4> Layout; if (!OverlappedData.empty()) { - if (const auto *CRD = - VD->getType().getCanonicalType()->getAsCXXRecordDecl()) + const Type *BaseType = VD->getType().getCanonicalType().getTypePtr(); + const Type *OrigType = BaseType->getPointeeOrArrayElementType(); + while (BaseType != OrigType) { + BaseType = OrigType->getCanonicalTypeInternal().getTypePtr(); + OrigType = BaseType->getPointeeOrArrayElementType(); + } + + if (const auto *CRD = BaseType->getAsCXXRecordDecl()) getPlainLayout(CRD, Layout, /*AsBase=*/false); else { - const auto *RD = VD->getType().getCanonicalType()->getAsRecordDecl(); + const auto *RD = BaseType->getAsRecordDecl(); Layout.append(RD->field_begin(), RD->field_end()); } } for (auto &Pair : OverlappedData) { - llvm::sort( + llvm::stable_sort( Pair.getSecond(), [&Layout]( OMPClauseMappableExprCommon::MappableExprComponentListRef First, @@ -8904,7 +8687,7 @@ public: const auto *FD2 = cast<FieldDecl>(SI->getAssociatedDeclaration()); if (FD1->getParent() == FD2->getParent()) return FD1->getFieldIndex() < FD2->getFieldIndex(); - const auto It = + const auto *It = llvm::find_if(Layout, [FD1, FD2](const FieldDecl *FD) { return FD == FD1 || FD == FD2; }); @@ -8914,6 +8697,8 @@ public: // Associated with a capture, because the mapping flags depend on it. // Go through all of the elements with the overlapped elements. 
+ bool IsFirstComponentList = true; + MapCombinedInfoTy StructBaseCombinedInfo; for (const auto &Pair : OverlappedData) { const MapData &L = *Pair.getFirst(); OMPClauseMappableExprCommon::MappableExprComponentListRef Components; @@ -8926,14 +8711,14 @@ public: L; ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef> OverlappedComponents = Pair.getSecond(); - bool IsFirstComponentList = true; generateInfoForComponentList( - MapType, MapModifiers, llvm::None, Components, CombinedInfo, - PartialStruct, IsFirstComponentList, IsImplicit, Mapper, + MapType, MapModifiers, std::nullopt, Components, CombinedInfo, + StructBaseCombinedInfo, PartialStruct, IsFirstComponentList, + IsImplicit, /*GenerateAllInfoForClauses*/ false, Mapper, /*ForDeviceAddr=*/false, VD, VarRef, OverlappedComponents); + IsFirstComponentList = false; } // Go through other elements without overlapped elements. - bool IsFirstComponentList = OverlappedData.empty(); for (const MapData &L : DeclComponentLists) { OMPClauseMappableExprCommon::MappableExprComponentListRef Components; OpenMPMapClauseKind MapType; @@ -8945,10 +8730,13 @@ public: L; auto It = OverlappedData.find(&L); if (It == OverlappedData.end()) - generateInfoForComponentList(MapType, MapModifiers, llvm::None, - Components, CombinedInfo, PartialStruct, - IsFirstComponentList, IsImplicit, Mapper, - /*ForDeviceAddr=*/false, VD, VarRef); + generateInfoForComponentList( + MapType, MapModifiers, std::nullopt, Components, CombinedInfo, + StructBaseCombinedInfo, PartialStruct, IsFirstComponentList, + IsImplicit, /*GenerateAllInfoForClauses*/ false, Mapper, + /*ForDeviceAddr=*/false, VD, VarRef, + /*OverlappedElements*/ std::nullopt, + HasMapBasePtr && HasMapArraySec); IsFirstComponentList = false; } } @@ -8963,28 +8751,34 @@ public: if (CI.capturesThis()) { CombinedInfo.Exprs.push_back(nullptr); CombinedInfo.BasePointers.push_back(CV); + CombinedInfo.DevicePtrDecls.push_back(nullptr); + 
CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None); CombinedInfo.Pointers.push_back(CV); const auto *PtrTy = cast<PointerType>(RI.getType().getTypePtr()); CombinedInfo.Sizes.push_back( CGF.Builder.CreateIntCast(CGF.getTypeSize(PtrTy->getPointeeType()), CGF.Int64Ty, /*isSigned=*/true)); // Default map type. - CombinedInfo.Types.push_back(OMP_MAP_TO | OMP_MAP_FROM); + CombinedInfo.Types.push_back(OpenMPOffloadMappingFlags::OMP_MAP_TO | + OpenMPOffloadMappingFlags::OMP_MAP_FROM); } else if (CI.capturesVariableByCopy()) { const VarDecl *VD = CI.getCapturedVar(); CombinedInfo.Exprs.push_back(VD->getCanonicalDecl()); CombinedInfo.BasePointers.push_back(CV); + CombinedInfo.DevicePtrDecls.push_back(nullptr); + CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None); CombinedInfo.Pointers.push_back(CV); if (!RI.getType()->isAnyPointerType()) { // We have to signal to the runtime captures passed by value that are // not pointers. - CombinedInfo.Types.push_back(OMP_MAP_LITERAL); + CombinedInfo.Types.push_back( + OpenMPOffloadMappingFlags::OMP_MAP_LITERAL); CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast( CGF.getTypeSize(RI.getType()), CGF.Int64Ty, /*isSigned=*/true)); } else { // Pointers are implicitly mapped with a zero size and no flags // (other than first map that is added for all implicit maps). - CombinedInfo.Types.push_back(OMP_MAP_NONE); + CombinedInfo.Types.push_back(OpenMPOffloadMappingFlags::OMP_MAP_NONE); CombinedInfo.Sizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty)); } auto I = FirstPrivateDecls.find(VD); @@ -9002,40 +8796,28 @@ public: CombinedInfo.Types.push_back(getMapModifiersForPrivateClauses(CI)); const VarDecl *VD = CI.getCapturedVar(); auto I = FirstPrivateDecls.find(VD); - if (I != FirstPrivateDecls.end() && - VD->getType().isConstant(CGF.getContext())) { - llvm::Constant *Addr = - CGF.CGM.getOpenMPRuntime().registerTargetFirstprivateCopy(CGF, VD); - // Copy the value of the original variable to the new global copy. 
- CGF.Builder.CreateMemCpy( - CGF.MakeNaturalAlignAddrLValue(Addr, ElementType).getAddress(CGF), - Address(CV, CGF.getContext().getTypeAlignInChars(ElementType)), - CombinedInfo.Sizes.back(), /*IsVolatile=*/false); - // Use new global variable as the base pointers. - CombinedInfo.Exprs.push_back(VD->getCanonicalDecl()); - CombinedInfo.BasePointers.push_back(Addr); - CombinedInfo.Pointers.push_back(Addr); + CombinedInfo.Exprs.push_back(VD->getCanonicalDecl()); + CombinedInfo.BasePointers.push_back(CV); + CombinedInfo.DevicePtrDecls.push_back(nullptr); + CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None); + if (I != FirstPrivateDecls.end() && ElementType->isAnyPointerType()) { + Address PtrAddr = CGF.EmitLoadOfReference(CGF.MakeAddrLValue( + CV, ElementType, CGF.getContext().getDeclAlign(VD), + AlignmentSource::Decl)); + CombinedInfo.Pointers.push_back(PtrAddr.emitRawPointer(CGF)); } else { - CombinedInfo.Exprs.push_back(VD->getCanonicalDecl()); - CombinedInfo.BasePointers.push_back(CV); - if (I != FirstPrivateDecls.end() && ElementType->isAnyPointerType()) { - Address PtrAddr = CGF.EmitLoadOfReference(CGF.MakeAddrLValue( - CV, ElementType, CGF.getContext().getDeclAlign(VD), - AlignmentSource::Decl)); - CombinedInfo.Pointers.push_back(PtrAddr.getPointer()); - } else { - CombinedInfo.Pointers.push_back(CV); - } + CombinedInfo.Pointers.push_back(CV); } if (I != FirstPrivateDecls.end()) IsImplicit = I->getSecond(); } // Every default map produces a single argument which is a target parameter. - CombinedInfo.Types.back() |= OMP_MAP_TARGET_PARAM; + CombinedInfo.Types.back() |= + OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM; // Add flag stating this is an implicit map. if (IsImplicit) - CombinedInfo.Types.back() |= OMP_MAP_IMPLICIT; + CombinedInfo.Types.back() |= OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT; // No user-defined mapper for default mapping. 
CombinedInfo.Mappers.push_back(nullptr); @@ -9043,72 +8825,16 @@ public: }; } // anonymous namespace -static void emitNonContiguousDescriptor( - CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo, - CGOpenMPRuntime::TargetDataInfo &Info) { - CodeGenModule &CGM = CGF.CGM; - MappableExprsHandler::MapCombinedInfoTy::StructNonContiguousInfo - &NonContigInfo = CombinedInfo.NonContigInfo; +// Try to extract the base declaration from a `this->x` expression if possible. +static ValueDecl *getDeclFromThisExpr(const Expr *E) { + if (!E) + return nullptr; - // Build an array of struct descriptor_dim and then assign it to - // offload_args. - // - // struct descriptor_dim { - // uint64_t offset; - // uint64_t count; - // uint64_t stride - // }; - ASTContext &C = CGF.getContext(); - QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0); - RecordDecl *RD; - RD = C.buildImplicitRecord("descriptor_dim"); - RD->startDefinition(); - addFieldToRecordDecl(C, RD, Int64Ty); - addFieldToRecordDecl(C, RD, Int64Ty); - addFieldToRecordDecl(C, RD, Int64Ty); - RD->completeDefinition(); - QualType DimTy = C.getRecordType(RD); - - enum { OffsetFD = 0, CountFD, StrideFD }; - // We need two index variable here since the size of "Dims" is the same as the - // size of Components, however, the size of offset, count, and stride is equal - // to the size of base declaration that is non-contiguous. - for (unsigned I = 0, L = 0, E = NonContigInfo.Dims.size(); I < E; ++I) { - // Skip emitting ir if dimension size is 1 since it cannot be - // non-contiguous. 
- if (NonContigInfo.Dims[I] == 1) - continue; - llvm::APInt Size(/*numBits=*/32, NonContigInfo.Dims[I]); - QualType ArrayTy = - C.getConstantArrayType(DimTy, Size, nullptr, ArrayType::Normal, 0); - Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims"); - for (unsigned II = 0, EE = NonContigInfo.Dims[I]; II < EE; ++II) { - unsigned RevIdx = EE - II - 1; - LValue DimsLVal = CGF.MakeAddrLValue( - CGF.Builder.CreateConstArrayGEP(DimsAddr, II), DimTy); - // Offset - LValue OffsetLVal = CGF.EmitLValueForField( - DimsLVal, *std::next(RD->field_begin(), OffsetFD)); - CGF.EmitStoreOfScalar(NonContigInfo.Offsets[L][RevIdx], OffsetLVal); - // Count - LValue CountLVal = CGF.EmitLValueForField( - DimsLVal, *std::next(RD->field_begin(), CountFD)); - CGF.EmitStoreOfScalar(NonContigInfo.Counts[L][RevIdx], CountLVal); - // Stride - LValue StrideLVal = CGF.EmitLValueForField( - DimsLVal, *std::next(RD->field_begin(), StrideFD)); - CGF.EmitStoreOfScalar(NonContigInfo.Strides[L][RevIdx], StrideLVal); - } - // args[I] = &dims - Address DAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( - DimsAddr, CGM.Int8PtrTy); - llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32( - llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), - Info.PointersArray, 0, I); - Address PAddr(P, CGF.getPointerAlign()); - CGF.Builder.CreateStore(DAddr.getPointer(), PAddr); - ++L; - } + if (const auto *OASE = dyn_cast<ArraySectionExpr>(E->IgnoreParenCasts())) + if (const MemberExpr *ME = + dyn_cast<MemberExpr>(OASE->getBase()->IgnoreParenImpCasts())) + return ME->getMemberDecl(); + return nullptr; } /// Emit a string constant containing the names of the values mapped to the @@ -9116,30 +8842,35 @@ static void emitNonContiguousDescriptor( llvm::Constant * emitMappingInformation(CodeGenFunction &CGF, llvm::OpenMPIRBuilder &OMPBuilder, MappableExprsHandler::MappingExprInfo &MapExprs) { - llvm::Constant *SrcLocStr; - if (!MapExprs.getMapDecl()) { - SrcLocStr = OMPBuilder.getOrCreateDefaultSrcLocStr(); + + 
uint32_t SrcLocStrSize; + if (!MapExprs.getMapDecl() && !MapExprs.getMapExpr()) + return OMPBuilder.getOrCreateDefaultSrcLocStr(SrcLocStrSize); + + SourceLocation Loc; + if (!MapExprs.getMapDecl() && MapExprs.getMapExpr()) { + if (const ValueDecl *VD = getDeclFromThisExpr(MapExprs.getMapExpr())) + Loc = VD->getLocation(); + else + Loc = MapExprs.getMapExpr()->getExprLoc(); } else { - std::string ExprName = ""; - if (MapExprs.getMapExpr()) { - PrintingPolicy P(CGF.getContext().getLangOpts()); - llvm::raw_string_ostream OS(ExprName); - MapExprs.getMapExpr()->printPretty(OS, nullptr, P); - OS.flush(); - } else { - ExprName = MapExprs.getMapDecl()->getNameAsString(); - } + Loc = MapExprs.getMapDecl()->getLocation(); + } - SourceLocation Loc = MapExprs.getMapDecl()->getLocation(); - PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc); - const char *FileName = PLoc.getFilename(); - unsigned Line = PLoc.getLine(); - unsigned Column = PLoc.getColumn(); - SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(FileName, ExprName.c_str(), - Line, Column); + std::string ExprName; + if (MapExprs.getMapExpr()) { + PrintingPolicy P(CGF.getContext().getLangOpts()); + llvm::raw_string_ostream OS(ExprName); + MapExprs.getMapExpr()->printPretty(OS, nullptr, P); + OS.flush(); + } else { + ExprName = MapExprs.getMapDecl()->getNameAsString(); } - return SrcLocStr; + PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc); + return OMPBuilder.getOrCreateSrcLocStr(PLoc.getFilename(), ExprName, + PLoc.getLine(), PLoc.getColumn(), + SrcLocStrSize); } /// Emit the arrays used to pass the captures and map information to the @@ -9150,260 +8881,45 @@ static void emitOffloadingArrays( CGOpenMPRuntime::TargetDataInfo &Info, llvm::OpenMPIRBuilder &OMPBuilder, bool IsNonContiguous = false) { CodeGenModule &CGM = CGF.CGM; - ASTContext &Ctx = CGF.getContext(); // Reset the array information. 
Info.clearArrayInfo(); Info.NumberOfPtrs = CombinedInfo.BasePointers.size(); - if (Info.NumberOfPtrs) { - // Detect if we have any capture size requiring runtime evaluation of the - // size so that a constant array could be eventually used. - bool hasRuntimeEvaluationCaptureSize = false; - for (llvm::Value *S : CombinedInfo.Sizes) - if (!isa<llvm::Constant>(S)) { - hasRuntimeEvaluationCaptureSize = true; - break; - } - - llvm::APInt PointerNumAP(32, Info.NumberOfPtrs, /*isSigned=*/true); - QualType PointerArrayType = Ctx.getConstantArrayType( - Ctx.VoidPtrTy, PointerNumAP, nullptr, ArrayType::Normal, - /*IndexTypeQuals=*/0); + using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy; + InsertPointTy AllocaIP(CGF.AllocaInsertPt->getParent(), + CGF.AllocaInsertPt->getIterator()); + InsertPointTy CodeGenIP(CGF.Builder.GetInsertBlock(), + CGF.Builder.GetInsertPoint()); - Info.BasePointersArray = - CGF.CreateMemTemp(PointerArrayType, ".offload_baseptrs").getPointer(); - Info.PointersArray = - CGF.CreateMemTemp(PointerArrayType, ".offload_ptrs").getPointer(); - Address MappersArray = - CGF.CreateMemTemp(PointerArrayType, ".offload_mappers"); - Info.MappersArray = MappersArray.getPointer(); - - // If we don't have any VLA types or other types that require runtime - // evaluation, we can use a constant array for the map sizes, otherwise we - // need to fill up the arrays as we do for the pointers. - QualType Int64Ty = - Ctx.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1); - if (hasRuntimeEvaluationCaptureSize) { - QualType SizeArrayType = Ctx.getConstantArrayType( - Int64Ty, PointerNumAP, nullptr, ArrayType::Normal, - /*IndexTypeQuals=*/0); - Info.SizesArray = - CGF.CreateMemTemp(SizeArrayType, ".offload_sizes").getPointer(); - } else { - // We expect all the sizes to be constant, so we collect them to create - // a constant array. 
- SmallVector<llvm::Constant *, 16> ConstSizes; - for (unsigned I = 0, E = CombinedInfo.Sizes.size(); I < E; ++I) { - if (IsNonContiguous && - (CombinedInfo.Types[I] & MappableExprsHandler::OMP_MAP_NON_CONTIG)) { - ConstSizes.push_back(llvm::ConstantInt::get( - CGF.Int64Ty, CombinedInfo.NonContigInfo.Dims[I])); - } else { - ConstSizes.push_back(cast<llvm::Constant>(CombinedInfo.Sizes[I])); - } - } + auto FillInfoMap = [&](MappableExprsHandler::MappingExprInfo &MapExpr) { + return emitMappingInformation(CGF, OMPBuilder, MapExpr); + }; + if (CGM.getCodeGenOpts().getDebugInfo() != + llvm::codegenoptions::NoDebugInfo) { + CombinedInfo.Names.resize(CombinedInfo.Exprs.size()); + llvm::transform(CombinedInfo.Exprs, CombinedInfo.Names.begin(), + FillInfoMap); + } - auto *SizesArrayInit = llvm::ConstantArray::get( - llvm::ArrayType::get(CGM.Int64Ty, ConstSizes.size()), ConstSizes); - std::string Name = CGM.getOpenMPRuntime().getName({"offload_sizes"}); - auto *SizesArrayGbl = new llvm::GlobalVariable( - CGM.getModule(), SizesArrayInit->getType(), - /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage, - SizesArrayInit, Name); - SizesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); - Info.SizesArray = SizesArrayGbl; - } - - // The map types are always constant so we don't need to generate code to - // fill arrays. Instead, we create an array constant. 
- SmallVector<uint64_t, 4> Mapping(CombinedInfo.Types.size(), 0); - llvm::copy(CombinedInfo.Types, Mapping.begin()); - llvm::Constant *MapTypesArrayInit = - llvm::ConstantDataArray::get(CGF.Builder.getContext(), Mapping); - std::string MaptypesName = - CGM.getOpenMPRuntime().getName({"offload_maptypes"}); - auto *MapTypesArrayGbl = new llvm::GlobalVariable( - CGM.getModule(), MapTypesArrayInit->getType(), - /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage, - MapTypesArrayInit, MaptypesName); - MapTypesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); - Info.MapTypesArray = MapTypesArrayGbl; - - // The information types are only built if there is debug information - // requested. - if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo) { - Info.MapNamesArray = llvm::Constant::getNullValue( - llvm::Type::getInt8Ty(CGF.Builder.getContext())->getPointerTo()); - } else { - auto fillInfoMap = [&](MappableExprsHandler::MappingExprInfo &MapExpr) { - return emitMappingInformation(CGF, OMPBuilder, MapExpr); - }; - SmallVector<llvm::Constant *, 4> InfoMap(CombinedInfo.Exprs.size()); - llvm::transform(CombinedInfo.Exprs, InfoMap.begin(), fillInfoMap); - - llvm::Constant *MapNamesArrayInit = llvm::ConstantArray::get( - llvm::ArrayType::get( - llvm::Type::getInt8Ty(CGF.Builder.getContext())->getPointerTo(), - CombinedInfo.Exprs.size()), - InfoMap); - auto *MapNamesArrayGbl = new llvm::GlobalVariable( - CGM.getModule(), MapNamesArrayInit->getType(), - /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage, - MapNamesArrayInit, - CGM.getOpenMPRuntime().getName({"offload_mapnames"})); - Info.MapNamesArray = MapNamesArrayGbl; - } - - // If there's a present map type modifier, it must not be applied to the end - // of a region, so generate a separate map type array in that case. 
- if (Info.separateBeginEndCalls()) { - bool EndMapTypesDiffer = false; - for (uint64_t &Type : Mapping) { - if (Type & MappableExprsHandler::OMP_MAP_PRESENT) { - Type &= ~MappableExprsHandler::OMP_MAP_PRESENT; - EndMapTypesDiffer = true; - } - } - if (EndMapTypesDiffer) { - MapTypesArrayInit = - llvm::ConstantDataArray::get(CGF.Builder.getContext(), Mapping); - MaptypesName = CGM.getOpenMPRuntime().getName({"offload_maptypes"}); - MapTypesArrayGbl = new llvm::GlobalVariable( - CGM.getModule(), MapTypesArrayInit->getType(), - /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage, - MapTypesArrayInit, MaptypesName); - MapTypesArrayGbl->setUnnamedAddr( - llvm::GlobalValue::UnnamedAddr::Global); - Info.MapTypesArrayEnd = MapTypesArrayGbl; - } + auto DeviceAddrCB = [&](unsigned int I, llvm::Value *NewDecl) { + if (const ValueDecl *DevVD = CombinedInfo.DevicePtrDecls[I]) { + Info.CaptureDeviceAddrMap.try_emplace(DevVD, NewDecl); } + }; - for (unsigned I = 0; I < Info.NumberOfPtrs; ++I) { - llvm::Value *BPVal = *CombinedInfo.BasePointers[I]; - llvm::Value *BP = CGF.Builder.CreateConstInBoundsGEP2_32( - llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), - Info.BasePointersArray, 0, I); - BP = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( - BP, BPVal->getType()->getPointerTo(/*AddrSpace=*/0)); - Address BPAddr(BP, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy)); - CGF.Builder.CreateStore(BPVal, BPAddr); - - if (Info.requiresDevicePointerInfo()) - if (const ValueDecl *DevVD = - CombinedInfo.BasePointers[I].getDevicePtrDecl()) - Info.CaptureDeviceAddrMap.try_emplace(DevVD, BPAddr); - - llvm::Value *PVal = CombinedInfo.Pointers[I]; - llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32( - llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), - Info.PointersArray, 0, I); - P = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( - P, PVal->getType()->getPointerTo(/*AddrSpace=*/0)); - Address PAddr(P, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy)); - CGF.Builder.CreateStore(PVal, 
PAddr); - - if (hasRuntimeEvaluationCaptureSize) { - llvm::Value *S = CGF.Builder.CreateConstInBoundsGEP2_32( - llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs), - Info.SizesArray, - /*Idx0=*/0, - /*Idx1=*/I); - Address SAddr(S, Ctx.getTypeAlignInChars(Int64Ty)); - CGF.Builder.CreateStore(CGF.Builder.CreateIntCast(CombinedInfo.Sizes[I], - CGM.Int64Ty, - /*isSigned=*/true), - SAddr); - } - - // Fill up the mapper array. - llvm::Value *MFunc = llvm::ConstantPointerNull::get(CGM.VoidPtrTy); - if (CombinedInfo.Mappers[I]) { - MFunc = CGM.getOpenMPRuntime().getOrCreateUserDefinedMapperFunc( - cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I])); - MFunc = CGF.Builder.CreatePointerCast(MFunc, CGM.VoidPtrTy); - Info.HasMapper = true; - } - Address MAddr = CGF.Builder.CreateConstArrayGEP(MappersArray, I); - CGF.Builder.CreateStore(MFunc, MAddr); + auto CustomMapperCB = [&](unsigned int I) { + llvm::Value *MFunc = nullptr; + if (CombinedInfo.Mappers[I]) { + Info.HasMapper = true; + MFunc = CGF.CGM.getOpenMPRuntime().getOrCreateUserDefinedMapperFunc( + cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I])); } - } - - if (!IsNonContiguous || CombinedInfo.NonContigInfo.Offsets.empty() || - Info.NumberOfPtrs == 0) - return; - - emitNonContiguousDescriptor(CGF, CombinedInfo, Info); -} - -namespace { -/// Additional arguments for emitOffloadingArraysArgument function. -struct ArgumentsOptions { - bool ForEndCall = false; - ArgumentsOptions() = default; - ArgumentsOptions(bool ForEndCall) : ForEndCall(ForEndCall) {} -}; -} // namespace - -/// Emit the arguments to be passed to the runtime library based on the -/// arrays of base pointers, pointers, sizes, map types, and mappers. If -/// ForEndCall, emit map types to be passed for the end of the region instead of -/// the beginning. 
-static void emitOffloadingArraysArgument( - CodeGenFunction &CGF, llvm::Value *&BasePointersArrayArg, - llvm::Value *&PointersArrayArg, llvm::Value *&SizesArrayArg, - llvm::Value *&MapTypesArrayArg, llvm::Value *&MapNamesArrayArg, - llvm::Value *&MappersArrayArg, CGOpenMPRuntime::TargetDataInfo &Info, - const ArgumentsOptions &Options = ArgumentsOptions()) { - assert((!Options.ForEndCall || Info.separateBeginEndCalls()) && - "expected region end call to runtime only when end call is separate"); - CodeGenModule &CGM = CGF.CGM; - if (Info.NumberOfPtrs) { - BasePointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( - llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), - Info.BasePointersArray, - /*Idx0=*/0, /*Idx1=*/0); - PointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( - llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), - Info.PointersArray, - /*Idx0=*/0, - /*Idx1=*/0); - SizesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( - llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs), Info.SizesArray, - /*Idx0=*/0, /*Idx1=*/0); - MapTypesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( - llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs), - Options.ForEndCall && Info.MapTypesArrayEnd ? Info.MapTypesArrayEnd - : Info.MapTypesArray, - /*Idx0=*/0, - /*Idx1=*/0); - - // Only emit the mapper information arrays if debug information is - // requested. 
- if (CGF.CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo) - MapNamesArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy); - else - MapNamesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32( - llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs), - Info.MapNamesArray, - /*Idx0=*/0, - /*Idx1=*/0); - // If there is no user-defined mapper, set the mapper array to nullptr to - // avoid an unnecessary data privatization - if (!Info.HasMapper) - MappersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy); - else - MappersArrayArg = - CGF.Builder.CreatePointerCast(Info.MappersArray, CGM.VoidPtrPtrTy); - } else { - BasePointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy); - PointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy); - SizesArrayArg = llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo()); - MapTypesArrayArg = - llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo()); - MapNamesArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy); - MappersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy); - } + return MFunc; + }; + OMPBuilder.emitOffloadingArrays(AllocaIP, CodeGenIP, CombinedInfo, Info, + /*IsNonContiguous=*/true, DeviceAddrCB, + CustomMapperCB); } /// Check for inner distribute directive. @@ -9420,7 +8936,9 @@ getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) { OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind(); switch (D.getDirectiveKind()) { case OMPD_target: - if (isOpenMPDistributeDirective(DKind)) + // For now, treat 'target' with nested 'teams loop' as if it's + // distributed (target teams distribute). 
+ if (isOpenMPDistributeDirective(DKind) || DKind == OMPD_teams_loop) return NestedDir; if (DKind == OMPD_teams) { Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers( @@ -9463,6 +8981,8 @@ getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) { case OMPD_allocate: case OMPD_task: case OMPD_simd: + case OMPD_tile: + case OMPD_unroll: case OMPD_sections: case OMPD_section: case OMPD_single: @@ -9504,6 +9024,7 @@ getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) { case OMPD_parallel_master_taskloop: case OMPD_parallel_master_taskloop_simd: case OMPD_requires: + case OMPD_metadirective: case OMPD_unknown: default: llvm_unreachable("Unexpected directive."); @@ -9520,14 +9041,16 @@ getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) { /// void *base, void *begin, /// int64_t size, int64_t type, /// void *name = nullptr) { -/// // Allocate space for an array section first. -/// if (size > 1 && !maptype.IsDelete) +/// // Allocate space for an array section first or add a base/begin for +/// // pointer dereference. +/// if ((size > 1 || (base != begin && maptype.IsPtrAndObj)) && +/// !maptype.IsDelete) /// __tgt_push_mapper_component(rt_mapper_handle, base, begin, -/// size*sizeof(Ty), clearToFrom(type)); +/// size*sizeof(Ty), clearToFromMember(type)); /// // Map members. /// for (unsigned i = 0; i < size; i++) { /// // For each component specified by this mapper: -/// for (auto c : all_components) { +/// for (auto c : begin[i]->all_components) { /// if (c.hasMapper()) /// (*c.Mapper())(rt_mapper_handle, c.arg_base, c.arg_begin, c.arg_size, /// c.arg_type, c.arg_name); @@ -9540,7 +9063,7 @@ getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) { /// // Delete the array section. 
/// if (size > 1 && maptype.IsDelete) /// __tgt_push_mapper_component(rt_mapper_handle, base, begin, -/// size*sizeof(Ty), clearToFrom(type)); +/// size*sizeof(Ty), clearToFromMember(type)); /// } /// \endcode void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D, @@ -9555,20 +9078,21 @@ void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D, cast<VarDecl>(cast<DeclRefExpr>(D->getMapperVarRef())->getDecl()); SourceLocation Loc = D->getLocation(); CharUnits ElementSize = C.getTypeSizeInChars(Ty); + llvm::Type *ElemTy = CGM.getTypes().ConvertTypeForMem(Ty); // Prepare mapper function arguments and attributes. ImplicitParamDecl HandleArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, - C.VoidPtrTy, ImplicitParamDecl::Other); + C.VoidPtrTy, ImplicitParamKind::Other); ImplicitParamDecl BaseArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, - ImplicitParamDecl::Other); + ImplicitParamKind::Other); ImplicitParamDecl BeginArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, - C.VoidPtrTy, ImplicitParamDecl::Other); + C.VoidPtrTy, ImplicitParamKind::Other); ImplicitParamDecl SizeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty, - ImplicitParamDecl::Other); + ImplicitParamKind::Other); ImplicitParamDecl TypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty, - ImplicitParamDecl::Other); + ImplicitParamKind::Other); ImplicitParamDecl NameArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, - ImplicitParamDecl::Other); + ImplicitParamKind::Other); FunctionArgList Args; Args.push_back(&HandleArg); Args.push_back(&BaseArg); @@ -9581,7 +9105,7 @@ void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D, llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo); SmallString<64> TyStr; llvm::raw_svector_ostream Out(TyStr); - CGM.getCXXABI().getMangleContext().mangleTypeName(Ty, Out); + CGM.getCXXABI().getMangleContext().mangleCanonicalTypeName(Ty, Out); std::string Name = getName({"omp_mapper", TyStr, D->getName()}); 
auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule()); @@ -9590,20 +9114,10 @@ void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D, // Start the mapper function code generation. CodeGenFunction MapperCGF(CGM); MapperCGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc); - // Compute the starting and end addreses of array elements. + // Compute the starting and end addresses of array elements. llvm::Value *Size = MapperCGF.EmitLoadOfScalar( MapperCGF.GetAddrOfLocalVar(&SizeArg), /*Volatile=*/false, C.getPointerType(Int64Ty), Loc); - // Convert the size in bytes into the number of array elements. - Size = MapperCGF.Builder.CreateExactUDiv( - Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity())); - llvm::Value *PtrBegin = MapperCGF.Builder.CreateBitCast( - MapperCGF.GetAddrOfLocalVar(&BeginArg).getPointer(), - CGM.getTypes().ConvertTypeForMem(C.getPointerType(PtrTy))); - llvm::Value *PtrEnd = MapperCGF.Builder.CreateGEP(PtrBegin, Size); - llvm::Value *MapType = MapperCGF.EmitLoadOfScalar( - MapperCGF.GetAddrOfLocalVar(&TypeArg), /*Volatile=*/false, - C.getPointerType(Int64Ty), Loc); // Prepare common arguments for array initiation and deletion. llvm::Value *Handle = MapperCGF.EmitLoadOfScalar( MapperCGF.GetAddrOfLocalVar(&HandleArg), @@ -9614,12 +9128,24 @@ void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D, llvm::Value *BeginIn = MapperCGF.EmitLoadOfScalar( MapperCGF.GetAddrOfLocalVar(&BeginArg), /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc); + // Convert the size in bytes into the number of array elements. 
+ Size = MapperCGF.Builder.CreateExactUDiv( + Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity())); + llvm::Value *PtrBegin = MapperCGF.Builder.CreateBitCast( + BeginIn, CGM.getTypes().ConvertTypeForMem(PtrTy)); + llvm::Value *PtrEnd = MapperCGF.Builder.CreateGEP(ElemTy, PtrBegin, Size); + llvm::Value *MapType = MapperCGF.EmitLoadOfScalar( + MapperCGF.GetAddrOfLocalVar(&TypeArg), /*Volatile=*/false, + C.getPointerType(Int64Ty), Loc); + llvm::Value *MapName = MapperCGF.EmitLoadOfScalar( + MapperCGF.GetAddrOfLocalVar(&NameArg), + /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc); // Emit array initiation if this is an array section and \p MapType indicates // that memory allocation is required. llvm::BasicBlock *HeadBB = MapperCGF.createBasicBlock("omp.arraymap.head"); emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType, - ElementSize, HeadBB, /*IsInit=*/true); + MapName, ElementSize, HeadBB, /*IsInit=*/true); // Emit a for loop to iterate through SizeArg of elements and map all of them. @@ -9639,23 +9165,19 @@ void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D, llvm::PHINode *PtrPHI = MapperCGF.Builder.CreatePHI( PtrBegin->getType(), 2, "omp.arraymap.ptrcurrent"); PtrPHI->addIncoming(PtrBegin, EntryBB); - Address PtrCurrent = - Address(PtrPHI, MapperCGF.GetAddrOfLocalVar(&BeginArg) - .getAlignment() - .alignmentOfArrayElement(ElementSize)); + Address PtrCurrent(PtrPHI, ElemTy, + MapperCGF.GetAddrOfLocalVar(&BeginArg) + .getAlignment() + .alignmentOfArrayElement(ElementSize)); // Privatize the declared variable of mapper to be the current array element. CodeGenFunction::OMPPrivateScope Scope(MapperCGF); - Scope.addPrivate(MapperVarDecl, [&MapperCGF, PtrCurrent, PtrTy]() { - return MapperCGF - .EmitLoadOfPointerLValue(PtrCurrent, PtrTy->castAs<PointerType>()) - .getAddress(MapperCGF); - }); + Scope.addPrivate(MapperVarDecl, PtrCurrent); (void)Scope.Privatize(); // Get map clause information. 
Fill up the arrays with all mapped variables. MappableExprsHandler::MapCombinedInfoTy Info; MappableExprsHandler MEHandler(*D, MapperCGF); - MEHandler.generateAllInfoForMapper(Info); + MEHandler.generateAllInfoForMapper(Info, OMPBuilder); // Call the runtime API __tgt_mapper_num_components to get the number of // pre-existing components. @@ -9671,38 +9193,22 @@ void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D, // Fill up the runtime mapper handle for all components. for (unsigned I = 0; I < Info.BasePointers.size(); ++I) { llvm::Value *CurBaseArg = MapperCGF.Builder.CreateBitCast( - *Info.BasePointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy)); + Info.BasePointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy)); llvm::Value *CurBeginArg = MapperCGF.Builder.CreateBitCast( Info.Pointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy)); llvm::Value *CurSizeArg = Info.Sizes[I]; llvm::Value *CurNameArg = - (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo) + (CGM.getCodeGenOpts().getDebugInfo() == + llvm::codegenoptions::NoDebugInfo) ? llvm::ConstantPointerNull::get(CGM.VoidPtrTy) : emitMappingInformation(MapperCGF, OMPBuilder, Info.Exprs[I]); // Extract the MEMBER_OF field from the map type. - llvm::BasicBlock *MemberBB = MapperCGF.createBasicBlock("omp.member"); - MapperCGF.EmitBlock(MemberBB); - llvm::Value *OriMapType = MapperCGF.Builder.getInt64(Info.Types[I]); - llvm::Value *Member = MapperCGF.Builder.CreateAnd( - OriMapType, - MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_MEMBER_OF)); - llvm::BasicBlock *MemberCombineBB = - MapperCGF.createBasicBlock("omp.member.combine"); - llvm::BasicBlock *TypeBB = MapperCGF.createBasicBlock("omp.type"); - llvm::Value *IsMember = MapperCGF.Builder.CreateIsNull(Member); - MapperCGF.Builder.CreateCondBr(IsMember, TypeBB, MemberCombineBB); - // Add the number of pre-existing components to the MEMBER_OF field if it - // is valid. 
- MapperCGF.EmitBlock(MemberCombineBB); - llvm::Value *CombinedMember = + llvm::Value *OriMapType = MapperCGF.Builder.getInt64( + static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>( + Info.Types[I])); + llvm::Value *MemberMapType = MapperCGF.Builder.CreateNUWAdd(OriMapType, ShiftedPreviousSize); - // Do nothing if it is not a member of previous components. - MapperCGF.EmitBlock(TypeBB); - llvm::PHINode *MemberMapType = - MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.membermaptype"); - MemberMapType->addIncoming(OriMapType, MemberBB); - MemberMapType->addIncoming(CombinedMember, MemberCombineBB); // Combine the map type inherited from user-defined mapper with that // specified in the program. According to the OMP_MAP_TO and OMP_MAP_FROM @@ -9718,8 +9224,10 @@ void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D, // tofrom | alloc | to | from | tofrom | release | delete llvm::Value *LeftToFrom = MapperCGF.Builder.CreateAnd( MapType, - MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO | - MappableExprsHandler::OMP_MAP_FROM)); + MapperCGF.Builder.getInt64( + static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>( + OpenMPOffloadMappingFlags::OMP_MAP_TO | + OpenMPOffloadMappingFlags::OMP_MAP_FROM))); llvm::BasicBlock *AllocBB = MapperCGF.createBasicBlock("omp.type.alloc"); llvm::BasicBlock *AllocElseBB = MapperCGF.createBasicBlock("omp.type.alloc.else"); @@ -9733,30 +9241,40 @@ void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D, MapperCGF.EmitBlock(AllocBB); llvm::Value *AllocMapType = MapperCGF.Builder.CreateAnd( MemberMapType, - MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO | - MappableExprsHandler::OMP_MAP_FROM))); + MapperCGF.Builder.getInt64( + ~static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>( + OpenMPOffloadMappingFlags::OMP_MAP_TO | + OpenMPOffloadMappingFlags::OMP_MAP_FROM))); MapperCGF.Builder.CreateBr(EndBB); MapperCGF.EmitBlock(AllocElseBB); llvm::Value 
*IsTo = MapperCGF.Builder.CreateICmpEQ( LeftToFrom, - MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO)); + MapperCGF.Builder.getInt64( + static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>( + OpenMPOffloadMappingFlags::OMP_MAP_TO))); MapperCGF.Builder.CreateCondBr(IsTo, ToBB, ToElseBB); // In case of to, clear OMP_MAP_FROM. MapperCGF.EmitBlock(ToBB); llvm::Value *ToMapType = MapperCGF.Builder.CreateAnd( MemberMapType, - MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_FROM)); + MapperCGF.Builder.getInt64( + ~static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>( + OpenMPOffloadMappingFlags::OMP_MAP_FROM))); MapperCGF.Builder.CreateBr(EndBB); MapperCGF.EmitBlock(ToElseBB); llvm::Value *IsFrom = MapperCGF.Builder.CreateICmpEQ( LeftToFrom, - MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_FROM)); + MapperCGF.Builder.getInt64( + static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>( + OpenMPOffloadMappingFlags::OMP_MAP_FROM))); MapperCGF.Builder.CreateCondBr(IsFrom, FromBB, EndBB); // In case of from, clear OMP_MAP_TO. MapperCGF.EmitBlock(FromBB); llvm::Value *FromMapType = MapperCGF.Builder.CreateAnd( MemberMapType, - MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_TO)); + MapperCGF.Builder.getInt64( + ~static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>( + OpenMPOffloadMappingFlags::OMP_MAP_TO))); // In case of tofrom, do nothing. MapperCGF.EmitBlock(EndBB); LastBB = EndBB; @@ -9788,7 +9306,7 @@ void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D, // Update the pointer to point to the next element that needs to be mapped, // and check whether we have mapped all elements. 
llvm::Value *PtrNext = MapperCGF.Builder.CreateConstGEP1_32( - PtrPHI, /*Idx0=*/1, "omp.arraymap.next"); + ElemTy, PtrPHI, /*Idx0=*/1, "omp.arraymap.next"); PtrPHI->addIncoming(PtrNext, LastBB); llvm::Value *IsDone = MapperCGF.Builder.CreateICmpEQ(PtrNext, PtrEnd, "omp.arraymap.isdone"); @@ -9799,7 +9317,7 @@ void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D, // Emit array deletion if this is an array section and \p MapType indicates // that deletion is required. emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType, - ElementSize, DoneBB, /*IsInit=*/false); + MapName, ElementSize, DoneBB, /*IsInit=*/false); // Emit the function exit block. MapperCGF.EmitBlock(DoneBB, /*IsFinished=*/true); @@ -9820,32 +9338,43 @@ void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D, void CGOpenMPRuntime::emitUDMapperArrayInitOrDel( CodeGenFunction &MapperCGF, llvm::Value *Handle, llvm::Value *Base, llvm::Value *Begin, llvm::Value *Size, llvm::Value *MapType, - CharUnits ElementSize, llvm::BasicBlock *ExitBB, bool IsInit) { + llvm::Value *MapName, CharUnits ElementSize, llvm::BasicBlock *ExitBB, + bool IsInit) { StringRef Prefix = IsInit ? ".init" : ".del"; // Evaluate if this is an array section. - llvm::BasicBlock *IsDeleteBB = - MapperCGF.createBasicBlock(getName({"omp.array", Prefix, ".evaldelete"})); llvm::BasicBlock *BodyBB = MapperCGF.createBasicBlock(getName({"omp.array", Prefix})); - llvm::Value *IsArray = MapperCGF.Builder.CreateICmpSGE( + llvm::Value *IsArray = MapperCGF.Builder.CreateICmpSGT( Size, MapperCGF.Builder.getInt64(1), "omp.arrayinit.isarray"); - MapperCGF.Builder.CreateCondBr(IsArray, IsDeleteBB, ExitBB); - - // Evaluate if we are going to delete this section. 
- MapperCGF.EmitBlock(IsDeleteBB); llvm::Value *DeleteBit = MapperCGF.Builder.CreateAnd( MapType, - MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_DELETE)); + MapperCGF.Builder.getInt64( + static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>( + OpenMPOffloadMappingFlags::OMP_MAP_DELETE))); llvm::Value *DeleteCond; + llvm::Value *Cond; if (IsInit) { + // base != begin? + llvm::Value *BaseIsBegin = MapperCGF.Builder.CreateICmpNE(Base, Begin); + // IsPtrAndObj? + llvm::Value *PtrAndObjBit = MapperCGF.Builder.CreateAnd( + MapType, + MapperCGF.Builder.getInt64( + static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>( + OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ))); + PtrAndObjBit = MapperCGF.Builder.CreateIsNotNull(PtrAndObjBit); + BaseIsBegin = MapperCGF.Builder.CreateAnd(BaseIsBegin, PtrAndObjBit); + Cond = MapperCGF.Builder.CreateOr(IsArray, BaseIsBegin); DeleteCond = MapperCGF.Builder.CreateIsNull( DeleteBit, getName({"omp.array", Prefix, ".delete"})); } else { + Cond = IsArray; DeleteCond = MapperCGF.Builder.CreateIsNotNull( DeleteBit, getName({"omp.array", Prefix, ".delete"})); } - MapperCGF.Builder.CreateCondBr(DeleteCond, BodyBB, ExitBB); + Cond = MapperCGF.Builder.CreateAnd(Cond, DeleteCond); + MapperCGF.Builder.CreateCondBr(Cond, BodyBB, ExitBB); MapperCGF.EmitBlock(BodyBB); // Get the array size by multiplying element size and element number (i.e., \p @@ -9856,14 +9385,20 @@ void CGOpenMPRuntime::emitUDMapperArrayInitOrDel( // memory allocation/deletion purpose only. 
llvm::Value *MapTypeArg = MapperCGF.Builder.CreateAnd( MapType, - MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO | - MappableExprsHandler::OMP_MAP_FROM))); - llvm::Value *MapNameArg = llvm::ConstantPointerNull::get(CGM.VoidPtrTy); + MapperCGF.Builder.getInt64( + ~static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>( + OpenMPOffloadMappingFlags::OMP_MAP_TO | + OpenMPOffloadMappingFlags::OMP_MAP_FROM))); + MapTypeArg = MapperCGF.Builder.CreateOr( + MapTypeArg, + MapperCGF.Builder.getInt64( + static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>( + OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT))); // Call the runtime API __tgt_push_mapper_component to fill up the runtime // data structure. llvm::Value *OffloadingArgs[] = {Handle, Base, Begin, - ArraySize, MapTypeArg, MapNameArg}; + ArraySize, MapTypeArg, MapName}; MapperCGF.EmitRuntimeCall( OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), OMPRTL___tgt_push_mapper_component), @@ -9879,32 +9414,276 @@ llvm::Function *CGOpenMPRuntime::getOrCreateUserDefinedMapperFunc( return UDMMap.lookup(D); } -void CGOpenMPRuntime::emitTargetNumIterationsCall( +llvm::Value *CGOpenMPRuntime::emitTargetNumIterationsCall( CodeGenFunction &CGF, const OMPExecutableDirective &D, - llvm::Value *DeviceID, llvm::function_ref<llvm::Value *(CodeGenFunction &CGF, const OMPLoopDirective &D)> SizeEmitter) { OpenMPDirectiveKind Kind = D.getDirectiveKind(); const OMPExecutableDirective *TD = &D; - // Get nested teams distribute kind directive, if any. - if (!isOpenMPDistributeDirective(Kind) || !isOpenMPTeamsDirective(Kind)) + // Get nested teams distribute kind directive, if any. For now, treat + // 'target_teams_loop' as if it's really a target_teams_distribute. 
+ if ((!isOpenMPDistributeDirective(Kind) || !isOpenMPTeamsDirective(Kind)) && + Kind != OMPD_target_teams_loop) TD = getNestedDistributeDirective(CGM.getContext(), D); if (!TD) - return; + return llvm::ConstantInt::get(CGF.Int64Ty, 0); + const auto *LD = cast<OMPLoopDirective>(TD); - auto &&CodeGen = [LD, DeviceID, SizeEmitter, &D, this](CodeGenFunction &CGF, - PrePostActionTy &) { - if (llvm::Value *NumIterations = SizeEmitter(CGF, *LD)) { - llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc()); - llvm::Value *Args[] = {RTLoc, DeviceID, NumIterations}; - CGF.EmitRuntimeCall( - OMPBuilder.getOrCreateRuntimeFunction( - CGM.getModule(), OMPRTL___kmpc_push_target_tripcount_mapper), - Args); + if (llvm::Value *NumIterations = SizeEmitter(CGF, *LD)) + return NumIterations; + return llvm::ConstantInt::get(CGF.Int64Ty, 0); +} + +static void +emitTargetCallFallback(CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn, + const OMPExecutableDirective &D, + llvm::SmallVectorImpl<llvm::Value *> &CapturedVars, + bool RequiresOuterTask, const CapturedStmt &CS, + bool OffloadingMandatory, CodeGenFunction &CGF) { + if (OffloadingMandatory) { + CGF.Builder.CreateUnreachable(); + } else { + if (RequiresOuterTask) { + CapturedVars.clear(); + CGF.GenerateOpenMPCapturedVars(CS, CapturedVars); } + OMPRuntime->emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, + CapturedVars); + } +} + +static llvm::Value *emitDeviceID( + llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device, + CodeGenFunction &CGF) { + // Emit device ID if any. 
+ llvm::Value *DeviceID; + if (Device.getPointer()) { + assert((Device.getInt() == OMPC_DEVICE_unknown || + Device.getInt() == OMPC_DEVICE_device_num) && + "Expected device_num modifier."); + llvm::Value *DevVal = CGF.EmitScalarExpr(Device.getPointer()); + DeviceID = + CGF.Builder.CreateIntCast(DevVal, CGF.Int64Ty, /*isSigned=*/true); + } else { + DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); + } + return DeviceID; +} + +llvm::Value *emitDynCGGroupMem(const OMPExecutableDirective &D, + CodeGenFunction &CGF) { + llvm::Value *DynCGroupMem = CGF.Builder.getInt32(0); + + if (auto *DynMemClause = D.getSingleClause<OMPXDynCGroupMemClause>()) { + CodeGenFunction::RunCleanupsScope DynCGroupMemScope(CGF); + llvm::Value *DynCGroupMemVal = CGF.EmitScalarExpr( + DynMemClause->getSize(), /*IgnoreResultAssign=*/true); + DynCGroupMem = CGF.Builder.CreateIntCast(DynCGroupMemVal, CGF.Int32Ty, + /*isSigned=*/false); + } + return DynCGroupMem; +} + +static void emitTargetCallKernelLaunch( + CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn, + const OMPExecutableDirective &D, + llvm::SmallVectorImpl<llvm::Value *> &CapturedVars, bool RequiresOuterTask, + const CapturedStmt &CS, bool OffloadingMandatory, + llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device, + llvm::Value *OutlinedFnID, CodeGenFunction::OMPTargetDataInfo &InputInfo, + llvm::Value *&MapTypesArray, llvm::Value *&MapNamesArray, + llvm::function_ref<llvm::Value *(CodeGenFunction &CGF, + const OMPLoopDirective &D)> + SizeEmitter, + CodeGenFunction &CGF, CodeGenModule &CGM) { + llvm::OpenMPIRBuilder &OMPBuilder = OMPRuntime->getOMPBuilder(); + + // Fill up the arrays with all the captured variables. + MappableExprsHandler::MapCombinedInfoTy CombinedInfo; + + // Get mappable expression information. 
+ MappableExprsHandler MEHandler(D, CGF); + llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers; + llvm::DenseSet<CanonicalDeclPtr<const Decl>> MappedVarSet; + + auto RI = CS.getCapturedRecordDecl()->field_begin(); + auto *CV = CapturedVars.begin(); + for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(), + CE = CS.capture_end(); + CI != CE; ++CI, ++RI, ++CV) { + MappableExprsHandler::MapCombinedInfoTy CurInfo; + MappableExprsHandler::StructRangeInfoTy PartialStruct; + + // VLA sizes are passed to the outlined region by copy and do not have map + // information associated. + if (CI->capturesVariableArrayType()) { + CurInfo.Exprs.push_back(nullptr); + CurInfo.BasePointers.push_back(*CV); + CurInfo.DevicePtrDecls.push_back(nullptr); + CurInfo.DevicePointers.push_back( + MappableExprsHandler::DeviceInfoTy::None); + CurInfo.Pointers.push_back(*CV); + CurInfo.Sizes.push_back(CGF.Builder.CreateIntCast( + CGF.getTypeSize(RI->getType()), CGF.Int64Ty, /*isSigned=*/true)); + // Copy to the device as an argument. No need to retrieve it. + CurInfo.Types.push_back(OpenMPOffloadMappingFlags::OMP_MAP_LITERAL | + OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM | + OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT); + CurInfo.Mappers.push_back(nullptr); + } else { + // If we have any information in the map clause, we use it, otherwise we + // just do a default mapping. + MEHandler.generateInfoForCapture(CI, *CV, CurInfo, PartialStruct); + if (!CI->capturesThis()) + MappedVarSet.insert(CI->getCapturedVar()); + else + MappedVarSet.insert(nullptr); + if (CurInfo.BasePointers.empty() && !PartialStruct.Base.isValid()) + MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurInfo); + // Generate correct mapping for variables captured by reference in + // lambdas. + if (CI->capturesVariable()) + MEHandler.generateInfoForLambdaCaptures(CI->getCapturedVar(), *CV, + CurInfo, LambdaPointers); + } + // We expect to have at least an element of information for this capture. 
+ assert((!CurInfo.BasePointers.empty() || PartialStruct.Base.isValid()) && + "Non-existing map pointer for capture!"); + assert(CurInfo.BasePointers.size() == CurInfo.Pointers.size() && + CurInfo.BasePointers.size() == CurInfo.Sizes.size() && + CurInfo.BasePointers.size() == CurInfo.Types.size() && + CurInfo.BasePointers.size() == CurInfo.Mappers.size() && + "Inconsistent map information sizes!"); + + // If there is an entry in PartialStruct it means we have a struct with + // individual members mapped. Emit an extra combined entry. + if (PartialStruct.Base.isValid()) { + CombinedInfo.append(PartialStruct.PreliminaryMapData); + MEHandler.emitCombinedEntry( + CombinedInfo, CurInfo.Types, PartialStruct, CI->capturesThis(), + OMPBuilder, nullptr, + !PartialStruct.PreliminaryMapData.BasePointers.empty()); + } + + // We need to append the results of this capture to what we already have. + CombinedInfo.append(CurInfo); + } + // Adjust MEMBER_OF flags for the lambdas captures. + MEHandler.adjustMemberOfForLambdaCaptures( + OMPBuilder, LambdaPointers, CombinedInfo.BasePointers, + CombinedInfo.Pointers, CombinedInfo.Types); + // Map any list items in a map clause that were not captures because they + // weren't referenced within the construct. + MEHandler.generateAllInfo(CombinedInfo, OMPBuilder, MappedVarSet); + + CGOpenMPRuntime::TargetDataInfo Info; + // Fill up the arrays and create the arguments. 
+ emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder); + bool EmitDebug = CGF.CGM.getCodeGenOpts().getDebugInfo() != + llvm::codegenoptions::NoDebugInfo; + OMPBuilder.emitOffloadingArraysArgument(CGF.Builder, Info.RTArgs, Info, + EmitDebug, + /*ForEndCall=*/false); + + InputInfo.NumberOfTargetItems = Info.NumberOfPtrs; + InputInfo.BasePointersArray = Address(Info.RTArgs.BasePointersArray, + CGF.VoidPtrTy, CGM.getPointerAlign()); + InputInfo.PointersArray = + Address(Info.RTArgs.PointersArray, CGF.VoidPtrTy, CGM.getPointerAlign()); + InputInfo.SizesArray = + Address(Info.RTArgs.SizesArray, CGF.Int64Ty, CGM.getPointerAlign()); + InputInfo.MappersArray = + Address(Info.RTArgs.MappersArray, CGF.VoidPtrTy, CGM.getPointerAlign()); + MapTypesArray = Info.RTArgs.MapTypesArray; + MapNamesArray = Info.RTArgs.MapNamesArray; + + auto &&ThenGen = [&OMPRuntime, OutlinedFn, &D, &CapturedVars, + RequiresOuterTask, &CS, OffloadingMandatory, Device, + OutlinedFnID, &InputInfo, &MapTypesArray, &MapNamesArray, + SizeEmitter](CodeGenFunction &CGF, PrePostActionTy &) { + bool IsReverseOffloading = Device.getInt() == OMPC_DEVICE_ancestor; + + if (IsReverseOffloading) { + // Reverse offloading is not supported, so just execute on the host. + // FIXME: This fallback solution is incorrect since it ignores the + // OMP_TARGET_OFFLOAD environment variable. Instead it would be better to + // assert here and ensure SEMA emits an error. 
+ emitTargetCallFallback(OMPRuntime, OutlinedFn, D, CapturedVars, + RequiresOuterTask, CS, OffloadingMandatory, CGF); + return; + } + + bool HasNoWait = D.hasClausesOfKind<OMPNowaitClause>(); + unsigned NumTargetItems = InputInfo.NumberOfTargetItems; + + llvm::Value *BasePointersArray = + InputInfo.BasePointersArray.emitRawPointer(CGF); + llvm::Value *PointersArray = InputInfo.PointersArray.emitRawPointer(CGF); + llvm::Value *SizesArray = InputInfo.SizesArray.emitRawPointer(CGF); + llvm::Value *MappersArray = InputInfo.MappersArray.emitRawPointer(CGF); + + auto &&EmitTargetCallFallbackCB = + [&OMPRuntime, OutlinedFn, &D, &CapturedVars, RequiresOuterTask, &CS, + OffloadingMandatory, &CGF](llvm::OpenMPIRBuilder::InsertPointTy IP) + -> llvm::OpenMPIRBuilder::InsertPointTy { + CGF.Builder.restoreIP(IP); + emitTargetCallFallback(OMPRuntime, OutlinedFn, D, CapturedVars, + RequiresOuterTask, CS, OffloadingMandatory, CGF); + return CGF.Builder.saveIP(); + }; + + llvm::Value *DeviceID = emitDeviceID(Device, CGF); + llvm::Value *NumTeams = OMPRuntime->emitNumTeamsForTargetDirective(CGF, D); + llvm::Value *NumThreads = + OMPRuntime->emitNumThreadsForTargetDirective(CGF, D); + llvm::Value *RTLoc = OMPRuntime->emitUpdateLocation(CGF, D.getBeginLoc()); + llvm::Value *NumIterations = + OMPRuntime->emitTargetNumIterationsCall(CGF, D, SizeEmitter); + llvm::Value *DynCGGroupMem = emitDynCGGroupMem(D, CGF); + llvm::OpenMPIRBuilder::InsertPointTy AllocaIP( + CGF.AllocaInsertPt->getParent(), CGF.AllocaInsertPt->getIterator()); + + llvm::OpenMPIRBuilder::TargetDataRTArgs RTArgs( + BasePointersArray, PointersArray, SizesArray, MapTypesArray, + nullptr /* MapTypesArrayEnd */, MappersArray, MapNamesArray); + + llvm::OpenMPIRBuilder::TargetKernelArgs Args( + NumTargetItems, RTArgs, NumIterations, NumTeams, NumThreads, + DynCGGroupMem, HasNoWait); + + CGF.Builder.restoreIP(OMPRuntime->getOMPBuilder().emitKernelLaunch( + CGF.Builder, OutlinedFn, OutlinedFnID, EmitTargetCallFallbackCB, Args, + 
DeviceID, RTLoc, AllocaIP)); }; - emitInlinedDirective(CGF, OMPD_unknown, CodeGen); + + if (RequiresOuterTask) + CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo); + else + OMPRuntime->emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen); +} + +static void +emitTargetCallElse(CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn, + const OMPExecutableDirective &D, + llvm::SmallVectorImpl<llvm::Value *> &CapturedVars, + bool RequiresOuterTask, const CapturedStmt &CS, + bool OffloadingMandatory, CodeGenFunction &CGF) { + + // Notify that the host version must be executed. + auto &&ElseGen = + [&OMPRuntime, OutlinedFn, &D, &CapturedVars, RequiresOuterTask, &CS, + OffloadingMandatory](CodeGenFunction &CGF, PrePostActionTy &) { + emitTargetCallFallback(OMPRuntime, OutlinedFn, D, CapturedVars, + RequiresOuterTask, CS, OffloadingMandatory, CGF); + }; + + if (RequiresOuterTask) { + CodeGenFunction::OMPTargetDataInfo InputInfo; + CGF.EmitOMPTargetTaskBasedDirective(D, ElseGen, InputInfo); + } else { + OMPRuntime->emitInlinedDirective(CGF, D.getDirectiveKind(), ElseGen); + } } void CGOpenMPRuntime::emitTargetCall( @@ -9917,10 +9696,18 @@ void CGOpenMPRuntime::emitTargetCall( if (!CGF.HaveInsertPoint()) return; - assert(OutlinedFn && "Invalid outlined function!"); + const bool OffloadingMandatory = !CGM.getLangOpts().OpenMPIsTargetDevice && + CGM.getLangOpts().OpenMPOffloadMandatory; - const bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() || - D.hasClausesOfKind<OMPNowaitClause>(); + assert((OffloadingMandatory || OutlinedFn) && "Invalid outlined function!"); + + const bool RequiresOuterTask = + D.hasClausesOfKind<OMPDependClause>() || + D.hasClausesOfKind<OMPNowaitClause>() || + D.hasClausesOfKind<OMPInReductionClause>() || + (CGM.getLangOpts().OpenMP >= 51 && + needsTaskBasedThreadLimit(D.getDirectiveKind()) && + D.hasClausesOfKind<OMPThreadLimitClause>()); llvm::SmallVector<llvm::Value *, 16> CapturedVars; const CapturedStmt &CS = 
*D.getCapturedStmt(OMPD_target); auto &&ArgsCodegen = [&CS, &CapturedVars](CodeGenFunction &CGF, @@ -9932,270 +9719,24 @@ void CGOpenMPRuntime::emitTargetCall( CodeGenFunction::OMPTargetDataInfo InputInfo; llvm::Value *MapTypesArray = nullptr; llvm::Value *MapNamesArray = nullptr; - // Fill up the pointer arrays and transfer execution to the device. - auto &&ThenGen = [this, Device, OutlinedFn, OutlinedFnID, &D, &InputInfo, - &MapTypesArray, &MapNamesArray, &CS, RequiresOuterTask, - &CapturedVars, - SizeEmitter](CodeGenFunction &CGF, PrePostActionTy &) { - if (Device.getInt() == OMPC_DEVICE_ancestor) { - // Reverse offloading is not supported, so just execute on the host. - if (RequiresOuterTask) { - CapturedVars.clear(); - CGF.GenerateOpenMPCapturedVars(CS, CapturedVars); - } - emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars); - return; - } - - // On top of the arrays that were filled up, the target offloading call - // takes as arguments the device id as well as the host pointer. The host - // pointer is used by the runtime library to identify the current target - // region, so it only has to be unique and not necessarily point to - // anything. It could be the pointer to the outlined function that - // implements the target region, but we aren't using that so that the - // compiler doesn't need to keep that, and could therefore inline the host - // function if proven worthwhile during optimization. - - // From this point on, we need to have an ID of the target region defined. - assert(OutlinedFnID && "Invalid outlined function ID!"); - - // Emit device ID if any. 
- llvm::Value *DeviceID; - if (Device.getPointer()) { - assert((Device.getInt() == OMPC_DEVICE_unknown || - Device.getInt() == OMPC_DEVICE_device_num) && - "Expected device_num modifier."); - llvm::Value *DevVal = CGF.EmitScalarExpr(Device.getPointer()); - DeviceID = - CGF.Builder.CreateIntCast(DevVal, CGF.Int64Ty, /*isSigned=*/true); - } else { - DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); - } - - // Emit the number of elements in the offloading arrays. - llvm::Value *PointerNum = - CGF.Builder.getInt32(InputInfo.NumberOfTargetItems); - - // Return value of the runtime offloading call. - llvm::Value *Return; - - llvm::Value *NumTeams = emitNumTeamsForTargetDirective(CGF, D); - llvm::Value *NumThreads = emitNumThreadsForTargetDirective(CGF, D); - - // Source location for the ident struct - llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc()); - - // Emit tripcount for the target loop-based directive. - emitTargetNumIterationsCall(CGF, D, DeviceID, SizeEmitter); - - bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>(); - // The target region is an outlined function launched by the runtime - // via calls __tgt_target() or __tgt_target_teams(). - // - // __tgt_target() launches a target region with one team and one thread, - // executing a serial region. This master thread may in turn launch - // more threads within its team upon encountering a parallel region, - // however, no additional teams can be launched on the device. - // - // __tgt_target_teams() launches a target region with one or more teams, - // each with one or more threads. This call is required for target - // constructs such as: - // 'target teams' - // 'target' / 'teams' - // 'target teams distribute parallel for' - // 'target parallel' - // and so on. - // - // Note that on the host and CPU targets, the runtime implementation of - // these calls simply call the outlined function without forking threads. 
- // The outlined functions themselves have runtime calls to - // __kmpc_fork_teams() and __kmpc_fork() for this purpose, codegen'd by - // the compiler in emitTeamsCall() and emitParallelCall(). - // - // In contrast, on the NVPTX target, the implementation of - // __tgt_target_teams() launches a GPU kernel with the requested number - // of teams and threads so no additional calls to the runtime are required. - if (NumTeams) { - // If we have NumTeams defined this means that we have an enclosed teams - // region. Therefore we also expect to have NumThreads defined. These two - // values should be defined in the presence of a teams directive, - // regardless of having any clauses associated. If the user is using teams - // but no clauses, these two values will be the default that should be - // passed to the runtime library - a 32-bit integer with the value zero. - assert(NumThreads && "Thread limit expression should be available along " - "with number of teams."); - llvm::Value *OffloadingArgs[] = {RTLoc, - DeviceID, - OutlinedFnID, - PointerNum, - InputInfo.BasePointersArray.getPointer(), - InputInfo.PointersArray.getPointer(), - InputInfo.SizesArray.getPointer(), - MapTypesArray, - MapNamesArray, - InputInfo.MappersArray.getPointer(), - NumTeams, - NumThreads}; - Return = CGF.EmitRuntimeCall( - OMPBuilder.getOrCreateRuntimeFunction( - CGM.getModule(), HasNowait - ? OMPRTL___tgt_target_teams_nowait_mapper - : OMPRTL___tgt_target_teams_mapper), - OffloadingArgs); - } else { - llvm::Value *OffloadingArgs[] = {RTLoc, - DeviceID, - OutlinedFnID, - PointerNum, - InputInfo.BasePointersArray.getPointer(), - InputInfo.PointersArray.getPointer(), - InputInfo.SizesArray.getPointer(), - MapTypesArray, - MapNamesArray, - InputInfo.MappersArray.getPointer()}; - Return = CGF.EmitRuntimeCall( - OMPBuilder.getOrCreateRuntimeFunction( - CGM.getModule(), HasNowait ? 
OMPRTL___tgt_target_nowait_mapper - : OMPRTL___tgt_target_mapper), - OffloadingArgs); - } - - // Check the error code and execute the host version if required. - llvm::BasicBlock *OffloadFailedBlock = - CGF.createBasicBlock("omp_offload.failed"); - llvm::BasicBlock *OffloadContBlock = - CGF.createBasicBlock("omp_offload.cont"); - llvm::Value *Failed = CGF.Builder.CreateIsNotNull(Return); - CGF.Builder.CreateCondBr(Failed, OffloadFailedBlock, OffloadContBlock); - - CGF.EmitBlock(OffloadFailedBlock); - if (RequiresOuterTask) { - CapturedVars.clear(); - CGF.GenerateOpenMPCapturedVars(CS, CapturedVars); - } - emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars); - CGF.EmitBranch(OffloadContBlock); - - CGF.EmitBlock(OffloadContBlock, /*IsFinished=*/true); - }; - - // Notify that the host version must be executed. - auto &&ElseGen = [this, &D, OutlinedFn, &CS, &CapturedVars, - RequiresOuterTask](CodeGenFunction &CGF, - PrePostActionTy &) { - if (RequiresOuterTask) { - CapturedVars.clear(); - CGF.GenerateOpenMPCapturedVars(CS, CapturedVars); - } - emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars); - }; - - auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray, - &MapNamesArray, &CapturedVars, RequiresOuterTask, - &CS](CodeGenFunction &CGF, PrePostActionTy &) { - // Fill up the arrays with all the captured variables. - MappableExprsHandler::MapCombinedInfoTy CombinedInfo; - - // Get mappable expression information. 
- MappableExprsHandler MEHandler(D, CGF); - llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers; - llvm::DenseSet<CanonicalDeclPtr<const Decl>> MappedVarSet; - - auto RI = CS.getCapturedRecordDecl()->field_begin(); - auto CV = CapturedVars.begin(); - for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(), - CE = CS.capture_end(); - CI != CE; ++CI, ++RI, ++CV) { - MappableExprsHandler::MapCombinedInfoTy CurInfo; - MappableExprsHandler::StructRangeInfoTy PartialStruct; - - // VLA sizes are passed to the outlined region by copy and do not have map - // information associated. - if (CI->capturesVariableArrayType()) { - CurInfo.Exprs.push_back(nullptr); - CurInfo.BasePointers.push_back(*CV); - CurInfo.Pointers.push_back(*CV); - CurInfo.Sizes.push_back(CGF.Builder.CreateIntCast( - CGF.getTypeSize(RI->getType()), CGF.Int64Ty, /*isSigned=*/true)); - // Copy to the device as an argument. No need to retrieve it. - CurInfo.Types.push_back(MappableExprsHandler::OMP_MAP_LITERAL | - MappableExprsHandler::OMP_MAP_TARGET_PARAM | - MappableExprsHandler::OMP_MAP_IMPLICIT); - CurInfo.Mappers.push_back(nullptr); - } else { - // If we have any information in the map clause, we use it, otherwise we - // just do a default mapping. - MEHandler.generateInfoForCapture(CI, *CV, CurInfo, PartialStruct); - if (!CI->capturesThis()) - MappedVarSet.insert(CI->getCapturedVar()); - else - MappedVarSet.insert(nullptr); - if (CurInfo.BasePointers.empty() && !PartialStruct.Base.isValid()) - MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurInfo); - // Generate correct mapping for variables captured by reference in - // lambdas. - if (CI->capturesVariable()) - MEHandler.generateInfoForLambdaCaptures(CI->getCapturedVar(), *CV, - CurInfo, LambdaPointers); - } - // We expect to have at least an element of information for this capture. 
- assert((!CurInfo.BasePointers.empty() || PartialStruct.Base.isValid()) && - "Non-existing map pointer for capture!"); - assert(CurInfo.BasePointers.size() == CurInfo.Pointers.size() && - CurInfo.BasePointers.size() == CurInfo.Sizes.size() && - CurInfo.BasePointers.size() == CurInfo.Types.size() && - CurInfo.BasePointers.size() == CurInfo.Mappers.size() && - "Inconsistent map information sizes!"); - // If there is an entry in PartialStruct it means we have a struct with - // individual members mapped. Emit an extra combined entry. - if (PartialStruct.Base.isValid()) - MEHandler.emitCombinedEntry(CombinedInfo, CurInfo.Types, PartialStruct, - nullptr, /*NoTargetParam=*/false); - - // We need to append the results of this capture to what we already have. - CombinedInfo.append(CurInfo); - } - // Adjust MEMBER_OF flags for the lambdas captures. - MEHandler.adjustMemberOfForLambdaCaptures( - LambdaPointers, CombinedInfo.BasePointers, CombinedInfo.Pointers, - CombinedInfo.Types); - // Map any list items in a map clause that were not captures because they - // weren't referenced within the construct. - MEHandler.generateAllInfo(CombinedInfo, MappedVarSet); - - TargetDataInfo Info; - // Fill up the arrays and create the arguments. 
- emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder); - emitOffloadingArraysArgument( - CGF, Info.BasePointersArray, Info.PointersArray, Info.SizesArray, - Info.MapTypesArray, Info.MapNamesArray, Info.MappersArray, Info, - {/*ForEndTask=*/false}); - - InputInfo.NumberOfTargetItems = Info.NumberOfPtrs; - InputInfo.BasePointersArray = - Address(Info.BasePointersArray, CGM.getPointerAlign()); - InputInfo.PointersArray = - Address(Info.PointersArray, CGM.getPointerAlign()); - InputInfo.SizesArray = Address(Info.SizesArray, CGM.getPointerAlign()); - InputInfo.MappersArray = Address(Info.MappersArray, CGM.getPointerAlign()); - MapTypesArray = Info.MapTypesArray; - MapNamesArray = Info.MapNamesArray; - if (RequiresOuterTask) - CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo); - else - emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen); + auto &&TargetThenGen = [this, OutlinedFn, &D, &CapturedVars, + RequiresOuterTask, &CS, OffloadingMandatory, Device, + OutlinedFnID, &InputInfo, &MapTypesArray, + &MapNamesArray, SizeEmitter](CodeGenFunction &CGF, + PrePostActionTy &) { + emitTargetCallKernelLaunch(this, OutlinedFn, D, CapturedVars, + RequiresOuterTask, CS, OffloadingMandatory, + Device, OutlinedFnID, InputInfo, MapTypesArray, + MapNamesArray, SizeEmitter, CGF, CGM); }; - auto &&TargetElseGen = [this, &ElseGen, &D, RequiresOuterTask]( - CodeGenFunction &CGF, PrePostActionTy &) { - if (RequiresOuterTask) { - CodeGenFunction::OMPTargetDataInfo InputInfo; - CGF.EmitOMPTargetTaskBasedDirective(D, ElseGen, InputInfo); - } else { - emitInlinedDirective(CGF, D.getDirectiveKind(), ElseGen); - } - }; + auto &&TargetElseGen = + [this, OutlinedFn, &D, &CapturedVars, RequiresOuterTask, &CS, + OffloadingMandatory](CodeGenFunction &CGF, PrePostActionTy &) { + emitTargetCallElse(this, OutlinedFn, D, CapturedVars, RequiresOuterTask, + CS, OffloadingMandatory, CGF); + }; // If we have a target function ID it means that we need to support // offloading, otherwise, 
just execute on the host. We need to execute on host @@ -10227,16 +9768,13 @@ void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S, if (RequiresDeviceCodegen) { const auto &E = *cast<OMPExecutableDirective>(S); - unsigned DeviceID; - unsigned FileID; - unsigned Line; - getTargetEntryUniqueInfo(CGM.getContext(), E.getBeginLoc(), DeviceID, - FileID, Line); + + llvm::TargetRegionEntryInfo EntryInfo = getEntryInfoFromPresumedLoc( + CGM, OMPBuilder, E.getBeginLoc(), ParentName); // Is this a target region that should not be emitted as an entry point? If // so just signal we are done with this target region. - if (!OffloadEntriesInfoManager.hasTargetRegionEntryInfo(DeviceID, FileID, - ParentName, Line)) + if (!OMPBuilder.OffloadInfoManager.hasTargetRegionEntryInfo(EntryInfo)) return; switch (E.getDirectiveKind()) { @@ -10283,6 +9821,14 @@ void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S, CGM, ParentName, cast<OMPTargetTeamsDistributeParallelForSimdDirective>(E)); break; + case OMPD_target_teams_loop: + CodeGenFunction::EmitOMPTargetTeamsGenericLoopDeviceFunction( + CGM, ParentName, cast<OMPTargetTeamsGenericLoopDirective>(E)); + break; + case OMPD_target_parallel_loop: + CodeGenFunction::EmitOMPTargetParallelGenericLoopDeviceFunction( + CGM, ParentName, cast<OMPTargetParallelGenericLoopDirective>(E)); + break; case OMPD_parallel: case OMPD_for: case OMPD_parallel_for: @@ -10297,6 +9843,8 @@ void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S, case OMPD_allocate: case OMPD_task: case OMPD_simd: + case OMPD_tile: + case OMPD_unroll: case OMPD_sections: case OMPD_section: case OMPD_single: @@ -10338,6 +9886,7 @@ void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S, case OMPD_parallel_master_taskloop: case OMPD_parallel_master_taskloop_simd: case OMPD_requires: + case OMPD_metadirective: case OMPD_unknown: default: llvm_unreachable("Unknown target directive for OpenMP device codegen."); @@ -10362,17 +9911,28 @@ void 
CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S, scanForTargetRegionsFunctions(II, ParentName); } +static bool isAssumedToBeNotEmitted(const ValueDecl *VD, bool IsDevice) { + std::optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy = + OMPDeclareTargetDeclAttr::getDeviceType(VD); + if (!DevTy) + return false; + // Do not emit device_type(nohost) functions for the host. + if (!IsDevice && DevTy == OMPDeclareTargetDeclAttr::DT_NoHost) + return true; + // Do not emit device_type(host) functions for the device. + if (IsDevice && DevTy == OMPDeclareTargetDeclAttr::DT_Host) + return true; + return false; +} + bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) { // If emitting code for the host, we do not process FD here. Instead we do // the normal code generation. - if (!CGM.getLangOpts().OpenMPIsDevice) { - if (const auto *FD = dyn_cast<FunctionDecl>(GD.getDecl())) { - Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy = - OMPDeclareTargetDeclAttr::getDeviceType(FD); - // Do not emit device_type(nohost) functions for the host. - if (DevTy && *DevTy == OMPDeclareTargetDeclAttr::DT_NoHost) + if (!CGM.getLangOpts().OpenMPIsTargetDevice) { + if (const auto *FD = dyn_cast<FunctionDecl>(GD.getDecl())) + if (isAssumedToBeNotEmitted(cast<ValueDecl>(FD), + CGM.getLangOpts().OpenMPIsTargetDevice)) return true; - } return false; } @@ -10381,10 +9941,8 @@ bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) { if (const auto *FD = dyn_cast<FunctionDecl>(VD)) { StringRef Name = CGM.getMangledName(GD); scanForTargetRegionsFunctions(FD->getBody(), Name); - Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy = - OMPDeclareTargetDeclAttr::getDeviceType(FD); - // Do not emit device_type(nohost) functions for the host. 
- if (DevTy && *DevTy == OMPDeclareTargetDeclAttr::DT_Host) + if (isAssumedToBeNotEmitted(cast<ValueDecl>(FD), + CGM.getLangOpts().OpenMPIsTargetDevice)) return true; } @@ -10394,7 +9952,11 @@ bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) { } bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) { - if (!CGM.getLangOpts().OpenMPIsDevice) + if (isAssumedToBeNotEmitted(cast<ValueDecl>(GD.getDecl()), + CGM.getLangOpts().OpenMPIsTargetDevice)) + return true; + + if (!CGM.getLangOpts().OpenMPIsTargetDevice) return false; // Check if there are Ctors/Dtors in this declaration and look for target @@ -10415,11 +9977,12 @@ bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) { } // Do not to emit variable if it is not marked as declare target. - llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = + std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration( cast<VarDecl>(GD.getDecl())); if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link || - (*Res == OMPDeclareTargetDeclAttr::MT_To && + ((*Res == OMPDeclareTargetDeclAttr::MT_To || + *Res == OMPDeclareTargetDeclAttr::MT_Enter) && HasRequiresUnifiedSharedMemory)) { DeferredGlobalVariables.insert(cast<VarDecl>(GD.getDecl())); return true; @@ -10427,49 +9990,23 @@ bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) { return false; } -llvm::Constant * -CGOpenMPRuntime::registerTargetFirstprivateCopy(CodeGenFunction &CGF, - const VarDecl *VD) { - assert(VD->getType().isConstant(CGM.getContext()) && - "Expected constant variable."); - StringRef VarName; - llvm::Constant *Addr; - llvm::GlobalValue::LinkageTypes Linkage; - QualType Ty = VD->getType(); - SmallString<128> Buffer; - { - unsigned DeviceID; - unsigned FileID; - unsigned Line; - getTargetEntryUniqueInfo(CGM.getContext(), VD->getLocation(), DeviceID, - FileID, Line); - llvm::raw_svector_ostream OS(Buffer); - OS << "__omp_offloading_firstprivate_" << llvm::format("_%x", 
DeviceID) - << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line; - VarName = OS.str(); - } - Linkage = llvm::GlobalValue::InternalLinkage; - Addr = - getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(Ty), VarName, - getDefaultFirstprivateAddressSpace()); - cast<llvm::GlobalValue>(Addr)->setLinkage(Linkage); - CharUnits VarSize = CGM.getContext().getTypeSizeInChars(Ty); - CGM.addCompilerUsedGlobal(cast<llvm::GlobalValue>(Addr)); - OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo( - VarName, Addr, VarSize, - OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo, Linkage); - return Addr; -} - void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD, llvm::Constant *Addr) { if (CGM.getLangOpts().OMPTargetTriples.empty() && - !CGM.getLangOpts().OpenMPIsDevice) + !CGM.getLangOpts().OpenMPIsTargetDevice) return; - llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = + + std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); + + // If this is an 'extern' declaration we defer to the canonical definition and + // do not emit an offloading entry. + if (Res && *Res != OMPDeclareTargetDeclAttr::MT_Link && + VD->hasExternalStorage()) + return; + if (!Res) { - if (CGM.getLangOpts().OpenMPIsDevice) { + if (CGM.getLangOpts().OpenMPIsTargetDevice) { // Register non-target variables being emitted in device code (debug info // may cause this). StringRef VarName = CGM.getMangledName(VD); @@ -10477,59 +10014,27 @@ void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD, } return; } - // Register declare target variables. 
- OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags; - StringRef VarName; - CharUnits VarSize; - llvm::GlobalValue::LinkageTypes Linkage; - - if (*Res == OMPDeclareTargetDeclAttr::MT_To && - !HasRequiresUnifiedSharedMemory) { - Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo; - VarName = CGM.getMangledName(VD); - if (VD->hasDefinition(CGM.getContext()) != VarDecl::DeclarationOnly) { - VarSize = CGM.getContext().getTypeSizeInChars(VD->getType()); - assert(!VarSize.isZero() && "Expected non-zero size of the variable"); - } else { - VarSize = CharUnits::Zero(); - } - Linkage = CGM.getLLVMLinkageVarDefinition(VD, /*IsConstant=*/false); - // Temp solution to prevent optimizations of the internal variables. - if (CGM.getLangOpts().OpenMPIsDevice && !VD->isExternallyVisible()) { - std::string RefName = getName({VarName, "ref"}); - if (!CGM.GetGlobalValue(RefName)) { - llvm::Constant *AddrRef = - getOrCreateInternalVariable(Addr->getType(), RefName); - auto *GVAddrRef = cast<llvm::GlobalVariable>(AddrRef); - GVAddrRef->setConstant(/*Val=*/true); - GVAddrRef->setLinkage(llvm::GlobalValue::InternalLinkage); - GVAddrRef->setInitializer(Addr); - CGM.addCompilerUsedGlobal(GVAddrRef); - } - } - } else { - assert(((*Res == OMPDeclareTargetDeclAttr::MT_Link) || - (*Res == OMPDeclareTargetDeclAttr::MT_To && - HasRequiresUnifiedSharedMemory)) && - "Declare target attribute must link or to with unified memory."); - if (*Res == OMPDeclareTargetDeclAttr::MT_Link) - Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink; - else - Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo; - if (CGM.getLangOpts().OpenMPIsDevice) { - VarName = Addr->getName(); - Addr = nullptr; - } else { - VarName = getAddrOfDeclareTargetVar(VD).getName(); - Addr = cast<llvm::Constant>(getAddrOfDeclareTargetVar(VD).getPointer()); - } - VarSize = CGM.getPointerSize(); - Linkage = llvm::GlobalValue::WeakAnyLinkage; - } + auto AddrOfGlobal = [&VD, this]() { return 
CGM.GetAddrOfGlobal(VD); }; + auto LinkageForVariable = [&VD, this]() { + return CGM.getLLVMLinkageVarDefinition(VD); + }; + + std::vector<llvm::GlobalVariable *> GeneratedRefs; + OMPBuilder.registerTargetGlobalVariable( + convertCaptureClause(VD), convertDeviceClause(VD), + VD->hasDefinition(CGM.getContext()) == VarDecl::DeclarationOnly, + VD->isExternallyVisible(), + getEntryInfoFromPresumedLoc(CGM, OMPBuilder, + VD->getCanonicalDecl()->getBeginLoc()), + CGM.getMangledName(VD), GeneratedRefs, CGM.getLangOpts().OpenMPSimd, + CGM.getLangOpts().OMPTargetTriples, AddrOfGlobal, LinkageForVariable, + CGM.getTypes().ConvertTypeForMem( + CGM.getContext().getPointerType(VD->getType())), + Addr); - OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo( - VarName, Addr, VarSize, Flags, Linkage); + for (auto *ref : GeneratedRefs) + CGM.addCompilerUsedGlobal(ref); } bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) { @@ -10542,16 +10047,18 @@ bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) { void CGOpenMPRuntime::emitDeferredTargetDecls() const { for (const VarDecl *VD : DeferredGlobalVariables) { - llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = + std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); if (!Res) continue; - if (*Res == OMPDeclareTargetDeclAttr::MT_To && + if ((*Res == OMPDeclareTargetDeclAttr::MT_To || + *Res == OMPDeclareTargetDeclAttr::MT_Enter) && !HasRequiresUnifiedSharedMemory) { CGM.EmitGlobal(VD); } else { assert((*Res == OMPDeclareTargetDeclAttr::MT_Link || - (*Res == OMPDeclareTargetDeclAttr::MT_To && + ((*Res == OMPDeclareTargetDeclAttr::MT_To || + *Res == OMPDeclareTargetDeclAttr::MT_Enter) && HasRequiresUnifiedSharedMemory)) && "Expected link clause or to clause with unified memory."); (void)CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD); @@ -10569,6 +10076,7 @@ void CGOpenMPRuntime::processRequiresDirective(const OMPRequiresDecl *D) { for (const OMPClause 
*Clause : D->clauselists()) { if (Clause->getClauseKind() == OMPC_unified_shared_memory) { HasRequiresUnifiedSharedMemory = true; + OMPBuilder.Config.setHasRequiresUnifiedSharedMemory(true); } else if (const auto *AC = dyn_cast<OMPAtomicDefaultMemOrderClause>(Clause)) { switch (AC->getAtomicDefaultMemOrderKind()) { @@ -10624,19 +10132,19 @@ bool CGOpenMPRuntime::hasRequiresUnifiedSharedMemory() const { CGOpenMPRuntime::DisableAutoDeclareTargetRAII::DisableAutoDeclareTargetRAII( CodeGenModule &CGM) : CGM(CGM) { - if (CGM.getLangOpts().OpenMPIsDevice) { + if (CGM.getLangOpts().OpenMPIsTargetDevice) { SavedShouldMarkAsGlobal = CGM.getOpenMPRuntime().ShouldMarkAsGlobal; CGM.getOpenMPRuntime().ShouldMarkAsGlobal = false; } } CGOpenMPRuntime::DisableAutoDeclareTargetRAII::~DisableAutoDeclareTargetRAII() { - if (CGM.getLangOpts().OpenMPIsDevice) + if (CGM.getLangOpts().OpenMPIsTargetDevice) CGM.getOpenMPRuntime().ShouldMarkAsGlobal = SavedShouldMarkAsGlobal; } bool CGOpenMPRuntime::markAsGlobalTarget(GlobalDecl GD) { - if (!CGM.getLangOpts().OpenMPIsDevice || !ShouldMarkAsGlobal) + if (!CGM.getLangOpts().OpenMPIsTargetDevice || !ShouldMarkAsGlobal) return true; const auto *D = cast<FunctionDecl>(GD.getDecl()); @@ -10655,48 +10163,6 @@ bool CGOpenMPRuntime::markAsGlobalTarget(GlobalDecl GD) { return !AlreadyEmittedTargetDecls.insert(D).second; } -llvm::Function *CGOpenMPRuntime::emitRequiresDirectiveRegFun() { - // If we don't have entries or if we are emitting code for the device, we - // don't need to do anything. - if (CGM.getLangOpts().OMPTargetTriples.empty() || - CGM.getLangOpts().OpenMPSimd || CGM.getLangOpts().OpenMPIsDevice || - (OffloadEntriesInfoManager.empty() && - !HasEmittedDeclareTargetRegion && - !HasEmittedTargetRegion)) - return nullptr; - - // Create and register the function that handles the requires directives. 
- ASTContext &C = CGM.getContext(); - - llvm::Function *RequiresRegFn; - { - CodeGenFunction CGF(CGM); - const auto &FI = CGM.getTypes().arrangeNullaryFunction(); - llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); - std::string ReqName = getName({"omp_offloading", "requires_reg"}); - RequiresRegFn = CGM.CreateGlobalInitOrCleanUpFunction(FTy, ReqName, FI); - CGF.StartFunction(GlobalDecl(), C.VoidTy, RequiresRegFn, FI, {}); - OpenMPOffloadingRequiresDirFlags Flags = OMP_REQ_NONE; - // TODO: check for other requires clauses. - // The requires directive takes effect only when a target region is - // present in the compilation unit. Otherwise it is ignored and not - // passed to the runtime. This avoids the runtime from throwing an error - // for mismatching requires clauses across compilation units that don't - // contain at least 1 target region. - assert((HasEmittedTargetRegion || - HasEmittedDeclareTargetRegion || - !OffloadEntriesInfoManager.empty()) && - "Target or declare target region expected."); - if (HasRequiresUnifiedSharedMemory) - Flags = OMP_REQ_UNIFIED_SHARED_MEMORY; - CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( - CGM.getModule(), OMPRTL___tgt_register_requires), - llvm::ConstantInt::get(CGM.Int64Ty, Flags)); - CGF.FinishFunction(); - } - return RequiresRegFn; -} - void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF, const OMPExecutableDirective &D, SourceLocation Loc, @@ -10751,9 +10217,28 @@ void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF, PushNumTeamsArgs); } +void CGOpenMPRuntime::emitThreadLimitClause(CodeGenFunction &CGF, + const Expr *ThreadLimit, + SourceLocation Loc) { + llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc); + llvm::Value *ThreadLimitVal = + ThreadLimit + ? 
CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit), + CGF.CGM.Int32Ty, /* isSigned = */ true) + : CGF.Builder.getInt32(0); + + // Build call __kmpc_set_thread_limit(&loc, global_tid, thread_limit) + llvm::Value *ThreadLimitArgs[] = {RTLoc, getThreadID(CGF, Loc), + ThreadLimitVal}; + CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( + CGM.getModule(), OMPRTL___kmpc_set_thread_limit), + ThreadLimitArgs); +} + void CGOpenMPRuntime::emitTargetDataCalls( CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, - const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) { + const Expr *Device, const RegionCodeGenTy &CodeGen, + CGOpenMPRuntime::TargetDataInfo &Info) { if (!CGF.HaveInsertPoint()) return; @@ -10761,148 +10246,94 @@ void CGOpenMPRuntime::emitTargetDataCalls( // off. PrePostActionTy NoPrivAction; - // Generate the code for the opening of the data environment. Capture all the - // arguments of the runtime call by reference because they are used in the - // closing of the region. - auto &&BeginThenGen = [this, &D, Device, &Info, - &CodeGen](CodeGenFunction &CGF, PrePostActionTy &) { - // Fill up the arrays with all the mapped variables. - MappableExprsHandler::MapCombinedInfoTy CombinedInfo; + using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy; - // Get map clause information. - MappableExprsHandler MEHandler(D, CGF); - MEHandler.generateAllInfo(CombinedInfo); + llvm::Value *IfCondVal = nullptr; + if (IfCond) + IfCondVal = CGF.EvaluateExprAsBool(IfCond); - // Fill up the arrays and create the arguments. - emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder, - /*IsNonContiguous=*/true); + // Emit device ID if any. 
+ llvm::Value *DeviceID = nullptr; + if (Device) { + DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), + CGF.Int64Ty, /*isSigned=*/true); + } else { + DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); + } - llvm::Value *BasePointersArrayArg = nullptr; - llvm::Value *PointersArrayArg = nullptr; - llvm::Value *SizesArrayArg = nullptr; - llvm::Value *MapTypesArrayArg = nullptr; - llvm::Value *MapNamesArrayArg = nullptr; - llvm::Value *MappersArrayArg = nullptr; - emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg, - SizesArrayArg, MapTypesArrayArg, - MapNamesArrayArg, MappersArrayArg, Info); + // Fill up the arrays with all the mapped variables. + MappableExprsHandler::MapCombinedInfoTy CombinedInfo; + auto GenMapInfoCB = + [&](InsertPointTy CodeGenIP) -> llvm::OpenMPIRBuilder::MapInfosTy & { + CGF.Builder.restoreIP(CodeGenIP); + // Get map clause information. + MappableExprsHandler MEHandler(D, CGF); + MEHandler.generateAllInfo(CombinedInfo, OMPBuilder); - // Emit device ID if any. - llvm::Value *DeviceID = nullptr; - if (Device) { - DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), - CGF.Int64Ty, /*isSigned=*/true); - } else { - DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); + auto FillInfoMap = [&](MappableExprsHandler::MappingExprInfo &MapExpr) { + return emitMappingInformation(CGF, OMPBuilder, MapExpr); + }; + if (CGM.getCodeGenOpts().getDebugInfo() != + llvm::codegenoptions::NoDebugInfo) { + CombinedInfo.Names.resize(CombinedInfo.Exprs.size()); + llvm::transform(CombinedInfo.Exprs, CombinedInfo.Names.begin(), + FillInfoMap); } - // Emit the number of elements in the offloading arrays. 
- llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs); - // - // Source location for the ident struct - llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc()); - - llvm::Value *OffloadingArgs[] = {RTLoc, - DeviceID, - PointerNum, - BasePointersArrayArg, - PointersArrayArg, - SizesArrayArg, - MapTypesArrayArg, - MapNamesArrayArg, - MappersArrayArg}; - CGF.EmitRuntimeCall( - OMPBuilder.getOrCreateRuntimeFunction( - CGM.getModule(), OMPRTL___tgt_target_data_begin_mapper), - OffloadingArgs); - - // If device pointer privatization is required, emit the body of the region - // here. It will have to be duplicated: with and without privatization. - if (!Info.CaptureDeviceAddrMap.empty()) - CodeGen(CGF); + return CombinedInfo; }; - - // Generate code for the closing of the data region. - auto &&EndThenGen = [this, Device, &Info, &D](CodeGenFunction &CGF, - PrePostActionTy &) { - assert(Info.isValid() && "Invalid data environment closing arguments."); - - llvm::Value *BasePointersArrayArg = nullptr; - llvm::Value *PointersArrayArg = nullptr; - llvm::Value *SizesArrayArg = nullptr; - llvm::Value *MapTypesArrayArg = nullptr; - llvm::Value *MapNamesArrayArg = nullptr; - llvm::Value *MappersArrayArg = nullptr; - emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg, - SizesArrayArg, MapTypesArrayArg, - MapNamesArrayArg, MappersArrayArg, Info, - {/*ForEndCall=*/true}); - - // Emit device ID if any. 
- llvm::Value *DeviceID = nullptr; - if (Device) { - DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), - CGF.Int64Ty, /*isSigned=*/true); - } else { - DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); + using BodyGenTy = llvm::OpenMPIRBuilder::BodyGenTy; + auto BodyCB = [&](InsertPointTy CodeGenIP, BodyGenTy BodyGenType) { + CGF.Builder.restoreIP(CodeGenIP); + switch (BodyGenType) { + case BodyGenTy::Priv: + if (!Info.CaptureDeviceAddrMap.empty()) + CodeGen(CGF); + break; + case BodyGenTy::DupNoPriv: + if (!Info.CaptureDeviceAddrMap.empty()) { + CodeGen.setAction(NoPrivAction); + CodeGen(CGF); + } + break; + case BodyGenTy::NoPriv: + if (Info.CaptureDeviceAddrMap.empty()) { + CodeGen.setAction(NoPrivAction); + CodeGen(CGF); + } + break; } - - // Emit the number of elements in the offloading arrays. - llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs); - - // Source location for the ident struct - llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc()); - - llvm::Value *OffloadingArgs[] = {RTLoc, - DeviceID, - PointerNum, - BasePointersArrayArg, - PointersArrayArg, - SizesArrayArg, - MapTypesArrayArg, - MapNamesArrayArg, - MappersArrayArg}; - CGF.EmitRuntimeCall( - OMPBuilder.getOrCreateRuntimeFunction( - CGM.getModule(), OMPRTL___tgt_target_data_end_mapper), - OffloadingArgs); + return InsertPointTy(CGF.Builder.GetInsertBlock(), + CGF.Builder.GetInsertPoint()); }; - // If we need device pointer privatization, we need to emit the body of the - // region with no privatization in the 'else' branch of the conditional. - // Otherwise, we don't have to do anything. 
- auto &&BeginElseGen = [&Info, &CodeGen, &NoPrivAction](CodeGenFunction &CGF, - PrePostActionTy &) { - if (!Info.CaptureDeviceAddrMap.empty()) { - CodeGen.setAction(NoPrivAction); - CodeGen(CGF); + auto DeviceAddrCB = [&](unsigned int I, llvm::Value *NewDecl) { + if (const ValueDecl *DevVD = CombinedInfo.DevicePtrDecls[I]) { + Info.CaptureDeviceAddrMap.try_emplace(DevVD, NewDecl); } }; - // We don't have to do anything to close the region if the if clause evaluates - // to false. - auto &&EndElseGen = [](CodeGenFunction &CGF, PrePostActionTy &) {}; - - if (IfCond) { - emitIfClause(CGF, IfCond, BeginThenGen, BeginElseGen); - } else { - RegionCodeGenTy RCG(BeginThenGen); - RCG(CGF); - } + auto CustomMapperCB = [&](unsigned int I) { + llvm::Value *MFunc = nullptr; + if (CombinedInfo.Mappers[I]) { + Info.HasMapper = true; + MFunc = CGF.CGM.getOpenMPRuntime().getOrCreateUserDefinedMapperFunc( + cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I])); + } + return MFunc; + }; - // If we don't require privatization of device pointers, we emit the body in - // between the runtime calls. This avoids duplicating the body code. 
- if (Info.CaptureDeviceAddrMap.empty()) { - CodeGen.setAction(NoPrivAction); - CodeGen(CGF); - } + // Source location for the ident struct + llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc()); - if (IfCond) { - emitIfClause(CGF, IfCond, EndThenGen, EndElseGen); - } else { - RegionCodeGenTy RCG(EndThenGen); - RCG(CGF); - } + InsertPointTy AllocaIP(CGF.AllocaInsertPt->getParent(), + CGF.AllocaInsertPt->getIterator()); + InsertPointTy CodeGenIP(CGF.Builder.GetInsertBlock(), + CGF.Builder.GetInsertPoint()); + llvm::OpenMPIRBuilder::LocationDescription OmpLoc(CodeGenIP); + CGF.Builder.restoreIP(OMPBuilder.createTargetData( + OmpLoc, AllocaIP, CodeGenIP, DeviceID, IfCondVal, Info, GenMapInfoCB, + /*MapperFunc=*/nullptr, BodyCB, DeviceAddrCB, CustomMapperCB, RTLoc)); } void CGOpenMPRuntime::emitTargetDataStandAloneCall( @@ -10938,15 +10369,12 @@ void CGOpenMPRuntime::emitTargetDataStandAloneCall( // Source location for the ident struct llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc()); - llvm::Value *OffloadingArgs[] = {RTLoc, - DeviceID, - PointerNum, - InputInfo.BasePointersArray.getPointer(), - InputInfo.PointersArray.getPointer(), - InputInfo.SizesArray.getPointer(), - MapTypesArray, - MapNamesArray, - InputInfo.MappersArray.getPointer()}; + SmallVector<llvm::Value *, 13> OffloadingArgs( + {RTLoc, DeviceID, PointerNum, + InputInfo.BasePointersArray.emitRawPointer(CGF), + InputInfo.PointersArray.emitRawPointer(CGF), + InputInfo.SizesArray.emitRawPointer(CGF), MapTypesArray, MapNamesArray, + InputInfo.MappersArray.emitRawPointer(CGF)}); // Select the right runtime function call for each standalone // directive. 
@@ -10979,6 +10407,8 @@ void CGOpenMPRuntime::emitTargetDataStandAloneCall( case OMPD_allocate: case OMPD_task: case OMPD_simd: + case OMPD_tile: + case OMPD_unroll: case OMPD_sections: case OMPD_section: case OMPD_single: @@ -11027,11 +10457,18 @@ void CGOpenMPRuntime::emitTargetDataStandAloneCall( case OMPD_target_parallel_for: case OMPD_target_parallel_for_simd: case OMPD_requires: + case OMPD_metadirective: case OMPD_unknown: default: llvm_unreachable("Unexpected standalone target data directive."); break; } + if (HasNowait) { + OffloadingArgs.push_back(llvm::Constant::getNullValue(CGF.Int32Ty)); + OffloadingArgs.push_back(llvm::Constant::getNullValue(CGF.VoidPtrTy)); + OffloadingArgs.push_back(llvm::Constant::getNullValue(CGF.Int32Ty)); + OffloadingArgs.push_back(llvm::Constant::getNullValue(CGF.VoidPtrTy)); + } CGF.EmitRuntimeCall( OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), RTLFn), OffloadingArgs); @@ -11045,28 +10482,30 @@ void CGOpenMPRuntime::emitTargetDataStandAloneCall( // Get map clause information. MappableExprsHandler MEHandler(D, CGF); - MEHandler.generateAllInfo(CombinedInfo); + MEHandler.generateAllInfo(CombinedInfo, OMPBuilder); - TargetDataInfo Info; + CGOpenMPRuntime::TargetDataInfo Info; // Fill up the arrays and create the arguments. 
emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder, /*IsNonContiguous=*/true); bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() || D.hasClausesOfKind<OMPNowaitClause>(); - emitOffloadingArraysArgument( - CGF, Info.BasePointersArray, Info.PointersArray, Info.SizesArray, - Info.MapTypesArray, Info.MapNamesArray, Info.MappersArray, Info, - {/*ForEndTask=*/false}); + bool EmitDebug = CGF.CGM.getCodeGenOpts().getDebugInfo() != + llvm::codegenoptions::NoDebugInfo; + OMPBuilder.emitOffloadingArraysArgument(CGF.Builder, Info.RTArgs, Info, + EmitDebug, + /*ForEndCall=*/false); InputInfo.NumberOfTargetItems = Info.NumberOfPtrs; - InputInfo.BasePointersArray = - Address(Info.BasePointersArray, CGM.getPointerAlign()); - InputInfo.PointersArray = - Address(Info.PointersArray, CGM.getPointerAlign()); + InputInfo.BasePointersArray = Address(Info.RTArgs.BasePointersArray, + CGF.VoidPtrTy, CGM.getPointerAlign()); + InputInfo.PointersArray = Address(Info.RTArgs.PointersArray, CGF.VoidPtrTy, + CGM.getPointerAlign()); InputInfo.SizesArray = - Address(Info.SizesArray, CGM.getPointerAlign()); - InputInfo.MappersArray = Address(Info.MappersArray, CGM.getPointerAlign()); - MapTypesArray = Info.MapTypesArray; - MapNamesArray = Info.MapNamesArray; + Address(Info.RTArgs.SizesArray, CGF.Int64Ty, CGM.getPointerAlign()); + InputInfo.MappersArray = + Address(Info.RTArgs.MappersArray, CGF.VoidPtrTy, CGM.getPointerAlign()); + MapTypesArray = Info.RTArgs.MapTypesArray; + MapNamesArray = Info.RTArgs.MapNamesArray; if (RequiresOuterTask) CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo); else @@ -11084,13 +10523,21 @@ void CGOpenMPRuntime::emitTargetDataStandAloneCall( namespace { /// Kind of parameter in a function with 'declare simd' directive. - enum ParamKindTy { LinearWithVarStride, Linear, Uniform, Vector }; - /// Attribute set of the parameter. 
- struct ParamAttrTy { - ParamKindTy Kind = Vector; - llvm::APSInt StrideOrArg; - llvm::APSInt Alignment; - }; +enum ParamKindTy { + Linear, + LinearRef, + LinearUVal, + LinearVal, + Uniform, + Vector, +}; +/// Attribute set of the parameter. +struct ParamAttrTy { + ParamKindTy Kind = Vector; + llvm::APSInt StrideOrArg; + llvm::APSInt Alignment; + bool HasVarStride = false; +}; } // namespace static unsigned evaluateCDTSize(const FunctionDecl *FD, @@ -11145,6 +10592,52 @@ static unsigned evaluateCDTSize(const FunctionDecl *FD, return C.getTypeSize(CDT); } +/// Mangle the parameter part of the vector function name according to +/// their OpenMP classification. The mangling function is defined in +/// section 4.5 of the AAVFABI(2021Q1). +static std::string mangleVectorParameters(ArrayRef<ParamAttrTy> ParamAttrs) { + SmallString<256> Buffer; + llvm::raw_svector_ostream Out(Buffer); + for (const auto &ParamAttr : ParamAttrs) { + switch (ParamAttr.Kind) { + case Linear: + Out << 'l'; + break; + case LinearRef: + Out << 'R'; + break; + case LinearUVal: + Out << 'U'; + break; + case LinearVal: + Out << 'L'; + break; + case Uniform: + Out << 'u'; + break; + case Vector: + Out << 'v'; + break; + } + if (ParamAttr.HasVarStride) + Out << "s" << ParamAttr.StrideOrArg; + else if (ParamAttr.Kind == Linear || ParamAttr.Kind == LinearRef || + ParamAttr.Kind == LinearUVal || ParamAttr.Kind == LinearVal) { + // Don't print the step value if it is not present or if it is + // equal to 1. 
+ if (ParamAttr.StrideOrArg < 0) + Out << 'n' << -ParamAttr.StrideOrArg; + else if (ParamAttr.StrideOrArg != 1) + Out << ParamAttr.StrideOrArg; + } + + if (!!ParamAttr.Alignment) + Out << 'a' << ParamAttr.Alignment; + } + + return std::string(Out.str()); +} + static void emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn, const llvm::APSInt &VLENVal, @@ -11193,26 +10686,7 @@ emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn, } else { Out << VLENVal; } - for (const ParamAttrTy &ParamAttr : ParamAttrs) { - switch (ParamAttr.Kind){ - case LinearWithVarStride: - Out << 's' << ParamAttr.StrideOrArg; - break; - case Linear: - Out << 'l'; - if (ParamAttr.StrideOrArg != 1) - Out << ParamAttr.StrideOrArg; - break; - case Uniform: - Out << 'u'; - break; - case Vector: - Out << 'v'; - break; - } - if (!!ParamAttr.Alignment) - Out << 'a' << ParamAttr.Alignment; - } + Out << mangleVectorParameters(ParamAttrs); Out << '_' << Fn->getName(); Fn->addFnAttr(Out.str()); } @@ -11225,11 +10699,7 @@ emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn, // available at // https://developer.arm.com/products/software-development-tools/hpc/arm-compiler-for-hpc/vector-function-abi. -/// Maps To Vector (MTV), as defined in 3.1.1 of the AAVFABI. -/// -/// TODO: Need to implement the behavior for reference marked with a -/// var or no linear modifiers (1.b in the section). For this, we -/// need to extend ParamKindTy to support the linear modifiers. +/// Maps To Vector (MTV), as defined in 4.1.1 of the AAVFABI (2021Q1). 
static bool getAArch64MTV(QualType QT, ParamKindTy Kind) { QT = QT.getCanonicalType(); @@ -11239,12 +10709,11 @@ static bool getAArch64MTV(QualType QT, ParamKindTy Kind) { if (Kind == ParamKindTy::Uniform) return false; - if (Kind == ParamKindTy::Linear) + if (Kind == ParamKindTy::LinearUVal || Kind == ParamKindTy::LinearRef) return false; - // TODO: Handle linear references with modifiers - - if (Kind == ParamKindTy::LinearWithVarStride) + if ((Kind == ParamKindTy::Linear || Kind == ParamKindTy::LinearVal) && + !QT->isReferenceType()) return false; return true; @@ -11313,11 +10782,11 @@ getNDSWDS(const FunctionDecl *FD, ArrayRef<ParamAttrTy> ParamAttrs) { assert(!Sizes.empty() && "Unable to determine NDS and WDS."); // The LS of a function parameter / return value can only be a power // of 2, starting from 8 bits, up to 128. - assert(std::all_of(Sizes.begin(), Sizes.end(), - [](unsigned Size) { - return Size == 8 || Size == 16 || Size == 32 || - Size == 64 || Size == 128; - }) && + assert(llvm::all_of(Sizes, + [](unsigned Size) { + return Size == 8 || Size == 16 || Size == 32 || + Size == 64 || Size == 128; + }) && "Invalid size"); return std::make_tuple(*std::min_element(std::begin(Sizes), std::end(Sizes)), @@ -11325,39 +10794,6 @@ getNDSWDS(const FunctionDecl *FD, ArrayRef<ParamAttrTy> ParamAttrs) { OutputBecomesInput); } -/// Mangle the parameter part of the vector function name according to -/// their OpenMP classification. The mangling function is defined in -/// section 3.5 of the AAVFABI. -static std::string mangleVectorParameters(ArrayRef<ParamAttrTy> ParamAttrs) { - SmallString<256> Buffer; - llvm::raw_svector_ostream Out(Buffer); - for (const auto &ParamAttr : ParamAttrs) { - switch (ParamAttr.Kind) { - case LinearWithVarStride: - Out << "ls" << ParamAttr.StrideOrArg; - break; - case Linear: - Out << 'l'; - // Don't print the step value if it is not present or if it is - // equal to 1. 
- if (ParamAttr.StrideOrArg != 1) - Out << ParamAttr.StrideOrArg; - break; - case Uniform: - Out << 'u'; - break; - case Vector: - Out << 'v'; - break; - } - - if (!!ParamAttr.Alignment) - Out << 'a' << ParamAttr.Alignment; - } - - return std::string(Out.str()); -} - // Function used to add the attribute. The parameter `VLEN` is // templated to allow the use of "x" when targeting scalable functions // for SVE. @@ -11524,16 +10960,16 @@ void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn) { ASTContext &C = CGM.getContext(); FD = FD->getMostRecentDecl(); - // Map params to their positions in function decl. - llvm::DenseMap<const Decl *, unsigned> ParamPositions; - if (isa<CXXMethodDecl>(FD)) - ParamPositions.try_emplace(FD, 0); - unsigned ParamPos = ParamPositions.size(); - for (const ParmVarDecl *P : FD->parameters()) { - ParamPositions.try_emplace(P->getCanonicalDecl(), ParamPos); - ++ParamPos; - } while (FD) { + // Map params to their positions in function decl. + llvm::DenseMap<const Decl *, unsigned> ParamPositions; + if (isa<CXXMethodDecl>(FD)) + ParamPositions.try_emplace(FD, 0); + unsigned ParamPos = ParamPositions.size(); + for (const ParmVarDecl *P : FD->parameters()) { + ParamPositions.try_emplace(P->getCanonicalDecl(), ParamPos); + ++ParamPos; + } for (const auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) { llvm::SmallVector<ParamAttrTy, 8> ParamAttrs(ParamPositions.size()); // Mark uniform parameters. @@ -11545,12 +10981,14 @@ void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD, } else { const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl()) ->getCanonicalDecl(); - Pos = ParamPositions[PVD]; + auto It = ParamPositions.find(PVD); + assert(It != ParamPositions.end() && "Function parameter not found"); + Pos = It->second; } ParamAttrs[Pos].Kind = Uniform; } // Get alignment info. 
- auto NI = Attr->alignments_begin(); + auto *NI = Attr->alignments_begin(); for (const Expr *E : Attr->aligneds()) { E = E->IgnoreParenImpCasts(); unsigned Pos; @@ -11561,7 +10999,9 @@ void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD, } else { const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl()) ->getCanonicalDecl(); - Pos = ParamPositions[PVD]; + auto It = ParamPositions.find(PVD); + assert(It != ParamPositions.end() && "Function parameter not found"); + Pos = It->second; ParmTy = PVD->getType(); } ParamAttrs[Pos].Alignment = @@ -11573,27 +11013,48 @@ void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD, ++NI; } // Mark linear parameters. - auto SI = Attr->steps_begin(); - auto MI = Attr->modifiers_begin(); + auto *SI = Attr->steps_begin(); + auto *MI = Attr->modifiers_begin(); for (const Expr *E : Attr->linears()) { E = E->IgnoreParenImpCasts(); unsigned Pos; + bool IsReferenceType = false; // Rescaling factor needed to compute the linear parameter // value in the mangled name. 
unsigned PtrRescalingFactor = 1; if (isa<CXXThisExpr>(E)) { Pos = ParamPositions[FD]; + auto *P = cast<PointerType>(E->getType()); + PtrRescalingFactor = CGM.getContext() + .getTypeSizeInChars(P->getPointeeType()) + .getQuantity(); } else { const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl()) ->getCanonicalDecl(); - Pos = ParamPositions[PVD]; + auto It = ParamPositions.find(PVD); + assert(It != ParamPositions.end() && "Function parameter not found"); + Pos = It->second; if (auto *P = dyn_cast<PointerType>(PVD->getType())) PtrRescalingFactor = CGM.getContext() .getTypeSizeInChars(P->getPointeeType()) .getQuantity(); + else if (PVD->getType()->isReferenceType()) { + IsReferenceType = true; + PtrRescalingFactor = + CGM.getContext() + .getTypeSizeInChars(PVD->getType().getNonReferenceType()) + .getQuantity(); + } } ParamAttrTy &ParamAttr = ParamAttrs[Pos]; - ParamAttr.Kind = Linear; + if (*MI == OMPC_LINEAR_ref) + ParamAttr.Kind = LinearRef; + else if (*MI == OMPC_LINEAR_uval) + ParamAttr.Kind = LinearUVal; + else if (IsReferenceType) + ParamAttr.Kind = LinearVal; + else + ParamAttr.Kind = Linear; // Assuming a stride of 1, for `linear` without modifiers. 
ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(1); if (*SI) { @@ -11601,10 +11062,13 @@ void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD, if (!(*SI)->EvaluateAsInt(Result, C, Expr::SE_AllowSideEffects)) { if (const auto *DRE = cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) { - if (const auto *StridePVD = cast<ParmVarDecl>(DRE->getDecl())) { - ParamAttr.Kind = LinearWithVarStride; - ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned( - ParamPositions[StridePVD->getCanonicalDecl()]); + if (const auto *StridePVD = + dyn_cast<ParmVarDecl>(DRE->getDecl())) { + ParamAttr.HasVarStride = true; + auto It = ParamPositions.find(StridePVD->getCanonicalDecl()); + assert(It != ParamPositions.end() && + "Function parameter not found"); + ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(It->second); } } } else { @@ -11614,7 +11078,8 @@ void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD, // If we are using a linear clause on a pointer, we need to // rescale the value of linear_step with the byte size of the // pointee type. 
- if (Linear == ParamAttr.Kind) + if (!ParamAttr.HasVarStride && + (ParamAttr.Kind == Linear || ParamAttr.Kind == LinearRef)) ParamAttr.StrideOrArg = ParamAttr.StrideOrArg * PtrRescalingFactor; ++SI; ++MI; @@ -11635,7 +11100,7 @@ void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD, if (CGM.getTarget().hasFeature("sve")) emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State, MangledName, 's', 128, Fn, ExprLoc); - if (CGM.getTarget().hasFeature("neon")) + else if (CGM.getTarget().hasFeature("neon")) emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State, MangledName, 'n', 128, Fn, ExprLoc); } @@ -11695,8 +11160,8 @@ void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF, RD = cast<RecordDecl>(KmpDimTy->getAsTagDecl()); } llvm::APInt Size(/*numBits=*/32, NumIterations.size()); - QualType ArrayTy = - C.getConstantArrayType(KmpDimTy, Size, nullptr, ArrayType::Normal, 0); + QualType ArrayTy = C.getConstantArrayType(KmpDimTy, Size, nullptr, + ArraySizeModifier::Normal, 0); Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims"); CGF.EmitNullInitialization(DimsAddr, ArrayTy); @@ -11726,7 +11191,7 @@ void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF, getThreadID(CGF, D.getBeginLoc()), llvm::ConstantInt::getSigned(CGM.Int32Ty, NumIterations.size()), CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( - CGF.Builder.CreateConstArrayGEP(DimsAddr, 0).getPointer(), + CGF.Builder.CreateConstArrayGEP(DimsAddr, 0).emitRawPointer(CGF), CGM.VoidPtrTy)}; llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction( @@ -11737,16 +11202,18 @@ void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF, llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction( CGM.getModule(), OMPRTL___kmpc_doacross_fini); CGF.EHStack.pushCleanup<DoacrossCleanupTy>(NormalAndEHCleanup, FiniRTLFn, - llvm::makeArrayRef(FiniArgs)); + llvm::ArrayRef(FiniArgs)); } -void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF, - const 
OMPDependClause *C) { +template <typename T> +static void EmitDoacrossOrdered(CodeGenFunction &CGF, CodeGenModule &CGM, + const T *C, llvm::Value *ULoc, + llvm::Value *ThreadID) { QualType Int64Ty = CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1); llvm::APInt Size(/*numBits=*/32, C->getNumLoops()); QualType ArrayTy = CGM.getContext().getConstantArrayType( - Int64Ty, Size, nullptr, ArrayType::Normal, 0); + Int64Ty, Size, nullptr, ArraySizeModifier::Normal, 0); Address CntAddr = CGF.CreateMemTemp(ArrayTy, ".cnt.addr"); for (unsigned I = 0, E = C->getNumLoops(); I < E; ++I) { const Expr *CounterVal = C->getLoopData(I); @@ -11758,21 +11225,36 @@ void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF, /*Volatile=*/false, Int64Ty); } llvm::Value *Args[] = { - emitUpdateLocation(CGF, C->getBeginLoc()), - getThreadID(CGF, C->getBeginLoc()), - CGF.Builder.CreateConstArrayGEP(CntAddr, 0).getPointer()}; + ULoc, ThreadID, + CGF.Builder.CreateConstArrayGEP(CntAddr, 0).emitRawPointer(CGF)}; llvm::FunctionCallee RTLFn; - if (C->getDependencyKind() == OMPC_DEPEND_source) { + llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder(); + OMPDoacrossKind<T> ODK; + if (ODK.isSource(C)) { RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), OMPRTL___kmpc_doacross_post); } else { - assert(C->getDependencyKind() == OMPC_DEPEND_sink); + assert(ODK.isSink(C) && "Expect sink modifier."); RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), OMPRTL___kmpc_doacross_wait); } CGF.EmitRuntimeCall(RTLFn, Args); } +void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF, + const OMPDependClause *C) { + return EmitDoacrossOrdered<OMPDependClause>( + CGF, CGM, C, emitUpdateLocation(CGF, C->getBeginLoc()), + getThreadID(CGF, C->getBeginLoc())); +} + +void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF, + const OMPDoacrossClause *C) { + return EmitDoacrossOrdered<OMPDoacrossClause>( + CGF, CGM, C, 
emitUpdateLocation(CGF, C->getBeginLoc()), + getThreadID(CGF, C->getBeginLoc())); +} + void CGOpenMPRuntime::emitCall(CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee Callee, ArrayRef<llvm::Value *> Args) const { @@ -11806,6 +11288,36 @@ Address CGOpenMPRuntime::getParameterAddress(CodeGenFunction &CGF, return CGF.GetAddrOfLocalVar(NativeParam); } +/// Return allocator value from expression, or return a null allocator (default +/// when no allocator specified). +static llvm::Value *getAllocatorVal(CodeGenFunction &CGF, + const Expr *Allocator) { + llvm::Value *AllocVal; + if (Allocator) { + AllocVal = CGF.EmitScalarExpr(Allocator); + // According to the standard, the original allocator type is a enum + // (integer). Convert to pointer type, if required. + AllocVal = CGF.EmitScalarConversion(AllocVal, Allocator->getType(), + CGF.getContext().VoidPtrTy, + Allocator->getExprLoc()); + } else { + // If no allocator specified, it defaults to the null allocator. + AllocVal = llvm::Constant::getNullValue( + CGF.CGM.getTypes().ConvertType(CGF.getContext().VoidPtrTy)); + } + return AllocVal; +} + +/// Return the alignment from an allocate directive if present. 
+static llvm::Value *getAlignmentValue(CodeGenModule &CGM, const VarDecl *VD) { + std::optional<CharUnits> AllocateAlignment = CGM.getOMPAllocateAlignment(VD); + + if (!AllocateAlignment) + return nullptr; + + return llvm::ConstantInt::get(CGM.SizeTy, AllocateAlignment->getQuantity()); +} + Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF, const VarDecl *VD) { if (!VD) @@ -11842,20 +11354,20 @@ Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF, } llvm::Value *ThreadID = getThreadID(CGF, CVD->getBeginLoc()); const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>(); - assert(AA->getAllocator() && - "Expected allocator expression for non-default allocator."); - llvm::Value *Allocator = CGF.EmitScalarExpr(AA->getAllocator()); - // According to the standard, the original allocator type is a enum - // (integer). Convert to pointer type, if required. - Allocator = CGF.EmitScalarConversion( - Allocator, AA->getAllocator()->getType(), CGF.getContext().VoidPtrTy, - AA->getAllocator()->getExprLoc()); - llvm::Value *Args[] = {ThreadID, Size, Allocator}; - - llvm::Value *Addr = - CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( - CGM.getModule(), OMPRTL___kmpc_alloc), - Args, getName({CVD->getName(), ".void.addr"})); + const Expr *Allocator = AA->getAllocator(); + llvm::Value *AllocVal = getAllocatorVal(CGF, Allocator); + llvm::Value *Alignment = getAlignmentValue(CGM, CVD); + SmallVector<llvm::Value *, 4> Args; + Args.push_back(ThreadID); + if (Alignment) + Args.push_back(Alignment); + Args.push_back(Size); + Args.push_back(AllocVal); + llvm::omp::RuntimeFunction FnID = + Alignment ? 
OMPRTL___kmpc_aligned_alloc : OMPRTL___kmpc_alloc; + llvm::Value *Addr = CGF.EmitRuntimeCall( + OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), FnID), Args, + getName({CVD->getName(), ".void.addr"})); llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction( CGM.getModule(), OMPRTL___kmpc_free); QualType Ty = CGM.getContext().getPointerType(CVD->getType()); @@ -11867,15 +11379,16 @@ Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF, // Cleanup action for allocate support. class OMPAllocateCleanupTy final : public EHScopeStack::Cleanup { llvm::FunctionCallee RTLFn; - unsigned LocEncoding; + SourceLocation::UIntTy LocEncoding; Address Addr; - const Expr *Allocator; + const Expr *AllocExpr; public: - OMPAllocateCleanupTy(llvm::FunctionCallee RTLFn, unsigned LocEncoding, - Address Addr, const Expr *Allocator) + OMPAllocateCleanupTy(llvm::FunctionCallee RTLFn, + SourceLocation::UIntTy LocEncoding, Address Addr, + const Expr *AllocExpr) : RTLFn(RTLFn), LocEncoding(LocEncoding), Addr(Addr), - Allocator(Allocator) {} + AllocExpr(AllocExpr) {} void Emit(CodeGenFunction &CGF, Flags /*flags*/) override { if (!CGF.HaveInsertPoint()) return; @@ -11883,23 +11396,19 @@ Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF, Args[0] = CGF.CGM.getOpenMPRuntime().getThreadID( CGF, SourceLocation::getFromRawEncoding(LocEncoding)); Args[1] = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( - Addr.getPointer(), CGF.VoidPtrTy); - llvm::Value *AllocVal = CGF.EmitScalarExpr(Allocator); - // According to the standard, the original allocator type is a enum - // (integer). Convert to pointer type, if required. 
- AllocVal = CGF.EmitScalarConversion(AllocVal, Allocator->getType(), - CGF.getContext().VoidPtrTy, - Allocator->getExprLoc()); + Addr.emitRawPointer(CGF), CGF.VoidPtrTy); + llvm::Value *AllocVal = getAllocatorVal(CGF, AllocExpr); Args[2] = AllocVal; - CGF.EmitRuntimeCall(RTLFn, Args); } }; Address VDAddr = - UntiedRealAddr.isValid() ? UntiedRealAddr : Address(Addr, Align); + UntiedRealAddr.isValid() + ? UntiedRealAddr + : Address(Addr, CGF.ConvertTypeForMem(CVD->getType()), Align); CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>( NormalAndEHCleanup, FiniRTLFn, CVD->getLocation().getRawEncoding(), - VDAddr, AA->getAllocator()); + VDAddr, Allocator); if (UntiedRealAddr.isValid()) if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) @@ -11951,8 +11460,8 @@ CGOpenMPRuntime::NontemporalDeclsRAII::~NontemporalDeclsRAII() { CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::UntiedTaskLocalDeclsRAII( CodeGenFunction &CGF, - const llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, - std::pair<Address, Address>> &LocalVars) + const llvm::MapVector<CanonicalDeclPtr<const VarDecl>, + std::pair<Address, Address>> &LocalVars) : CGM(CGF.CGM), NeedToPush(!LocalVars.empty()) { if (!NeedToPush) return; @@ -11972,7 +11481,7 @@ bool CGOpenMPRuntime::isNontemporalDecl(const ValueDecl *VD) const { return llvm::any_of( CGM.getOpenMPRuntime().NontemporalDeclsStack, - [VD](const NontemporalDeclsSet &Set) { return Set.count(VD) > 0; }); + [VD](const NontemporalDeclsSet &Set) { return Set.contains(VD); }); } void CGOpenMPRuntime::LastprivateConditionalRAII::tryToDisableInnerAnalysis( @@ -12159,7 +11668,7 @@ Address CGOpenMPRuntime::emitLastprivateConditionalInit(CodeGenFunction &CGF, CGF.EmitStoreOfScalar( llvm::ConstantInt::getNullValue(CGF.ConvertTypeForMem(C.CharTy)), FiredLVal); - return CGF.EmitLValueForField(BaseLVal, VDField).getAddress(CGF); + return CGF.EmitLValueForField(BaseLVal, VDField).getAddress(); } namespace { @@ -12241,20 +11750,21 @@ void 
CGOpenMPRuntime::emitLastprivateConditionalUpdate(CodeGenFunction &CGF, // Last updated loop counter for the lastprivate conditional var. // int<xx> last_iv = 0; llvm::Type *LLIVTy = CGF.ConvertTypeForMem(IVLVal.getType()); - llvm::Constant *LastIV = - getOrCreateInternalVariable(LLIVTy, getName({UniqueDeclName, "iv"})); + llvm::Constant *LastIV = OMPBuilder.getOrCreateInternalVariable( + LLIVTy, getName({UniqueDeclName, "iv"})); cast<llvm::GlobalVariable>(LastIV)->setAlignment( IVLVal.getAlignment().getAsAlign()); - LValue LastIVLVal = CGF.MakeNaturalAlignAddrLValue(LastIV, IVLVal.getType()); + LValue LastIVLVal = + CGF.MakeNaturalAlignRawAddrLValue(LastIV, IVLVal.getType()); // Last value of the lastprivate conditional. // decltype(priv_a) last_a; - llvm::Constant *Last = getOrCreateInternalVariable( + llvm::GlobalVariable *Last = OMPBuilder.getOrCreateInternalVariable( CGF.ConvertTypeForMem(LVal.getType()), UniqueDeclName); cast<llvm::GlobalVariable>(Last)->setAlignment( LVal.getAlignment().getAsAlign()); LValue LastLVal = - CGF.MakeAddrLValue(Last, LVal.getType(), LVal.getAlignment()); + CGF.MakeRawAddrLValue(Last, LVal.getType(), LVal.getAlignment()); // Global loop counter. Required to handle inner parallel-for regions. 
// iv @@ -12344,8 +11854,9 @@ void CGOpenMPRuntime::checkAndEmitLastprivateConditional(CodeGenFunction &CGF, const FieldDecl* FiredDecl = std::get<2>(It->getSecond()); LValue PrivLVal = CGF.EmitLValue(FoundE); Address StructAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( - PrivLVal.getAddress(CGF), - CGF.ConvertTypeForMem(CGF.getContext().getPointerType(StructTy))); + PrivLVal.getAddress(), + CGF.ConvertTypeForMem(CGF.getContext().getPointerType(StructTy)), + CGF.ConvertTypeForMem(StructTy)); LValue BaseLVal = CGF.MakeAddrLValue(StructAddr, StructTy, AlignmentSource::Decl); LValue FiredLVal = CGF.EmitLValueForField(BaseLVal, FiredDecl); @@ -12381,7 +11892,7 @@ void CGOpenMPRuntime::checkAndEmitSharedLastprivateConditional( const CapturedStmt *CS = D.getCapturedStmt(CaptureRegions.back()); for (const auto &Pair : It->DeclToUniqueName) { const auto *VD = cast<VarDecl>(Pair.first->getCanonicalDecl()); - if (!CS->capturesVariable(VD) || IgnoredDecls.count(VD) > 0) + if (!CS->capturesVariable(VD) || IgnoredDecls.contains(VD)) continue; auto I = LPCI->getSecond().find(Pair.first); assert(I != LPCI->getSecond().end() && @@ -12426,21 +11937,23 @@ void CGOpenMPRuntime::emitLastprivateConditionalFinalUpdate( // The variable was not updated in the region - exit. 
if (!GV) return; - LValue LPLVal = CGF.MakeAddrLValue( + LValue LPLVal = CGF.MakeRawAddrLValue( GV, PrivLVal.getType().getNonReferenceType(), PrivLVal.getAlignment()); llvm::Value *Res = CGF.EmitLoadOfScalar(LPLVal, Loc); CGF.EmitStoreOfScalar(Res, PrivLVal); } llvm::Function *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction( - const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, - OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { + CodeGenFunction &CGF, const OMPExecutableDirective &D, + const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, + const RegionCodeGenTy &CodeGen) { llvm_unreachable("Not supported in SIMD-only mode"); } llvm::Function *CGOpenMPSIMDRuntime::emitTeamsOutlinedFunction( - const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, - OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { + CodeGenFunction &CGF, const OMPExecutableDirective &D, + const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, + const RegionCodeGenTy &CodeGen) { llvm_unreachable("Not supported in SIMD-only mode"); } @@ -12456,7 +11969,8 @@ void CGOpenMPSIMDRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc, llvm::Function *OutlinedFn, ArrayRef<llvm::Value *> CapturedVars, - const Expr *IfCond) { + const Expr *IfCond, + llvm::Value *NumThreads) { llvm_unreachable("Not supported in SIMD-only mode"); } @@ -12473,6 +11987,13 @@ void CGOpenMPSIMDRuntime::emitMasterRegion(CodeGenFunction &CGF, llvm_unreachable("Not supported in SIMD-only mode"); } +void CGOpenMPSIMDRuntime::emitMaskedRegion(CodeGenFunction &CGF, + const RegionCodeGenTy &MasterOpGen, + SourceLocation Loc, + const Expr *Filter) { + llvm_unreachable("Not supported in SIMD-only mode"); +} + void CGOpenMPSIMDRuntime::emitTaskyieldCall(CodeGenFunction &CGF, SourceLocation Loc) { llvm_unreachable("Not supported in SIMD-only mode"); @@ -12514,6 +12035,11 @@ void CGOpenMPSIMDRuntime::emitForDispatchInit( llvm_unreachable("Not supported in 
SIMD-only mode"); } +void CGOpenMPSIMDRuntime::emitForDispatchDeinit(CodeGenFunction &CGF, + SourceLocation Loc) { + llvm_unreachable("Not supported in SIMD-only mode"); +} + void CGOpenMPSIMDRuntime::emitForStaticInit( CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind, const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) { @@ -12636,7 +12162,8 @@ Address CGOpenMPSIMDRuntime::getTaskReductionItem(CodeGenFunction &CGF, } void CGOpenMPSIMDRuntime::emitTaskwaitCall(CodeGenFunction &CGF, - SourceLocation Loc) { + SourceLocation Loc, + const OMPTaskDataTy &Data) { llvm_unreachable("Not supported in SIMD-only mode"); } @@ -12698,7 +12225,8 @@ void CGOpenMPSIMDRuntime::emitNumTeamsClause(CodeGenFunction &CGF, void CGOpenMPSIMDRuntime::emitTargetDataCalls( CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, - const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) { + const Expr *Device, const RegionCodeGenTy &CodeGen, + CGOpenMPRuntime::TargetDataInfo &Info) { llvm_unreachable("Not supported in SIMD-only mode"); } @@ -12719,6 +12247,11 @@ void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF, llvm_unreachable("Not supported in SIMD-only mode"); } +void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF, + const OMPDoacrossClause *C) { + llvm_unreachable("Not supported in SIMD-only mode"); +} + const VarDecl * CGOpenMPSIMDRuntime::translateParameter(const FieldDecl *FD, const VarDecl *NativeParam) const { |