diff options
Diffstat (limited to 'clang/lib/CodeGen/CGStmtOpenMP.cpp')
-rw-r--r-- | clang/lib/CodeGen/CGStmtOpenMP.cpp | 1726 |
1 files changed, 1426 insertions, 300 deletions
diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp index dc3899f0e4ea..cfd5eda8cc80 100644 --- a/clang/lib/CodeGen/CGStmtOpenMP.cpp +++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp @@ -18,14 +18,22 @@ #include "clang/AST/ASTContext.h" #include "clang/AST/Attr.h" #include "clang/AST/DeclOpenMP.h" +#include "clang/AST/OpenMPClause.h" #include "clang/AST/Stmt.h" #include "clang/AST/StmtOpenMP.h" +#include "clang/Basic/OpenMPKinds.h" #include "clang/Basic/PrettyStackTrace.h" +#include "llvm/Frontend/OpenMP/OMPConstants.h" #include "llvm/Frontend/OpenMP/OMPIRBuilder.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/Instructions.h" +#include "llvm/Support/AtomicOrdering.h" using namespace clang; using namespace CodeGen; using namespace llvm::omp; +static const VarDecl *getBaseDecl(const Expr *Ref); + namespace { /// Lexical scope for OpenMP executable constructs, that handles correct codegen /// for captured expressions. @@ -53,7 +61,8 @@ class OMPLexicalScope : public CodeGenFunction::LexicalScope { static bool isCapturedVar(CodeGenFunction &CGF, const VarDecl *VD) { return CGF.LambdaCaptureFields.lookup(VD) || (CGF.CapturedStmtInfo && CGF.CapturedStmtInfo->lookup(VD)) || - (CGF.CurCodeDecl && isa<BlockDecl>(CGF.CurCodeDecl)); + (CGF.CurCodeDecl && isa<BlockDecl>(CGF.CurCodeDecl) && + cast<BlockDecl>(CGF.CurCodeDecl)->capturesVariable(VD)); } public: @@ -214,6 +223,12 @@ public: if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(D)) CGF.EmitVarDecl(*OED); } + } else if (const auto *UDP = dyn_cast<OMPUseDeviceAddrClause>(C)) { + for (const Expr *E : UDP->varlists()) { + const Decl *D = getBaseDecl(E); + if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(D)) + CGF.EmitVarDecl(*OED); + } } } if (!isOpenMPSimdDirective(S.getDirectiveKind())) @@ -365,26 +380,28 @@ static QualType getCanonicalParamType(ASTContext &C, QualType T) { } namespace { - /// Contains required data for proper outlined function codegen. - struct FunctionOptions { - /// Captured statement for which the function is generated. - const CapturedStmt *S = nullptr; - /// true if cast to/from UIntPtr is required for variables captured by - /// value. - const bool UIntPtrCastRequired = true; - /// true if only casted arguments must be registered as local args or VLA - /// sizes. - const bool RegisterCastedArgsOnly = false; - /// Name of the generated function. - const StringRef FunctionName; - explicit FunctionOptions(const CapturedStmt *S, bool UIntPtrCastRequired, - bool RegisterCastedArgsOnly, - StringRef FunctionName) - : S(S), UIntPtrCastRequired(UIntPtrCastRequired), - RegisterCastedArgsOnly(UIntPtrCastRequired && RegisterCastedArgsOnly), - FunctionName(FunctionName) {} - }; -} +/// Contains required data for proper outlined function codegen. +struct FunctionOptions { + /// Captured statement for which the function is generated. + const CapturedStmt *S = nullptr; + /// true if cast to/from UIntPtr is required for variables captured by + /// value. + const bool UIntPtrCastRequired = true; + /// true if only casted arguments must be registered as local args or VLA + /// sizes. + const bool RegisterCastedArgsOnly = false; + /// Name of the generated function. + const StringRef FunctionName; + /// Location of the non-debug version of the outlined function. + SourceLocation Loc; + explicit FunctionOptions(const CapturedStmt *S, bool UIntPtrCastRequired, + bool RegisterCastedArgsOnly, StringRef FunctionName, + SourceLocation Loc) + : S(S), UIntPtrCastRequired(UIntPtrCastRequired), + RegisterCastedArgsOnly(UIntPtrCastRequired && RegisterCastedArgsOnly), + FunctionName(FunctionName), Loc(Loc) {} +}; +} // namespace static llvm::Function *emitOutlinedFunctionPrologue( CodeGenFunction &CGF, FunctionArgList &Args, @@ -485,7 +502,9 @@ static llvm::Function *emitOutlinedFunctionPrologue( // Generate the function. CGF.StartFunction(CD, Ctx.VoidTy, F, FuncInfo, TargetArgs, - FO.S->getBeginLoc(), CD->getBody()->getBeginLoc()); + FO.UIntPtrCastRequired ? FO.Loc : FO.S->getBeginLoc(), + FO.UIntPtrCastRequired ? FO.Loc + : CD->getBody()->getBeginLoc()); unsigned Cnt = CD->getContextParamPosition(); I = FO.S->captures().begin(); for (const FieldDecl *FD : RD->fields()) { @@ -560,7 +579,8 @@ static llvm::Function *emitOutlinedFunctionPrologue( } llvm::Function * -CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S) { +CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S, + SourceLocation Loc) { assert( CapturedStmtInfo && "CapturedStmtInfo should be set when generating the captured function"); @@ -577,7 +597,7 @@ CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S) { if (NeedWrapperFunction) Out << "_debug__"; FunctionOptions FO(&S, !NeedWrapperFunction, /*RegisterCastedArgsOnly=*/false, - Out.str()); + Out.str(), Loc); llvm::Function *F = emitOutlinedFunctionPrologue(*this, Args, LocalAddrs, VLASizes, CXXThisValue, FO); CodeGenFunction::OMPPrivateScope LocalScope(*this); @@ -600,7 +620,7 @@ CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S) { FunctionOptions WrapperFO(&S, /*UIntPtrCastRequired=*/true, /*RegisterCastedArgsOnly=*/true, - CapturedStmtInfo->getHelperName()); + CapturedStmtInfo->getHelperName(), Loc); CodeGenFunction WrapperCGF(CGM, /*suppressNewContext=*/true); WrapperCGF.CapturedStmtInfo = CapturedStmtInfo; Args.clear(); @@ -632,8 +652,7 @@ CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S) { } CallArgs.emplace_back(WrapperCGF.EmitFromMemory(CallArg, Arg->getType())); } - CGM.getOpenMPRuntime().emitOutlinedFunctionCall(WrapperCGF, S.getBeginLoc(), - F, CallArgs); + CGM.getOpenMPRuntime().emitOutlinedFunctionCall(WrapperCGF, Loc, F, CallArgs); WrapperCGF.FinishFunction(); return WrapperF; } @@ -747,11 +766,12 @@ bool CodeGenFunction::EmitOMPFirstprivateClause(const OMPExecutableDirective &D, getLangOpts().OpenMPIsDevice && isOpenMPTargetExecutionDirective(D.getDirectiveKind()); bool FirstprivateIsLastprivate = false; - llvm::DenseSet<const VarDecl *> Lastprivates; + llvm::DenseMap<const VarDecl *, OpenMPLastprivateModifier> Lastprivates; for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) { for (const auto *D : C->varlists()) - Lastprivates.insert( - cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl())->getCanonicalDecl()); + Lastprivates.try_emplace( + cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl())->getCanonicalDecl(), + C->getKind()); } llvm::DenseSet<const VarDecl *> EmittedAsFirstprivate; llvm::SmallVector<OpenMPDirectiveKind, 4> CaptureRegions; @@ -761,8 +781,8 @@ bool CodeGenFunction::EmitOMPFirstprivateClause(const OMPExecutableDirective &D, bool MustEmitFirstprivateCopy = CaptureRegions.size() == 1 && CaptureRegions.back() == OMPD_unknown; for (const auto *C : D.getClausesOfKind<OMPFirstprivateClause>()) { - auto IRef = C->varlist_begin(); - auto InitsRef = C->inits().begin(); + const auto *IRef = C->varlist_begin(); + const auto *InitsRef = C->inits().begin(); for (const Expr *IInit : C->private_copies()) { const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl()); bool ThisFirstprivateIsLastprivate = @@ -853,14 +873,34 @@ bool CodeGenFunction::EmitOMPFirstprivateClause(const OMPExecutableDirective &D, }); } else { Address OriginalAddr = OriginalLVal.getAddress(*this); - IsRegistered = PrivateScope.addPrivate( - OrigVD, [this, VDInit, OriginalAddr, VD]() { + IsRegistered = + PrivateScope.addPrivate(OrigVD, [this, VDInit, OriginalAddr, VD, + ThisFirstprivateIsLastprivate, + OrigVD, &Lastprivates, IRef]() { // Emit private VarDecl with copy init. // Remap temp VDInit variable to the address of the original // variable (for proper handling of captured global variables). setAddrOfLocalVar(VDInit, OriginalAddr); EmitDecl(*VD); LocalDeclMap.erase(VDInit); + if (ThisFirstprivateIsLastprivate && + Lastprivates[OrigVD->getCanonicalDecl()] == + OMPC_LASTPRIVATE_conditional) { + // Create/init special variable for lastprivate conditionals. + Address VDAddr = + CGM.getOpenMPRuntime().emitLastprivateConditionalInit( + *this, OrigVD); + llvm::Value *V = EmitLoadOfScalar( + MakeAddrLValue(GetAddrOfLocalVar(VD), (*IRef)->getType(), + AlignmentSource::Decl), + (*IRef)->getExprLoc()); + EmitStoreOfScalar(V, + MakeAddrLValue(VDAddr, (*IRef)->getType(), + AlignmentSource::Decl)); + LocalDeclMap.erase(VD); + setAddrOfLocalVar(VD, VDAddr); + return VDAddr; + } return GetAddrOfLocalVar(VD); }); } @@ -990,8 +1030,8 @@ bool CodeGenFunction::EmitOMPLastprivateClauseInit( if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) && !getLangOpts().OpenMPSimd) break; - auto IRef = C->varlist_begin(); - auto IDestRef = C->destination_exprs().begin(); + const auto *IRef = C->varlist_begin(); + const auto *IDestRef = C->destination_exprs().begin(); for (const Expr *IInit : C->private_copies()) { // Keep the address of the original variable for future update at the end // of the loop. @@ -1013,7 +1053,15 @@ bool CodeGenFunction::EmitOMPLastprivateClauseInit( // for 'firstprivate' clause. if (IInit && !SIMDLCVs.count(OrigVD->getCanonicalDecl())) { const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl()); - bool IsRegistered = PrivateScope.addPrivate(OrigVD, [this, VD]() { + bool IsRegistered = PrivateScope.addPrivate(OrigVD, [this, VD, C, + OrigVD]() { + if (C->getKind() == OMPC_LASTPRIVATE_conditional) { + Address VDAddr = + CGM.getOpenMPRuntime().emitLastprivateConditionalInit(*this, + OrigVD); + setAddrOfLocalVar(VD, VDAddr); + return VDAddr; + } // Emit private VarDecl with copy init. EmitDecl(*VD); return GetAddrOfLocalVar(VD); @@ -1099,7 +1147,7 @@ void CodeGenFunction::EmitOMPLastprivateClauseFinal( if (const auto *RefTy = PrivateVD->getType()->getAs<ReferenceType>()) PrivateAddr = Address(Builder.CreateLoad(PrivateAddr), - getNaturalTypeAlignment(RefTy->getPointeeType())); + CGM.getNaturalTypeAlignment(RefTy->getPointeeType())); // Store the last value to the private copy in the last iteration. if (C->getKind() == OMPC_LASTPRIVATE_conditional) CGM.getOpenMPRuntime().emitLastprivateConditionalFinalUpdate( @@ -1122,7 +1170,7 @@ void CodeGenFunction::EmitOMPLastprivateClauseFinal( void CodeGenFunction::EmitOMPReductionClauseInit( const OMPExecutableDirective &D, - CodeGenFunction::OMPPrivateScope &PrivateScope) { + CodeGenFunction::OMPPrivateScope &PrivateScope, bool ForInscan) { if (!HaveInsertPoint()) return; SmallVector<const Expr *, 4> Shareds; @@ -1130,32 +1178,36 @@ void CodeGenFunction::EmitOMPReductionClauseInit( SmallVector<const Expr *, 4> ReductionOps; SmallVector<const Expr *, 4> LHSs; SmallVector<const Expr *, 4> RHSs; + OMPTaskDataTy Data; + SmallVector<const Expr *, 4> TaskLHSs; + SmallVector<const Expr *, 4> TaskRHSs; for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) { - auto IPriv = C->privates().begin(); - auto IRed = C->reduction_ops().begin(); - auto ILHS = C->lhs_exprs().begin(); - auto IRHS = C->rhs_exprs().begin(); - for (const Expr *Ref : C->varlists()) { - Shareds.emplace_back(Ref); - Privates.emplace_back(*IPriv); - ReductionOps.emplace_back(*IRed); - LHSs.emplace_back(*ILHS); - RHSs.emplace_back(*IRHS); - std::advance(IPriv, 1); - std::advance(IRed, 1); - std::advance(ILHS, 1); - std::advance(IRHS, 1); + if (ForInscan != (C->getModifier() == OMPC_REDUCTION_inscan)) + continue; + Shareds.append(C->varlist_begin(), C->varlist_end()); + Privates.append(C->privates().begin(), C->privates().end()); + ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end()); + LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end()); + RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end()); + if (C->getModifier() == OMPC_REDUCTION_task) { + Data.ReductionVars.append(C->privates().begin(), C->privates().end()); + Data.ReductionOrigs.append(C->varlist_begin(), C->varlist_end()); + Data.ReductionCopies.append(C->privates().begin(), C->privates().end()); + Data.ReductionOps.append(C->reduction_ops().begin(), + C->reduction_ops().end()); + TaskLHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end()); + TaskRHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end()); } } - ReductionCodeGen RedCG(Shareds, Privates, ReductionOps); + ReductionCodeGen RedCG(Shareds, Shareds, Privates, ReductionOps); unsigned Count = 0; - auto ILHS = LHSs.begin(); - auto IRHS = RHSs.begin(); - auto IPriv = Privates.begin(); + auto *ILHS = LHSs.begin(); + auto *IRHS = RHSs.begin(); + auto *IPriv = Privates.begin(); for (const Expr *IRef : Shareds) { const auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*IPriv)->getDecl()); // Emit private VarDecl with reduction init. - RedCG.emitSharedLValue(*this, Count); + RedCG.emitSharedOrigLValue(*this, Count); RedCG.emitAggregateType(*this, Count); AutoVarEmission Emission = EmitAutoVarAlloca(*PrivateVD); RedCG.emitInitialization(*this, Count, Emission.getAllocatedAddress(), @@ -1222,6 +1274,118 @@ void CodeGenFunction::EmitOMPReductionClauseInit( ++IPriv; ++Count; } + if (!Data.ReductionVars.empty()) { + Data.IsReductionWithTaskMod = true; + Data.IsWorksharingReduction = + isOpenMPWorksharingDirective(D.getDirectiveKind()); + llvm::Value *ReductionDesc = CGM.getOpenMPRuntime().emitTaskReductionInit( + *this, D.getBeginLoc(), TaskLHSs, TaskRHSs, Data); + const Expr *TaskRedRef = nullptr; + switch (D.getDirectiveKind()) { + case OMPD_parallel: + TaskRedRef = cast<OMPParallelDirective>(D).getTaskReductionRefExpr(); + break; + case OMPD_for: + TaskRedRef = cast<OMPForDirective>(D).getTaskReductionRefExpr(); + break; + case OMPD_sections: + TaskRedRef = cast<OMPSectionsDirective>(D).getTaskReductionRefExpr(); + break; + case OMPD_parallel_for: + TaskRedRef = cast<OMPParallelForDirective>(D).getTaskReductionRefExpr(); + break; + case OMPD_parallel_master: + TaskRedRef = + cast<OMPParallelMasterDirective>(D).getTaskReductionRefExpr(); + break; + case OMPD_parallel_sections: + TaskRedRef = + cast<OMPParallelSectionsDirective>(D).getTaskReductionRefExpr(); + break; + case OMPD_target_parallel: + TaskRedRef = + cast<OMPTargetParallelDirective>(D).getTaskReductionRefExpr(); + break; + case OMPD_target_parallel_for: + TaskRedRef = + cast<OMPTargetParallelForDirective>(D).getTaskReductionRefExpr(); + break; + case OMPD_distribute_parallel_for: + TaskRedRef = + cast<OMPDistributeParallelForDirective>(D).getTaskReductionRefExpr(); + break; + case OMPD_teams_distribute_parallel_for: + TaskRedRef = cast<OMPTeamsDistributeParallelForDirective>(D) + .getTaskReductionRefExpr(); + break; + case OMPD_target_teams_distribute_parallel_for: + TaskRedRef = cast<OMPTargetTeamsDistributeParallelForDirective>(D) + .getTaskReductionRefExpr(); + break; + case OMPD_simd: + case OMPD_for_simd: + case OMPD_section: + case OMPD_single: + case OMPD_master: + case OMPD_critical: + case OMPD_parallel_for_simd: + case OMPD_task: + case OMPD_taskyield: + case OMPD_barrier: + case OMPD_taskwait: + case OMPD_taskgroup: + case OMPD_flush: + case OMPD_depobj: + case OMPD_scan: + case OMPD_ordered: + case OMPD_atomic: + case OMPD_teams: + case OMPD_target: + case OMPD_cancellation_point: + case OMPD_cancel: + case OMPD_target_data: + case OMPD_target_enter_data: + case OMPD_target_exit_data: + case OMPD_taskloop: + case OMPD_taskloop_simd: + case OMPD_master_taskloop: + case OMPD_master_taskloop_simd: + case OMPD_parallel_master_taskloop: + case OMPD_parallel_master_taskloop_simd: + case OMPD_distribute: + case OMPD_target_update: + case OMPD_distribute_parallel_for_simd: + case OMPD_distribute_simd: + case OMPD_target_parallel_for_simd: + case OMPD_target_simd: + case OMPD_teams_distribute: + case OMPD_teams_distribute_simd: + case OMPD_teams_distribute_parallel_for_simd: + case OMPD_target_teams: + case OMPD_target_teams_distribute: + case OMPD_target_teams_distribute_parallel_for_simd: + case OMPD_target_teams_distribute_simd: + case OMPD_declare_target: + case OMPD_end_declare_target: + case OMPD_threadprivate: + case OMPD_allocate: + case OMPD_declare_reduction: + case OMPD_declare_mapper: + case OMPD_declare_simd: + case OMPD_requires: + case OMPD_declare_variant: + case OMPD_begin_declare_variant: + case OMPD_end_declare_variant: + case OMPD_unknown: + default: + llvm_unreachable("Enexpected directive with task reductions."); + } + + const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(TaskRedRef)->getDecl()); + EmitVarDecl(*VD); + EmitStoreOfScalar(ReductionDesc, GetAddrOfLocalVar(VD), + /*Volatile=*/false, TaskRedRef->getType()); + } } void CodeGenFunction::EmitOMPReductionClauseFinal( @@ -1233,14 +1397,25 @@ void CodeGenFunction::EmitOMPReductionClauseFinal( llvm::SmallVector<const Expr *, 8> RHSExprs; llvm::SmallVector<const Expr *, 8> ReductionOps; bool HasAtLeastOneReduction = false; + bool IsReductionWithTaskMod = false; for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) { + // Do not emit for inscan reductions. + if (C->getModifier() == OMPC_REDUCTION_inscan) + continue; HasAtLeastOneReduction = true; Privates.append(C->privates().begin(), C->privates().end()); LHSExprs.append(C->lhs_exprs().begin(), C->lhs_exprs().end()); RHSExprs.append(C->rhs_exprs().begin(), C->rhs_exprs().end()); ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end()); + IsReductionWithTaskMod = + IsReductionWithTaskMod || C->getModifier() == OMPC_REDUCTION_task; } if (HasAtLeastOneReduction) { + if (IsReductionWithTaskMod) { + CGM.getOpenMPRuntime().emitTaskReductionFini( + *this, D.getBeginLoc(), + isOpenMPWorksharingDirective(D.getDirectiveKind())); + } bool WithNowait = D.getSingleClause<OMPNowaitClause>() || isOpenMPParallelDirective(D.getDirectiveKind()) || ReductionKind == OMPD_simd; @@ -1288,6 +1463,63 @@ typedef llvm::function_ref<void(CodeGenFunction &, CodeGenBoundParametersTy; } // anonymous namespace +static void +checkForLastprivateConditionalUpdate(CodeGenFunction &CGF, + const OMPExecutableDirective &S) { + if (CGF.getLangOpts().OpenMP < 50) + return; + llvm::DenseSet<CanonicalDeclPtr<const VarDecl>> PrivateDecls; + for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) { + for (const Expr *Ref : C->varlists()) { + if (!Ref->getType()->isScalarType()) + continue; + const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts()); + if (!DRE) + continue; + PrivateDecls.insert(cast<VarDecl>(DRE->getDecl())); + CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, Ref); + } + } + for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) { + for (const Expr *Ref : C->varlists()) { + if (!Ref->getType()->isScalarType()) + continue; + const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts()); + if (!DRE) + continue; + PrivateDecls.insert(cast<VarDecl>(DRE->getDecl())); + CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, Ref); + } + } + for (const auto *C : S.getClausesOfKind<OMPLinearClause>()) { + for (const Expr *Ref : C->varlists()) { + if (!Ref->getType()->isScalarType()) + continue; + const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts()); + if (!DRE) + continue; + PrivateDecls.insert(cast<VarDecl>(DRE->getDecl())); + CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, Ref); + } + } + // Privates should ne analyzed since they are not captured at all. + // Task reductions may be skipped - tasks are ignored. + // Firstprivates do not return value but may be passed by reference - no need + // to check for updated lastprivate conditional. + for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) { + for (const Expr *Ref : C->varlists()) { + if (!Ref->getType()->isScalarType()) + continue; + const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts()); + if (!DRE) + continue; + PrivateDecls.insert(cast<VarDecl>(DRE->getDecl())); + } + } + CGF.CGM.getOpenMPRuntime().checkAndEmitSharedLastprivateConditional( + CGF, S, PrivateDecls); +} + static void emitCommonOMPParallelDirective( CodeGenFunction &CGF, const OMPExecutableDirective &S, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen, @@ -1334,9 +1566,97 @@ static void emitEmptyBoundParameters(CodeGenFunction &, const OMPExecutableDirective &, llvm::SmallVectorImpl<llvm::Value *> &) {} -void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) { +Address CodeGenFunction::OMPBuilderCBHelpers::getAddressOfLocalVariable( + CodeGenFunction &CGF, const VarDecl *VD) { + CodeGenModule &CGM = CGF.CGM; + auto &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder(); + + if (!VD) + return Address::invalid(); + const VarDecl *CVD = VD->getCanonicalDecl(); + if (!CVD->hasAttr<OMPAllocateDeclAttr>()) + return Address::invalid(); + const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>(); + // Use the default allocation. + if (AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc && + !AA->getAllocator()) + return Address::invalid(); + llvm::Value *Size; + CharUnits Align = CGM.getContext().getDeclAlign(CVD); + if (CVD->getType()->isVariablyModifiedType()) { + Size = CGF.getTypeSize(CVD->getType()); + // Align the size: ((size + align - 1) / align) * align + Size = CGF.Builder.CreateNUWAdd( + Size, CGM.getSize(Align - CharUnits::fromQuantity(1))); + Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align)); + Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align)); + } else { + CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType()); + Size = CGM.getSize(Sz.alignTo(Align)); + } + + assert(AA->getAllocator() && + "Expected allocator expression for non-default allocator."); + llvm::Value *Allocator = CGF.EmitScalarExpr(AA->getAllocator()); + // According to the standard, the original allocator type is a enum (integer). + // Convert to pointer type, if required. + if (Allocator->getType()->isIntegerTy()) + Allocator = CGF.Builder.CreateIntToPtr(Allocator, CGM.VoidPtrTy); + else if (Allocator->getType()->isPointerTy()) + Allocator = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Allocator, + CGM.VoidPtrTy); + + llvm::Value *Addr = OMPBuilder.CreateOMPAlloc( + CGF.Builder, Size, Allocator, + getNameWithSeparators({CVD->getName(), ".void.addr"}, ".", ".")); + llvm::CallInst *FreeCI = + OMPBuilder.CreateOMPFree(CGF.Builder, Addr, Allocator); + + CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(NormalAndEHCleanup, FreeCI); + Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( + Addr, + CGF.ConvertTypeForMem(CGM.getContext().getPointerType(CVD->getType())), + getNameWithSeparators({CVD->getName(), ".addr"}, ".", ".")); + return Address(Addr, Align); +} + +Address CodeGenFunction::OMPBuilderCBHelpers::getAddrOfThreadPrivate( + CodeGenFunction &CGF, const VarDecl *VD, Address VDAddr, + SourceLocation Loc) { + CodeGenModule &CGM = CGF.CGM; + if (CGM.getLangOpts().OpenMPUseTLS && + CGM.getContext().getTargetInfo().isTLSSupported()) + return VDAddr; + + llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder(); + + llvm::Type *VarTy = VDAddr.getElementType(); + llvm::Value *Data = + CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.Int8PtrTy); + llvm::ConstantInt *Size = CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)); + std::string Suffix = getNameWithSeparators({"cache", ""}); + llvm::Twine CacheName = Twine(CGM.getMangledName(VD)).concat(Suffix); + + llvm::CallInst *ThreadPrivateCacheCall = + OMPBuilder.CreateCachedThreadPrivate(CGF.Builder, Data, Size, CacheName); + + return Address(ThreadPrivateCacheCall, VDAddr.getAlignment()); +} - if (llvm::OpenMPIRBuilder *OMPBuilder = CGM.getOpenMPIRBuilder()) { +std::string CodeGenFunction::OMPBuilderCBHelpers::getNameWithSeparators( + ArrayRef<StringRef> Parts, StringRef FirstSeparator, StringRef Separator) { + SmallString<128> Buffer; + llvm::raw_svector_ostream OS(Buffer); + StringRef Sep = FirstSeparator; + for (StringRef Part : Parts) { + OS << Sep << Part; + Sep = Separator; + } + return OS.str().str(); +} +void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) { + if (CGM.getLangOpts().OpenMPIRBuilder) { + llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder(); // Check if we have any if clause associated with the directive. llvm::Value *IfCond = nullptr; if (const auto *C = S.getSingleClause<OMPIfClause>()) @@ -1357,15 +1677,7 @@ void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) { // The cleanup callback that finalizes all variabels at the given location, // thus calls destructors etc. auto FiniCB = [this](InsertPointTy IP) { - CGBuilderTy::InsertPointGuard IPG(Builder); - assert(IP.getBlock()->end() != IP.getPoint() && - "OpenMP IR Builder should cause terminated block!"); - llvm::BasicBlock *IPBB = IP.getBlock(); - llvm::BasicBlock *DestBB = IPBB->splitBasicBlock(IP.getPoint()); - IPBB->getTerminator()->eraseFromParent(); - Builder.SetInsertPoint(IPBB); - CodeGenFunction::JumpDest Dest = getJumpDestInCurrentScope(DestBB); - EmitBranchThroughCleanup(Dest); + OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP); }; // Privatization callback that performs appropriate action for @@ -1387,32 +1699,17 @@ void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) { auto BodyGenCB = [ParallelRegionBodyStmt, this](InsertPointTy AllocaIP, InsertPointTy CodeGenIP, llvm::BasicBlock &ContinuationBB) { - auto OldAllocaIP = AllocaInsertPt; - AllocaInsertPt = &*AllocaIP.getPoint(); - - auto OldReturnBlock = ReturnBlock; - ReturnBlock = getJumpDestInCurrentScope(&ContinuationBB); - - llvm::BasicBlock *CodeGenIPBB = CodeGenIP.getBlock(); - CodeGenIPBB->splitBasicBlock(CodeGenIP.getPoint()); - llvm::Instruction *CodeGenIPBBTI = CodeGenIPBB->getTerminator(); - CodeGenIPBBTI->removeFromParent(); - - Builder.SetInsertPoint(CodeGenIPBB); - - EmitStmt(ParallelRegionBodyStmt); - - Builder.Insert(CodeGenIPBBTI); - - AllocaInsertPt = OldAllocaIP; - ReturnBlock = OldReturnBlock; + OMPBuilderCBHelpers::OutlinedRegionBodyRAII ORB(*this, AllocaIP, + ContinuationBB); + OMPBuilderCBHelpers::EmitOMPRegionBody(*this, ParallelRegionBodyStmt, + CodeGenIP, ContinuationBB); }; CGCapturedStmtInfo CGSI(*CS, CR_OpenMP); CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(*this, &CGSI); - Builder.restoreIP(OMPBuilder->CreateParallel(Builder, BodyGenCB, PrivCB, - FiniCB, IfCond, NumThreads, - ProcBind, S.hasCancel())); + Builder.restoreIP(OMPBuilder.CreateParallel(Builder, BodyGenCB, PrivCB, + FiniCB, IfCond, NumThreads, + ProcBind, S.hasCancel())); return; } @@ -1436,10 +1733,16 @@ void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) { CGF.EmitStmt(S.getCapturedStmt(OMPD_parallel)->getCapturedStmt()); CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel); }; - emitCommonOMPParallelDirective(*this, S, OMPD_parallel, CodeGen, - emitEmptyBoundParameters); - emitPostUpdateForReductionClause(*this, S, - [](CodeGenFunction &) { return nullptr; }); + { + auto LPCRegion = + CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S); + emitCommonOMPParallelDirective(*this, S, OMPD_parallel, CodeGen, + emitEmptyBoundParameters); + emitPostUpdateForReductionClause(*this, S, + [](CodeGenFunction &) { return nullptr; }); + } + // Check for outer lastprivate conditional update. + checkForLastprivateConditionalUpdate(*this, S); } static void emitBody(CodeGenFunction &CGF, const Stmt *S, const Stmt *NextLoop, @@ -1506,6 +1809,27 @@ void CodeGenFunction::EmitOMPLoopBody(const OMPLoopDirective &D, getProfileCount(D.getBody())); EmitBlock(NextBB); } + + OMPPrivateScope InscanScope(*this); + EmitOMPReductionClauseInit(D, InscanScope, /*ForInscan=*/true); + bool IsInscanRegion = InscanScope.Privatize(); + if (IsInscanRegion) { + // Need to remember the block before and after scan directive + // to dispatch them correctly depending on the clause used in + // this directive, inclusive or exclusive. For inclusive scan the natural + // order of the blocks is used, for exclusive clause the blocks must be + // executed in reverse order. + OMPBeforeScanBlock = createBasicBlock("omp.before.scan.bb"); + OMPAfterScanBlock = createBasicBlock("omp.after.scan.bb"); + // No need to allocate inscan exit block, in simd mode it is selected in the + // codegen for the scan directive. + if (D.getDirectiveKind() != OMPD_simd && !getLangOpts().OpenMPSimd) + OMPScanExitBlock = createBasicBlock("omp.exit.inscan.bb"); + OMPScanDispatch = createBasicBlock("omp.inscan.dispatch"); + EmitBranch(OMPScanDispatch); + EmitBlock(OMPBeforeScanBlock); + } + // Emit loop variables for C++ range loops. const Stmt *Body = D.getInnermostCapturedStmt()->getCapturedStmt()->IgnoreContainers(); @@ -1515,13 +1839,17 @@ void CodeGenFunction::EmitOMPLoopBody(const OMPLoopDirective &D, Body, /*TryImperfectlyNestedLoops=*/true), D.getCollapsedNumber()); + // Jump to the dispatcher at the end of the loop body. + if (IsInscanRegion) + EmitBranch(OMPScanExitBlock); + // The end (updates/cleanups). EmitBlock(Continue.getBlock()); BreakContinueStack.pop_back(); } void CodeGenFunction::EmitOMPInnerLoop( - const Stmt &S, bool RequiresCleanup, const Expr *LoopCond, + const OMPExecutableDirective &S, bool RequiresCleanup, const Expr *LoopCond, const Expr *IncExpr, const llvm::function_ref<void(CodeGenFunction &)> BodyGen, const llvm::function_ref<void(CodeGenFunction &)> PostIncGen) { @@ -1531,8 +1859,19 @@ void CodeGenFunction::EmitOMPInnerLoop( auto CondBlock = createBasicBlock("omp.inner.for.cond"); EmitBlock(CondBlock); const SourceRange R = S.getSourceRange(); - LoopStack.push(CondBlock, SourceLocToDebugLoc(R.getBegin()), - SourceLocToDebugLoc(R.getEnd())); + + // If attributes are attached, push to the basic block with them. + const auto &OMPED = cast<OMPExecutableDirective>(S); + const CapturedStmt *ICS = OMPED.getInnermostCapturedStmt(); + const Stmt *SS = ICS->getCapturedStmt(); + const AttributedStmt *AS = dyn_cast_or_null<AttributedStmt>(SS); + if (AS) + LoopStack.push(CondBlock, CGM.getContext(), CGM.getCodeGenOpts(), + AS->getAttrs(), SourceLocToDebugLoc(R.getBegin()), + SourceLocToDebugLoc(R.getEnd())); + else + LoopStack.push(CondBlock, SourceLocToDebugLoc(R.getBegin()), + SourceLocToDebugLoc(R.getEnd())); // If there are any cleanups between here and the loop-exit scope, // create a block to stage a loop exit along. @@ -1671,7 +2010,7 @@ static void emitAlignedClause(CodeGenFunction &CGF, "alignment is not power of 2"); if (Alignment != 0) { llvm::Value *PtrValue = CGF.EmitScalarExpr(E); - CGF.EmitAlignmentAssumption( + CGF.emitAlignmentAssumption( PtrValue, E, /*No second loc needed*/ SourceLocation(), llvm::ConstantInt::get(CGF.getLLVMContext(), Alignment)); } @@ -1835,6 +2174,18 @@ void CodeGenFunction::EmitOMPSimdInit(const OMPLoopDirective &D, LoopStack.setParallel(!IsMonotonic); LoopStack.setVectorizeEnable(); emitSimdlenSafelenClause(*this, D, IsMonotonic); + if (const auto *C = D.getSingleClause<OMPOrderClause>()) + if (C->getKind() == OMPC_ORDER_concurrent) + LoopStack.setParallel(/*Enable=*/true); + if ((D.getDirectiveKind() == OMPD_simd || + (getLangOpts().OpenMPSimd && + isOpenMPSimdDirective(D.getDirectiveKind()))) && + llvm::any_of(D.getClausesOfKind<OMPReductionClause>(), + [](const OMPReductionClause *C) { + return C->getModifier() == OMPC_REDUCTION_inscan; + })) + // Disable parallel access in case of prefix sum. + LoopStack.setParallel(/*Enable=*/false); } void CodeGenFunction::EmitOMPSimdFinal( @@ -1886,7 +2237,6 @@ void CodeGenFunction::EmitOMPSimdFinal( static void emitOMPLoopBodyWithStopPoint(CodeGenFunction &CGF, const OMPLoopDirective &S, CodeGenFunction::JumpDest LoopExit) { - CGF.CGM.getOpenMPRuntime().initLastprivateConditionalCounter(CGF, S); CGF.EmitOMPLoopBody(S, LoopExit); CGF.EmitStopPoint(&S); } @@ -1917,12 +2267,14 @@ static void emitCommonSimdLoop(CodeGenFunction &CGF, const OMPLoopDirective &S, BodyCodeGen(CGF); }; const Expr *IfCond = nullptr; - for (const auto *C : S.getClausesOfKind<OMPIfClause>()) { - if (CGF.getLangOpts().OpenMP >= 50 && - (C->getNameModifier() == OMPD_unknown || - C->getNameModifier() == OMPD_simd)) { - IfCond = C->getCondition(); - break; + if (isOpenMPSimdDirective(S.getDirectiveKind())) { + for (const auto *C : S.getClausesOfKind<OMPIfClause>()) { + if (CGF.getLangOpts().OpenMP >= 50 && + (C->getNameModifier() == OMPD_unknown || + C->getNameModifier() == OMPD_simd)) { + IfCond = C->getCondition(); + break; + } } } if (IfCond) { @@ -2007,10 +2359,8 @@ static void emitOMPSimdRegion(CodeGenFunction &CGF, const OMPLoopDirective &S, CGF.EmitOMPInnerLoop( S, LoopScope.requiresCleanups(), S.getCond(), S.getInc(), [&S](CodeGenFunction &CGF) { - CGF.CGM.getOpenMPRuntime().initLastprivateConditionalCounter( - CGF, S); - CGF.EmitOMPLoopBody(S, CodeGenFunction::JumpDest()); - CGF.EmitStopPoint(&S); + emitOMPLoopBodyWithStopPoint(CGF, S, + CodeGenFunction::JumpDest()); }, [](CodeGenFunction &) {}); }); @@ -2031,11 +2381,19 @@ static void emitOMPSimdRegion(CodeGenFunction &CGF, const OMPLoopDirective &S, } void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) { + ParentLoopDirectiveForScanRegion ScanRegion(*this, S); + OMPFirstScanLoop = true; auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { emitOMPSimdRegion(CGF, S, Action); }; - OMPLexicalScope Scope(*this, S, OMPD_unknown); - CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen); + { + auto LPCRegion = + CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S); + OMPLexicalScope Scope(*this, S, OMPD_unknown); + CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen); + } + // Check for outer lastprivate conditional update. + checkForLastprivateConditionalUpdate(*this, S); } void CodeGenFunction::EmitOMPOuterLoop( @@ -2103,10 +2461,14 @@ void CodeGenFunction::EmitOMPOuterLoop( [&S, IsMonotonic](CodeGenFunction &CGF, PrePostActionTy &) { // Generate !llvm.loop.parallel metadata for loads and stores for loops // with dynamic/guided scheduling and without ordered clause. - if (!isOpenMPSimdDirective(S.getDirectiveKind())) + if (!isOpenMPSimdDirective(S.getDirectiveKind())) { CGF.LoopStack.setParallel(!IsMonotonic); - else + if (const auto *C = S.getSingleClause<OMPOrderClause>()) + if (C->getKind() == OMPC_ORDER_concurrent) + CGF.LoopStack.setParallel(/*Enable=*/true); + } else { CGF.EmitOMPSimdInit(S, IsMonotonic); + } }, [&S, &LoopArgs, LoopExit, &CodeGenLoop, IVSize, IVSigned, &CodeGenOrdered, &LoopScope](CodeGenFunction &CGF, PrePostActionTy &) { @@ -2612,6 +2974,14 @@ bool CodeGenFunction::EmitOMPWorksharingLoop( bool StaticChunkedOne = RT.isStaticChunked(ScheduleKind.Schedule, /* Chunked */ Chunk != nullptr) && HasChunkSizeOne && isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()); + bool IsMonotonic = + Ordered || + ((ScheduleKind.Schedule == OMPC_SCHEDULE_static || + ScheduleKind.Schedule == OMPC_SCHEDULE_unknown) && + !(ScheduleKind.M1 == OMPC_SCHEDULE_MODIFIER_nonmonotonic || + ScheduleKind.M1 == OMPC_SCHEDULE_MODIFIER_nonmonotonic)) || + ScheduleKind.M1 == OMPC_SCHEDULE_MODIFIER_monotonic || + ScheduleKind.M2 == OMPC_SCHEDULE_MODIFIER_monotonic; if ((RT.isStaticNonchunked(ScheduleKind.Schedule, /* Chunked */ Chunk != nullptr) || StaticChunkedOne) && @@ -2620,9 +2990,13 @@ bool CodeGenFunction::EmitOMPWorksharingLoop( getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit")); emitCommonSimdLoop( *this, S, - [&S](CodeGenFunction &CGF, PrePostActionTy &) { - if (isOpenMPSimdDirective(S.getDirectiveKind())) - CGF.EmitOMPSimdInit(S, /*IsMonotonic=*/true); + [&S, IsMonotonic](CodeGenFunction &CGF, PrePostActionTy &) { + if (isOpenMPSimdDirective(S.getDirectiveKind())) { + CGF.EmitOMPSimdInit(S, IsMonotonic); + } else if (const auto *C = S.getSingleClause<OMPOrderClause>()) { + if (C->getKind() == OMPC_ORDER_concurrent) + CGF.LoopStack.setParallel(/*Enable=*/true); + } }, [IVSize, IVSigned, Ordered, IL, LB, UB, ST, StaticChunkedOne, Chunk, &S, ScheduleKind, LoopExit, @@ -2663,10 +3037,7 @@ bool CodeGenFunction::EmitOMPWorksharingLoop( : S.getCond(), StaticChunkedOne ? S.getDistInc() : S.getInc(), [&S, LoopExit](CodeGenFunction &CGF) { - CGF.CGM.getOpenMPRuntime() - .initLastprivateConditionalCounter(CGF, S); - CGF.EmitOMPLoopBody(S, LoopExit); - CGF.EmitStopPoint(&S); + emitOMPLoopBodyWithStopPoint(CGF, S, LoopExit); }, [](CodeGenFunction &) {}); }); @@ -2678,11 +3049,6 @@ bool CodeGenFunction::EmitOMPWorksharingLoop( }; OMPCancelStack.emitExit(*this, S.getDirectiveKind(), CodeGen); } else { - const bool IsMonotonic = - Ordered || ScheduleKind.Schedule == OMPC_SCHEDULE_static || - ScheduleKind.Schedule == OMPC_SCHEDULE_unknown || - ScheduleKind.M1 == OMPC_SCHEDULE_MODIFIER_monotonic || - ScheduleKind.M2 == OMPC_SCHEDULE_MODIFIER_monotonic; // Emit the outer loop, which requests its work chunk [LB..UB] from // runtime and runs the inner loop to process it. const OMPLoopArguments LoopArguments( @@ -2755,16 +3121,233 @@ emitDispatchForLoopBounds(CodeGenFunction &CGF, const OMPExecutableDirective &S, return {LBVal, UBVal}; } +/// Emits the code for the directive with inscan reductions. +/// The code is the following: +/// \code +/// size num_iters = <num_iters>; +/// <type> buffer[num_iters]; +/// #pragma omp ... +/// for (i: 0..<num_iters>) { +/// <input phase>; +/// buffer[i] = red; +/// } +/// for (int k = 0; k != ceil(log2(num_iters)); ++k) +/// for (size cnt = last_iter; cnt >= pow(2, k); --k) +/// buffer[i] op= buffer[i-pow(2,k)]; +/// #pragma omp ... +/// for (0..<num_iters>) { +/// red = InclusiveScan ? buffer[i] : buffer[i-1]; +/// <scan phase>; +/// } +/// \endcode +static void emitScanBasedDirective( + CodeGenFunction &CGF, const OMPLoopDirective &S, + llvm::function_ref<llvm::Value *(CodeGenFunction &)> NumIteratorsGen, + llvm::function_ref<void(CodeGenFunction &)> FirstGen, + llvm::function_ref<void(CodeGenFunction &)> SecondGen) { + llvm::Value *OMPScanNumIterations = CGF.Builder.CreateIntCast( + NumIteratorsGen(CGF), CGF.SizeTy, /*isSigned=*/false); + SmallVector<const Expr *, 4> Shareds; + SmallVector<const Expr *, 4> Privates; + SmallVector<const Expr *, 4> ReductionOps; + SmallVector<const Expr *, 4> LHSs; + SmallVector<const Expr *, 4> RHSs; + SmallVector<const Expr *, 4> CopyOps; + SmallVector<const Expr *, 4> CopyArrayTemps; + SmallVector<const Expr *, 4> CopyArrayElems; + for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) { + assert(C->getModifier() == OMPC_REDUCTION_inscan && + "Only inscan reductions are expected."); + Shareds.append(C->varlist_begin(), C->varlist_end()); + Privates.append(C->privates().begin(), C->privates().end()); + ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end()); + LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end()); + RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end()); + CopyOps.append(C->copy_ops().begin(), C->copy_ops().end()); + CopyArrayTemps.append(C->copy_array_temps().begin(), + C->copy_array_temps().end()); + CopyArrayElems.append(C->copy_array_elems().begin(), + C->copy_array_elems().end()); + } + { + // Emit buffers for each reduction variables. + // ReductionCodeGen is required to emit correctly the code for array + // reductions. + ReductionCodeGen RedCG(Shareds, Shareds, Privates, ReductionOps); + unsigned Count = 0; + auto *ITA = CopyArrayTemps.begin(); + for (const Expr *IRef : Privates) { + const auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(IRef)->getDecl()); + // Emit variably modified arrays, used for arrays/array sections + // reductions. + if (PrivateVD->getType()->isVariablyModifiedType()) { + RedCG.emitSharedOrigLValue(CGF, Count); + RedCG.emitAggregateType(CGF, Count); + } + CodeGenFunction::OpaqueValueMapping DimMapping( + CGF, + cast<OpaqueValueExpr>( + cast<VariableArrayType>((*ITA)->getType()->getAsArrayTypeUnsafe()) + ->getSizeExpr()), + RValue::get(OMPScanNumIterations)); + // Emit temp buffer. + CGF.EmitVarDecl(*cast<VarDecl>(cast<DeclRefExpr>(*ITA)->getDecl())); + ++ITA; + ++Count; + } + } + CodeGenFunction::ParentLoopDirectiveForScanRegion ScanRegion(CGF, S); + { + // Emit loop with input phase: + // #pragma omp ... + // for (i: 0..<num_iters>) { + // <input phase>; + // buffer[i] = red; + // } + CGF.OMPFirstScanLoop = true; + CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF); + FirstGen(CGF); + } + // Emit prefix reduction: + // for (int k = 0; k <= ceil(log2(n)); ++k) + llvm::BasicBlock *InputBB = CGF.Builder.GetInsertBlock(); + llvm::BasicBlock *LoopBB = CGF.createBasicBlock("omp.outer.log.scan.body"); + llvm::BasicBlock *ExitBB = CGF.createBasicBlock("omp.outer.log.scan.exit"); + llvm::Function *F = CGF.CGM.getIntrinsic(llvm::Intrinsic::log2, CGF.DoubleTy); + llvm::Value *Arg = + CGF.Builder.CreateUIToFP(OMPScanNumIterations, CGF.DoubleTy); + llvm::Value *LogVal = CGF.EmitNounwindRuntimeCall(F, Arg); + F = CGF.CGM.getIntrinsic(llvm::Intrinsic::ceil, CGF.DoubleTy); + LogVal = CGF.EmitNounwindRuntimeCall(F, LogVal); + LogVal = CGF.Builder.CreateFPToUI(LogVal, CGF.IntTy); + llvm::Value *NMin1 = CGF.Builder.CreateNUWSub( + OMPScanNumIterations, llvm::ConstantInt::get(CGF.SizeTy, 1)); + auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, S.getBeginLoc()); + CGF.EmitBlock(LoopBB); + auto *Counter = CGF.Builder.CreatePHI(CGF.IntTy, 2); + // size pow2k = 1; + auto *Pow2K = CGF.Builder.CreatePHI(CGF.SizeTy, 2); + Counter->addIncoming(llvm::ConstantInt::get(CGF.IntTy, 0), InputBB); + Pow2K->addIncoming(llvm::ConstantInt::get(CGF.SizeTy, 1), InputBB); + // for (size i = n - 1; i >= 2 ^ k; --i) + // tmp[i] op= tmp[i-pow2k]; + llvm::BasicBlock *InnerLoopBB = + CGF.createBasicBlock("omp.inner.log.scan.body"); + llvm::BasicBlock *InnerExitBB = + CGF.createBasicBlock("omp.inner.log.scan.exit"); + llvm::Value *CmpI = CGF.Builder.CreateICmpUGE(NMin1, Pow2K); + CGF.Builder.CreateCondBr(CmpI, InnerLoopBB, InnerExitBB); + CGF.EmitBlock(InnerLoopBB); + auto *IVal = CGF.Builder.CreatePHI(CGF.SizeTy, 2); + IVal->addIncoming(NMin1, LoopBB); + { + CodeGenFunction::OMPPrivateScope PrivScope(CGF); + auto *ILHS = LHSs.begin(); + auto *IRHS = RHSs.begin(); + for (const Expr *CopyArrayElem : CopyArrayElems) { + const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl()); + const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl()); + Address LHSAddr = Address::invalid(); + { + CodeGenFunction::OpaqueValueMapping IdxMapping( + CGF, + cast<OpaqueValueExpr>( + cast<ArraySubscriptExpr>(CopyArrayElem)->getIdx()), + RValue::get(IVal)); + LHSAddr = CGF.EmitLValue(CopyArrayElem).getAddress(CGF); + } + PrivScope.addPrivate(LHSVD, [LHSAddr]() { return LHSAddr; }); + Address RHSAddr = Address::invalid(); + { + llvm::Value *OffsetIVal = CGF.Builder.CreateNUWSub(IVal, Pow2K); + CodeGenFunction::OpaqueValueMapping IdxMapping( + CGF, + cast<OpaqueValueExpr>( + cast<ArraySubscriptExpr>(CopyArrayElem)->getIdx()), + RValue::get(OffsetIVal)); + RHSAddr = CGF.EmitLValue(CopyArrayElem).getAddress(CGF); + } + PrivScope.addPrivate(RHSVD, [RHSAddr]() { return RHSAddr; }); + ++ILHS; + ++IRHS; + } + PrivScope.Privatize(); + CGF.CGM.getOpenMPRuntime().emitReduction( + CGF, S.getEndLoc(), Privates, LHSs, RHSs, ReductionOps, + {/*WithNowait=*/true, /*SimpleReduction=*/true, OMPD_unknown}); + } + llvm::Value *NextIVal = + CGF.Builder.CreateNUWSub(IVal, llvm::ConstantInt::get(CGF.SizeTy, 1)); + IVal->addIncoming(NextIVal, CGF.Builder.GetInsertBlock()); + CmpI = CGF.Builder.CreateICmpUGE(NextIVal, Pow2K); + CGF.Builder.CreateCondBr(CmpI, InnerLoopBB, InnerExitBB); + CGF.EmitBlock(InnerExitBB); + llvm::Value *Next = + CGF.Builder.CreateNUWAdd(Counter, llvm::ConstantInt::get(CGF.IntTy, 1)); + Counter->addIncoming(Next, CGF.Builder.GetInsertBlock()); + // pow2k <<= 1; + llvm::Value *NextPow2K = CGF.Builder.CreateShl(Pow2K, 1, "", /*HasNUW=*/true); + Pow2K->addIncoming(NextPow2K, CGF.Builder.GetInsertBlock()); + llvm::Value *Cmp = CGF.Builder.CreateICmpNE(Next, LogVal); + CGF.Builder.CreateCondBr(Cmp, LoopBB, ExitBB); + auto DL1 = ApplyDebugLocation::CreateDefaultArtificial(CGF, S.getEndLoc()); + CGF.EmitBlock(ExitBB); + + CGF.OMPFirstScanLoop = false; + SecondGen(CGF); +} + +static bool emitWorksharingDirective(CodeGenFunction &CGF, + const OMPLoopDirective &S, + bool HasCancel) { + bool HasLastprivates; + if (llvm::any_of(S.getClausesOfKind<OMPReductionClause>(), + [](const OMPReductionClause *C) { + return C->getModifier() == OMPC_REDUCTION_inscan; + })) { + const auto &&NumIteratorsGen = [&S](CodeGenFunction &CGF) { + CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF); + OMPLoopScope LoopScope(CGF, S); + return CGF.EmitScalarExpr(S.getNumIterations()); + }; + const auto &&FirstGen = [&S, HasCancel](CodeGenFunction &CGF) { + CodeGenFunction::OMPCancelStackRAII CancelRegion( + CGF, S.getDirectiveKind(), HasCancel); + (void)CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), + emitForLoopBounds, + emitDispatchForLoopBounds); + // Emit an implicit barrier at the end. + CGF.CGM.getOpenMPRuntime().emitBarrierCall(CGF, S.getBeginLoc(), + OMPD_for); + }; + const auto &&SecondGen = [&S, HasCancel, + &HasLastprivates](CodeGenFunction &CGF) { + CodeGenFunction::OMPCancelStackRAII CancelRegion( + CGF, S.getDirectiveKind(), HasCancel); + HasLastprivates = CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), + emitForLoopBounds, + emitDispatchForLoopBounds); + }; + emitScanBasedDirective(CGF, S, NumIteratorsGen, FirstGen, SecondGen); + } else { + CodeGenFunction::OMPCancelStackRAII CancelRegion(CGF, S.getDirectiveKind(), + HasCancel); + HasLastprivates = CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), + emitForLoopBounds, + emitDispatchForLoopBounds); + } + return HasLastprivates; +} + void CodeGenFunction::EmitOMPForDirective(const OMPForDirective &S) { bool HasLastprivates = false; auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF, PrePostActionTy &) { - OMPCancelStackRAII CancelRegion(CGF, OMPD_for, S.hasCancel()); - HasLastprivates = CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), - emitForLoopBounds, - emitDispatchForLoopBounds); + HasLastprivates = emitWorksharingDirective(CGF, S, S.hasCancel()); }; { + auto LPCRegion = + CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S); OMPLexicalScope Scope(*this, S, OMPD_unknown); CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_for, CodeGen, S.hasCancel()); @@ -2773,17 +3356,19 @@ void CodeGenFunction::EmitOMPForDirective(const OMPForDirective &S) { // Emit an implicit barrier at the end. if (!S.getSingleClause<OMPNowaitClause>() || HasLastprivates) CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(), OMPD_for); + // Check for outer lastprivate conditional update. + checkForLastprivateConditionalUpdate(*this, S); } void CodeGenFunction::EmitOMPForSimdDirective(const OMPForSimdDirective &S) { bool HasLastprivates = false; auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF, PrePostActionTy &) { - HasLastprivates = CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), - emitForLoopBounds, - emitDispatchForLoopBounds); + HasLastprivates = emitWorksharingDirective(CGF, S, /*HasCancel=*/false); }; { + auto LPCRegion = + CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S); OMPLexicalScope Scope(*this, S, OMPD_unknown); CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen); } @@ -2791,6 +3376,8 @@ void CodeGenFunction::EmitOMPForSimdDirective(const OMPForSimdDirective &S) { // Emit an implicit barrier at the end. if (!S.getSingleClause<OMPNowaitClause>() || HasLastprivates) CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(), OMPD_for); + // Check for outer lastprivate conditional update. + checkForLastprivateConditionalUpdate(*this, S); } static LValue createSectionLVal(CodeGenFunction &CGF, QualType Ty, @@ -2808,7 +3395,7 @@ void CodeGenFunction::EmitSections(const OMPExecutableDirective &S) { bool HasLastprivates = false; auto &&CodeGen = [&S, CapturedStmt, CS, &HasLastprivates](CodeGenFunction &CGF, PrePostActionTy &) { - ASTContext &C = CGF.getContext(); + const ASTContext &C = CGF.getContext(); QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1); // Emit helper vars inits. @@ -2830,11 +3417,13 @@ void CodeGenFunction::EmitSections(const OMPExecutableDirective &S) { OpaqueValueExpr UBRefExpr(S.getBeginLoc(), KmpInt32Ty, VK_LValue); CodeGenFunction::OpaqueValueMapping OpaqueUB(CGF, &UBRefExpr, UB); // Generate condition for loop. - BinaryOperator Cond(&IVRefExpr, &UBRefExpr, BO_LE, C.BoolTy, VK_RValue, - OK_Ordinary, S.getBeginLoc(), FPOptions()); + BinaryOperator *Cond = BinaryOperator::Create( + C, &IVRefExpr, &UBRefExpr, BO_LE, C.BoolTy, VK_RValue, OK_Ordinary, + S.getBeginLoc(), FPOptionsOverride()); // Increment for loop counter. - UnaryOperator Inc(&IVRefExpr, UO_PreInc, KmpInt32Ty, VK_RValue, OK_Ordinary, - S.getBeginLoc(), true); + UnaryOperator *Inc = UnaryOperator::Create( + C, &IVRefExpr, UO_PreInc, KmpInt32Ty, VK_RValue, OK_Ordinary, + S.getBeginLoc(), true, FPOptionsOverride()); auto &&BodyGen = [CapturedStmt, CS, &S, &IV](CodeGenFunction &CGF) { // Iterate through all sections and emit a switch construct: // switch (IV) { @@ -2847,7 +3436,6 @@ void CodeGenFunction::EmitSections(const OMPExecutableDirective &S) { // break; // } // .omp.sections.exit: - CGF.CGM.getOpenMPRuntime().initLastprivateConditionalCounter(CGF, S); llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".omp.sections.exit"); llvm::SwitchInst *SwitchStmt = CGF.Builder.CreateSwitch(CGF.EmitLoadOfScalar(IV, S.getBeginLoc()), @@ -2905,7 +3493,7 @@ void CodeGenFunction::EmitSections(const OMPExecutableDirective &S) { // IV = LB; CGF.EmitStoreOfScalar(CGF.EmitLoadOfScalar(LB, S.getBeginLoc()), IV); // while (idx <= UB) { BODY; ++idx; } - CGF.EmitOMPInnerLoop(S, /*RequiresCleanup=*/false, &Cond, &Inc, BodyGen, + CGF.EmitOMPInnerLoop(S, /*RequiresCleanup=*/false, Cond, Inc, BodyGen, [](CodeGenFunction &) {}); // Tell the runtime we are done. auto &&CodeGen = [&S](CodeGenFunction &CGF) { @@ -2949,6 +3537,8 @@ void CodeGenFunction::EmitSections(const OMPExecutableDirective &S) { void CodeGenFunction::EmitOMPSectionsDirective(const OMPSectionsDirective &S) { { + auto LPCRegion = + CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S); OMPLexicalScope Scope(*this, S, OMPD_unknown); EmitSections(S); } @@ -2957,6 +3547,8 @@ void CodeGenFunction::EmitOMPSectionsDirective(const OMPSectionsDirective &S) { CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(), OMPD_sections); } + // Check for outer lastprivate conditional update. + checkForLastprivateConditionalUpdate(*this, S); } void CodeGenFunction::EmitOMPSectionDirective(const OMPSectionDirective &S) { @@ -2995,6 +3587,8 @@ void CodeGenFunction::EmitOMPSingleDirective(const OMPSingleDirective &S) { CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt()); }; { + auto LPCRegion = + CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S); OMPLexicalScope Scope(*this, S, OMPD_unknown); CGM.getOpenMPRuntime().emitSingleRegion(*this, CodeGen, S.getBeginLoc(), CopyprivateVars, DestExprs, @@ -3007,6 +3601,8 @@ void CodeGenFunction::EmitOMPSingleDirective(const OMPSingleDirective &S) { *this, S.getBeginLoc(), S.getSingleClause<OMPNowaitClause>() ? OMPD_unknown : OMPD_single); } + // Check for outer lastprivate conditional update. + checkForLastprivateConditionalUpdate(*this, S); } static void emitMaster(CodeGenFunction &CGF, const OMPExecutableDirective &S) { @@ -3018,11 +3614,75 @@ static void emitMaster(CodeGenFunction &CGF, const OMPExecutableDirective &S) { } void CodeGenFunction::EmitOMPMasterDirective(const OMPMasterDirective &S) { + if (CGM.getLangOpts().OpenMPIRBuilder) { + llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder(); + using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy; + + const CapturedStmt *CS = S.getInnermostCapturedStmt(); + const Stmt *MasterRegionBodyStmt = CS->getCapturedStmt(); + + auto FiniCB = [this](InsertPointTy IP) { + OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP); + }; + + auto BodyGenCB = [MasterRegionBodyStmt, this](InsertPointTy AllocaIP, + InsertPointTy CodeGenIP, + llvm::BasicBlock &FiniBB) { + OMPBuilderCBHelpers::InlinedRegionBodyRAII IRB(*this, AllocaIP, FiniBB); + OMPBuilderCBHelpers::EmitOMPRegionBody(*this, MasterRegionBodyStmt, + CodeGenIP, FiniBB); + }; + + CGCapturedStmtInfo CGSI(*CS, CR_OpenMP); + CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(*this, &CGSI); + Builder.restoreIP(OMPBuilder.CreateMaster(Builder, BodyGenCB, FiniCB)); + + return; + } OMPLexicalScope Scope(*this, S, OMPD_unknown); emitMaster(*this, S); } void CodeGenFunction::EmitOMPCriticalDirective(const OMPCriticalDirective &S) { + if (CGM.getLangOpts().OpenMPIRBuilder) { + llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder(); + using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy; + + const CapturedStmt *CS = S.getInnermostCapturedStmt(); + const Stmt *CriticalRegionBodyStmt = CS->getCapturedStmt(); + const Expr *Hint = nullptr; + if (const auto *HintClause = S.getSingleClause<OMPHintClause>()) + Hint = HintClause->getHint(); + + // TODO: This is slightly different from what's currently being done in + // clang. Fix the Int32Ty to IntPtrTy (pointer width size) when everything + // about typing is final. + llvm::Value *HintInst = nullptr; + if (Hint) + HintInst = + Builder.CreateIntCast(EmitScalarExpr(Hint), CGM.Int32Ty, false); + + auto FiniCB = [this](InsertPointTy IP) { + OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP); + }; + + auto BodyGenCB = [CriticalRegionBodyStmt, this](InsertPointTy AllocaIP, + InsertPointTy CodeGenIP, + llvm::BasicBlock &FiniBB) { + OMPBuilderCBHelpers::InlinedRegionBodyRAII IRB(*this, AllocaIP, FiniBB); + OMPBuilderCBHelpers::EmitOMPRegionBody(*this, CriticalRegionBodyStmt, + CodeGenIP, FiniBB); + }; + + CGCapturedStmtInfo CGSI(*CS, CR_OpenMP); + CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(*this, &CGSI); + Builder.restoreIP(OMPBuilder.CreateCritical( + Builder, BodyGenCB, FiniCB, S.getDirectiveName().getAsString(), + HintInst)); + + return; + } + auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { Action.Enter(CGF); CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt()); @@ -3042,12 +3702,16 @@ void CodeGenFunction::EmitOMPParallelForDirective( // directives: 'parallel' with 'for' directive. auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { Action.Enter(CGF); - OMPCancelStackRAII CancelRegion(CGF, OMPD_parallel_for, S.hasCancel()); - CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds, - emitDispatchForLoopBounds); + (void)emitWorksharingDirective(CGF, S, S.hasCancel()); }; - emitCommonOMPParallelDirective(*this, S, OMPD_for, CodeGen, - emitEmptyBoundParameters); + { + auto LPCRegion = + CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S); + emitCommonOMPParallelDirective(*this, S, OMPD_for, CodeGen, + emitEmptyBoundParameters); + } + // Check for outer lastprivate conditional update. + checkForLastprivateConditionalUpdate(*this, S); } void CodeGenFunction::EmitOMPParallelForSimdDirective( @@ -3056,11 +3720,16 @@ void CodeGenFunction::EmitOMPParallelForSimdDirective( // directives: 'parallel' with 'for' directive. auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { Action.Enter(CGF); - CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds, - emitDispatchForLoopBounds); + (void)emitWorksharingDirective(CGF, S, /*HasCancel=*/false); }; - emitCommonOMPParallelDirective(*this, S, OMPD_simd, CodeGen, - emitEmptyBoundParameters); + { + auto LPCRegion = + CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S); + emitCommonOMPParallelDirective(*this, S, OMPD_for_simd, CodeGen, + emitEmptyBoundParameters); + } + // Check for outer lastprivate conditional update. + checkForLastprivateConditionalUpdate(*this, S); } void CodeGenFunction::EmitOMPParallelMasterDirective( @@ -3086,10 +3755,16 @@ void CodeGenFunction::EmitOMPParallelMasterDirective( emitMaster(CGF, S); CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel); }; - emitCommonOMPParallelDirective(*this, S, OMPD_master, CodeGen, - emitEmptyBoundParameters); - emitPostUpdateForReductionClause(*this, S, - [](CodeGenFunction &) { return nullptr; }); + { + auto LPCRegion = + CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S); + emitCommonOMPParallelDirective(*this, S, OMPD_master, CodeGen, + emitEmptyBoundParameters); + emitPostUpdateForReductionClause(*this, S, + [](CodeGenFunction &) { return nullptr; }); + } + // Check for outer lastprivate conditional update. + checkForLastprivateConditionalUpdate(*this, S); } void CodeGenFunction::EmitOMPParallelSectionsDirective( @@ -3100,8 +3775,14 @@ void CodeGenFunction::EmitOMPParallelSectionsDirective( Action.Enter(CGF); CGF.EmitSections(S); }; - emitCommonOMPParallelDirective(*this, S, OMPD_sections, CodeGen, - emitEmptyBoundParameters); + { + auto LPCRegion = + CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S); + emitCommonOMPParallelDirective(*this, S, OMPD_sections, CodeGen, + emitEmptyBoundParameters); + } + // Check for outer lastprivate conditional update. + checkForLastprivateConditionalUpdate(*this, S); } void CodeGenFunction::EmitOMPTaskBasedDirective( @@ -3188,33 +3869,28 @@ void CodeGenFunction::EmitOMPTaskBasedDirective( SmallVector<const Expr *, 4> LHSs; SmallVector<const Expr *, 4> RHSs; for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) { - auto IPriv = C->privates().begin(); - auto IRed = C->reduction_ops().begin(); - auto ILHS = C->lhs_exprs().begin(); - auto IRHS = C->rhs_exprs().begin(); - for (const Expr *Ref : C->varlists()) { - Data.ReductionVars.emplace_back(Ref); - Data.ReductionCopies.emplace_back(*IPriv); - Data.ReductionOps.emplace_back(*IRed); - LHSs.emplace_back(*ILHS); - RHSs.emplace_back(*IRHS); - std::advance(IPriv, 1); - std::advance(IRed, 1); - std::advance(ILHS, 1); - std::advance(IRHS, 1); - } + Data.ReductionVars.append(C->varlist_begin(), C->varlist_end()); + Data.ReductionOrigs.append(C->varlist_begin(), C->varlist_end()); + Data.ReductionCopies.append(C->privates().begin(), C->privates().end()); + Data.ReductionOps.append(C->reduction_ops().begin(), + C->reduction_ops().end()); + LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end()); + RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end()); } Data.Reductions = CGM.getOpenMPRuntime().emitTaskReductionInit( *this, S.getBeginLoc(), LHSs, RHSs, Data); // Build list of dependences. - for (const auto *C : S.getClausesOfKind<OMPDependClause>()) - for (const Expr *IRef : C->varlists()) - Data.Dependences.emplace_back(C->getDependencyKind(), IRef); + for (const auto *C : S.getClausesOfKind<OMPDependClause>()) { + OMPTaskDataTy::DependData &DD = + Data.Dependences.emplace_back(C->getDependencyKind(), C->getModifier()); + DD.DepExprs.append(C->varlist_begin(), C->varlist_end()); + } auto &&CodeGen = [&Data, &S, CS, &BodyGen, &LastprivateDstsOrigs, CapturedRegion](CodeGenFunction &CGF, PrePostActionTy &Action) { // Set proper addresses for generated private copies. OMPPrivateScope Scope(CGF); + llvm::SmallVector<std::pair<const VarDecl *, Address>, 16> FirstprivatePtrs; if (!Data.PrivateVars.empty() || !Data.FirstprivateVars.empty() || !Data.LastprivateVars.empty()) { llvm::FunctionType *CopyFnTy = llvm::FunctionType::get( @@ -3241,6 +3917,7 @@ void CodeGenFunction::EmitOMPTaskBasedDirective( CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()), ".firstpriv.ptr.addr"); PrivatePtrs.emplace_back(VD, PrivatePtr); + FirstprivatePtrs.emplace_back(VD, PrivatePtr); CallArgs.push_back(PrivatePtr.getPointer()); } for (const Expr *E : Data.LastprivateVars) { @@ -3271,13 +3948,21 @@ void CodeGenFunction::EmitOMPTaskBasedDirective( } } if (Data.Reductions) { + OMPPrivateScope FirstprivateScope(CGF); + for (const auto &Pair : FirstprivatePtrs) { + Address Replacement(CGF.Builder.CreateLoad(Pair.second), + CGF.getContext().getDeclAlign(Pair.first)); + FirstprivateScope.addPrivate(Pair.first, + [Replacement]() { return Replacement; }); + } + (void)FirstprivateScope.Privatize(); OMPLexicalScope LexScope(CGF, S, CapturedRegion); - ReductionCodeGen RedCG(Data.ReductionVars, Data.ReductionCopies, - Data.ReductionOps); + ReductionCodeGen RedCG(Data.ReductionVars, Data.ReductionVars, + Data.ReductionCopies, Data.ReductionOps); llvm::Value *ReductionsPtr = CGF.Builder.CreateLoad( CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(9))); for (unsigned Cnt = 0, E = Data.ReductionVars.size(); Cnt < E; ++Cnt) { - RedCG.emitSharedLValue(CGF, Cnt); + RedCG.emitSharedOrigLValue(CGF, Cnt); RedCG.emitAggregateType(CGF, Cnt); // FIXME: This must removed once the runtime library is fixed. // Emit required threadprivate variables for @@ -3322,9 +4007,9 @@ void CodeGenFunction::EmitOMPTaskBasedDirective( // privatized earlier. OMPPrivateScope InRedScope(CGF); if (!InRedVars.empty()) { - ReductionCodeGen RedCG(InRedVars, InRedPrivs, InRedOps); + ReductionCodeGen RedCG(InRedVars, InRedVars, InRedPrivs, InRedOps); for (unsigned Cnt = 0, E = InRedVars.size(); Cnt < E; ++Cnt) { - RedCG.emitSharedLValue(CGF, Cnt); + RedCG.emitSharedOrigLValue(CGF, Cnt); RedCG.emitAggregateType(CGF, Cnt); // The taskgroup descriptor variable is always implicit firstprivate and // privatized already during processing of the firstprivates. @@ -3333,9 +4018,13 @@ void CodeGenFunction::EmitOMPTaskBasedDirective( // initializer/combiner/finalizer. CGF.CGM.getOpenMPRuntime().emitTaskReductionFixups(CGF, S.getBeginLoc(), RedCG, Cnt); - llvm::Value *ReductionsPtr = - CGF.EmitLoadOfScalar(CGF.EmitLValue(TaskgroupDescriptors[Cnt]), - TaskgroupDescriptors[Cnt]->getExprLoc()); + llvm::Value *ReductionsPtr; + if (const Expr *TRExpr = TaskgroupDescriptors[Cnt]) { + ReductionsPtr = CGF.EmitLoadOfScalar(CGF.EmitLValue(TRExpr), + TRExpr->getExprLoc()); + } else { + ReductionsPtr = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); + } Address Replacement = CGF.CGM.getOpenMPRuntime().getTaskReductionItem( CGF, S.getBeginLoc(), ReductionsPtr, RedCG.getSharedLValue(Cnt)); Replacement = Address( @@ -3448,9 +4137,11 @@ void CodeGenFunction::EmitOMPTargetTaskBasedDirective( } (void)TargetScope.Privatize(); // Build list of dependences. - for (const auto *C : S.getClausesOfKind<OMPDependClause>()) - for (const Expr *IRef : C->varlists()) - Data.Dependences.emplace_back(C->getDependencyKind(), IRef); + for (const auto *C : S.getClausesOfKind<OMPDependClause>()) { + OMPTaskDataTy::DependData &DD = + Data.Dependences.emplace_back(C->getDependencyKind(), C->getModifier()); + DD.DepExprs.append(C->varlist_begin(), C->varlist_end()); + } auto &&CodeGen = [&Data, &S, CS, &BodyGen, BPVD, PVD, SVD, &InputInfo](CodeGenFunction &CGF, PrePostActionTy &Action) { // Set proper addresses for generated private copies. @@ -3537,6 +4228,8 @@ void CodeGenFunction::EmitOMPTaskDirective(const OMPTaskDirective &S) { SharedsTy, CapturedStruct, IfCond, Data); }; + auto LPCRegion = + CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S); EmitOMPTaskBasedDirective(S, OMPD_task, BodyGen, TaskGen, Data); } @@ -3562,21 +4255,13 @@ void CodeGenFunction::EmitOMPTaskgroupDirective( SmallVector<const Expr *, 4> RHSs; OMPTaskDataTy Data; for (const auto *C : S.getClausesOfKind<OMPTaskReductionClause>()) { - auto IPriv = C->privates().begin(); - auto IRed = C->reduction_ops().begin(); - auto ILHS = C->lhs_exprs().begin(); - auto IRHS = C->rhs_exprs().begin(); - for (const Expr *Ref : C->varlists()) { - Data.ReductionVars.emplace_back(Ref); - Data.ReductionCopies.emplace_back(*IPriv); - Data.ReductionOps.emplace_back(*IRed); - LHSs.emplace_back(*ILHS); - RHSs.emplace_back(*IRHS); - std::advance(IPriv, 1); - std::advance(IRed, 1); - std::advance(ILHS, 1); - std::advance(IRHS, 1); - } + Data.ReductionVars.append(C->varlist_begin(), C->varlist_end()); + Data.ReductionOrigs.append(C->varlist_begin(), C->varlist_end()); + Data.ReductionCopies.append(C->privates().begin(), C->privates().end()); + Data.ReductionOps.append(C->reduction_ops().begin(), + C->reduction_ops().end()); + LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end()); + RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end()); } llvm::Value *ReductionDesc = CGF.CGM.getOpenMPRuntime().emitTaskReductionInit(CGF, S.getBeginLoc(), @@ -3593,6 +4278,9 @@ void CodeGenFunction::EmitOMPTaskgroupDirective( } void CodeGenFunction::EmitOMPFlushDirective(const OMPFlushDirective &S) { + llvm::AtomicOrdering AO = S.getSingleClause<OMPFlushClause>() + ? llvm::AtomicOrdering::NotAtomic + : llvm::AtomicOrdering::AcquireRelease; CGM.getOpenMPRuntime().emitFlush( *this, [&S]() -> ArrayRef<const Expr *> { @@ -3601,7 +4289,233 @@ void CodeGenFunction::EmitOMPFlushDirective(const OMPFlushDirective &S) { FlushClause->varlist_end()); return llvm::None; }(), - S.getBeginLoc()); + S.getBeginLoc(), AO); +} + +void CodeGenFunction::EmitOMPDepobjDirective(const OMPDepobjDirective &S) { + const auto *DO = S.getSingleClause<OMPDepobjClause>(); + LValue DOLVal = EmitLValue(DO->getDepobj()); + if (const auto *DC = S.getSingleClause<OMPDependClause>()) { + OMPTaskDataTy::DependData Dependencies(DC->getDependencyKind(), + DC->getModifier()); + Dependencies.DepExprs.append(DC->varlist_begin(), DC->varlist_end()); + Address DepAddr = CGM.getOpenMPRuntime().emitDepobjDependClause( + *this, Dependencies, DC->getBeginLoc()); + EmitStoreOfScalar(DepAddr.getPointer(), DOLVal); + return; + } + if (const auto *DC = S.getSingleClause<OMPDestroyClause>()) { + CGM.getOpenMPRuntime().emitDestroyClause(*this, DOLVal, DC->getBeginLoc()); + return; + } + if (const auto *UC = S.getSingleClause<OMPUpdateClause>()) { + CGM.getOpenMPRuntime().emitUpdateClause( + *this, DOLVal, UC->getDependencyKind(), UC->getBeginLoc()); + return; + } +} + +void CodeGenFunction::EmitOMPScanDirective(const OMPScanDirective &S) { + if (!OMPParentLoopDirectiveForScan) + return; + const OMPExecutableDirective &ParentDir = *OMPParentLoopDirectiveForScan; + bool IsInclusive = S.hasClausesOfKind<OMPInclusiveClause>(); + SmallVector<const Expr *, 4> Shareds; + SmallVector<const Expr *, 4> Privates; + SmallVector<const Expr *, 4> LHSs; + SmallVector<const Expr *, 4> RHSs; + SmallVector<const Expr *, 4> ReductionOps; + SmallVector<const Expr *, 4> CopyOps; + SmallVector<const Expr *, 4> CopyArrayTemps; + SmallVector<const Expr *, 4> CopyArrayElems; + for (const auto *C : ParentDir.getClausesOfKind<OMPReductionClause>()) { + if (C->getModifier() != OMPC_REDUCTION_inscan) + continue; + Shareds.append(C->varlist_begin(), C->varlist_end()); + Privates.append(C->privates().begin(), C->privates().end()); + LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end()); + RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end()); + ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end()); + CopyOps.append(C->copy_ops().begin(), C->copy_ops().end()); + CopyArrayTemps.append(C->copy_array_temps().begin(), + C->copy_array_temps().end()); + CopyArrayElems.append(C->copy_array_elems().begin(), + C->copy_array_elems().end()); + } + if (ParentDir.getDirectiveKind() == OMPD_simd || + (getLangOpts().OpenMPSimd && + isOpenMPSimdDirective(ParentDir.getDirectiveKind()))) { + // For simd directive and simd-based directives in simd only mode, use the + // following codegen: + // int x = 0; + // #pragma omp simd reduction(inscan, +: x) + // for (..) { + // <first part> + // #pragma omp scan inclusive(x) + // <second part> + // } + // is transformed to: + // int x = 0; + // for (..) { + // int x_priv = 0; + // <first part> + // x = x_priv + x; + // x_priv = x; + // <second part> + // } + // and + // int x = 0; + // #pragma omp simd reduction(inscan, +: x) + // for (..) { + // <first part> + // #pragma omp scan exclusive(x) + // <second part> + // } + // to + // int x = 0; + // for (..) { + // int x_priv = 0; + // <second part> + // int temp = x; + // x = x_priv + x; + // x_priv = temp; + // <first part> + // } + llvm::BasicBlock *OMPScanReduce = createBasicBlock("omp.inscan.reduce"); + EmitBranch(IsInclusive + ? OMPScanReduce + : BreakContinueStack.back().ContinueBlock.getBlock()); + EmitBlock(OMPScanDispatch); + { + // New scope for correct construction/destruction of temp variables for + // exclusive scan. + LexicalScope Scope(*this, S.getSourceRange()); + EmitBranch(IsInclusive ? OMPBeforeScanBlock : OMPAfterScanBlock); + EmitBlock(OMPScanReduce); + if (!IsInclusive) { + // Create temp var and copy LHS value to this temp value. + // TMP = LHS; + for (unsigned I = 0, E = CopyArrayElems.size(); I < E; ++I) { + const Expr *PrivateExpr = Privates[I]; + const Expr *TempExpr = CopyArrayTemps[I]; + EmitAutoVarDecl( + *cast<VarDecl>(cast<DeclRefExpr>(TempExpr)->getDecl())); + LValue DestLVal = EmitLValue(TempExpr); + LValue SrcLVal = EmitLValue(LHSs[I]); + EmitOMPCopy(PrivateExpr->getType(), DestLVal.getAddress(*this), + SrcLVal.getAddress(*this), + cast<VarDecl>(cast<DeclRefExpr>(LHSs[I])->getDecl()), + cast<VarDecl>(cast<DeclRefExpr>(RHSs[I])->getDecl()), + CopyOps[I]); + } + } + CGM.getOpenMPRuntime().emitReduction( + *this, ParentDir.getEndLoc(), Privates, LHSs, RHSs, ReductionOps, + {/*WithNowait=*/true, /*SimpleReduction=*/true, OMPD_simd}); + for (unsigned I = 0, E = CopyArrayElems.size(); I < E; ++I) { + const Expr *PrivateExpr = Privates[I]; + LValue DestLVal; + LValue SrcLVal; + if (IsInclusive) { + DestLVal = EmitLValue(RHSs[I]); + SrcLVal = EmitLValue(LHSs[I]); + } else { + const Expr *TempExpr = CopyArrayTemps[I]; + DestLVal = EmitLValue(RHSs[I]); + SrcLVal = EmitLValue(TempExpr); + } + EmitOMPCopy(PrivateExpr->getType(), DestLVal.getAddress(*this), + SrcLVal.getAddress(*this), + cast<VarDecl>(cast<DeclRefExpr>(LHSs[I])->getDecl()), + cast<VarDecl>(cast<DeclRefExpr>(RHSs[I])->getDecl()), + CopyOps[I]); + } + } + EmitBranch(IsInclusive ? OMPAfterScanBlock : OMPBeforeScanBlock); + OMPScanExitBlock = IsInclusive + ? BreakContinueStack.back().ContinueBlock.getBlock() + : OMPScanReduce; + EmitBlock(OMPAfterScanBlock); + return; + } + if (!IsInclusive) { + EmitBranch(BreakContinueStack.back().ContinueBlock.getBlock()); + EmitBlock(OMPScanExitBlock); + } + if (OMPFirstScanLoop) { + // Emit buffer[i] = red; at the end of the input phase. + const auto *IVExpr = cast<OMPLoopDirective>(ParentDir) + .getIterationVariable() + ->IgnoreParenImpCasts(); + LValue IdxLVal = EmitLValue(IVExpr); + llvm::Value *IdxVal = EmitLoadOfScalar(IdxLVal, IVExpr->getExprLoc()); + IdxVal = Builder.CreateIntCast(IdxVal, SizeTy, /*isSigned=*/false); + for (unsigned I = 0, E = CopyArrayElems.size(); I < E; ++I) { + const Expr *PrivateExpr = Privates[I]; + const Expr *OrigExpr = Shareds[I]; + const Expr *CopyArrayElem = CopyArrayElems[I]; + OpaqueValueMapping IdxMapping( + *this, + cast<OpaqueValueExpr>( + cast<ArraySubscriptExpr>(CopyArrayElem)->getIdx()), + RValue::get(IdxVal)); + LValue DestLVal = EmitLValue(CopyArrayElem); + LValue SrcLVal = EmitLValue(OrigExpr); + EmitOMPCopy(PrivateExpr->getType(), DestLVal.getAddress(*this), + SrcLVal.getAddress(*this), + cast<VarDecl>(cast<DeclRefExpr>(LHSs[I])->getDecl()), + cast<VarDecl>(cast<DeclRefExpr>(RHSs[I])->getDecl()), + CopyOps[I]); + } + } + EmitBranch(BreakContinueStack.back().ContinueBlock.getBlock()); + if (IsInclusive) { + EmitBlock(OMPScanExitBlock); + EmitBranch(BreakContinueStack.back().ContinueBlock.getBlock()); + } + EmitBlock(OMPScanDispatch); + if (!OMPFirstScanLoop) { + // Emit red = buffer[i]; at the entrance to the scan phase. + const auto *IVExpr = cast<OMPLoopDirective>(ParentDir) + .getIterationVariable() + ->IgnoreParenImpCasts(); + LValue IdxLVal = EmitLValue(IVExpr); + llvm::Value *IdxVal = EmitLoadOfScalar(IdxLVal, IVExpr->getExprLoc()); + IdxVal = Builder.CreateIntCast(IdxVal, SizeTy, /*isSigned=*/false); + llvm::BasicBlock *ExclusiveExitBB = nullptr; + if (!IsInclusive) { + llvm::BasicBlock *ContBB = createBasicBlock("omp.exclusive.dec"); + ExclusiveExitBB = createBasicBlock("omp.exclusive.copy.exit"); + llvm::Value *Cmp = Builder.CreateIsNull(IdxVal); + Builder.CreateCondBr(Cmp, ExclusiveExitBB, ContBB); + EmitBlock(ContBB); + // Use idx - 1 iteration for exclusive scan. + IdxVal = Builder.CreateNUWSub(IdxVal, llvm::ConstantInt::get(SizeTy, 1)); + } + for (unsigned I = 0, E = CopyArrayElems.size(); I < E; ++I) { + const Expr *PrivateExpr = Privates[I]; + const Expr *OrigExpr = Shareds[I]; + const Expr *CopyArrayElem = CopyArrayElems[I]; + OpaqueValueMapping IdxMapping( + *this, + cast<OpaqueValueExpr>( + cast<ArraySubscriptExpr>(CopyArrayElem)->getIdx()), + RValue::get(IdxVal)); + LValue SrcLVal = EmitLValue(CopyArrayElem); + LValue DestLVal = EmitLValue(OrigExpr); + EmitOMPCopy(PrivateExpr->getType(), DestLVal.getAddress(*this), + SrcLVal.getAddress(*this), + cast<VarDecl>(cast<DeclRefExpr>(LHSs[I])->getDecl()), + cast<VarDecl>(cast<DeclRefExpr>(RHSs[I])->getDecl()), + CopyOps[I]); + } + if (!IsInclusive) { + EmitBlock(ExclusiveExitBB); + } + } + EmitBranch((OMPFirstScanLoop == IsInclusive) ? OMPBeforeScanBlock + : OMPAfterScanBlock); + EmitBlock(OMPAfterScanBlock); } void CodeGenFunction::EmitOMPDistributeLoop(const OMPLoopDirective &S, @@ -3790,7 +4704,7 @@ void CodeGenFunction::EmitOMPDistributeLoop(const OMPLoopDirective &S, }); EmitBlock(LoopExit.getBlock()); // Tell the runtime we are done. - RT.emitForStaticFinish(*this, S.getBeginLoc(), S.getDirectiveKind()); + RT.emitForStaticFinish(*this, S.getEndLoc(), S.getDirectiveKind()); } else { // Emit the outer loop, which requests its work chunk [LB..UB] from // runtime and runs the inner loop to process it. @@ -3843,11 +4757,12 @@ void CodeGenFunction::EmitOMPDistributeDirective( } static llvm::Function *emitOutlinedOrderedFunction(CodeGenModule &CGM, - const CapturedStmt *S) { + const CapturedStmt *S, + SourceLocation Loc) { CodeGenFunction CGF(CGM, /*suppressNewContext=*/true); CodeGenFunction::CGCapturedStmtInfo CapStmtInfo; CGF.CapturedStmtInfo = &CapStmtInfo; - llvm::Function *Fn = CGF.GenerateOpenMPCapturedStmtFunction(*S); + llvm::Function *Fn = CGF.GenerateOpenMPCapturedStmtFunction(*S, Loc); Fn->setDoesNotRecurse(); return Fn; } @@ -3867,7 +4782,8 @@ void CodeGenFunction::EmitOMPOrderedDirective(const OMPOrderedDirective &S) { if (C) { llvm::SmallVector<llvm::Value *, 16> CapturedVars; CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars); - llvm::Function *OutlinedFn = emitOutlinedOrderedFunction(CGM, CS); + llvm::Function *OutlinedFn = + emitOutlinedOrderedFunction(CGM, CS, S.getBeginLoc()); CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, S.getBeginLoc(), OutlinedFn, CapturedVars); } else { @@ -3918,16 +4834,22 @@ convertToComplexValue(CodeGenFunction &CGF, RValue Val, QualType SrcType, return ComplexVal; } -static void emitSimpleAtomicStore(CodeGenFunction &CGF, bool IsSeqCst, +static void emitSimpleAtomicStore(CodeGenFunction &CGF, llvm::AtomicOrdering AO, LValue LVal, RValue RVal) { - if (LVal.isGlobalReg()) { + if (LVal.isGlobalReg()) CGF.EmitStoreThroughGlobalRegLValue(RVal, LVal); - } else { - CGF.EmitAtomicStore(RVal, LVal, - IsSeqCst ? llvm::AtomicOrdering::SequentiallyConsistent - : llvm::AtomicOrdering::Monotonic, - LVal.isVolatile(), /*isInit=*/false); - } + else + CGF.EmitAtomicStore(RVal, LVal, AO, LVal.isVolatile(), /*isInit=*/false); +} + +static RValue emitSimpleAtomicLoad(CodeGenFunction &CGF, + llvm::AtomicOrdering AO, LValue LVal, + SourceLocation Loc) { + if (LVal.isGlobalReg()) + return CGF.EmitLoadOfLValue(LVal, Loc); + return CGF.EmitAtomicLoad( + LVal, Loc, llvm::AtomicCmpXchgInst::getStrongestFailureOrdering(AO), + LVal.isVolatile()); } void CodeGenFunction::emitOMPSimpleStore(LValue LVal, RValue RVal, @@ -3948,7 +4870,7 @@ void CodeGenFunction::emitOMPSimpleStore(LValue LVal, RValue RVal, } } -static void emitOMPAtomicReadExpr(CodeGenFunction &CGF, bool IsSeqCst, +static void emitOMPAtomicReadExpr(CodeGenFunction &CGF, llvm::AtomicOrdering AO, const Expr *X, const Expr *V, SourceLocation Loc) { // v = x; @@ -3956,34 +4878,54 @@ static void emitOMPAtomicReadExpr(CodeGenFunction &CGF, bool IsSeqCst, assert(X->isLValue() && "X of 'omp atomic read' is not lvalue"); LValue XLValue = CGF.EmitLValue(X); LValue VLValue = CGF.EmitLValue(V); - RValue Res = XLValue.isGlobalReg() - ? CGF.EmitLoadOfLValue(XLValue, Loc) - : CGF.EmitAtomicLoad( - XLValue, Loc, - IsSeqCst ? llvm::AtomicOrdering::SequentiallyConsistent - : llvm::AtomicOrdering::Monotonic, - XLValue.isVolatile()); - // OpenMP, 2.12.6, atomic Construct - // Any atomic construct with a seq_cst clause forces the atomically - // performed operation to include an implicit flush operation without a - // list. - if (IsSeqCst) - CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc); + RValue Res = emitSimpleAtomicLoad(CGF, AO, XLValue, Loc); + // OpenMP, 2.17.7, atomic Construct + // If the read or capture clause is specified and the acquire, acq_rel, or + // seq_cst clause is specified then the strong flush on exit from the atomic + // operation is also an acquire flush. + switch (AO) { + case llvm::AtomicOrdering::Acquire: + case llvm::AtomicOrdering::AcquireRelease: + case llvm::AtomicOrdering::SequentiallyConsistent: + CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc, + llvm::AtomicOrdering::Acquire); + break; + case llvm::AtomicOrdering::Monotonic: + case llvm::AtomicOrdering::Release: + break; + case llvm::AtomicOrdering::NotAtomic: + case llvm::AtomicOrdering::Unordered: + llvm_unreachable("Unexpected ordering."); + } CGF.emitOMPSimpleStore(VLValue, Res, X->getType().getNonReferenceType(), Loc); + CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, V); } -static void emitOMPAtomicWriteExpr(CodeGenFunction &CGF, bool IsSeqCst, - const Expr *X, const Expr *E, - SourceLocation Loc) { +static void emitOMPAtomicWriteExpr(CodeGenFunction &CGF, + llvm::AtomicOrdering AO, const Expr *X, + const Expr *E, SourceLocation Loc) { // x = expr; assert(X->isLValue() && "X of 'omp atomic write' is not lvalue"); - emitSimpleAtomicStore(CGF, IsSeqCst, CGF.EmitLValue(X), CGF.EmitAnyExpr(E)); - // OpenMP, 2.12.6, atomic Construct - // Any atomic construct with a seq_cst clause forces the atomically - // performed operation to include an implicit flush operation without a - // list. - if (IsSeqCst) - CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc); + emitSimpleAtomicStore(CGF, AO, CGF.EmitLValue(X), CGF.EmitAnyExpr(E)); + CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, X); + // OpenMP, 2.17.7, atomic Construct + // If the write, update, or capture clause is specified and the release, + // acq_rel, or seq_cst clause is specified then the strong flush on entry to + // the atomic operation is also a release flush. + switch (AO) { + case llvm::AtomicOrdering::Release: + case llvm::AtomicOrdering::AcquireRelease: + case llvm::AtomicOrdering::SequentiallyConsistent: + CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc, + llvm::AtomicOrdering::Release); + break; + case llvm::AtomicOrdering::Acquire: + case llvm::AtomicOrdering::Monotonic: + break; + case llvm::AtomicOrdering::NotAtomic: + case llvm::AtomicOrdering::Unordered: + llvm_unreachable("Unexpected ordering."); + } } static std::pair<bool, RValue> emitOMPAtomicRMW(CodeGenFunction &CGF, LValue X, @@ -4104,10 +5046,10 @@ std::pair<bool, RValue> CodeGenFunction::EmitOMPAtomicSimpleUpdateExpr( return Res; } -static void emitOMPAtomicUpdateExpr(CodeGenFunction &CGF, bool IsSeqCst, - const Expr *X, const Expr *E, - const Expr *UE, bool IsXLHSInRHSPart, - SourceLocation Loc) { +static void emitOMPAtomicUpdateExpr(CodeGenFunction &CGF, + llvm::AtomicOrdering AO, const Expr *X, + const Expr *E, const Expr *UE, + bool IsXLHSInRHSPart, SourceLocation Loc) { assert(isa<BinaryOperator>(UE->IgnoreImpCasts()) && "Update expr in 'atomic update' must be a binary operator."); const auto *BOUE = cast<BinaryOperator>(UE->IgnoreImpCasts()); @@ -4120,9 +5062,6 @@ static void emitOMPAtomicUpdateExpr(CodeGenFunction &CGF, bool IsSeqCst, assert(X->isLValue() && "X of 'omp atomic update' is not lvalue"); LValue XLValue = CGF.EmitLValue(X); RValue ExprRValue = CGF.EmitAnyExpr(E); - llvm::AtomicOrdering AO = IsSeqCst - ? llvm::AtomicOrdering::SequentiallyConsistent - : llvm::AtomicOrdering::Monotonic; const auto *LHS = cast<OpaqueValueExpr>(BOUE->getLHS()->IgnoreImpCasts()); const auto *RHS = cast<OpaqueValueExpr>(BOUE->getRHS()->IgnoreImpCasts()); const OpaqueValueExpr *XRValExpr = IsXLHSInRHSPart ? LHS : RHS; @@ -4134,12 +5073,25 @@ static void emitOMPAtomicUpdateExpr(CodeGenFunction &CGF, bool IsSeqCst, }; (void)CGF.EmitOMPAtomicSimpleUpdateExpr( XLValue, ExprRValue, BOUE->getOpcode(), IsXLHSInRHSPart, AO, Loc, Gen); - // OpenMP, 2.12.6, atomic Construct - // Any atomic construct with a seq_cst clause forces the atomically - // performed operation to include an implicit flush operation without a - // list. - if (IsSeqCst) - CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc); + CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, X); + // OpenMP, 2.17.7, atomic Construct + // If the write, update, or capture clause is specified and the release, + // acq_rel, or seq_cst clause is specified then the strong flush on entry to + // the atomic operation is also a release flush. + switch (AO) { + case llvm::AtomicOrdering::Release: + case llvm::AtomicOrdering::AcquireRelease: + case llvm::AtomicOrdering::SequentiallyConsistent: + CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc, + llvm::AtomicOrdering::Release); + break; + case llvm::AtomicOrdering::Acquire: + case llvm::AtomicOrdering::Monotonic: + break; + case llvm::AtomicOrdering::NotAtomic: + case llvm::AtomicOrdering::Unordered: + llvm_unreachable("Unexpected ordering."); + } } static RValue convertToType(CodeGenFunction &CGF, RValue Value, @@ -4159,7 +5111,8 @@ static RValue convertToType(CodeGenFunction &CGF, RValue Value, llvm_unreachable("Must be a scalar or complex."); } -static void emitOMPAtomicCaptureExpr(CodeGenFunction &CGF, bool IsSeqCst, +static void emitOMPAtomicCaptureExpr(CodeGenFunction &CGF, + llvm::AtomicOrdering AO, bool IsPostfixUpdate, const Expr *V, const Expr *X, const Expr *E, const Expr *UE, bool IsXLHSInRHSPart, @@ -4170,9 +5123,6 @@ static void emitOMPAtomicCaptureExpr(CodeGenFunction &CGF, bool IsSeqCst, LValue VLValue = CGF.EmitLValue(V); LValue XLValue = CGF.EmitLValue(X); RValue ExprRValue = CGF.EmitAnyExpr(E); - llvm::AtomicOrdering AO = IsSeqCst - ? llvm::AtomicOrdering::SequentiallyConsistent - : llvm::AtomicOrdering::Monotonic; QualType NewVValType; if (UE) { // 'x' is updated with some additional value. @@ -4200,6 +5150,7 @@ static void emitOMPAtomicCaptureExpr(CodeGenFunction &CGF, bool IsSeqCst, }; auto Res = CGF.EmitOMPAtomicSimpleUpdateExpr( XLValue, ExprRValue, BOUE->getOpcode(), IsXLHSInRHSPart, AO, Loc, Gen); + CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, X); if (Res.first) { // 'atomicrmw' instruction was generated. if (IsPostfixUpdate) { @@ -4226,6 +5177,7 @@ static void emitOMPAtomicCaptureExpr(CodeGenFunction &CGF, bool IsSeqCst, auto Res = CGF.EmitOMPAtomicSimpleUpdateExpr( XLValue, ExprRValue, /*BO=*/BO_Assign, /*IsXLHSInRHSPart=*/false, AO, Loc, Gen); + CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, X); if (Res.first) { // 'atomicrmw' instruction was generated. NewVVal = IsPostfixUpdate ? Res.second : ExprRValue; @@ -4233,32 +5185,54 @@ static void emitOMPAtomicCaptureExpr(CodeGenFunction &CGF, bool IsSeqCst, } // Emit post-update store to 'v' of old/new 'x' value. CGF.emitOMPSimpleStore(VLValue, NewVVal, NewVValType, Loc); - // OpenMP, 2.12.6, atomic Construct - // Any atomic construct with a seq_cst clause forces the atomically - // performed operation to include an implicit flush operation without a - // list. - if (IsSeqCst) - CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc); + CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, V); + // OpenMP, 2.17.7, atomic Construct + // If the write, update, or capture clause is specified and the release, + // acq_rel, or seq_cst clause is specified then the strong flush on entry to + // the atomic operation is also a release flush. + // If the read or capture clause is specified and the acquire, acq_rel, or + // seq_cst clause is specified then the strong flush on exit from the atomic + // operation is also an acquire flush. + switch (AO) { + case llvm::AtomicOrdering::Release: + CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc, + llvm::AtomicOrdering::Release); + break; + case llvm::AtomicOrdering::Acquire: + CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc, + llvm::AtomicOrdering::Acquire); + break; + case llvm::AtomicOrdering::AcquireRelease: + case llvm::AtomicOrdering::SequentiallyConsistent: + CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc, + llvm::AtomicOrdering::AcquireRelease); + break; + case llvm::AtomicOrdering::Monotonic: + break; + case llvm::AtomicOrdering::NotAtomic: + case llvm::AtomicOrdering::Unordered: + llvm_unreachable("Unexpected ordering."); + } } static void emitOMPAtomicExpr(CodeGenFunction &CGF, OpenMPClauseKind Kind, - bool IsSeqCst, bool IsPostfixUpdate, + llvm::AtomicOrdering AO, bool IsPostfixUpdate, const Expr *X, const Expr *V, const Expr *E, const Expr *UE, bool IsXLHSInRHSPart, SourceLocation Loc) { switch (Kind) { case OMPC_read: - emitOMPAtomicReadExpr(CGF, IsSeqCst, X, V, Loc); + emitOMPAtomicReadExpr(CGF, AO, X, V, Loc); break; case OMPC_write: - emitOMPAtomicWriteExpr(CGF, IsSeqCst, X, E, Loc); + emitOMPAtomicWriteExpr(CGF, AO, X, E, Loc); break; case OMPC_unknown: case OMPC_update: - emitOMPAtomicUpdateExpr(CGF, IsSeqCst, X, E, UE, IsXLHSInRHSPart, Loc); + emitOMPAtomicUpdateExpr(CGF, AO, X, E, UE, IsXLHSInRHSPart, Loc); break; case OMPC_capture: - emitOMPAtomicCaptureExpr(CGF, IsSeqCst, IsPostfixUpdate, V, X, E, UE, + emitOMPAtomicCaptureExpr(CGF, AO, IsPostfixUpdate, V, X, E, UE, IsXLHSInRHSPart, Loc); break; case OMPC_if: @@ -4277,12 +5251,17 @@ static void emitOMPAtomicExpr(CodeGenFunction &CGF, OpenMPClauseKind Kind, case OMPC_collapse: case OMPC_default: case OMPC_seq_cst: + case OMPC_acq_rel: + case OMPC_acquire: + case OMPC_release: + case OMPC_relaxed: case OMPC_shared: case OMPC_linear: case OMPC_aligned: case OMPC_copyin: case OMPC_copyprivate: case OMPC_flush: + case OMPC_depobj: case OMPC_proc_bind: case OMPC_schedule: case OMPC_ordered: @@ -4308,6 +5287,7 @@ static void emitOMPAtomicExpr(CodeGenFunction &CGF, OpenMPClauseKind Kind, case OMPC_to: case OMPC_from: case OMPC_use_device_ptr: + case OMPC_use_device_addr: case OMPC_is_device_ptr: case OMPC_unified_address: case OMPC_unified_shared_memory: @@ -4317,38 +5297,76 @@ static void emitOMPAtomicExpr(CodeGenFunction &CGF, OpenMPClauseKind Kind, case OMPC_device_type: case OMPC_match: case OMPC_nontemporal: + case OMPC_order: + case OMPC_destroy: + case OMPC_detach: + case OMPC_inclusive: + case OMPC_exclusive: + case OMPC_uses_allocators: + case OMPC_affinity: + default: llvm_unreachable("Clause is not allowed in 'omp atomic'."); } } void CodeGenFunction::EmitOMPAtomicDirective(const OMPAtomicDirective &S) { - bool IsSeqCst = S.getSingleClause<OMPSeqCstClause>(); + llvm::AtomicOrdering AO = llvm::AtomicOrdering::Monotonic; + bool MemOrderingSpecified = false; + if (S.getSingleClause<OMPSeqCstClause>()) { + AO = llvm::AtomicOrdering::SequentiallyConsistent; + MemOrderingSpecified = true; + } else if (S.getSingleClause<OMPAcqRelClause>()) { + AO = llvm::AtomicOrdering::AcquireRelease; + MemOrderingSpecified = true; + } else if (S.getSingleClause<OMPAcquireClause>()) { + AO = llvm::AtomicOrdering::Acquire; + MemOrderingSpecified = true; + } else if (S.getSingleClause<OMPReleaseClause>()) { + AO = llvm::AtomicOrdering::Release; + MemOrderingSpecified = true; + } else if (S.getSingleClause<OMPRelaxedClause>()) { + AO = llvm::AtomicOrdering::Monotonic; + MemOrderingSpecified = true; + } OpenMPClauseKind Kind = OMPC_unknown; for (const OMPClause *C : S.clauses()) { - // Find first clause (skip seq_cst clause, if it is first). - if (C->getClauseKind() != OMPC_seq_cst) { + // Find first clause (skip seq_cst|acq_rel|aqcuire|release|relaxed clause, + // if it is first). + if (C->getClauseKind() != OMPC_seq_cst && + C->getClauseKind() != OMPC_acq_rel && + C->getClauseKind() != OMPC_acquire && + C->getClauseKind() != OMPC_release && + C->getClauseKind() != OMPC_relaxed) { Kind = C->getClauseKind(); break; } } - - const Stmt *CS = S.getInnermostCapturedStmt()->IgnoreContainers(); - if (const auto *FE = dyn_cast<FullExpr>(CS)) - enterFullExpression(FE); - // Processing for statements under 'atomic capture'. - if (const auto *Compound = dyn_cast<CompoundStmt>(CS)) { - for (const Stmt *C : Compound->body()) { - if (const auto *FE = dyn_cast<FullExpr>(C)) - enterFullExpression(FE); + if (!MemOrderingSpecified) { + llvm::AtomicOrdering DefaultOrder = + CGM.getOpenMPRuntime().getDefaultMemoryOrdering(); + if (DefaultOrder == llvm::AtomicOrdering::Monotonic || + DefaultOrder == llvm::AtomicOrdering::SequentiallyConsistent || + (DefaultOrder == llvm::AtomicOrdering::AcquireRelease && + Kind == OMPC_capture)) { + AO = DefaultOrder; + } else if (DefaultOrder == llvm::AtomicOrdering::AcquireRelease) { + if (Kind == OMPC_unknown || Kind == OMPC_update || Kind == OMPC_write) { + AO = llvm::AtomicOrdering::Release; + } else if (Kind == OMPC_read) { + assert(Kind == OMPC_read && "Unexpected atomic kind."); + AO = llvm::AtomicOrdering::Acquire; + } } } - auto &&CodeGen = [&S, Kind, IsSeqCst, CS](CodeGenFunction &CGF, + const Stmt *CS = S.getInnermostCapturedStmt()->IgnoreContainers(); + + auto &&CodeGen = [&S, Kind, AO, CS](CodeGenFunction &CGF, PrePostActionTy &) { CGF.EmitStopPoint(CS); - emitOMPAtomicExpr(CGF, Kind, IsSeqCst, S.isPostfixUpdate(), S.getX(), - S.getV(), S.getExpr(), S.getUpdateExpr(), - S.isXLHSInRHSPart(), S.getBeginLoc()); + emitOMPAtomicExpr(CGF, Kind, AO, S.isPostfixUpdate(), S.getX(), S.getV(), + S.getExpr(), S.getUpdateExpr(), S.isXLHSInRHSPart(), + S.getBeginLoc()); }; OMPLexicalScope Scope(*this, S, OMPD_unknown); CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_atomic, CodeGen); @@ -4370,6 +5388,8 @@ static void emitCommonOMPTargetDirective(CodeGenFunction &CGF, return; } + auto LPCRegion = + CGOpenMPRuntime::LastprivateConditionalRAII::disable(CGF, S); llvm::Function *Fn = nullptr; llvm::Constant *FnID = nullptr; @@ -4384,9 +5404,10 @@ static void emitCommonOMPTargetDirective(CodeGenFunction &CGF, } // Check if we have any device clause associated with the directive. - const Expr *Device = nullptr; + llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device( + nullptr, OMPC_DEVICE_unknown); if (auto *C = S.getSingleClause<OMPDeviceClause>()) - Device = C->getDevice(); + Device.setPointerAndInt(C->getDevice(), C->getModifier()); // Check if we have an if clause whose conditional always evaluates to false // or if we do not have any targets specified. If so the target region is not @@ -4856,7 +5877,8 @@ void CodeGenFunction::EmitOMPCancelDirective(const OMPCancelDirective &S) { break; } } - if (llvm::OpenMPIRBuilder *OMPBuilder = CGM.getOpenMPIRBuilder()) { + if (CGM.getLangOpts().OpenMPIRBuilder) { + llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder(); // TODO: This check is necessary as we only generate `omp parallel` through // the OpenMPIRBuilder for now. if (S.getCancelRegion() == OMPD_parallel) { @@ -4865,7 +5887,7 @@ void CodeGenFunction::EmitOMPCancelDirective(const OMPCancelDirective &S) { IfCondition = EmitScalarExpr(IfCond, /*IgnoreResultAssign=*/true); return Builder.restoreIP( - OMPBuilder->CreateCancel(Builder, IfCondition, S.getCancelRegion())); + OMPBuilder.CreateCancel(Builder, IfCondition, S.getCancelRegion())); } } @@ -4876,7 +5898,8 @@ void CodeGenFunction::EmitOMPCancelDirective(const OMPCancelDirective &S) { CodeGenFunction::JumpDest CodeGenFunction::getOMPCancelDestination(OpenMPDirectiveKind Kind) { if (Kind == OMPD_parallel || Kind == OMPD_task || - Kind == OMPD_target_parallel) + Kind == OMPD_target_parallel || Kind == OMPD_taskloop || + Kind == OMPD_master_taskloop || Kind == OMPD_parallel_master_taskloop) return ReturnBlock; assert(Kind == OMPD_for || Kind == OMPD_section || Kind == OMPD_sections || Kind == OMPD_parallel_sections || Kind == OMPD_parallel_for || @@ -4888,9 +5911,8 @@ CodeGenFunction::getOMPCancelDestination(OpenMPDirectiveKind Kind) { } void CodeGenFunction::EmitOMPUseDevicePtrClause( - const OMPClause &NC, OMPPrivateScope &PrivateScope, + const OMPUseDevicePtrClause &C, OMPPrivateScope &PrivateScope, const llvm::DenseMap<const ValueDecl *, Address> &CaptureDeviceAddrMap) { - const auto &C = cast<OMPUseDevicePtrClause>(NC); auto OrigVarIt = C.varlist_begin(); auto InitIt = C.inits().begin(); for (const Expr *PvtVarIt : C.private_copies()) { @@ -4951,6 +5973,60 @@ void CodeGenFunction::EmitOMPUseDevicePtrClause( } } +static const VarDecl *getBaseDecl(const Expr *Ref) { + const Expr *Base = Ref->IgnoreParenImpCasts(); + while (const auto *OASE = dyn_cast<OMPArraySectionExpr>(Base)) + Base = OASE->getBase()->IgnoreParenImpCasts(); + while (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Base)) + Base = ASE->getBase()->IgnoreParenImpCasts(); + return cast<VarDecl>(cast<DeclRefExpr>(Base)->getDecl()); +} + +void CodeGenFunction::EmitOMPUseDeviceAddrClause( + const OMPUseDeviceAddrClause &C, OMPPrivateScope &PrivateScope, + const llvm::DenseMap<const ValueDecl *, Address> &CaptureDeviceAddrMap) { + llvm::SmallDenseSet<CanonicalDeclPtr<const Decl>, 4> Processed; + for (const Expr *Ref : C.varlists()) { + const VarDecl *OrigVD = getBaseDecl(Ref); + if (!Processed.insert(OrigVD).second) + continue; + // In order to identify the right initializer we need to match the + // declaration used by the mapping logic. In some cases we may get + // OMPCapturedExprDecl that refers to the original declaration. + const ValueDecl *MatchingVD = OrigVD; + if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(MatchingVD)) { + // OMPCapturedExprDecl are used to privative fields of the current + // structure. + const auto *ME = cast<MemberExpr>(OED->getInit()); + assert(isa<CXXThisExpr>(ME->getBase()) && + "Base should be the current struct!"); + MatchingVD = ME->getMemberDecl(); + } + + // If we don't have information about the current list item, move on to + // the next one. + auto InitAddrIt = CaptureDeviceAddrMap.find(MatchingVD); + if (InitAddrIt == CaptureDeviceAddrMap.end()) + continue; + + Address PrivAddr = InitAddrIt->getSecond(); + // For declrefs and variable length array need to load the pointer for + // correct mapping, since the pointer to the data was passed to the runtime. + if (isa<DeclRefExpr>(Ref->IgnoreParenImpCasts()) || + MatchingVD->getType()->isArrayType()) + PrivAddr = + EmitLoadOfPointer(PrivAddr, getContext() + .getPointerType(OrigVD->getType()) + ->castAs<PointerType>()); + llvm::Type *RealTy = + ConvertTypeForMem(OrigVD->getType().getNonReferenceType()) + ->getPointerTo(); + PrivAddr = Builder.CreatePointerBitCastOrAddrSpaceCast(PrivAddr, RealTy); + + (void)PrivateScope.addPrivate(OrigVD, [PrivAddr]() { return PrivAddr; }); + } +} + // Generate the instructions for '#pragma omp target data' directive. void CodeGenFunction::EmitOMPTargetDataDirective( const OMPTargetDataDirective &S) { @@ -4995,9 +6071,13 @@ void CodeGenFunction::EmitOMPTargetDataDirective( for (const auto *C : S.getClausesOfKind<OMPUseDevicePtrClause>()) CGF.EmitOMPUseDevicePtrClause(*C, PrivateScope, Info.CaptureDeviceAddrMap); + for (const auto *C : S.getClausesOfKind<OMPUseDeviceAddrClause>()) + CGF.EmitOMPUseDeviceAddrClause(*C, PrivateScope, + Info.CaptureDeviceAddrMap); (void)PrivateScope.Privatize(); RCG(CGF); } else { + OMPLexicalScope Scope(CGF, S, OMPD_unknown); RCG(CGF); } }; @@ -5222,7 +6302,11 @@ void CodeGenFunction::EmitOMPTaskLoopBasedDirective(const OMPLoopDirective &S) { assert(isOpenMPTaskLoopDirective(S.getDirectiveKind())); // Emit outlined function for task construct. const CapturedStmt *CS = S.getCapturedStmt(OMPD_taskloop); - Address CapturedStruct = GenerateCapturedStmtArgument(*CS); + Address CapturedStruct = Address::invalid(); + { + OMPLexicalScope Scope(*this, S, OMPD_taskloop, /*EmitPreInitStmt=*/false); + CapturedStruct = GenerateCapturedStmtArgument(*CS); + } QualType SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl()); const Expr *IfCond = nullptr; for (const auto *C : S.getClausesOfKind<OMPIfClause>()) { @@ -5322,8 +6406,8 @@ void CodeGenFunction::EmitOMPTaskLoopBasedDirective(const OMPLoopDirective &S) { CGF.EmitOMPInnerLoop( S, LoopScope.requiresCleanups(), S.getCond(), S.getInc(), [&S](CodeGenFunction &CGF) { - CGF.EmitOMPLoopBody(S, CodeGenFunction::JumpDest()); - CGF.EmitStopPoint(&S); + emitOMPLoopBodyWithStopPoint(CGF, S, + CodeGenFunction::JumpDest()); }, [](CodeGenFunction &) {}); }); @@ -5376,11 +6460,15 @@ void CodeGenFunction::EmitOMPTaskLoopBasedDirective(const OMPLoopDirective &S) { } void CodeGenFunction::EmitOMPTaskLoopDirective(const OMPTaskLoopDirective &S) { + auto LPCRegion = + CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S); EmitOMPTaskLoopBasedDirective(S); } void CodeGenFunction::EmitOMPTaskLoopSimdDirective( const OMPTaskLoopSimdDirective &S) { + auto LPCRegion = + CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S); OMPLexicalScope Scope(*this, S); EmitOMPTaskLoopBasedDirective(S); } @@ -5391,6 +6479,8 @@ void CodeGenFunction::EmitOMPMasterTaskLoopDirective( Action.Enter(CGF); EmitOMPTaskLoopBasedDirective(S); }; + auto LPCRegion = + CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S); OMPLexicalScope Scope(*this, S, llvm::None, /*EmitPreInitStmt=*/false); CGM.getOpenMPRuntime().emitMasterRegion(*this, CodeGen, S.getBeginLoc()); } @@ -5401,6 +6491,8 @@ void CodeGenFunction::EmitOMPMasterTaskLoopSimdDirective( Action.Enter(CGF); EmitOMPTaskLoopBasedDirective(S); }; + auto LPCRegion = + CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S); OMPLexicalScope Scope(*this, S); CGM.getOpenMPRuntime().emitMasterRegion(*this, CodeGen, S.getBeginLoc()); } @@ -5413,10 +6505,12 @@ void CodeGenFunction::EmitOMPParallelMasterTaskLoopDirective( Action.Enter(CGF); CGF.EmitOMPTaskLoopBasedDirective(S); }; - OMPLexicalScope Scope(CGF, S, llvm::None, /*EmitPreInitStmt=*/false); + OMPLexicalScope Scope(CGF, S, OMPD_parallel, /*EmitPreInitStmt=*/false); CGM.getOpenMPRuntime().emitMasterRegion(CGF, TaskLoopCodeGen, S.getBeginLoc()); }; + auto LPCRegion = + CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S); emitCommonOMPParallelDirective(*this, S, OMPD_master_taskloop, CodeGen, emitEmptyBoundParameters); } @@ -5433,6 +6527,8 @@ void CodeGenFunction::EmitOMPParallelMasterTaskLoopSimdDirective( CGM.getOpenMPRuntime().emitMasterRegion(CGF, TaskLoopCodeGen, S.getBeginLoc()); }; + auto LPCRegion = + CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S); emitCommonOMPParallelDirective(*this, S, OMPD_master_taskloop_simd, CodeGen, emitEmptyBoundParameters); } @@ -5461,19 +6557,43 @@ void CodeGenFunction::EmitOMPTargetUpdateDirective( void CodeGenFunction::EmitSimpleOMPExecutableDirective( const OMPExecutableDirective &D) { + if (const auto *SD = dyn_cast<OMPScanDirective>(&D)) { + EmitOMPScanDirective(*SD); + return; + } if (!D.hasAssociatedStmt() || !D.getAssociatedStmt()) return; auto &&CodeGen = [&D](CodeGenFunction &CGF, PrePostActionTy &Action) { + OMPPrivateScope GlobalsScope(CGF); + if (isOpenMPTaskingDirective(D.getDirectiveKind())) { + // Capture global firstprivates to avoid crash. + for (const auto *C : D.getClausesOfKind<OMPFirstprivateClause>()) { + for (const Expr *Ref : C->varlists()) { + const auto *DRE = cast<DeclRefExpr>(Ref->IgnoreParenImpCasts()); + if (!DRE) + continue; + const auto *VD = dyn_cast<VarDecl>(DRE->getDecl()); + if (!VD || VD->hasLocalStorage()) + continue; + if (!CGF.LocalDeclMap.count(VD)) { + LValue GlobLVal = CGF.EmitLValue(Ref); + GlobalsScope.addPrivate( + VD, [&GlobLVal, &CGF]() { return GlobLVal.getAddress(CGF); }); + } + } + } + } if (isOpenMPSimdDirective(D.getDirectiveKind())) { + (void)GlobalsScope.Privatize(); + ParentLoopDirectiveForScanRegion ScanRegion(CGF, D); emitOMPSimdRegion(CGF, cast<OMPLoopDirective>(D), Action); } else { - OMPPrivateScope LoopGlobals(CGF); if (const auto *LD = dyn_cast<OMPLoopDirective>(&D)) { for (const Expr *E : LD->counters()) { const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); if (!VD->hasLocalStorage() && !CGF.LocalDeclMap.count(VD)) { LValue GlobLVal = CGF.EmitLValue(E); - LoopGlobals.addPrivate( + GlobalsScope.addPrivate( VD, [&GlobLVal, &CGF]() { return GlobLVal.getAddress(CGF); }); } if (isa<OMPCapturedExprDecl>(VD)) { @@ -5497,14 +6617,20 @@ void CodeGenFunction::EmitSimpleOMPExecutableDirective( } } } - LoopGlobals.Privatize(); + (void)GlobalsScope.Privatize(); CGF.EmitStmt(D.getInnermostCapturedStmt()->getCapturedStmt()); } }; - OMPSimdLexicalScope Scope(*this, D); - CGM.getOpenMPRuntime().emitInlinedDirective( - *this, - isOpenMPSimdDirective(D.getDirectiveKind()) ? OMPD_simd - : D.getDirectiveKind(), - CodeGen); + { + auto LPCRegion = + CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, D); + OMPSimdLexicalScope Scope(*this, D); + CGM.getOpenMPRuntime().emitInlinedDirective( + *this, + isOpenMPSimdDirective(D.getDirectiveKind()) ? OMPD_simd + : D.getDirectiveKind(), + CodeGen); + } + // Check for outer lastprivate conditional update. + checkForLastprivateConditionalUpdate(*this, D); } |