aboutsummaryrefslogtreecommitdiff
path: root/clang/lib/CodeGen/CGStmtOpenMP.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'clang/lib/CodeGen/CGStmtOpenMP.cpp')
-rw-r--r--clang/lib/CodeGen/CGStmtOpenMP.cpp1726
1 files changed, 1426 insertions, 300 deletions
diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp
index dc3899f0e4ea..cfd5eda8cc80 100644
--- a/clang/lib/CodeGen/CGStmtOpenMP.cpp
+++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp
@@ -18,14 +18,22 @@
#include "clang/AST/ASTContext.h"
#include "clang/AST/Attr.h"
#include "clang/AST/DeclOpenMP.h"
+#include "clang/AST/OpenMPClause.h"
#include "clang/AST/Stmt.h"
#include "clang/AST/StmtOpenMP.h"
+#include "clang/Basic/OpenMPKinds.h"
#include "clang/Basic/PrettyStackTrace.h"
+#include "llvm/Frontend/OpenMP/OMPConstants.h"
#include "llvm/Frontend/OpenMP/OMPIRBuilder.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/Support/AtomicOrdering.h"
using namespace clang;
using namespace CodeGen;
using namespace llvm::omp;
+static const VarDecl *getBaseDecl(const Expr *Ref);
+
namespace {
/// Lexical scope for OpenMP executable constructs, that handles correct codegen
/// for captured expressions.
@@ -53,7 +61,8 @@ class OMPLexicalScope : public CodeGenFunction::LexicalScope {
static bool isCapturedVar(CodeGenFunction &CGF, const VarDecl *VD) {
return CGF.LambdaCaptureFields.lookup(VD) ||
(CGF.CapturedStmtInfo && CGF.CapturedStmtInfo->lookup(VD)) ||
- (CGF.CurCodeDecl && isa<BlockDecl>(CGF.CurCodeDecl));
+ (CGF.CurCodeDecl && isa<BlockDecl>(CGF.CurCodeDecl) &&
+ cast<BlockDecl>(CGF.CurCodeDecl)->capturesVariable(VD));
}
public:
@@ -214,6 +223,12 @@ public:
if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(D))
CGF.EmitVarDecl(*OED);
}
+ } else if (const auto *UDP = dyn_cast<OMPUseDeviceAddrClause>(C)) {
+ for (const Expr *E : UDP->varlists()) {
+ const Decl *D = getBaseDecl(E);
+ if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(D))
+ CGF.EmitVarDecl(*OED);
+ }
}
}
if (!isOpenMPSimdDirective(S.getDirectiveKind()))
@@ -365,26 +380,28 @@ static QualType getCanonicalParamType(ASTContext &C, QualType T) {
}
namespace {
- /// Contains required data for proper outlined function codegen.
- struct FunctionOptions {
- /// Captured statement for which the function is generated.
- const CapturedStmt *S = nullptr;
- /// true if cast to/from UIntPtr is required for variables captured by
- /// value.
- const bool UIntPtrCastRequired = true;
- /// true if only casted arguments must be registered as local args or VLA
- /// sizes.
- const bool RegisterCastedArgsOnly = false;
- /// Name of the generated function.
- const StringRef FunctionName;
- explicit FunctionOptions(const CapturedStmt *S, bool UIntPtrCastRequired,
- bool RegisterCastedArgsOnly,
- StringRef FunctionName)
- : S(S), UIntPtrCastRequired(UIntPtrCastRequired),
- RegisterCastedArgsOnly(UIntPtrCastRequired && RegisterCastedArgsOnly),
- FunctionName(FunctionName) {}
- };
-}
+/// Contains required data for proper outlined function codegen.
+struct FunctionOptions {
+ /// Captured statement for which the function is generated.
+ const CapturedStmt *S = nullptr;
+ /// true if cast to/from UIntPtr is required for variables captured by
+ /// value.
+ const bool UIntPtrCastRequired = true;
+ /// true if only casted arguments must be registered as local args or VLA
+ /// sizes.
+ const bool RegisterCastedArgsOnly = false;
+ /// Name of the generated function.
+ const StringRef FunctionName;
+ /// Location of the non-debug version of the outlined function.
+ SourceLocation Loc;
+ explicit FunctionOptions(const CapturedStmt *S, bool UIntPtrCastRequired,
+ bool RegisterCastedArgsOnly, StringRef FunctionName,
+ SourceLocation Loc)
+ : S(S), UIntPtrCastRequired(UIntPtrCastRequired),
+ RegisterCastedArgsOnly(UIntPtrCastRequired && RegisterCastedArgsOnly),
+ FunctionName(FunctionName), Loc(Loc) {}
+};
+} // namespace
static llvm::Function *emitOutlinedFunctionPrologue(
CodeGenFunction &CGF, FunctionArgList &Args,
@@ -485,7 +502,9 @@ static llvm::Function *emitOutlinedFunctionPrologue(
// Generate the function.
CGF.StartFunction(CD, Ctx.VoidTy, F, FuncInfo, TargetArgs,
- FO.S->getBeginLoc(), CD->getBody()->getBeginLoc());
+ FO.UIntPtrCastRequired ? FO.Loc : FO.S->getBeginLoc(),
+ FO.UIntPtrCastRequired ? FO.Loc
+ : CD->getBody()->getBeginLoc());
unsigned Cnt = CD->getContextParamPosition();
I = FO.S->captures().begin();
for (const FieldDecl *FD : RD->fields()) {
@@ -560,7 +579,8 @@ static llvm::Function *emitOutlinedFunctionPrologue(
}
llvm::Function *
-CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S) {
+CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S,
+ SourceLocation Loc) {
assert(
CapturedStmtInfo &&
"CapturedStmtInfo should be set when generating the captured function");
@@ -577,7 +597,7 @@ CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S) {
if (NeedWrapperFunction)
Out << "_debug__";
FunctionOptions FO(&S, !NeedWrapperFunction, /*RegisterCastedArgsOnly=*/false,
- Out.str());
+ Out.str(), Loc);
llvm::Function *F = emitOutlinedFunctionPrologue(*this, Args, LocalAddrs,
VLASizes, CXXThisValue, FO);
CodeGenFunction::OMPPrivateScope LocalScope(*this);
@@ -600,7 +620,7 @@ CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S) {
FunctionOptions WrapperFO(&S, /*UIntPtrCastRequired=*/true,
/*RegisterCastedArgsOnly=*/true,
- CapturedStmtInfo->getHelperName());
+ CapturedStmtInfo->getHelperName(), Loc);
CodeGenFunction WrapperCGF(CGM, /*suppressNewContext=*/true);
WrapperCGF.CapturedStmtInfo = CapturedStmtInfo;
Args.clear();
@@ -632,8 +652,7 @@ CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S) {
}
CallArgs.emplace_back(WrapperCGF.EmitFromMemory(CallArg, Arg->getType()));
}
- CGM.getOpenMPRuntime().emitOutlinedFunctionCall(WrapperCGF, S.getBeginLoc(),
- F, CallArgs);
+ CGM.getOpenMPRuntime().emitOutlinedFunctionCall(WrapperCGF, Loc, F, CallArgs);
WrapperCGF.FinishFunction();
return WrapperF;
}
@@ -747,11 +766,12 @@ bool CodeGenFunction::EmitOMPFirstprivateClause(const OMPExecutableDirective &D,
getLangOpts().OpenMPIsDevice &&
isOpenMPTargetExecutionDirective(D.getDirectiveKind());
bool FirstprivateIsLastprivate = false;
- llvm::DenseSet<const VarDecl *> Lastprivates;
+ llvm::DenseMap<const VarDecl *, OpenMPLastprivateModifier> Lastprivates;
for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
for (const auto *D : C->varlists())
- Lastprivates.insert(
- cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl())->getCanonicalDecl());
+ Lastprivates.try_emplace(
+ cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl())->getCanonicalDecl(),
+ C->getKind());
}
llvm::DenseSet<const VarDecl *> EmittedAsFirstprivate;
llvm::SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
@@ -761,8 +781,8 @@ bool CodeGenFunction::EmitOMPFirstprivateClause(const OMPExecutableDirective &D,
bool MustEmitFirstprivateCopy =
CaptureRegions.size() == 1 && CaptureRegions.back() == OMPD_unknown;
for (const auto *C : D.getClausesOfKind<OMPFirstprivateClause>()) {
- auto IRef = C->varlist_begin();
- auto InitsRef = C->inits().begin();
+ const auto *IRef = C->varlist_begin();
+ const auto *InitsRef = C->inits().begin();
for (const Expr *IInit : C->private_copies()) {
const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
bool ThisFirstprivateIsLastprivate =
@@ -853,14 +873,34 @@ bool CodeGenFunction::EmitOMPFirstprivateClause(const OMPExecutableDirective &D,
});
} else {
Address OriginalAddr = OriginalLVal.getAddress(*this);
- IsRegistered = PrivateScope.addPrivate(
- OrigVD, [this, VDInit, OriginalAddr, VD]() {
+ IsRegistered =
+ PrivateScope.addPrivate(OrigVD, [this, VDInit, OriginalAddr, VD,
+ ThisFirstprivateIsLastprivate,
+ OrigVD, &Lastprivates, IRef]() {
// Emit private VarDecl with copy init.
// Remap temp VDInit variable to the address of the original
// variable (for proper handling of captured global variables).
setAddrOfLocalVar(VDInit, OriginalAddr);
EmitDecl(*VD);
LocalDeclMap.erase(VDInit);
+ if (ThisFirstprivateIsLastprivate &&
+ Lastprivates[OrigVD->getCanonicalDecl()] ==
+ OMPC_LASTPRIVATE_conditional) {
+ // Create/init special variable for lastprivate conditionals.
+ Address VDAddr =
+ CGM.getOpenMPRuntime().emitLastprivateConditionalInit(
+ *this, OrigVD);
+ llvm::Value *V = EmitLoadOfScalar(
+ MakeAddrLValue(GetAddrOfLocalVar(VD), (*IRef)->getType(),
+ AlignmentSource::Decl),
+ (*IRef)->getExprLoc());
+ EmitStoreOfScalar(V,
+ MakeAddrLValue(VDAddr, (*IRef)->getType(),
+ AlignmentSource::Decl));
+ LocalDeclMap.erase(VD);
+ setAddrOfLocalVar(VD, VDAddr);
+ return VDAddr;
+ }
return GetAddrOfLocalVar(VD);
});
}
@@ -990,8 +1030,8 @@ bool CodeGenFunction::EmitOMPLastprivateClauseInit(
if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
!getLangOpts().OpenMPSimd)
break;
- auto IRef = C->varlist_begin();
- auto IDestRef = C->destination_exprs().begin();
+ const auto *IRef = C->varlist_begin();
+ const auto *IDestRef = C->destination_exprs().begin();
for (const Expr *IInit : C->private_copies()) {
// Keep the address of the original variable for future update at the end
// of the loop.
@@ -1013,7 +1053,15 @@ bool CodeGenFunction::EmitOMPLastprivateClauseInit(
// for 'firstprivate' clause.
if (IInit && !SIMDLCVs.count(OrigVD->getCanonicalDecl())) {
const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
- bool IsRegistered = PrivateScope.addPrivate(OrigVD, [this, VD]() {
+ bool IsRegistered = PrivateScope.addPrivate(OrigVD, [this, VD, C,
+ OrigVD]() {
+ if (C->getKind() == OMPC_LASTPRIVATE_conditional) {
+ Address VDAddr =
+ CGM.getOpenMPRuntime().emitLastprivateConditionalInit(*this,
+ OrigVD);
+ setAddrOfLocalVar(VD, VDAddr);
+ return VDAddr;
+ }
// Emit private VarDecl with copy init.
EmitDecl(*VD);
return GetAddrOfLocalVar(VD);
@@ -1099,7 +1147,7 @@ void CodeGenFunction::EmitOMPLastprivateClauseFinal(
if (const auto *RefTy = PrivateVD->getType()->getAs<ReferenceType>())
PrivateAddr =
Address(Builder.CreateLoad(PrivateAddr),
- getNaturalTypeAlignment(RefTy->getPointeeType()));
+ CGM.getNaturalTypeAlignment(RefTy->getPointeeType()));
// Store the last value to the private copy in the last iteration.
if (C->getKind() == OMPC_LASTPRIVATE_conditional)
CGM.getOpenMPRuntime().emitLastprivateConditionalFinalUpdate(
@@ -1122,7 +1170,7 @@ void CodeGenFunction::EmitOMPLastprivateClauseFinal(
void CodeGenFunction::EmitOMPReductionClauseInit(
const OMPExecutableDirective &D,
- CodeGenFunction::OMPPrivateScope &PrivateScope) {
+ CodeGenFunction::OMPPrivateScope &PrivateScope, bool ForInscan) {
if (!HaveInsertPoint())
return;
SmallVector<const Expr *, 4> Shareds;
@@ -1130,32 +1178,36 @@ void CodeGenFunction::EmitOMPReductionClauseInit(
SmallVector<const Expr *, 4> ReductionOps;
SmallVector<const Expr *, 4> LHSs;
SmallVector<const Expr *, 4> RHSs;
+ OMPTaskDataTy Data;
+ SmallVector<const Expr *, 4> TaskLHSs;
+ SmallVector<const Expr *, 4> TaskRHSs;
for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
- auto IPriv = C->privates().begin();
- auto IRed = C->reduction_ops().begin();
- auto ILHS = C->lhs_exprs().begin();
- auto IRHS = C->rhs_exprs().begin();
- for (const Expr *Ref : C->varlists()) {
- Shareds.emplace_back(Ref);
- Privates.emplace_back(*IPriv);
- ReductionOps.emplace_back(*IRed);
- LHSs.emplace_back(*ILHS);
- RHSs.emplace_back(*IRHS);
- std::advance(IPriv, 1);
- std::advance(IRed, 1);
- std::advance(ILHS, 1);
- std::advance(IRHS, 1);
+ if (ForInscan != (C->getModifier() == OMPC_REDUCTION_inscan))
+ continue;
+ Shareds.append(C->varlist_begin(), C->varlist_end());
+ Privates.append(C->privates().begin(), C->privates().end());
+ ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end());
+ LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
+ RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
+ if (C->getModifier() == OMPC_REDUCTION_task) {
+ Data.ReductionVars.append(C->privates().begin(), C->privates().end());
+ Data.ReductionOrigs.append(C->varlist_begin(), C->varlist_end());
+ Data.ReductionCopies.append(C->privates().begin(), C->privates().end());
+ Data.ReductionOps.append(C->reduction_ops().begin(),
+ C->reduction_ops().end());
+ TaskLHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
+ TaskRHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
}
}
- ReductionCodeGen RedCG(Shareds, Privates, ReductionOps);
+ ReductionCodeGen RedCG(Shareds, Shareds, Privates, ReductionOps);
unsigned Count = 0;
- auto ILHS = LHSs.begin();
- auto IRHS = RHSs.begin();
- auto IPriv = Privates.begin();
+ auto *ILHS = LHSs.begin();
+ auto *IRHS = RHSs.begin();
+ auto *IPriv = Privates.begin();
for (const Expr *IRef : Shareds) {
const auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*IPriv)->getDecl());
// Emit private VarDecl with reduction init.
- RedCG.emitSharedLValue(*this, Count);
+ RedCG.emitSharedOrigLValue(*this, Count);
RedCG.emitAggregateType(*this, Count);
AutoVarEmission Emission = EmitAutoVarAlloca(*PrivateVD);
RedCG.emitInitialization(*this, Count, Emission.getAllocatedAddress(),
@@ -1222,6 +1274,118 @@ void CodeGenFunction::EmitOMPReductionClauseInit(
++IPriv;
++Count;
}
+ if (!Data.ReductionVars.empty()) {
+ Data.IsReductionWithTaskMod = true;
+ Data.IsWorksharingReduction =
+ isOpenMPWorksharingDirective(D.getDirectiveKind());
+ llvm::Value *ReductionDesc = CGM.getOpenMPRuntime().emitTaskReductionInit(
+ *this, D.getBeginLoc(), TaskLHSs, TaskRHSs, Data);
+ const Expr *TaskRedRef = nullptr;
+ switch (D.getDirectiveKind()) {
+ case OMPD_parallel:
+ TaskRedRef = cast<OMPParallelDirective>(D).getTaskReductionRefExpr();
+ break;
+ case OMPD_for:
+ TaskRedRef = cast<OMPForDirective>(D).getTaskReductionRefExpr();
+ break;
+ case OMPD_sections:
+ TaskRedRef = cast<OMPSectionsDirective>(D).getTaskReductionRefExpr();
+ break;
+ case OMPD_parallel_for:
+ TaskRedRef = cast<OMPParallelForDirective>(D).getTaskReductionRefExpr();
+ break;
+ case OMPD_parallel_master:
+ TaskRedRef =
+ cast<OMPParallelMasterDirective>(D).getTaskReductionRefExpr();
+ break;
+ case OMPD_parallel_sections:
+ TaskRedRef =
+ cast<OMPParallelSectionsDirective>(D).getTaskReductionRefExpr();
+ break;
+ case OMPD_target_parallel:
+ TaskRedRef =
+ cast<OMPTargetParallelDirective>(D).getTaskReductionRefExpr();
+ break;
+ case OMPD_target_parallel_for:
+ TaskRedRef =
+ cast<OMPTargetParallelForDirective>(D).getTaskReductionRefExpr();
+ break;
+ case OMPD_distribute_parallel_for:
+ TaskRedRef =
+ cast<OMPDistributeParallelForDirective>(D).getTaskReductionRefExpr();
+ break;
+ case OMPD_teams_distribute_parallel_for:
+ TaskRedRef = cast<OMPTeamsDistributeParallelForDirective>(D)
+ .getTaskReductionRefExpr();
+ break;
+ case OMPD_target_teams_distribute_parallel_for:
+ TaskRedRef = cast<OMPTargetTeamsDistributeParallelForDirective>(D)
+ .getTaskReductionRefExpr();
+ break;
+ case OMPD_simd:
+ case OMPD_for_simd:
+ case OMPD_section:
+ case OMPD_single:
+ case OMPD_master:
+ case OMPD_critical:
+ case OMPD_parallel_for_simd:
+ case OMPD_task:
+ case OMPD_taskyield:
+ case OMPD_barrier:
+ case OMPD_taskwait:
+ case OMPD_taskgroup:
+ case OMPD_flush:
+ case OMPD_depobj:
+ case OMPD_scan:
+ case OMPD_ordered:
+ case OMPD_atomic:
+ case OMPD_teams:
+ case OMPD_target:
+ case OMPD_cancellation_point:
+ case OMPD_cancel:
+ case OMPD_target_data:
+ case OMPD_target_enter_data:
+ case OMPD_target_exit_data:
+ case OMPD_taskloop:
+ case OMPD_taskloop_simd:
+ case OMPD_master_taskloop:
+ case OMPD_master_taskloop_simd:
+ case OMPD_parallel_master_taskloop:
+ case OMPD_parallel_master_taskloop_simd:
+ case OMPD_distribute:
+ case OMPD_target_update:
+ case OMPD_distribute_parallel_for_simd:
+ case OMPD_distribute_simd:
+ case OMPD_target_parallel_for_simd:
+ case OMPD_target_simd:
+ case OMPD_teams_distribute:
+ case OMPD_teams_distribute_simd:
+ case OMPD_teams_distribute_parallel_for_simd:
+ case OMPD_target_teams:
+ case OMPD_target_teams_distribute:
+ case OMPD_target_teams_distribute_parallel_for_simd:
+ case OMPD_target_teams_distribute_simd:
+ case OMPD_declare_target:
+ case OMPD_end_declare_target:
+ case OMPD_threadprivate:
+ case OMPD_allocate:
+ case OMPD_declare_reduction:
+ case OMPD_declare_mapper:
+ case OMPD_declare_simd:
+ case OMPD_requires:
+ case OMPD_declare_variant:
+ case OMPD_begin_declare_variant:
+ case OMPD_end_declare_variant:
+ case OMPD_unknown:
+ default:
+ llvm_unreachable("Enexpected directive with task reductions.");
+ }
+
+ const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(TaskRedRef)->getDecl());
+ EmitVarDecl(*VD);
+ EmitStoreOfScalar(ReductionDesc, GetAddrOfLocalVar(VD),
+ /*Volatile=*/false, TaskRedRef->getType());
+ }
}
void CodeGenFunction::EmitOMPReductionClauseFinal(
@@ -1233,14 +1397,25 @@ void CodeGenFunction::EmitOMPReductionClauseFinal(
llvm::SmallVector<const Expr *, 8> RHSExprs;
llvm::SmallVector<const Expr *, 8> ReductionOps;
bool HasAtLeastOneReduction = false;
+ bool IsReductionWithTaskMod = false;
for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
+ // Do not emit for inscan reductions.
+ if (C->getModifier() == OMPC_REDUCTION_inscan)
+ continue;
HasAtLeastOneReduction = true;
Privates.append(C->privates().begin(), C->privates().end());
LHSExprs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
RHSExprs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end());
+ IsReductionWithTaskMod =
+ IsReductionWithTaskMod || C->getModifier() == OMPC_REDUCTION_task;
}
if (HasAtLeastOneReduction) {
+ if (IsReductionWithTaskMod) {
+ CGM.getOpenMPRuntime().emitTaskReductionFini(
+ *this, D.getBeginLoc(),
+ isOpenMPWorksharingDirective(D.getDirectiveKind()));
+ }
bool WithNowait = D.getSingleClause<OMPNowaitClause>() ||
isOpenMPParallelDirective(D.getDirectiveKind()) ||
ReductionKind == OMPD_simd;
@@ -1288,6 +1463,63 @@ typedef llvm::function_ref<void(CodeGenFunction &,
CodeGenBoundParametersTy;
} // anonymous namespace
+static void
+checkForLastprivateConditionalUpdate(CodeGenFunction &CGF,
+ const OMPExecutableDirective &S) {
+ if (CGF.getLangOpts().OpenMP < 50)
+ return;
+ llvm::DenseSet<CanonicalDeclPtr<const VarDecl>> PrivateDecls;
+ for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
+ for (const Expr *Ref : C->varlists()) {
+ if (!Ref->getType()->isScalarType())
+ continue;
+ const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
+ if (!DRE)
+ continue;
+ PrivateDecls.insert(cast<VarDecl>(DRE->getDecl()));
+ CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, Ref);
+ }
+ }
+ for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
+ for (const Expr *Ref : C->varlists()) {
+ if (!Ref->getType()->isScalarType())
+ continue;
+ const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
+ if (!DRE)
+ continue;
+ PrivateDecls.insert(cast<VarDecl>(DRE->getDecl()));
+ CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, Ref);
+ }
+ }
+ for (const auto *C : S.getClausesOfKind<OMPLinearClause>()) {
+ for (const Expr *Ref : C->varlists()) {
+ if (!Ref->getType()->isScalarType())
+ continue;
+ const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
+ if (!DRE)
+ continue;
+ PrivateDecls.insert(cast<VarDecl>(DRE->getDecl()));
+ CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, Ref);
+ }
+ }
+ // Privates should ne analyzed since they are not captured at all.
+ // Task reductions may be skipped - tasks are ignored.
+ // Firstprivates do not return value but may be passed by reference - no need
+ // to check for updated lastprivate conditional.
+ for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
+ for (const Expr *Ref : C->varlists()) {
+ if (!Ref->getType()->isScalarType())
+ continue;
+ const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
+ if (!DRE)
+ continue;
+ PrivateDecls.insert(cast<VarDecl>(DRE->getDecl()));
+ }
+ }
+ CGF.CGM.getOpenMPRuntime().checkAndEmitSharedLastprivateConditional(
+ CGF, S, PrivateDecls);
+}
+
static void emitCommonOMPParallelDirective(
CodeGenFunction &CGF, const OMPExecutableDirective &S,
OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
@@ -1334,9 +1566,97 @@ static void emitEmptyBoundParameters(CodeGenFunction &,
const OMPExecutableDirective &,
llvm::SmallVectorImpl<llvm::Value *> &) {}
-void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) {
+Address CodeGenFunction::OMPBuilderCBHelpers::getAddressOfLocalVariable(
+ CodeGenFunction &CGF, const VarDecl *VD) {
+ CodeGenModule &CGM = CGF.CGM;
+ auto &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
+
+ if (!VD)
+ return Address::invalid();
+ const VarDecl *CVD = VD->getCanonicalDecl();
+ if (!CVD->hasAttr<OMPAllocateDeclAttr>())
+ return Address::invalid();
+ const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
+ // Use the default allocation.
+ if (AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc &&
+ !AA->getAllocator())
+ return Address::invalid();
+ llvm::Value *Size;
+ CharUnits Align = CGM.getContext().getDeclAlign(CVD);
+ if (CVD->getType()->isVariablyModifiedType()) {
+ Size = CGF.getTypeSize(CVD->getType());
+ // Align the size: ((size + align - 1) / align) * align
+ Size = CGF.Builder.CreateNUWAdd(
+ Size, CGM.getSize(Align - CharUnits::fromQuantity(1)));
+ Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align));
+ Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align));
+ } else {
+ CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType());
+ Size = CGM.getSize(Sz.alignTo(Align));
+ }
+
+ assert(AA->getAllocator() &&
+ "Expected allocator expression for non-default allocator.");
+ llvm::Value *Allocator = CGF.EmitScalarExpr(AA->getAllocator());
+ // According to the standard, the original allocator type is a enum (integer).
+ // Convert to pointer type, if required.
+ if (Allocator->getType()->isIntegerTy())
+ Allocator = CGF.Builder.CreateIntToPtr(Allocator, CGM.VoidPtrTy);
+ else if (Allocator->getType()->isPointerTy())
+ Allocator = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Allocator,
+ CGM.VoidPtrTy);
+
+ llvm::Value *Addr = OMPBuilder.CreateOMPAlloc(
+ CGF.Builder, Size, Allocator,
+ getNameWithSeparators({CVD->getName(), ".void.addr"}, ".", "."));
+ llvm::CallInst *FreeCI =
+ OMPBuilder.CreateOMPFree(CGF.Builder, Addr, Allocator);
+
+ CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(NormalAndEHCleanup, FreeCI);
+ Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
+ Addr,
+ CGF.ConvertTypeForMem(CGM.getContext().getPointerType(CVD->getType())),
+ getNameWithSeparators({CVD->getName(), ".addr"}, ".", "."));
+ return Address(Addr, Align);
+}
+
+Address CodeGenFunction::OMPBuilderCBHelpers::getAddrOfThreadPrivate(
+ CodeGenFunction &CGF, const VarDecl *VD, Address VDAddr,
+ SourceLocation Loc) {
+ CodeGenModule &CGM = CGF.CGM;
+ if (CGM.getLangOpts().OpenMPUseTLS &&
+ CGM.getContext().getTargetInfo().isTLSSupported())
+ return VDAddr;
+
+ llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
+
+ llvm::Type *VarTy = VDAddr.getElementType();
+ llvm::Value *Data =
+ CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.Int8PtrTy);
+ llvm::ConstantInt *Size = CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy));
+ std::string Suffix = getNameWithSeparators({"cache", ""});
+ llvm::Twine CacheName = Twine(CGM.getMangledName(VD)).concat(Suffix);
+
+ llvm::CallInst *ThreadPrivateCacheCall =
+ OMPBuilder.CreateCachedThreadPrivate(CGF.Builder, Data, Size, CacheName);
+
+ return Address(ThreadPrivateCacheCall, VDAddr.getAlignment());
+}
- if (llvm::OpenMPIRBuilder *OMPBuilder = CGM.getOpenMPIRBuilder()) {
+std::string CodeGenFunction::OMPBuilderCBHelpers::getNameWithSeparators(
+ ArrayRef<StringRef> Parts, StringRef FirstSeparator, StringRef Separator) {
+ SmallString<128> Buffer;
+ llvm::raw_svector_ostream OS(Buffer);
+ StringRef Sep = FirstSeparator;
+ for (StringRef Part : Parts) {
+ OS << Sep << Part;
+ Sep = Separator;
+ }
+ return OS.str().str();
+}
+void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) {
+ if (CGM.getLangOpts().OpenMPIRBuilder) {
+ llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
// Check if we have any if clause associated with the directive.
llvm::Value *IfCond = nullptr;
if (const auto *C = S.getSingleClause<OMPIfClause>())
@@ -1357,15 +1677,7 @@ void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) {
// The cleanup callback that finalizes all variabels at the given location,
// thus calls destructors etc.
auto FiniCB = [this](InsertPointTy IP) {
- CGBuilderTy::InsertPointGuard IPG(Builder);
- assert(IP.getBlock()->end() != IP.getPoint() &&
- "OpenMP IR Builder should cause terminated block!");
- llvm::BasicBlock *IPBB = IP.getBlock();
- llvm::BasicBlock *DestBB = IPBB->splitBasicBlock(IP.getPoint());
- IPBB->getTerminator()->eraseFromParent();
- Builder.SetInsertPoint(IPBB);
- CodeGenFunction::JumpDest Dest = getJumpDestInCurrentScope(DestBB);
- EmitBranchThroughCleanup(Dest);
+ OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP);
};
// Privatization callback that performs appropriate action for
@@ -1387,32 +1699,17 @@ void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) {
auto BodyGenCB = [ParallelRegionBodyStmt,
this](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
llvm::BasicBlock &ContinuationBB) {
- auto OldAllocaIP = AllocaInsertPt;
- AllocaInsertPt = &*AllocaIP.getPoint();
-
- auto OldReturnBlock = ReturnBlock;
- ReturnBlock = getJumpDestInCurrentScope(&ContinuationBB);
-
- llvm::BasicBlock *CodeGenIPBB = CodeGenIP.getBlock();
- CodeGenIPBB->splitBasicBlock(CodeGenIP.getPoint());
- llvm::Instruction *CodeGenIPBBTI = CodeGenIPBB->getTerminator();
- CodeGenIPBBTI->removeFromParent();
-
- Builder.SetInsertPoint(CodeGenIPBB);
-
- EmitStmt(ParallelRegionBodyStmt);
-
- Builder.Insert(CodeGenIPBBTI);
-
- AllocaInsertPt = OldAllocaIP;
- ReturnBlock = OldReturnBlock;
+ OMPBuilderCBHelpers::OutlinedRegionBodyRAII ORB(*this, AllocaIP,
+ ContinuationBB);
+ OMPBuilderCBHelpers::EmitOMPRegionBody(*this, ParallelRegionBodyStmt,
+ CodeGenIP, ContinuationBB);
};
CGCapturedStmtInfo CGSI(*CS, CR_OpenMP);
CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(*this, &CGSI);
- Builder.restoreIP(OMPBuilder->CreateParallel(Builder, BodyGenCB, PrivCB,
- FiniCB, IfCond, NumThreads,
- ProcBind, S.hasCancel()));
+ Builder.restoreIP(OMPBuilder.CreateParallel(Builder, BodyGenCB, PrivCB,
+ FiniCB, IfCond, NumThreads,
+ ProcBind, S.hasCancel()));
return;
}
@@ -1436,10 +1733,16 @@ void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) {
CGF.EmitStmt(S.getCapturedStmt(OMPD_parallel)->getCapturedStmt());
CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel);
};
- emitCommonOMPParallelDirective(*this, S, OMPD_parallel, CodeGen,
- emitEmptyBoundParameters);
- emitPostUpdateForReductionClause(*this, S,
- [](CodeGenFunction &) { return nullptr; });
+ {
+ auto LPCRegion =
+ CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
+ emitCommonOMPParallelDirective(*this, S, OMPD_parallel, CodeGen,
+ emitEmptyBoundParameters);
+ emitPostUpdateForReductionClause(*this, S,
+ [](CodeGenFunction &) { return nullptr; });
+ }
+ // Check for outer lastprivate conditional update.
+ checkForLastprivateConditionalUpdate(*this, S);
}
static void emitBody(CodeGenFunction &CGF, const Stmt *S, const Stmt *NextLoop,
@@ -1506,6 +1809,27 @@ void CodeGenFunction::EmitOMPLoopBody(const OMPLoopDirective &D,
getProfileCount(D.getBody()));
EmitBlock(NextBB);
}
+
+ OMPPrivateScope InscanScope(*this);
+ EmitOMPReductionClauseInit(D, InscanScope, /*ForInscan=*/true);
+ bool IsInscanRegion = InscanScope.Privatize();
+ if (IsInscanRegion) {
+ // Need to remember the block before and after scan directive
+ // to dispatch them correctly depending on the clause used in
+ // this directive, inclusive or exclusive. For inclusive scan the natural
+ // order of the blocks is used, for exclusive clause the blocks must be
+ // executed in reverse order.
+ OMPBeforeScanBlock = createBasicBlock("omp.before.scan.bb");
+ OMPAfterScanBlock = createBasicBlock("omp.after.scan.bb");
+ // No need to allocate inscan exit block, in simd mode it is selected in the
+ // codegen for the scan directive.
+ if (D.getDirectiveKind() != OMPD_simd && !getLangOpts().OpenMPSimd)
+ OMPScanExitBlock = createBasicBlock("omp.exit.inscan.bb");
+ OMPScanDispatch = createBasicBlock("omp.inscan.dispatch");
+ EmitBranch(OMPScanDispatch);
+ EmitBlock(OMPBeforeScanBlock);
+ }
+
// Emit loop variables for C++ range loops.
const Stmt *Body =
D.getInnermostCapturedStmt()->getCapturedStmt()->IgnoreContainers();
@@ -1515,13 +1839,17 @@ void CodeGenFunction::EmitOMPLoopBody(const OMPLoopDirective &D,
Body, /*TryImperfectlyNestedLoops=*/true),
D.getCollapsedNumber());
+ // Jump to the dispatcher at the end of the loop body.
+ if (IsInscanRegion)
+ EmitBranch(OMPScanExitBlock);
+
// The end (updates/cleanups).
EmitBlock(Continue.getBlock());
BreakContinueStack.pop_back();
}
void CodeGenFunction::EmitOMPInnerLoop(
- const Stmt &S, bool RequiresCleanup, const Expr *LoopCond,
+ const OMPExecutableDirective &S, bool RequiresCleanup, const Expr *LoopCond,
const Expr *IncExpr,
const llvm::function_ref<void(CodeGenFunction &)> BodyGen,
const llvm::function_ref<void(CodeGenFunction &)> PostIncGen) {
@@ -1531,8 +1859,19 @@ void CodeGenFunction::EmitOMPInnerLoop(
auto CondBlock = createBasicBlock("omp.inner.for.cond");
EmitBlock(CondBlock);
const SourceRange R = S.getSourceRange();
- LoopStack.push(CondBlock, SourceLocToDebugLoc(R.getBegin()),
- SourceLocToDebugLoc(R.getEnd()));
+
+ // If attributes are attached, push to the basic block with them.
+ const auto &OMPED = cast<OMPExecutableDirective>(S);
+ const CapturedStmt *ICS = OMPED.getInnermostCapturedStmt();
+ const Stmt *SS = ICS->getCapturedStmt();
+ const AttributedStmt *AS = dyn_cast_or_null<AttributedStmt>(SS);
+ if (AS)
+ LoopStack.push(CondBlock, CGM.getContext(), CGM.getCodeGenOpts(),
+ AS->getAttrs(), SourceLocToDebugLoc(R.getBegin()),
+ SourceLocToDebugLoc(R.getEnd()));
+ else
+ LoopStack.push(CondBlock, SourceLocToDebugLoc(R.getBegin()),
+ SourceLocToDebugLoc(R.getEnd()));
// If there are any cleanups between here and the loop-exit scope,
// create a block to stage a loop exit along.
@@ -1671,7 +2010,7 @@ static void emitAlignedClause(CodeGenFunction &CGF,
"alignment is not power of 2");
if (Alignment != 0) {
llvm::Value *PtrValue = CGF.EmitScalarExpr(E);
- CGF.EmitAlignmentAssumption(
+ CGF.emitAlignmentAssumption(
PtrValue, E, /*No second loc needed*/ SourceLocation(),
llvm::ConstantInt::get(CGF.getLLVMContext(), Alignment));
}
@@ -1835,6 +2174,18 @@ void CodeGenFunction::EmitOMPSimdInit(const OMPLoopDirective &D,
LoopStack.setParallel(!IsMonotonic);
LoopStack.setVectorizeEnable();
emitSimdlenSafelenClause(*this, D, IsMonotonic);
+ if (const auto *C = D.getSingleClause<OMPOrderClause>())
+ if (C->getKind() == OMPC_ORDER_concurrent)
+ LoopStack.setParallel(/*Enable=*/true);
+ if ((D.getDirectiveKind() == OMPD_simd ||
+ (getLangOpts().OpenMPSimd &&
+ isOpenMPSimdDirective(D.getDirectiveKind()))) &&
+ llvm::any_of(D.getClausesOfKind<OMPReductionClause>(),
+ [](const OMPReductionClause *C) {
+ return C->getModifier() == OMPC_REDUCTION_inscan;
+ }))
+ // Disable parallel access in case of prefix sum.
+ LoopStack.setParallel(/*Enable=*/false);
}
void CodeGenFunction::EmitOMPSimdFinal(
@@ -1886,7 +2237,6 @@ void CodeGenFunction::EmitOMPSimdFinal(
static void emitOMPLoopBodyWithStopPoint(CodeGenFunction &CGF,
const OMPLoopDirective &S,
CodeGenFunction::JumpDest LoopExit) {
- CGF.CGM.getOpenMPRuntime().initLastprivateConditionalCounter(CGF, S);
CGF.EmitOMPLoopBody(S, LoopExit);
CGF.EmitStopPoint(&S);
}
@@ -1917,12 +2267,14 @@ static void emitCommonSimdLoop(CodeGenFunction &CGF, const OMPLoopDirective &S,
BodyCodeGen(CGF);
};
const Expr *IfCond = nullptr;
- for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
- if (CGF.getLangOpts().OpenMP >= 50 &&
- (C->getNameModifier() == OMPD_unknown ||
- C->getNameModifier() == OMPD_simd)) {
- IfCond = C->getCondition();
- break;
+ if (isOpenMPSimdDirective(S.getDirectiveKind())) {
+ for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
+ if (CGF.getLangOpts().OpenMP >= 50 &&
+ (C->getNameModifier() == OMPD_unknown ||
+ C->getNameModifier() == OMPD_simd)) {
+ IfCond = C->getCondition();
+ break;
+ }
}
}
if (IfCond) {
@@ -2007,10 +2359,8 @@ static void emitOMPSimdRegion(CodeGenFunction &CGF, const OMPLoopDirective &S,
CGF.EmitOMPInnerLoop(
S, LoopScope.requiresCleanups(), S.getCond(), S.getInc(),
[&S](CodeGenFunction &CGF) {
- CGF.CGM.getOpenMPRuntime().initLastprivateConditionalCounter(
- CGF, S);
- CGF.EmitOMPLoopBody(S, CodeGenFunction::JumpDest());
- CGF.EmitStopPoint(&S);
+ emitOMPLoopBodyWithStopPoint(CGF, S,
+ CodeGenFunction::JumpDest());
},
[](CodeGenFunction &) {});
});
@@ -2031,11 +2381,19 @@ static void emitOMPSimdRegion(CodeGenFunction &CGF, const OMPLoopDirective &S,
}
void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) {
+ ParentLoopDirectiveForScanRegion ScanRegion(*this, S);
+ OMPFirstScanLoop = true;
auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
emitOMPSimdRegion(CGF, S, Action);
};
- OMPLexicalScope Scope(*this, S, OMPD_unknown);
- CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen);
+ {
+ auto LPCRegion =
+ CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
+ OMPLexicalScope Scope(*this, S, OMPD_unknown);
+ CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen);
+ }
+ // Check for outer lastprivate conditional update.
+ checkForLastprivateConditionalUpdate(*this, S);
}
void CodeGenFunction::EmitOMPOuterLoop(
@@ -2103,10 +2461,14 @@ void CodeGenFunction::EmitOMPOuterLoop(
[&S, IsMonotonic](CodeGenFunction &CGF, PrePostActionTy &) {
// Generate !llvm.loop.parallel metadata for loads and stores for loops
// with dynamic/guided scheduling and without ordered clause.
- if (!isOpenMPSimdDirective(S.getDirectiveKind()))
+ if (!isOpenMPSimdDirective(S.getDirectiveKind())) {
CGF.LoopStack.setParallel(!IsMonotonic);
- else
+ if (const auto *C = S.getSingleClause<OMPOrderClause>())
+ if (C->getKind() == OMPC_ORDER_concurrent)
+ CGF.LoopStack.setParallel(/*Enable=*/true);
+ } else {
CGF.EmitOMPSimdInit(S, IsMonotonic);
+ }
},
[&S, &LoopArgs, LoopExit, &CodeGenLoop, IVSize, IVSigned, &CodeGenOrdered,
&LoopScope](CodeGenFunction &CGF, PrePostActionTy &) {
@@ -2612,6 +2974,14 @@ bool CodeGenFunction::EmitOMPWorksharingLoop(
bool StaticChunkedOne = RT.isStaticChunked(ScheduleKind.Schedule,
/* Chunked */ Chunk != nullptr) && HasChunkSizeOne &&
isOpenMPLoopBoundSharingDirective(S.getDirectiveKind());
+ bool IsMonotonic =
+ Ordered ||
+ ((ScheduleKind.Schedule == OMPC_SCHEDULE_static ||
+ ScheduleKind.Schedule == OMPC_SCHEDULE_unknown) &&
+ !(ScheduleKind.M1 == OMPC_SCHEDULE_MODIFIER_nonmonotonic ||
+ ScheduleKind.M1 == OMPC_SCHEDULE_MODIFIER_nonmonotonic)) ||
+ ScheduleKind.M1 == OMPC_SCHEDULE_MODIFIER_monotonic ||
+ ScheduleKind.M2 == OMPC_SCHEDULE_MODIFIER_monotonic;
if ((RT.isStaticNonchunked(ScheduleKind.Schedule,
/* Chunked */ Chunk != nullptr) ||
StaticChunkedOne) &&
@@ -2620,9 +2990,13 @@ bool CodeGenFunction::EmitOMPWorksharingLoop(
getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit"));
emitCommonSimdLoop(
*this, S,
- [&S](CodeGenFunction &CGF, PrePostActionTy &) {
- if (isOpenMPSimdDirective(S.getDirectiveKind()))
- CGF.EmitOMPSimdInit(S, /*IsMonotonic=*/true);
+ [&S, IsMonotonic](CodeGenFunction &CGF, PrePostActionTy &) {
+ if (isOpenMPSimdDirective(S.getDirectiveKind())) {
+ CGF.EmitOMPSimdInit(S, IsMonotonic);
+ } else if (const auto *C = S.getSingleClause<OMPOrderClause>()) {
+ if (C->getKind() == OMPC_ORDER_concurrent)
+ CGF.LoopStack.setParallel(/*Enable=*/true);
+ }
},
[IVSize, IVSigned, Ordered, IL, LB, UB, ST, StaticChunkedOne, Chunk,
&S, ScheduleKind, LoopExit,
@@ -2663,10 +3037,7 @@ bool CodeGenFunction::EmitOMPWorksharingLoop(
: S.getCond(),
StaticChunkedOne ? S.getDistInc() : S.getInc(),
[&S, LoopExit](CodeGenFunction &CGF) {
- CGF.CGM.getOpenMPRuntime()
- .initLastprivateConditionalCounter(CGF, S);
- CGF.EmitOMPLoopBody(S, LoopExit);
- CGF.EmitStopPoint(&S);
+ emitOMPLoopBodyWithStopPoint(CGF, S, LoopExit);
},
[](CodeGenFunction &) {});
});
@@ -2678,11 +3049,6 @@ bool CodeGenFunction::EmitOMPWorksharingLoop(
};
OMPCancelStack.emitExit(*this, S.getDirectiveKind(), CodeGen);
} else {
- const bool IsMonotonic =
- Ordered || ScheduleKind.Schedule == OMPC_SCHEDULE_static ||
- ScheduleKind.Schedule == OMPC_SCHEDULE_unknown ||
- ScheduleKind.M1 == OMPC_SCHEDULE_MODIFIER_monotonic ||
- ScheduleKind.M2 == OMPC_SCHEDULE_MODIFIER_monotonic;
// Emit the outer loop, which requests its work chunk [LB..UB] from
// runtime and runs the inner loop to process it.
const OMPLoopArguments LoopArguments(
@@ -2755,16 +3121,233 @@ emitDispatchForLoopBounds(CodeGenFunction &CGF, const OMPExecutableDirective &S,
return {LBVal, UBVal};
}
+/// Emits the code for the directive with inscan reductions.
+/// The code is the following:
+/// \code
+/// size num_iters = <num_iters>;
+/// <type> buffer[num_iters];
+/// #pragma omp ...
+/// for (i: 0..<num_iters>) {
+/// <input phase>;
+/// buffer[i] = red;
+/// }
+/// for (int k = 0; k != ceil(log2(num_iters)); ++k)
+/// for (size cnt = last_iter; cnt >= pow(2, k); --k)
+/// buffer[i] op= buffer[i-pow(2,k)];
+/// #pragma omp ...
+/// for (0..<num_iters>) {
+/// red = InclusiveScan ? buffer[i] : buffer[i-1];
+/// <scan phase>;
+/// }
+/// \endcode
+static void emitScanBasedDirective(
+ CodeGenFunction &CGF, const OMPLoopDirective &S,
+ llvm::function_ref<llvm::Value *(CodeGenFunction &)> NumIteratorsGen,
+ llvm::function_ref<void(CodeGenFunction &)> FirstGen,
+ llvm::function_ref<void(CodeGenFunction &)> SecondGen) {
+ llvm::Value *OMPScanNumIterations = CGF.Builder.CreateIntCast(
+ NumIteratorsGen(CGF), CGF.SizeTy, /*isSigned=*/false);
+ SmallVector<const Expr *, 4> Shareds;
+ SmallVector<const Expr *, 4> Privates;
+ SmallVector<const Expr *, 4> ReductionOps;
+ SmallVector<const Expr *, 4> LHSs;
+ SmallVector<const Expr *, 4> RHSs;
+ SmallVector<const Expr *, 4> CopyOps;
+ SmallVector<const Expr *, 4> CopyArrayTemps;
+ SmallVector<const Expr *, 4> CopyArrayElems;
+ for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
+ assert(C->getModifier() == OMPC_REDUCTION_inscan &&
+ "Only inscan reductions are expected.");
+ Shareds.append(C->varlist_begin(), C->varlist_end());
+ Privates.append(C->privates().begin(), C->privates().end());
+ ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end());
+ LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
+ RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
+ CopyOps.append(C->copy_ops().begin(), C->copy_ops().end());
+ CopyArrayTemps.append(C->copy_array_temps().begin(),
+ C->copy_array_temps().end());
+ CopyArrayElems.append(C->copy_array_elems().begin(),
+ C->copy_array_elems().end());
+ }
+ {
+ // Emit buffers for each reduction variables.
+ // ReductionCodeGen is required to emit correctly the code for array
+ // reductions.
+ ReductionCodeGen RedCG(Shareds, Shareds, Privates, ReductionOps);
+ unsigned Count = 0;
+ auto *ITA = CopyArrayTemps.begin();
+ for (const Expr *IRef : Privates) {
+ const auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(IRef)->getDecl());
+ // Emit variably modified arrays, used for arrays/array sections
+ // reductions.
+ if (PrivateVD->getType()->isVariablyModifiedType()) {
+ RedCG.emitSharedOrigLValue(CGF, Count);
+ RedCG.emitAggregateType(CGF, Count);
+ }
+ CodeGenFunction::OpaqueValueMapping DimMapping(
+ CGF,
+ cast<OpaqueValueExpr>(
+ cast<VariableArrayType>((*ITA)->getType()->getAsArrayTypeUnsafe())
+ ->getSizeExpr()),
+ RValue::get(OMPScanNumIterations));
+ // Emit temp buffer.
+ CGF.EmitVarDecl(*cast<VarDecl>(cast<DeclRefExpr>(*ITA)->getDecl()));
+ ++ITA;
+ ++Count;
+ }
+ }
+ CodeGenFunction::ParentLoopDirectiveForScanRegion ScanRegion(CGF, S);
+ {
+ // Emit loop with input phase:
+ // #pragma omp ...
+ // for (i: 0..<num_iters>) {
+ // <input phase>;
+ // buffer[i] = red;
+ // }
+ CGF.OMPFirstScanLoop = true;
+ CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF);
+ FirstGen(CGF);
+ }
+ // Emit prefix reduction:
+ // for (int k = 0; k <= ceil(log2(n)); ++k)
+ llvm::BasicBlock *InputBB = CGF.Builder.GetInsertBlock();
+ llvm::BasicBlock *LoopBB = CGF.createBasicBlock("omp.outer.log.scan.body");
+ llvm::BasicBlock *ExitBB = CGF.createBasicBlock("omp.outer.log.scan.exit");
+ llvm::Function *F = CGF.CGM.getIntrinsic(llvm::Intrinsic::log2, CGF.DoubleTy);
+ llvm::Value *Arg =
+ CGF.Builder.CreateUIToFP(OMPScanNumIterations, CGF.DoubleTy);
+ llvm::Value *LogVal = CGF.EmitNounwindRuntimeCall(F, Arg);
+ F = CGF.CGM.getIntrinsic(llvm::Intrinsic::ceil, CGF.DoubleTy);
+ LogVal = CGF.EmitNounwindRuntimeCall(F, LogVal);
+ LogVal = CGF.Builder.CreateFPToUI(LogVal, CGF.IntTy);
+ llvm::Value *NMin1 = CGF.Builder.CreateNUWSub(
+ OMPScanNumIterations, llvm::ConstantInt::get(CGF.SizeTy, 1));
+ auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, S.getBeginLoc());
+ CGF.EmitBlock(LoopBB);
+ auto *Counter = CGF.Builder.CreatePHI(CGF.IntTy, 2);
+ // size pow2k = 1;
+ auto *Pow2K = CGF.Builder.CreatePHI(CGF.SizeTy, 2);
+ Counter->addIncoming(llvm::ConstantInt::get(CGF.IntTy, 0), InputBB);
+ Pow2K->addIncoming(llvm::ConstantInt::get(CGF.SizeTy, 1), InputBB);
+ // for (size i = n - 1; i >= 2 ^ k; --i)
+ // tmp[i] op= tmp[i-pow2k];
+ llvm::BasicBlock *InnerLoopBB =
+ CGF.createBasicBlock("omp.inner.log.scan.body");
+ llvm::BasicBlock *InnerExitBB =
+ CGF.createBasicBlock("omp.inner.log.scan.exit");
+ llvm::Value *CmpI = CGF.Builder.CreateICmpUGE(NMin1, Pow2K);
+ CGF.Builder.CreateCondBr(CmpI, InnerLoopBB, InnerExitBB);
+ CGF.EmitBlock(InnerLoopBB);
+ auto *IVal = CGF.Builder.CreatePHI(CGF.SizeTy, 2);
+ IVal->addIncoming(NMin1, LoopBB);
+ {
+ CodeGenFunction::OMPPrivateScope PrivScope(CGF);
+ auto *ILHS = LHSs.begin();
+ auto *IRHS = RHSs.begin();
+ for (const Expr *CopyArrayElem : CopyArrayElems) {
+ const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
+ const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
+ Address LHSAddr = Address::invalid();
+ {
+ CodeGenFunction::OpaqueValueMapping IdxMapping(
+ CGF,
+ cast<OpaqueValueExpr>(
+ cast<ArraySubscriptExpr>(CopyArrayElem)->getIdx()),
+ RValue::get(IVal));
+ LHSAddr = CGF.EmitLValue(CopyArrayElem).getAddress(CGF);
+ }
+ PrivScope.addPrivate(LHSVD, [LHSAddr]() { return LHSAddr; });
+ Address RHSAddr = Address::invalid();
+ {
+ llvm::Value *OffsetIVal = CGF.Builder.CreateNUWSub(IVal, Pow2K);
+ CodeGenFunction::OpaqueValueMapping IdxMapping(
+ CGF,
+ cast<OpaqueValueExpr>(
+ cast<ArraySubscriptExpr>(CopyArrayElem)->getIdx()),
+ RValue::get(OffsetIVal));
+ RHSAddr = CGF.EmitLValue(CopyArrayElem).getAddress(CGF);
+ }
+ PrivScope.addPrivate(RHSVD, [RHSAddr]() { return RHSAddr; });
+ ++ILHS;
+ ++IRHS;
+ }
+ PrivScope.Privatize();
+ CGF.CGM.getOpenMPRuntime().emitReduction(
+ CGF, S.getEndLoc(), Privates, LHSs, RHSs, ReductionOps,
+ {/*WithNowait=*/true, /*SimpleReduction=*/true, OMPD_unknown});
+ }
+ llvm::Value *NextIVal =
+ CGF.Builder.CreateNUWSub(IVal, llvm::ConstantInt::get(CGF.SizeTy, 1));
+ IVal->addIncoming(NextIVal, CGF.Builder.GetInsertBlock());
+ CmpI = CGF.Builder.CreateICmpUGE(NextIVal, Pow2K);
+ CGF.Builder.CreateCondBr(CmpI, InnerLoopBB, InnerExitBB);
+ CGF.EmitBlock(InnerExitBB);
+ llvm::Value *Next =
+ CGF.Builder.CreateNUWAdd(Counter, llvm::ConstantInt::get(CGF.IntTy, 1));
+ Counter->addIncoming(Next, CGF.Builder.GetInsertBlock());
+ // pow2k <<= 1;
+ llvm::Value *NextPow2K = CGF.Builder.CreateShl(Pow2K, 1, "", /*HasNUW=*/true);
+ Pow2K->addIncoming(NextPow2K, CGF.Builder.GetInsertBlock());
+ llvm::Value *Cmp = CGF.Builder.CreateICmpNE(Next, LogVal);
+ CGF.Builder.CreateCondBr(Cmp, LoopBB, ExitBB);
+ auto DL1 = ApplyDebugLocation::CreateDefaultArtificial(CGF, S.getEndLoc());
+ CGF.EmitBlock(ExitBB);
+
+ CGF.OMPFirstScanLoop = false;
+ SecondGen(CGF);
+}
+
+static bool emitWorksharingDirective(CodeGenFunction &CGF,
+ const OMPLoopDirective &S,
+ bool HasCancel) {
+ bool HasLastprivates;
+ if (llvm::any_of(S.getClausesOfKind<OMPReductionClause>(),
+ [](const OMPReductionClause *C) {
+ return C->getModifier() == OMPC_REDUCTION_inscan;
+ })) {
+ const auto &&NumIteratorsGen = [&S](CodeGenFunction &CGF) {
+ CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF);
+ OMPLoopScope LoopScope(CGF, S);
+ return CGF.EmitScalarExpr(S.getNumIterations());
+ };
+ const auto &&FirstGen = [&S, HasCancel](CodeGenFunction &CGF) {
+ CodeGenFunction::OMPCancelStackRAII CancelRegion(
+ CGF, S.getDirectiveKind(), HasCancel);
+ (void)CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(),
+ emitForLoopBounds,
+ emitDispatchForLoopBounds);
+ // Emit an implicit barrier at the end.
+ CGF.CGM.getOpenMPRuntime().emitBarrierCall(CGF, S.getBeginLoc(),
+ OMPD_for);
+ };
+ const auto &&SecondGen = [&S, HasCancel,
+ &HasLastprivates](CodeGenFunction &CGF) {
+ CodeGenFunction::OMPCancelStackRAII CancelRegion(
+ CGF, S.getDirectiveKind(), HasCancel);
+ HasLastprivates = CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(),
+ emitForLoopBounds,
+ emitDispatchForLoopBounds);
+ };
+ emitScanBasedDirective(CGF, S, NumIteratorsGen, FirstGen, SecondGen);
+ } else {
+ CodeGenFunction::OMPCancelStackRAII CancelRegion(CGF, S.getDirectiveKind(),
+ HasCancel);
+ HasLastprivates = CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(),
+ emitForLoopBounds,
+ emitDispatchForLoopBounds);
+ }
+ return HasLastprivates;
+}
+
void CodeGenFunction::EmitOMPForDirective(const OMPForDirective &S) {
bool HasLastprivates = false;
auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF,
PrePostActionTy &) {
- OMPCancelStackRAII CancelRegion(CGF, OMPD_for, S.hasCancel());
- HasLastprivates = CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(),
- emitForLoopBounds,
- emitDispatchForLoopBounds);
+ HasLastprivates = emitWorksharingDirective(CGF, S, S.hasCancel());
};
{
+ auto LPCRegion =
+ CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
OMPLexicalScope Scope(*this, S, OMPD_unknown);
CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_for, CodeGen,
S.hasCancel());
@@ -2773,17 +3356,19 @@ void CodeGenFunction::EmitOMPForDirective(const OMPForDirective &S) {
// Emit an implicit barrier at the end.
if (!S.getSingleClause<OMPNowaitClause>() || HasLastprivates)
CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(), OMPD_for);
+ // Check for outer lastprivate conditional update.
+ checkForLastprivateConditionalUpdate(*this, S);
}
void CodeGenFunction::EmitOMPForSimdDirective(const OMPForSimdDirective &S) {
bool HasLastprivates = false;
auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF,
PrePostActionTy &) {
- HasLastprivates = CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(),
- emitForLoopBounds,
- emitDispatchForLoopBounds);
+ HasLastprivates = emitWorksharingDirective(CGF, S, /*HasCancel=*/false);
};
{
+ auto LPCRegion =
+ CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
OMPLexicalScope Scope(*this, S, OMPD_unknown);
CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen);
}
@@ -2791,6 +3376,8 @@ void CodeGenFunction::EmitOMPForSimdDirective(const OMPForSimdDirective &S) {
// Emit an implicit barrier at the end.
if (!S.getSingleClause<OMPNowaitClause>() || HasLastprivates)
CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(), OMPD_for);
+ // Check for outer lastprivate conditional update.
+ checkForLastprivateConditionalUpdate(*this, S);
}
static LValue createSectionLVal(CodeGenFunction &CGF, QualType Ty,
@@ -2808,7 +3395,7 @@ void CodeGenFunction::EmitSections(const OMPExecutableDirective &S) {
bool HasLastprivates = false;
auto &&CodeGen = [&S, CapturedStmt, CS,
&HasLastprivates](CodeGenFunction &CGF, PrePostActionTy &) {
- ASTContext &C = CGF.getContext();
+ const ASTContext &C = CGF.getContext();
QualType KmpInt32Ty =
C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
// Emit helper vars inits.
@@ -2830,11 +3417,13 @@ void CodeGenFunction::EmitSections(const OMPExecutableDirective &S) {
OpaqueValueExpr UBRefExpr(S.getBeginLoc(), KmpInt32Ty, VK_LValue);
CodeGenFunction::OpaqueValueMapping OpaqueUB(CGF, &UBRefExpr, UB);
// Generate condition for loop.
- BinaryOperator Cond(&IVRefExpr, &UBRefExpr, BO_LE, C.BoolTy, VK_RValue,
- OK_Ordinary, S.getBeginLoc(), FPOptions());
+ BinaryOperator *Cond = BinaryOperator::Create(
+ C, &IVRefExpr, &UBRefExpr, BO_LE, C.BoolTy, VK_RValue, OK_Ordinary,
+ S.getBeginLoc(), FPOptionsOverride());
// Increment for loop counter.
- UnaryOperator Inc(&IVRefExpr, UO_PreInc, KmpInt32Ty, VK_RValue, OK_Ordinary,
- S.getBeginLoc(), true);
+ UnaryOperator *Inc = UnaryOperator::Create(
+ C, &IVRefExpr, UO_PreInc, KmpInt32Ty, VK_RValue, OK_Ordinary,
+ S.getBeginLoc(), true, FPOptionsOverride());
auto &&BodyGen = [CapturedStmt, CS, &S, &IV](CodeGenFunction &CGF) {
// Iterate through all sections and emit a switch construct:
// switch (IV) {
@@ -2847,7 +3436,6 @@ void CodeGenFunction::EmitSections(const OMPExecutableDirective &S) {
// break;
// }
// .omp.sections.exit:
- CGF.CGM.getOpenMPRuntime().initLastprivateConditionalCounter(CGF, S);
llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".omp.sections.exit");
llvm::SwitchInst *SwitchStmt =
CGF.Builder.CreateSwitch(CGF.EmitLoadOfScalar(IV, S.getBeginLoc()),
@@ -2905,7 +3493,7 @@ void CodeGenFunction::EmitSections(const OMPExecutableDirective &S) {
// IV = LB;
CGF.EmitStoreOfScalar(CGF.EmitLoadOfScalar(LB, S.getBeginLoc()), IV);
// while (idx <= UB) { BODY; ++idx; }
- CGF.EmitOMPInnerLoop(S, /*RequiresCleanup=*/false, &Cond, &Inc, BodyGen,
+ CGF.EmitOMPInnerLoop(S, /*RequiresCleanup=*/false, Cond, Inc, BodyGen,
[](CodeGenFunction &) {});
// Tell the runtime we are done.
auto &&CodeGen = [&S](CodeGenFunction &CGF) {
@@ -2949,6 +3537,8 @@ void CodeGenFunction::EmitSections(const OMPExecutableDirective &S) {
void CodeGenFunction::EmitOMPSectionsDirective(const OMPSectionsDirective &S) {
{
+ auto LPCRegion =
+ CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
OMPLexicalScope Scope(*this, S, OMPD_unknown);
EmitSections(S);
}
@@ -2957,6 +3547,8 @@ void CodeGenFunction::EmitOMPSectionsDirective(const OMPSectionsDirective &S) {
CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(),
OMPD_sections);
}
+ // Check for outer lastprivate conditional update.
+ checkForLastprivateConditionalUpdate(*this, S);
}
void CodeGenFunction::EmitOMPSectionDirective(const OMPSectionDirective &S) {
@@ -2995,6 +3587,8 @@ void CodeGenFunction::EmitOMPSingleDirective(const OMPSingleDirective &S) {
CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt());
};
{
+ auto LPCRegion =
+ CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
OMPLexicalScope Scope(*this, S, OMPD_unknown);
CGM.getOpenMPRuntime().emitSingleRegion(*this, CodeGen, S.getBeginLoc(),
CopyprivateVars, DestExprs,
@@ -3007,6 +3601,8 @@ void CodeGenFunction::EmitOMPSingleDirective(const OMPSingleDirective &S) {
*this, S.getBeginLoc(),
S.getSingleClause<OMPNowaitClause>() ? OMPD_unknown : OMPD_single);
}
+ // Check for outer lastprivate conditional update.
+ checkForLastprivateConditionalUpdate(*this, S);
}
static void emitMaster(CodeGenFunction &CGF, const OMPExecutableDirective &S) {
@@ -3018,11 +3614,75 @@ static void emitMaster(CodeGenFunction &CGF, const OMPExecutableDirective &S) {
}
void CodeGenFunction::EmitOMPMasterDirective(const OMPMasterDirective &S) {
+ if (CGM.getLangOpts().OpenMPIRBuilder) {
+ llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
+ using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
+
+ const CapturedStmt *CS = S.getInnermostCapturedStmt();
+ const Stmt *MasterRegionBodyStmt = CS->getCapturedStmt();
+
+ auto FiniCB = [this](InsertPointTy IP) {
+ OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP);
+ };
+
+ auto BodyGenCB = [MasterRegionBodyStmt, this](InsertPointTy AllocaIP,
+ InsertPointTy CodeGenIP,
+ llvm::BasicBlock &FiniBB) {
+ OMPBuilderCBHelpers::InlinedRegionBodyRAII IRB(*this, AllocaIP, FiniBB);
+ OMPBuilderCBHelpers::EmitOMPRegionBody(*this, MasterRegionBodyStmt,
+ CodeGenIP, FiniBB);
+ };
+
+ CGCapturedStmtInfo CGSI(*CS, CR_OpenMP);
+ CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(*this, &CGSI);
+ Builder.restoreIP(OMPBuilder.CreateMaster(Builder, BodyGenCB, FiniCB));
+
+ return;
+ }
OMPLexicalScope Scope(*this, S, OMPD_unknown);
emitMaster(*this, S);
}
void CodeGenFunction::EmitOMPCriticalDirective(const OMPCriticalDirective &S) {
+ if (CGM.getLangOpts().OpenMPIRBuilder) {
+ llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
+ using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
+
+ const CapturedStmt *CS = S.getInnermostCapturedStmt();
+ const Stmt *CriticalRegionBodyStmt = CS->getCapturedStmt();
+ const Expr *Hint = nullptr;
+ if (const auto *HintClause = S.getSingleClause<OMPHintClause>())
+ Hint = HintClause->getHint();
+
+ // TODO: This is slightly different from what's currently being done in
+ // clang. Fix the Int32Ty to IntPtrTy (pointer width size) when everything
+ // about typing is final.
+ llvm::Value *HintInst = nullptr;
+ if (Hint)
+ HintInst =
+ Builder.CreateIntCast(EmitScalarExpr(Hint), CGM.Int32Ty, false);
+
+ auto FiniCB = [this](InsertPointTy IP) {
+ OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP);
+ };
+
+ auto BodyGenCB = [CriticalRegionBodyStmt, this](InsertPointTy AllocaIP,
+ InsertPointTy CodeGenIP,
+ llvm::BasicBlock &FiniBB) {
+ OMPBuilderCBHelpers::InlinedRegionBodyRAII IRB(*this, AllocaIP, FiniBB);
+ OMPBuilderCBHelpers::EmitOMPRegionBody(*this, CriticalRegionBodyStmt,
+ CodeGenIP, FiniBB);
+ };
+
+ CGCapturedStmtInfo CGSI(*CS, CR_OpenMP);
+ CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(*this, &CGSI);
+ Builder.restoreIP(OMPBuilder.CreateCritical(
+ Builder, BodyGenCB, FiniCB, S.getDirectiveName().getAsString(),
+ HintInst));
+
+ return;
+ }
+
auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
Action.Enter(CGF);
CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt());
@@ -3042,12 +3702,16 @@ void CodeGenFunction::EmitOMPParallelForDirective(
// directives: 'parallel' with 'for' directive.
auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
Action.Enter(CGF);
- OMPCancelStackRAII CancelRegion(CGF, OMPD_parallel_for, S.hasCancel());
- CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds,
- emitDispatchForLoopBounds);
+ (void)emitWorksharingDirective(CGF, S, S.hasCancel());
};
- emitCommonOMPParallelDirective(*this, S, OMPD_for, CodeGen,
- emitEmptyBoundParameters);
+ {
+ auto LPCRegion =
+ CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
+ emitCommonOMPParallelDirective(*this, S, OMPD_for, CodeGen,
+ emitEmptyBoundParameters);
+ }
+ // Check for outer lastprivate conditional update.
+ checkForLastprivateConditionalUpdate(*this, S);
}
void CodeGenFunction::EmitOMPParallelForSimdDirective(
@@ -3056,11 +3720,16 @@ void CodeGenFunction::EmitOMPParallelForSimdDirective(
// directives: 'parallel' with 'for' directive.
auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
Action.Enter(CGF);
- CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds,
- emitDispatchForLoopBounds);
+ (void)emitWorksharingDirective(CGF, S, /*HasCancel=*/false);
};
- emitCommonOMPParallelDirective(*this, S, OMPD_simd, CodeGen,
- emitEmptyBoundParameters);
+ {
+ auto LPCRegion =
+ CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
+ emitCommonOMPParallelDirective(*this, S, OMPD_for_simd, CodeGen,
+ emitEmptyBoundParameters);
+ }
+ // Check for outer lastprivate conditional update.
+ checkForLastprivateConditionalUpdate(*this, S);
}
void CodeGenFunction::EmitOMPParallelMasterDirective(
@@ -3086,10 +3755,16 @@ void CodeGenFunction::EmitOMPParallelMasterDirective(
emitMaster(CGF, S);
CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel);
};
- emitCommonOMPParallelDirective(*this, S, OMPD_master, CodeGen,
- emitEmptyBoundParameters);
- emitPostUpdateForReductionClause(*this, S,
- [](CodeGenFunction &) { return nullptr; });
+ {
+ auto LPCRegion =
+ CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
+ emitCommonOMPParallelDirective(*this, S, OMPD_master, CodeGen,
+ emitEmptyBoundParameters);
+ emitPostUpdateForReductionClause(*this, S,
+ [](CodeGenFunction &) { return nullptr; });
+ }
+ // Check for outer lastprivate conditional update.
+ checkForLastprivateConditionalUpdate(*this, S);
}
void CodeGenFunction::EmitOMPParallelSectionsDirective(
@@ -3100,8 +3775,14 @@ void CodeGenFunction::EmitOMPParallelSectionsDirective(
Action.Enter(CGF);
CGF.EmitSections(S);
};
- emitCommonOMPParallelDirective(*this, S, OMPD_sections, CodeGen,
- emitEmptyBoundParameters);
+ {
+ auto LPCRegion =
+ CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
+ emitCommonOMPParallelDirective(*this, S, OMPD_sections, CodeGen,
+ emitEmptyBoundParameters);
+ }
+ // Check for outer lastprivate conditional update.
+ checkForLastprivateConditionalUpdate(*this, S);
}
void CodeGenFunction::EmitOMPTaskBasedDirective(
@@ -3188,33 +3869,28 @@ void CodeGenFunction::EmitOMPTaskBasedDirective(
SmallVector<const Expr *, 4> LHSs;
SmallVector<const Expr *, 4> RHSs;
for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
- auto IPriv = C->privates().begin();
- auto IRed = C->reduction_ops().begin();
- auto ILHS = C->lhs_exprs().begin();
- auto IRHS = C->rhs_exprs().begin();
- for (const Expr *Ref : C->varlists()) {
- Data.ReductionVars.emplace_back(Ref);
- Data.ReductionCopies.emplace_back(*IPriv);
- Data.ReductionOps.emplace_back(*IRed);
- LHSs.emplace_back(*ILHS);
- RHSs.emplace_back(*IRHS);
- std::advance(IPriv, 1);
- std::advance(IRed, 1);
- std::advance(ILHS, 1);
- std::advance(IRHS, 1);
- }
+ Data.ReductionVars.append(C->varlist_begin(), C->varlist_end());
+ Data.ReductionOrigs.append(C->varlist_begin(), C->varlist_end());
+ Data.ReductionCopies.append(C->privates().begin(), C->privates().end());
+ Data.ReductionOps.append(C->reduction_ops().begin(),
+ C->reduction_ops().end());
+ LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
+ RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
}
Data.Reductions = CGM.getOpenMPRuntime().emitTaskReductionInit(
*this, S.getBeginLoc(), LHSs, RHSs, Data);
// Build list of dependences.
- for (const auto *C : S.getClausesOfKind<OMPDependClause>())
- for (const Expr *IRef : C->varlists())
- Data.Dependences.emplace_back(C->getDependencyKind(), IRef);
+ for (const auto *C : S.getClausesOfKind<OMPDependClause>()) {
+ OMPTaskDataTy::DependData &DD =
+ Data.Dependences.emplace_back(C->getDependencyKind(), C->getModifier());
+ DD.DepExprs.append(C->varlist_begin(), C->varlist_end());
+ }
auto &&CodeGen = [&Data, &S, CS, &BodyGen, &LastprivateDstsOrigs,
CapturedRegion](CodeGenFunction &CGF,
PrePostActionTy &Action) {
// Set proper addresses for generated private copies.
OMPPrivateScope Scope(CGF);
+ llvm::SmallVector<std::pair<const VarDecl *, Address>, 16> FirstprivatePtrs;
if (!Data.PrivateVars.empty() || !Data.FirstprivateVars.empty() ||
!Data.LastprivateVars.empty()) {
llvm::FunctionType *CopyFnTy = llvm::FunctionType::get(
@@ -3241,6 +3917,7 @@ void CodeGenFunction::EmitOMPTaskBasedDirective(
CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()),
".firstpriv.ptr.addr");
PrivatePtrs.emplace_back(VD, PrivatePtr);
+ FirstprivatePtrs.emplace_back(VD, PrivatePtr);
CallArgs.push_back(PrivatePtr.getPointer());
}
for (const Expr *E : Data.LastprivateVars) {
@@ -3271,13 +3948,21 @@ void CodeGenFunction::EmitOMPTaskBasedDirective(
}
}
if (Data.Reductions) {
+ OMPPrivateScope FirstprivateScope(CGF);
+ for (const auto &Pair : FirstprivatePtrs) {
+ Address Replacement(CGF.Builder.CreateLoad(Pair.second),
+ CGF.getContext().getDeclAlign(Pair.first));
+ FirstprivateScope.addPrivate(Pair.first,
+ [Replacement]() { return Replacement; });
+ }
+ (void)FirstprivateScope.Privatize();
OMPLexicalScope LexScope(CGF, S, CapturedRegion);
- ReductionCodeGen RedCG(Data.ReductionVars, Data.ReductionCopies,
- Data.ReductionOps);
+ ReductionCodeGen RedCG(Data.ReductionVars, Data.ReductionVars,
+ Data.ReductionCopies, Data.ReductionOps);
llvm::Value *ReductionsPtr = CGF.Builder.CreateLoad(
CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(9)));
for (unsigned Cnt = 0, E = Data.ReductionVars.size(); Cnt < E; ++Cnt) {
- RedCG.emitSharedLValue(CGF, Cnt);
+ RedCG.emitSharedOrigLValue(CGF, Cnt);
RedCG.emitAggregateType(CGF, Cnt);
// FIXME: This must removed once the runtime library is fixed.
// Emit required threadprivate variables for
@@ -3322,9 +4007,9 @@ void CodeGenFunction::EmitOMPTaskBasedDirective(
// privatized earlier.
OMPPrivateScope InRedScope(CGF);
if (!InRedVars.empty()) {
- ReductionCodeGen RedCG(InRedVars, InRedPrivs, InRedOps);
+ ReductionCodeGen RedCG(InRedVars, InRedVars, InRedPrivs, InRedOps);
for (unsigned Cnt = 0, E = InRedVars.size(); Cnt < E; ++Cnt) {
- RedCG.emitSharedLValue(CGF, Cnt);
+ RedCG.emitSharedOrigLValue(CGF, Cnt);
RedCG.emitAggregateType(CGF, Cnt);
// The taskgroup descriptor variable is always implicit firstprivate and
// privatized already during processing of the firstprivates.
@@ -3333,9 +4018,13 @@ void CodeGenFunction::EmitOMPTaskBasedDirective(
// initializer/combiner/finalizer.
CGF.CGM.getOpenMPRuntime().emitTaskReductionFixups(CGF, S.getBeginLoc(),
RedCG, Cnt);
- llvm::Value *ReductionsPtr =
- CGF.EmitLoadOfScalar(CGF.EmitLValue(TaskgroupDescriptors[Cnt]),
- TaskgroupDescriptors[Cnt]->getExprLoc());
+ llvm::Value *ReductionsPtr;
+ if (const Expr *TRExpr = TaskgroupDescriptors[Cnt]) {
+ ReductionsPtr = CGF.EmitLoadOfScalar(CGF.EmitLValue(TRExpr),
+ TRExpr->getExprLoc());
+ } else {
+ ReductionsPtr = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
+ }
Address Replacement = CGF.CGM.getOpenMPRuntime().getTaskReductionItem(
CGF, S.getBeginLoc(), ReductionsPtr, RedCG.getSharedLValue(Cnt));
Replacement = Address(
@@ -3448,9 +4137,11 @@ void CodeGenFunction::EmitOMPTargetTaskBasedDirective(
}
(void)TargetScope.Privatize();
// Build list of dependences.
- for (const auto *C : S.getClausesOfKind<OMPDependClause>())
- for (const Expr *IRef : C->varlists())
- Data.Dependences.emplace_back(C->getDependencyKind(), IRef);
+ for (const auto *C : S.getClausesOfKind<OMPDependClause>()) {
+ OMPTaskDataTy::DependData &DD =
+ Data.Dependences.emplace_back(C->getDependencyKind(), C->getModifier());
+ DD.DepExprs.append(C->varlist_begin(), C->varlist_end());
+ }
auto &&CodeGen = [&Data, &S, CS, &BodyGen, BPVD, PVD, SVD,
&InputInfo](CodeGenFunction &CGF, PrePostActionTy &Action) {
// Set proper addresses for generated private copies.
@@ -3537,6 +4228,8 @@ void CodeGenFunction::EmitOMPTaskDirective(const OMPTaskDirective &S) {
SharedsTy, CapturedStruct, IfCond,
Data);
};
+ auto LPCRegion =
+ CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
EmitOMPTaskBasedDirective(S, OMPD_task, BodyGen, TaskGen, Data);
}
@@ -3562,21 +4255,13 @@ void CodeGenFunction::EmitOMPTaskgroupDirective(
SmallVector<const Expr *, 4> RHSs;
OMPTaskDataTy Data;
for (const auto *C : S.getClausesOfKind<OMPTaskReductionClause>()) {
- auto IPriv = C->privates().begin();
- auto IRed = C->reduction_ops().begin();
- auto ILHS = C->lhs_exprs().begin();
- auto IRHS = C->rhs_exprs().begin();
- for (const Expr *Ref : C->varlists()) {
- Data.ReductionVars.emplace_back(Ref);
- Data.ReductionCopies.emplace_back(*IPriv);
- Data.ReductionOps.emplace_back(*IRed);
- LHSs.emplace_back(*ILHS);
- RHSs.emplace_back(*IRHS);
- std::advance(IPriv, 1);
- std::advance(IRed, 1);
- std::advance(ILHS, 1);
- std::advance(IRHS, 1);
- }
+ Data.ReductionVars.append(C->varlist_begin(), C->varlist_end());
+ Data.ReductionOrigs.append(C->varlist_begin(), C->varlist_end());
+ Data.ReductionCopies.append(C->privates().begin(), C->privates().end());
+ Data.ReductionOps.append(C->reduction_ops().begin(),
+ C->reduction_ops().end());
+ LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
+ RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
}
llvm::Value *ReductionDesc =
CGF.CGM.getOpenMPRuntime().emitTaskReductionInit(CGF, S.getBeginLoc(),
@@ -3593,6 +4278,9 @@ void CodeGenFunction::EmitOMPTaskgroupDirective(
}
void CodeGenFunction::EmitOMPFlushDirective(const OMPFlushDirective &S) {
+ llvm::AtomicOrdering AO = S.getSingleClause<OMPFlushClause>()
+ ? llvm::AtomicOrdering::NotAtomic
+ : llvm::AtomicOrdering::AcquireRelease;
CGM.getOpenMPRuntime().emitFlush(
*this,
[&S]() -> ArrayRef<const Expr *> {
@@ -3601,7 +4289,233 @@ void CodeGenFunction::EmitOMPFlushDirective(const OMPFlushDirective &S) {
FlushClause->varlist_end());
return llvm::None;
}(),
- S.getBeginLoc());
+ S.getBeginLoc(), AO);
+}
+
+void CodeGenFunction::EmitOMPDepobjDirective(const OMPDepobjDirective &S) {
+ const auto *DO = S.getSingleClause<OMPDepobjClause>();
+ LValue DOLVal = EmitLValue(DO->getDepobj());
+ if (const auto *DC = S.getSingleClause<OMPDependClause>()) {
+ OMPTaskDataTy::DependData Dependencies(DC->getDependencyKind(),
+ DC->getModifier());
+ Dependencies.DepExprs.append(DC->varlist_begin(), DC->varlist_end());
+ Address DepAddr = CGM.getOpenMPRuntime().emitDepobjDependClause(
+ *this, Dependencies, DC->getBeginLoc());
+ EmitStoreOfScalar(DepAddr.getPointer(), DOLVal);
+ return;
+ }
+ if (const auto *DC = S.getSingleClause<OMPDestroyClause>()) {
+ CGM.getOpenMPRuntime().emitDestroyClause(*this, DOLVal, DC->getBeginLoc());
+ return;
+ }
+ if (const auto *UC = S.getSingleClause<OMPUpdateClause>()) {
+ CGM.getOpenMPRuntime().emitUpdateClause(
+ *this, DOLVal, UC->getDependencyKind(), UC->getBeginLoc());
+ return;
+ }
+}
+
+void CodeGenFunction::EmitOMPScanDirective(const OMPScanDirective &S) {
+ if (!OMPParentLoopDirectiveForScan)
+ return;
+ const OMPExecutableDirective &ParentDir = *OMPParentLoopDirectiveForScan;
+ bool IsInclusive = S.hasClausesOfKind<OMPInclusiveClause>();
+ SmallVector<const Expr *, 4> Shareds;
+ SmallVector<const Expr *, 4> Privates;
+ SmallVector<const Expr *, 4> LHSs;
+ SmallVector<const Expr *, 4> RHSs;
+ SmallVector<const Expr *, 4> ReductionOps;
+ SmallVector<const Expr *, 4> CopyOps;
+ SmallVector<const Expr *, 4> CopyArrayTemps;
+ SmallVector<const Expr *, 4> CopyArrayElems;
+ for (const auto *C : ParentDir.getClausesOfKind<OMPReductionClause>()) {
+ if (C->getModifier() != OMPC_REDUCTION_inscan)
+ continue;
+ Shareds.append(C->varlist_begin(), C->varlist_end());
+ Privates.append(C->privates().begin(), C->privates().end());
+ LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
+ RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
+ ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end());
+ CopyOps.append(C->copy_ops().begin(), C->copy_ops().end());
+ CopyArrayTemps.append(C->copy_array_temps().begin(),
+ C->copy_array_temps().end());
+ CopyArrayElems.append(C->copy_array_elems().begin(),
+ C->copy_array_elems().end());
+ }
+ if (ParentDir.getDirectiveKind() == OMPD_simd ||
+ (getLangOpts().OpenMPSimd &&
+ isOpenMPSimdDirective(ParentDir.getDirectiveKind()))) {
+ // For simd directive and simd-based directives in simd only mode, use the
+ // following codegen:
+ // int x = 0;
+ // #pragma omp simd reduction(inscan, +: x)
+ // for (..) {
+ // <first part>
+ // #pragma omp scan inclusive(x)
+ // <second part>
+ // }
+ // is transformed to:
+ // int x = 0;
+ // for (..) {
+ // int x_priv = 0;
+ // <first part>
+ // x = x_priv + x;
+ // x_priv = x;
+ // <second part>
+ // }
+ // and
+ // int x = 0;
+ // #pragma omp simd reduction(inscan, +: x)
+ // for (..) {
+ // <first part>
+ // #pragma omp scan exclusive(x)
+ // <second part>
+ // }
+ // to
+ // int x = 0;
+ // for (..) {
+ // int x_priv = 0;
+ // <second part>
+ // int temp = x;
+ // x = x_priv + x;
+ // x_priv = temp;
+ // <first part>
+ // }
+ llvm::BasicBlock *OMPScanReduce = createBasicBlock("omp.inscan.reduce");
+ EmitBranch(IsInclusive
+ ? OMPScanReduce
+ : BreakContinueStack.back().ContinueBlock.getBlock());
+ EmitBlock(OMPScanDispatch);
+ {
+ // New scope for correct construction/destruction of temp variables for
+ // exclusive scan.
+ LexicalScope Scope(*this, S.getSourceRange());
+ EmitBranch(IsInclusive ? OMPBeforeScanBlock : OMPAfterScanBlock);
+ EmitBlock(OMPScanReduce);
+ if (!IsInclusive) {
+ // Create temp var and copy LHS value to this temp value.
+ // TMP = LHS;
+ for (unsigned I = 0, E = CopyArrayElems.size(); I < E; ++I) {
+ const Expr *PrivateExpr = Privates[I];
+ const Expr *TempExpr = CopyArrayTemps[I];
+ EmitAutoVarDecl(
+ *cast<VarDecl>(cast<DeclRefExpr>(TempExpr)->getDecl()));
+ LValue DestLVal = EmitLValue(TempExpr);
+ LValue SrcLVal = EmitLValue(LHSs[I]);
+ EmitOMPCopy(PrivateExpr->getType(), DestLVal.getAddress(*this),
+ SrcLVal.getAddress(*this),
+ cast<VarDecl>(cast<DeclRefExpr>(LHSs[I])->getDecl()),
+ cast<VarDecl>(cast<DeclRefExpr>(RHSs[I])->getDecl()),
+ CopyOps[I]);
+ }
+ }
+ CGM.getOpenMPRuntime().emitReduction(
+ *this, ParentDir.getEndLoc(), Privates, LHSs, RHSs, ReductionOps,
+ {/*WithNowait=*/true, /*SimpleReduction=*/true, OMPD_simd});
+ for (unsigned I = 0, E = CopyArrayElems.size(); I < E; ++I) {
+ const Expr *PrivateExpr = Privates[I];
+ LValue DestLVal;
+ LValue SrcLVal;
+ if (IsInclusive) {
+ DestLVal = EmitLValue(RHSs[I]);
+ SrcLVal = EmitLValue(LHSs[I]);
+ } else {
+ const Expr *TempExpr = CopyArrayTemps[I];
+ DestLVal = EmitLValue(RHSs[I]);
+ SrcLVal = EmitLValue(TempExpr);
+ }
+ EmitOMPCopy(PrivateExpr->getType(), DestLVal.getAddress(*this),
+ SrcLVal.getAddress(*this),
+ cast<VarDecl>(cast<DeclRefExpr>(LHSs[I])->getDecl()),
+ cast<VarDecl>(cast<DeclRefExpr>(RHSs[I])->getDecl()),
+ CopyOps[I]);
+ }
+ }
+ EmitBranch(IsInclusive ? OMPAfterScanBlock : OMPBeforeScanBlock);
+ OMPScanExitBlock = IsInclusive
+ ? BreakContinueStack.back().ContinueBlock.getBlock()
+ : OMPScanReduce;
+ EmitBlock(OMPAfterScanBlock);
+ return;
+ }
+ if (!IsInclusive) {
+ EmitBranch(BreakContinueStack.back().ContinueBlock.getBlock());
+ EmitBlock(OMPScanExitBlock);
+ }
+ if (OMPFirstScanLoop) {
+ // Emit buffer[i] = red; at the end of the input phase.
+ const auto *IVExpr = cast<OMPLoopDirective>(ParentDir)
+ .getIterationVariable()
+ ->IgnoreParenImpCasts();
+ LValue IdxLVal = EmitLValue(IVExpr);
+ llvm::Value *IdxVal = EmitLoadOfScalar(IdxLVal, IVExpr->getExprLoc());
+ IdxVal = Builder.CreateIntCast(IdxVal, SizeTy, /*isSigned=*/false);
+ for (unsigned I = 0, E = CopyArrayElems.size(); I < E; ++I) {
+ const Expr *PrivateExpr = Privates[I];
+ const Expr *OrigExpr = Shareds[I];
+ const Expr *CopyArrayElem = CopyArrayElems[I];
+ OpaqueValueMapping IdxMapping(
+ *this,
+ cast<OpaqueValueExpr>(
+ cast<ArraySubscriptExpr>(CopyArrayElem)->getIdx()),
+ RValue::get(IdxVal));
+ LValue DestLVal = EmitLValue(CopyArrayElem);
+ LValue SrcLVal = EmitLValue(OrigExpr);
+ EmitOMPCopy(PrivateExpr->getType(), DestLVal.getAddress(*this),
+ SrcLVal.getAddress(*this),
+ cast<VarDecl>(cast<DeclRefExpr>(LHSs[I])->getDecl()),
+ cast<VarDecl>(cast<DeclRefExpr>(RHSs[I])->getDecl()),
+ CopyOps[I]);
+ }
+ }
+ EmitBranch(BreakContinueStack.back().ContinueBlock.getBlock());
+ if (IsInclusive) {
+ EmitBlock(OMPScanExitBlock);
+ EmitBranch(BreakContinueStack.back().ContinueBlock.getBlock());
+ }
+ EmitBlock(OMPScanDispatch);
+ if (!OMPFirstScanLoop) {
+ // Emit red = buffer[i]; at the entrance to the scan phase.
+ const auto *IVExpr = cast<OMPLoopDirective>(ParentDir)
+ .getIterationVariable()
+ ->IgnoreParenImpCasts();
+ LValue IdxLVal = EmitLValue(IVExpr);
+ llvm::Value *IdxVal = EmitLoadOfScalar(IdxLVal, IVExpr->getExprLoc());
+ IdxVal = Builder.CreateIntCast(IdxVal, SizeTy, /*isSigned=*/false);
+ llvm::BasicBlock *ExclusiveExitBB = nullptr;
+ if (!IsInclusive) {
+ llvm::BasicBlock *ContBB = createBasicBlock("omp.exclusive.dec");
+ ExclusiveExitBB = createBasicBlock("omp.exclusive.copy.exit");
+ llvm::Value *Cmp = Builder.CreateIsNull(IdxVal);
+ Builder.CreateCondBr(Cmp, ExclusiveExitBB, ContBB);
+ EmitBlock(ContBB);
+ // Use idx - 1 iteration for exclusive scan.
+ IdxVal = Builder.CreateNUWSub(IdxVal, llvm::ConstantInt::get(SizeTy, 1));
+ }
+ for (unsigned I = 0, E = CopyArrayElems.size(); I < E; ++I) {
+ const Expr *PrivateExpr = Privates[I];
+ const Expr *OrigExpr = Shareds[I];
+ const Expr *CopyArrayElem = CopyArrayElems[I];
+ OpaqueValueMapping IdxMapping(
+ *this,
+ cast<OpaqueValueExpr>(
+ cast<ArraySubscriptExpr>(CopyArrayElem)->getIdx()),
+ RValue::get(IdxVal));
+ LValue SrcLVal = EmitLValue(CopyArrayElem);
+ LValue DestLVal = EmitLValue(OrigExpr);
+ EmitOMPCopy(PrivateExpr->getType(), DestLVal.getAddress(*this),
+ SrcLVal.getAddress(*this),
+ cast<VarDecl>(cast<DeclRefExpr>(LHSs[I])->getDecl()),
+ cast<VarDecl>(cast<DeclRefExpr>(RHSs[I])->getDecl()),
+ CopyOps[I]);
+ }
+ if (!IsInclusive) {
+ EmitBlock(ExclusiveExitBB);
+ }
+ }
+ EmitBranch((OMPFirstScanLoop == IsInclusive) ? OMPBeforeScanBlock
+ : OMPAfterScanBlock);
+ EmitBlock(OMPAfterScanBlock);
}
void CodeGenFunction::EmitOMPDistributeLoop(const OMPLoopDirective &S,
@@ -3790,7 +4704,7 @@ void CodeGenFunction::EmitOMPDistributeLoop(const OMPLoopDirective &S,
});
EmitBlock(LoopExit.getBlock());
// Tell the runtime we are done.
- RT.emitForStaticFinish(*this, S.getBeginLoc(), S.getDirectiveKind());
+ RT.emitForStaticFinish(*this, S.getEndLoc(), S.getDirectiveKind());
} else {
// Emit the outer loop, which requests its work chunk [LB..UB] from
// runtime and runs the inner loop to process it.
@@ -3843,11 +4757,12 @@ void CodeGenFunction::EmitOMPDistributeDirective(
}
static llvm::Function *emitOutlinedOrderedFunction(CodeGenModule &CGM,
- const CapturedStmt *S) {
+ const CapturedStmt *S,
+ SourceLocation Loc) {
CodeGenFunction CGF(CGM, /*suppressNewContext=*/true);
CodeGenFunction::CGCapturedStmtInfo CapStmtInfo;
CGF.CapturedStmtInfo = &CapStmtInfo;
- llvm::Function *Fn = CGF.GenerateOpenMPCapturedStmtFunction(*S);
+ llvm::Function *Fn = CGF.GenerateOpenMPCapturedStmtFunction(*S, Loc);
Fn->setDoesNotRecurse();
return Fn;
}
@@ -3867,7 +4782,8 @@ void CodeGenFunction::EmitOMPOrderedDirective(const OMPOrderedDirective &S) {
if (C) {
llvm::SmallVector<llvm::Value *, 16> CapturedVars;
CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
- llvm::Function *OutlinedFn = emitOutlinedOrderedFunction(CGM, CS);
+ llvm::Function *OutlinedFn =
+ emitOutlinedOrderedFunction(CGM, CS, S.getBeginLoc());
CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, S.getBeginLoc(),
OutlinedFn, CapturedVars);
} else {
@@ -3918,16 +4834,22 @@ convertToComplexValue(CodeGenFunction &CGF, RValue Val, QualType SrcType,
return ComplexVal;
}
-static void emitSimpleAtomicStore(CodeGenFunction &CGF, bool IsSeqCst,
+static void emitSimpleAtomicStore(CodeGenFunction &CGF, llvm::AtomicOrdering AO,
LValue LVal, RValue RVal) {
- if (LVal.isGlobalReg()) {
+ if (LVal.isGlobalReg())
CGF.EmitStoreThroughGlobalRegLValue(RVal, LVal);
- } else {
- CGF.EmitAtomicStore(RVal, LVal,
- IsSeqCst ? llvm::AtomicOrdering::SequentiallyConsistent
- : llvm::AtomicOrdering::Monotonic,
- LVal.isVolatile(), /*isInit=*/false);
- }
+ else
+ CGF.EmitAtomicStore(RVal, LVal, AO, LVal.isVolatile(), /*isInit=*/false);
+}
+
+static RValue emitSimpleAtomicLoad(CodeGenFunction &CGF,
+ llvm::AtomicOrdering AO, LValue LVal,
+ SourceLocation Loc) {
+ if (LVal.isGlobalReg())
+ return CGF.EmitLoadOfLValue(LVal, Loc);
+ return CGF.EmitAtomicLoad(
+ LVal, Loc, llvm::AtomicCmpXchgInst::getStrongestFailureOrdering(AO),
+ LVal.isVolatile());
}
void CodeGenFunction::emitOMPSimpleStore(LValue LVal, RValue RVal,
@@ -3948,7 +4870,7 @@ void CodeGenFunction::emitOMPSimpleStore(LValue LVal, RValue RVal,
}
}
-static void emitOMPAtomicReadExpr(CodeGenFunction &CGF, bool IsSeqCst,
+static void emitOMPAtomicReadExpr(CodeGenFunction &CGF, llvm::AtomicOrdering AO,
const Expr *X, const Expr *V,
SourceLocation Loc) {
// v = x;
@@ -3956,34 +4878,54 @@ static void emitOMPAtomicReadExpr(CodeGenFunction &CGF, bool IsSeqCst,
assert(X->isLValue() && "X of 'omp atomic read' is not lvalue");
LValue XLValue = CGF.EmitLValue(X);
LValue VLValue = CGF.EmitLValue(V);
- RValue Res = XLValue.isGlobalReg()
- ? CGF.EmitLoadOfLValue(XLValue, Loc)
- : CGF.EmitAtomicLoad(
- XLValue, Loc,
- IsSeqCst ? llvm::AtomicOrdering::SequentiallyConsistent
- : llvm::AtomicOrdering::Monotonic,
- XLValue.isVolatile());
- // OpenMP, 2.12.6, atomic Construct
- // Any atomic construct with a seq_cst clause forces the atomically
- // performed operation to include an implicit flush operation without a
- // list.
- if (IsSeqCst)
- CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
+ RValue Res = emitSimpleAtomicLoad(CGF, AO, XLValue, Loc);
+ // OpenMP, 2.17.7, atomic Construct
+ // If the read or capture clause is specified and the acquire, acq_rel, or
+ // seq_cst clause is specified then the strong flush on exit from the atomic
+ // operation is also an acquire flush.
+ switch (AO) {
+ case llvm::AtomicOrdering::Acquire:
+ case llvm::AtomicOrdering::AcquireRelease:
+ case llvm::AtomicOrdering::SequentiallyConsistent:
+ CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc,
+ llvm::AtomicOrdering::Acquire);
+ break;
+ case llvm::AtomicOrdering::Monotonic:
+ case llvm::AtomicOrdering::Release:
+ break;
+ case llvm::AtomicOrdering::NotAtomic:
+ case llvm::AtomicOrdering::Unordered:
+ llvm_unreachable("Unexpected ordering.");
+ }
CGF.emitOMPSimpleStore(VLValue, Res, X->getType().getNonReferenceType(), Loc);
+ CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, V);
}
-static void emitOMPAtomicWriteExpr(CodeGenFunction &CGF, bool IsSeqCst,
- const Expr *X, const Expr *E,
- SourceLocation Loc) {
+static void emitOMPAtomicWriteExpr(CodeGenFunction &CGF,
+ llvm::AtomicOrdering AO, const Expr *X,
+ const Expr *E, SourceLocation Loc) {
// x = expr;
assert(X->isLValue() && "X of 'omp atomic write' is not lvalue");
- emitSimpleAtomicStore(CGF, IsSeqCst, CGF.EmitLValue(X), CGF.EmitAnyExpr(E));
- // OpenMP, 2.12.6, atomic Construct
- // Any atomic construct with a seq_cst clause forces the atomically
- // performed operation to include an implicit flush operation without a
- // list.
- if (IsSeqCst)
- CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
+ emitSimpleAtomicStore(CGF, AO, CGF.EmitLValue(X), CGF.EmitAnyExpr(E));
+ CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, X);
+ // OpenMP, 2.17.7, atomic Construct
+ // If the write, update, or capture clause is specified and the release,
+ // acq_rel, or seq_cst clause is specified then the strong flush on entry to
+ // the atomic operation is also a release flush.
+ switch (AO) {
+ case llvm::AtomicOrdering::Release:
+ case llvm::AtomicOrdering::AcquireRelease:
+ case llvm::AtomicOrdering::SequentiallyConsistent:
+ CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc,
+ llvm::AtomicOrdering::Release);
+ break;
+ case llvm::AtomicOrdering::Acquire:
+ case llvm::AtomicOrdering::Monotonic:
+ break;
+ case llvm::AtomicOrdering::NotAtomic:
+ case llvm::AtomicOrdering::Unordered:
+ llvm_unreachable("Unexpected ordering.");
+ }
}
static std::pair<bool, RValue> emitOMPAtomicRMW(CodeGenFunction &CGF, LValue X,
@@ -4104,10 +5046,10 @@ std::pair<bool, RValue> CodeGenFunction::EmitOMPAtomicSimpleUpdateExpr(
return Res;
}
-static void emitOMPAtomicUpdateExpr(CodeGenFunction &CGF, bool IsSeqCst,
- const Expr *X, const Expr *E,
- const Expr *UE, bool IsXLHSInRHSPart,
- SourceLocation Loc) {
+static void emitOMPAtomicUpdateExpr(CodeGenFunction &CGF,
+ llvm::AtomicOrdering AO, const Expr *X,
+ const Expr *E, const Expr *UE,
+ bool IsXLHSInRHSPart, SourceLocation Loc) {
assert(isa<BinaryOperator>(UE->IgnoreImpCasts()) &&
"Update expr in 'atomic update' must be a binary operator.");
const auto *BOUE = cast<BinaryOperator>(UE->IgnoreImpCasts());
@@ -4120,9 +5062,6 @@ static void emitOMPAtomicUpdateExpr(CodeGenFunction &CGF, bool IsSeqCst,
assert(X->isLValue() && "X of 'omp atomic update' is not lvalue");
LValue XLValue = CGF.EmitLValue(X);
RValue ExprRValue = CGF.EmitAnyExpr(E);
- llvm::AtomicOrdering AO = IsSeqCst
- ? llvm::AtomicOrdering::SequentiallyConsistent
- : llvm::AtomicOrdering::Monotonic;
const auto *LHS = cast<OpaqueValueExpr>(BOUE->getLHS()->IgnoreImpCasts());
const auto *RHS = cast<OpaqueValueExpr>(BOUE->getRHS()->IgnoreImpCasts());
const OpaqueValueExpr *XRValExpr = IsXLHSInRHSPart ? LHS : RHS;
@@ -4134,12 +5073,25 @@ static void emitOMPAtomicUpdateExpr(CodeGenFunction &CGF, bool IsSeqCst,
};
(void)CGF.EmitOMPAtomicSimpleUpdateExpr(
XLValue, ExprRValue, BOUE->getOpcode(), IsXLHSInRHSPart, AO, Loc, Gen);
- // OpenMP, 2.12.6, atomic Construct
- // Any atomic construct with a seq_cst clause forces the atomically
- // performed operation to include an implicit flush operation without a
- // list.
- if (IsSeqCst)
- CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
+ CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, X);
+ // OpenMP, 2.17.7, atomic Construct
+ // If the write, update, or capture clause is specified and the release,
+ // acq_rel, or seq_cst clause is specified then the strong flush on entry to
+ // the atomic operation is also a release flush.
+ switch (AO) {
+ case llvm::AtomicOrdering::Release:
+ case llvm::AtomicOrdering::AcquireRelease:
+ case llvm::AtomicOrdering::SequentiallyConsistent:
+ CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc,
+ llvm::AtomicOrdering::Release);
+ break;
+ case llvm::AtomicOrdering::Acquire:
+ case llvm::AtomicOrdering::Monotonic:
+ break;
+ case llvm::AtomicOrdering::NotAtomic:
+ case llvm::AtomicOrdering::Unordered:
+ llvm_unreachable("Unexpected ordering.");
+ }
}
static RValue convertToType(CodeGenFunction &CGF, RValue Value,
@@ -4159,7 +5111,8 @@ static RValue convertToType(CodeGenFunction &CGF, RValue Value,
llvm_unreachable("Must be a scalar or complex.");
}
-static void emitOMPAtomicCaptureExpr(CodeGenFunction &CGF, bool IsSeqCst,
+static void emitOMPAtomicCaptureExpr(CodeGenFunction &CGF,
+ llvm::AtomicOrdering AO,
bool IsPostfixUpdate, const Expr *V,
const Expr *X, const Expr *E,
const Expr *UE, bool IsXLHSInRHSPart,
@@ -4170,9 +5123,6 @@ static void emitOMPAtomicCaptureExpr(CodeGenFunction &CGF, bool IsSeqCst,
LValue VLValue = CGF.EmitLValue(V);
LValue XLValue = CGF.EmitLValue(X);
RValue ExprRValue = CGF.EmitAnyExpr(E);
- llvm::AtomicOrdering AO = IsSeqCst
- ? llvm::AtomicOrdering::SequentiallyConsistent
- : llvm::AtomicOrdering::Monotonic;
QualType NewVValType;
if (UE) {
// 'x' is updated with some additional value.
@@ -4200,6 +5150,7 @@ static void emitOMPAtomicCaptureExpr(CodeGenFunction &CGF, bool IsSeqCst,
};
auto Res = CGF.EmitOMPAtomicSimpleUpdateExpr(
XLValue, ExprRValue, BOUE->getOpcode(), IsXLHSInRHSPart, AO, Loc, Gen);
+ CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, X);
if (Res.first) {
// 'atomicrmw' instruction was generated.
if (IsPostfixUpdate) {
@@ -4226,6 +5177,7 @@ static void emitOMPAtomicCaptureExpr(CodeGenFunction &CGF, bool IsSeqCst,
auto Res = CGF.EmitOMPAtomicSimpleUpdateExpr(
XLValue, ExprRValue, /*BO=*/BO_Assign, /*IsXLHSInRHSPart=*/false, AO,
Loc, Gen);
+ CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, X);
if (Res.first) {
// 'atomicrmw' instruction was generated.
NewVVal = IsPostfixUpdate ? Res.second : ExprRValue;
@@ -4233,32 +5185,54 @@ static void emitOMPAtomicCaptureExpr(CodeGenFunction &CGF, bool IsSeqCst,
}
// Emit post-update store to 'v' of old/new 'x' value.
CGF.emitOMPSimpleStore(VLValue, NewVVal, NewVValType, Loc);
- // OpenMP, 2.12.6, atomic Construct
- // Any atomic construct with a seq_cst clause forces the atomically
- // performed operation to include an implicit flush operation without a
- // list.
- if (IsSeqCst)
- CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc);
+ CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, V);
+ // OpenMP, 2.17.7, atomic Construct
+ // If the write, update, or capture clause is specified and the release,
+ // acq_rel, or seq_cst clause is specified then the strong flush on entry to
+ // the atomic operation is also a release flush.
+ // If the read or capture clause is specified and the acquire, acq_rel, or
+ // seq_cst clause is specified then the strong flush on exit from the atomic
+ // operation is also an acquire flush.
+ switch (AO) {
+ case llvm::AtomicOrdering::Release:
+ CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc,
+ llvm::AtomicOrdering::Release);
+ break;
+ case llvm::AtomicOrdering::Acquire:
+ CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc,
+ llvm::AtomicOrdering::Acquire);
+ break;
+ case llvm::AtomicOrdering::AcquireRelease:
+ case llvm::AtomicOrdering::SequentiallyConsistent:
+ CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc,
+ llvm::AtomicOrdering::AcquireRelease);
+ break;
+ case llvm::AtomicOrdering::Monotonic:
+ break;
+ case llvm::AtomicOrdering::NotAtomic:
+ case llvm::AtomicOrdering::Unordered:
+ llvm_unreachable("Unexpected ordering.");
+ }
}
static void emitOMPAtomicExpr(CodeGenFunction &CGF, OpenMPClauseKind Kind,
- bool IsSeqCst, bool IsPostfixUpdate,
+ llvm::AtomicOrdering AO, bool IsPostfixUpdate,
const Expr *X, const Expr *V, const Expr *E,
const Expr *UE, bool IsXLHSInRHSPart,
SourceLocation Loc) {
switch (Kind) {
case OMPC_read:
- emitOMPAtomicReadExpr(CGF, IsSeqCst, X, V, Loc);
+ emitOMPAtomicReadExpr(CGF, AO, X, V, Loc);
break;
case OMPC_write:
- emitOMPAtomicWriteExpr(CGF, IsSeqCst, X, E, Loc);
+ emitOMPAtomicWriteExpr(CGF, AO, X, E, Loc);
break;
case OMPC_unknown:
case OMPC_update:
- emitOMPAtomicUpdateExpr(CGF, IsSeqCst, X, E, UE, IsXLHSInRHSPart, Loc);
+ emitOMPAtomicUpdateExpr(CGF, AO, X, E, UE, IsXLHSInRHSPart, Loc);
break;
case OMPC_capture:
- emitOMPAtomicCaptureExpr(CGF, IsSeqCst, IsPostfixUpdate, V, X, E, UE,
+ emitOMPAtomicCaptureExpr(CGF, AO, IsPostfixUpdate, V, X, E, UE,
IsXLHSInRHSPart, Loc);
break;
case OMPC_if:
@@ -4277,12 +5251,17 @@ static void emitOMPAtomicExpr(CodeGenFunction &CGF, OpenMPClauseKind Kind,
case OMPC_collapse:
case OMPC_default:
case OMPC_seq_cst:
+ case OMPC_acq_rel:
+ case OMPC_acquire:
+ case OMPC_release:
+ case OMPC_relaxed:
case OMPC_shared:
case OMPC_linear:
case OMPC_aligned:
case OMPC_copyin:
case OMPC_copyprivate:
case OMPC_flush:
+ case OMPC_depobj:
case OMPC_proc_bind:
case OMPC_schedule:
case OMPC_ordered:
@@ -4308,6 +5287,7 @@ static void emitOMPAtomicExpr(CodeGenFunction &CGF, OpenMPClauseKind Kind,
case OMPC_to:
case OMPC_from:
case OMPC_use_device_ptr:
+ case OMPC_use_device_addr:
case OMPC_is_device_ptr:
case OMPC_unified_address:
case OMPC_unified_shared_memory:
@@ -4317,38 +5297,76 @@ static void emitOMPAtomicExpr(CodeGenFunction &CGF, OpenMPClauseKind Kind,
case OMPC_device_type:
case OMPC_match:
case OMPC_nontemporal:
+ case OMPC_order:
+ case OMPC_destroy:
+ case OMPC_detach:
+ case OMPC_inclusive:
+ case OMPC_exclusive:
+ case OMPC_uses_allocators:
+ case OMPC_affinity:
+ default:
llvm_unreachable("Clause is not allowed in 'omp atomic'.");
}
}
void CodeGenFunction::EmitOMPAtomicDirective(const OMPAtomicDirective &S) {
- bool IsSeqCst = S.getSingleClause<OMPSeqCstClause>();
+ llvm::AtomicOrdering AO = llvm::AtomicOrdering::Monotonic;
+ bool MemOrderingSpecified = false;
+ if (S.getSingleClause<OMPSeqCstClause>()) {
+ AO = llvm::AtomicOrdering::SequentiallyConsistent;
+ MemOrderingSpecified = true;
+ } else if (S.getSingleClause<OMPAcqRelClause>()) {
+ AO = llvm::AtomicOrdering::AcquireRelease;
+ MemOrderingSpecified = true;
+ } else if (S.getSingleClause<OMPAcquireClause>()) {
+ AO = llvm::AtomicOrdering::Acquire;
+ MemOrderingSpecified = true;
+ } else if (S.getSingleClause<OMPReleaseClause>()) {
+ AO = llvm::AtomicOrdering::Release;
+ MemOrderingSpecified = true;
+ } else if (S.getSingleClause<OMPRelaxedClause>()) {
+ AO = llvm::AtomicOrdering::Monotonic;
+ MemOrderingSpecified = true;
+ }
OpenMPClauseKind Kind = OMPC_unknown;
for (const OMPClause *C : S.clauses()) {
- // Find first clause (skip seq_cst clause, if it is first).
- if (C->getClauseKind() != OMPC_seq_cst) {
+ // Find first clause (skip seq_cst|acq_rel|aqcuire|release|relaxed clause,
+ // if it is first).
+ if (C->getClauseKind() != OMPC_seq_cst &&
+ C->getClauseKind() != OMPC_acq_rel &&
+ C->getClauseKind() != OMPC_acquire &&
+ C->getClauseKind() != OMPC_release &&
+ C->getClauseKind() != OMPC_relaxed) {
Kind = C->getClauseKind();
break;
}
}
-
- const Stmt *CS = S.getInnermostCapturedStmt()->IgnoreContainers();
- if (const auto *FE = dyn_cast<FullExpr>(CS))
- enterFullExpression(FE);
- // Processing for statements under 'atomic capture'.
- if (const auto *Compound = dyn_cast<CompoundStmt>(CS)) {
- for (const Stmt *C : Compound->body()) {
- if (const auto *FE = dyn_cast<FullExpr>(C))
- enterFullExpression(FE);
+ if (!MemOrderingSpecified) {
+ llvm::AtomicOrdering DefaultOrder =
+ CGM.getOpenMPRuntime().getDefaultMemoryOrdering();
+ if (DefaultOrder == llvm::AtomicOrdering::Monotonic ||
+ DefaultOrder == llvm::AtomicOrdering::SequentiallyConsistent ||
+ (DefaultOrder == llvm::AtomicOrdering::AcquireRelease &&
+ Kind == OMPC_capture)) {
+ AO = DefaultOrder;
+ } else if (DefaultOrder == llvm::AtomicOrdering::AcquireRelease) {
+ if (Kind == OMPC_unknown || Kind == OMPC_update || Kind == OMPC_write) {
+ AO = llvm::AtomicOrdering::Release;
+ } else if (Kind == OMPC_read) {
+ assert(Kind == OMPC_read && "Unexpected atomic kind.");
+ AO = llvm::AtomicOrdering::Acquire;
+ }
}
}
- auto &&CodeGen = [&S, Kind, IsSeqCst, CS](CodeGenFunction &CGF,
+ const Stmt *CS = S.getInnermostCapturedStmt()->IgnoreContainers();
+
+ auto &&CodeGen = [&S, Kind, AO, CS](CodeGenFunction &CGF,
PrePostActionTy &) {
CGF.EmitStopPoint(CS);
- emitOMPAtomicExpr(CGF, Kind, IsSeqCst, S.isPostfixUpdate(), S.getX(),
- S.getV(), S.getExpr(), S.getUpdateExpr(),
- S.isXLHSInRHSPart(), S.getBeginLoc());
+ emitOMPAtomicExpr(CGF, Kind, AO, S.isPostfixUpdate(), S.getX(), S.getV(),
+ S.getExpr(), S.getUpdateExpr(), S.isXLHSInRHSPart(),
+ S.getBeginLoc());
};
OMPLexicalScope Scope(*this, S, OMPD_unknown);
CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_atomic, CodeGen);
@@ -4370,6 +5388,8 @@ static void emitCommonOMPTargetDirective(CodeGenFunction &CGF,
return;
}
+ auto LPCRegion =
+ CGOpenMPRuntime::LastprivateConditionalRAII::disable(CGF, S);
llvm::Function *Fn = nullptr;
llvm::Constant *FnID = nullptr;
@@ -4384,9 +5404,10 @@ static void emitCommonOMPTargetDirective(CodeGenFunction &CGF,
}
// Check if we have any device clause associated with the directive.
- const Expr *Device = nullptr;
+ llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device(
+ nullptr, OMPC_DEVICE_unknown);
if (auto *C = S.getSingleClause<OMPDeviceClause>())
- Device = C->getDevice();
+ Device.setPointerAndInt(C->getDevice(), C->getModifier());
// Check if we have an if clause whose conditional always evaluates to false
// or if we do not have any targets specified. If so the target region is not
@@ -4856,7 +5877,8 @@ void CodeGenFunction::EmitOMPCancelDirective(const OMPCancelDirective &S) {
break;
}
}
- if (llvm::OpenMPIRBuilder *OMPBuilder = CGM.getOpenMPIRBuilder()) {
+ if (CGM.getLangOpts().OpenMPIRBuilder) {
+ llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
// TODO: This check is necessary as we only generate `omp parallel` through
// the OpenMPIRBuilder for now.
if (S.getCancelRegion() == OMPD_parallel) {
@@ -4865,7 +5887,7 @@ void CodeGenFunction::EmitOMPCancelDirective(const OMPCancelDirective &S) {
IfCondition = EmitScalarExpr(IfCond,
/*IgnoreResultAssign=*/true);
return Builder.restoreIP(
- OMPBuilder->CreateCancel(Builder, IfCondition, S.getCancelRegion()));
+ OMPBuilder.CreateCancel(Builder, IfCondition, S.getCancelRegion()));
}
}
@@ -4876,7 +5898,8 @@ void CodeGenFunction::EmitOMPCancelDirective(const OMPCancelDirective &S) {
CodeGenFunction::JumpDest
CodeGenFunction::getOMPCancelDestination(OpenMPDirectiveKind Kind) {
if (Kind == OMPD_parallel || Kind == OMPD_task ||
- Kind == OMPD_target_parallel)
+ Kind == OMPD_target_parallel || Kind == OMPD_taskloop ||
+ Kind == OMPD_master_taskloop || Kind == OMPD_parallel_master_taskloop)
return ReturnBlock;
assert(Kind == OMPD_for || Kind == OMPD_section || Kind == OMPD_sections ||
Kind == OMPD_parallel_sections || Kind == OMPD_parallel_for ||
@@ -4888,9 +5911,8 @@ CodeGenFunction::getOMPCancelDestination(OpenMPDirectiveKind Kind) {
}
void CodeGenFunction::EmitOMPUseDevicePtrClause(
- const OMPClause &NC, OMPPrivateScope &PrivateScope,
+ const OMPUseDevicePtrClause &C, OMPPrivateScope &PrivateScope,
const llvm::DenseMap<const ValueDecl *, Address> &CaptureDeviceAddrMap) {
- const auto &C = cast<OMPUseDevicePtrClause>(NC);
auto OrigVarIt = C.varlist_begin();
auto InitIt = C.inits().begin();
for (const Expr *PvtVarIt : C.private_copies()) {
@@ -4951,6 +5973,60 @@ void CodeGenFunction::EmitOMPUseDevicePtrClause(
}
}
+static const VarDecl *getBaseDecl(const Expr *Ref) {
+ const Expr *Base = Ref->IgnoreParenImpCasts();
+ while (const auto *OASE = dyn_cast<OMPArraySectionExpr>(Base))
+ Base = OASE->getBase()->IgnoreParenImpCasts();
+ while (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Base))
+ Base = ASE->getBase()->IgnoreParenImpCasts();
+ return cast<VarDecl>(cast<DeclRefExpr>(Base)->getDecl());
+}
+
+void CodeGenFunction::EmitOMPUseDeviceAddrClause(
+ const OMPUseDeviceAddrClause &C, OMPPrivateScope &PrivateScope,
+ const llvm::DenseMap<const ValueDecl *, Address> &CaptureDeviceAddrMap) {
+ llvm::SmallDenseSet<CanonicalDeclPtr<const Decl>, 4> Processed;
+ for (const Expr *Ref : C.varlists()) {
+ const VarDecl *OrigVD = getBaseDecl(Ref);
+ if (!Processed.insert(OrigVD).second)
+ continue;
+ // In order to identify the right initializer we need to match the
+ // declaration used by the mapping logic. In some cases we may get
+ // OMPCapturedExprDecl that refers to the original declaration.
+ const ValueDecl *MatchingVD = OrigVD;
+ if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(MatchingVD)) {
+ // OMPCapturedExprDecl are used to privative fields of the current
+ // structure.
+ const auto *ME = cast<MemberExpr>(OED->getInit());
+ assert(isa<CXXThisExpr>(ME->getBase()) &&
+ "Base should be the current struct!");
+ MatchingVD = ME->getMemberDecl();
+ }
+
+ // If we don't have information about the current list item, move on to
+ // the next one.
+ auto InitAddrIt = CaptureDeviceAddrMap.find(MatchingVD);
+ if (InitAddrIt == CaptureDeviceAddrMap.end())
+ continue;
+
+ Address PrivAddr = InitAddrIt->getSecond();
+ // For declrefs and variable length array need to load the pointer for
+ // correct mapping, since the pointer to the data was passed to the runtime.
+ if (isa<DeclRefExpr>(Ref->IgnoreParenImpCasts()) ||
+ MatchingVD->getType()->isArrayType())
+ PrivAddr =
+ EmitLoadOfPointer(PrivAddr, getContext()
+ .getPointerType(OrigVD->getType())
+ ->castAs<PointerType>());
+ llvm::Type *RealTy =
+ ConvertTypeForMem(OrigVD->getType().getNonReferenceType())
+ ->getPointerTo();
+ PrivAddr = Builder.CreatePointerBitCastOrAddrSpaceCast(PrivAddr, RealTy);
+
+ (void)PrivateScope.addPrivate(OrigVD, [PrivAddr]() { return PrivAddr; });
+ }
+}
+
// Generate the instructions for '#pragma omp target data' directive.
void CodeGenFunction::EmitOMPTargetDataDirective(
const OMPTargetDataDirective &S) {
@@ -4995,9 +6071,13 @@ void CodeGenFunction::EmitOMPTargetDataDirective(
for (const auto *C : S.getClausesOfKind<OMPUseDevicePtrClause>())
CGF.EmitOMPUseDevicePtrClause(*C, PrivateScope,
Info.CaptureDeviceAddrMap);
+ for (const auto *C : S.getClausesOfKind<OMPUseDeviceAddrClause>())
+ CGF.EmitOMPUseDeviceAddrClause(*C, PrivateScope,
+ Info.CaptureDeviceAddrMap);
(void)PrivateScope.Privatize();
RCG(CGF);
} else {
+ OMPLexicalScope Scope(CGF, S, OMPD_unknown);
RCG(CGF);
}
};
@@ -5222,7 +6302,11 @@ void CodeGenFunction::EmitOMPTaskLoopBasedDirective(const OMPLoopDirective &S) {
assert(isOpenMPTaskLoopDirective(S.getDirectiveKind()));
// Emit outlined function for task construct.
const CapturedStmt *CS = S.getCapturedStmt(OMPD_taskloop);
- Address CapturedStruct = GenerateCapturedStmtArgument(*CS);
+ Address CapturedStruct = Address::invalid();
+ {
+ OMPLexicalScope Scope(*this, S, OMPD_taskloop, /*EmitPreInitStmt=*/false);
+ CapturedStruct = GenerateCapturedStmtArgument(*CS);
+ }
QualType SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl());
const Expr *IfCond = nullptr;
for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
@@ -5322,8 +6406,8 @@ void CodeGenFunction::EmitOMPTaskLoopBasedDirective(const OMPLoopDirective &S) {
CGF.EmitOMPInnerLoop(
S, LoopScope.requiresCleanups(), S.getCond(), S.getInc(),
[&S](CodeGenFunction &CGF) {
- CGF.EmitOMPLoopBody(S, CodeGenFunction::JumpDest());
- CGF.EmitStopPoint(&S);
+ emitOMPLoopBodyWithStopPoint(CGF, S,
+ CodeGenFunction::JumpDest());
},
[](CodeGenFunction &) {});
});
@@ -5376,11 +6460,15 @@ void CodeGenFunction::EmitOMPTaskLoopBasedDirective(const OMPLoopDirective &S) {
}
void CodeGenFunction::EmitOMPTaskLoopDirective(const OMPTaskLoopDirective &S) {
+ auto LPCRegion =
+ CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
EmitOMPTaskLoopBasedDirective(S);
}
void CodeGenFunction::EmitOMPTaskLoopSimdDirective(
const OMPTaskLoopSimdDirective &S) {
+ auto LPCRegion =
+ CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
OMPLexicalScope Scope(*this, S);
EmitOMPTaskLoopBasedDirective(S);
}
@@ -5391,6 +6479,8 @@ void CodeGenFunction::EmitOMPMasterTaskLoopDirective(
Action.Enter(CGF);
EmitOMPTaskLoopBasedDirective(S);
};
+ auto LPCRegion =
+ CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
OMPLexicalScope Scope(*this, S, llvm::None, /*EmitPreInitStmt=*/false);
CGM.getOpenMPRuntime().emitMasterRegion(*this, CodeGen, S.getBeginLoc());
}
@@ -5401,6 +6491,8 @@ void CodeGenFunction::EmitOMPMasterTaskLoopSimdDirective(
Action.Enter(CGF);
EmitOMPTaskLoopBasedDirective(S);
};
+ auto LPCRegion =
+ CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
OMPLexicalScope Scope(*this, S);
CGM.getOpenMPRuntime().emitMasterRegion(*this, CodeGen, S.getBeginLoc());
}
@@ -5413,10 +6505,12 @@ void CodeGenFunction::EmitOMPParallelMasterTaskLoopDirective(
Action.Enter(CGF);
CGF.EmitOMPTaskLoopBasedDirective(S);
};
- OMPLexicalScope Scope(CGF, S, llvm::None, /*EmitPreInitStmt=*/false);
+ OMPLexicalScope Scope(CGF, S, OMPD_parallel, /*EmitPreInitStmt=*/false);
CGM.getOpenMPRuntime().emitMasterRegion(CGF, TaskLoopCodeGen,
S.getBeginLoc());
};
+ auto LPCRegion =
+ CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
emitCommonOMPParallelDirective(*this, S, OMPD_master_taskloop, CodeGen,
emitEmptyBoundParameters);
}
@@ -5433,6 +6527,8 @@ void CodeGenFunction::EmitOMPParallelMasterTaskLoopSimdDirective(
CGM.getOpenMPRuntime().emitMasterRegion(CGF, TaskLoopCodeGen,
S.getBeginLoc());
};
+ auto LPCRegion =
+ CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
emitCommonOMPParallelDirective(*this, S, OMPD_master_taskloop_simd, CodeGen,
emitEmptyBoundParameters);
}
@@ -5461,19 +6557,43 @@ void CodeGenFunction::EmitOMPTargetUpdateDirective(
void CodeGenFunction::EmitSimpleOMPExecutableDirective(
const OMPExecutableDirective &D) {
+ if (const auto *SD = dyn_cast<OMPScanDirective>(&D)) {
+ EmitOMPScanDirective(*SD);
+ return;
+ }
if (!D.hasAssociatedStmt() || !D.getAssociatedStmt())
return;
auto &&CodeGen = [&D](CodeGenFunction &CGF, PrePostActionTy &Action) {
+ OMPPrivateScope GlobalsScope(CGF);
+ if (isOpenMPTaskingDirective(D.getDirectiveKind())) {
+ // Capture global firstprivates to avoid crash.
+ for (const auto *C : D.getClausesOfKind<OMPFirstprivateClause>()) {
+ for (const Expr *Ref : C->varlists()) {
+ const auto *DRE = cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
+ if (!DRE)
+ continue;
+ const auto *VD = dyn_cast<VarDecl>(DRE->getDecl());
+ if (!VD || VD->hasLocalStorage())
+ continue;
+ if (!CGF.LocalDeclMap.count(VD)) {
+ LValue GlobLVal = CGF.EmitLValue(Ref);
+ GlobalsScope.addPrivate(
+ VD, [&GlobLVal, &CGF]() { return GlobLVal.getAddress(CGF); });
+ }
+ }
+ }
+ }
if (isOpenMPSimdDirective(D.getDirectiveKind())) {
+ (void)GlobalsScope.Privatize();
+ ParentLoopDirectiveForScanRegion ScanRegion(CGF, D);
emitOMPSimdRegion(CGF, cast<OMPLoopDirective>(D), Action);
} else {
- OMPPrivateScope LoopGlobals(CGF);
if (const auto *LD = dyn_cast<OMPLoopDirective>(&D)) {
for (const Expr *E : LD->counters()) {
const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
if (!VD->hasLocalStorage() && !CGF.LocalDeclMap.count(VD)) {
LValue GlobLVal = CGF.EmitLValue(E);
- LoopGlobals.addPrivate(
+ GlobalsScope.addPrivate(
VD, [&GlobLVal, &CGF]() { return GlobLVal.getAddress(CGF); });
}
if (isa<OMPCapturedExprDecl>(VD)) {
@@ -5497,14 +6617,20 @@ void CodeGenFunction::EmitSimpleOMPExecutableDirective(
}
}
}
- LoopGlobals.Privatize();
+ (void)GlobalsScope.Privatize();
CGF.EmitStmt(D.getInnermostCapturedStmt()->getCapturedStmt());
}
};
- OMPSimdLexicalScope Scope(*this, D);
- CGM.getOpenMPRuntime().emitInlinedDirective(
- *this,
- isOpenMPSimdDirective(D.getDirectiveKind()) ? OMPD_simd
- : D.getDirectiveKind(),
- CodeGen);
+ {
+ auto LPCRegion =
+ CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, D);
+ OMPSimdLexicalScope Scope(*this, D);
+ CGM.getOpenMPRuntime().emitInlinedDirective(
+ *this,
+ isOpenMPSimdDirective(D.getDirectiveKind()) ? OMPD_simd
+ : D.getDirectiveKind(),
+ CodeGen);
+ }
+ // Check for outer lastprivate conditional update.
+ checkForLastprivateConditionalUpdate(*this, D);
}