aboutsummaryrefslogtreecommitdiff
path: root/clang/lib/CodeGen/CGStmtOpenMP.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'clang/lib/CodeGen/CGStmtOpenMP.cpp')
-rw-r--r--clang/lib/CodeGen/CGStmtOpenMP.cpp925
1 files changed, 696 insertions, 229 deletions
diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp
index 5e8d98cfe5ef..f6233b791182 100644
--- a/clang/lib/CodeGen/CGStmtOpenMP.cpp
+++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp
@@ -133,50 +133,55 @@ public:
/// Private scope for OpenMP loop-based directives, that supports capturing
/// of used expression from loop statement.
class OMPLoopScope : public CodeGenFunction::RunCleanupsScope {
- void emitPreInitStmt(CodeGenFunction &CGF, const OMPLoopDirective &S) {
+ void emitPreInitStmt(CodeGenFunction &CGF, const OMPLoopBasedDirective &S) {
+ const DeclStmt *PreInits;
CodeGenFunction::OMPMapVars PreCondVars;
- llvm::DenseSet<const VarDecl *> EmittedAsPrivate;
- for (const auto *E : S.counters()) {
- const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
- EmittedAsPrivate.insert(VD->getCanonicalDecl());
- (void)PreCondVars.setVarAddr(
- CGF, VD, CGF.CreateMemTemp(VD->getType().getNonReferenceType()));
- }
- // Mark private vars as undefs.
- for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) {
- for (const Expr *IRef : C->varlists()) {
- const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(IRef)->getDecl());
- if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
- (void)PreCondVars.setVarAddr(
- CGF, OrigVD,
- Address(llvm::UndefValue::get(
- CGF.ConvertTypeForMem(CGF.getContext().getPointerType(
- OrigVD->getType().getNonReferenceType()))),
- CGF.getContext().getDeclAlign(OrigVD)));
- }
+ if (auto *LD = dyn_cast<OMPLoopDirective>(&S)) {
+ llvm::DenseSet<const VarDecl *> EmittedAsPrivate;
+ for (const auto *E : LD->counters()) {
+ const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
+ EmittedAsPrivate.insert(VD->getCanonicalDecl());
+ (void)PreCondVars.setVarAddr(
+ CGF, VD, CGF.CreateMemTemp(VD->getType().getNonReferenceType()));
}
- }
- (void)PreCondVars.apply(CGF);
- // Emit init, __range and __end variables for C++ range loops.
- const Stmt *Body =
- S.getInnermostCapturedStmt()->getCapturedStmt()->IgnoreContainers();
- for (unsigned Cnt = 0; Cnt < S.getCollapsedNumber(); ++Cnt) {
- Body = OMPLoopDirective::tryToFindNextInnerLoop(
- Body, /*TryImperfectlyNestedLoops=*/true);
- if (auto *For = dyn_cast<ForStmt>(Body)) {
- Body = For->getBody();
- } else {
- assert(isa<CXXForRangeStmt>(Body) &&
- "Expected canonical for loop or range-based for loop.");
- auto *CXXFor = cast<CXXForRangeStmt>(Body);
- if (const Stmt *Init = CXXFor->getInit())
- CGF.EmitStmt(Init);
- CGF.EmitStmt(CXXFor->getRangeStmt());
- CGF.EmitStmt(CXXFor->getEndStmt());
- Body = CXXFor->getBody();
+ // Mark private vars as undefs.
+ for (const auto *C : LD->getClausesOfKind<OMPPrivateClause>()) {
+ for (const Expr *IRef : C->varlists()) {
+ const auto *OrigVD =
+ cast<VarDecl>(cast<DeclRefExpr>(IRef)->getDecl());
+ if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
+ (void)PreCondVars.setVarAddr(
+ CGF, OrigVD,
+ Address(llvm::UndefValue::get(CGF.ConvertTypeForMem(
+ CGF.getContext().getPointerType(
+ OrigVD->getType().getNonReferenceType()))),
+ CGF.getContext().getDeclAlign(OrigVD)));
+ }
+ }
}
+ (void)PreCondVars.apply(CGF);
+ // Emit init, __range and __end variables for C++ range loops.
+ (void)OMPLoopBasedDirective::doForAllLoops(
+ LD->getInnermostCapturedStmt()->getCapturedStmt(),
+ /*TryImperfectlyNestedLoops=*/true, LD->getLoopsNumber(),
+ [&CGF](unsigned Cnt, const Stmt *CurStmt) {
+ if (const auto *CXXFor = dyn_cast<CXXForRangeStmt>(CurStmt)) {
+ if (const Stmt *Init = CXXFor->getInit())
+ CGF.EmitStmt(Init);
+ CGF.EmitStmt(CXXFor->getRangeStmt());
+ CGF.EmitStmt(CXXFor->getEndStmt());
+ }
+ return false;
+ });
+ PreInits = cast_or_null<DeclStmt>(LD->getPreInits());
+ } else if (const auto *Tile = dyn_cast<OMPTileDirective>(&S)) {
+ PreInits = cast_or_null<DeclStmt>(Tile->getPreInits());
+ } else if (const auto *Unroll = dyn_cast<OMPUnrollDirective>(&S)) {
+ PreInits = cast_or_null<DeclStmt>(Unroll->getPreInits());
+ } else {
+ llvm_unreachable("Unknown loop-based directive kind.");
}
- if (const auto *PreInits = cast_or_null<DeclStmt>(S.getPreInits())) {
+ if (PreInits) {
for (const auto *I : PreInits->decls())
CGF.EmitVarDecl(cast<VarDecl>(*I));
}
@@ -184,7 +189,7 @@ class OMPLoopScope : public CodeGenFunction::RunCleanupsScope {
}
public:
- OMPLoopScope(CodeGenFunction &CGF, const OMPLoopDirective &S)
+ OMPLoopScope(CodeGenFunction &CGF, const OMPLoopBasedDirective &S)
: CodeGenFunction::RunCleanupsScope(CGF) {
emitPreInitStmt(CGF, S);
}
@@ -238,11 +243,22 @@ public:
if (const Expr *E = TG->getReductionRef())
CGF.EmitVarDecl(*cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()));
}
+ // Temp copy arrays for inscan reductions should not be emitted as they are
+ // not used in simd only mode.
+ llvm::DenseSet<CanonicalDeclPtr<const Decl>> CopyArrayTemps;
+ for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
+ if (C->getModifier() != OMPC_REDUCTION_inscan)
+ continue;
+ for (const Expr *E : C->copy_array_temps())
+ CopyArrayTemps.insert(cast<DeclRefExpr>(E)->getDecl());
+ }
const auto *CS = cast_or_null<CapturedStmt>(S.getAssociatedStmt());
while (CS) {
for (auto &C : CS->captures()) {
if (C.capturesVariable() || C.capturesVariableByCopy()) {
auto *VD = C.getCapturedVar();
+ if (CopyArrayTemps.contains(VD))
+ continue;
assert(VD == VD->getCanonicalDecl() &&
"Canonical decl must be captured.");
DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD),
@@ -501,6 +517,10 @@ static llvm::Function *emitOutlinedFunctionPrologue(
F->setDoesNotThrow();
F->setDoesNotRecurse();
+ // Always inline the outlined function if optimizations are enabled.
+ if (CGM.getCodeGenOpts().OptimizationLevel != 0)
+ F->addFnAttr(llvm::Attribute::AlwaysInline);
+
// Generate the function.
CGF.StartFunction(CD, Ctx.VoidTy, F, FuncInfo, TargetArgs,
FO.UIntPtrCastRequired ? FO.Loc : FO.S->getBeginLoc(),
@@ -631,6 +651,7 @@ CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S,
emitOutlinedFunctionPrologue(WrapperCGF, Args, LocalAddrs, VLASizes,
WrapperCGF.CXXThisValue, WrapperFO);
llvm::SmallVector<llvm::Value *, 4> CallArgs;
+ auto *PI = F->arg_begin();
for (const auto *Arg : Args) {
llvm::Value *CallArg;
auto I = LocalAddrs.find(Arg);
@@ -639,6 +660,11 @@ CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S,
I->second.second,
I->second.first ? I->second.first->getType() : Arg->getType(),
AlignmentSource::Decl);
+ if (LV.getType()->isAnyComplexType())
+ LV.setAddress(WrapperCGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
+ LV.getAddress(WrapperCGF),
+ PI->getType()->getPointerTo(
+ LV.getAddress(WrapperCGF).getAddressSpace())));
CallArg = WrapperCGF.EmitLoadOfScalar(LV, S.getBeginLoc());
} else {
auto EI = VLASizes.find(Arg);
@@ -652,6 +678,7 @@ CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S,
}
}
CallArgs.emplace_back(WrapperCGF.EmitFromMemory(CallArg, Arg->getType()));
+ ++PI;
}
CGM.getOpenMPRuntime().emitOutlinedFunctionCall(WrapperCGF, Loc, F, CallArgs);
WrapperCGF.FinishFunction();
@@ -675,7 +702,8 @@ void CodeGenFunction::EmitOMPAggregateAssign(
llvm::Value *SrcBegin = SrcAddr.getPointer();
llvm::Value *DestBegin = DestAddr.getPointer();
// Cast from pointer to array type to pointer to single element.
- llvm::Value *DestEnd = Builder.CreateGEP(DestBegin, NumElements);
+ llvm::Value *DestEnd =
+ Builder.CreateGEP(DestAddr.getElementType(), DestBegin, NumElements);
// The basic structure here is a while-do loop.
llvm::BasicBlock *BodyBB = createBasicBlock("omp.arraycpy.body");
llvm::BasicBlock *DoneBB = createBasicBlock("omp.arraycpy.done");
@@ -708,9 +736,11 @@ void CodeGenFunction::EmitOMPAggregateAssign(
// Shift the address forward by one element.
llvm::Value *DestElementNext = Builder.CreateConstGEP1_32(
- DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
+ DestAddr.getElementType(), DestElementPHI, /*Idx0=*/1,
+ "omp.arraycpy.dest.element");
llvm::Value *SrcElementNext = Builder.CreateConstGEP1_32(
- SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.src.element");
+ SrcAddr.getElementType(), SrcElementPHI, /*Idx0=*/1,
+ "omp.arraycpy.src.element");
// Check whether we've reached the end.
llvm::Value *Done =
Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
@@ -803,8 +833,7 @@ bool CodeGenFunction::EmitOMPFirstprivateClause(const OMPExecutableDirective &D,
if (DeviceConstTarget && OrigVD->getType().isConstant(getContext()) &&
FD && FD->getType()->isReferenceType() &&
(!VD || !VD->hasAttr<OMPAllocateDeclAttr>())) {
- (void)CGM.getOpenMPRuntime().registerTargetFirstprivateCopy(*this,
- OrigVD);
+ EmittedAsFirstprivate.insert(OrigVD->getCanonicalDecl());
++IRef;
++InitsRef;
continue;
@@ -985,12 +1014,14 @@ bool CodeGenFunction::EmitOMPCopyinClause(const OMPExecutableDirective &D) {
// need to copy data.
CopyBegin = createBasicBlock("copyin.not.master");
CopyEnd = createBasicBlock("copyin.not.master.end");
+ // TODO: Avoid ptrtoint conversion.
+ auto *MasterAddrInt =
+ Builder.CreatePtrToInt(MasterAddr.getPointer(), CGM.IntPtrTy);
+ auto *PrivateAddrInt =
+ Builder.CreatePtrToInt(PrivateAddr.getPointer(), CGM.IntPtrTy);
Builder.CreateCondBr(
- Builder.CreateICmpNE(
- Builder.CreatePtrToInt(MasterAddr.getPointer(), CGM.IntPtrTy),
- Builder.CreatePtrToInt(PrivateAddr.getPointer(),
- CGM.IntPtrTy)),
- CopyBegin, CopyEnd);
+ Builder.CreateICmpNE(MasterAddrInt, PrivateAddrInt), CopyBegin,
+ CopyEnd);
EmitBlock(CopyBegin);
}
const auto *SrcVD =
@@ -1755,6 +1786,31 @@ void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) {
checkForLastprivateConditionalUpdate(*this, S);
}
+namespace {
+/// RAII to handle scopes for loop transformation directives.
+class OMPTransformDirectiveScopeRAII {
+ OMPLoopScope *Scope = nullptr;
+ CodeGenFunction::CGCapturedStmtInfo *CGSI = nullptr;
+ CodeGenFunction::CGCapturedStmtRAII *CapInfoRAII = nullptr;
+
+public:
+ OMPTransformDirectiveScopeRAII(CodeGenFunction &CGF, const Stmt *S) {
+ if (const auto *Dir = dyn_cast<OMPLoopBasedDirective>(S)) {
+ Scope = new OMPLoopScope(CGF, *Dir);
+ CGSI = new CodeGenFunction::CGCapturedStmtInfo(CR_OpenMP);
+ CapInfoRAII = new CodeGenFunction::CGCapturedStmtRAII(CGF, CGSI);
+ }
+ }
+ ~OMPTransformDirectiveScopeRAII() {
+ if (!Scope)
+ return;
+ delete CapInfoRAII;
+ delete CGSI;
+ delete Scope;
+ }
+};
+} // namespace
+
static void emitBody(CodeGenFunction &CGF, const Stmt *S, const Stmt *NextLoop,
int MaxLevel, int Level = 0) {
assert(Level < MaxLevel && "Too deep lookup during loop body codegen.");
@@ -1771,6 +1827,12 @@ static void emitBody(CodeGenFunction &CGF, const Stmt *S, const Stmt *NextLoop,
return;
}
if (SimplifiedS == NextLoop) {
+ if (auto *Dir = dyn_cast<OMPTileDirective>(SimplifiedS))
+ SimplifiedS = Dir->getTransformedStmt();
+ if (auto *Dir = dyn_cast<OMPUnrollDirective>(SimplifiedS))
+ SimplifiedS = Dir->getTransformedStmt();
+ if (const auto *CanonLoop = dyn_cast<OMPCanonicalLoop>(SimplifiedS))
+ SimplifiedS = CanonLoop->getLoopStmt();
if (const auto *For = dyn_cast<ForStmt>(SimplifiedS)) {
S = For->getBody();
} else {
@@ -1845,9 +1907,9 @@ void CodeGenFunction::EmitOMPLoopBody(const OMPLoopDirective &D,
D.getInnermostCapturedStmt()->getCapturedStmt()->IgnoreContainers();
// Emit loop body.
emitBody(*this, Body,
- OMPLoopDirective::tryToFindNextInnerLoop(
+ OMPLoopBasedDirective::tryToFindNextInnerLoop(
Body, /*TryImperfectlyNestedLoops=*/true),
- D.getCollapsedNumber());
+ D.getLoopsNumber());
// Jump to the dispatcher at the end of the loop body.
if (IsInscanRegion)
@@ -1858,6 +1920,121 @@ void CodeGenFunction::EmitOMPLoopBody(const OMPLoopDirective &D,
BreakContinueStack.pop_back();
}
+using EmittedClosureTy = std::pair<llvm::Function *, llvm::Value *>;
+
+/// Emit a captured statement and return the function as well as its captured
+/// closure context.
+static EmittedClosureTy emitCapturedStmtFunc(CodeGenFunction &ParentCGF,
+ const CapturedStmt *S) {
+ LValue CapStruct = ParentCGF.InitCapturedStruct(*S);
+ CodeGenFunction CGF(ParentCGF.CGM, /*suppressNewContext=*/true);
+ std::unique_ptr<CodeGenFunction::CGCapturedStmtInfo> CSI =
+ std::make_unique<CodeGenFunction::CGCapturedStmtInfo>(*S);
+ CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, CSI.get());
+ llvm::Function *F = CGF.GenerateCapturedStmtFunction(*S);
+
+ return {F, CapStruct.getPointer(ParentCGF)};
+}
+
+/// Emit a call to a previously captured closure.
+static llvm::CallInst *
+emitCapturedStmtCall(CodeGenFunction &ParentCGF, EmittedClosureTy Cap,
+ llvm::ArrayRef<llvm::Value *> Args) {
+ // Append the closure context to the argument.
+ SmallVector<llvm::Value *> EffectiveArgs;
+ EffectiveArgs.reserve(Args.size() + 1);
+ llvm::append_range(EffectiveArgs, Args);
+ EffectiveArgs.push_back(Cap.second);
+
+ return ParentCGF.Builder.CreateCall(Cap.first, EffectiveArgs);
+}
+
+llvm::CanonicalLoopInfo *
+CodeGenFunction::EmitOMPCollapsedCanonicalLoopNest(const Stmt *S, int Depth) {
+ assert(Depth == 1 && "Nested loops with OpenMPIRBuilder not yet implemented");
+
+ EmitStmt(S);
+ assert(OMPLoopNestStack.size() >= (size_t)Depth && "Found too few loops");
+
+ // The last added loop is the outermost one.
+ return OMPLoopNestStack.back();
+}
+
+void CodeGenFunction::EmitOMPCanonicalLoop(const OMPCanonicalLoop *S) {
+ const Stmt *SyntacticalLoop = S->getLoopStmt();
+ if (!getLangOpts().OpenMPIRBuilder) {
+ // Ignore if OpenMPIRBuilder is not enabled.
+ EmitStmt(SyntacticalLoop);
+ return;
+ }
+
+ LexicalScope ForScope(*this, S->getSourceRange());
+
+ // Emit init statements. The Distance/LoopVar funcs may reference variable
+ // declarations they contain.
+ const Stmt *BodyStmt;
+ if (const auto *For = dyn_cast<ForStmt>(SyntacticalLoop)) {
+ if (const Stmt *InitStmt = For->getInit())
+ EmitStmt(InitStmt);
+ BodyStmt = For->getBody();
+ } else if (const auto *RangeFor =
+ dyn_cast<CXXForRangeStmt>(SyntacticalLoop)) {
+ if (const DeclStmt *RangeStmt = RangeFor->getRangeStmt())
+ EmitStmt(RangeStmt);
+ if (const DeclStmt *BeginStmt = RangeFor->getBeginStmt())
+ EmitStmt(BeginStmt);
+ if (const DeclStmt *EndStmt = RangeFor->getEndStmt())
+ EmitStmt(EndStmt);
+ if (const DeclStmt *LoopVarStmt = RangeFor->getLoopVarStmt())
+ EmitStmt(LoopVarStmt);
+ BodyStmt = RangeFor->getBody();
+ } else
+ llvm_unreachable("Expected for-stmt or range-based for-stmt");
+
+ // Emit closure for later use. By-value captures will be captured here.
+ const CapturedStmt *DistanceFunc = S->getDistanceFunc();
+ EmittedClosureTy DistanceClosure = emitCapturedStmtFunc(*this, DistanceFunc);
+ const CapturedStmt *LoopVarFunc = S->getLoopVarFunc();
+ EmittedClosureTy LoopVarClosure = emitCapturedStmtFunc(*this, LoopVarFunc);
+
+ // Call the distance function to get the number of iterations of the loop to
+ // come.
+ QualType LogicalTy = DistanceFunc->getCapturedDecl()
+ ->getParam(0)
+ ->getType()
+ .getNonReferenceType();
+ Address CountAddr = CreateMemTemp(LogicalTy, ".count.addr");
+ emitCapturedStmtCall(*this, DistanceClosure, {CountAddr.getPointer()});
+ llvm::Value *DistVal = Builder.CreateLoad(CountAddr, ".count");
+
+ // Emit the loop structure.
+ llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
+ auto BodyGen = [&, this](llvm::OpenMPIRBuilder::InsertPointTy CodeGenIP,
+ llvm::Value *IndVar) {
+ Builder.restoreIP(CodeGenIP);
+
+ // Emit the loop body: Convert the logical iteration number to the loop
+ // variable and emit the body.
+ const DeclRefExpr *LoopVarRef = S->getLoopVarRef();
+ LValue LCVal = EmitLValue(LoopVarRef);
+ Address LoopVarAddress = LCVal.getAddress(*this);
+ emitCapturedStmtCall(*this, LoopVarClosure,
+ {LoopVarAddress.getPointer(), IndVar});
+
+ RunCleanupsScope BodyScope(*this);
+ EmitStmt(BodyStmt);
+ };
+ llvm::CanonicalLoopInfo *CL =
+ OMPBuilder.createCanonicalLoop(Builder, BodyGen, DistVal);
+
+ // Finish up the loop.
+ Builder.restoreIP(CL->getAfterIP());
+ ForScope.ForceCleanup();
+
+ // Remember the CanonicalLoopInfo for parent AST nodes consuming it.
+ OMPLoopNestStack.push_back(CL);
+}
+
void CodeGenFunction::EmitOMPInnerLoop(
const OMPExecutableDirective &S, bool RequiresCleanup, const Expr *LoopCond,
const Expr *IncExpr,
@@ -1875,6 +2052,7 @@ void CodeGenFunction::EmitOMPInnerLoop(
const CapturedStmt *ICS = OMPED.getInnermostCapturedStmt();
const Stmt *SS = ICS->getCapturedStmt();
const AttributedStmt *AS = dyn_cast_or_null<AttributedStmt>(SS);
+ OMPLoopNestStack.clear();
if (AS)
LoopStack.push(CondBlock, CGM.getContext(), CGM.getCodeGenOpts(),
AS->getAttrs(), SourceLocToDebugLoc(R.getBegin()),
@@ -2062,8 +2240,7 @@ void CodeGenFunction::EmitOMPPrivateLoopCounters(
for (const auto *C : S.getClausesOfKind<OMPOrderedClause>()) {
if (!C->getNumForLoops())
continue;
- for (unsigned I = S.getCollapsedNumber(),
- E = C->getLoopNumIterations().size();
+ for (unsigned I = S.getLoopsNumber(), E = C->getLoopNumIterations().size();
I < E; ++I) {
const auto *DRE = cast<DeclRefExpr>(C->getLoopCounter(I));
const auto *VD = cast<VarDecl>(DRE->getDecl());
@@ -2152,8 +2329,7 @@ void CodeGenFunction::EmitOMPLinearClause(
}
static void emitSimdlenSafelenClause(CodeGenFunction &CGF,
- const OMPExecutableDirective &D,
- bool IsMonotonic) {
+ const OMPExecutableDirective &D) {
if (!CGF.HaveInsertPoint())
return;
if (const auto *C = D.getSingleClause<OMPSimdlenClause>()) {
@@ -2164,8 +2340,7 @@ static void emitSimdlenSafelenClause(CodeGenFunction &CGF,
// In presence of finite 'safelen', it may be unsafe to mark all
// the memory instructions parallel, because loop-carried
// dependences of 'safelen' iterations are possible.
- if (!IsMonotonic)
- CGF.LoopStack.setParallel(!D.getSingleClause<OMPSafelenClause>());
+ CGF.LoopStack.setParallel(!D.getSingleClause<OMPSafelenClause>());
} else if (const auto *C = D.getSingleClause<OMPSafelenClause>()) {
RValue Len = CGF.EmitAnyExpr(C->getSafelen(), AggValueSlot::ignored(),
/*ignoreResult=*/true);
@@ -2178,12 +2353,11 @@ static void emitSimdlenSafelenClause(CodeGenFunction &CGF,
}
}
-void CodeGenFunction::EmitOMPSimdInit(const OMPLoopDirective &D,
- bool IsMonotonic) {
+void CodeGenFunction::EmitOMPSimdInit(const OMPLoopDirective &D) {
// Walk clauses and process safelen/lastprivate.
- LoopStack.setParallel(!IsMonotonic);
+ LoopStack.setParallel(/*Enable=*/true);
LoopStack.setVectorizeEnable();
- emitSimdlenSafelenClause(*this, D, IsMonotonic);
+ emitSimdlenSafelenClause(*this, D);
if (const auto *C = D.getSingleClause<OMPOrderClause>())
if (C->getKind() == OMPC_ORDER_concurrent)
LoopStack.setParallel(/*Enable=*/true);
@@ -2406,6 +2580,34 @@ void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) {
checkForLastprivateConditionalUpdate(*this, S);
}
+void CodeGenFunction::EmitOMPTileDirective(const OMPTileDirective &S) {
+ // Emit the de-sugared statement.
+ OMPTransformDirectiveScopeRAII TileScope(*this, &S);
+ EmitStmt(S.getTransformedStmt());
+}
+
+void CodeGenFunction::EmitOMPUnrollDirective(const OMPUnrollDirective &S) {
+ // This function is only called if the unrolled loop is not consumed by any
+ // other loop-associated construct. Such a loop-associated construct will have
+ // used the transformed AST.
+
+ // Set the unroll metadata for the next emitted loop.
+ LoopStack.setUnrollState(LoopAttributes::Enable);
+
+ if (S.hasClausesOfKind<OMPFullClause>()) {
+ LoopStack.setUnrollState(LoopAttributes::Full);
+ } else if (auto *PartialClause = S.getSingleClause<OMPPartialClause>()) {
+ if (Expr *FactorExpr = PartialClause->getFactor()) {
+ uint64_t Factor =
+ FactorExpr->EvaluateKnownConstInt(getContext()).getZExtValue();
+ assert(Factor >= 1 && "Only positive factors are valid");
+ LoopStack.setUnrollCount(Factor);
+ }
+ }
+
+ EmitStmt(S.getAssociatedStmt());
+}
+
void CodeGenFunction::EmitOMPOuterLoop(
bool DynamicOrOrdered, bool IsMonotonic, const OMPLoopDirective &S,
CodeGenFunction::OMPPrivateScope &LoopScope,
@@ -2424,6 +2626,7 @@ void CodeGenFunction::EmitOMPOuterLoop(
llvm::BasicBlock *CondBlock = createBasicBlock("omp.dispatch.cond");
EmitBlock(CondBlock);
const SourceRange R = S.getSourceRange();
+ OMPLoopNestStack.clear();
LoopStack.push(CondBlock, SourceLocToDebugLoc(R.getBegin()),
SourceLocToDebugLoc(R.getEnd()));
@@ -2477,7 +2680,7 @@ void CodeGenFunction::EmitOMPOuterLoop(
if (C->getKind() == OMPC_ORDER_concurrent)
CGF.LoopStack.setParallel(/*Enable=*/true);
} else {
- CGF.EmitOMPSimdInit(S, IsMonotonic);
+ CGF.EmitOMPSimdInit(S);
}
},
[&S, &LoopArgs, LoopExit, &CodeGenLoop, IVSize, IVSigned, &CodeGenOrdered,
@@ -2507,6 +2710,7 @@ void CodeGenFunction::EmitOMPOuterLoop(
}
EmitBranch(CondBlock);
+ OMPLoopNestStack.clear();
LoopStack.pop();
// Emit the fall-through block.
EmitBlock(LoopExit.getBlock());
@@ -2986,8 +3190,7 @@ bool CodeGenFunction::EmitOMPWorksharingLoop(
isOpenMPLoopBoundSharingDirective(S.getDirectiveKind());
bool IsMonotonic =
Ordered ||
- ((ScheduleKind.Schedule == OMPC_SCHEDULE_static ||
- ScheduleKind.Schedule == OMPC_SCHEDULE_unknown) &&
+ (ScheduleKind.Schedule == OMPC_SCHEDULE_static &&
!(ScheduleKind.M1 == OMPC_SCHEDULE_MODIFIER_nonmonotonic ||
ScheduleKind.M2 == OMPC_SCHEDULE_MODIFIER_nonmonotonic)) ||
ScheduleKind.M1 == OMPC_SCHEDULE_MODIFIER_monotonic ||
@@ -3000,9 +3203,9 @@ bool CodeGenFunction::EmitOMPWorksharingLoop(
getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit"));
emitCommonSimdLoop(
*this, S,
- [&S, IsMonotonic](CodeGenFunction &CGF, PrePostActionTy &) {
+ [&S](CodeGenFunction &CGF, PrePostActionTy &) {
if (isOpenMPSimdDirective(S.getDirectiveKind())) {
- CGF.EmitOMPSimdInit(S, IsMonotonic);
+ CGF.EmitOMPSimdInit(S);
} else if (const auto *C = S.getSingleClause<OMPOrderClause>()) {
if (C->getKind() == OMPC_ORDER_concurrent)
CGF.LoopStack.setParallel(/*Enable=*/true);
@@ -3131,53 +3334,30 @@ emitDispatchForLoopBounds(CodeGenFunction &CGF, const OMPExecutableDirective &S,
return {LBVal, UBVal};
}
-/// Emits the code for the directive with inscan reductions.
+/// Emits internal temp array declarations for the directive with inscan
+/// reductions.
/// The code is the following:
/// \code
/// size num_iters = <num_iters>;
/// <type> buffer[num_iters];
-/// #pragma omp ...
-/// for (i: 0..<num_iters>) {
-/// <input phase>;
-/// buffer[i] = red;
-/// }
-/// for (int k = 0; k != ceil(log2(num_iters)); ++k)
-/// for (size cnt = last_iter; cnt >= pow(2, k); --k)
-/// buffer[i] op= buffer[i-pow(2,k)];
-/// #pragma omp ...
-/// for (0..<num_iters>) {
-/// red = InclusiveScan ? buffer[i] : buffer[i-1];
-/// <scan phase>;
-/// }
/// \endcode
-static void emitScanBasedDirective(
+static void emitScanBasedDirectiveDecls(
CodeGenFunction &CGF, const OMPLoopDirective &S,
- llvm::function_ref<llvm::Value *(CodeGenFunction &)> NumIteratorsGen,
- llvm::function_ref<void(CodeGenFunction &)> FirstGen,
- llvm::function_ref<void(CodeGenFunction &)> SecondGen) {
+ llvm::function_ref<llvm::Value *(CodeGenFunction &)> NumIteratorsGen) {
llvm::Value *OMPScanNumIterations = CGF.Builder.CreateIntCast(
NumIteratorsGen(CGF), CGF.SizeTy, /*isSigned=*/false);
SmallVector<const Expr *, 4> Shareds;
SmallVector<const Expr *, 4> Privates;
SmallVector<const Expr *, 4> ReductionOps;
- SmallVector<const Expr *, 4> LHSs;
- SmallVector<const Expr *, 4> RHSs;
- SmallVector<const Expr *, 4> CopyOps;
SmallVector<const Expr *, 4> CopyArrayTemps;
- SmallVector<const Expr *, 4> CopyArrayElems;
for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
assert(C->getModifier() == OMPC_REDUCTION_inscan &&
"Only inscan reductions are expected.");
Shareds.append(C->varlist_begin(), C->varlist_end());
Privates.append(C->privates().begin(), C->privates().end());
ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end());
- LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
- RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
- CopyOps.append(C->copy_ops().begin(), C->copy_ops().end());
CopyArrayTemps.append(C->copy_array_temps().begin(),
C->copy_array_temps().end());
- CopyArrayElems.append(C->copy_array_elems().begin(),
- C->copy_array_elems().end());
}
{
// Emit buffers for each reduction variables.
@@ -3206,6 +3386,49 @@ static void emitScanBasedDirective(
++Count;
}
}
+}
+
+/// Emits the code for the directive with inscan reductions.
+/// The code is the following:
+/// \code
+/// #pragma omp ...
+/// for (i: 0..<num_iters>) {
+/// <input phase>;
+/// buffer[i] = red;
+/// }
+/// #pragma omp master // in parallel region
+/// for (int k = 0; k != ceil(log2(num_iters)); ++k)
+/// for (size cnt = last_iter; cnt >= pow(2, k); --k)
+/// buffer[i] op= buffer[i-pow(2,k)];
+/// #pragma omp barrier // in parallel region
+/// #pragma omp ...
+/// for (0..<num_iters>) {
+/// red = InclusiveScan ? buffer[i] : buffer[i-1];
+/// <scan phase>;
+/// }
+/// \endcode
+static void emitScanBasedDirective(
+ CodeGenFunction &CGF, const OMPLoopDirective &S,
+ llvm::function_ref<llvm::Value *(CodeGenFunction &)> NumIteratorsGen,
+ llvm::function_ref<void(CodeGenFunction &)> FirstGen,
+ llvm::function_ref<void(CodeGenFunction &)> SecondGen) {
+ llvm::Value *OMPScanNumIterations = CGF.Builder.CreateIntCast(
+ NumIteratorsGen(CGF), CGF.SizeTy, /*isSigned=*/false);
+ SmallVector<const Expr *, 4> Privates;
+ SmallVector<const Expr *, 4> ReductionOps;
+ SmallVector<const Expr *, 4> LHSs;
+ SmallVector<const Expr *, 4> RHSs;
+ SmallVector<const Expr *, 4> CopyArrayElems;
+ for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
+ assert(C->getModifier() == OMPC_REDUCTION_inscan &&
+ "Only inscan reductions are expected.");
+ Privates.append(C->privates().begin(), C->privates().end());
+ ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end());
+ LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
+ RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
+ CopyArrayElems.append(C->copy_array_elems().begin(),
+ C->copy_array_elems().end());
+ }
CodeGenFunction::ParentLoopDirectiveForScanRegion ScanRegion(CGF, S);
{
// Emit loop with input phase:
@@ -3218,90 +3441,108 @@ static void emitScanBasedDirective(
CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF);
FirstGen(CGF);
}
- // Emit prefix reduction:
- // for (int k = 0; k <= ceil(log2(n)); ++k)
- llvm::BasicBlock *InputBB = CGF.Builder.GetInsertBlock();
- llvm::BasicBlock *LoopBB = CGF.createBasicBlock("omp.outer.log.scan.body");
- llvm::BasicBlock *ExitBB = CGF.createBasicBlock("omp.outer.log.scan.exit");
- llvm::Function *F = CGF.CGM.getIntrinsic(llvm::Intrinsic::log2, CGF.DoubleTy);
- llvm::Value *Arg =
- CGF.Builder.CreateUIToFP(OMPScanNumIterations, CGF.DoubleTy);
- llvm::Value *LogVal = CGF.EmitNounwindRuntimeCall(F, Arg);
- F = CGF.CGM.getIntrinsic(llvm::Intrinsic::ceil, CGF.DoubleTy);
- LogVal = CGF.EmitNounwindRuntimeCall(F, LogVal);
- LogVal = CGF.Builder.CreateFPToUI(LogVal, CGF.IntTy);
- llvm::Value *NMin1 = CGF.Builder.CreateNUWSub(
- OMPScanNumIterations, llvm::ConstantInt::get(CGF.SizeTy, 1));
- auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, S.getBeginLoc());
- CGF.EmitBlock(LoopBB);
- auto *Counter = CGF.Builder.CreatePHI(CGF.IntTy, 2);
- // size pow2k = 1;
- auto *Pow2K = CGF.Builder.CreatePHI(CGF.SizeTy, 2);
- Counter->addIncoming(llvm::ConstantInt::get(CGF.IntTy, 0), InputBB);
- Pow2K->addIncoming(llvm::ConstantInt::get(CGF.SizeTy, 1), InputBB);
- // for (size i = n - 1; i >= 2 ^ k; --i)
- // tmp[i] op= tmp[i-pow2k];
- llvm::BasicBlock *InnerLoopBB =
- CGF.createBasicBlock("omp.inner.log.scan.body");
- llvm::BasicBlock *InnerExitBB =
- CGF.createBasicBlock("omp.inner.log.scan.exit");
- llvm::Value *CmpI = CGF.Builder.CreateICmpUGE(NMin1, Pow2K);
- CGF.Builder.CreateCondBr(CmpI, InnerLoopBB, InnerExitBB);
- CGF.EmitBlock(InnerLoopBB);
- auto *IVal = CGF.Builder.CreatePHI(CGF.SizeTy, 2);
- IVal->addIncoming(NMin1, LoopBB);
- {
- CodeGenFunction::OMPPrivateScope PrivScope(CGF);
- auto *ILHS = LHSs.begin();
- auto *IRHS = RHSs.begin();
- for (const Expr *CopyArrayElem : CopyArrayElems) {
- const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
- const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
- Address LHSAddr = Address::invalid();
- {
- CodeGenFunction::OpaqueValueMapping IdxMapping(
- CGF,
- cast<OpaqueValueExpr>(
- cast<ArraySubscriptExpr>(CopyArrayElem)->getIdx()),
- RValue::get(IVal));
- LHSAddr = CGF.EmitLValue(CopyArrayElem).getAddress(CGF);
- }
- PrivScope.addPrivate(LHSVD, [LHSAddr]() { return LHSAddr; });
- Address RHSAddr = Address::invalid();
- {
- llvm::Value *OffsetIVal = CGF.Builder.CreateNUWSub(IVal, Pow2K);
- CodeGenFunction::OpaqueValueMapping IdxMapping(
- CGF,
- cast<OpaqueValueExpr>(
- cast<ArraySubscriptExpr>(CopyArrayElem)->getIdx()),
- RValue::get(OffsetIVal));
- RHSAddr = CGF.EmitLValue(CopyArrayElem).getAddress(CGF);
+ // #pragma omp barrier // in parallel region
+ auto &&CodeGen = [&S, OMPScanNumIterations, &LHSs, &RHSs, &CopyArrayElems,
+ &ReductionOps,
+ &Privates](CodeGenFunction &CGF, PrePostActionTy &Action) {
+ Action.Enter(CGF);
+ // Emit prefix reduction:
+ // #pragma omp master // in parallel region
+ // for (int k = 0; k <= ceil(log2(n)); ++k)
+ llvm::BasicBlock *InputBB = CGF.Builder.GetInsertBlock();
+ llvm::BasicBlock *LoopBB = CGF.createBasicBlock("omp.outer.log.scan.body");
+ llvm::BasicBlock *ExitBB = CGF.createBasicBlock("omp.outer.log.scan.exit");
+ llvm::Function *F =
+ CGF.CGM.getIntrinsic(llvm::Intrinsic::log2, CGF.DoubleTy);
+ llvm::Value *Arg =
+ CGF.Builder.CreateUIToFP(OMPScanNumIterations, CGF.DoubleTy);
+ llvm::Value *LogVal = CGF.EmitNounwindRuntimeCall(F, Arg);
+ F = CGF.CGM.getIntrinsic(llvm::Intrinsic::ceil, CGF.DoubleTy);
+ LogVal = CGF.EmitNounwindRuntimeCall(F, LogVal);
+ LogVal = CGF.Builder.CreateFPToUI(LogVal, CGF.IntTy);
+ llvm::Value *NMin1 = CGF.Builder.CreateNUWSub(
+ OMPScanNumIterations, llvm::ConstantInt::get(CGF.SizeTy, 1));
+ auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, S.getBeginLoc());
+ CGF.EmitBlock(LoopBB);
+ auto *Counter = CGF.Builder.CreatePHI(CGF.IntTy, 2);
+ // size pow2k = 1;
+ auto *Pow2K = CGF.Builder.CreatePHI(CGF.SizeTy, 2);
+ Counter->addIncoming(llvm::ConstantInt::get(CGF.IntTy, 0), InputBB);
+ Pow2K->addIncoming(llvm::ConstantInt::get(CGF.SizeTy, 1), InputBB);
+ // for (size i = n - 1; i >= 2 ^ k; --i)
+ // tmp[i] op= tmp[i-pow2k];
+ llvm::BasicBlock *InnerLoopBB =
+ CGF.createBasicBlock("omp.inner.log.scan.body");
+ llvm::BasicBlock *InnerExitBB =
+ CGF.createBasicBlock("omp.inner.log.scan.exit");
+ llvm::Value *CmpI = CGF.Builder.CreateICmpUGE(NMin1, Pow2K);
+ CGF.Builder.CreateCondBr(CmpI, InnerLoopBB, InnerExitBB);
+ CGF.EmitBlock(InnerLoopBB);
+ auto *IVal = CGF.Builder.CreatePHI(CGF.SizeTy, 2);
+ IVal->addIncoming(NMin1, LoopBB);
+ {
+ CodeGenFunction::OMPPrivateScope PrivScope(CGF);
+ auto *ILHS = LHSs.begin();
+ auto *IRHS = RHSs.begin();
+ for (const Expr *CopyArrayElem : CopyArrayElems) {
+ const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
+ const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
+ Address LHSAddr = Address::invalid();
+ {
+ CodeGenFunction::OpaqueValueMapping IdxMapping(
+ CGF,
+ cast<OpaqueValueExpr>(
+ cast<ArraySubscriptExpr>(CopyArrayElem)->getIdx()),
+ RValue::get(IVal));
+ LHSAddr = CGF.EmitLValue(CopyArrayElem).getAddress(CGF);
+ }
+ PrivScope.addPrivate(LHSVD, [LHSAddr]() { return LHSAddr; });
+ Address RHSAddr = Address::invalid();
+ {
+ llvm::Value *OffsetIVal = CGF.Builder.CreateNUWSub(IVal, Pow2K);
+ CodeGenFunction::OpaqueValueMapping IdxMapping(
+ CGF,
+ cast<OpaqueValueExpr>(
+ cast<ArraySubscriptExpr>(CopyArrayElem)->getIdx()),
+ RValue::get(OffsetIVal));
+ RHSAddr = CGF.EmitLValue(CopyArrayElem).getAddress(CGF);
+ }
+ PrivScope.addPrivate(RHSVD, [RHSAddr]() { return RHSAddr; });
+ ++ILHS;
+ ++IRHS;
}
- PrivScope.addPrivate(RHSVD, [RHSAddr]() { return RHSAddr; });
- ++ILHS;
- ++IRHS;
+ PrivScope.Privatize();
+ CGF.CGM.getOpenMPRuntime().emitReduction(
+ CGF, S.getEndLoc(), Privates, LHSs, RHSs, ReductionOps,
+ {/*WithNowait=*/true, /*SimpleReduction=*/true, OMPD_unknown});
}
- PrivScope.Privatize();
- CGF.CGM.getOpenMPRuntime().emitReduction(
- CGF, S.getEndLoc(), Privates, LHSs, RHSs, ReductionOps,
- {/*WithNowait=*/true, /*SimpleReduction=*/true, OMPD_unknown});
- }
- llvm::Value *NextIVal =
- CGF.Builder.CreateNUWSub(IVal, llvm::ConstantInt::get(CGF.SizeTy, 1));
- IVal->addIncoming(NextIVal, CGF.Builder.GetInsertBlock());
- CmpI = CGF.Builder.CreateICmpUGE(NextIVal, Pow2K);
- CGF.Builder.CreateCondBr(CmpI, InnerLoopBB, InnerExitBB);
- CGF.EmitBlock(InnerExitBB);
- llvm::Value *Next =
- CGF.Builder.CreateNUWAdd(Counter, llvm::ConstantInt::get(CGF.IntTy, 1));
- Counter->addIncoming(Next, CGF.Builder.GetInsertBlock());
- // pow2k <<= 1;
- llvm::Value *NextPow2K = CGF.Builder.CreateShl(Pow2K, 1, "", /*HasNUW=*/true);
- Pow2K->addIncoming(NextPow2K, CGF.Builder.GetInsertBlock());
- llvm::Value *Cmp = CGF.Builder.CreateICmpNE(Next, LogVal);
- CGF.Builder.CreateCondBr(Cmp, LoopBB, ExitBB);
- auto DL1 = ApplyDebugLocation::CreateDefaultArtificial(CGF, S.getEndLoc());
- CGF.EmitBlock(ExitBB);
+ llvm::Value *NextIVal =
+ CGF.Builder.CreateNUWSub(IVal, llvm::ConstantInt::get(CGF.SizeTy, 1));
+ IVal->addIncoming(NextIVal, CGF.Builder.GetInsertBlock());
+ CmpI = CGF.Builder.CreateICmpUGE(NextIVal, Pow2K);
+ CGF.Builder.CreateCondBr(CmpI, InnerLoopBB, InnerExitBB);
+ CGF.EmitBlock(InnerExitBB);
+ llvm::Value *Next =
+ CGF.Builder.CreateNUWAdd(Counter, llvm::ConstantInt::get(CGF.IntTy, 1));
+ Counter->addIncoming(Next, CGF.Builder.GetInsertBlock());
+ // pow2k <<= 1;
+ llvm::Value *NextPow2K =
+ CGF.Builder.CreateShl(Pow2K, 1, "", /*HasNUW=*/true);
+ Pow2K->addIncoming(NextPow2K, CGF.Builder.GetInsertBlock());
+ llvm::Value *Cmp = CGF.Builder.CreateICmpNE(Next, LogVal);
+ CGF.Builder.CreateCondBr(Cmp, LoopBB, ExitBB);
+ auto DL1 = ApplyDebugLocation::CreateDefaultArtificial(CGF, S.getEndLoc());
+ CGF.EmitBlock(ExitBB);
+ };
+ if (isOpenMPParallelDirective(S.getDirectiveKind())) {
+ CGF.CGM.getOpenMPRuntime().emitMasterRegion(CGF, CodeGen, S.getBeginLoc());
+ CGF.CGM.getOpenMPRuntime().emitBarrierCall(
+ CGF, S.getBeginLoc(), OMPD_unknown, /*EmitChecks=*/false,
+ /*ForceSimpleCall=*/true);
+ } else {
+ RegionCodeGenTy RCG(CodeGen);
+ RCG(CGF);
+ }
CGF.OMPFirstScanLoop = false;
SecondGen(CGF);
@@ -3338,6 +3579,8 @@ static bool emitWorksharingDirective(CodeGenFunction &CGF,
emitForLoopBounds,
emitDispatchForLoopBounds);
};
+ if (!isOpenMPParallelDirective(S.getDirectiveKind()))
+ emitScanBasedDirectiveDecls(CGF, S, NumIteratorsGen);
emitScanBasedDirective(CGF, S, NumIteratorsGen, FirstGen, SecondGen);
} else {
CodeGenFunction::OMPCancelStackRAII CancelRegion(CGF, S.getDirectiveKind(),
@@ -3349,10 +3592,38 @@ static bool emitWorksharingDirective(CodeGenFunction &CGF,
return HasLastprivates;
}
+static bool isSupportedByOpenMPIRBuilder(const OMPForDirective &S) {
+ if (S.hasCancel())
+ return false;
+ for (OMPClause *C : S.clauses())
+ if (!isa<OMPNowaitClause>(C))
+ return false;
+
+ return true;
+}
+
void CodeGenFunction::EmitOMPForDirective(const OMPForDirective &S) {
bool HasLastprivates = false;
- auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF,
- PrePostActionTy &) {
+ bool UseOMPIRBuilder =
+ CGM.getLangOpts().OpenMPIRBuilder && isSupportedByOpenMPIRBuilder(S);
+ auto &&CodeGen = [this, &S, &HasLastprivates,
+ UseOMPIRBuilder](CodeGenFunction &CGF, PrePostActionTy &) {
+ // Use the OpenMPIRBuilder if enabled.
+ if (UseOMPIRBuilder) {
+ // Emit the associated statement and get its loop representation.
+ const Stmt *Inner = S.getRawStmt();
+ llvm::CanonicalLoopInfo *CLI =
+ EmitOMPCollapsedCanonicalLoopNest(Inner, 1);
+
+ bool NeedsBarrier = !S.getSingleClause<OMPNowaitClause>();
+ llvm::OpenMPIRBuilder &OMPBuilder =
+ CGM.getOpenMPRuntime().getOMPBuilder();
+ llvm::OpenMPIRBuilder::InsertPointTy AllocaIP(
+ AllocaInsertPt->getParent(), AllocaInsertPt->getIterator());
+ OMPBuilder.createWorkshareLoop(Builder, CLI, AllocaIP, NeedsBarrier);
+ return;
+ }
+
HasLastprivates = emitWorksharingDirective(CGF, S, S.hasCancel());
};
{
@@ -3363,9 +3634,11 @@ void CodeGenFunction::EmitOMPForDirective(const OMPForDirective &S) {
S.hasCancel());
}
- // Emit an implicit barrier at the end.
- if (!S.getSingleClause<OMPNowaitClause>() || HasLastprivates)
- CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(), OMPD_for);
+ if (!UseOMPIRBuilder) {
+ // Emit an implicit barrier at the end.
+ if (!S.getSingleClause<OMPNowaitClause>() || HasLastprivates)
+ CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(), OMPD_for);
+ }
// Check for outer lastprivate conditional update.
checkForLastprivateConditionalUpdate(*this, S);
}
@@ -3428,11 +3701,11 @@ void CodeGenFunction::EmitSections(const OMPExecutableDirective &S) {
CodeGenFunction::OpaqueValueMapping OpaqueUB(CGF, &UBRefExpr, UB);
// Generate condition for loop.
BinaryOperator *Cond = BinaryOperator::Create(
- C, &IVRefExpr, &UBRefExpr, BO_LE, C.BoolTy, VK_RValue, OK_Ordinary,
+ C, &IVRefExpr, &UBRefExpr, BO_LE, C.BoolTy, VK_PRValue, OK_Ordinary,
S.getBeginLoc(), FPOptionsOverride());
// Increment for loop counter.
UnaryOperator *Inc = UnaryOperator::Create(
- C, &IVRefExpr, UO_PreInc, KmpInt32Ty, VK_RValue, OK_Ordinary,
+ C, &IVRefExpr, UO_PreInc, KmpInt32Ty, VK_PRValue, OK_Ordinary,
S.getBeginLoc(), true, FPOptionsOverride());
auto &&BodyGen = [CapturedStmt, CS, &S, &IV](CodeGenFunction &CGF) {
// Iterate through all sections and emit a switch construct:
@@ -3546,6 +3819,64 @@ void CodeGenFunction::EmitSections(const OMPExecutableDirective &S) {
}
void CodeGenFunction::EmitOMPSectionsDirective(const OMPSectionsDirective &S) {
+ if (CGM.getLangOpts().OpenMPIRBuilder) {
+ llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
+ using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
+ using BodyGenCallbackTy = llvm::OpenMPIRBuilder::StorableBodyGenCallbackTy;
+
+ auto FiniCB = [this](InsertPointTy IP) {
+ OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP);
+ };
+
+ const CapturedStmt *ICS = S.getInnermostCapturedStmt();
+ const Stmt *CapturedStmt = S.getInnermostCapturedStmt()->getCapturedStmt();
+ const auto *CS = dyn_cast<CompoundStmt>(CapturedStmt);
+ llvm::SmallVector<BodyGenCallbackTy, 4> SectionCBVector;
+ if (CS) {
+ for (const Stmt *SubStmt : CS->children()) {
+ auto SectionCB = [this, SubStmt](InsertPointTy AllocaIP,
+ InsertPointTy CodeGenIP,
+ llvm::BasicBlock &FiniBB) {
+ OMPBuilderCBHelpers::InlinedRegionBodyRAII IRB(*this, AllocaIP,
+ FiniBB);
+ OMPBuilderCBHelpers::EmitOMPRegionBody(*this, SubStmt, CodeGenIP,
+ FiniBB);
+ };
+ SectionCBVector.push_back(SectionCB);
+ }
+ } else {
+ auto SectionCB = [this, CapturedStmt](InsertPointTy AllocaIP,
+ InsertPointTy CodeGenIP,
+ llvm::BasicBlock &FiniBB) {
+ OMPBuilderCBHelpers::InlinedRegionBodyRAII IRB(*this, AllocaIP, FiniBB);
+ OMPBuilderCBHelpers::EmitOMPRegionBody(*this, CapturedStmt, CodeGenIP,
+ FiniBB);
+ };
+ SectionCBVector.push_back(SectionCB);
+ }
+
+ // Privatization callback that performs appropriate action for
+ // shared/private/firstprivate/lastprivate/copyin/... variables.
+ //
+ // TODO: This defaults to shared right now.
+ auto PrivCB = [](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
+ llvm::Value &, llvm::Value &Val, llvm::Value *&ReplVal) {
+ // The next line is appropriate only for variables (Val) with the
+ // data-sharing attribute "shared".
+ ReplVal = &Val;
+
+ return CodeGenIP;
+ };
+
+ CGCapturedStmtInfo CGSI(*ICS, CR_OpenMP);
+ CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(*this, &CGSI);
+ llvm::OpenMPIRBuilder::InsertPointTy AllocaIP(
+ AllocaInsertPt->getParent(), AllocaInsertPt->getIterator());
+ Builder.restoreIP(OMPBuilder.createSections(
+ Builder, AllocaIP, SectionCBVector, PrivCB, FiniCB, S.hasCancel(),
+ S.getSingleClause<OMPNowaitClause>()));
+ return;
+ }
{
auto LPCRegion =
CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
@@ -3562,6 +3893,29 @@ void CodeGenFunction::EmitOMPSectionsDirective(const OMPSectionsDirective &S) {
}
void CodeGenFunction::EmitOMPSectionDirective(const OMPSectionDirective &S) {
+ if (CGM.getLangOpts().OpenMPIRBuilder) {
+ llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
+ using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
+
+ const Stmt *SectionRegionBodyStmt = S.getAssociatedStmt();
+ auto FiniCB = [this](InsertPointTy IP) {
+ OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP);
+ };
+
+ auto BodyGenCB = [SectionRegionBodyStmt, this](InsertPointTy AllocaIP,
+ InsertPointTy CodeGenIP,
+ llvm::BasicBlock &FiniBB) {
+ OMPBuilderCBHelpers::InlinedRegionBodyRAII IRB(*this, AllocaIP, FiniBB);
+ OMPBuilderCBHelpers::EmitOMPRegionBody(*this, SectionRegionBodyStmt,
+ CodeGenIP, FiniBB);
+ };
+
+ LexicalScope Scope(*this, S.getSourceRange());
+ EmitStopPoint(&S);
+ Builder.restoreIP(OMPBuilder.createSection(Builder, BodyGenCB, FiniCB));
+
+ return;
+ }
LexicalScope Scope(*this, S.getSourceRange());
EmitStopPoint(&S);
EmitStmt(S.getAssociatedStmt());
@@ -3650,6 +4004,55 @@ void CodeGenFunction::EmitOMPMasterDirective(const OMPMasterDirective &S) {
emitMaster(*this, S);
}
+static void emitMasked(CodeGenFunction &CGF, const OMPExecutableDirective &S) {
+ auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
+ Action.Enter(CGF);
+ CGF.EmitStmt(S.getRawStmt());
+ };
+ Expr *Filter = nullptr;
+ if (const auto *FilterClause = S.getSingleClause<OMPFilterClause>())
+ Filter = FilterClause->getThreadID();
+ CGF.CGM.getOpenMPRuntime().emitMaskedRegion(CGF, CodeGen, S.getBeginLoc(),
+ Filter);
+}
+
+void CodeGenFunction::EmitOMPMaskedDirective(const OMPMaskedDirective &S) {
+ if (CGM.getLangOpts().OpenMPIRBuilder) {
+ llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
+ using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
+
+ const Stmt *MaskedRegionBodyStmt = S.getAssociatedStmt();
+ const Expr *Filter = nullptr;
+ if (const auto *FilterClause = S.getSingleClause<OMPFilterClause>())
+ Filter = FilterClause->getThreadID();
+ llvm::Value *FilterVal = Filter
+ ? EmitScalarExpr(Filter, CGM.Int32Ty)
+ : llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/0);
+
+ auto FiniCB = [this](InsertPointTy IP) {
+ OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP);
+ };
+
+ auto BodyGenCB = [MaskedRegionBodyStmt, this](InsertPointTy AllocaIP,
+ InsertPointTy CodeGenIP,
+ llvm::BasicBlock &FiniBB) {
+ OMPBuilderCBHelpers::InlinedRegionBodyRAII IRB(*this, AllocaIP, FiniBB);
+ OMPBuilderCBHelpers::EmitOMPRegionBody(*this, MaskedRegionBodyStmt,
+ CodeGenIP, FiniBB);
+ };
+
+ LexicalScope Scope(*this, S.getSourceRange());
+ EmitStopPoint(&S);
+ Builder.restoreIP(
+ OMPBuilder.createMasked(Builder, BodyGenCB, FiniCB, FilterVal));
+
+ return;
+ }
+ LexicalScope Scope(*this, S.getSourceRange());
+ EmitStopPoint(&S);
+ emitMasked(*this, S);
+}
+
void CodeGenFunction::EmitOMPCriticalDirective(const OMPCriticalDirective &S) {
if (CGM.getLangOpts().OpenMPIRBuilder) {
llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
@@ -3712,6 +4115,19 @@ void CodeGenFunction::EmitOMPParallelForDirective(
(void)emitWorksharingDirective(CGF, S, S.hasCancel());
};
{
+ if (llvm::any_of(S.getClausesOfKind<OMPReductionClause>(),
+ [](const OMPReductionClause *C) {
+ return C->getModifier() == OMPC_REDUCTION_inscan;
+ })) {
+ const auto &&NumIteratorsGen = [&S](CodeGenFunction &CGF) {
+ CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF);
+ CGCapturedStmtInfo CGSI(CR_OpenMP);
+ CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGSI);
+ OMPLoopScope LoopScope(CGF, S);
+ return CGF.EmitScalarExpr(S.getNumIterations());
+ };
+ emitScanBasedDirectiveDecls(*this, S, NumIteratorsGen);
+ }
auto LPCRegion =
CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
emitCommonOMPParallelDirective(*this, S, OMPD_for, CodeGen,
@@ -3730,6 +4146,19 @@ void CodeGenFunction::EmitOMPParallelForSimdDirective(
(void)emitWorksharingDirective(CGF, S, /*HasCancel=*/false);
};
{
+ if (llvm::any_of(S.getClausesOfKind<OMPReductionClause>(),
+ [](const OMPReductionClause *C) {
+ return C->getModifier() == OMPC_REDUCTION_inscan;
+ })) {
+ const auto &&NumIteratorsGen = [&S](CodeGenFunction &CGF) {
+ CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF);
+ CGCapturedStmtInfo CGSI(CR_OpenMP);
+ CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGSI);
+ OMPLoopScope LoopScope(CGF, S);
+ return CGF.EmitScalarExpr(S.getNumIterations());
+ };
+ emitScanBasedDirectiveDecls(*this, S, NumIteratorsGen);
+ }
auto LPCRegion =
CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
emitCommonOMPParallelDirective(*this, S, OMPD_for_simd, CodeGen,
@@ -3892,7 +4321,7 @@ void CodeGenFunction::EmitOMPTaskBasedDirective(
}
}
// Get list of lastprivate variables (for taskloops).
- llvm::DenseMap<const VarDecl *, const DeclRefExpr *> LastprivateDstsOrigs;
+ llvm::MapVector<const VarDecl *, const DeclRefExpr *> LastprivateDstsOrigs;
for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
auto IRef = C->varlist_begin();
auto ID = C->destination_exprs().begin();
@@ -3903,8 +4332,8 @@ void CodeGenFunction::EmitOMPTaskBasedDirective(
Data.LastprivateCopies.push_back(IInit);
}
LastprivateDstsOrigs.insert(
- {cast<VarDecl>(cast<DeclRefExpr>(*ID)->getDecl()),
- cast<DeclRefExpr>(*IRef)});
+ std::make_pair(cast<VarDecl>(cast<DeclRefExpr>(*ID)->getDecl()),
+ cast<DeclRefExpr>(*IRef)));
++IRef;
++ID;
}
@@ -3938,15 +4367,14 @@ void CodeGenFunction::EmitOMPTaskBasedDirective(
auto &&CodeGen = [&Data, &S, CS, &BodyGen, &LastprivateDstsOrigs,
CapturedRegion](CodeGenFunction &CGF,
PrePostActionTy &Action) {
- llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, std::pair<Address, Address>>
+ llvm::MapVector<CanonicalDeclPtr<const VarDecl>,
+ std::pair<Address, Address>>
UntiedLocalVars;
// Set proper addresses for generated private copies.
OMPPrivateScope Scope(CGF);
llvm::SmallVector<std::pair<const VarDecl *, Address>, 16> FirstprivatePtrs;
if (!Data.PrivateVars.empty() || !Data.FirstprivateVars.empty() ||
!Data.LastprivateVars.empty() || !Data.PrivateLocals.empty()) {
- llvm::FunctionType *CopyFnTy = llvm::FunctionType::get(
- CGF.Builder.getVoidTy(), {CGF.Builder.getInt8PtrTy()}, true);
enum { PrivatesParam = 2, CopyFnParam = 3 };
llvm::Value *CopyFn = CGF.Builder.CreateLoad(
CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(CopyFnParam)));
@@ -3955,13 +4383,16 @@ void CodeGenFunction::EmitOMPTaskBasedDirective(
// Map privates.
llvm::SmallVector<std::pair<const VarDecl *, Address>, 16> PrivatePtrs;
llvm::SmallVector<llvm::Value *, 16> CallArgs;
+ llvm::SmallVector<llvm::Type *, 4> ParamTypes;
CallArgs.push_back(PrivatesPtr);
+ ParamTypes.push_back(PrivatesPtr->getType());
for (const Expr *E : Data.PrivateVars) {
const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
Address PrivatePtr = CGF.CreateMemTemp(
CGF.getContext().getPointerType(E->getType()), ".priv.ptr.addr");
PrivatePtrs.emplace_back(VD, PrivatePtr);
CallArgs.push_back(PrivatePtr.getPointer());
+ ParamTypes.push_back(PrivatePtr.getType());
}
for (const Expr *E : Data.FirstprivateVars) {
const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
@@ -3971,6 +4402,7 @@ void CodeGenFunction::EmitOMPTaskBasedDirective(
PrivatePtrs.emplace_back(VD, PrivatePtr);
FirstprivatePtrs.emplace_back(VD, PrivatePtr);
CallArgs.push_back(PrivatePtr.getPointer());
+ ParamTypes.push_back(PrivatePtr.getType());
}
for (const Expr *E : Data.LastprivateVars) {
const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
@@ -3979,6 +4411,7 @@ void CodeGenFunction::EmitOMPTaskBasedDirective(
".lastpriv.ptr.addr");
PrivatePtrs.emplace_back(VD, PrivatePtr);
CallArgs.push_back(PrivatePtr.getPointer());
+ ParamTypes.push_back(PrivatePtr.getType());
}
for (const VarDecl *VD : Data.PrivateLocals) {
QualType Ty = VD->getType().getNonReferenceType();
@@ -3988,9 +4421,19 @@ void CodeGenFunction::EmitOMPTaskBasedDirective(
Ty = CGF.getContext().getPointerType(Ty);
Address PrivatePtr = CGF.CreateMemTemp(
CGF.getContext().getPointerType(Ty), ".local.ptr.addr");
- UntiedLocalVars.try_emplace(VD, PrivatePtr, Address::invalid());
+ auto Result = UntiedLocalVars.insert(
+ std::make_pair(VD, std::make_pair(PrivatePtr, Address::invalid())));
+ // If key exists update in place.
+ if (Result.second == false)
+ *Result.first = std::make_pair(
+ VD, std::make_pair(PrivatePtr, Address::invalid()));
CallArgs.push_back(PrivatePtr.getPointer());
+ ParamTypes.push_back(PrivatePtr.getType());
}
+ auto *CopyFnTy = llvm::FunctionType::get(CGF.Builder.getVoidTy(),
+ ParamTypes, /*isVarArg=*/false);
+ CopyFn = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
+ CopyFn, CopyFnTy->getPointerTo());
CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(
CGF, S.getBeginLoc(), {CopyFnTy, CopyFn}, CallArgs);
for (const auto &Pair : LastprivateDstsOrigs) {
@@ -4015,14 +4458,14 @@ void CodeGenFunction::EmitOMPTaskBasedDirective(
if (isAllocatableDecl(Pair.first)) {
llvm::Value *Ptr = CGF.Builder.CreateLoad(Pair.second.first);
Address Replacement(Ptr, CGF.getPointerAlign());
- Pair.getSecond().first = Replacement;
+ Pair.second.first = Replacement;
Ptr = CGF.Builder.CreateLoad(Replacement);
Replacement = Address(Ptr, CGF.getContext().getDeclAlign(Pair.first));
- Pair.getSecond().second = Replacement;
+ Pair.second.second = Replacement;
} else {
llvm::Value *Ptr = CGF.Builder.CreateLoad(Pair.second.first);
Address Replacement(Ptr, CGF.getContext().getDeclAlign(Pair.first));
- Pair.getSecond().first = Replacement;
+ Pair.second.first = Replacement;
}
}
}
@@ -4156,7 +4599,7 @@ createImplicitFirstprivateForType(ASTContext &C, OMPTaskDataTy &Data,
PrivateVD->setInitStyle(VarDecl::CInit);
PrivateVD->setInit(ImplicitCastExpr::Create(C, ElemType, CK_LValueToRValue,
InitRef, /*BasePath=*/nullptr,
- VK_RValue, FPOptionsOverride()));
+ VK_PRValue, FPOptionsOverride()));
Data.FirstprivateVars.emplace_back(OrigRef);
Data.FirstprivateCopies.emplace_back(PrivateRef);
Data.FirstprivateInits.emplace_back(InitRef);
@@ -4238,8 +4681,6 @@ void CodeGenFunction::EmitOMPTargetTaskBasedDirective(
// Set proper addresses for generated private copies.
OMPPrivateScope Scope(CGF);
if (!Data.FirstprivateVars.empty()) {
- llvm::FunctionType *CopyFnTy = llvm::FunctionType::get(
- CGF.Builder.getVoidTy(), {CGF.Builder.getInt8PtrTy()}, true);
enum { PrivatesParam = 2, CopyFnParam = 3 };
llvm::Value *CopyFn = CGF.Builder.CreateLoad(
CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(CopyFnParam)));
@@ -4248,7 +4689,9 @@ void CodeGenFunction::EmitOMPTargetTaskBasedDirective(
// Map privates.
llvm::SmallVector<std::pair<const VarDecl *, Address>, 16> PrivatePtrs;
llvm::SmallVector<llvm::Value *, 16> CallArgs;
+ llvm::SmallVector<llvm::Type *, 4> ParamTypes;
CallArgs.push_back(PrivatesPtr);
+ ParamTypes.push_back(PrivatesPtr->getType());
for (const Expr *E : Data.FirstprivateVars) {
const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
Address PrivatePtr =
@@ -4256,7 +4699,12 @@ void CodeGenFunction::EmitOMPTargetTaskBasedDirective(
".firstpriv.ptr.addr");
PrivatePtrs.emplace_back(VD, PrivatePtr);
CallArgs.push_back(PrivatePtr.getPointer());
+ ParamTypes.push_back(PrivatePtr.getType());
}
+ auto *CopyFnTy = llvm::FunctionType::get(CGF.Builder.getVoidTy(),
+ ParamTypes, /*isVarArg=*/false);
+ CopyFn = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
+ CopyFn, CopyFnTy->getPointerTo());
CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(
CGF, S.getBeginLoc(), {CopyFnTy, CopyFn}, CallArgs);
for (const auto &Pair : PrivatePtrs) {
@@ -4779,7 +5227,7 @@ void CodeGenFunction::EmitOMPDistributeLoop(const OMPLoopDirective &S,
*this, S,
[&S](CodeGenFunction &CGF, PrePostActionTy &) {
if (isOpenMPSimdDirective(S.getDirectiveKind()))
- CGF.EmitOMPSimdInit(S, /*IsMonotonic=*/true);
+ CGF.EmitOMPSimdInit(S);
},
[&S, &LoopScope, Cond, IncExpr, LoopExit, &CodeGenLoop,
StaticChunked](CodeGenFunction &CGF, PrePostActionTy &) {
@@ -4859,6 +5307,8 @@ static llvm::Function *emitOutlinedOrderedFunction(CodeGenModule &CGM,
CGF.CapturedStmtInfo = &CapStmtInfo;
llvm::Function *Fn = CGF.GenerateOpenMPCapturedStmtFunction(*S, Loc);
Fn->setDoesNotRecurse();
+ if (CGM.getCodeGenOpts().OptimizationLevel != 0)
+ Fn->addFnAttr(llvm::Attribute::AlwaysInline);
return Fn;
}
@@ -5281,32 +5731,35 @@ static void emitOMPAtomicCaptureExpr(CodeGenFunction &CGF,
// Emit post-update store to 'v' of old/new 'x' value.
CGF.emitOMPSimpleStore(VLValue, NewVVal, NewVValType, Loc);
CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, V);
- // OpenMP, 2.17.7, atomic Construct
- // If the write, update, or capture clause is specified and the release,
- // acq_rel, or seq_cst clause is specified then the strong flush on entry to
- // the atomic operation is also a release flush.
- // If the read or capture clause is specified and the acquire, acq_rel, or
- // seq_cst clause is specified then the strong flush on exit from the atomic
- // operation is also an acquire flush.
- switch (AO) {
- case llvm::AtomicOrdering::Release:
- CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc,
- llvm::AtomicOrdering::Release);
- break;
- case llvm::AtomicOrdering::Acquire:
- CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc,
- llvm::AtomicOrdering::Acquire);
- break;
- case llvm::AtomicOrdering::AcquireRelease:
- case llvm::AtomicOrdering::SequentiallyConsistent:
- CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc,
- llvm::AtomicOrdering::AcquireRelease);
- break;
- case llvm::AtomicOrdering::Monotonic:
- break;
- case llvm::AtomicOrdering::NotAtomic:
- case llvm::AtomicOrdering::Unordered:
- llvm_unreachable("Unexpected ordering.");
+ // OpenMP 5.1 removes the required flush for capture clause.
+ if (CGF.CGM.getLangOpts().OpenMP < 51) {
+ // OpenMP, 2.17.7, atomic Construct
+ // If the write, update, or capture clause is specified and the release,
+ // acq_rel, or seq_cst clause is specified then the strong flush on entry to
+ // the atomic operation is also a release flush.
+ // If the read or capture clause is specified and the acquire, acq_rel, or
+ // seq_cst clause is specified then the strong flush on exit from the atomic
+ // operation is also an acquire flush.
+ switch (AO) {
+ case llvm::AtomicOrdering::Release:
+ CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc,
+ llvm::AtomicOrdering::Release);
+ break;
+ case llvm::AtomicOrdering::Acquire:
+ CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc,
+ llvm::AtomicOrdering::Acquire);
+ break;
+ case llvm::AtomicOrdering::AcquireRelease:
+ case llvm::AtomicOrdering::SequentiallyConsistent:
+ CGF.CGM.getOpenMPRuntime().emitFlush(
+ CGF, llvm::None, Loc, llvm::AtomicOrdering::AcquireRelease);
+ break;
+ case llvm::AtomicOrdering::Monotonic:
+ break;
+ case llvm::AtomicOrdering::NotAtomic:
+ case llvm::AtomicOrdering::Unordered:
+ llvm_unreachable("Unexpected ordering.");
+ }
}
}
@@ -5341,6 +5794,9 @@ static void emitOMPAtomicExpr(CodeGenFunction &CGF, OpenMPClauseKind Kind,
case OMPC_in_reduction:
case OMPC_safelen:
case OMPC_simdlen:
+ case OMPC_sizes:
+ case OMPC_full:
+ case OMPC_partial:
case OMPC_allocator:
case OMPC_allocate:
case OMPC_collapse:
@@ -5399,7 +5855,14 @@ static void emitOMPAtomicExpr(CodeGenFunction &CGF, OpenMPClauseKind Kind,
case OMPC_exclusive:
case OMPC_uses_allocators:
case OMPC_affinity:
- default:
+ case OMPC_init:
+ case OMPC_inbranch:
+ case OMPC_notinbranch:
+ case OMPC_link:
+ case OMPC_use:
+ case OMPC_novariants:
+ case OMPC_nocontext:
+ case OMPC_filter:
llvm_unreachable("Clause is not allowed in 'omp atomic'.");
}
}
@@ -5431,7 +5894,7 @@ void CodeGenFunction::EmitOMPAtomicDirective(const OMPAtomicDirective &S) {
C->getClauseKind() != OMPC_acq_rel &&
C->getClauseKind() != OMPC_acquire &&
C->getClauseKind() != OMPC_release &&
- C->getClauseKind() != OMPC_relaxed) {
+ C->getClauseKind() != OMPC_relaxed && C->getClauseKind() != OMPC_hint) {
Kind = C->getClauseKind();
break;
}
@@ -5554,6 +6017,7 @@ static void emitTargetRegion(CodeGenFunction &CGF, const OMPTargetDirective &S,
CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, S);
CGF.EmitStmt(S.getCapturedStmt(OMPD_target)->getCapturedStmt());
+ CGF.EnsureInsertPoint();
}
void CodeGenFunction::EmitOMPTargetDeviceFunction(CodeGenModule &CGM,
@@ -5970,7 +6434,9 @@ void CodeGenFunction::EmitOMPCancelDirective(const OMPCancelDirective &S) {
llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
// TODO: This check is necessary as we only generate `omp parallel` through
// the OpenMPIRBuilder for now.
- if (S.getCancelRegion() == OMPD_parallel) {
+ if (S.getCancelRegion() == OMPD_parallel ||
+ S.getCancelRegion() == OMPD_sections ||
+ S.getCancelRegion() == OMPD_section) {
llvm::Value *IfCondition = nullptr;
if (IfCond)
IfCondition = EmitScalarExpr(IfCond,
@@ -6695,7 +7161,7 @@ void CodeGenFunction::EmitSimpleOMPExecutableDirective(
for (const auto *C : D.getClausesOfKind<OMPOrderedClause>()) {
if (!C->getNumForLoops())
continue;
- for (unsigned I = LD->getCollapsedNumber(),
+ for (unsigned I = LD->getLoopsNumber(),
E = C->getLoopNumIterations().size();
I < E; ++I) {
if (const auto *VD = dyn_cast<OMPCapturedExprDecl>(
@@ -6714,7 +7180,8 @@ void CodeGenFunction::EmitSimpleOMPExecutableDirective(
if (D.getDirectiveKind() == OMPD_atomic ||
D.getDirectiveKind() == OMPD_critical ||
D.getDirectiveKind() == OMPD_section ||
- D.getDirectiveKind() == OMPD_master) {
+ D.getDirectiveKind() == OMPD_master ||
+ D.getDirectiveKind() == OMPD_masked) {
EmitStmt(D.getAssociatedStmt());
} else {
auto LPCRegion =