diff options
Diffstat (limited to 'clang/lib/CodeGen/CGStmtOpenMP.cpp')
-rw-r--r-- | clang/lib/CodeGen/CGStmtOpenMP.cpp | 925 |
1 files changed, 696 insertions, 229 deletions
diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp index 5e8d98cfe5ef..f6233b791182 100644 --- a/clang/lib/CodeGen/CGStmtOpenMP.cpp +++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp @@ -133,50 +133,55 @@ public: /// Private scope for OpenMP loop-based directives, that supports capturing /// of used expression from loop statement. class OMPLoopScope : public CodeGenFunction::RunCleanupsScope { - void emitPreInitStmt(CodeGenFunction &CGF, const OMPLoopDirective &S) { + void emitPreInitStmt(CodeGenFunction &CGF, const OMPLoopBasedDirective &S) { + const DeclStmt *PreInits; CodeGenFunction::OMPMapVars PreCondVars; - llvm::DenseSet<const VarDecl *> EmittedAsPrivate; - for (const auto *E : S.counters()) { - const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); - EmittedAsPrivate.insert(VD->getCanonicalDecl()); - (void)PreCondVars.setVarAddr( - CGF, VD, CGF.CreateMemTemp(VD->getType().getNonReferenceType())); - } - // Mark private vars as undefs. - for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) { - for (const Expr *IRef : C->varlists()) { - const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(IRef)->getDecl()); - if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) { - (void)PreCondVars.setVarAddr( - CGF, OrigVD, - Address(llvm::UndefValue::get( - CGF.ConvertTypeForMem(CGF.getContext().getPointerType( - OrigVD->getType().getNonReferenceType()))), - CGF.getContext().getDeclAlign(OrigVD))); - } + if (auto *LD = dyn_cast<OMPLoopDirective>(&S)) { + llvm::DenseSet<const VarDecl *> EmittedAsPrivate; + for (const auto *E : LD->counters()) { + const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); + EmittedAsPrivate.insert(VD->getCanonicalDecl()); + (void)PreCondVars.setVarAddr( + CGF, VD, CGF.CreateMemTemp(VD->getType().getNonReferenceType())); } - } - (void)PreCondVars.apply(CGF); - // Emit init, __range and __end variables for C++ range loops. - const Stmt *Body = - S.getInnermostCapturedStmt()->getCapturedStmt()->IgnoreContainers(); - for (unsigned Cnt = 0; Cnt < S.getCollapsedNumber(); ++Cnt) { - Body = OMPLoopDirective::tryToFindNextInnerLoop( - Body, /*TryImperfectlyNestedLoops=*/true); - if (auto *For = dyn_cast<ForStmt>(Body)) { - Body = For->getBody(); - } else { - assert(isa<CXXForRangeStmt>(Body) && - "Expected canonical for loop or range-based for loop."); - auto *CXXFor = cast<CXXForRangeStmt>(Body); - if (const Stmt *Init = CXXFor->getInit()) - CGF.EmitStmt(Init); - CGF.EmitStmt(CXXFor->getRangeStmt()); - CGF.EmitStmt(CXXFor->getEndStmt()); - Body = CXXFor->getBody(); + // Mark private vars as undefs. + for (const auto *C : LD->getClausesOfKind<OMPPrivateClause>()) { + for (const Expr *IRef : C->varlists()) { + const auto *OrigVD = + cast<VarDecl>(cast<DeclRefExpr>(IRef)->getDecl()); + if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) { + (void)PreCondVars.setVarAddr( + CGF, OrigVD, + Address(llvm::UndefValue::get(CGF.ConvertTypeForMem( + CGF.getContext().getPointerType( + OrigVD->getType().getNonReferenceType()))), + CGF.getContext().getDeclAlign(OrigVD))); + } + } } + (void)PreCondVars.apply(CGF); + // Emit init, __range and __end variables for C++ range loops. + (void)OMPLoopBasedDirective::doForAllLoops( + LD->getInnermostCapturedStmt()->getCapturedStmt(), + /*TryImperfectlyNestedLoops=*/true, LD->getLoopsNumber(), + [&CGF](unsigned Cnt, const Stmt *CurStmt) { + if (const auto *CXXFor = dyn_cast<CXXForRangeStmt>(CurStmt)) { + if (const Stmt *Init = CXXFor->getInit()) + CGF.EmitStmt(Init); + CGF.EmitStmt(CXXFor->getRangeStmt()); + CGF.EmitStmt(CXXFor->getEndStmt()); + } + return false; + }); + PreInits = cast_or_null<DeclStmt>(LD->getPreInits()); + } else if (const auto *Tile = dyn_cast<OMPTileDirective>(&S)) { + PreInits = cast_or_null<DeclStmt>(Tile->getPreInits()); + } else if (const auto *Unroll = dyn_cast<OMPUnrollDirective>(&S)) { + PreInits = cast_or_null<DeclStmt>(Unroll->getPreInits()); + } else { + llvm_unreachable("Unknown loop-based directive kind."); } - if (const auto *PreInits = cast_or_null<DeclStmt>(S.getPreInits())) { + if (PreInits) { for (const auto *I : PreInits->decls()) CGF.EmitVarDecl(cast<VarDecl>(*I)); } @@ -184,7 +189,7 @@ class OMPLoopScope : public CodeGenFunction::RunCleanupsScope { } public: - OMPLoopScope(CodeGenFunction &CGF, const OMPLoopDirective &S) + OMPLoopScope(CodeGenFunction &CGF, const OMPLoopBasedDirective &S) : CodeGenFunction::RunCleanupsScope(CGF) { emitPreInitStmt(CGF, S); } @@ -238,11 +243,22 @@ public: if (const Expr *E = TG->getReductionRef()) CGF.EmitVarDecl(*cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl())); } + // Temp copy arrays for inscan reductions should not be emitted as they are + // not used in simd only mode. + llvm::DenseSet<CanonicalDeclPtr<const Decl>> CopyArrayTemps; + for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) { + if (C->getModifier() != OMPC_REDUCTION_inscan) + continue; + for (const Expr *E : C->copy_array_temps()) + CopyArrayTemps.insert(cast<DeclRefExpr>(E)->getDecl()); + } const auto *CS = cast_or_null<CapturedStmt>(S.getAssociatedStmt()); while (CS) { for (auto &C : CS->captures()) { if (C.capturesVariable() || C.capturesVariableByCopy()) { auto *VD = C.getCapturedVar(); + if (CopyArrayTemps.contains(VD)) + continue; assert(VD == VD->getCanonicalDecl() && "Canonical decl must be captured."); DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD), @@ -501,6 +517,10 @@ static llvm::Function *emitOutlinedFunctionPrologue( F->setDoesNotThrow(); F->setDoesNotRecurse(); + // Always inline the outlined function if optimizations are enabled. + if (CGM.getCodeGenOpts().OptimizationLevel != 0) + F->addFnAttr(llvm::Attribute::AlwaysInline); + // Generate the function. CGF.StartFunction(CD, Ctx.VoidTy, F, FuncInfo, TargetArgs, FO.UIntPtrCastRequired ? FO.Loc : FO.S->getBeginLoc(), @@ -631,6 +651,7 @@ CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S, emitOutlinedFunctionPrologue(WrapperCGF, Args, LocalAddrs, VLASizes, WrapperCGF.CXXThisValue, WrapperFO); llvm::SmallVector<llvm::Value *, 4> CallArgs; + auto *PI = F->arg_begin(); for (const auto *Arg : Args) { llvm::Value *CallArg; auto I = LocalAddrs.find(Arg); @@ -639,6 +660,11 @@ CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S, I->second.second, I->second.first ? I->second.first->getType() : Arg->getType(), AlignmentSource::Decl); + if (LV.getType()->isAnyComplexType()) + LV.setAddress(WrapperCGF.Builder.CreatePointerBitCastOrAddrSpaceCast( + LV.getAddress(WrapperCGF), + PI->getType()->getPointerTo( + LV.getAddress(WrapperCGF).getAddressSpace()))); CallArg = WrapperCGF.EmitLoadOfScalar(LV, S.getBeginLoc()); } else { auto EI = VLASizes.find(Arg); @@ -652,6 +678,7 @@ CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S, } } CallArgs.emplace_back(WrapperCGF.EmitFromMemory(CallArg, Arg->getType())); + ++PI; } CGM.getOpenMPRuntime().emitOutlinedFunctionCall(WrapperCGF, Loc, F, CallArgs); WrapperCGF.FinishFunction(); @@ -675,7 +702,8 @@ void CodeGenFunction::EmitOMPAggregateAssign( llvm::Value *SrcBegin = SrcAddr.getPointer(); llvm::Value *DestBegin = DestAddr.getPointer(); // Cast from pointer to array type to pointer to single element. - llvm::Value *DestEnd = Builder.CreateGEP(DestBegin, NumElements); + llvm::Value *DestEnd = + Builder.CreateGEP(DestAddr.getElementType(), DestBegin, NumElements); // The basic structure here is a while-do loop. llvm::BasicBlock *BodyBB = createBasicBlock("omp.arraycpy.body"); llvm::BasicBlock *DoneBB = createBasicBlock("omp.arraycpy.done"); @@ -708,9 +736,11 @@ void CodeGenFunction::EmitOMPAggregateAssign( // Shift the address forward by one element. llvm::Value *DestElementNext = Builder.CreateConstGEP1_32( - DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element"); + DestAddr.getElementType(), DestElementPHI, /*Idx0=*/1, + "omp.arraycpy.dest.element"); llvm::Value *SrcElementNext = Builder.CreateConstGEP1_32( - SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.src.element"); + SrcAddr.getElementType(), SrcElementPHI, /*Idx0=*/1, + "omp.arraycpy.src.element"); // Check whether we've reached the end. llvm::Value *Done = Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done"); @@ -803,8 +833,7 @@ bool CodeGenFunction::EmitOMPFirstprivateClause(const OMPExecutableDirective &D, if (DeviceConstTarget && OrigVD->getType().isConstant(getContext()) && FD && FD->getType()->isReferenceType() && (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())) { - (void)CGM.getOpenMPRuntime().registerTargetFirstprivateCopy(*this, - OrigVD); + EmittedAsFirstprivate.insert(OrigVD->getCanonicalDecl()); ++IRef; ++InitsRef; continue; @@ -985,12 +1014,14 @@ bool CodeGenFunction::EmitOMPCopyinClause(const OMPExecutableDirective &D) { // need to copy data. CopyBegin = createBasicBlock("copyin.not.master"); CopyEnd = createBasicBlock("copyin.not.master.end"); + // TODO: Avoid ptrtoint conversion. + auto *MasterAddrInt = + Builder.CreatePtrToInt(MasterAddr.getPointer(), CGM.IntPtrTy); + auto *PrivateAddrInt = + Builder.CreatePtrToInt(PrivateAddr.getPointer(), CGM.IntPtrTy); Builder.CreateCondBr( - Builder.CreateICmpNE( - Builder.CreatePtrToInt(MasterAddr.getPointer(), CGM.IntPtrTy), - Builder.CreatePtrToInt(PrivateAddr.getPointer(), - CGM.IntPtrTy)), - CopyBegin, CopyEnd); + Builder.CreateICmpNE(MasterAddrInt, PrivateAddrInt), CopyBegin, + CopyEnd); EmitBlock(CopyBegin); } const auto *SrcVD = @@ -1755,6 +1786,31 @@ void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) { checkForLastprivateConditionalUpdate(*this, S); } +namespace { +/// RAII to handle scopes for loop transformation directives. +class OMPTransformDirectiveScopeRAII { + OMPLoopScope *Scope = nullptr; + CodeGenFunction::CGCapturedStmtInfo *CGSI = nullptr; + CodeGenFunction::CGCapturedStmtRAII *CapInfoRAII = nullptr; + +public: + OMPTransformDirectiveScopeRAII(CodeGenFunction &CGF, const Stmt *S) { + if (const auto *Dir = dyn_cast<OMPLoopBasedDirective>(S)) { + Scope = new OMPLoopScope(CGF, *Dir); + CGSI = new CodeGenFunction::CGCapturedStmtInfo(CR_OpenMP); + CapInfoRAII = new CodeGenFunction::CGCapturedStmtRAII(CGF, CGSI); + } + } + ~OMPTransformDirectiveScopeRAII() { + if (!Scope) + return; + delete CapInfoRAII; + delete CGSI; + delete Scope; + } +}; +} // namespace + static void emitBody(CodeGenFunction &CGF, const Stmt *S, const Stmt *NextLoop, int MaxLevel, int Level = 0) { assert(Level < MaxLevel && "Too deep lookup during loop body codegen."); @@ -1771,6 +1827,12 @@ static void emitBody(CodeGenFunction &CGF, const Stmt *S, const Stmt *NextLoop, return; } if (SimplifiedS == NextLoop) { + if (auto *Dir = dyn_cast<OMPTileDirective>(SimplifiedS)) + SimplifiedS = Dir->getTransformedStmt(); + if (auto *Dir = dyn_cast<OMPUnrollDirective>(SimplifiedS)) + SimplifiedS = Dir->getTransformedStmt(); + if (const auto *CanonLoop = dyn_cast<OMPCanonicalLoop>(SimplifiedS)) + SimplifiedS = CanonLoop->getLoopStmt(); if (const auto *For = dyn_cast<ForStmt>(SimplifiedS)) { S = For->getBody(); } else { @@ -1845,9 +1907,9 @@ void CodeGenFunction::EmitOMPLoopBody(const OMPLoopDirective &D, D.getInnermostCapturedStmt()->getCapturedStmt()->IgnoreContainers(); // Emit loop body. emitBody(*this, Body, - OMPLoopDirective::tryToFindNextInnerLoop( + OMPLoopBasedDirective::tryToFindNextInnerLoop( Body, /*TryImperfectlyNestedLoops=*/true), - D.getCollapsedNumber()); + D.getLoopsNumber()); // Jump to the dispatcher at the end of the loop body. if (IsInscanRegion) @@ -1858,6 +1920,121 @@ void CodeGenFunction::EmitOMPLoopBody(const OMPLoopDirective &D, BreakContinueStack.pop_back(); } +using EmittedClosureTy = std::pair<llvm::Function *, llvm::Value *>; + +/// Emit a captured statement and return the function as well as its captured +/// closure context. +static EmittedClosureTy emitCapturedStmtFunc(CodeGenFunction &ParentCGF, + const CapturedStmt *S) { + LValue CapStruct = ParentCGF.InitCapturedStruct(*S); + CodeGenFunction CGF(ParentCGF.CGM, /*suppressNewContext=*/true); + std::unique_ptr<CodeGenFunction::CGCapturedStmtInfo> CSI = + std::make_unique<CodeGenFunction::CGCapturedStmtInfo>(*S); + CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, CSI.get()); + llvm::Function *F = CGF.GenerateCapturedStmtFunction(*S); + + return {F, CapStruct.getPointer(ParentCGF)}; +} + +/// Emit a call to a previously captured closure. +static llvm::CallInst * +emitCapturedStmtCall(CodeGenFunction &ParentCGF, EmittedClosureTy Cap, + llvm::ArrayRef<llvm::Value *> Args) { + // Append the closure context to the argument. + SmallVector<llvm::Value *> EffectiveArgs; + EffectiveArgs.reserve(Args.size() + 1); + llvm::append_range(EffectiveArgs, Args); + EffectiveArgs.push_back(Cap.second); + + return ParentCGF.Builder.CreateCall(Cap.first, EffectiveArgs); +} + +llvm::CanonicalLoopInfo * +CodeGenFunction::EmitOMPCollapsedCanonicalLoopNest(const Stmt *S, int Depth) { + assert(Depth == 1 && "Nested loops with OpenMPIRBuilder not yet implemented"); + + EmitStmt(S); + assert(OMPLoopNestStack.size() >= (size_t)Depth && "Found too few loops"); + + // The last added loop is the outermost one. + return OMPLoopNestStack.back(); +} + +void CodeGenFunction::EmitOMPCanonicalLoop(const OMPCanonicalLoop *S) { + const Stmt *SyntacticalLoop = S->getLoopStmt(); + if (!getLangOpts().OpenMPIRBuilder) { + // Ignore if OpenMPIRBuilder is not enabled. + EmitStmt(SyntacticalLoop); + return; + } + + LexicalScope ForScope(*this, S->getSourceRange()); + + // Emit init statements. The Distance/LoopVar funcs may reference variable + // declarations they contain. + const Stmt *BodyStmt; + if (const auto *For = dyn_cast<ForStmt>(SyntacticalLoop)) { + if (const Stmt *InitStmt = For->getInit()) + EmitStmt(InitStmt); + BodyStmt = For->getBody(); + } else if (const auto *RangeFor = + dyn_cast<CXXForRangeStmt>(SyntacticalLoop)) { + if (const DeclStmt *RangeStmt = RangeFor->getRangeStmt()) + EmitStmt(RangeStmt); + if (const DeclStmt *BeginStmt = RangeFor->getBeginStmt()) + EmitStmt(BeginStmt); + if (const DeclStmt *EndStmt = RangeFor->getEndStmt()) + EmitStmt(EndStmt); + if (const DeclStmt *LoopVarStmt = RangeFor->getLoopVarStmt()) + EmitStmt(LoopVarStmt); + BodyStmt = RangeFor->getBody(); + } else + llvm_unreachable("Expected for-stmt or range-based for-stmt"); + + // Emit closure for later use. By-value captures will be captured here. + const CapturedStmt *DistanceFunc = S->getDistanceFunc(); + EmittedClosureTy DistanceClosure = emitCapturedStmtFunc(*this, DistanceFunc); + const CapturedStmt *LoopVarFunc = S->getLoopVarFunc(); + EmittedClosureTy LoopVarClosure = emitCapturedStmtFunc(*this, LoopVarFunc); + + // Call the distance function to get the number of iterations of the loop to + // come. + QualType LogicalTy = DistanceFunc->getCapturedDecl() + ->getParam(0) + ->getType() + .getNonReferenceType(); + Address CountAddr = CreateMemTemp(LogicalTy, ".count.addr"); + emitCapturedStmtCall(*this, DistanceClosure, {CountAddr.getPointer()}); + llvm::Value *DistVal = Builder.CreateLoad(CountAddr, ".count"); + + // Emit the loop structure. + llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder(); + auto BodyGen = [&, this](llvm::OpenMPIRBuilder::InsertPointTy CodeGenIP, + llvm::Value *IndVar) { + Builder.restoreIP(CodeGenIP); + + // Emit the loop body: Convert the logical iteration number to the loop + // variable and emit the body. + const DeclRefExpr *LoopVarRef = S->getLoopVarRef(); + LValue LCVal = EmitLValue(LoopVarRef); + Address LoopVarAddress = LCVal.getAddress(*this); + emitCapturedStmtCall(*this, LoopVarClosure, + {LoopVarAddress.getPointer(), IndVar}); + + RunCleanupsScope BodyScope(*this); + EmitStmt(BodyStmt); + }; + llvm::CanonicalLoopInfo *CL = + OMPBuilder.createCanonicalLoop(Builder, BodyGen, DistVal); + + // Finish up the loop. + Builder.restoreIP(CL->getAfterIP()); + ForScope.ForceCleanup(); + + // Remember the CanonicalLoopInfo for parent AST nodes consuming it. + OMPLoopNestStack.push_back(CL); +} + void CodeGenFunction::EmitOMPInnerLoop( const OMPExecutableDirective &S, bool RequiresCleanup, const Expr *LoopCond, const Expr *IncExpr, @@ -1875,6 +2052,7 @@ void CodeGenFunction::EmitOMPInnerLoop( const CapturedStmt *ICS = OMPED.getInnermostCapturedStmt(); const Stmt *SS = ICS->getCapturedStmt(); const AttributedStmt *AS = dyn_cast_or_null<AttributedStmt>(SS); + OMPLoopNestStack.clear(); if (AS) LoopStack.push(CondBlock, CGM.getContext(), CGM.getCodeGenOpts(), AS->getAttrs(), SourceLocToDebugLoc(R.getBegin()), @@ -2062,8 +2240,7 @@ void CodeGenFunction::EmitOMPPrivateLoopCounters( for (const auto *C : S.getClausesOfKind<OMPOrderedClause>()) { if (!C->getNumForLoops()) continue; - for (unsigned I = S.getCollapsedNumber(), - E = C->getLoopNumIterations().size(); + for (unsigned I = S.getLoopsNumber(), E = C->getLoopNumIterations().size(); I < E; ++I) { const auto *DRE = cast<DeclRefExpr>(C->getLoopCounter(I)); const auto *VD = cast<VarDecl>(DRE->getDecl()); @@ -2152,8 +2329,7 @@ void CodeGenFunction::EmitOMPLinearClause( } static void emitSimdlenSafelenClause(CodeGenFunction &CGF, - const OMPExecutableDirective &D, - bool IsMonotonic) { + const OMPExecutableDirective &D) { if (!CGF.HaveInsertPoint()) return; if (const auto *C = D.getSingleClause<OMPSimdlenClause>()) { @@ -2164,8 +2340,7 @@ static void emitSimdlenSafelenClause(CodeGenFunction &CGF, // In presence of finite 'safelen', it may be unsafe to mark all // the memory instructions parallel, because loop-carried // dependences of 'safelen' iterations are possible. - if (!IsMonotonic) - CGF.LoopStack.setParallel(!D.getSingleClause<OMPSafelenClause>()); + CGF.LoopStack.setParallel(!D.getSingleClause<OMPSafelenClause>()); } else if (const auto *C = D.getSingleClause<OMPSafelenClause>()) { RValue Len = CGF.EmitAnyExpr(C->getSafelen(), AggValueSlot::ignored(), /*ignoreResult=*/true); @@ -2178,12 +2353,11 @@ static void emitSimdlenSafelenClause(CodeGenFunction &CGF, } } -void CodeGenFunction::EmitOMPSimdInit(const OMPLoopDirective &D, - bool IsMonotonic) { +void CodeGenFunction::EmitOMPSimdInit(const OMPLoopDirective &D) { // Walk clauses and process safelen/lastprivate. - LoopStack.setParallel(!IsMonotonic); + LoopStack.setParallel(/*Enable=*/true); LoopStack.setVectorizeEnable(); - emitSimdlenSafelenClause(*this, D, IsMonotonic); + emitSimdlenSafelenClause(*this, D); if (const auto *C = D.getSingleClause<OMPOrderClause>()) if (C->getKind() == OMPC_ORDER_concurrent) LoopStack.setParallel(/*Enable=*/true); @@ -2406,6 +2580,34 @@ void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) { checkForLastprivateConditionalUpdate(*this, S); } +void CodeGenFunction::EmitOMPTileDirective(const OMPTileDirective &S) { + // Emit the de-sugared statement. + OMPTransformDirectiveScopeRAII TileScope(*this, &S); + EmitStmt(S.getTransformedStmt()); +} + +void CodeGenFunction::EmitOMPUnrollDirective(const OMPUnrollDirective &S) { + // This function is only called if the unrolled loop is not consumed by any + // other loop-associated construct. Such a loop-associated construct will have + // used the transformed AST. + + // Set the unroll metadata for the next emitted loop. + LoopStack.setUnrollState(LoopAttributes::Enable); + + if (S.hasClausesOfKind<OMPFullClause>()) { + LoopStack.setUnrollState(LoopAttributes::Full); + } else if (auto *PartialClause = S.getSingleClause<OMPPartialClause>()) { + if (Expr *FactorExpr = PartialClause->getFactor()) { + uint64_t Factor = + FactorExpr->EvaluateKnownConstInt(getContext()).getZExtValue(); + assert(Factor >= 1 && "Only positive factors are valid"); + LoopStack.setUnrollCount(Factor); + } + } + + EmitStmt(S.getAssociatedStmt()); +} + void CodeGenFunction::EmitOMPOuterLoop( bool DynamicOrOrdered, bool IsMonotonic, const OMPLoopDirective &S, CodeGenFunction::OMPPrivateScope &LoopScope, @@ -2424,6 +2626,7 @@ void CodeGenFunction::EmitOMPOuterLoop( llvm::BasicBlock *CondBlock = createBasicBlock("omp.dispatch.cond"); EmitBlock(CondBlock); const SourceRange R = S.getSourceRange(); + OMPLoopNestStack.clear(); LoopStack.push(CondBlock, SourceLocToDebugLoc(R.getBegin()), SourceLocToDebugLoc(R.getEnd())); @@ -2477,7 +2680,7 @@ void CodeGenFunction::EmitOMPOuterLoop( if (C->getKind() == OMPC_ORDER_concurrent) CGF.LoopStack.setParallel(/*Enable=*/true); } else { - CGF.EmitOMPSimdInit(S, IsMonotonic); + CGF.EmitOMPSimdInit(S); } }, [&S, &LoopArgs, LoopExit, &CodeGenLoop, IVSize, IVSigned, &CodeGenOrdered, @@ -2507,6 +2710,7 @@ void CodeGenFunction::EmitOMPOuterLoop( } EmitBranch(CondBlock); + OMPLoopNestStack.clear(); LoopStack.pop(); // Emit the fall-through block. EmitBlock(LoopExit.getBlock()); @@ -2986,8 +3190,7 @@ bool CodeGenFunction::EmitOMPWorksharingLoop( isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()); bool IsMonotonic = Ordered || - ((ScheduleKind.Schedule == OMPC_SCHEDULE_static || - ScheduleKind.Schedule == OMPC_SCHEDULE_unknown) && + (ScheduleKind.Schedule == OMPC_SCHEDULE_static && !(ScheduleKind.M1 == OMPC_SCHEDULE_MODIFIER_nonmonotonic || ScheduleKind.M2 == OMPC_SCHEDULE_MODIFIER_nonmonotonic)) || ScheduleKind.M1 == OMPC_SCHEDULE_MODIFIER_monotonic || @@ -3000,9 +3203,9 @@ bool CodeGenFunction::EmitOMPWorksharingLoop( getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit")); emitCommonSimdLoop( *this, S, - [&S, IsMonotonic](CodeGenFunction &CGF, PrePostActionTy &) { + [&S](CodeGenFunction &CGF, PrePostActionTy &) { if (isOpenMPSimdDirective(S.getDirectiveKind())) { - CGF.EmitOMPSimdInit(S, IsMonotonic); + CGF.EmitOMPSimdInit(S); } else if (const auto *C = S.getSingleClause<OMPOrderClause>()) { if (C->getKind() == OMPC_ORDER_concurrent) CGF.LoopStack.setParallel(/*Enable=*/true); @@ -3131,53 +3334,30 @@ emitDispatchForLoopBounds(CodeGenFunction &CGF, const OMPExecutableDirective &S, return {LBVal, UBVal}; } -/// Emits the code for the directive with inscan reductions. +/// Emits internal temp array declarations for the directive with inscan +/// reductions. /// The code is the following: /// \code /// size num_iters = <num_iters>; /// <type> buffer[num_iters]; -/// #pragma omp ... -/// for (i: 0..<num_iters>) { -/// <input phase>; -/// buffer[i] = red; -/// } -/// for (int k = 0; k != ceil(log2(num_iters)); ++k) -/// for (size cnt = last_iter; cnt >= pow(2, k); --k) -/// buffer[i] op= buffer[i-pow(2,k)]; -/// #pragma omp ... -/// for (0..<num_iters>) { -/// red = InclusiveScan ? buffer[i] : buffer[i-1]; -/// <scan phase>; -/// } /// \endcode -static void emitScanBasedDirective( +static void emitScanBasedDirectiveDecls( CodeGenFunction &CGF, const OMPLoopDirective &S, - llvm::function_ref<llvm::Value *(CodeGenFunction &)> NumIteratorsGen, - llvm::function_ref<void(CodeGenFunction &)> FirstGen, - llvm::function_ref<void(CodeGenFunction &)> SecondGen) { + llvm::function_ref<llvm::Value *(CodeGenFunction &)> NumIteratorsGen) { llvm::Value *OMPScanNumIterations = CGF.Builder.CreateIntCast( NumIteratorsGen(CGF), CGF.SizeTy, /*isSigned=*/false); SmallVector<const Expr *, 4> Shareds; SmallVector<const Expr *, 4> Privates; SmallVector<const Expr *, 4> ReductionOps; - SmallVector<const Expr *, 4> LHSs; - SmallVector<const Expr *, 4> RHSs; - SmallVector<const Expr *, 4> CopyOps; SmallVector<const Expr *, 4> CopyArrayTemps; - SmallVector<const Expr *, 4> CopyArrayElems; for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) { assert(C->getModifier() == OMPC_REDUCTION_inscan && "Only inscan reductions are expected."); Shareds.append(C->varlist_begin(), C->varlist_end()); Privates.append(C->privates().begin(), C->privates().end()); ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end()); - LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end()); - RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end()); - CopyOps.append(C->copy_ops().begin(), C->copy_ops().end()); CopyArrayTemps.append(C->copy_array_temps().begin(), C->copy_array_temps().end()); - CopyArrayElems.append(C->copy_array_elems().begin(), - C->copy_array_elems().end()); } { // Emit buffers for each reduction variables. @@ -3206,6 +3386,49 @@ static void emitScanBasedDirective( ++Count; } } +} + +/// Emits the code for the directive with inscan reductions. +/// The code is the following: +/// \code +/// #pragma omp ... +/// for (i: 0..<num_iters>) { +/// <input phase>; +/// buffer[i] = red; +/// } +/// #pragma omp master // in parallel region +/// for (int k = 0; k != ceil(log2(num_iters)); ++k) +/// for (size cnt = last_iter; cnt >= pow(2, k); --k) +/// buffer[i] op= buffer[i-pow(2,k)]; +/// #pragma omp barrier // in parallel region +/// #pragma omp ... +/// for (0..<num_iters>) { +/// red = InclusiveScan ? buffer[i] : buffer[i-1]; +/// <scan phase>; +/// } +/// \endcode +static void emitScanBasedDirective( + CodeGenFunction &CGF, const OMPLoopDirective &S, + llvm::function_ref<llvm::Value *(CodeGenFunction &)> NumIteratorsGen, + llvm::function_ref<void(CodeGenFunction &)> FirstGen, + llvm::function_ref<void(CodeGenFunction &)> SecondGen) { + llvm::Value *OMPScanNumIterations = CGF.Builder.CreateIntCast( + NumIteratorsGen(CGF), CGF.SizeTy, /*isSigned=*/false); + SmallVector<const Expr *, 4> Privates; + SmallVector<const Expr *, 4> ReductionOps; + SmallVector<const Expr *, 4> LHSs; + SmallVector<const Expr *, 4> RHSs; + SmallVector<const Expr *, 4> CopyArrayElems; + for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) { + assert(C->getModifier() == OMPC_REDUCTION_inscan && + "Only inscan reductions are expected."); + Privates.append(C->privates().begin(), C->privates().end()); + ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end()); + LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end()); + RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end()); + CopyArrayElems.append(C->copy_array_elems().begin(), + C->copy_array_elems().end()); + } CodeGenFunction::ParentLoopDirectiveForScanRegion ScanRegion(CGF, S); { // Emit loop with input phase: @@ -3218,90 +3441,108 @@ static void emitScanBasedDirective( CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF); FirstGen(CGF); } - // Emit prefix reduction: - // for (int k = 0; k <= ceil(log2(n)); ++k) - llvm::BasicBlock *InputBB = CGF.Builder.GetInsertBlock(); - llvm::BasicBlock *LoopBB = CGF.createBasicBlock("omp.outer.log.scan.body"); - llvm::BasicBlock *ExitBB = CGF.createBasicBlock("omp.outer.log.scan.exit"); - llvm::Function *F = CGF.CGM.getIntrinsic(llvm::Intrinsic::log2, CGF.DoubleTy); - llvm::Value *Arg = - CGF.Builder.CreateUIToFP(OMPScanNumIterations, CGF.DoubleTy); - llvm::Value *LogVal = CGF.EmitNounwindRuntimeCall(F, Arg); - F = CGF.CGM.getIntrinsic(llvm::Intrinsic::ceil, CGF.DoubleTy); - LogVal = CGF.EmitNounwindRuntimeCall(F, LogVal); - LogVal = CGF.Builder.CreateFPToUI(LogVal, CGF.IntTy); - llvm::Value *NMin1 = CGF.Builder.CreateNUWSub( - OMPScanNumIterations, llvm::ConstantInt::get(CGF.SizeTy, 1)); - auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, S.getBeginLoc()); - CGF.EmitBlock(LoopBB); - auto *Counter = CGF.Builder.CreatePHI(CGF.IntTy, 2); - // size pow2k = 1; - auto *Pow2K = CGF.Builder.CreatePHI(CGF.SizeTy, 2); - Counter->addIncoming(llvm::ConstantInt::get(CGF.IntTy, 0), InputBB); - Pow2K->addIncoming(llvm::ConstantInt::get(CGF.SizeTy, 1), InputBB); - // for (size i = n - 1; i >= 2 ^ k; --i) - // tmp[i] op= tmp[i-pow2k]; - llvm::BasicBlock *InnerLoopBB = - CGF.createBasicBlock("omp.inner.log.scan.body"); - llvm::BasicBlock *InnerExitBB = - CGF.createBasicBlock("omp.inner.log.scan.exit"); - llvm::Value *CmpI = CGF.Builder.CreateICmpUGE(NMin1, Pow2K); - CGF.Builder.CreateCondBr(CmpI, InnerLoopBB, InnerExitBB); - CGF.EmitBlock(InnerLoopBB); - auto *IVal = CGF.Builder.CreatePHI(CGF.SizeTy, 2); - IVal->addIncoming(NMin1, LoopBB); - { - CodeGenFunction::OMPPrivateScope PrivScope(CGF); - auto *ILHS = LHSs.begin(); - auto *IRHS = RHSs.begin(); - for (const Expr *CopyArrayElem : CopyArrayElems) { - const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl()); - const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl()); - Address LHSAddr = Address::invalid(); - { - CodeGenFunction::OpaqueValueMapping IdxMapping( - CGF, - cast<OpaqueValueExpr>( - cast<ArraySubscriptExpr>(CopyArrayElem)->getIdx()), - RValue::get(IVal)); - LHSAddr = CGF.EmitLValue(CopyArrayElem).getAddress(CGF); - } - PrivScope.addPrivate(LHSVD, [LHSAddr]() { return LHSAddr; }); - Address RHSAddr = Address::invalid(); - { - llvm::Value *OffsetIVal = CGF.Builder.CreateNUWSub(IVal, Pow2K); - CodeGenFunction::OpaqueValueMapping IdxMapping( - CGF, - cast<OpaqueValueExpr>( - cast<ArraySubscriptExpr>(CopyArrayElem)->getIdx()), - RValue::get(OffsetIVal)); - RHSAddr = CGF.EmitLValue(CopyArrayElem).getAddress(CGF); + // #pragma omp barrier // in parallel region + auto &&CodeGen = [&S, OMPScanNumIterations, &LHSs, &RHSs, &CopyArrayElems, + &ReductionOps, + &Privates](CodeGenFunction &CGF, PrePostActionTy &Action) { + Action.Enter(CGF); + // Emit prefix reduction: + // #pragma omp master // in parallel region + // for (int k = 0; k <= ceil(log2(n)); ++k) + llvm::BasicBlock *InputBB = CGF.Builder.GetInsertBlock(); + llvm::BasicBlock *LoopBB = CGF.createBasicBlock("omp.outer.log.scan.body"); + llvm::BasicBlock *ExitBB = CGF.createBasicBlock("omp.outer.log.scan.exit"); + llvm::Function *F = + CGF.CGM.getIntrinsic(llvm::Intrinsic::log2, CGF.DoubleTy); + llvm::Value *Arg = + CGF.Builder.CreateUIToFP(OMPScanNumIterations, CGF.DoubleTy); + llvm::Value *LogVal = CGF.EmitNounwindRuntimeCall(F, Arg); + F = CGF.CGM.getIntrinsic(llvm::Intrinsic::ceil, CGF.DoubleTy); + LogVal = CGF.EmitNounwindRuntimeCall(F, LogVal); + LogVal = CGF.Builder.CreateFPToUI(LogVal, CGF.IntTy); + llvm::Value *NMin1 = CGF.Builder.CreateNUWSub( + OMPScanNumIterations, llvm::ConstantInt::get(CGF.SizeTy, 1)); + auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, S.getBeginLoc()); + CGF.EmitBlock(LoopBB); + auto *Counter = CGF.Builder.CreatePHI(CGF.IntTy, 2); + // size pow2k = 1; + auto *Pow2K = CGF.Builder.CreatePHI(CGF.SizeTy, 2); + Counter->addIncoming(llvm::ConstantInt::get(CGF.IntTy, 0), InputBB); + Pow2K->addIncoming(llvm::ConstantInt::get(CGF.SizeTy, 1), InputBB); + // for (size i = n - 1; i >= 2 ^ k; --i) + // tmp[i] op= tmp[i-pow2k]; + llvm::BasicBlock *InnerLoopBB = + CGF.createBasicBlock("omp.inner.log.scan.body"); + llvm::BasicBlock *InnerExitBB = + CGF.createBasicBlock("omp.inner.log.scan.exit"); + llvm::Value *CmpI = CGF.Builder.CreateICmpUGE(NMin1, Pow2K); + CGF.Builder.CreateCondBr(CmpI, InnerLoopBB, InnerExitBB); + CGF.EmitBlock(InnerLoopBB); + auto *IVal = CGF.Builder.CreatePHI(CGF.SizeTy, 2); + IVal->addIncoming(NMin1, LoopBB); + { + CodeGenFunction::OMPPrivateScope PrivScope(CGF); + auto *ILHS = LHSs.begin(); + auto *IRHS = RHSs.begin(); + for (const Expr *CopyArrayElem : CopyArrayElems) { + const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl()); + const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl()); + Address LHSAddr = Address::invalid(); + { + CodeGenFunction::OpaqueValueMapping IdxMapping( + CGF, + cast<OpaqueValueExpr>( + cast<ArraySubscriptExpr>(CopyArrayElem)->getIdx()), + RValue::get(IVal)); + LHSAddr = CGF.EmitLValue(CopyArrayElem).getAddress(CGF); + } + PrivScope.addPrivate(LHSVD, [LHSAddr]() { return LHSAddr; }); + Address RHSAddr = Address::invalid(); + { + llvm::Value *OffsetIVal = CGF.Builder.CreateNUWSub(IVal, Pow2K); + CodeGenFunction::OpaqueValueMapping IdxMapping( + CGF, + cast<OpaqueValueExpr>( + cast<ArraySubscriptExpr>(CopyArrayElem)->getIdx()), + RValue::get(OffsetIVal)); + RHSAddr = CGF.EmitLValue(CopyArrayElem).getAddress(CGF); + } + PrivScope.addPrivate(RHSVD, [RHSAddr]() { return RHSAddr; }); + ++ILHS; + ++IRHS; } - PrivScope.addPrivate(RHSVD, [RHSAddr]() { return RHSAddr; }); - ++ILHS; - ++IRHS; + PrivScope.Privatize(); + CGF.CGM.getOpenMPRuntime().emitReduction( + CGF, S.getEndLoc(), Privates, LHSs, RHSs, ReductionOps, + {/*WithNowait=*/true, /*SimpleReduction=*/true, OMPD_unknown}); } - PrivScope.Privatize(); - CGF.CGM.getOpenMPRuntime().emitReduction( - CGF, S.getEndLoc(), Privates, LHSs, RHSs, ReductionOps, - {/*WithNowait=*/true, /*SimpleReduction=*/true, OMPD_unknown}); - } - llvm::Value *NextIVal = - CGF.Builder.CreateNUWSub(IVal, llvm::ConstantInt::get(CGF.SizeTy, 1)); - IVal->addIncoming(NextIVal, CGF.Builder.GetInsertBlock()); - CmpI = CGF.Builder.CreateICmpUGE(NextIVal, Pow2K); - CGF.Builder.CreateCondBr(CmpI, InnerLoopBB, InnerExitBB); - CGF.EmitBlock(InnerExitBB); - llvm::Value *Next = - CGF.Builder.CreateNUWAdd(Counter, llvm::ConstantInt::get(CGF.IntTy, 1)); - Counter->addIncoming(Next, CGF.Builder.GetInsertBlock()); - // pow2k <<= 1; - llvm::Value *NextPow2K = CGF.Builder.CreateShl(Pow2K, 1, "", /*HasNUW=*/true); - Pow2K->addIncoming(NextPow2K, CGF.Builder.GetInsertBlock()); - llvm::Value *Cmp = CGF.Builder.CreateICmpNE(Next, LogVal); - CGF.Builder.CreateCondBr(Cmp, LoopBB, ExitBB); - auto DL1 = ApplyDebugLocation::CreateDefaultArtificial(CGF, S.getEndLoc()); - CGF.EmitBlock(ExitBB); + llvm::Value *NextIVal = + CGF.Builder.CreateNUWSub(IVal, llvm::ConstantInt::get(CGF.SizeTy, 1)); + IVal->addIncoming(NextIVal, CGF.Builder.GetInsertBlock()); + CmpI = CGF.Builder.CreateICmpUGE(NextIVal, Pow2K); + CGF.Builder.CreateCondBr(CmpI, InnerLoopBB, InnerExitBB); + CGF.EmitBlock(InnerExitBB); + llvm::Value *Next = + CGF.Builder.CreateNUWAdd(Counter, llvm::ConstantInt::get(CGF.IntTy, 1)); + Counter->addIncoming(Next, CGF.Builder.GetInsertBlock()); + // pow2k <<= 1; + llvm::Value *NextPow2K = + CGF.Builder.CreateShl(Pow2K, 1, "", /*HasNUW=*/true); + Pow2K->addIncoming(NextPow2K, CGF.Builder.GetInsertBlock()); + llvm::Value *Cmp = CGF.Builder.CreateICmpNE(Next, LogVal); + CGF.Builder.CreateCondBr(Cmp, LoopBB, ExitBB); + auto DL1 = ApplyDebugLocation::CreateDefaultArtificial(CGF, S.getEndLoc()); + CGF.EmitBlock(ExitBB); + }; + if (isOpenMPParallelDirective(S.getDirectiveKind())) { + CGF.CGM.getOpenMPRuntime().emitMasterRegion(CGF, CodeGen, S.getBeginLoc()); + CGF.CGM.getOpenMPRuntime().emitBarrierCall( + CGF, S.getBeginLoc(), OMPD_unknown, /*EmitChecks=*/false, + /*ForceSimpleCall=*/true); + } else { + RegionCodeGenTy RCG(CodeGen); + RCG(CGF); + } CGF.OMPFirstScanLoop = false; SecondGen(CGF); @@ -3338,6 +3579,8 @@ static bool emitWorksharingDirective(CodeGenFunction &CGF, emitForLoopBounds, emitDispatchForLoopBounds); }; + if (!isOpenMPParallelDirective(S.getDirectiveKind())) + emitScanBasedDirectiveDecls(CGF, S, NumIteratorsGen); emitScanBasedDirective(CGF, S, NumIteratorsGen, FirstGen, SecondGen); } else { CodeGenFunction::OMPCancelStackRAII CancelRegion(CGF, S.getDirectiveKind(), @@ -3349,10 +3592,38 @@ static bool emitWorksharingDirective(CodeGenFunction &CGF, return HasLastprivates; } +static bool isSupportedByOpenMPIRBuilder(const OMPForDirective &S) { + if (S.hasCancel()) + return false; + for (OMPClause *C : S.clauses()) + if (!isa<OMPNowaitClause>(C)) + return false; + + return true; +} + void CodeGenFunction::EmitOMPForDirective(const OMPForDirective &S) { bool HasLastprivates = false; - auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF, - PrePostActionTy &) { + bool UseOMPIRBuilder = + CGM.getLangOpts().OpenMPIRBuilder && isSupportedByOpenMPIRBuilder(S); + auto &&CodeGen = [this, &S, &HasLastprivates, + UseOMPIRBuilder](CodeGenFunction &CGF, PrePostActionTy &) { + // Use the OpenMPIRBuilder if enabled. + if (UseOMPIRBuilder) { + // Emit the associated statement and get its loop representation. + const Stmt *Inner = S.getRawStmt(); + llvm::CanonicalLoopInfo *CLI = + EmitOMPCollapsedCanonicalLoopNest(Inner, 1); + + bool NeedsBarrier = !S.getSingleClause<OMPNowaitClause>(); + llvm::OpenMPIRBuilder &OMPBuilder = + CGM.getOpenMPRuntime().getOMPBuilder(); + llvm::OpenMPIRBuilder::InsertPointTy AllocaIP( + AllocaInsertPt->getParent(), AllocaInsertPt->getIterator()); + OMPBuilder.createWorkshareLoop(Builder, CLI, AllocaIP, NeedsBarrier); + return; + } + HasLastprivates = emitWorksharingDirective(CGF, S, S.hasCancel()); }; { @@ -3363,9 +3634,11 @@ void CodeGenFunction::EmitOMPForDirective(const OMPForDirective &S) { S.hasCancel()); } - // Emit an implicit barrier at the end. - if (!S.getSingleClause<OMPNowaitClause>() || HasLastprivates) - CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(), OMPD_for); + if (!UseOMPIRBuilder) { + // Emit an implicit barrier at the end. + if (!S.getSingleClause<OMPNowaitClause>() || HasLastprivates) + CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(), OMPD_for); + } // Check for outer lastprivate conditional update. checkForLastprivateConditionalUpdate(*this, S); } @@ -3428,11 +3701,11 @@ void CodeGenFunction::EmitSections(const OMPExecutableDirective &S) { CodeGenFunction::OpaqueValueMapping OpaqueUB(CGF, &UBRefExpr, UB); // Generate condition for loop. BinaryOperator *Cond = BinaryOperator::Create( - C, &IVRefExpr, &UBRefExpr, BO_LE, C.BoolTy, VK_RValue, OK_Ordinary, + C, &IVRefExpr, &UBRefExpr, BO_LE, C.BoolTy, VK_PRValue, OK_Ordinary, S.getBeginLoc(), FPOptionsOverride()); // Increment for loop counter. UnaryOperator *Inc = UnaryOperator::Create( - C, &IVRefExpr, UO_PreInc, KmpInt32Ty, VK_RValue, OK_Ordinary, + C, &IVRefExpr, UO_PreInc, KmpInt32Ty, VK_PRValue, OK_Ordinary, S.getBeginLoc(), true, FPOptionsOverride()); auto &&BodyGen = [CapturedStmt, CS, &S, &IV](CodeGenFunction &CGF) { // Iterate through all sections and emit a switch construct: @@ -3546,6 +3819,64 @@ void CodeGenFunction::EmitSections(const OMPExecutableDirective &S) { } void CodeGenFunction::EmitOMPSectionsDirective(const OMPSectionsDirective &S) { + if (CGM.getLangOpts().OpenMPIRBuilder) { + llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder(); + using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy; + using BodyGenCallbackTy = llvm::OpenMPIRBuilder::StorableBodyGenCallbackTy; + + auto FiniCB = [this](InsertPointTy IP) { + OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP); + }; + + const CapturedStmt *ICS = S.getInnermostCapturedStmt(); + const Stmt *CapturedStmt = S.getInnermostCapturedStmt()->getCapturedStmt(); + const auto *CS = dyn_cast<CompoundStmt>(CapturedStmt); + llvm::SmallVector<BodyGenCallbackTy, 4> SectionCBVector; + if (CS) { + for (const Stmt *SubStmt : CS->children()) { + auto SectionCB = [this, SubStmt](InsertPointTy AllocaIP, + InsertPointTy CodeGenIP, + llvm::BasicBlock &FiniBB) { + OMPBuilderCBHelpers::InlinedRegionBodyRAII IRB(*this, AllocaIP, + FiniBB); + OMPBuilderCBHelpers::EmitOMPRegionBody(*this, SubStmt, CodeGenIP, + FiniBB); + }; + SectionCBVector.push_back(SectionCB); + } + } else { + auto SectionCB = [this, CapturedStmt](InsertPointTy AllocaIP, + InsertPointTy CodeGenIP, + llvm::BasicBlock &FiniBB) { + OMPBuilderCBHelpers::InlinedRegionBodyRAII IRB(*this, AllocaIP, FiniBB); + OMPBuilderCBHelpers::EmitOMPRegionBody(*this, CapturedStmt, CodeGenIP, + FiniBB); + }; + SectionCBVector.push_back(SectionCB); + } + + // Privatization callback that performs appropriate action for + // shared/private/firstprivate/lastprivate/copyin/... variables. + // + // TODO: This defaults to shared right now. + auto PrivCB = [](InsertPointTy AllocaIP, InsertPointTy CodeGenIP, + llvm::Value &, llvm::Value &Val, llvm::Value *&ReplVal) { + // The next line is appropriate only for variables (Val) with the + // data-sharing attribute "shared". + ReplVal = &Val; + + return CodeGenIP; + }; + + CGCapturedStmtInfo CGSI(*ICS, CR_OpenMP); + CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(*this, &CGSI); + llvm::OpenMPIRBuilder::InsertPointTy AllocaIP( + AllocaInsertPt->getParent(), AllocaInsertPt->getIterator()); + Builder.restoreIP(OMPBuilder.createSections( + Builder, AllocaIP, SectionCBVector, PrivCB, FiniCB, S.hasCancel(), + S.getSingleClause<OMPNowaitClause>())); + return; + } { auto LPCRegion = CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S); @@ -3562,6 +3893,29 @@ void CodeGenFunction::EmitOMPSectionsDirective(const OMPSectionsDirective &S) { } void CodeGenFunction::EmitOMPSectionDirective(const OMPSectionDirective &S) { + if (CGM.getLangOpts().OpenMPIRBuilder) { + llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder(); + using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy; + + const Stmt *SectionRegionBodyStmt = S.getAssociatedStmt(); + auto FiniCB = [this](InsertPointTy IP) { + OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP); + }; + + auto BodyGenCB = [SectionRegionBodyStmt, this](InsertPointTy AllocaIP, + InsertPointTy CodeGenIP, + llvm::BasicBlock &FiniBB) { + OMPBuilderCBHelpers::InlinedRegionBodyRAII IRB(*this, AllocaIP, FiniBB); + OMPBuilderCBHelpers::EmitOMPRegionBody(*this, SectionRegionBodyStmt, + CodeGenIP, FiniBB); + }; + + LexicalScope Scope(*this, S.getSourceRange()); + EmitStopPoint(&S); + Builder.restoreIP(OMPBuilder.createSection(Builder, BodyGenCB, FiniCB)); + + return; + } LexicalScope Scope(*this, S.getSourceRange()); EmitStopPoint(&S); EmitStmt(S.getAssociatedStmt()); @@ -3650,6 +4004,55 @@ void CodeGenFunction::EmitOMPMasterDirective(const OMPMasterDirective &S) { emitMaster(*this, S); } +static void emitMasked(CodeGenFunction &CGF, const OMPExecutableDirective &S) { + auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { + Action.Enter(CGF); + CGF.EmitStmt(S.getRawStmt()); + }; + Expr *Filter = nullptr; + if (const auto *FilterClause = S.getSingleClause<OMPFilterClause>()) + Filter = FilterClause->getThreadID(); + CGF.CGM.getOpenMPRuntime().emitMaskedRegion(CGF, CodeGen, S.getBeginLoc(), + Filter); +} + +void CodeGenFunction::EmitOMPMaskedDirective(const OMPMaskedDirective &S) { + if (CGM.getLangOpts().OpenMPIRBuilder) { + llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder(); + using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy; + + const Stmt *MaskedRegionBodyStmt = S.getAssociatedStmt(); + const Expr *Filter = nullptr; + if (const auto *FilterClause = S.getSingleClause<OMPFilterClause>()) + Filter = FilterClause->getThreadID(); + llvm::Value *FilterVal = Filter + ? EmitScalarExpr(Filter, CGM.Int32Ty) + : llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/0); + + auto FiniCB = [this](InsertPointTy IP) { + OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP); + }; + + auto BodyGenCB = [MaskedRegionBodyStmt, this](InsertPointTy AllocaIP, + InsertPointTy CodeGenIP, + llvm::BasicBlock &FiniBB) { + OMPBuilderCBHelpers::InlinedRegionBodyRAII IRB(*this, AllocaIP, FiniBB); + OMPBuilderCBHelpers::EmitOMPRegionBody(*this, MaskedRegionBodyStmt, + CodeGenIP, FiniBB); + }; + + LexicalScope Scope(*this, S.getSourceRange()); + EmitStopPoint(&S); + Builder.restoreIP( + OMPBuilder.createMasked(Builder, BodyGenCB, FiniCB, FilterVal)); + + return; + } + LexicalScope Scope(*this, S.getSourceRange()); + EmitStopPoint(&S); + emitMasked(*this, S); +} + void CodeGenFunction::EmitOMPCriticalDirective(const OMPCriticalDirective &S) { if (CGM.getLangOpts().OpenMPIRBuilder) { llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder(); @@ -3712,6 +4115,19 @@ void CodeGenFunction::EmitOMPParallelForDirective( (void)emitWorksharingDirective(CGF, S, S.hasCancel()); }; { + if (llvm::any_of(S.getClausesOfKind<OMPReductionClause>(), + [](const OMPReductionClause *C) { + return C->getModifier() == OMPC_REDUCTION_inscan; + })) { + const auto &&NumIteratorsGen = [&S](CodeGenFunction &CGF) { + CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF); + CGCapturedStmtInfo CGSI(CR_OpenMP); + CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGSI); + OMPLoopScope LoopScope(CGF, S); + return CGF.EmitScalarExpr(S.getNumIterations()); + }; + emitScanBasedDirectiveDecls(*this, S, NumIteratorsGen); + } auto LPCRegion = CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S); emitCommonOMPParallelDirective(*this, S, OMPD_for, CodeGen, @@ -3730,6 +4146,19 @@ void CodeGenFunction::EmitOMPParallelForSimdDirective( (void)emitWorksharingDirective(CGF, S, /*HasCancel=*/false); }; { + if (llvm::any_of(S.getClausesOfKind<OMPReductionClause>(), + [](const OMPReductionClause *C) { + return C->getModifier() == OMPC_REDUCTION_inscan; + })) { + const auto &&NumIteratorsGen = [&S](CodeGenFunction &CGF) { + CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF); + CGCapturedStmtInfo CGSI(CR_OpenMP); + CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGSI); + OMPLoopScope LoopScope(CGF, S); + return CGF.EmitScalarExpr(S.getNumIterations()); + }; + emitScanBasedDirectiveDecls(*this, S, NumIteratorsGen); + } auto LPCRegion = CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S); emitCommonOMPParallelDirective(*this, S, OMPD_for_simd, CodeGen, @@ -3892,7 +4321,7 @@ void CodeGenFunction::EmitOMPTaskBasedDirective( } } // Get list of lastprivate variables (for taskloops). - llvm::DenseMap<const VarDecl *, const DeclRefExpr *> LastprivateDstsOrigs; + llvm::MapVector<const VarDecl *, const DeclRefExpr *> LastprivateDstsOrigs; for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) { auto IRef = C->varlist_begin(); auto ID = C->destination_exprs().begin(); @@ -3903,8 +4332,8 @@ void CodeGenFunction::EmitOMPTaskBasedDirective( Data.LastprivateCopies.push_back(IInit); } LastprivateDstsOrigs.insert( - {cast<VarDecl>(cast<DeclRefExpr>(*ID)->getDecl()), - cast<DeclRefExpr>(*IRef)}); + std::make_pair(cast<VarDecl>(cast<DeclRefExpr>(*ID)->getDecl()), + cast<DeclRefExpr>(*IRef))); ++IRef; ++ID; } @@ -3938,15 +4367,14 @@ void CodeGenFunction::EmitOMPTaskBasedDirective( auto &&CodeGen = [&Data, &S, CS, &BodyGen, &LastprivateDstsOrigs, CapturedRegion](CodeGenFunction &CGF, PrePostActionTy &Action) { - llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, std::pair<Address, Address>> + llvm::MapVector<CanonicalDeclPtr<const VarDecl>, + std::pair<Address, Address>> UntiedLocalVars; // Set proper addresses for generated private copies. OMPPrivateScope Scope(CGF); llvm::SmallVector<std::pair<const VarDecl *, Address>, 16> FirstprivatePtrs; if (!Data.PrivateVars.empty() || !Data.FirstprivateVars.empty() || !Data.LastprivateVars.empty() || !Data.PrivateLocals.empty()) { - llvm::FunctionType *CopyFnTy = llvm::FunctionType::get( - CGF.Builder.getVoidTy(), {CGF.Builder.getInt8PtrTy()}, true); enum { PrivatesParam = 2, CopyFnParam = 3 }; llvm::Value *CopyFn = CGF.Builder.CreateLoad( CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(CopyFnParam))); @@ -3955,13 +4383,16 @@ void CodeGenFunction::EmitOMPTaskBasedDirective( // Map privates. llvm::SmallVector<std::pair<const VarDecl *, Address>, 16> PrivatePtrs; llvm::SmallVector<llvm::Value *, 16> CallArgs; + llvm::SmallVector<llvm::Type *, 4> ParamTypes; CallArgs.push_back(PrivatesPtr); + ParamTypes.push_back(PrivatesPtr->getType()); for (const Expr *E : Data.PrivateVars) { const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); Address PrivatePtr = CGF.CreateMemTemp( CGF.getContext().getPointerType(E->getType()), ".priv.ptr.addr"); PrivatePtrs.emplace_back(VD, PrivatePtr); CallArgs.push_back(PrivatePtr.getPointer()); + ParamTypes.push_back(PrivatePtr.getType()); } for (const Expr *E : Data.FirstprivateVars) { const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); @@ -3971,6 +4402,7 @@ void CodeGenFunction::EmitOMPTaskBasedDirective( PrivatePtrs.emplace_back(VD, PrivatePtr); FirstprivatePtrs.emplace_back(VD, PrivatePtr); CallArgs.push_back(PrivatePtr.getPointer()); + ParamTypes.push_back(PrivatePtr.getType()); } for (const Expr *E : Data.LastprivateVars) { const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); @@ -3979,6 +4411,7 @@ void CodeGenFunction::EmitOMPTaskBasedDirective( ".lastpriv.ptr.addr"); PrivatePtrs.emplace_back(VD, PrivatePtr); CallArgs.push_back(PrivatePtr.getPointer()); + ParamTypes.push_back(PrivatePtr.getType()); } for (const VarDecl *VD : Data.PrivateLocals) { QualType Ty = VD->getType().getNonReferenceType(); @@ -3988,9 +4421,19 @@ void CodeGenFunction::EmitOMPTaskBasedDirective( Ty = CGF.getContext().getPointerType(Ty); Address PrivatePtr = CGF.CreateMemTemp( CGF.getContext().getPointerType(Ty), ".local.ptr.addr"); - UntiedLocalVars.try_emplace(VD, PrivatePtr, Address::invalid()); + auto Result = UntiedLocalVars.insert( + std::make_pair(VD, std::make_pair(PrivatePtr, Address::invalid()))); + // If key exists update in place. + if (Result.second == false) + *Result.first = std::make_pair( + VD, std::make_pair(PrivatePtr, Address::invalid())); CallArgs.push_back(PrivatePtr.getPointer()); + ParamTypes.push_back(PrivatePtr.getType()); } + auto *CopyFnTy = llvm::FunctionType::get(CGF.Builder.getVoidTy(), + ParamTypes, /*isVarArg=*/false); + CopyFn = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( + CopyFn, CopyFnTy->getPointerTo()); CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall( CGF, S.getBeginLoc(), {CopyFnTy, CopyFn}, CallArgs); for (const auto &Pair : LastprivateDstsOrigs) { @@ -4015,14 +4458,14 @@ void CodeGenFunction::EmitOMPTaskBasedDirective( if (isAllocatableDecl(Pair.first)) { llvm::Value *Ptr = CGF.Builder.CreateLoad(Pair.second.first); Address Replacement(Ptr, CGF.getPointerAlign()); - Pair.getSecond().first = Replacement; + Pair.second.first = Replacement; Ptr = CGF.Builder.CreateLoad(Replacement); Replacement = Address(Ptr, CGF.getContext().getDeclAlign(Pair.first)); - Pair.getSecond().second = Replacement; + Pair.second.second = Replacement; } else { llvm::Value *Ptr = CGF.Builder.CreateLoad(Pair.second.first); Address Replacement(Ptr, CGF.getContext().getDeclAlign(Pair.first)); - Pair.getSecond().first = Replacement; + Pair.second.first = Replacement; } } } @@ -4156,7 +4599,7 @@ createImplicitFirstprivateForType(ASTContext &C, OMPTaskDataTy &Data, PrivateVD->setInitStyle(VarDecl::CInit); PrivateVD->setInit(ImplicitCastExpr::Create(C, ElemType, CK_LValueToRValue, InitRef, /*BasePath=*/nullptr, - VK_RValue, FPOptionsOverride())); + VK_PRValue, FPOptionsOverride())); Data.FirstprivateVars.emplace_back(OrigRef); Data.FirstprivateCopies.emplace_back(PrivateRef); Data.FirstprivateInits.emplace_back(InitRef); @@ -4238,8 +4681,6 @@ void CodeGenFunction::EmitOMPTargetTaskBasedDirective( // Set proper addresses for generated private copies. OMPPrivateScope Scope(CGF); if (!Data.FirstprivateVars.empty()) { - llvm::FunctionType *CopyFnTy = llvm::FunctionType::get( - CGF.Builder.getVoidTy(), {CGF.Builder.getInt8PtrTy()}, true); enum { PrivatesParam = 2, CopyFnParam = 3 }; llvm::Value *CopyFn = CGF.Builder.CreateLoad( CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(CopyFnParam))); @@ -4248,7 +4689,9 @@ void CodeGenFunction::EmitOMPTargetTaskBasedDirective( // Map privates. llvm::SmallVector<std::pair<const VarDecl *, Address>, 16> PrivatePtrs; llvm::SmallVector<llvm::Value *, 16> CallArgs; + llvm::SmallVector<llvm::Type *, 4> ParamTypes; CallArgs.push_back(PrivatesPtr); + ParamTypes.push_back(PrivatesPtr->getType()); for (const Expr *E : Data.FirstprivateVars) { const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); Address PrivatePtr = @@ -4256,7 +4699,12 @@ void CodeGenFunction::EmitOMPTargetTaskBasedDirective( ".firstpriv.ptr.addr"); PrivatePtrs.emplace_back(VD, PrivatePtr); CallArgs.push_back(PrivatePtr.getPointer()); + ParamTypes.push_back(PrivatePtr.getType()); } + auto *CopyFnTy = llvm::FunctionType::get(CGF.Builder.getVoidTy(), + ParamTypes, /*isVarArg=*/false); + CopyFn = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( + CopyFn, CopyFnTy->getPointerTo()); CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall( CGF, S.getBeginLoc(), {CopyFnTy, CopyFn}, CallArgs); for (const auto &Pair : PrivatePtrs) { @@ -4779,7 +5227,7 @@ void CodeGenFunction::EmitOMPDistributeLoop(const OMPLoopDirective &S, *this, S, [&S](CodeGenFunction &CGF, PrePostActionTy &) { if (isOpenMPSimdDirective(S.getDirectiveKind())) - CGF.EmitOMPSimdInit(S, /*IsMonotonic=*/true); + CGF.EmitOMPSimdInit(S); }, [&S, &LoopScope, Cond, IncExpr, LoopExit, &CodeGenLoop, StaticChunked](CodeGenFunction &CGF, PrePostActionTy &) { @@ -4859,6 +5307,8 @@ static llvm::Function *emitOutlinedOrderedFunction(CodeGenModule &CGM, CGF.CapturedStmtInfo = &CapStmtInfo; llvm::Function *Fn = CGF.GenerateOpenMPCapturedStmtFunction(*S, Loc); Fn->setDoesNotRecurse(); + if (CGM.getCodeGenOpts().OptimizationLevel != 0) + Fn->addFnAttr(llvm::Attribute::AlwaysInline); return Fn; } @@ -5281,32 +5731,35 @@ static void emitOMPAtomicCaptureExpr(CodeGenFunction &CGF, // Emit post-update store to 'v' of old/new 'x' value. CGF.emitOMPSimpleStore(VLValue, NewVVal, NewVValType, Loc); CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, V); - // OpenMP, 2.17.7, atomic Construct - // If the write, update, or capture clause is specified and the release, - // acq_rel, or seq_cst clause is specified then the strong flush on entry to - // the atomic operation is also a release flush. - // If the read or capture clause is specified and the acquire, acq_rel, or - // seq_cst clause is specified then the strong flush on exit from the atomic - // operation is also an acquire flush. - switch (AO) { - case llvm::AtomicOrdering::Release: - CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc, - llvm::AtomicOrdering::Release); - break; - case llvm::AtomicOrdering::Acquire: - CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc, - llvm::AtomicOrdering::Acquire); - break; - case llvm::AtomicOrdering::AcquireRelease: - case llvm::AtomicOrdering::SequentiallyConsistent: - CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc, - llvm::AtomicOrdering::AcquireRelease); - break; - case llvm::AtomicOrdering::Monotonic: - break; - case llvm::AtomicOrdering::NotAtomic: - case llvm::AtomicOrdering::Unordered: - llvm_unreachable("Unexpected ordering."); + // OpenMP 5.1 removes the required flush for capture clause. + if (CGF.CGM.getLangOpts().OpenMP < 51) { + // OpenMP, 2.17.7, atomic Construct + // If the write, update, or capture clause is specified and the release, + // acq_rel, or seq_cst clause is specified then the strong flush on entry to + // the atomic operation is also a release flush. + // If the read or capture clause is specified and the acquire, acq_rel, or + // seq_cst clause is specified then the strong flush on exit from the atomic + // operation is also an acquire flush. + switch (AO) { + case llvm::AtomicOrdering::Release: + CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc, + llvm::AtomicOrdering::Release); + break; + case llvm::AtomicOrdering::Acquire: + CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc, + llvm::AtomicOrdering::Acquire); + break; + case llvm::AtomicOrdering::AcquireRelease: + case llvm::AtomicOrdering::SequentiallyConsistent: + CGF.CGM.getOpenMPRuntime().emitFlush( + CGF, llvm::None, Loc, llvm::AtomicOrdering::AcquireRelease); + break; + case llvm::AtomicOrdering::Monotonic: + break; + case llvm::AtomicOrdering::NotAtomic: + case llvm::AtomicOrdering::Unordered: + llvm_unreachable("Unexpected ordering."); + } } } @@ -5341,6 +5794,9 @@ static void emitOMPAtomicExpr(CodeGenFunction &CGF, OpenMPClauseKind Kind, case OMPC_in_reduction: case OMPC_safelen: case OMPC_simdlen: + case OMPC_sizes: + case OMPC_full: + case OMPC_partial: case OMPC_allocator: case OMPC_allocate: case OMPC_collapse: @@ -5399,7 +5855,14 @@ static void emitOMPAtomicExpr(CodeGenFunction &CGF, OpenMPClauseKind Kind, case OMPC_exclusive: case OMPC_uses_allocators: case OMPC_affinity: - default: + case OMPC_init: + case OMPC_inbranch: + case OMPC_notinbranch: + case OMPC_link: + case OMPC_use: + case OMPC_novariants: + case OMPC_nocontext: + case OMPC_filter: llvm_unreachable("Clause is not allowed in 'omp atomic'."); } } @@ -5431,7 +5894,7 @@ void CodeGenFunction::EmitOMPAtomicDirective(const OMPAtomicDirective &S) { C->getClauseKind() != OMPC_acq_rel && C->getClauseKind() != OMPC_acquire && C->getClauseKind() != OMPC_release && - C->getClauseKind() != OMPC_relaxed) { + C->getClauseKind() != OMPC_relaxed && C->getClauseKind() != OMPC_hint) { Kind = C->getClauseKind(); break; } @@ -5554,6 +6017,7 @@ static void emitTargetRegion(CodeGenFunction &CGF, const OMPTargetDirective &S, CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, S); CGF.EmitStmt(S.getCapturedStmt(OMPD_target)->getCapturedStmt()); + CGF.EnsureInsertPoint(); } void CodeGenFunction::EmitOMPTargetDeviceFunction(CodeGenModule &CGM, @@ -5970,7 +6434,9 @@ void CodeGenFunction::EmitOMPCancelDirective(const OMPCancelDirective &S) { llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder(); // TODO: This check is necessary as we only generate `omp parallel` through // the OpenMPIRBuilder for now. - if (S.getCancelRegion() == OMPD_parallel) { + if (S.getCancelRegion() == OMPD_parallel || + S.getCancelRegion() == OMPD_sections || + S.getCancelRegion() == OMPD_section) { llvm::Value *IfCondition = nullptr; if (IfCond) IfCondition = EmitScalarExpr(IfCond, @@ -6695,7 +7161,7 @@ void CodeGenFunction::EmitSimpleOMPExecutableDirective( for (const auto *C : D.getClausesOfKind<OMPOrderedClause>()) { if (!C->getNumForLoops()) continue; - for (unsigned I = LD->getCollapsedNumber(), + for (unsigned I = LD->getLoopsNumber(), E = C->getLoopNumIterations().size(); I < E; ++I) { if (const auto *VD = dyn_cast<OMPCapturedExprDecl>( @@ -6714,7 +7180,8 @@ void CodeGenFunction::EmitSimpleOMPExecutableDirective( if (D.getDirectiveKind() == OMPD_atomic || D.getDirectiveKind() == OMPD_critical || D.getDirectiveKind() == OMPD_section || - D.getDirectiveKind() == OMPD_master) { + D.getDirectiveKind() == OMPD_master || + D.getDirectiveKind() == OMPD_masked) { EmitStmt(D.getAssociatedStmt()); } else { auto LPCRegion = |