diff options
Diffstat (limited to 'contrib/llvm-project/clang/lib/CodeGen/CGStmt.cpp')
-rw-r--r-- | contrib/llvm-project/clang/lib/CodeGen/CGStmt.cpp | 301 |
1 files changed, 245 insertions, 56 deletions
diff --git a/contrib/llvm-project/clang/lib/CodeGen/CGStmt.cpp b/contrib/llvm-project/clang/lib/CodeGen/CGStmt.cpp index 672909849bb7..a1a72a9f668d 100644 --- a/contrib/llvm-project/clang/lib/CodeGen/CGStmt.cpp +++ b/contrib/llvm-project/clang/lib/CodeGen/CGStmt.cpp @@ -18,9 +18,11 @@ #include "clang/AST/Attr.h" #include "clang/AST/StmtVisitor.h" #include "clang/Basic/Builtins.h" +#include "clang/Basic/DiagnosticSema.h" #include "clang/Basic/PrettyStackTrace.h" #include "clang/Basic/SourceManager.h" #include "clang/Basic/TargetInfo.h" +#include "llvm/ADT/SmallSet.h" #include "llvm/ADT/StringExtras.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/InlineAsm.h" @@ -50,7 +52,7 @@ void CodeGenFunction::EmitStmt(const Stmt *S, ArrayRef<const Attr *> Attrs) { PGO.setCurrentStmt(S); // These statements have their own debug info handling. - if (EmitSimpleStmt(S)) + if (EmitSimpleStmt(S, Attrs)) return; // Check if we are generating unreachable code. @@ -370,23 +372,44 @@ void CodeGenFunction::EmitStmt(const Stmt *S, ArrayRef<const Attr *> Attrs) { } } -bool CodeGenFunction::EmitSimpleStmt(const Stmt *S) { +bool CodeGenFunction::EmitSimpleStmt(const Stmt *S, + ArrayRef<const Attr *> Attrs) { switch (S->getStmtClass()) { - default: return false; - case Stmt::NullStmtClass: break; - case Stmt::CompoundStmtClass: EmitCompoundStmt(cast<CompoundStmt>(*S)); break; - case Stmt::DeclStmtClass: EmitDeclStmt(cast<DeclStmt>(*S)); break; - case Stmt::LabelStmtClass: EmitLabelStmt(cast<LabelStmt>(*S)); break; + default: + return false; + case Stmt::NullStmtClass: + break; + case Stmt::CompoundStmtClass: + EmitCompoundStmt(cast<CompoundStmt>(*S)); + break; + case Stmt::DeclStmtClass: + EmitDeclStmt(cast<DeclStmt>(*S)); + break; + case Stmt::LabelStmtClass: + EmitLabelStmt(cast<LabelStmt>(*S)); + break; case Stmt::AttributedStmtClass: - EmitAttributedStmt(cast<AttributedStmt>(*S)); break; - case Stmt::GotoStmtClass: EmitGotoStmt(cast<GotoStmt>(*S)); break; - case Stmt::BreakStmtClass: EmitBreakStmt(cast<BreakStmt>(*S)); break; - case Stmt::ContinueStmtClass: EmitContinueStmt(cast<ContinueStmt>(*S)); break; - case Stmt::DefaultStmtClass: EmitDefaultStmt(cast<DefaultStmt>(*S)); break; - case Stmt::CaseStmtClass: EmitCaseStmt(cast<CaseStmt>(*S)); break; - case Stmt::SEHLeaveStmtClass: EmitSEHLeaveStmt(cast<SEHLeaveStmt>(*S)); break; + EmitAttributedStmt(cast<AttributedStmt>(*S)); + break; + case Stmt::GotoStmtClass: + EmitGotoStmt(cast<GotoStmt>(*S)); + break; + case Stmt::BreakStmtClass: + EmitBreakStmt(cast<BreakStmt>(*S)); + break; + case Stmt::ContinueStmtClass: + EmitContinueStmt(cast<ContinueStmt>(*S)); + break; + case Stmt::DefaultStmtClass: + EmitDefaultStmt(cast<DefaultStmt>(*S), Attrs); + break; + case Stmt::CaseStmtClass: + EmitCaseStmt(cast<CaseStmt>(*S), Attrs); + break; + case Stmt::SEHLeaveStmtClass: + EmitSEHLeaveStmt(cast<SEHLeaveStmt>(*S)); + break; } - return true; } @@ -695,8 +718,14 @@ void CodeGenFunction::EmitIfStmt(const IfStmt &S) { if (S.getElse()) ElseBlock = createBasicBlock("if.else"); - EmitBranchOnBoolExpr(S.getCond(), ThenBlock, ElseBlock, - getProfileCount(S.getThen())); + // Prefer the PGO based weights over the likelihood attribute. + // When the build isn't optimized the metadata isn't used, so don't generate + // it. + Stmt::Likelihood LH = Stmt::LH_None; + uint64_t Count = getProfileCount(S.getThen()); + if (!Count && CGM.getCodeGenOpts().OptimizationLevel) + LH = Stmt::getLikelihood(S.getThen(), S.getElse()); + EmitBranchOnBoolExpr(S.getCond(), ThenBlock, ElseBlock, Count, LH); // Emit the 'then' code. EmitBlock(ThenBlock); @@ -736,11 +765,6 @@ void CodeGenFunction::EmitWhileStmt(const WhileStmt &S, JumpDest LoopHeader = getJumpDestInCurrentScope("while.cond"); EmitBlock(LoopHeader.getBlock()); - const SourceRange &R = S.getSourceRange(); - LoopStack.push(LoopHeader.getBlock(), CGM.getContext(), CGM.getCodeGenOpts(), - WhileAttrs, SourceLocToDebugLoc(R.getBegin()), - SourceLocToDebugLoc(R.getEnd())); - // Create an exit block for when the condition fails, which will // also become the break target. JumpDest LoopExit = getJumpDestInCurrentScope("while.end"); @@ -768,9 +792,19 @@ void CodeGenFunction::EmitWhileStmt(const WhileStmt &S, // while(1) is common, avoid extra exit blocks. Be sure // to correctly handle break/continue though. bool EmitBoolCondBranch = true; - if (llvm::ConstantInt *C = dyn_cast<llvm::ConstantInt>(BoolCondVal)) - if (C->isOne()) + bool LoopMustProgress = false; + if (llvm::ConstantInt *C = dyn_cast<llvm::ConstantInt>(BoolCondVal)) { + if (C->isOne()) { EmitBoolCondBranch = false; + FnIsMustProgress = false; + } + } else if (LanguageRequiresProgress()) + LoopMustProgress = true; + + const SourceRange &R = S.getSourceRange(); + LoopStack.push(LoopHeader.getBlock(), CGM.getContext(), CGM.getCodeGenOpts(), + WhileAttrs, SourceLocToDebugLoc(R.getBegin()), + SourceLocToDebugLoc(R.getEnd()), LoopMustProgress); // As long as the condition is true, go to the loop body. llvm::BasicBlock *LoopBody = createBasicBlock("while.body"); @@ -778,14 +812,22 @@ void CodeGenFunction::EmitWhileStmt(const WhileStmt &S, llvm::BasicBlock *ExitBlock = LoopExit.getBlock(); if (ConditionScope.requiresCleanups()) ExitBlock = createBasicBlock("while.exit"); - Builder.CreateCondBr( - BoolCondVal, LoopBody, ExitBlock, - createProfileWeightsForLoop(S.getCond(), getProfileCount(S.getBody()))); + llvm::MDNode *Weights = createProfileOrBranchWeightsForLoop( + S.getCond(), getProfileCount(S.getBody()), S.getBody()); + Builder.CreateCondBr(BoolCondVal, LoopBody, ExitBlock, Weights); if (ExitBlock != LoopExit.getBlock()) { EmitBlock(ExitBlock); EmitBranchThroughCleanup(LoopExit); } + } else if (const Attr *A = Stmt::getLikelihoodAttr(S.getBody())) { + CGM.getDiags().Report(A->getLocation(), + diag::warn_attribute_has_no_effect_on_infinite_loop) + << A << A->getRange(); + CGM.getDiags().Report( + S.getWhileLoc(), + diag::note_attribute_has_no_effect_on_infinite_loop_here) + << SourceRange(S.getWhileLoc(), S.getRParenLoc()); } // Emit the loop body. We have to emit this in a cleanup scope @@ -838,11 +880,6 @@ void CodeGenFunction::EmitDoStmt(const DoStmt &S, EmitBlock(LoopCond.getBlock()); - const SourceRange &R = S.getSourceRange(); - LoopStack.push(LoopBody, CGM.getContext(), CGM.getCodeGenOpts(), DoAttrs, - SourceLocToDebugLoc(R.getBegin()), - SourceLocToDebugLoc(R.getEnd())); - // C99 6.8.5.2: "The evaluation of the controlling expression takes place // after each execution of the loop body." @@ -856,9 +893,19 @@ void CodeGenFunction::EmitDoStmt(const DoStmt &S, // "do {} while (0)" is common in macros, avoid extra blocks. Be sure // to correctly handle break/continue though. bool EmitBoolCondBranch = true; - if (llvm::ConstantInt *C = dyn_cast<llvm::ConstantInt>(BoolCondVal)) + bool LoopMustProgress = false; + if (llvm::ConstantInt *C = dyn_cast<llvm::ConstantInt>(BoolCondVal)) { if (C->isZero()) EmitBoolCondBranch = false; + else if (C->isOne()) + FnIsMustProgress = false; + } else if (LanguageRequiresProgress()) + LoopMustProgress = true; + + const SourceRange &R = S.getSourceRange(); + LoopStack.push(LoopBody, CGM.getContext(), CGM.getCodeGenOpts(), DoAttrs, + SourceLocToDebugLoc(R.getBegin()), + SourceLocToDebugLoc(R.getEnd()), LoopMustProgress); // As long as the condition is true, iterate the loop. if (EmitBoolCondBranch) { @@ -896,10 +943,20 @@ void CodeGenFunction::EmitForStmt(const ForStmt &S, llvm::BasicBlock *CondBlock = Continue.getBlock(); EmitBlock(CondBlock); + bool LoopMustProgress = false; + Expr::EvalResult Result; + if (LanguageRequiresProgress()) { + if (!S.getCond()) { + FnIsMustProgress = false; + } else if (!S.getCond()->EvaluateAsInt(Result, getContext())) { + LoopMustProgress = true; + } + } + const SourceRange &R = S.getSourceRange(); LoopStack.push(CondBlock, CGM.getContext(), CGM.getCodeGenOpts(), ForAttrs, SourceLocToDebugLoc(R.getBegin()), - SourceLocToDebugLoc(R.getEnd())); + SourceLocToDebugLoc(R.getEnd()), LoopMustProgress); // If the for loop doesn't have an increment we can just use the // condition as the continue block. Otherwise we'll need to create @@ -933,9 +990,14 @@ void CodeGenFunction::EmitForStmt(const ForStmt &S, // C99 6.8.5p2/p4: The first substatement is executed if the expression // compares unequal to 0. The condition must be a scalar type. llvm::Value *BoolCondVal = EvaluateExprAsBool(S.getCond()); - Builder.CreateCondBr( - BoolCondVal, ForBody, ExitBlock, - createProfileWeightsForLoop(S.getCond(), getProfileCount(S.getBody()))); + llvm::MDNode *Weights = createProfileOrBranchWeightsForLoop( + S.getCond(), getProfileCount(S.getBody()), S.getBody()); + + if (llvm::ConstantInt *C = dyn_cast<llvm::ConstantInt>(BoolCondVal)) + if (C->isOne()) + FnIsMustProgress = false; + + Builder.CreateCondBr(BoolCondVal, ForBody, ExitBlock, Weights); if (ExitBlock != LoopExit.getBlock()) { EmitBlock(ExitBlock); @@ -1014,9 +1076,9 @@ CodeGenFunction::EmitCXXForRangeStmt(const CXXForRangeStmt &S, // The body is executed if the expression, contextually converted // to bool, is true. llvm::Value *BoolCondVal = EvaluateExprAsBool(S.getCond()); - Builder.CreateCondBr( - BoolCondVal, ForBody, ExitBlock, - createProfileWeightsForLoop(S.getCond(), getProfileCount(S.getBody()))); + llvm::MDNode *Weights = createProfileOrBranchWeightsForLoop( + S.getCond(), getProfileCount(S.getBody()), S.getBody()); + Builder.CreateCondBr(BoolCondVal, ForBody, ExitBlock, Weights); if (ExitBlock != LoopExit.getBlock()) { EmitBlock(ExitBlock); @@ -1215,7 +1277,8 @@ void CodeGenFunction::EmitContinueStmt(const ContinueStmt &S) { /// EmitCaseStmtRange - If case statement range is not too big then /// add multiple cases to switch instruction, one for each value within /// the range. If range is too big then emit "if" condition check. -void CodeGenFunction::EmitCaseStmtRange(const CaseStmt &S) { +void CodeGenFunction::EmitCaseStmtRange(const CaseStmt &S, + ArrayRef<const Attr *> Attrs) { assert(S.getRHS() && "Expected RHS value in CaseStmt"); llvm::APSInt LHS = S.getLHS()->EvaluateKnownConstInt(getContext()); @@ -1232,6 +1295,7 @@ void CodeGenFunction::EmitCaseStmtRange(const CaseStmt &S) { if (LHS.isSigned() ? RHS.slt(LHS) : RHS.ult(LHS)) return; + Stmt::Likelihood LH = Stmt::getLikelihood(Attrs); llvm::APInt Range = RHS - LHS; // FIXME: parameters such as this should not be hardcoded. if (Range.ult(llvm::APInt(Range.getBitWidth(), 64))) { @@ -1246,6 +1310,9 @@ void CodeGenFunction::EmitCaseStmtRange(const CaseStmt &S) { for (unsigned I = 0; I != NCases; ++I) { if (SwitchWeights) SwitchWeights->push_back(Weight + (Rem ? 1 : 0)); + else if (SwitchLikelihood) + SwitchLikelihood->push_back(LH); + if (Rem) Rem--; SwitchInsn->addCase(Builder.getInt(LHS), CaseDest); @@ -1283,7 +1350,9 @@ void CodeGenFunction::EmitCaseStmtRange(const CaseStmt &S) { // need to update the weight for the default, ie, the first case, to include // this case. (*SwitchWeights)[0] += ThisCount; - } + } else if (SwitchLikelihood) + Weights = createBranchWeights(LH); + Builder.CreateCondBr(Cond, CaseDest, FalseDest, Weights); // Restore the appropriate insertion point. @@ -1293,7 +1362,8 @@ void CodeGenFunction::EmitCaseStmtRange(const CaseStmt &S) { Builder.ClearInsertionPoint(); } -void CodeGenFunction::EmitCaseStmt(const CaseStmt &S) { +void CodeGenFunction::EmitCaseStmt(const CaseStmt &S, + ArrayRef<const Attr *> Attrs) { // If there is no enclosing switch instance that we're aware of, then this // case statement and its block can be elided. This situation only happens // when we've constant-folded the switch, are emitting the constant case, @@ -1306,12 +1376,14 @@ void CodeGenFunction::EmitCaseStmt(const CaseStmt &S) { // Handle case ranges. if (S.getRHS()) { - EmitCaseStmtRange(S); + EmitCaseStmtRange(S, Attrs); return; } llvm::ConstantInt *CaseVal = Builder.getInt(S.getLHS()->EvaluateKnownConstInt(getContext())); + if (SwitchLikelihood) + SwitchLikelihood->push_back(Stmt::getLikelihood(Attrs)); // If the body of the case is just a 'break', try to not emit an empty block. // If we're profiling or we're not optimizing, leave the block in for better @@ -1352,6 +1424,10 @@ void CodeGenFunction::EmitCaseStmt(const CaseStmt &S) { // that falls through to the next case which is IR intensive. It also causes // deep recursion which can run into stack depth limitations. Handle // sequential non-range case statements specially. + // + // TODO When the next case has a likelihood attribute the code returns to the + // recursive algorithm. Maybe improve this case if it becomes common practice + // to use a lot of attributes. const CaseStmt *CurCase = &S; const CaseStmt *NextCase = dyn_cast<CaseStmt>(S.getSubStmt()); @@ -1365,8 +1441,12 @@ void CodeGenFunction::EmitCaseStmt(const CaseStmt &S) { SwitchWeights->push_back(getProfileCount(NextCase)); if (CGM.getCodeGenOpts().hasProfileClangInstr()) { CaseDest = createBasicBlock("sw.bb"); - EmitBlockWithFallThrough(CaseDest, &S); + EmitBlockWithFallThrough(CaseDest, CurCase); } + // Since this loop is only executed when the CaseStmt has no attributes + // use a hard-coded value. + if (SwitchLikelihood) + SwitchLikelihood->push_back(Stmt::LH_None); SwitchInsn->addCase(CaseVal, CaseDest); NextCase = dyn_cast<CaseStmt>(CurCase->getSubStmt()); @@ -1376,7 +1456,8 @@ void CodeGenFunction::EmitCaseStmt(const CaseStmt &S) { EmitStmt(CurCase->getSubStmt()); } -void CodeGenFunction::EmitDefaultStmt(const DefaultStmt &S) { +void CodeGenFunction::EmitDefaultStmt(const DefaultStmt &S, + ArrayRef<const Attr *> Attrs) { // If there is no enclosing switch instance that we're aware of, then this // default statement can be elided. This situation only happens when we've // constant-folded the switch. @@ -1389,6 +1470,9 @@ void CodeGenFunction::EmitDefaultStmt(const DefaultStmt &S) { assert(DefaultBlock->empty() && "EmitDefaultStmt: Default block already defined?"); + if (SwitchLikelihood) + SwitchLikelihood->front() = Stmt::getLikelihood(Attrs); + EmitBlockWithFallThrough(DefaultBlock, &S); EmitStmt(S.getSubStmt()); @@ -1626,10 +1710,67 @@ static bool FindCaseStatementsForValue(const SwitchStmt &S, FoundCase; } +static Optional<SmallVector<uint64_t, 16>> +getLikelihoodWeights(ArrayRef<Stmt::Likelihood> Likelihoods) { + // Are there enough branches to weight them? + if (Likelihoods.size() <= 1) + return None; + + uint64_t NumUnlikely = 0; + uint64_t NumNone = 0; + uint64_t NumLikely = 0; + for (const auto LH : Likelihoods) { + switch (LH) { + case Stmt::LH_Unlikely: + ++NumUnlikely; + break; + case Stmt::LH_None: + ++NumNone; + break; + case Stmt::LH_Likely: + ++NumLikely; + break; + } + } + + // Is there a likelihood attribute used? + if (NumUnlikely == 0 && NumLikely == 0) + return None; + + // When multiple cases share the same code they can be combined during + // optimization. In that case the weights of the branch will be the sum of + // the individual weights. Make sure the combined sum of all neutral cases + // doesn't exceed the value of a single likely attribute. + // The additions both avoid divisions by 0 and make sure the weights of None + // don't exceed the weight of Likely. + const uint64_t Likely = INT32_MAX / (NumLikely + 2); + const uint64_t None = Likely / (NumNone + 1); + const uint64_t Unlikely = 0; + + SmallVector<uint64_t, 16> Result; + Result.reserve(Likelihoods.size()); + for (const auto LH : Likelihoods) { + switch (LH) { + case Stmt::LH_Unlikely: + Result.push_back(Unlikely); + break; + case Stmt::LH_None: + Result.push_back(None); + break; + case Stmt::LH_Likely: + Result.push_back(Likely); + break; + } + } + + return Result; +} + void CodeGenFunction::EmitSwitchStmt(const SwitchStmt &S) { // Handle nested switch statements. llvm::SwitchInst *SavedSwitchInsn = SwitchInsn; SmallVector<uint64_t, 16> *SavedSwitchWeights = SwitchWeights; + SmallVector<Stmt::Likelihood, 16> *SavedSwitchLikelihood = SwitchLikelihood; llvm::BasicBlock *SavedCRBlock = CaseRangeBlock; // See if we can constant fold the condition of the switch and therefore only @@ -1704,7 +1845,12 @@ void CodeGenFunction::EmitSwitchStmt(const SwitchStmt &S) { // The default needs to be first. We store the edge count, so we already // know the right weight. SwitchWeights->push_back(DefaultCount); + } else if (CGM.getCodeGenOpts().OptimizationLevel) { + SwitchLikelihood = new SmallVector<Stmt::Likelihood, 16>(); + // Initialize the default case. + SwitchLikelihood->push_back(Stmt::LH_None); } + CaseRangeBlock = DefaultBlock; // Clear the insertion point to indicate we are in unreachable code. @@ -1768,9 +1914,21 @@ void CodeGenFunction::EmitSwitchStmt(const SwitchStmt &S) { SwitchInsn->setMetadata(llvm::LLVMContext::MD_prof, createProfileWeights(*SwitchWeights)); delete SwitchWeights; + } else if (SwitchLikelihood) { + assert(SwitchLikelihood->size() == 1 + SwitchInsn->getNumCases() && + "switch likelihoods do not match switch cases"); + Optional<SmallVector<uint64_t, 16>> LHW = + getLikelihoodWeights(*SwitchLikelihood); + if (LHW) { + llvm::MDBuilder MDHelper(CGM.getLLVMContext()); + SwitchInsn->setMetadata(llvm::LLVMContext::MD_prof, + createProfileWeights(*LHW)); + } + delete SwitchLikelihood; } SwitchInsn = SavedSwitchInsn; SwitchWeights = SavedSwitchWeights; + SwitchLikelihood = SavedSwitchLikelihood; CaseRangeBlock = SavedCRBlock; } @@ -1830,7 +1988,8 @@ SimplifyConstraint(const char *Constraint, const TargetInfo &Target, static std::string AddVariableConstraints(const std::string &Constraint, const Expr &AsmExpr, const TargetInfo &Target, CodeGenModule &CGM, - const AsmStmt &Stmt, const bool EarlyClobber) { + const AsmStmt &Stmt, const bool EarlyClobber, + std::string *GCCReg = nullptr) { const DeclRefExpr *AsmDeclRef = dyn_cast<DeclRefExpr>(&AsmExpr); if (!AsmDeclRef) return Constraint; @@ -1855,6 +2014,8 @@ AddVariableConstraints(const std::string &Constraint, const Expr &AsmExpr, } // Canonicalize the register here before returning it. Register = Target.getNormalizedGCCRegisterName(Register); + if (GCCReg != nullptr) + *GCCReg = Register.str(); return (EarlyClobber ? "&{" : "{") + Register.str() + "}"; } @@ -1954,12 +2115,16 @@ static llvm::MDNode *getAsmSrcLocInfo(const StringLiteral *Str, } static void UpdateAsmCallInst(llvm::CallBase &Result, bool HasSideEffect, - bool ReadOnly, bool ReadNone, const AsmStmt &S, + bool ReadOnly, bool ReadNone, bool NoMerge, + const AsmStmt &S, const std::vector<llvm::Type *> &ResultRegTypes, CodeGenFunction &CGF, std::vector<llvm::Value *> &RegResults) { Result.addAttribute(llvm::AttributeList::FunctionIndex, llvm::Attribute::NoUnwind); + if (NoMerge) + Result.addAttribute(llvm::AttributeList::FunctionIndex, + llvm::Attribute::NoMerge); // Attach readnone and readonly attributes. if (!HasSideEffect) { if (ReadNone) @@ -2049,6 +2214,9 @@ void CodeGenFunction::EmitAsmStmt(const AsmStmt &S) { // Keep track of out constraints for tied input operand. std::vector<std::string> OutputConstraints; + // Keep track of defined physregs. + llvm::SmallSet<std::string, 8> PhysRegOutputs; + // An inline asm can be marked readonly if it meets the following conditions: // - it doesn't have any sideeffects // - it doesn't clobber memory @@ -2068,9 +2236,15 @@ void CodeGenFunction::EmitAsmStmt(const AsmStmt &S) { const Expr *OutExpr = S.getOutputExpr(i); OutExpr = OutExpr->IgnoreParenNoopCasts(getContext()); + std::string GCCReg; OutputConstraint = AddVariableConstraints(OutputConstraint, *OutExpr, getTarget(), CGM, S, - Info.earlyClobber()); + Info.earlyClobber(), + &GCCReg); + // Give an error on multiple outputs to same physreg. + if (!GCCReg.empty() && !PhysRegOutputs.insert(GCCReg).second) + CGM.Error(S.getAsmLoc(), "multiple outputs to hard register: " + GCCReg); + OutputConstraints.push_back(OutputConstraint); LValue Dest = EmitLValue(OutExpr); if (!Constraints.empty()) @@ -2132,8 +2306,21 @@ void CodeGenFunction::EmitAsmStmt(const AsmStmt &S) { std::max((uint64_t)LargestVectorWidth, VT->getPrimitiveSizeInBits().getKnownMinSize()); } else { - ArgTypes.push_back(Dest.getAddress(*this).getType()); - Args.push_back(Dest.getPointer(*this)); + llvm::Type *DestAddrTy = Dest.getAddress(*this).getType(); + llvm::Value *DestPtr = Dest.getPointer(*this); + // Matrix types in memory are represented by arrays, but accessed through + // vector pointers, with the alignment specified on the access operation. + // For inline assembly, update pointer arguments to use vector pointers. + // Otherwise there will be a mis-match if the matrix is also an + // input-argument which is represented as vector. + if (isa<MatrixType>(OutExpr->getType().getCanonicalType())) { + DestAddrTy = llvm::PointerType::get( + ConvertType(OutExpr->getType()), + cast<llvm::PointerType>(DestAddrTy)->getAddressSpace()); + DestPtr = Builder.CreateBitCast(DestPtr, DestAddrTy); + } + ArgTypes.push_back(DestAddrTy); + Args.push_back(DestPtr); Constraints += "=*"; Constraints += OutputConstraint; ReadOnly = ReadNone = false; @@ -2157,7 +2344,8 @@ void CodeGenFunction::EmitAsmStmt(const AsmStmt &S) { LargestVectorWidth = std::max((uint64_t)LargestVectorWidth, VT->getPrimitiveSizeInBits().getKnownMinSize()); - if (Info.allowsRegister()) + // Only tie earlyclobber physregs. + if (Info.allowsRegister() && (GCCReg.empty() || Info.earlyClobber())) InOutConstraints += llvm::utostr(i); else InOutConstraints += OutputConstraint; @@ -2334,12 +2522,14 @@ void CodeGenFunction::EmitAsmStmt(const AsmStmt &S) { Builder.CreateCallBr(IA, Fallthrough, Transfer, Args); EmitBlock(Fallthrough); UpdateAsmCallInst(cast<llvm::CallBase>(*Result), HasSideEffect, ReadOnly, - ReadNone, S, ResultRegTypes, *this, RegResults); + ReadNone, InNoMergeAttributedStmt, S, ResultRegTypes, + *this, RegResults); } else { llvm::CallInst *Result = Builder.CreateCall(IA, Args, getBundlesForFunclet(IA)); UpdateAsmCallInst(cast<llvm::CallBase>(*Result), HasSideEffect, ReadOnly, - ReadNone, S, ResultRegTypes, *this, RegResults); + ReadNone, InNoMergeAttributedStmt, S, ResultRegTypes, + *this, RegResults); } assert(RegResults.size() == ResultRegTypes.size()); @@ -2412,8 +2602,7 @@ LValue CodeGenFunction::InitCapturedStruct(const CapturedStmt &S) { I != E; ++I, ++CurField) { LValue LV = EmitLValueForFieldInitialization(SlotLV, *CurField); if (CurField->hasCapturedVLAType()) { - auto VAT = CurField->getCapturedVLAType(); - EmitStoreThroughLValue(RValue::get(VLASizeMap[VAT->getSizeExpr()]), LV); + EmitLambdaVLACapture(CurField->getCapturedVLAType(), LV); } else { EmitInitializerForField(*CurField, LV, *I); } |