author:    Dimitry Andric <dim@FreeBSD.org>  2021-02-16 20:39:22 +0000
committer: Dimitry Andric <dim@FreeBSD.org>  2021-02-16 20:39:22 +0000
commit:    9f93bc8bfd2690abd12a830e42a1c26038173ae5 (patch)
tree:      397432940f35d42b709d99d1d23523e7a24296c4
parent:    b60736ec1405bb0a8dd40989f67ef4c93da068ab (diff)
download:  src-9f93bc8bfd2690abd12a830e42a1c26038173ae5.tar.gz
           src-9f93bc8bfd2690abd12a830e42a1c26038173ae5.zip
tag:       vendor/llvm-project/llvmorg-12.0.0-rc1-109-gd5d089bf08c9

Vendor import of llvm-project branch release/12.x
llvmorg-12.0.0-rc1-109-gd5d089bf08c9.
-rw-r--r--  clang/include/clang/AST/ASTContext.h | 3
-rw-r--r--  clang/include/clang/AST/DeclCXX.h | 6
-rw-r--r--  clang/include/clang/AST/Mangle.h | 3
-rw-r--r--  clang/include/clang/AST/MangleNumberingContext.h | 5
-rw-r--r--  clang/include/clang/AST/RecursiveASTVisitor.h | 12
-rw-r--r--  clang/include/clang/ASTMatchers/ASTMatchers.h | 2
-rw-r--r--  clang/include/clang/Driver/Options.td | 2
-rw-r--r--  clang/include/clang/Lex/VariadicMacroSupport.h | 10
-rw-r--r--  clang/include/clang/Sema/Sema.h | 18
-rw-r--r--  clang/lib/AST/ASTImporter.cpp | 2
-rw-r--r--  clang/lib/AST/CXXABI.h | 5
-rw-r--r--  clang/lib/AST/DeclCXX.cpp | 14
-rw-r--r--  clang/lib/AST/ExprConstant.cpp | 31
-rw-r--r--  clang/lib/AST/ItaniumCXXABI.cpp | 6
-rw-r--r--  clang/lib/AST/ItaniumMangle.cpp | 346
-rw-r--r--  clang/lib/AST/MicrosoftCXXABI.cpp | 33
-rw-r--r--  clang/lib/ASTMatchers/ASTMatchFinder.cpp | 48
-rw-r--r--  clang/lib/ASTMatchers/ASTMatchersInternal.cpp | 2
-rw-r--r--  clang/lib/Basic/ProfileList.cpp | 1
-rw-r--r--  clang/lib/Basic/Targets/PPC.cpp | 3
-rw-r--r--  clang/lib/Basic/Targets/RISCV.cpp | 6
-rw-r--r--  clang/lib/CodeGen/CGBuiltin.cpp | 2
-rw-r--r--  clang/lib/CodeGen/CGCUDANV.cpp | 8
-rw-r--r--  clang/lib/Driver/ToolChains/Arch/RISCV.cpp | 2
-rw-r--r--  clang/lib/Driver/ToolChains/Clang.cpp | 12
-rw-r--r--  clang/lib/Driver/ToolChains/CommonArgs.cpp | 5
-rw-r--r--  clang/lib/Driver/ToolChains/Linux.cpp | 9
-rw-r--r--  clang/lib/Format/UnwrappedLineFormatter.cpp | 2
-rw-r--r--  clang/lib/Frontend/CompilerInvocation.cpp | 2
-rw-r--r--  clang/lib/Headers/avx512fintrin.h | 16
-rw-r--r--  clang/lib/Lex/Preprocessor.cpp | 8
-rw-r--r--  clang/lib/Lex/TokenLexer.cpp | 10
-rw-r--r--  clang/lib/Parse/ParseDecl.cpp | 3
-rw-r--r--  clang/lib/Sema/Sema.cpp | 43
-rw-r--r--  clang/lib/Sema/SemaDecl.cpp | 88
-rw-r--r--  clang/lib/Sema/SemaExpr.cpp | 2
-rw-r--r--  clang/lib/Sema/SemaLambda.cpp | 10
-rw-r--r--  clang/lib/Sema/SemaOpenMP.cpp | 23
-rw-r--r--  clang/lib/Sema/TreeTransform.h | 7
-rw-r--r--  clang/lib/Serialization/ASTReaderDecl.cpp | 1
-rw-r--r--  clang/lib/Serialization/ASTWriter.cpp | 1
-rw-r--r--  compiler-rt/lib/sanitizer_common/sanitizer_platform_interceptors.h | 2
-rw-r--r--  libcxx/include/__locale | 20
-rw-r--r--  libcxx/include/__threading_support | 2
-rw-r--r--  libcxx/include/bit | 2
-rw-r--r--  libcxx/include/limits | 4
-rw-r--r--  libcxx/include/memory | 2
-rw-r--r--  libcxx/src/atomic.cpp | 6
-rw-r--r--  libcxx/src/locale.cpp | 2
-rw-r--r--  lld/ELF/InputSection.cpp | 5
-rw-r--r--  lld/docs/ReleaseNotes.rst | 20
-rw-r--r--  llvm/include/llvm-c/Core.h | 6
-rw-r--r--  llvm/include/llvm/Analysis/AssumptionCache.h | 2
-rw-r--r--  llvm/include/llvm/CodeGen/TargetLowering.h | 4
-rw-r--r--  llvm/include/llvm/Demangle/ItaniumDemangle.h | 68
-rw-r--r--  llvm/include/llvm/IR/IntrinsicInst.h | 8
-rw-r--r--  llvm/include/llvm/IR/Intrinsics.td | 2
-rw-r--r--  llvm/include/llvm/IR/IntrinsicsRISCV.td | 7
-rw-r--r--  llvm/include/llvm/IR/PseudoProbe.h | 27
-rw-r--r--  llvm/include/llvm/Passes/StandardInstrumentations.h | 2
-rw-r--r--  llvm/include/llvm/ProfileData/SampleProf.h | 29
-rw-r--r--  llvm/include/llvm/ProfileData/SampleProfReader.h | 4
-rw-r--r--  llvm/include/llvm/Support/CommandLine.h | 13
-rw-r--r--  llvm/include/llvm/Transforms/IPO/SampleContextTracker.h | 6
-rw-r--r--  llvm/include/llvm/Transforms/IPO/SampleProfileProbe.h | 41
-rw-r--r--  llvm/lib/Analysis/MemorySSA.cpp | 26
-rw-r--r--  llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp | 9
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 2
-rw-r--r--  llvm/lib/IR/ConstantFold.cpp | 59
-rw-r--r--  llvm/lib/IR/PseudoProbe.cpp | 41
-rw-r--r--  llvm/lib/IR/Verifier.cpp | 6
-rw-r--r--  llvm/lib/Passes/PassBuilder.cpp | 6
-rw-r--r--  llvm/lib/Passes/PassRegistry.def | 1
-rw-r--r--  llvm/lib/Passes/StandardInstrumentations.cpp | 1
-rw-r--r--  llvm/lib/ProfileData/SampleProfReader.cpp | 93
-rw-r--r--  llvm/lib/ProfileData/SampleProfWriter.cpp | 4
-rw-r--r--  llvm/lib/Support/CommandLine.cpp | 25
-rw-r--r--  llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp | 2
-rw-r--r--  llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp | 24
-rw-r--r--  llvm/lib/Target/ARM/ARMISelLowering.cpp | 6
-rw-r--r--  llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 12
-rw-r--r--  llvm/lib/Target/PowerPC/PPCISelLowering.h | 3
-rw-r--r--  llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp | 6
-rw-r--r--  llvm/lib/Target/RISCV/MCTargetDesc/RISCVTargetStreamer.cpp | 6
-rw-r--r--  llvm/lib/Target/RISCV/RISCVCleanupVSETVLI.cpp | 37
-rw-r--r--  llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp | 11
-rw-r--r--  llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp | 118
-rw-r--r--  llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h | 5
-rw-r--r--  llvm/lib/Target/RISCV/RISCVInstrFormatsV.td | 23
-rw-r--r--  llvm/lib/Target/RISCV/RISCVInstrInfoB.td | 67
-rw-r--r--  llvm/lib/Target/RISCV/RISCVInstrInfoV.td | 100
-rw-r--r--  llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td | 239
-rw-r--r--  llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td | 6
-rw-r--r--  llvm/lib/Target/VE/VE.h | 10
-rw-r--r--  llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp | 1
-rw-r--r--  llvm/lib/Target/X86/X86ISelLowering.cpp | 9
-rw-r--r--  llvm/lib/Target/X86/X86InstrAVX512.td | 4
-rw-r--r--  llvm/lib/Transforms/IPO/SampleContextTracker.cpp | 86
-rw-r--r--  llvm/lib/Transforms/IPO/SampleProfile.cpp | 576
-rw-r--r--  llvm/lib/Transforms/IPO/SampleProfileProbe.cpp | 162
-rw-r--r--  llvm/lib/Transforms/Scalar/LoopUnswitch.cpp | 10
-rw-r--r--  llvm/lib/Transforms/Utils/BuildLibCalls.cpp | 67
-rw-r--r--  llvm/lib/Transforms/Utils/InlineFunction.cpp | 12
-rw-r--r--  llvm/lib/Transforms/Utils/LoopPeel.cpp | 19
-rw-r--r--  llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 7
-rw-r--r--  llvm/tools/llvm-profdata/llvm-profdata.cpp | 2
-rw-r--r--  llvm/utils/TableGen/IntrinsicEmitter.cpp | 4
-rw-r--r--  openmp/runtime/src/kmp_config.h.cmake | 4
-rw-r--r--  openmp/runtime/src/kmp_runtime.cpp | 6
-rw-r--r--  openmp/runtime/src/kmp_settings.cpp | 3
110 files changed, 1966 insertions(+), 1050 deletions(-)
diff --git a/clang/include/clang/AST/ASTContext.h b/clang/include/clang/AST/ASTContext.h
index ce47d54e44b0..ae69a68608b7 100644
--- a/clang/include/clang/AST/ASTContext.h
+++ b/clang/include/clang/AST/ASTContext.h
@@ -538,6 +538,9 @@ private:
/// need them (like static local vars).
llvm::MapVector<const NamedDecl *, unsigned> MangleNumbers;
llvm::MapVector<const VarDecl *, unsigned> StaticLocalNumbers;
+ /// Maps a lambda closure type to its device-side mangling number, if any.
+ mutable llvm::DenseMap<const CXXRecordDecl *, unsigned>
+ DeviceLambdaManglingNumbers;
/// Mapping that stores parameterIndex values for ParmVarDecls when
/// that value exceeds the bitfield size of ParmVarDeclBits.ParameterIndex.
diff --git a/clang/include/clang/AST/DeclCXX.h b/clang/include/clang/AST/DeclCXX.h
index e32101bb2276..89006b1cfa7f 100644
--- a/clang/include/clang/AST/DeclCXX.h
+++ b/clang/include/clang/AST/DeclCXX.h
@@ -1735,6 +1735,12 @@ public:
getLambdaData().HasKnownInternalLinkage = HasKnownInternalLinkage;
}
+ /// Set the device-side mangling number.
+ void setDeviceLambdaManglingNumber(unsigned Num) const;
+
+ /// Retrieve the device-side mangling number.
+ unsigned getDeviceLambdaManglingNumber() const;
+
/// Returns the inheritance model used for this record.
MSInheritanceModel getMSInheritanceModel() const;
diff --git a/clang/include/clang/AST/Mangle.h b/clang/include/clang/AST/Mangle.h
index 0e8d6dd53d8a..7b6495d85eb6 100644
--- a/clang/include/clang/AST/Mangle.h
+++ b/clang/include/clang/AST/Mangle.h
@@ -96,6 +96,9 @@ public:
virtual bool shouldMangleCXXName(const NamedDecl *D) = 0;
virtual bool shouldMangleStringLiteral(const StringLiteral *SL) = 0;
+ virtual bool isDeviceMangleContext() const { return false; }
+ virtual void setDeviceMangleContext(bool) {}
+
// FIXME: consider replacing raw_ostream & with something like SmallString &.
void mangleName(GlobalDecl GD, raw_ostream &);
virtual void mangleCXXName(GlobalDecl GD, raw_ostream &) = 0;
diff --git a/clang/include/clang/AST/MangleNumberingContext.h b/clang/include/clang/AST/MangleNumberingContext.h
index f1ca6a05dbaf..eb33759682d6 100644
--- a/clang/include/clang/AST/MangleNumberingContext.h
+++ b/clang/include/clang/AST/MangleNumberingContext.h
@@ -52,6 +52,11 @@ public:
/// this context.
virtual unsigned getManglingNumber(const TagDecl *TD,
unsigned MSLocalManglingNumber) = 0;
+
+ /// Retrieve the mangling number of a new lambda expression with the
+ /// given call operator within the device context. No device number is
+ /// assigned if no device numbering context is associated.
+ virtual unsigned getDeviceManglingNumber(const CXXMethodDecl *) { return 0; }
};
} // end namespace clang
diff --git a/clang/include/clang/AST/RecursiveASTVisitor.h b/clang/include/clang/AST/RecursiveASTVisitor.h
index 505ea700fd0e..7870cea198a7 100644
--- a/clang/include/clang/AST/RecursiveASTVisitor.h
+++ b/clang/include/clang/AST/RecursiveASTVisitor.h
@@ -186,6 +186,9 @@ public:
/// code, e.g., implicit constructors and destructors.
bool shouldVisitImplicitCode() const { return false; }
+ /// Return whether this visitor should recurse into lambda bodies.
+ bool shouldVisitLambdaBody() const { return true; }
+
/// Return whether this visitor should traverse post-order.
bool shouldTraversePostOrder() const { return false; }
@@ -2057,6 +2060,15 @@ bool RecursiveASTVisitor<Derived>::TraverseFunctionHelper(FunctionDecl *D) {
// by clang.
(!D->isDefaulted() || getDerived().shouldVisitImplicitCode());
+ if (const auto *MD = dyn_cast<CXXMethodDecl>(D)) {
+ if (const CXXRecordDecl *RD = MD->getParent()) {
+ if (RD->isLambda() &&
+ declaresSameEntity(RD->getLambdaCallOperator(), MD)) {
+ VisitBody = VisitBody && getDerived().shouldVisitLambdaBody();
+ }
+ }
+ }
+
if (VisitBody) {
TRY_TO(TraverseStmt(D->getBody())); // Function body.
}
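The hook pairs with the traversal change above: TraverseFunctionHelper now asks the derived visitor before descending into a lambda's call-operator body. A minimal sketch of a visitor opting out (hypothetical class name, not part of the patch):

```cpp
#include "clang/AST/RecursiveASTVisitor.h"

// Hypothetical visitor: with shouldVisitLambdaBody() returning false,
// statements inside lambda bodies are never traversed, so VisitCallExpr
// below only sees calls written outside lambdas.
class SkipLambdaBodies : public clang::RecursiveASTVisitor<SkipLambdaBodies> {
public:
  bool shouldVisitLambdaBody() const { return false; }

  bool VisitCallExpr(clang::CallExpr *CE) {
    // ... inspect the call; returning true continues traversal.
    return true;
  }
};
```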
diff --git a/clang/include/clang/ASTMatchers/ASTMatchers.h b/clang/include/clang/ASTMatchers/ASTMatchers.h
index 6f6dfab59a39..031fa4682c3a 100644
--- a/clang/include/clang/ASTMatchers/ASTMatchers.h
+++ b/clang/include/clang/ASTMatchers/ASTMatchers.h
@@ -344,7 +344,7 @@ extern const internal::VariadicAllOfMatcher<Decl> decl;
/// int number = 42;
/// auto [foo, bar] = std::make_pair(42, 42);
/// \endcode
-extern const internal::VariadicAllOfMatcher<DecompositionDecl>
+extern const internal::VariadicDynCastAllOfMatcher<Decl, DecompositionDecl>
decompositionDecl;
/// Matches a declaration of a linkage specification.
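With the switch to VariadicDynCastAllOfMatcher, decompositionDecl() now behaves like the other node matchers: it produces a Matcher<Decl> that performs the dyn-cast to DecompositionDecl at match time. A brief usage sketch:

```cpp
using namespace clang::ast_matchers;

// Matches structured bindings such as: auto [foo, bar] = std::make_pair(42, 42);
DeclarationMatcher Decomp = decompositionDecl().bind("decomp");
```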
diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td
index 42c5319041d0..1f6c13d5cc96 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -1147,7 +1147,7 @@ def fprofile_update_EQ : Joined<["-"], "fprofile-update=">,
defm pseudo_probe_for_profiling : BoolFOption<"pseudo-probe-for-profiling",
CodeGenOpts<"PseudoProbeForProfiling">, DefaultFalse,
PosFlag<SetTrue, [], "Emit">, NegFlag<SetFalse, [], "Do not emit">,
- BothFlags<[NoXarchOption, CC1Option], " pseudo probes for sample profiler">>;
+ BothFlags<[NoXarchOption, CC1Option], " pseudo probes for sample profiling">>;
def forder_file_instrumentation : Flag<["-"], "forder-file-instrumentation">,
Group<f_Group>, Flags<[CC1Option, CoreOption]>,
HelpText<"Generate instrumented code to collect order file into default.profraw file (overridden by '=' form of option or LLVM_PROFILE_FILE env var)">;
diff --git a/clang/include/clang/Lex/VariadicMacroSupport.h b/clang/include/clang/Lex/VariadicMacroSupport.h
index 989e0ac703c9..119f02201fc6 100644
--- a/clang/include/clang/Lex/VariadicMacroSupport.h
+++ b/clang/include/clang/Lex/VariadicMacroSupport.h
@@ -39,17 +39,14 @@ namespace clang {
assert(Ident__VA_ARGS__->isPoisoned() && "__VA_ARGS__ should be poisoned "
"outside an ISO C/C++ variadic "
"macro definition!");
- assert(
- !Ident__VA_OPT__ ||
- (Ident__VA_OPT__->isPoisoned() && "__VA_OPT__ should be poisoned!"));
+ assert(Ident__VA_OPT__->isPoisoned() && "__VA_OPT__ should be poisoned!");
}
/// Client code should call this function just before the Preprocessor is
/// about to Lex tokens from the definition of a variadic (ISO C/C++) macro.
void enterScope() {
Ident__VA_ARGS__->setIsPoisoned(false);
- if (Ident__VA_OPT__)
- Ident__VA_OPT__->setIsPoisoned(false);
+ Ident__VA_OPT__->setIsPoisoned(false);
}
/// Client code should call this function as soon as the Preprocessor has
@@ -58,8 +55,7 @@ namespace clang {
/// (might be explicitly called, and then reinvoked via the destructor).
void exitScope() {
Ident__VA_ARGS__->setIsPoisoned(true);
- if (Ident__VA_OPT__)
- Ident__VA_OPT__->setIsPoisoned(true);
+ Ident__VA_OPT__->setIsPoisoned(true);
}
~VariadicMacroScopeGuard() { exitScope(); }
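For context, this is what the guard toggles: __VA_ARGS__ and __VA_OPT__ are poisoned outside the replacement list of an ISO C/C++ variadic macro, and the simplification above relies on Ident__VA_OPT__ now being unconditionally available. A small example of the guarded tokens in use:

```cpp
#include <cstdio>

// __VA_OPT__(,) expands to a comma only when variadic arguments are present
// (C++20 / C2x), so both calls below are well-formed.
#define LOG(fmt, ...) std::printf(fmt __VA_OPT__(,) __VA_ARGS__)

int main() {
  LOG("%d\n", 42); // expands with the separating comma
  LOG("plain\n");  // expands without a trailing comma
}
```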
diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h
index 7f7c84eb1b1d..2530a2776373 100644
--- a/clang/include/clang/Sema/Sema.h
+++ b/clang/include/clang/Sema/Sema.h
@@ -6558,7 +6558,7 @@ public:
/// Number lambda for linkage purposes if necessary.
void handleLambdaNumbering(
CXXRecordDecl *Class, CXXMethodDecl *Method,
- Optional<std::tuple<unsigned, bool, Decl *>> Mangling = None);
+ Optional<std::tuple<bool, unsigned, unsigned, Decl *>> Mangling = None);
/// Endow the lambda scope info with the relevant properties.
void buildLambdaScope(sema::LambdaScopeInfo *LSI,
@@ -11948,8 +11948,8 @@ public:
/// if (diagIfOpenMPDeviceCode(Loc, diag::err_vla_unsupported))
/// return ExprError();
/// // Otherwise, continue parsing as normal.
- SemaDiagnosticBuilder diagIfOpenMPDeviceCode(SourceLocation Loc,
- unsigned DiagID);
+ SemaDiagnosticBuilder
+ diagIfOpenMPDeviceCode(SourceLocation Loc, unsigned DiagID, FunctionDecl *FD);
/// Creates a SemaDiagnosticBuilder that emits the diagnostic if the current
/// context is "used as host code".
@@ -11965,17 +11965,19 @@ public:
/// return ExprError();
/// // Otherwise, continue parsing as normal.
SemaDiagnosticBuilder diagIfOpenMPHostCode(SourceLocation Loc,
- unsigned DiagID);
+ unsigned DiagID, FunctionDecl *FD);
- SemaDiagnosticBuilder targetDiag(SourceLocation Loc, unsigned DiagID);
+ SemaDiagnosticBuilder targetDiag(SourceLocation Loc, unsigned DiagID,
+ FunctionDecl *FD = nullptr);
SemaDiagnosticBuilder targetDiag(SourceLocation Loc,
- const PartialDiagnostic &PD) {
- return targetDiag(Loc, PD.getDiagID()) << PD;
+ const PartialDiagnostic &PD,
+ FunctionDecl *FD = nullptr) {
+ return targetDiag(Loc, PD.getDiagID(), FD) << PD;
}
/// Check if the expression is allowed to be used in expressions for the
/// offloading devices.
- void checkDeviceDecl(const ValueDecl *D, SourceLocation Loc);
+ void checkDeviceDecl(ValueDecl *D, SourceLocation Loc);
enum CUDAFunctionTarget {
CFT_Device,
diff --git a/clang/lib/AST/ASTImporter.cpp b/clang/lib/AST/ASTImporter.cpp
index 085c50c0667b..0d723fbbcd8c 100644
--- a/clang/lib/AST/ASTImporter.cpp
+++ b/clang/lib/AST/ASTImporter.cpp
@@ -2848,6 +2848,8 @@ ExpectedDecl ASTNodeImporter::VisitRecordDecl(RecordDecl *D) {
return CDeclOrErr.takeError();
D2CXX->setLambdaMangling(DCXX->getLambdaManglingNumber(), *CDeclOrErr,
DCXX->hasKnownLambdaInternalLinkage());
+ D2CXX->setDeviceLambdaManglingNumber(
+ DCXX->getDeviceLambdaManglingNumber());
} else if (DCXX->isInjectedClassName()) {
// We have to be careful to do a similar dance to the one in
// Sema::ActOnStartCXXMemberDeclarations
diff --git a/clang/lib/AST/CXXABI.h b/clang/lib/AST/CXXABI.h
index 31cb36918726..ca9424bcb7a4 100644
--- a/clang/lib/AST/CXXABI.h
+++ b/clang/lib/AST/CXXABI.h
@@ -22,8 +22,9 @@ class ASTContext;
class CXXConstructorDecl;
class DeclaratorDecl;
class Expr;
-class MemberPointerType;
+class MangleContext;
class MangleNumberingContext;
+class MemberPointerType;
/// Implements C++ ABI-specific semantic analysis functions.
class CXXABI {
@@ -75,6 +76,8 @@ public:
/// Creates an instance of a C++ ABI class.
CXXABI *CreateItaniumCXXABI(ASTContext &Ctx);
CXXABI *CreateMicrosoftCXXABI(ASTContext &Ctx);
+std::unique_ptr<MangleNumberingContext>
+createItaniumNumberingContext(MangleContext *);
}
#endif
diff --git a/clang/lib/AST/DeclCXX.cpp b/clang/lib/AST/DeclCXX.cpp
index 0368ada0b81c..0375f9b4432e 100644
--- a/clang/lib/AST/DeclCXX.cpp
+++ b/clang/lib/AST/DeclCXX.cpp
@@ -1593,6 +1593,20 @@ Decl *CXXRecordDecl::getLambdaContextDecl() const {
return getLambdaData().ContextDecl.get(Source);
}
+void CXXRecordDecl::setDeviceLambdaManglingNumber(unsigned Num) const {
+ assert(isLambda() && "Not a lambda closure type!");
+ if (Num)
+ getASTContext().DeviceLambdaManglingNumbers[this] = Num;
+}
+
+unsigned CXXRecordDecl::getDeviceLambdaManglingNumber() const {
+ assert(isLambda() && "Not a lambda closure type!");
+ auto I = getASTContext().DeviceLambdaManglingNumbers.find(this);
+ if (I != getASTContext().DeviceLambdaManglingNumbers.end())
+ return I->second;
+ return 0;
+}
+
static CanQualType GetConversionType(ASTContext &Context, NamedDecl *Conv) {
QualType T =
cast<CXXConversionDecl>(Conv->getUnderlyingDecl()->getAsFunction())
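The accessor pair above implements a sparse side table: device-side numbers live in ASTContext::DeviceLambdaManglingNumbers rather than in every CXXRecordDecl, with zero doubling as "no number". A standalone sketch of the same pattern (hypothetical names, for illustration):

```cpp
#include <unordered_map>

struct Node {};

class Context {
  // Only nodes that actually carry a number pay for storage.
  mutable std::unordered_map<const Node *, unsigned> SideNumbers;

public:
  void setNumber(const Node *N, unsigned Num) const {
    if (Num) // zero means "no number"; keep the table sparse
      SideNumbers[N] = Num;
  }
  unsigned getNumber(const Node *N) const {
    auto It = SideNumbers.find(N);
    return It == SideNumbers.end() ? 0 : It->second;
  }
};
```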
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index 56181bbe1166..1bdad771a923 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -3497,8 +3497,8 @@ static bool diagnoseMutableFields(EvalInfo &Info, const Expr *E, AccessKinds AK,
static bool lifetimeStartedInEvaluation(EvalInfo &Info,
APValue::LValueBase Base,
bool MutableSubobject = false) {
- // A temporary we created.
- if (Base.getCallIndex())
+ // A temporary or transient heap allocation we created.
+ if (Base.getCallIndex() || Base.is<DynamicAllocLValue>())
return true;
switch (Info.IsEvaluatingDecl) {
@@ -10009,6 +10009,7 @@ bool RecordExprEvaluator::VisitLambdaExpr(const LambdaExpr *E) {
auto *CaptureInitIt = E->capture_init_begin();
const LambdaCapture *CaptureIt = ClosureClass->captures_begin();
bool Success = true;
+ const ASTRecordLayout &Layout = Info.Ctx.getASTRecordLayout(ClosureClass);
for (const auto *Field : ClosureClass->fields()) {
assert(CaptureInitIt != E->capture_init_end());
// Get the initializer for this field
@@ -10019,8 +10020,13 @@ bool RecordExprEvaluator::VisitLambdaExpr(const LambdaExpr *E) {
if (!CurFieldInit)
return Error(E);
+ LValue Subobject = This;
+
+ if (!HandleLValueMember(Info, E, Subobject, Field, &Layout))
+ return false;
+
APValue &FieldVal = Result.getStructField(Field->getFieldIndex());
- if (!EvaluateInPlace(FieldVal, Info, This, CurFieldInit)) {
+ if (!EvaluateInPlace(FieldVal, Info, Subobject, CurFieldInit)) {
if (!Info.keepEvaluatingAfterFailure())
return false;
Success = false;
@@ -14786,11 +14792,14 @@ bool Expr::EvaluateAsLValue(EvalResult &Result, const ASTContext &Ctx,
static bool EvaluateDestruction(const ASTContext &Ctx, APValue::LValueBase Base,
APValue DestroyedValue, QualType Type,
- SourceLocation Loc, Expr::EvalStatus &EStatus) {
- EvalInfo Info(Ctx, EStatus, EvalInfo::EM_ConstantExpression);
+ SourceLocation Loc, Expr::EvalStatus &EStatus,
+ bool IsConstantDestruction) {
+ EvalInfo Info(Ctx, EStatus,
+ IsConstantDestruction ? EvalInfo::EM_ConstantExpression
+ : EvalInfo::EM_ConstantFold);
Info.setEvaluatingDecl(Base, DestroyedValue,
EvalInfo::EvaluatingDeclKind::Dtor);
- Info.InConstantContext = true;
+ Info.InConstantContext = IsConstantDestruction;
LValue LVal;
LVal.set(Base);
@@ -14844,7 +14853,8 @@ bool Expr::EvaluateAsConstantExpr(EvalResult &Result, const ASTContext &Ctx,
// If this is a class template argument, it's required to have constant
// destruction too.
if (Kind == ConstantExprKind::ClassTemplateArgument &&
- (!EvaluateDestruction(Ctx, Base, Result.Val, T, getBeginLoc(), Result) ||
+ (!EvaluateDestruction(Ctx, Base, Result.Val, T, getBeginLoc(), Result,
+ true) ||
Result.HasSideEffects)) {
// FIXME: Prefix a note to indicate that the problem is lack of constant
// destruction.
@@ -14910,6 +14920,10 @@ bool VarDecl::evaluateDestruction(
Expr::EvalStatus EStatus;
EStatus.Diag = &Notes;
+ // Only treat the destruction as constant destruction if we formally have
+ // constant initialization (or are usable in a constant expression).
+ bool IsConstantDestruction = hasConstantInitialization();
+
// Make a copy of the value for the destructor to mutate, if we know it.
// Otherwise, treat the value as default-initialized; if the destructor works
// anyway, then the destruction is constant (and must be essentially empty).
@@ -14920,7 +14934,8 @@ bool VarDecl::evaluateDestruction(
return false;
if (!EvaluateDestruction(getASTContext(), this, std::move(DestroyedValue),
- getType(), getLocation(), EStatus) ||
+ getType(), getLocation(), EStatus,
+ IsConstantDestruction) ||
EStatus.HasSideEffects)
return false;
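A hedged C++20 illustration of the rule in the comment above (not from the patch): destruction is only checked as constant when the variable formally has constant initialization.

```cpp
struct S {
  int *P = nullptr;
  constexpr ~S() { delete P; } // C++20 constexpr destructor
};

constinit S A; // constant-initialized: destruction is evaluated as constant
S B{new int};  // dynamic initialization: destruction is merely constant-folded
```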
diff --git a/clang/lib/AST/ItaniumCXXABI.cpp b/clang/lib/AST/ItaniumCXXABI.cpp
index 069add8464ae..be10258a2d77 100644
--- a/clang/lib/AST/ItaniumCXXABI.cpp
+++ b/clang/lib/AST/ItaniumCXXABI.cpp
@@ -258,3 +258,9 @@ public:
CXXABI *clang::CreateItaniumCXXABI(ASTContext &Ctx) {
return new ItaniumCXXABI(Ctx);
}
+
+std::unique_ptr<MangleNumberingContext>
+clang::createItaniumNumberingContext(MangleContext *Mangler) {
+ return std::make_unique<ItaniumNumberingContext>(
+ cast<ItaniumMangleContext>(Mangler));
+}
diff --git a/clang/lib/AST/ItaniumMangle.cpp b/clang/lib/AST/ItaniumMangle.cpp
index 6c8d5687c64a..5cad84a96845 100644
--- a/clang/lib/AST/ItaniumMangle.cpp
+++ b/clang/lib/AST/ItaniumMangle.cpp
@@ -125,6 +125,8 @@ class ItaniumMangleContextImpl : public ItaniumMangleContext {
llvm::DenseMap<DiscriminatorKeyTy, unsigned> Discriminator;
llvm::DenseMap<const NamedDecl*, unsigned> Uniquifier;
+ bool IsDevCtx = false;
+
public:
explicit ItaniumMangleContextImpl(ASTContext &Context,
DiagnosticsEngine &Diags)
@@ -137,6 +139,10 @@ public:
bool shouldMangleStringLiteral(const StringLiteral *) override {
return false;
}
+
+ bool isDeviceMangleContext() const override { return IsDevCtx; }
+ void setDeviceMangleContext(bool IsDev) override { IsDevCtx = IsDev; }
+
void mangleCXXName(GlobalDecl GD, raw_ostream &) override;
void mangleThunk(const CXXMethodDecl *MD, const ThunkInfo &Thunk,
raw_ostream &) override;
@@ -546,8 +552,8 @@ private:
unsigned knownArity);
void mangleCastExpression(const Expr *E, StringRef CastEncoding);
void mangleInitListElements(const InitListExpr *InitList);
- void mangleDeclRefExpr(const NamedDecl *D);
- void mangleExpression(const Expr *E, unsigned Arity = UnknownArity);
+ void mangleExpression(const Expr *E, unsigned Arity = UnknownArity,
+ bool AsTemplateArg = false);
void mangleCXXCtorType(CXXCtorType T, const CXXRecordDecl *InheritedFrom);
void mangleCXXDtorType(CXXDtorType T);
@@ -558,6 +564,7 @@ private:
unsigned NumTemplateArgs);
void mangleTemplateArgs(TemplateName TN, const TemplateArgumentList &AL);
void mangleTemplateArg(TemplateArgument A, bool NeedExactType);
+ void mangleTemplateArgExpr(const Expr *E);
void mangleValueInTemplateArg(QualType T, const APValue &V, bool TopLevel,
bool NeedExactType = false);
@@ -726,9 +733,17 @@ void CXXNameMangler::mangleFunctionEncodingBareType(const FunctionDecl *FD) {
EnableIfAttr *EIA = dyn_cast<EnableIfAttr>(*I);
if (!EIA)
continue;
- Out << 'X';
- mangleExpression(EIA->getCond());
- Out << 'E';
+ if (Context.getASTContext().getLangOpts().getClangABICompat() >
+ LangOptions::ClangABI::Ver11) {
+ mangleTemplateArgExpr(EIA->getCond());
+ } else {
+ // Prior to Clang 12, we hardcoded the X/E around enable-if's argument,
+ // even though <template-arg> should not include an X/E around
+ // <expr-primary>.
+ Out << 'X';
+ mangleExpression(EIA->getCond());
+ Out << 'E';
+ }
}
Out << 'E';
FunctionTypeDepth.pop(Saved);
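The expressions being mangled here are the conditions of Clang's enable_if function attribute; because they are part of the function's mangled name, their encoding is ABI-compatibility sensitive, which is why the old X/E form is kept under -fclang-abi-compat=11 and lower. A small example of a declaration carrying such a condition:

```cpp
// The condition (n > 0) is mangled into pick's symbol name.
int pick(int n) __attribute__((enable_if(n > 0, "argument must be positive")));
```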
@@ -1837,7 +1852,15 @@ void CXXNameMangler::mangleLambda(const CXXRecordDecl *Lambda) {
// (in lexical order) with that same <lambda-sig> and context.
//
// The AST keeps track of the number for us.
- unsigned Number = Lambda->getLambdaManglingNumber();
+ //
+ // In CUDA/HIP, to keep lambda numbering consistent between the device- and
+ // host-side compilations, an extra device mangle context may be created if
+ // the host-side C++ ABI numbers lambdas differently. In that case, if the
+ // mangle context is the device-side one, use the device-side lambda
+ // mangling number for this lambda.
+ unsigned Number = Context.isDeviceMangleContext()
+ ? Lambda->getDeviceLambdaManglingNumber()
+ : Lambda->getLambdaManglingNumber();
assert(Number > 0 && "Lambda should be mangled as an unnamed class");
if (Number > 1)
mangleNumber(Number - 2);
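A hedged sketch of the scenario the comment describes (hypothetical code): in CUDA/HIP with a Microsoft host ABI, the host and device compilations would otherwise assign lambda ordinals under different rules, so the closure type carries a separately tracked device-side number.

```cpp
void f() {
  // Host side (Microsoft ABI) and device side (Itanium ABI) number these
  // closures under different rules; the device-side compilation consults
  // getDeviceLambdaManglingNumber() so both sides agree on the device name.
  auto g = [] { return 1; };
  auto h = [] { return 2; };
}
```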
@@ -3528,8 +3551,8 @@ void CXXNameMangler::mangleType(const DependentSizedMatrixType *T) {
Out << "u" << VendorQualifier.size() << VendorQualifier;
Out << "I";
- mangleTemplateArg(T->getRowExpr(), false);
- mangleTemplateArg(T->getColumnExpr(), false);
+ mangleTemplateArgExpr(T->getRowExpr());
+ mangleTemplateArgExpr(T->getColumnExpr());
mangleType(T->getElementType());
Out << "E";
}
@@ -3871,33 +3894,8 @@ void CXXNameMangler::mangleInitListElements(const InitListExpr *InitList) {
mangleExpression(InitList->getInit(i));
}
-void CXXNameMangler::mangleDeclRefExpr(const NamedDecl *D) {
- switch (D->getKind()) {
- default:
- // <expr-primary> ::= L <mangled-name> E # external name
- Out << 'L';
- mangle(D);
- Out << 'E';
- break;
-
- case Decl::ParmVar:
- mangleFunctionParam(cast<ParmVarDecl>(D));
- break;
-
- case Decl::EnumConstant: {
- const EnumConstantDecl *ED = cast<EnumConstantDecl>(D);
- mangleIntegerLiteral(ED->getType(), ED->getInitVal());
- break;
- }
-
- case Decl::NonTypeTemplateParm:
- const NonTypeTemplateParmDecl *PD = cast<NonTypeTemplateParmDecl>(D);
- mangleTemplateParameter(PD->getDepth(), PD->getIndex());
- break;
- }
-}
-
-void CXXNameMangler::mangleExpression(const Expr *E, unsigned Arity) {
+void CXXNameMangler::mangleExpression(const Expr *E, unsigned Arity,
+ bool AsTemplateArg) {
// <expression> ::= <unary operator-name> <expression>
// ::= <binary operator-name> <expression> <expression>
// ::= <trinary operator-name> <expression> <expression> <expression>
@@ -3911,18 +3909,64 @@ void CXXNameMangler::mangleExpression(const Expr *E, unsigned Arity) {
// ::= at <type> # alignof (a type)
// ::= <template-param>
// ::= <function-param>
+ // ::= fpT # 'this' expression (part of <function-param>)
// ::= sr <type> <unqualified-name> # dependent name
// ::= sr <type> <unqualified-name> <template-args> # dependent template-id
// ::= ds <expression> <expression> # expr.*expr
// ::= sZ <template-param> # size of a parameter pack
// ::= sZ <function-param> # size of a function parameter pack
+ // ::= u <source-name> <template-arg>* E # vendor extended expression
// ::= <expr-primary>
// <expr-primary> ::= L <type> <value number> E # integer literal
- // ::= L <type <value float> E # floating literal
+ // ::= L <type> <value float> E # floating literal
+ // ::= L <type> <string type> E # string literal
+ // ::= L <nullptr type> E # nullptr literal "LDnE"
+ // ::= L <pointer type> 0 E # null pointer template argument
+ // ::= L <type> <real-part float> _ <imag-part float> E # complex floating point literal (C99); not used by clang
// ::= L <mangled-name> E # external name
- // ::= fpT # 'this' expression
QualType ImplicitlyConvertedToType;
+ // A top-level expression that's not <expr-primary> needs to be wrapped in
+ // X...E in a template arg.
+ bool IsPrimaryExpr = true;
+ auto NotPrimaryExpr = [&] {
+ if (AsTemplateArg && IsPrimaryExpr)
+ Out << 'X';
+ IsPrimaryExpr = false;
+ };
+
+ auto MangleDeclRefExpr = [&](const NamedDecl *D) {
+ switch (D->getKind()) {
+ default:
+ // <expr-primary> ::= L <mangled-name> E # external name
+ Out << 'L';
+ mangle(D);
+ Out << 'E';
+ break;
+
+ case Decl::ParmVar:
+ NotPrimaryExpr();
+ mangleFunctionParam(cast<ParmVarDecl>(D));
+ break;
+
+ case Decl::EnumConstant: {
+ // <expr-primary>
+ const EnumConstantDecl *ED = cast<EnumConstantDecl>(D);
+ mangleIntegerLiteral(ED->getType(), ED->getInitVal());
+ break;
+ }
+
+ case Decl::NonTypeTemplateParm:
+ NotPrimaryExpr();
+ const NonTypeTemplateParmDecl *PD = cast<NonTypeTemplateParmDecl>(D);
+ mangleTemplateParameter(PD->getDepth(), PD->getIndex());
+ break;
+ }
+ };
+
+ // 'goto recurse' is used when handling a simple "unwrapping" node which
+ // produces no output, where ImplicitlyConvertedToType and AsTemplateArg need
+ // to be preserved.
recurse:
switch (E->getStmtClass()) {
case Expr::NoStmtClass:
@@ -3994,6 +4038,7 @@ recurse:
case Expr::SourceLocExprClass:
case Expr::BuiltinBitCastExprClass:
{
+ NotPrimaryExpr();
if (!NullOut) {
// As bad as this diagnostic is, it's better than crashing.
DiagnosticsEngine &Diags = Context.getDiags();
@@ -4001,33 +4046,48 @@ recurse:
"cannot yet mangle expression type %0");
Diags.Report(E->getExprLoc(), DiagID)
<< E->getStmtClassName() << E->getSourceRange();
+ return;
}
break;
}
case Expr::CXXUuidofExprClass: {
+ NotPrimaryExpr();
const CXXUuidofExpr *UE = cast<CXXUuidofExpr>(E);
- if (UE->isTypeOperand()) {
- QualType UuidT = UE->getTypeOperand(Context.getASTContext());
- Out << "u8__uuidoft";
- mangleType(UuidT);
+ // As of clang 12, uuidof uses the vendor extended expression
+ // mangling. Previously, it used a special-cased nonstandard extension.
+ if (Context.getASTContext().getLangOpts().getClangABICompat() >
+ LangOptions::ClangABI::Ver11) {
+ Out << "u8__uuidof";
+ if (UE->isTypeOperand())
+ mangleType(UE->getTypeOperand(Context.getASTContext()));
+ else
+ mangleTemplateArgExpr(UE->getExprOperand());
+ Out << 'E';
} else {
- Expr *UuidExp = UE->getExprOperand();
- Out << "u8__uuidofz";
- mangleExpression(UuidExp, Arity);
+ if (UE->isTypeOperand()) {
+ QualType UuidT = UE->getTypeOperand(Context.getASTContext());
+ Out << "u8__uuidoft";
+ mangleType(UuidT);
+ } else {
+ Expr *UuidExp = UE->getExprOperand();
+ Out << "u8__uuidofz";
+ mangleExpression(UuidExp);
+ }
}
break;
}
// Even gcc-4.5 doesn't mangle this.
case Expr::BinaryConditionalOperatorClass: {
+ NotPrimaryExpr();
DiagnosticsEngine &Diags = Context.getDiags();
unsigned DiagID =
Diags.getCustomDiagID(DiagnosticsEngine::Error,
"?: operator with omitted middle operand cannot be mangled");
Diags.Report(E->getExprLoc(), DiagID)
<< E->getStmtClassName() << E->getSourceRange();
- break;
+ return;
}
// These are used for internal purposes and cannot be meaningfully mangled.
@@ -4035,6 +4095,7 @@ recurse:
llvm_unreachable("cannot mangle opaque value; mangling wrong thing?");
case Expr::InitListExprClass: {
+ NotPrimaryExpr();
Out << "il";
mangleInitListElements(cast<InitListExpr>(E));
Out << "E";
@@ -4042,6 +4103,7 @@ recurse:
}
case Expr::DesignatedInitExprClass: {
+ NotPrimaryExpr();
auto *DIE = cast<DesignatedInitExpr>(E);
for (const auto &Designator : DIE->designators()) {
if (Designator.isFieldDesignator()) {
@@ -4063,27 +4125,27 @@ recurse:
}
case Expr::CXXDefaultArgExprClass:
- mangleExpression(cast<CXXDefaultArgExpr>(E)->getExpr(), Arity);
- break;
+ E = cast<CXXDefaultArgExpr>(E)->getExpr();
+ goto recurse;
case Expr::CXXDefaultInitExprClass:
- mangleExpression(cast<CXXDefaultInitExpr>(E)->getExpr(), Arity);
- break;
+ E = cast<CXXDefaultInitExpr>(E)->getExpr();
+ goto recurse;
case Expr::CXXStdInitializerListExprClass:
- mangleExpression(cast<CXXStdInitializerListExpr>(E)->getSubExpr(), Arity);
- break;
+ E = cast<CXXStdInitializerListExpr>(E)->getSubExpr();
+ goto recurse;
case Expr::SubstNonTypeTemplateParmExprClass:
- mangleExpression(cast<SubstNonTypeTemplateParmExpr>(E)->getReplacement(),
- Arity);
- break;
+ E = cast<SubstNonTypeTemplateParmExpr>(E)->getReplacement();
+ goto recurse;
case Expr::UserDefinedLiteralClass:
// We follow g++'s approach of mangling a UDL as a call to the literal
// operator.
case Expr::CXXMemberCallExprClass: // fallthrough
case Expr::CallExprClass: {
+ NotPrimaryExpr();
const CallExpr *CE = cast<CallExpr>(E);
// <expression> ::= cp <simple-id> <expression>* E
@@ -4114,6 +4176,7 @@ recurse:
}
case Expr::CXXNewExprClass: {
+ NotPrimaryExpr();
const CXXNewExpr *New = cast<CXXNewExpr>(E);
if (New->isGlobalNew()) Out << "gs";
Out << (New->isArray() ? "na" : "nw");
@@ -4149,6 +4212,7 @@ recurse:
}
case Expr::CXXPseudoDestructorExprClass: {
+ NotPrimaryExpr();
const auto *PDE = cast<CXXPseudoDestructorExpr>(E);
if (const Expr *Base = PDE->getBase())
mangleMemberExprBase(Base, PDE->isArrow());
@@ -4175,6 +4239,7 @@ recurse:
}
case Expr::MemberExprClass: {
+ NotPrimaryExpr();
const MemberExpr *ME = cast<MemberExpr>(E);
mangleMemberExpr(ME->getBase(), ME->isArrow(),
ME->getQualifier(), nullptr,
@@ -4185,6 +4250,7 @@ recurse:
}
case Expr::UnresolvedMemberExprClass: {
+ NotPrimaryExpr();
const UnresolvedMemberExpr *ME = cast<UnresolvedMemberExpr>(E);
mangleMemberExpr(ME->isImplicitAccess() ? nullptr : ME->getBase(),
ME->isArrow(), ME->getQualifier(), nullptr,
@@ -4195,6 +4261,7 @@ recurse:
}
case Expr::CXXDependentScopeMemberExprClass: {
+ NotPrimaryExpr();
const CXXDependentScopeMemberExpr *ME
= cast<CXXDependentScopeMemberExpr>(E);
mangleMemberExpr(ME->isImplicitAccess() ? nullptr : ME->getBase(),
@@ -4207,6 +4274,7 @@ recurse:
}
case Expr::UnresolvedLookupExprClass: {
+ NotPrimaryExpr();
const UnresolvedLookupExpr *ULE = cast<UnresolvedLookupExpr>(E);
mangleUnresolvedName(ULE->getQualifier(), ULE->getName(),
ULE->getTemplateArgs(), ULE->getNumTemplateArgs(),
@@ -4215,6 +4283,7 @@ recurse:
}
case Expr::CXXUnresolvedConstructExprClass: {
+ NotPrimaryExpr();
const CXXUnresolvedConstructExpr *CE = cast<CXXUnresolvedConstructExpr>(E);
unsigned N = CE->getNumArgs();
@@ -4225,7 +4294,7 @@ recurse:
mangleType(CE->getType());
mangleInitListElements(IL);
Out << "E";
- return;
+ break;
}
Out << "cv";
@@ -4237,14 +4306,17 @@ recurse:
}
case Expr::CXXConstructExprClass: {
+ // An implicit cast is silent, thus may contain <expr-primary>.
const auto *CE = cast<CXXConstructExpr>(E);
if (!CE->isListInitialization() || CE->isStdInitListInitialization()) {
assert(
CE->getNumArgs() >= 1 &&
(CE->getNumArgs() == 1 || isa<CXXDefaultArgExpr>(CE->getArg(1))) &&
"implicit CXXConstructExpr must have one argument");
- return mangleExpression(cast<CXXConstructExpr>(E)->getArg(0));
+ E = cast<CXXConstructExpr>(E)->getArg(0);
+ goto recurse;
}
+ NotPrimaryExpr();
Out << "il";
for (auto *E : CE->arguments())
mangleExpression(E);
@@ -4253,6 +4325,7 @@ recurse:
}
case Expr::CXXTemporaryObjectExprClass: {
+ NotPrimaryExpr();
const auto *CE = cast<CXXTemporaryObjectExpr>(E);
unsigned N = CE->getNumArgs();
bool List = CE->isListInitialization();
@@ -4282,17 +4355,20 @@ recurse:
}
case Expr::CXXScalarValueInitExprClass:
+ NotPrimaryExpr();
Out << "cv";
mangleType(E->getType());
Out << "_E";
break;
case Expr::CXXNoexceptExprClass:
+ NotPrimaryExpr();
Out << "nx";
mangleExpression(cast<CXXNoexceptExpr>(E)->getOperand());
break;
case Expr::UnaryExprOrTypeTraitExprClass: {
+ // Non-instantiation-dependent traits are an <expr-primary> integer literal.
const UnaryExprOrTypeTraitExpr *SAE = cast<UnaryExprOrTypeTraitExpr>(E);
if (!SAE->isInstantiationDependent()) {
@@ -4312,13 +4388,41 @@ recurse:
break;
}
+ NotPrimaryExpr(); // But otherwise, they are not.
+
+ auto MangleAlignofSizeofArg = [&] {
+ if (SAE->isArgumentType()) {
+ Out << 't';
+ mangleType(SAE->getArgumentType());
+ } else {
+ Out << 'z';
+ mangleExpression(SAE->getArgumentExpr());
+ }
+ };
+
switch(SAE->getKind()) {
case UETT_SizeOf:
Out << 's';
+ MangleAlignofSizeofArg();
break;
case UETT_PreferredAlignOf:
+ // As of clang 12, we mangle __alignof__ differently than alignof. (They
+ // have acted differently since Clang 8, but were previously mangled the
+ // same.)
+ if (Context.getASTContext().getLangOpts().getClangABICompat() >
+ LangOptions::ClangABI::Ver11) {
+ Out << "u11__alignof__";
+ if (SAE->isArgumentType())
+ mangleType(SAE->getArgumentType());
+ else
+ mangleTemplateArgExpr(SAE->getArgumentExpr());
+ Out << 'E';
+ break;
+ }
+ LLVM_FALLTHROUGH;
case UETT_AlignOf:
Out << 'a';
+ MangleAlignofSizeofArg();
break;
case UETT_VecStep: {
DiagnosticsEngine &Diags = Context.getDiags();
@@ -4336,17 +4440,11 @@ recurse:
return;
}
}
- if (SAE->isArgumentType()) {
- Out << 't';
- mangleType(SAE->getArgumentType());
- } else {
- Out << 'z';
- mangleExpression(SAE->getArgumentExpr());
- }
break;
}
case Expr::CXXThrowExprClass: {
+ NotPrimaryExpr();
const CXXThrowExpr *TE = cast<CXXThrowExpr>(E);
// <expression> ::= tw <expression> # throw expression
// ::= tr # rethrow
@@ -4360,6 +4458,7 @@ recurse:
}
case Expr::CXXTypeidExprClass: {
+ NotPrimaryExpr();
const CXXTypeidExpr *TIE = cast<CXXTypeidExpr>(E);
// <expression> ::= ti <type> # typeid (type)
// ::= te <expression> # typeid (expression)
@@ -4374,6 +4473,7 @@ recurse:
}
case Expr::CXXDeleteExprClass: {
+ NotPrimaryExpr();
const CXXDeleteExpr *DE = cast<CXXDeleteExpr>(E);
// <expression> ::= [gs] dl <expression> # [::] delete expr
// ::= [gs] da <expression> # [::] delete [] expr
@@ -4384,6 +4484,7 @@ recurse:
}
case Expr::UnaryOperatorClass: {
+ NotPrimaryExpr();
const UnaryOperator *UO = cast<UnaryOperator>(E);
mangleOperatorName(UnaryOperator::getOverloadedOperator(UO->getOpcode()),
/*Arity=*/1);
@@ -4392,6 +4493,7 @@ recurse:
}
case Expr::ArraySubscriptExprClass: {
+ NotPrimaryExpr();
const ArraySubscriptExpr *AE = cast<ArraySubscriptExpr>(E);
// Array subscript is treated as a syntactically weird form of
@@ -4403,6 +4505,7 @@ recurse:
}
case Expr::MatrixSubscriptExprClass: {
+ NotPrimaryExpr();
const MatrixSubscriptExpr *ME = cast<MatrixSubscriptExpr>(E);
Out << "ixix";
mangleExpression(ME->getBase());
@@ -4413,6 +4516,7 @@ recurse:
case Expr::CompoundAssignOperatorClass: // fallthrough
case Expr::BinaryOperatorClass: {
+ NotPrimaryExpr();
const BinaryOperator *BO = cast<BinaryOperator>(E);
if (BO->getOpcode() == BO_PtrMemD)
Out << "ds";
@@ -4425,6 +4529,7 @@ recurse:
}
case Expr::CXXRewrittenBinaryOperatorClass: {
+ NotPrimaryExpr();
// The mangled form represents the original syntax.
CXXRewrittenBinaryOperator::DecomposedForm Decomposed =
cast<CXXRewrittenBinaryOperator>(E)->getDecomposedForm();
@@ -4436,6 +4541,7 @@ recurse:
}
case Expr::ConditionalOperatorClass: {
+ NotPrimaryExpr();
const ConditionalOperator *CO = cast<ConditionalOperator>(E);
mangleOperatorName(OO_Conditional, /*Arity=*/3);
mangleExpression(CO->getCond());
@@ -4451,19 +4557,22 @@ recurse:
}
case Expr::ObjCBridgedCastExprClass: {
+ NotPrimaryExpr();
// Mangle ownership casts as a vendor extended operator __bridge,
// __bridge_transfer, or __bridge_retain.
StringRef Kind = cast<ObjCBridgedCastExpr>(E)->getBridgeKindName();
Out << "v1U" << Kind.size() << Kind;
+ mangleCastExpression(E, "cv");
+ break;
}
- // Fall through to mangle the cast itself.
- LLVM_FALLTHROUGH;
case Expr::CStyleCastExprClass:
+ NotPrimaryExpr();
mangleCastExpression(E, "cv");
break;
case Expr::CXXFunctionalCastExprClass: {
+ NotPrimaryExpr();
auto *Sub = cast<ExplicitCastExpr>(E)->getSubExpr()->IgnoreImplicit();
// FIXME: Add isImplicit to CXXConstructExpr.
if (auto *CCE = dyn_cast<CXXConstructExpr>(Sub))
@@ -4483,22 +4592,28 @@ recurse:
}
case Expr::CXXStaticCastExprClass:
+ NotPrimaryExpr();
mangleCastExpression(E, "sc");
break;
case Expr::CXXDynamicCastExprClass:
+ NotPrimaryExpr();
mangleCastExpression(E, "dc");
break;
case Expr::CXXReinterpretCastExprClass:
+ NotPrimaryExpr();
mangleCastExpression(E, "rc");
break;
case Expr::CXXConstCastExprClass:
+ NotPrimaryExpr();
mangleCastExpression(E, "cc");
break;
case Expr::CXXAddrspaceCastExprClass:
+ NotPrimaryExpr();
mangleCastExpression(E, "ac");
break;
case Expr::CXXOperatorCallExprClass: {
+ NotPrimaryExpr();
const CXXOperatorCallExpr *CE = cast<CXXOperatorCallExpr>(E);
unsigned NumArgs = CE->getNumArgs();
// A CXXOperatorCallExpr for OO_Arrow models only semantics, not syntax
@@ -4512,9 +4627,8 @@ recurse:
}
case Expr::ParenExprClass:
- mangleExpression(cast<ParenExpr>(E)->getSubExpr(), Arity);
- break;
-
+ E = cast<ParenExpr>(E)->getSubExpr();
+ goto recurse;
case Expr::ConceptSpecializationExprClass: {
// <expr-primary> ::= L <mangled-name> E # external name
@@ -4528,10 +4642,12 @@ recurse:
}
case Expr::DeclRefExprClass:
- mangleDeclRefExpr(cast<DeclRefExpr>(E)->getDecl());
+ // MangleDeclRefExpr helper handles primary-vs-nonprimary
+ MangleDeclRefExpr(cast<DeclRefExpr>(E)->getDecl());
break;
case Expr::SubstNonTypeTemplateParmPackExprClass:
+ NotPrimaryExpr();
// FIXME: not clear how to mangle this!
// template <unsigned N...> class A {
// template <class U...> void foo(U (&x)[N]...);
@@ -4540,14 +4656,16 @@ recurse:
break;
case Expr::FunctionParmPackExprClass: {
+ NotPrimaryExpr();
// FIXME: not clear how to mangle this!
const FunctionParmPackExpr *FPPE = cast<FunctionParmPackExpr>(E);
Out << "v110_SUBSTPACK";
- mangleDeclRefExpr(FPPE->getParameterPack());
+ MangleDeclRefExpr(FPPE->getParameterPack());
break;
}
case Expr::DependentScopeDeclRefExprClass: {
+ NotPrimaryExpr();
const DependentScopeDeclRefExpr *DRE = cast<DependentScopeDeclRefExpr>(E);
mangleUnresolvedName(DRE->getQualifier(), DRE->getDeclName(),
DRE->getTemplateArgs(), DRE->getNumTemplateArgs(),
@@ -4556,24 +4674,27 @@ recurse:
}
case Expr::CXXBindTemporaryExprClass:
- mangleExpression(cast<CXXBindTemporaryExpr>(E)->getSubExpr());
- break;
+ E = cast<CXXBindTemporaryExpr>(E)->getSubExpr();
+ goto recurse;
case Expr::ExprWithCleanupsClass:
- mangleExpression(cast<ExprWithCleanups>(E)->getSubExpr(), Arity);
- break;
+ E = cast<ExprWithCleanups>(E)->getSubExpr();
+ goto recurse;
case Expr::FloatingLiteralClass: {
+ // <expr-primary>
const FloatingLiteral *FL = cast<FloatingLiteral>(E);
mangleFloatLiteral(FL->getType(), FL->getValue());
break;
}
case Expr::FixedPointLiteralClass:
+ // Currently unimplemented -- might be <expr-primary> in future?
mangleFixedPointLiteral();
break;
case Expr::CharacterLiteralClass:
+ // <expr-primary>
Out << 'L';
mangleType(E->getType());
Out << cast<CharacterLiteral>(E)->getValue();
@@ -4582,18 +4703,21 @@ recurse:
// FIXME. __objc_yes/__objc_no are mangled same as true/false
case Expr::ObjCBoolLiteralExprClass:
+ // <expr-primary>
Out << "Lb";
Out << (cast<ObjCBoolLiteralExpr>(E)->getValue() ? '1' : '0');
Out << 'E';
break;
case Expr::CXXBoolLiteralExprClass:
+ // <expr-primary>
Out << "Lb";
Out << (cast<CXXBoolLiteralExpr>(E)->getValue() ? '1' : '0');
Out << 'E';
break;
case Expr::IntegerLiteralClass: {
+ // <expr-primary>
llvm::APSInt Value(cast<IntegerLiteral>(E)->getValue());
if (E->getType()->isSignedIntegerType())
Value.setIsSigned(true);
@@ -4602,6 +4726,7 @@ recurse:
}
case Expr::ImaginaryLiteralClass: {
+ // <expr-primary>
const ImaginaryLiteral *IE = cast<ImaginaryLiteral>(E);
// Mangle as if a complex literal.
// Proposal from David Vandevoorde, 2010.06.30.
@@ -4625,6 +4750,7 @@ recurse:
}
case Expr::StringLiteralClass: {
+ // <expr-primary>
// Revised proposal from David Vandervoorde, 2010.07.15.
Out << 'L';
assert(isa<ConstantArrayType>(E->getType()));
@@ -4634,21 +4760,25 @@ recurse:
}
case Expr::GNUNullExprClass:
+ // <expr-primary>
// Mangle as if an integer literal 0.
mangleIntegerLiteral(E->getType(), llvm::APSInt(32));
break;
case Expr::CXXNullPtrLiteralExprClass: {
+ // <expr-primary>
Out << "LDnE";
break;
}
case Expr::PackExpansionExprClass:
+ NotPrimaryExpr();
Out << "sp";
mangleExpression(cast<PackExpansionExpr>(E)->getPattern());
break;
case Expr::SizeOfPackExprClass: {
+ NotPrimaryExpr();
auto *SPE = cast<SizeOfPackExpr>(E);
if (SPE->isPartiallySubstituted()) {
Out << "sP";
@@ -4673,12 +4803,12 @@ recurse:
break;
}
- case Expr::MaterializeTemporaryExprClass: {
- mangleExpression(cast<MaterializeTemporaryExpr>(E)->getSubExpr());
- break;
- }
+ case Expr::MaterializeTemporaryExprClass:
+ E = cast<MaterializeTemporaryExpr>(E)->getSubExpr();
+ goto recurse;
case Expr::CXXFoldExprClass: {
+ NotPrimaryExpr();
auto *FE = cast<CXXFoldExpr>(E);
if (FE->isLeftFold())
Out << (FE->getInit() ? "fL" : "fl");
@@ -4700,27 +4830,34 @@ recurse:
}
case Expr::CXXThisExprClass:
+ NotPrimaryExpr();
Out << "fpT";
break;
case Expr::CoawaitExprClass:
// FIXME: Propose a non-vendor mangling.
+ NotPrimaryExpr();
Out << "v18co_await";
mangleExpression(cast<CoawaitExpr>(E)->getOperand());
break;
case Expr::DependentCoawaitExprClass:
// FIXME: Propose a non-vendor mangling.
+ NotPrimaryExpr();
Out << "v18co_await";
mangleExpression(cast<DependentCoawaitExpr>(E)->getOperand());
break;
case Expr::CoyieldExprClass:
// FIXME: Propose a non-vendor mangling.
+ NotPrimaryExpr();
Out << "v18co_yield";
mangleExpression(cast<CoawaitExpr>(E)->getOperand());
break;
}
+
+ if (AsTemplateArg && !IsPrimaryExpr)
+ Out << 'E';
}
/// Mangle an expression which refers to a parameter variable.
@@ -4970,26 +5107,9 @@ void CXXNameMangler::mangleTemplateArg(TemplateArgument A, bool NeedExactType) {
Out << "Dp";
mangleType(A.getAsTemplateOrTemplatePattern());
break;
- case TemplateArgument::Expression: {
- // It's possible to end up with a DeclRefExpr here in certain
- // dependent cases, in which case we should mangle as a
- // declaration.
- const Expr *E = A.getAsExpr()->IgnoreParenImpCasts();
- if (const DeclRefExpr *DRE = dyn_cast<DeclRefExpr>(E)) {
- const ValueDecl *D = DRE->getDecl();
- if (isa<VarDecl>(D) || isa<FunctionDecl>(D)) {
- Out << 'L';
- mangle(D);
- Out << 'E';
- break;
- }
- }
-
- Out << 'X';
- mangleExpression(E);
- Out << 'E';
+ case TemplateArgument::Expression:
+ mangleTemplateArgExpr(A.getAsExpr());
break;
- }
case TemplateArgument::Integral:
mangleIntegerLiteral(A.getIntegralType(), A.getAsIntegral());
break;
@@ -5044,6 +5164,38 @@ void CXXNameMangler::mangleTemplateArg(TemplateArgument A, bool NeedExactType) {
}
}
+void CXXNameMangler::mangleTemplateArgExpr(const Expr *E) {
+ ASTContext &Ctx = Context.getASTContext();
+ if (Ctx.getLangOpts().getClangABICompat() > LangOptions::ClangABI::Ver11) {
+ mangleExpression(E, UnknownArity, /*AsTemplateArg=*/true);
+ return;
+ }
+
+ // Prior to Clang 12, we didn't omit the X .. E around <expr-primary>
+ // correctly in cases where the template argument was
+ // constructed from an expression rather than an already-evaluated
+ // literal. In such a case, we would then e.g. emit 'XLi0EE' instead of
+ // 'Li0E'.
+ //
+ // We did special-case DeclRefExpr to attempt to DTRT for that one
+ // expression-kind, but while doing so, unfortunately handled ParmVarDecl
+ // (subtype of VarDecl) _incorrectly_, and emitted 'L_Z .. E' instead of
+ // the proper 'Xfp_E'.
+ E = E->IgnoreParenImpCasts();
+ if (const DeclRefExpr *DRE = dyn_cast<DeclRefExpr>(E)) {
+ const ValueDecl *D = DRE->getDecl();
+ if (isa<VarDecl>(D) || isa<FunctionDecl>(D)) {
+ Out << 'L';
+ mangle(D);
+ Out << 'E';
+ return;
+ }
+ }
+ Out << 'X';
+ mangleExpression(E);
+ Out << 'E';
+}
+
/// Determine whether a given value is equivalent to zero-initialization for
/// the purpose of discarding a trailing portion of a 'tl' mangling.
///
diff --git a/clang/lib/AST/MicrosoftCXXABI.cpp b/clang/lib/AST/MicrosoftCXXABI.cpp
index f9f9fe985b6f..166aa3b3bd60 100644
--- a/clang/lib/AST/MicrosoftCXXABI.cpp
+++ b/clang/lib/AST/MicrosoftCXXABI.cpp
@@ -16,6 +16,7 @@
#include "clang/AST/Attr.h"
#include "clang/AST/CXXInheritance.h"
#include "clang/AST/DeclCXX.h"
+#include "clang/AST/Mangle.h"
#include "clang/AST/MangleNumberingContext.h"
#include "clang/AST/RecordLayout.h"
#include "clang/AST/Type.h"
@@ -64,6 +65,19 @@ public:
}
};
+class MSHIPNumberingContext : public MicrosoftNumberingContext {
+ std::unique_ptr<MangleNumberingContext> DeviceCtx;
+
+public:
+ MSHIPNumberingContext(MangleContext *DeviceMangler) {
+ DeviceCtx = createItaniumNumberingContext(DeviceMangler);
+ }
+
+ unsigned getDeviceManglingNumber(const CXXMethodDecl *CallOperator) override {
+ return DeviceCtx->getManglingNumber(CallOperator);
+ }
+};
+
class MicrosoftCXXABI : public CXXABI {
ASTContext &Context;
llvm::SmallDenseMap<CXXRecordDecl *, CXXConstructorDecl *> RecordToCopyCtor;
@@ -73,8 +87,20 @@ class MicrosoftCXXABI : public CXXABI {
llvm::SmallDenseMap<TagDecl *, TypedefNameDecl *>
UnnamedTagDeclToTypedefNameDecl;
+ // MangleContext for the device numbering context, which is based on the
+ // Itanium C++ ABI.
+ std::unique_ptr<MangleContext> DeviceMangler;
+
public:
- MicrosoftCXXABI(ASTContext &Ctx) : Context(Ctx) { }
+ MicrosoftCXXABI(ASTContext &Ctx) : Context(Ctx) {
+ if (Context.getLangOpts().CUDA && Context.getAuxTargetInfo()) {
+ assert(Context.getTargetInfo().getCXXABI().isMicrosoft() &&
+ Context.getAuxTargetInfo()->getCXXABI().isItaniumFamily() &&
+ "Unexpected combination of C++ ABIs.");
+ DeviceMangler.reset(
+ Context.createMangleContext(Context.getAuxTargetInfo()));
+ }
+ }
MemberPointerInfo
getMemberPointerInfo(const MemberPointerType *MPT) const override;
@@ -133,6 +159,10 @@ public:
std::unique_ptr<MangleNumberingContext>
createMangleNumberingContext() const override {
+ if (Context.getLangOpts().CUDA && Context.getAuxTargetInfo()) {
+ assert(DeviceMangler && "Missing device mangler");
+ return std::make_unique<MSHIPNumberingContext>(DeviceMangler.get());
+ }
return std::make_unique<MicrosoftNumberingContext>();
}
};
@@ -266,4 +296,3 @@ CXXABI::MemberPointerInfo MicrosoftCXXABI::getMemberPointerInfo(
CXXABI *clang::CreateMicrosoftCXXABI(ASTContext &Ctx) {
return new MicrosoftCXXABI(Ctx);
}
-
diff --git a/clang/lib/ASTMatchers/ASTMatchFinder.cpp b/clang/lib/ASTMatchers/ASTMatchFinder.cpp
index 8ddd3c87e09d..69957a952d17 100644
--- a/clang/lib/ASTMatchers/ASTMatchFinder.cpp
+++ b/clang/lib/ASTMatchers/ASTMatchFinder.cpp
@@ -243,10 +243,14 @@ public:
return true;
ScopedIncrement ScopedDepth(&CurrentDepth);
if (auto *Init = Node->getInit())
- if (!match(*Init))
+ if (!traverse(*Init))
return false;
- if (!match(*Node->getLoopVariable()) || !match(*Node->getRangeInit()) ||
- !match(*Node->getBody()))
+ if (!match(*Node->getLoopVariable()))
+ return false;
+ if (match(*Node->getRangeInit()))
+ if (!VisitorBase::TraverseStmt(Node->getRangeInit()))
+ return false;
+ if (!match(*Node->getBody()))
return false;
return VisitorBase::TraverseStmt(Node->getBody());
}
@@ -291,7 +295,7 @@ public:
if (!match(*Node->getBody()))
return false;
- return true;
+ return VisitorBase::TraverseStmt(Node->getBody());
}
bool shouldVisitTemplateInstantiations() const { return true; }
@@ -488,15 +492,21 @@ public:
bool dataTraverseNode(Stmt *S, DataRecursionQueue *Queue) {
if (auto *RF = dyn_cast<CXXForRangeStmt>(S)) {
- for (auto *SubStmt : RF->children()) {
- if (SubStmt == RF->getInit() || SubStmt == RF->getLoopVarStmt() ||
- SubStmt == RF->getRangeInit() || SubStmt == RF->getBody()) {
- TraverseStmt(SubStmt, Queue);
- } else {
- ASTNodeNotSpelledInSourceScope RAII(this, true);
- TraverseStmt(SubStmt, Queue);
+ {
+ ASTNodeNotAsIsSourceScope RAII(this, true);
+ TraverseStmt(RF->getInit());
+ // Don't traverse under the loop variable
+ match(*RF->getLoopVariable());
+ TraverseStmt(RF->getRangeInit());
+ }
+ {
+ ASTNodeNotSpelledInSourceScope RAII(this, true);
+ for (auto *SubStmt : RF->children()) {
+ if (SubStmt != RF->getBody())
+ TraverseStmt(SubStmt);
}
}
+ TraverseStmt(RF->getBody());
return true;
} else if (auto *RBO = dyn_cast<CXXRewrittenBinaryOperator>(S)) {
{
@@ -556,9 +566,9 @@ public:
if (LE->hasExplicitResultType())
TraverseTypeLoc(Proto.getReturnLoc());
TraverseStmt(LE->getTrailingRequiresClause());
-
- TraverseStmt(LE->getBody());
}
+
+ TraverseStmt(LE->getBody());
return true;
}
return RecursiveASTVisitor<MatchASTVisitor>::dataTraverseNode(S, Queue);
@@ -697,6 +707,10 @@ public:
bool shouldVisitTemplateInstantiations() const { return true; }
bool shouldVisitImplicitCode() const { return true; }
+ // We visit the lambda body explicitly, so instruct the RAV
+ // to not visit it on our behalf too.
+ bool shouldVisitLambdaBody() const { return false; }
+
bool IsMatchingInASTNodeNotSpelledInSource() const override {
return TraversingASTNodeNotSpelledInSource;
}
@@ -823,6 +837,14 @@ private:
if (EnableCheckProfiling)
Timer.setBucket(&TimeByBucket[MP.second->getID()]);
BoundNodesTreeBuilder Builder;
+
+ {
+ TraversalKindScope RAII(getASTContext(), MP.first.getTraversalKind());
+ if (getASTContext().getParentMapContext().traverseIgnored(DynNode) !=
+ DynNode)
+ continue;
+ }
+
if (MP.first.matches(DynNode, this, &Builder)) {
MatchVisitor Visitor(ActiveASTContext, MP.second);
Builder.visitMatches(&Visitor);
diff --git a/clang/lib/ASTMatchers/ASTMatchersInternal.cpp b/clang/lib/ASTMatchers/ASTMatchersInternal.cpp
index 6c7e14e3499a..705f1cdf3153 100644
--- a/clang/lib/ASTMatchers/ASTMatchersInternal.cpp
+++ b/clang/lib/ASTMatchers/ASTMatchersInternal.cpp
@@ -732,7 +732,7 @@ const internal::VariadicDynCastAllOfMatcher<Decl, TypeAliasDecl> typeAliasDecl;
const internal::VariadicDynCastAllOfMatcher<Decl, TypeAliasTemplateDecl>
typeAliasTemplateDecl;
const internal::VariadicAllOfMatcher<Decl> decl;
-const internal::VariadicAllOfMatcher<DecompositionDecl> decompositionDecl;
+const internal::VariadicDynCastAllOfMatcher<Decl, DecompositionDecl>
+    decompositionDecl;
const internal::VariadicDynCastAllOfMatcher<Decl, LinkageSpecDecl>
linkageSpecDecl;
const internal::VariadicDynCastAllOfMatcher<Decl, NamedDecl> namedDecl;
diff --git a/clang/lib/Basic/ProfileList.cpp b/clang/lib/Basic/ProfileList.cpp
index 56bc37a79301..2cb05c1c3c07 100644
--- a/clang/lib/Basic/ProfileList.cpp
+++ b/clang/lib/Basic/ProfileList.cpp
@@ -82,6 +82,7 @@ static StringRef getSectionName(CodeGenOptions::ProfileInstrKind Kind) {
case CodeGenOptions::ProfileCSIRInstr:
return "csllvm";
}
+ llvm_unreachable("Unhandled CodeGenOptions::ProfileInstrKind enum");
}
llvm::Optional<bool>
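The added line follows LLVM's covered-switch idiom: no default case, so -Wswitch flags newly added enumerators, plus an unreachable marker after the switch to silence fall-off-the-end warnings. The generic shape:

```cpp
#include "llvm/Support/ErrorHandling.h"

enum class Kind { A, B };

static const char *name(Kind K) {
  switch (K) {
  case Kind::A:
    return "a";
  case Kind::B:
    return "b";
  }
  llvm_unreachable("fully covered switch above");
}
```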
diff --git a/clang/lib/Basic/Targets/PPC.cpp b/clang/lib/Basic/Targets/PPC.cpp
index cfede6e6e756..ff09c0fa2a23 100644
--- a/clang/lib/Basic/Targets/PPC.cpp
+++ b/clang/lib/Basic/Targets/PPC.cpp
@@ -318,9 +318,6 @@ bool PPCTargetInfo::initFeatureMap(
.Case("pwr9", true)
.Case("pwr8", true)
.Default(false);
- Features["float128"] = llvm::StringSwitch<bool>(CPU)
- .Case("pwr9", true)
- .Default(false);
Features["spe"] = llvm::StringSwitch<bool>(CPU)
.Case("8548", true)
diff --git a/clang/lib/Basic/Targets/RISCV.cpp b/clang/lib/Basic/Targets/RISCV.cpp
index 0bf02e605740..786201ea340d 100644
--- a/clang/lib/Basic/Targets/RISCV.cpp
+++ b/clang/lib/Basic/Targets/RISCV.cpp
@@ -150,7 +150,7 @@ void RISCVTargetInfo::getTargetDefines(const LangOptions &Opts,
}
if (HasV) {
- Builder.defineMacro("__riscv_v", "1000000");
+ Builder.defineMacro("__riscv_v", "10000");
Builder.defineMacro("__riscv_vector");
}
@@ -191,10 +191,10 @@ void RISCVTargetInfo::getTargetDefines(const LangOptions &Opts,
Builder.defineMacro("__riscv_zfh", "1000");
if (HasZvamo)
- Builder.defineMacro("__riscv_zvamo", "1000000");
+ Builder.defineMacro("__riscv_zvamo", "10000");
if (HasZvlsseg)
- Builder.defineMacro("__riscv_zvlsseg", "1000000");
+ Builder.defineMacro("__riscv_zvlsseg", "10000");
}
/// Return true if has this feature, need to sync with handleTargetFeatures.
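These macros encode extension versions as major * 1000000 + minor * 1000 + patch, so the change lowers the advertised draft-spec version from v1.0 (1000000) to v0.10 (10000), matching the driver-side version change below. A feature-test sketch (hypothetical macro name):

```cpp
// Guard vector code on the draft specification version the compiler targets.
#if defined(__riscv_v) && __riscv_v >= 10000 // v0.10 or newer
#define HAVE_RVV_0_10 1
#else
#define HAVE_RVV_0_10 0
#endif
```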
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 113541bd5024..10e3820d9657 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -13794,12 +13794,14 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
case X86::BI__builtin_ia32_reduce_fadd_ps512: {
Function *F =
CGM.getIntrinsic(Intrinsic::vector_reduce_fadd, Ops[1]->getType());
+ Builder.getFastMathFlags().setAllowReassoc(true);
return Builder.CreateCall(F, {Ops[0], Ops[1]});
}
case X86::BI__builtin_ia32_reduce_fmul_pd512:
case X86::BI__builtin_ia32_reduce_fmul_ps512: {
Function *F =
CGM.getIntrinsic(Intrinsic::vector_reduce_fmul, Ops[1]->getType());
+ Builder.getFastMathFlags().setAllowReassoc(true);
return Builder.CreateCall(F, {Ops[0], Ops[1]});
}
case X86::BI__builtin_ia32_reduce_mul_d512:
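Setting AllowReassoc matters because these builtins back the _mm512_reduce_* helpers in avx512fintrin.h (also touched in this import), and the reassociation flag lets the backend lower the reduction as a tree rather than a strictly ordered chain. A user-level sketch:

```cpp
#include <immintrin.h>

// Compile with AVX-512F enabled; the summation order is not the strict
// left-to-right order, by design.
float sum(__m512 v) { return _mm512_reduce_add_ps(v); }
```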
diff --git a/clang/lib/CodeGen/CGCUDANV.cpp b/clang/lib/CodeGen/CGCUDANV.cpp
index 33a2d6f4483e..e03631a7243a 100644
--- a/clang/lib/CodeGen/CGCUDANV.cpp
+++ b/clang/lib/CodeGen/CGCUDANV.cpp
@@ -184,6 +184,14 @@ CGNVCUDARuntime::CGNVCUDARuntime(CodeGenModule &CGM)
CharPtrTy = llvm::PointerType::getUnqual(Types.ConvertType(Ctx.CharTy));
VoidPtrTy = cast<llvm::PointerType>(Types.ConvertType(Ctx.VoidPtrTy));
VoidPtrPtrTy = VoidPtrTy->getPointerTo();
+ if (CGM.getContext().getAuxTargetInfo()) {
+ // If the host and device have different C++ ABIs, mark this as the device
+ // mangle context so that mangling retrieves the additional device-side
+ // lambda mangling number instead of the regular host one.
+ DeviceMC->setDeviceMangleContext(
+ CGM.getContext().getTargetInfo().getCXXABI().isMicrosoft() &&
+ CGM.getContext().getAuxTargetInfo()->getCXXABI().isItaniumFamily());
+ }
}
llvm::FunctionCallee CGNVCUDARuntime::getSetupArgumentFn() const {
diff --git a/clang/lib/Driver/ToolChains/Arch/RISCV.cpp b/clang/lib/Driver/ToolChains/Arch/RISCV.cpp
index ffae47e5672e..c7f2a3ea5e02 100644
--- a/clang/lib/Driver/ToolChains/Arch/RISCV.cpp
+++ b/clang/lib/Driver/ToolChains/Arch/RISCV.cpp
@@ -63,7 +63,7 @@ isExperimentalExtension(StringRef Ext) {
Ext == "zbr" || Ext == "zbs" || Ext == "zbt" || Ext == "zbproposedc")
return RISCVExtensionVersion{"0", "93"};
if (Ext == "v" || Ext == "zvamo" || Ext == "zvlsseg")
- return RISCVExtensionVersion{"1", "0"};
+ return RISCVExtensionVersion{"0", "10"};
if (Ext == "zfh")
return RISCVExtensionVersion{"0", "1"};
return None;
diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp
index fdb8a58cd1b3..f8e637974662 100644
--- a/clang/lib/Driver/ToolChains/Clang.cpp
+++ b/clang/lib/Driver/ToolChains/Clang.cpp
@@ -4669,20 +4669,14 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
}
}
- if (Triple.isOSAIX() && Args.hasArg(options::OPT_maltivec)) {
- if (Args.getLastArg(options::OPT_mabi_EQ_vec_extabi)) {
- CmdArgs.push_back("-mabi=vec-extabi");
- } else {
- D.Diag(diag::err_aix_default_altivec_abi);
- }
- }
-
if (Arg *A = Args.getLastArg(options::OPT_mabi_EQ_vec_extabi,
options::OPT_mabi_EQ_vec_default)) {
if (!Triple.isOSAIX())
D.Diag(diag::err_drv_unsupported_opt_for_target)
<< A->getSpelling() << RawTriple.str();
- if (A->getOption().getID() == options::OPT_mabi_EQ_vec_default)
+ if (A->getOption().getID() == options::OPT_mabi_EQ_vec_extabi)
+ CmdArgs.push_back("-mabi=vec-extabi");
+ else
D.Diag(diag::err_aix_default_altivec_abi);
}
diff --git a/clang/lib/Driver/ToolChains/CommonArgs.cpp b/clang/lib/Driver/ToolChains/CommonArgs.cpp
index 6a95aa5ec628..bcaea71dca94 100644
--- a/clang/lib/Driver/ToolChains/CommonArgs.cpp
+++ b/clang/lib/Driver/ToolChains/CommonArgs.cpp
@@ -605,6 +605,11 @@ void tools::addLTOOptions(const ToolChain &ToolChain, const ArgList &Args,
CmdArgs.push_back("-plugin-opt=new-pass-manager");
}
+ // Pass an option to enable pseudo probe emission.
+ if (Args.hasFlag(options::OPT_fpseudo_probe_for_profiling,
+ options::OPT_fno_pseudo_probe_for_profiling, false))
+ CmdArgs.push_back("-plugin-opt=pseudo-probe-for-profiling");
+
// Setup statistics file output.
SmallString<128> StatsFile = getStatsFileName(Args, Output, Input, D);
if (!StatsFile.empty())
diff --git a/clang/lib/Driver/ToolChains/Linux.cpp b/clang/lib/Driver/ToolChains/Linux.cpp
index e17a6bd4bdd2..9663a7390ada 100644
--- a/clang/lib/Driver/ToolChains/Linux.cpp
+++ b/clang/lib/Driver/ToolChains/Linux.cpp
@@ -236,15 +236,6 @@ Linux::Linux(const Driver &D, const llvm::Triple &Triple, const ArgList &Args)
ExtraOpts.push_back("relro");
}
- if (Triple.isAndroid() && Triple.isAndroidVersionLT(29)) {
- // https://github.com/android/ndk/issues/1196
- // The unwinder used by the crash handler on versions of Android prior to
- // API 29 did not correctly handle binaries built with rosegment, which is
- // enabled by default for LLD. Android only supports LLD, so it's not an
- // issue that this flag is not accepted by other linkers.
- ExtraOpts.push_back("--no-rosegment");
- }
-
// Android ARM/AArch64 use max-page-size=4096 to reduce VMA usage. Note, lld
// from 11 onwards default max-page-size to 65536 for both ARM and AArch64.
if ((Triple.isARM() || Triple.isAArch64()) && Triple.isAndroid()) {
diff --git a/clang/lib/Format/UnwrappedLineFormatter.cpp b/clang/lib/Format/UnwrappedLineFormatter.cpp
index d1138bbc9c36..5dd0ccdfa6fd 100644
--- a/clang/lib/Format/UnwrappedLineFormatter.cpp
+++ b/clang/lib/Format/UnwrappedLineFormatter.cpp
@@ -371,7 +371,7 @@ private:
if (Previous->is(tok::comment))
Previous = Previous->getPreviousNonComment();
if (Previous) {
- if (Previous->is(tok::greater))
+ if (Previous->is(tok::greater) && !I[-1]->InPPDirective)
return 0;
if (Previous->is(tok::identifier)) {
const FormatToken *PreviousPrevious =
diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp
index d8be4ea14868..036388ebd355 100644
--- a/clang/lib/Frontend/CompilerInvocation.cpp
+++ b/clang/lib/Frontend/CompilerInvocation.cpp
@@ -2470,6 +2470,8 @@ void CompilerInvocation::ParseLangArgs(LangOptions &Opts, ArgList &Args,
bool IsTargetSpecified =
Opts.OpenMPIsDevice || Args.hasArg(options::OPT_fopenmp_targets_EQ);
+ Opts.ConvergentFunctions = Opts.ConvergentFunctions || Opts.OpenMPIsDevice;
+
if (Opts.OpenMP || Opts.OpenMPSimd) {
if (int Version = getLastArgIntValue(
Args, OPT_fopenmp_version_EQ,
diff --git a/clang/lib/Headers/avx512fintrin.h b/clang/lib/Headers/avx512fintrin.h
index 2ee4350b14d4..f226382cbb2c 100644
--- a/clang/lib/Headers/avx512fintrin.h
+++ b/clang/lib/Headers/avx512fintrin.h
@@ -9297,9 +9297,12 @@ _mm512_mask_abs_pd(__m512d __W, __mmask8 __K, __m512d __A)
/* Vector-reduction arithmetic accepts vectors as inputs and produces scalars as
* outputs. This class of vector operation forms the basis of many scientific
- * computations. In vector-reduction arithmetic, the evaluation off is
+ * computations. In vector-reduction arithmetic, the evaluation order is
* independent of the order of the input elements of V.
+ * For floating-point types, we always assume the elements are reassociable even
+ * if -ffast-math is off.
+
* Used bisection method. At each step, we partition the vector with previous
* step in half, and the operation is performed on its two halves.
* This takes log2(n) steps where n is the number of elements in the vector.
@@ -9345,8 +9348,11 @@ _mm512_mask_reduce_or_epi64(__mmask8 __M, __m512i __W) {
return __builtin_ia32_reduce_or_q512(__W);
}
+// -0.0 is used to ignore the start value since it is the neutral value of
+// floating point addition. For more information, please refer to
+// https://llvm.org/docs/LangRef.html#llvm-vector-reduce-fadd-intrinsic
static __inline__ double __DEFAULT_FN_ATTRS512 _mm512_reduce_add_pd(__m512d __W) {
- return __builtin_ia32_reduce_fadd_pd512(0.0, __W);
+ return __builtin_ia32_reduce_fadd_pd512(-0.0, __W);
}
static __inline__ double __DEFAULT_FN_ATTRS512 _mm512_reduce_mul_pd(__m512d __W) {
@@ -9356,7 +9362,7 @@ static __inline__ double __DEFAULT_FN_ATTRS512 _mm512_reduce_mul_pd(__m512d __W)
static __inline__ double __DEFAULT_FN_ATTRS512
_mm512_mask_reduce_add_pd(__mmask8 __M, __m512d __W) {
__W = _mm512_maskz_mov_pd(__M, __W);
- return __builtin_ia32_reduce_fadd_pd512(0.0, __W);
+ return __builtin_ia32_reduce_fadd_pd512(-0.0, __W);
}
static __inline__ double __DEFAULT_FN_ATTRS512
@@ -9411,7 +9417,7 @@ _mm512_mask_reduce_or_epi32(__mmask16 __M, __m512i __W) {
static __inline__ float __DEFAULT_FN_ATTRS512
_mm512_reduce_add_ps(__m512 __W) {
- return __builtin_ia32_reduce_fadd_ps512(0.0f, __W);
+ return __builtin_ia32_reduce_fadd_ps512(-0.0f, __W);
}
static __inline__ float __DEFAULT_FN_ATTRS512
@@ -9422,7 +9428,7 @@ _mm512_reduce_mul_ps(__m512 __W) {
static __inline__ float __DEFAULT_FN_ATTRS512
_mm512_mask_reduce_add_ps(__mmask16 __M, __m512 __W) {
__W = _mm512_maskz_mov_ps(__M, __W);
- return __builtin_ia32_reduce_fadd_ps512(0.0f, __W);
+ return __builtin_ia32_reduce_fadd_ps512(-0.0f, __W);
}
static __inline__ float __DEFAULT_FN_ATTRS512
diff --git a/clang/lib/Lex/Preprocessor.cpp b/clang/lib/Lex/Preprocessor.cpp
index 94f1ce91f884..177786d90390 100644
--- a/clang/lib/Lex/Preprocessor.cpp
+++ b/clang/lib/Lex/Preprocessor.cpp
@@ -119,12 +119,8 @@ Preprocessor::Preprocessor(std::shared_ptr<PreprocessorOptions> PPOpts,
// a macro. They get unpoisoned where it is allowed.
(Ident__VA_ARGS__ = getIdentifierInfo("__VA_ARGS__"))->setIsPoisoned();
SetPoisonReason(Ident__VA_ARGS__,diag::ext_pp_bad_vaargs_use);
- if (getLangOpts().CPlusPlus20) {
- (Ident__VA_OPT__ = getIdentifierInfo("__VA_OPT__"))->setIsPoisoned();
- SetPoisonReason(Ident__VA_OPT__,diag::ext_pp_bad_vaopt_use);
- } else {
- Ident__VA_OPT__ = nullptr;
- }
+ (Ident__VA_OPT__ = getIdentifierInfo("__VA_OPT__"))->setIsPoisoned();
+ SetPoisonReason(Ident__VA_OPT__,diag::ext_pp_bad_vaopt_use);
// Initialize the pragma handlers.
RegisterBuiltinPragmas();
diff --git a/clang/lib/Lex/TokenLexer.cpp b/clang/lib/Lex/TokenLexer.cpp
index 97cb2cf0bb8c..da5681aaf478 100644
--- a/clang/lib/Lex/TokenLexer.cpp
+++ b/clang/lib/Lex/TokenLexer.cpp
@@ -148,12 +148,12 @@ bool TokenLexer::MaybeRemoveCommaBeforeVaArgs(
return false;
// GCC removes the comma in the expansion of " ... , ## __VA_ARGS__ " if
- // __VA_ARGS__ is empty, but not in strict mode where there are no
- // named arguments, where it remains. With GNU extensions, it is removed
- // regardless of named arguments.
+ // __VA_ARGS__ is empty, but not in strict C99 mode where there are no
+ // named arguments, where it remains. In all other modes, including C99
+ // with GNU extensions, it is removed regardless of named arguments.
// Microsoft also appears to support this extension, unofficially.
- if (!PP.getLangOpts().GNUMode && !PP.getLangOpts().MSVCCompat &&
- Macro->getNumParams() < 2)
+ if (PP.getLangOpts().C99 && !PP.getLangOpts().GNUMode
+ && Macro->getNumParams() < 2)
return false;
// Is a comma available to be removed?
diff --git a/clang/lib/Parse/ParseDecl.cpp b/clang/lib/Parse/ParseDecl.cpp
index 571164139630..347d992b1643 100644
--- a/clang/lib/Parse/ParseDecl.cpp
+++ b/clang/lib/Parse/ParseDecl.cpp
@@ -4216,7 +4216,7 @@ void Parser::ParseStructUnionBody(SourceLocation RecordLoc,
}
// Parse _Static_assert declaration.
- if (Tok.is(tok::kw__Static_assert)) {
+ if (Tok.isOneOf(tok::kw__Static_assert, tok::kw_static_assert)) {
SourceLocation DeclEnd;
ParseStaticAssertDeclaration(DeclEnd);
continue;
@@ -5180,6 +5180,7 @@ bool Parser::isDeclarationSpecifier(bool DisambiguatingWithExpression) {
case tok::kw_friend:
// static_assert-declaration
+ case tok::kw_static_assert:
case tok::kw__Static_assert:
// GNU typeof support.
diff --git a/clang/lib/Sema/Sema.cpp b/clang/lib/Sema/Sema.cpp
index 55cb3aee6194..450f9c020f7f 100644
--- a/clang/lib/Sema/Sema.cpp
+++ b/clang/lib/Sema/Sema.cpp
@@ -14,6 +14,7 @@
#include "UsedDeclVisitor.h"
#include "clang/AST/ASTContext.h"
#include "clang/AST/ASTDiagnostic.h"
+#include "clang/AST/Decl.h"
#include "clang/AST/DeclCXX.h"
#include "clang/AST/DeclFriend.h"
#include "clang/AST/DeclObjC.h"
@@ -537,6 +538,13 @@ void Sema::diagnoseZeroToNullptrConversion(CastKind Kind, const Expr* E) {
if (E->IgnoreParenImpCasts()->getType()->isNullPtrType())
return;
+ // Don't diagnose the conversion from a 0 literal to a null pointer argument
+ // in a synthesized call to operator<=>.
+ if (!CodeSynthesisContexts.empty() &&
+ CodeSynthesisContexts.back().Kind ==
+ CodeSynthesisContext::RewritingOperatorAsSpaceship)
+ return;
+
// If it is a macro from system header, and if the macro name is not "NULL",
// do not warn.
SourceLocation MaybeMacroLoc = E->getBeginLoc();
@@ -1733,11 +1741,12 @@ Sema::SemaDiagnosticBuilder::~SemaDiagnosticBuilder() {
}
}
-Sema::SemaDiagnosticBuilder Sema::targetDiag(SourceLocation Loc,
- unsigned DiagID) {
+Sema::SemaDiagnosticBuilder
+Sema::targetDiag(SourceLocation Loc, unsigned DiagID, FunctionDecl *FD) {
+ FD = FD ? FD : getCurFunctionDecl();
if (LangOpts.OpenMP)
- return LangOpts.OpenMPIsDevice ? diagIfOpenMPDeviceCode(Loc, DiagID)
- : diagIfOpenMPHostCode(Loc, DiagID);
+ return LangOpts.OpenMPIsDevice ? diagIfOpenMPDeviceCode(Loc, DiagID, FD)
+ : diagIfOpenMPHostCode(Loc, DiagID, FD);
if (getLangOpts().CUDA)
return getLangOpts().CUDAIsDevice ? CUDADiagIfDeviceCode(Loc, DiagID)
: CUDADiagIfHostCode(Loc, DiagID);
@@ -1746,7 +1755,7 @@ Sema::SemaDiagnosticBuilder Sema::targetDiag(SourceLocation Loc,
return SYCLDiagIfDeviceCode(Loc, DiagID);
return SemaDiagnosticBuilder(SemaDiagnosticBuilder::K_Immediate, Loc, DiagID,
- getCurFunctionDecl(), *this);
+ FD, *this);
}
Sema::SemaDiagnosticBuilder Sema::Diag(SourceLocation Loc, unsigned DiagID,
@@ -1765,15 +1774,14 @@ Sema::SemaDiagnosticBuilder Sema::Diag(SourceLocation Loc, unsigned DiagID,
DiagID, getCurFunctionDecl(), *this);
}
- SemaDiagnosticBuilder DB =
- getLangOpts().CUDAIsDevice
- ? CUDADiagIfDeviceCode(Loc, DiagID)
- : CUDADiagIfHostCode(Loc, DiagID);
+ SemaDiagnosticBuilder DB = getLangOpts().CUDAIsDevice
+ ? CUDADiagIfDeviceCode(Loc, DiagID)
+ : CUDADiagIfHostCode(Loc, DiagID);
SetIsLastErrorImmediate(DB.isImmediate());
return DB;
}
-void Sema::checkDeviceDecl(const ValueDecl *D, SourceLocation Loc) {
+void Sema::checkDeviceDecl(ValueDecl *D, SourceLocation Loc) {
if (isUnevaluatedContext())
return;
@@ -1791,13 +1799,17 @@ void Sema::checkDeviceDecl(const ValueDecl *D, SourceLocation Loc) {
return;
}
+ // Try to associate errors with the lexical context if that is a function,
+ // or with the value declaration otherwise.
+ FunctionDecl *FD =
+ isa<FunctionDecl>(C) ? cast<FunctionDecl>(C) : dyn_cast<FunctionDecl>(D);
auto CheckType = [&](QualType Ty) {
if (Ty->isDependentType())
return;
if (Ty->isExtIntType()) {
if (!Context.getTargetInfo().hasExtIntType()) {
- targetDiag(Loc, diag::err_device_unsupported_type)
+ targetDiag(Loc, diag::err_device_unsupported_type, FD)
<< D << false /*show bit size*/ << 0 /*bitsize*/
<< Ty << Context.getTargetInfo().getTriple().str();
}
@@ -1810,11 +1822,12 @@ void Sema::checkDeviceDecl(const ValueDecl *D, SourceLocation Loc) {
!Context.getTargetInfo().hasFloat128Type()) ||
(Ty->isIntegerType() && Context.getTypeSize(Ty) == 128 &&
!Context.getTargetInfo().hasInt128Type())) {
- targetDiag(Loc, diag::err_device_unsupported_type)
+ if (targetDiag(Loc, diag::err_device_unsupported_type, FD)
<< D << true /*show bit size*/
<< static_cast<unsigned>(Context.getTypeSize(Ty)) << Ty
- << Context.getTargetInfo().getTriple().str();
- targetDiag(D->getLocation(), diag::note_defined_here) << D;
+ << Context.getTargetInfo().getTriple().str())
+ D->setInvalidDecl();
+ targetDiag(D->getLocation(), diag::note_defined_here, FD) << D;
}
};
@@ -1826,6 +1839,8 @@ void Sema::checkDeviceDecl(const ValueDecl *D, SourceLocation Loc) {
CheckType(ParamTy);
CheckType(FPTy->getReturnType());
}
+ if (const auto *FNPTy = dyn_cast<FunctionNoProtoType>(Ty))
+ CheckType(FNPTy->getReturnType());
}
/// Looks through the macro-expansion chain for the given
diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp
index 3ee0c43097d7..1f7ab49ccdd7 100644
--- a/clang/lib/Sema/SemaDecl.cpp
+++ b/clang/lib/Sema/SemaDecl.cpp
@@ -9420,6 +9420,9 @@ Sema::ActOnFunctionDeclarator(Scope *S, Declarator &D, DeclContext *DC,
}
}
+ if (LangOpts.SYCLIsDevice || (LangOpts.OpenMP && LangOpts.OpenMPIsDevice))
+ checkDeviceDecl(NewFD, D.getBeginLoc());
+
if (!getLangOpts().CPlusPlus) {
// Perform semantic checking on the function declaration.
if (!NewFD->isInvalidDecl() && NewFD->isMain())
@@ -18329,42 +18332,51 @@ Sema::FunctionEmissionStatus Sema::getEmissionStatus(FunctionDecl *FD,
if (FD->isDependentContext())
return FunctionEmissionStatus::TemplateDiscarded;
- FunctionEmissionStatus OMPES = FunctionEmissionStatus::Unknown;
+ // Check whether this function is an externally visible definition.
+ auto IsEmittedForExternalSymbol = [this, FD]() {
+ // We have to check the GVA linkage of the function's *definition* -- if we
+ // only have a declaration, we don't know whether or not the function will
+ // be emitted, because (say) the definition could include "inline".
+ FunctionDecl *Def = FD->getDefinition();
+
+ return Def && !isDiscardableGVALinkage(
+ getASTContext().GetGVALinkageForFunction(Def));
+ };
+
if (LangOpts.OpenMPIsDevice) {
+ // In OpenMP device mode we will not emit host-only functions, or functions
+ // we don't need due to their linkage.
Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
OMPDeclareTargetDeclAttr::getDeviceType(FD->getCanonicalDecl());
- if (DevTy.hasValue()) {
+ // DevTy may be changed later by
+ // #pragma omp declare target to(*) device_type(*).
+ // Therefore DevTy having no value does not imply host. The emission status
+ // will be checked again at the end of the compilation unit with Final = true.
+ if (DevTy.hasValue())
if (*DevTy == OMPDeclareTargetDeclAttr::DT_Host)
- OMPES = FunctionEmissionStatus::OMPDiscarded;
- else if (*DevTy == OMPDeclareTargetDeclAttr::DT_NoHost ||
- *DevTy == OMPDeclareTargetDeclAttr::DT_Any) {
- OMPES = FunctionEmissionStatus::Emitted;
- }
- }
- } else if (LangOpts.OpenMP) {
- // In OpenMP 4.5 all the functions are host functions.
- if (LangOpts.OpenMP <= 45) {
- OMPES = FunctionEmissionStatus::Emitted;
- } else {
- Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
- OMPDeclareTargetDeclAttr::getDeviceType(FD->getCanonicalDecl());
- // In OpenMP 5.0 or above, DevTy may be changed later by
- // #pragma omp declare target to(*) device_type(*). Therefore DevTy
- // having no value does not imply host. The emission status will be
- // checked again at the end of compilation unit.
- if (DevTy.hasValue()) {
- if (*DevTy == OMPDeclareTargetDeclAttr::DT_NoHost) {
- OMPES = FunctionEmissionStatus::OMPDiscarded;
- } else if (*DevTy == OMPDeclareTargetDeclAttr::DT_Host ||
- *DevTy == OMPDeclareTargetDeclAttr::DT_Any)
- OMPES = FunctionEmissionStatus::Emitted;
- } else if (Final)
- OMPES = FunctionEmissionStatus::Emitted;
- }
- }
- if (OMPES == FunctionEmissionStatus::OMPDiscarded ||
- (OMPES == FunctionEmissionStatus::Emitted && !LangOpts.CUDA))
- return OMPES;
+ return FunctionEmissionStatus::OMPDiscarded;
+ // If we have an explicit value for the device type, or we are in a target
+ // declare context, we need to emit all extern and used symbols.
+ if (isInOpenMPDeclareTargetContext() || DevTy.hasValue())
+ if (IsEmittedForExternalSymbol())
+ return FunctionEmissionStatus::Emitted;
+ // Device mode only emits what it must; if a function wasn't tagged as
+ // needed by this point, we'll omit it.
+ if (Final)
+ return FunctionEmissionStatus::OMPDiscarded;
+ } else if (LangOpts.OpenMP > 45) {
+ // In OpenMP host compilation prior to 5.0 everything was an emitted host
+ // function. In 5.0, no_host was introduced, which might cause a function to
+ // be omitted.
+ Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
+ OMPDeclareTargetDeclAttr::getDeviceType(FD->getCanonicalDecl());
+ if (DevTy.hasValue())
+ if (*DevTy == OMPDeclareTargetDeclAttr::DT_NoHost)
+ return FunctionEmissionStatus::OMPDiscarded;
+ }
+
+ if (Final && LangOpts.OpenMP && !LangOpts.CUDA)
+ return FunctionEmissionStatus::Emitted;
if (LangOpts.CUDA) {
// When compiling for device, host functions are never emitted. Similarly,
@@ -18378,17 +18390,7 @@ Sema::FunctionEmissionStatus Sema::getEmissionStatus(FunctionDecl *FD,
(T == Sema::CFT_Device || T == Sema::CFT_Global))
return FunctionEmissionStatus::CUDADiscarded;
- // Check whether this function is externally visible -- if so, it's
- // known-emitted.
- //
- // We have to check the GVA linkage of the function's *definition* -- if we
- // only have a declaration, we don't know whether or not the function will
- // be emitted, because (say) the definition could include "inline".
- FunctionDecl *Def = FD->getDefinition();
-
- if (Def &&
- !isDiscardableGVALinkage(getASTContext().GetGVALinkageForFunction(Def))
- && (!LangOpts.OpenMP || OMPES == FunctionEmissionStatus::Emitted))
+ if (IsEmittedForExternalSymbol())
return FunctionEmissionStatus::Emitted;
}
diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp
index 45616dadcbee..ae8508d6c601 100644
--- a/clang/lib/Sema/SemaExpr.cpp
+++ b/clang/lib/Sema/SemaExpr.cpp
@@ -373,7 +373,7 @@ bool Sema::DiagnoseUseOfDecl(NamedDecl *D, ArrayRef<SourceLocation> Locs,
}
if (LangOpts.SYCLIsDevice || (LangOpts.OpenMP && LangOpts.OpenMPIsDevice)) {
- if (const auto *VD = dyn_cast<ValueDecl>(D))
+ if (auto *VD = dyn_cast<ValueDecl>(D))
checkDeviceDecl(VD, Loc);
if (!Context.getTargetInfo().isTLSSupported())
diff --git a/clang/lib/Sema/SemaLambda.cpp b/clang/lib/Sema/SemaLambda.cpp
index af61c82c2002..c1c6a4bf5c68 100644
--- a/clang/lib/Sema/SemaLambda.cpp
+++ b/clang/lib/Sema/SemaLambda.cpp
@@ -432,15 +432,16 @@ CXXMethodDecl *Sema::startLambdaDefinition(CXXRecordDecl *Class,
void Sema::handleLambdaNumbering(
CXXRecordDecl *Class, CXXMethodDecl *Method,
- Optional<std::tuple<unsigned, bool, Decl *>> Mangling) {
+ Optional<std::tuple<bool, unsigned, unsigned, Decl *>> Mangling) {
if (Mangling) {
- unsigned ManglingNumber;
bool HasKnownInternalLinkage;
+ unsigned ManglingNumber, DeviceManglingNumber;
Decl *ManglingContextDecl;
- std::tie(ManglingNumber, HasKnownInternalLinkage, ManglingContextDecl) =
- Mangling.getValue();
+ std::tie(HasKnownInternalLinkage, ManglingNumber, DeviceManglingNumber,
+ ManglingContextDecl) = Mangling.getValue();
Class->setLambdaMangling(ManglingNumber, ManglingContextDecl,
HasKnownInternalLinkage);
+ Class->setDeviceLambdaManglingNumber(DeviceManglingNumber);
return;
}
@@ -476,6 +477,7 @@ void Sema::handleLambdaNumbering(
unsigned ManglingNumber = MCtx->getManglingNumber(Method);
Class->setLambdaMangling(ManglingNumber, ManglingContextDecl,
HasKnownInternalLinkage);
+ Class->setDeviceLambdaManglingNumber(MCtx->getDeviceManglingNumber(Method));
}
}
diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp
index 78707484f588..4063c185388d 100644
--- a/clang/lib/Sema/SemaOpenMP.cpp
+++ b/clang/lib/Sema/SemaOpenMP.cpp
@@ -1884,8 +1884,7 @@ void Sema::popOpenMPFunctionRegion(const FunctionScopeInfo *OldFSI) {
static bool isOpenMPDeviceDelayedContext(Sema &S) {
assert(S.LangOpts.OpenMP && S.LangOpts.OpenMPIsDevice &&
"Expected OpenMP device compilation.");
- return !S.isInOpenMPTargetExecutionDirective() &&
- !S.isInOpenMPDeclareTargetContext();
+ return !S.isInOpenMPTargetExecutionDirective();
}
namespace {
@@ -1898,11 +1897,11 @@ enum class FunctionEmissionStatus {
} // anonymous namespace
Sema::SemaDiagnosticBuilder Sema::diagIfOpenMPDeviceCode(SourceLocation Loc,
- unsigned DiagID) {
+ unsigned DiagID,
+ FunctionDecl *FD) {
assert(LangOpts.OpenMP && LangOpts.OpenMPIsDevice &&
"Expected OpenMP device compilation.");
- FunctionDecl *FD = getCurFunctionDecl();
SemaDiagnosticBuilder::Kind Kind = SemaDiagnosticBuilder::K_Nop;
if (FD) {
FunctionEmissionStatus FES = getEmissionStatus(FD);
@@ -1911,6 +1910,13 @@ Sema::SemaDiagnosticBuilder Sema::diagIfOpenMPDeviceCode(SourceLocation Loc,
Kind = SemaDiagnosticBuilder::K_Immediate;
break;
case FunctionEmissionStatus::Unknown:
+ // TODO: We should always delay diagnostics here in case a target
+ // region is in a function we do not emit. However, as the
+ // current diagnostics are associated with the function containing
+ // the target region and we do not emit that one, we would miss out
+ // on diagnostics for the target region itself. We need to anchor
+ // the diagnostics with the new generated function *or* ensure we
+ // emit diagnostics associated with the surrounding function.
Kind = isOpenMPDeviceDelayedContext(*this)
? SemaDiagnosticBuilder::K_Deferred
: SemaDiagnosticBuilder::K_Immediate;
@@ -1925,14 +1931,15 @@ Sema::SemaDiagnosticBuilder Sema::diagIfOpenMPDeviceCode(SourceLocation Loc,
}
}
- return SemaDiagnosticBuilder(Kind, Loc, DiagID, getCurFunctionDecl(), *this);
+ return SemaDiagnosticBuilder(Kind, Loc, DiagID, FD, *this);
}
Sema::SemaDiagnosticBuilder Sema::diagIfOpenMPHostCode(SourceLocation Loc,
- unsigned DiagID) {
+ unsigned DiagID,
+ FunctionDecl *FD) {
assert(LangOpts.OpenMP && !LangOpts.OpenMPIsDevice &&
"Expected OpenMP host compilation.");
- FunctionEmissionStatus FES = getEmissionStatus(getCurFunctionDecl());
+ FunctionEmissionStatus FES = getEmissionStatus(FD);
SemaDiagnosticBuilder::Kind Kind = SemaDiagnosticBuilder::K_Nop;
switch (FES) {
case FunctionEmissionStatus::Emitted:
@@ -1948,7 +1955,7 @@ Sema::SemaDiagnosticBuilder Sema::diagIfOpenMPHostCode(SourceLocation Loc,
break;
}
- return SemaDiagnosticBuilder(Kind, Loc, DiagID, getCurFunctionDecl(), *this);
+ return SemaDiagnosticBuilder(Kind, Loc, DiagID, FD, *this);
}
static OpenMPDefaultmapClauseKind
diff --git a/clang/lib/Sema/TreeTransform.h b/clang/lib/Sema/TreeTransform.h
index 0a596e50658b..3c68f9458e58 100644
--- a/clang/lib/Sema/TreeTransform.h
+++ b/clang/lib/Sema/TreeTransform.h
@@ -12504,10 +12504,11 @@ TreeTransform<Derived>::TransformLambdaExpr(LambdaExpr *E) {
E->getCaptureDefault());
getDerived().transformedLocalDecl(OldClass, {Class});
- Optional<std::tuple<unsigned, bool, Decl *>> Mangling;
+ Optional<std::tuple<bool, unsigned, unsigned, Decl *>> Mangling;
if (getDerived().ReplacingOriginal())
- Mangling = std::make_tuple(OldClass->getLambdaManglingNumber(),
- OldClass->hasKnownLambdaInternalLinkage(),
+ Mangling = std::make_tuple(OldClass->hasKnownLambdaInternalLinkage(),
+ OldClass->getLambdaManglingNumber(),
+ OldClass->getDeviceLambdaManglingNumber(),
OldClass->getLambdaContextDecl());
// Build the call operator.
diff --git a/clang/lib/Serialization/ASTReaderDecl.cpp b/clang/lib/Serialization/ASTReaderDecl.cpp
index 6bfb9bd783b5..18ab4666a7d8 100644
--- a/clang/lib/Serialization/ASTReaderDecl.cpp
+++ b/clang/lib/Serialization/ASTReaderDecl.cpp
@@ -1748,6 +1748,7 @@ void ASTDeclReader::ReadCXXDefinitionData(
Lambda.NumExplicitCaptures = Record.readInt();
Lambda.HasKnownInternalLinkage = Record.readInt();
Lambda.ManglingNumber = Record.readInt();
+ D->setDeviceLambdaManglingNumber(Record.readInt());
Lambda.ContextDecl = readDeclID();
Lambda.Captures = (Capture *)Reader.getContext().Allocate(
sizeof(Capture) * Lambda.NumCaptures);
diff --git a/clang/lib/Serialization/ASTWriter.cpp b/clang/lib/Serialization/ASTWriter.cpp
index 6bfa7b0e7d6d..40900af6f9e0 100644
--- a/clang/lib/Serialization/ASTWriter.cpp
+++ b/clang/lib/Serialization/ASTWriter.cpp
@@ -5667,6 +5667,7 @@ void ASTRecordWriter::AddCXXDefinitionData(const CXXRecordDecl *D) {
Record->push_back(Lambda.NumExplicitCaptures);
Record->push_back(Lambda.HasKnownInternalLinkage);
Record->push_back(Lambda.ManglingNumber);
+ Record->push_back(D->getDeviceLambdaManglingNumber());
AddDeclRef(D->getLambdaContextDecl());
AddTypeSourceInfo(Lambda.MethodTyInfo);
for (unsigned I = 0, N = Lambda.NumCaptures; I != N; ++I) {
diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_platform_interceptors.h b/compiler-rt/lib/sanitizer_common/sanitizer_platform_interceptors.h
index 7f7b38d4215b..068fc9829e57 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_platform_interceptors.h
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_platform_interceptors.h
@@ -226,7 +226,7 @@
(SI_FREEBSD || SI_NETBSD || SI_MAC || SI_LINUX_NOT_ANDROID || SI_SOLARIS)
#define SANITIZER_INTERCEPT_GETPWENT \
(SI_FREEBSD || SI_NETBSD || SI_MAC || SI_LINUX_NOT_ANDROID || SI_SOLARIS)
-#define SANITIZER_INTERCEPT_FGETGRENT_R (SI_FREEBSD || SI_GLIBC || SI_SOLARIS)
+#define SANITIZER_INTERCEPT_FGETGRENT_R (SI_GLIBC || SI_SOLARIS)
#define SANITIZER_INTERCEPT_FGETPWENT SI_LINUX_NOT_ANDROID || SI_SOLARIS
#define SANITIZER_INTERCEPT_GETPWENT_R \
(SI_FREEBSD || SI_NETBSD || SI_GLIBC || SI_SOLARIS)
diff --git a/libcxx/include/__locale b/libcxx/include/__locale
index a2da7d78049f..77e5faab2676 100644
--- a/libcxx/include/__locale
+++ b/libcxx/include/__locale
@@ -21,30 +21,30 @@
#include <locale.h>
#if defined(_LIBCPP_MSVCRT_LIKE)
# include <cstring>
-# include <support/win32/locale_win32.h>
+# include <__support/win32/locale_win32.h>
#elif defined(__NuttX__)
-# include <support/nuttx/xlocale.h>
+# include <__support/nuttx/xlocale.h>
#elif defined(_AIX) || defined(__MVS__)
-# include <support/ibm/xlocale.h>
+# include <__support/ibm/xlocale.h>
#elif defined(__ANDROID__)
-# include <support/android/locale_bionic.h>
+# include <__support/android/locale_bionic.h>
#elif defined(__sun__)
# include <xlocale.h>
-# include <support/solaris/xlocale.h>
+# include <__support/solaris/xlocale.h>
#elif defined(_NEWLIB_VERSION)
-# include <support/newlib/xlocale.h>
+# include <__support/newlib/xlocale.h>
#elif defined(__OpenBSD__)
-# include <support/openbsd/xlocale.h>
+# include <__support/openbsd/xlocale.h>
#elif (defined(__APPLE__) || defined(__FreeBSD__) \
|| defined(__EMSCRIPTEN__) || defined(__IBMCPP__))
# include <xlocale.h>
#elif defined(__Fuchsia__)
-# include <support/fuchsia/xlocale.h>
+# include <__support/fuchsia/xlocale.h>
#elif defined(__wasi__)
// WASI libc uses musl's locales support.
-# include <support/musl/xlocale.h>
+# include <__support/musl/xlocale.h>
#elif defined(_LIBCPP_HAS_MUSL_LIBC)
-# include <support/musl/xlocale.h>
+# include <__support/musl/xlocale.h>
#endif
#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
diff --git a/libcxx/include/__threading_support b/libcxx/include/__threading_support
index 473c9c3bbe49..de572f3ff84d 100644
--- a/libcxx/include/__threading_support
+++ b/libcxx/include/__threading_support
@@ -17,7 +17,7 @@
#include <errno.h>
#ifdef __MVS__
-# include <support/ibm/nanosleep.h>
+# include <__support/ibm/nanosleep.h>
#endif
#ifndef _LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER
diff --git a/libcxx/include/bit b/libcxx/include/bit
index fe360179c5ca..f8c37c3d6bbf 100644
--- a/libcxx/include/bit
+++ b/libcxx/include/bit
@@ -62,7 +62,7 @@ namespace std {
#include <__debug>
#if defined(__IBMCPP__)
-#include "support/ibm/support.h"
+#include "__support/ibm/support.h"
#endif
#if defined(_LIBCPP_COMPILER_MSVC)
#include <intrin.h>
diff --git a/libcxx/include/limits b/libcxx/include/limits
index 6d5d1e1aca75..8f97cd10a8b1 100644
--- a/libcxx/include/limits
+++ b/libcxx/include/limits
@@ -105,11 +105,11 @@ template<> class numeric_limits<cv long double>;
#include <type_traits>
#if defined(_LIBCPP_COMPILER_MSVC)
-#include "support/win32/limits_msvc_win32.h"
+#include "__support/win32/limits_msvc_win32.h"
#endif // _LIBCPP_MSVCRT
#if defined(__IBMCPP__)
-#include "support/ibm/limits.h"
+#include "__support/ibm/limits.h"
#endif // __IBMCPP__
#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
diff --git a/libcxx/include/memory b/libcxx/include/memory
index a00916c8c03f..39d0f5bee6a5 100644
--- a/libcxx/include/memory
+++ b/libcxx/include/memory
@@ -2647,7 +2647,7 @@ private:
_Alloc *__alloc = reinterpret_cast<_Alloc*>(__first);
return __alloc;
}
- _Tp* __get_elem() _NOEXCEPT {
+ _LIBCPP_NO_CFI _Tp* __get_elem() _NOEXCEPT {
_CompressedPair *__as_pair = reinterpret_cast<_CompressedPair*>(__blob_);
typename _CompressedPair::_Base2* __second = _CompressedPair::__get_second_base(__as_pair);
_Tp *__elem = reinterpret_cast<_Tp*>(__second);
diff --git a/libcxx/src/atomic.cpp b/libcxx/src/atomic.cpp
index 6b73ed771cd1..9ae1fb5199bf 100644
--- a/libcxx/src/atomic.cpp
+++ b/libcxx/src/atomic.cpp
@@ -19,6 +19,12 @@
#include <linux/futex.h>
#include <sys/syscall.h>
+// libc++ uses SYS_futex as a universal syscall name. However, on 32-bit
+// architectures with a 64-bit time_t, we need to specify SYS_futex_time64.
+#if !defined(SYS_futex) && defined(SYS_futex_time64)
+# define SYS_futex SYS_futex_time64
+#endif
+
#else // <- Add other operating systems here
// Baseline needs no new headers
diff --git a/libcxx/src/locale.cpp b/libcxx/src/locale.cpp
index f109389f68f3..a0209d0ce8cf 100644
--- a/libcxx/src/locale.cpp
+++ b/libcxx/src/locale.cpp
@@ -29,7 +29,7 @@
#include "cwctype"
#include "__sso_allocator"
#if defined(_LIBCPP_MSVCRT) || defined(__MINGW32__)
-#include "support/win32/locale_win32.h"
+#include "__support/win32/locale_win32.h"
#elif !defined(__BIONIC__) && !defined(__NuttX__)
#include <langinfo.h>
#endif
diff --git a/lld/ELF/InputSection.cpp b/lld/ELF/InputSection.cpp
index f40bb258b9af..6f16fc7abc48 100644
--- a/lld/ELF/InputSection.cpp
+++ b/lld/ELF/InputSection.cpp
@@ -901,7 +901,10 @@ void InputSection::relocateNonAlloc(uint8_t *buf, ArrayRef<RelTy> rels) {
continue;
}
- if (expr != R_ABS && expr != R_DTPREL && expr != R_RISCV_ADD) {
+ // R_ABS/R_DTPREL and some other relocations can be used from non-SHF_ALLOC
+ // sections.
+ if (expr != R_ABS && expr != R_DTPREL && expr != R_GOTPLTREL &&
+ expr != R_RISCV_ADD) {
std::string msg = getLocation<ELFT>(offset) +
": has non-ABS relocation " + toString(type) +
" against symbol '" + toString(sym) + "'";
diff --git a/lld/docs/ReleaseNotes.rst b/lld/docs/ReleaseNotes.rst
index e0b17ca3e030..ea1403888eba 100644
--- a/lld/docs/ReleaseNotes.rst
+++ b/lld/docs/ReleaseNotes.rst
@@ -35,12 +35,28 @@ Breaking changes
COFF Improvements
-----------------
-* ...
+* Error out clearly if creating a DLL with too many exported symbols.
+ (`D86701 <https://reviews.llvm.org/D86701>`_)
MinGW Improvements
------------------
-* ...
+* Enabled dynamicbase by default. (`D86654 <https://reviews.llvm.org/D86654>`_)
+
+* Tolerate mismatches between COMDAT section sizes with different amounts of
+  padding (produced by binutils) by inspecting the aux section definition.
+ (`D86659 <https://reviews.llvm.org/D86659>`_)
+
+* Support setting the subsystem version via the subsystem argument.
+ (`D88804 <https://reviews.llvm.org/D88804>`_)
+
+* Implemented the GNU -wrap option.
+ (`D89004 <https://reviews.llvm.org/D89004>`_,
+ `D91689 <https://reviews.llvm.org/D91689>`_)
+
+* Handle the ``--demangle`` and ``--no-demangle`` options.
+ (`D93950 <https://reviews.llvm.org/D93950>`_)
+
MachO Improvements
------------------
diff --git a/llvm/include/llvm-c/Core.h b/llvm/include/llvm-c/Core.h
index 8274213aa839..a78df16ca404 100644
--- a/llvm/include/llvm-c/Core.h
+++ b/llvm/include/llvm-c/Core.h
@@ -160,10 +160,10 @@ typedef enum {
LLVMVectorTypeKind, /**< Fixed width SIMD vector type */
LLVMMetadataTypeKind, /**< Metadata */
LLVMX86_MMXTypeKind, /**< X86 MMX */
- LLVMX86_AMXTypeKind, /**< X86 AMX */
LLVMTokenTypeKind, /**< Tokens */
LLVMScalableVectorTypeKind, /**< Scalable SIMD vector type */
- LLVMBFloatTypeKind /**< 16 bit brain floating point type */
+ LLVMBFloatTypeKind, /**< 16 bit brain floating point type */
+ LLVMX86_AMXTypeKind /**< X86 AMX */
} LLVMTypeKind;
typedef enum {
@@ -270,7 +270,6 @@ typedef enum {
LLVMConstantVectorValueKind,
LLVMUndefValueValueKind,
- LLVMPoisonValueValueKind,
LLVMConstantAggregateZeroValueKind,
LLVMConstantDataArrayValueKind,
LLVMConstantDataVectorValueKind,
@@ -283,6 +282,7 @@ typedef enum {
LLVMInlineAsmValueKind,
LLVMInstructionValueKind,
+ LLVMPoisonValueValueKind
} LLVMValueKind;
typedef enum {
diff --git a/llvm/include/llvm/Analysis/AssumptionCache.h b/llvm/include/llvm/Analysis/AssumptionCache.h
index 0ef63dc68e1c..c4602d3449c0 100644
--- a/llvm/include/llvm/Analysis/AssumptionCache.h
+++ b/llvm/include/llvm/Analysis/AssumptionCache.h
@@ -45,7 +45,7 @@ public:
enum : unsigned { ExprResultIdx = std::numeric_limits<unsigned>::max() };
struct ResultElem {
- WeakTrackingVH Assume;
+ WeakVH Assume;
/// contains either ExprResultIdx or the index of the operand bundle
/// containing the knowledge.
diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
index c3221aac8eea..40115fbd2f15 100644
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -2785,6 +2785,10 @@ public:
return false;
}
+ /// Does this target require the clearing of high-order bits in a register
+ /// passed to the fp16-to-fp conversion library function?
+ virtual bool shouldKeepZExtForFP16Conv() const { return false; }
+
//===--------------------------------------------------------------------===//
// Runtime Library hooks
//
diff --git a/llvm/include/llvm/Demangle/ItaniumDemangle.h b/llvm/include/llvm/Demangle/ItaniumDemangle.h
index 6bfc02d15379..e5fca98f9271 100644
--- a/llvm/include/llvm/Demangle/ItaniumDemangle.h
+++ b/llvm/include/llvm/Demangle/ItaniumDemangle.h
@@ -96,7 +96,6 @@
X(InitListExpr) \
X(FoldExpr) \
X(ThrowExpr) \
- X(UUIDOfExpr) \
X(BoolExpr) \
X(StringLiteral) \
X(LambdaExpr) \
@@ -2035,21 +2034,6 @@ public:
}
};
-// MSVC __uuidof extension, generated by clang in -fms-extensions mode.
-class UUIDOfExpr : public Node {
- Node *Operand;
-public:
- UUIDOfExpr(Node *Operand_) : Node(KUUIDOfExpr), Operand(Operand_) {}
-
- template<typename Fn> void match(Fn F) const { F(Operand); }
-
- void printLeft(OutputStream &S) const override {
- S << "__uuidof(";
- Operand->print(S);
- S << ")";
- }
-};
-
class BoolExpr : public Node {
bool Value;
@@ -5013,6 +4997,43 @@ Node *AbstractManglingParser<Derived, Alloc>::parseExpr() {
}
}
return nullptr;
+ case 'u': {
+ ++First;
+ Node *Name = getDerived().parseSourceName(/*NameState=*/nullptr);
+ if (!Name)
+ return nullptr;
+ // Special case legacy __uuidof mangling. The 't' and 'z' appear where the
+ // standard encoding expects a <template-arg>, and would otherwise be
+ // interpreted as <type> node 'short' or 'ellipsis'. However, neither
+ // __uuidof(short) nor __uuidof(...) can actually appear, so there is no
+ // actual conflict here.
+ if (Name->getBaseName() == "__uuidof") {
+ if (numLeft() < 2)
+ return nullptr;
+ if (*First == 't') {
+ ++First;
+ Node *Ty = getDerived().parseType();
+ if (!Ty)
+ return nullptr;
+ return make<CallExpr>(Name, makeNodeArray(&Ty, &Ty + 1));
+ }
+ if (*First == 'z') {
+ ++First;
+ Node *Ex = getDerived().parseExpr();
+ if (!Ex)
+ return nullptr;
+ return make<CallExpr>(Name, makeNodeArray(&Ex, &Ex + 1));
+ }
+ }
+ size_t ExprsBegin = Names.size();
+ while (!consumeIf('E')) {
+ Node *E = getDerived().parseTemplateArg();
+ if (E == nullptr)
+ return E;
+ Names.push_back(E);
+ }
+ return make<CallExpr>(Name, popTrailingNodeArray(ExprsBegin));
+ }
case '1':
case '2':
case '3':
@@ -5024,21 +5045,6 @@ Node *AbstractManglingParser<Derived, Alloc>::parseExpr() {
case '9':
return getDerived().parseUnresolvedName();
}
-
- if (consumeIf("u8__uuidoft")) {
- Node *Ty = getDerived().parseType();
- if (!Ty)
- return nullptr;
- return make<UUIDOfExpr>(Ty);
- }
-
- if (consumeIf("u8__uuidofz")) {
- Node *Ex = getDerived().parseExpr();
- if (!Ex)
- return nullptr;
- return make<UUIDOfExpr>(Ex);
- }
-
return nullptr;
}
diff --git a/llvm/include/llvm/IR/IntrinsicInst.h b/llvm/include/llvm/IR/IntrinsicInst.h
index 9d68f3fdde6c..df3a1d568756 100644
--- a/llvm/include/llvm/IR/IntrinsicInst.h
+++ b/llvm/include/llvm/IR/IntrinsicInst.h
@@ -981,12 +981,16 @@ public:
return cast<ConstantInt>(const_cast<Value *>(getArgOperand(0)));
}
+ ConstantInt *getIndex() const {
+ return cast<ConstantInt>(const_cast<Value *>(getArgOperand(1)));
+ }
+
ConstantInt *getAttributes() const {
return cast<ConstantInt>(const_cast<Value *>(getArgOperand(2)));
}
- ConstantInt *getIndex() const {
- return cast<ConstantInt>(const_cast<Value *>(getArgOperand(1)));
+ ConstantInt *getFactor() const {
+ return cast<ConstantInt>(const_cast<Value *>(getArgOperand(3)));
}
};
diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td
index b2bfc6e6f9e6..21307ed1bd91 100644
--- a/llvm/include/llvm/IR/Intrinsics.td
+++ b/llvm/include/llvm/IR/Intrinsics.td
@@ -1298,7 +1298,7 @@ def int_sideeffect : DefaultAttrsIntrinsic<[], [], [IntrInaccessibleMemOnly, Int
// Like the sideeffect intrinsic defined above, this intrinsic is treated by the
// optimizer as having opaque side effects so that it won't be removed or moved
// out of the block it probes.
-def int_pseudoprobe : Intrinsic<[], [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty],
+def int_pseudoprobe : Intrinsic<[], [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty, llvm_i64_ty],
[IntrInaccessibleMemOnly, IntrWillReturn]>;
// Intrinsics to support half precision floating point format
diff --git a/llvm/include/llvm/IR/IntrinsicsRISCV.td b/llvm/include/llvm/IR/IntrinsicsRISCV.td
index ab5b09b72ac3..c4056895f68e 100644
--- a/llvm/include/llvm/IR/IntrinsicsRISCV.td
+++ b/llvm/include/llvm/IR/IntrinsicsRISCV.td
@@ -790,6 +790,9 @@ let TargetPrefix = "riscv" in {
defm vsoxei : RISCVIStore;
defm vsuxei : RISCVIStore;
+ def int_riscv_vle1 : RISCVUSLoad;
+ def int_riscv_vse1 : RISCVUSStore;
+
defm vamoswap : RISCVAMO;
defm vamoadd : RISCVAMO;
defm vamoxor : RISCVAMO;
@@ -940,8 +943,8 @@ let TargetPrefix = "riscv" in {
defm vfwnmsac : RISCVTernaryWide;
defm vfsqrt : RISCVUnaryAA;
- defm vfrsqrte7 : RISCVUnaryAA;
- defm vfrece7 : RISCVUnaryAA;
+ defm vfrsqrt7 : RISCVUnaryAA;
+ defm vfrec7 : RISCVUnaryAA;
defm vfmin : RISCVBinaryAAX;
defm vfmax : RISCVBinaryAAX;
diff --git a/llvm/include/llvm/IR/PseudoProbe.h b/llvm/include/llvm/IR/PseudoProbe.h
index e0370c264102..5165e80caa2d 100644
--- a/llvm/include/llvm/IR/PseudoProbe.h
+++ b/llvm/include/llvm/IR/PseudoProbe.h
@@ -16,28 +16,39 @@
#include "llvm/ADT/Optional.h"
#include <cassert>
#include <cstdint>
+#include <limits>
namespace llvm {
class Instruction;
+class BasicBlock;
constexpr const char *PseudoProbeDescMetadataName = "llvm.pseudo_probe_desc";
enum class PseudoProbeType { Block = 0, IndirectCall, DirectCall };
+// The saturated distribution factor representing 100% for block probes.
+constexpr static uint64_t PseudoProbeFullDistributionFactor =
+ std::numeric_limits<uint64_t>::max();
+
struct PseudoProbeDwarfDiscriminator {
+public:
  // The following APIs encode/decode per-probe information to/from a
// 32-bit integer which is organized as:
// [2:0] - 0x7, this is reserved for regular discriminator,
// see DWARF discriminator encoding rule
// [18:3] - probe id
- // [25:19] - reserved
+ // [25:19] - probe distribution factor
// [28:26] - probe type, see PseudoProbeType
// [31:29] - reserved for probe attributes
- static uint32_t packProbeData(uint32_t Index, uint32_t Type) {
+ static uint32_t packProbeData(uint32_t Index, uint32_t Type, uint32_t Flags,
+ uint32_t Factor) {
assert(Index <= 0xFFFF && "Probe index too big to encode, exceeding 2^16");
assert(Type <= 0x7 && "Probe type too big to encode, exceeding 7");
- return (Index << 3) | (Type << 26) | 0x7;
+ assert(Flags <= 0x7);
+ assert(Factor <= 100 &&
+ "Probe distribution factor too big to encode, exceeding 100");
+ return (Index << 3) | (Factor << 19) | (Type << 26) | 0x7;
}
static uint32_t extractProbeIndex(uint32_t Value) {
@@ -51,16 +62,26 @@ struct PseudoProbeDwarfDiscriminator {
static uint32_t extractProbeAttributes(uint32_t Value) {
return (Value >> 29) & 0x7;
}
+
+ static uint32_t extractProbeFactor(uint32_t Value) {
+ return (Value >> 19) & 0x7F;
+ }
+
+ // The saturated distribution factor representing 100% for callsites.
+ constexpr static uint8_t FullDistributionFactor = 100;
};
struct PseudoProbe {
uint32_t Id;
uint32_t Type;
uint32_t Attr;
+ float Factor;
};
Optional<PseudoProbe> extractProbe(const Instruction &Inst);
+void setProbeDistributionFactor(Instruction &Inst, float Factor);
+
} // end namespace llvm
#endif // LLVM_IR_PSEUDOPROBE_H
diff --git a/llvm/include/llvm/Passes/StandardInstrumentations.h b/llvm/include/llvm/Passes/StandardInstrumentations.h
index 795a980878e2..61c86b0468f2 100644
--- a/llvm/include/llvm/Passes/StandardInstrumentations.h
+++ b/llvm/include/llvm/Passes/StandardInstrumentations.h
@@ -22,6 +22,7 @@
#include "llvm/IR/PassTimingInfo.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/Transforms/IPO/SampleProfileProbe.h"
#include <string>
#include <utility>
@@ -273,6 +274,7 @@ class StandardInstrumentations {
OptBisectInstrumentation OptBisect;
PreservedCFGCheckerInstrumentation PreservedCFGChecker;
IRChangedPrinter PrintChangedIR;
+ PseudoProbeVerifier PseudoProbeVerification;
VerifyInstrumentation Verify;
bool VerifyEach;
diff --git a/llvm/include/llvm/ProfileData/SampleProf.h b/llvm/include/llvm/ProfileData/SampleProf.h
index c45ace9e68c1..25d5b2376c11 100644
--- a/llvm/include/llvm/ProfileData/SampleProf.h
+++ b/llvm/include/llvm/ProfileData/SampleProf.h
@@ -347,6 +347,16 @@ public:
return SortedTargets;
}
+ /// Prorate call targets by a distribution factor.
+ static const CallTargetMap adjustCallTargets(const CallTargetMap &Targets,
+ float DistributionFactor) {
+ CallTargetMap AdjustedTargets;
+ for (const auto &I : Targets) {
+ AdjustedTargets[I.first()] = I.second * DistributionFactor;
+ }
+ return AdjustedTargets;
+ }
+
/// Merge the samples in \p Other into this record.
/// Optionally scale sample counts by \p Weight.
sampleprof_error merge(const SampleRecord &Other, uint64_t Weight = 1) {
@@ -439,9 +449,11 @@ public:
void clearState(ContextStateMask S) { State &= (uint32_t)~S; }
bool hasContext() const { return State != UnknownContext; }
bool isBaseContext() const { return CallingContext.empty(); }
- StringRef getName() const { return Name; }
+ StringRef getNameWithoutContext() const { return Name; }
StringRef getCallingContext() const { return CallingContext; }
- StringRef getNameWithContext() const { return FullContext; }
+ StringRef getNameWithContext(bool WithBracket = false) const {
+ return WithBracket ? InputContext : FullContext;
+ }
private:
// Give a context string, decode and populate internal states like
@@ -449,6 +461,7 @@ private:
// `ContextStr`: `[main:3 @ _Z5funcAi:1 @ _Z8funcLeafi]`
void setContext(StringRef ContextStr, ContextStateMask CState) {
assert(!ContextStr.empty());
+ InputContext = ContextStr;
// Note that `[]` wrapped input indicates a full context string, otherwise
// it's treated as context-less function name only.
bool HasContext = ContextStr.startswith("[");
@@ -480,6 +493,9 @@ private:
}
}
+ // Input context string including bracketed calling context and leaf function
+ // name
+ StringRef InputContext;
// Full context string including calling context and leaf function name
StringRef FullContext;
// Function name for the associated sample profile
@@ -676,7 +692,8 @@ public:
Name = Other.getName();
if (!GUIDToFuncNameMap)
GUIDToFuncNameMap = Other.GUIDToFuncNameMap;
-
+ if (Context.getNameWithContext(true).empty())
+ Context = Other.getContext();
if (FunctionHash == 0) {
// Set the function hash code for the target profile.
FunctionHash = Other.getFunctionHash();
@@ -743,8 +760,10 @@ public:
StringRef getName() const { return Name; }
/// Return function name with context.
- StringRef getNameWithContext() const {
- return FunctionSamples::ProfileIsCS ? Context.getNameWithContext() : Name;
+ StringRef getNameWithContext(bool WithBracket = false) const {
+ return FunctionSamples::ProfileIsCS
+ ? Context.getNameWithContext(WithBracket)
+ : Name;
}
/// Return the original function name.
diff --git a/llvm/include/llvm/ProfileData/SampleProfReader.h b/llvm/include/llvm/ProfileData/SampleProfReader.h
index 3f52a2f6163b..999e75eddffa 100644
--- a/llvm/include/llvm/ProfileData/SampleProfReader.h
+++ b/llvm/include/llvm/ProfileData/SampleProfReader.h
@@ -488,8 +488,12 @@ protected:
/// \brief Whether samples are collected based on pseudo probes.
bool ProfileIsProbeBased = false;
+ /// Whether function profiles are context-sensitive.
bool ProfileIsCS = false;
+ /// Number of context-sensitive profiles.
+ uint32_t CSProfileCount = 0;
+
/// \brief The format of sample.
SampleProfileFormat Format = SPF_None;
};
diff --git a/llvm/include/llvm/Support/CommandLine.h b/llvm/include/llvm/Support/CommandLine.h
index 38f3e188be55..0706aa226c0e 100644
--- a/llvm/include/llvm/Support/CommandLine.h
+++ b/llvm/include/llvm/Support/CommandLine.h
@@ -369,9 +369,22 @@ public:
virtual void setDefault() = 0;
+ // Prints the help string for an option.
+ //
+ // This maintains the Indent for multi-line descriptions.
+ // FirstLineIndentedBy is the number of characters already printed on the
+ // first line, i.e. the one containing the --<option name>.
static void printHelpStr(StringRef HelpStr, size_t Indent,
size_t FirstLineIndentedBy);
+ // Prints the help string for an enum value.
+ //
+ // This maintains the Indent for multi-line descriptions.
+ // FirstLineIndentedBy is the number of characters already printed on the
+ // first line, i.e. the one containing the =<value>.
+ static void printEnumValHelpStr(StringRef HelpStr, size_t Indent,
+ size_t FirstLineIndentedBy);
+
virtual void getExtraOptionNames(SmallVectorImpl<StringRef> &) {}
// addOccurrence - Wrapper around handleOccurrence that enforces Flags.
diff --git a/llvm/include/llvm/Transforms/IPO/SampleContextTracker.h b/llvm/include/llvm/Transforms/IPO/SampleContextTracker.h
index 5b2600144fa3..526e141838c4 100644
--- a/llvm/include/llvm/Transforms/IPO/SampleContextTracker.h
+++ b/llvm/include/llvm/Transforms/IPO/SampleContextTracker.h
@@ -23,6 +23,7 @@
#include "llvm/ProfileData/SampleProf.h"
#include <list>
#include <map>
+#include <vector>
using namespace llvm;
using namespace sampleprof;
@@ -42,7 +43,7 @@ public:
CallSiteLoc(CallLoc){};
ContextTrieNode *getChildContext(const LineLocation &CallSite,
StringRef CalleeName);
- ContextTrieNode *getChildContext(const LineLocation &CallSite);
+ ContextTrieNode *getHottestChildContext(const LineLocation &CallSite);
ContextTrieNode *getOrCreateChildContext(const LineLocation &CallSite,
StringRef CalleeName,
bool AllowCreate = true);
@@ -94,6 +95,9 @@ public:
// call-site. The full context is identified by location of call instruction.
FunctionSamples *getCalleeContextSamplesFor(const CallBase &Inst,
StringRef CalleeName);
+ // Get samples for indirect call targets for call site at given location.
+ std::vector<const FunctionSamples *>
+ getIndirectCalleeContextSamplesFor(const DILocation *DIL);
// Query context profile for a given location. The full context
// is identified by input DILocation.
FunctionSamples *getContextSamplesFor(const DILocation *DIL);
diff --git a/llvm/include/llvm/Transforms/IPO/SampleProfileProbe.h b/llvm/include/llvm/Transforms/IPO/SampleProfileProbe.h
index 78117fd4a9c2..cab893b50d19 100644
--- a/llvm/include/llvm/Transforms/IPO/SampleProfileProbe.h
+++ b/llvm/include/llvm/Transforms/IPO/SampleProfileProbe.h
@@ -16,6 +16,10 @@
#define LLVM_TRANSFORMS_IPO_SAMPLEPROFILEPROBE_H
#include "llvm/ADT/DenseMap.h"
+#include "llvm/Analysis/CallGraphSCCPass.h"
+#include "llvm/Analysis/LazyCallGraph.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/IR/PassInstrumentation.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/PseudoProbe.h"
#include "llvm/ProfileData/SampleProf.h"
@@ -29,6 +33,8 @@ class Module;
using namespace sampleprof;
using BlockIdMap = std::unordered_map<BasicBlock *, uint32_t>;
using InstructionIdMap = std::unordered_map<Instruction *, uint32_t>;
+using ProbeFactorMap = std::unordered_map<uint64_t, float>;
+using FuncProbeFactorMap = StringMap<ProbeFactorMap>;
enum class PseudoProbeReservedId { Invalid = 0, Last = Invalid };
@@ -43,6 +49,33 @@ public:
uint64_t getFunctionHash() const { return FunctionHash; }
};
+// A pseudo probe verifier that can be run after each IR pass to detect
+// violations of probe factor updates. In principle, the sum of distribution
+// factors for a probe should be identical before and after a pass. For a
+// function pass, the factor sum for a probe is typically 100%.
+class PseudoProbeVerifier {
+public:
+ void registerCallbacks(PassInstrumentationCallbacks &PIC);
+
+ // Implementation of pass instrumentation callbacks for new pass manager.
+ void runAfterPass(StringRef PassID, Any IR);
+
+private:
+ // Allow a little bias due to rounding to integral factors.
+ constexpr static float DistributionFactorVariance = 0.02;
+ // Distribution factors from last pass.
+ FuncProbeFactorMap FunctionProbeFactors;
+
+ void collectProbeFactors(const BasicBlock *BB, ProbeFactorMap &ProbeFactors);
+ void runAfterPass(const Module *M);
+ void runAfterPass(const LazyCallGraph::SCC *C);
+ void runAfterPass(const Function *F);
+ void runAfterPass(const Loop *L);
+ bool shouldVerifyFunction(const Function *F);
+ void verifyProbeFactors(const Function *F,
+ const ProbeFactorMap &ProbeFactors);
+};
+
// This class serves sample counts correlation for SampleProfileLoader by
// analyzing pseudo probes and their function descriptors injected by
// SampleProfileProber.
@@ -102,5 +135,13 @@ public:
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
};
+class PseudoProbeUpdatePass : public PassInfoMixin<PseudoProbeUpdatePass> {
+ void runOnFunction(Function &F, FunctionAnalysisManager &FAM);
+
+public:
+ PseudoProbeUpdatePass() {}
+ PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
+};
+
} // end namespace llvm
#endif // LLVM_TRANSFORMS_IPO_SAMPLEPROFILEPROBE_H
diff --git a/llvm/lib/Analysis/MemorySSA.cpp b/llvm/lib/Analysis/MemorySSA.cpp
index 52dca7d378e1..4722b68e20e9 100644
--- a/llvm/lib/Analysis/MemorySSA.cpp
+++ b/llvm/lib/Analysis/MemorySSA.cpp
@@ -281,7 +281,6 @@ instructionClobbersQuery(const MemoryDef *MD, const MemoryLocation &UseLoc,
// clobbers where they don't really exist at all. Please see D43269 for
// context.
switch (II->getIntrinsicID()) {
- case Intrinsic::lifetime_end:
case Intrinsic::invariant_start:
case Intrinsic::invariant_end:
case Intrinsic::assume:
@@ -358,22 +357,6 @@ struct UpwardsMemoryQuery {
} // end anonymous namespace
-static bool lifetimeEndsAt(MemoryDef *MD, const MemoryLocation &Loc,
- BatchAAResults &AA) {
- Instruction *Inst = MD->getMemoryInst();
- if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
- switch (II->getIntrinsicID()) {
- case Intrinsic::lifetime_end: {
- MemoryLocation ArgLoc = MemoryLocation::getAfter(II->getArgOperand(1));
- return AA.alias(ArgLoc, Loc) == MustAlias;
- }
- default:
- return false;
- }
- }
- return false;
-}
-
template <typename AliasAnalysisType>
static bool isUseTriviallyOptimizableToLiveOnEntry(AliasAnalysisType &AA,
const Instruction *I) {
@@ -1465,15 +1448,6 @@ void MemorySSA::OptimizeUses::optimizeUsesInBlock(
}
MemoryDef *MD = cast<MemoryDef>(VersionStack[UpperBound]);
- // If the lifetime of the pointer ends at this instruction, it's live on
- // entry.
- if (!UseMLOC.IsCall && lifetimeEndsAt(MD, UseMLOC.getLoc(), *AA)) {
- // Reset UpperBound to liveOnEntryDef's place in the stack
- UpperBound = 0;
- FoundClobberResult = true;
- LocInfo.AR = MustAlias;
- break;
- }
ClobberAlias CA = instructionClobbersQuery(MD, MU, UseMLOC, *AA);
if (CA.IsClobber) {
FoundClobberResult = true;
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index df0219fcfa64..a9353bdfb780 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -968,10 +968,11 @@ bool CombinerHelper::matchOptBrCondByInvertingCond(MachineInstr &MI) {
if (BrCond->getOpcode() != TargetOpcode::G_BRCOND)
return false;
- // Check that the next block is the conditional branch target.
- if (!MBB->isLayoutSuccessor(BrCond->getOperand(1).getMBB()))
- return false;
- return true;
+ // Check that the next block is the conditional branch target. Also make sure
+ // that it isn't the same as the G_BR's target (otherwise, this will loop).
+ MachineBasicBlock *BrCondTarget = BrCond->getOperand(1).getMBB();
+ return BrCondTarget != MI.getOperand(0).getMBB() &&
+ MBB->isLayoutSuccessor(BrCondTarget);
}
void CombinerHelper::applyOptBrCondByInvertingCond(MachineInstr &MI) {
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 615bea2a4905..89670d708264 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -21174,7 +21174,7 @@ SDValue DAGCombiner::visitFP16_TO_FP(SDNode *N) {
SDValue N0 = N->getOperand(0);
// fold fp16_to_fp(op & 0xffff) -> fp16_to_fp(op)
- if (N0->getOpcode() == ISD::AND) {
+ if (!TLI.shouldKeepZExtForFP16Conv() && N0->getOpcode() == ISD::AND) {
ConstantSDNode *AndConst = getAsNonOpaqueConstant(N0.getOperand(1));
if (AndConst && AndConst->getAPIntValue() == 0xffff) {
return DAG.getNode(ISD::FP16_TO_FP, SDLoc(N), N->getValueType(0),
diff --git a/llvm/lib/IR/ConstantFold.cpp b/llvm/lib/IR/ConstantFold.cpp
index 03cb108cc485..95dd55237e5f 100644
--- a/llvm/lib/IR/ConstantFold.cpp
+++ b/llvm/lib/IR/ConstantFold.cpp
@@ -630,7 +630,7 @@ Constant *llvm::ConstantFoldCastInstruction(unsigned opc, Constant *V,
V.convertToInteger(IntVal, APFloat::rmTowardZero, &ignored)) {
// Undefined behavior invoked - the destination type can't represent
// the input constant.
- return PoisonValue::get(DestTy);
+ return UndefValue::get(DestTy);
}
return ConstantInt::get(FPC->getContext(), IntVal);
}
@@ -916,7 +916,7 @@ Constant *llvm::ConstantFoldInsertElementInstruction(Constant *Val,
unsigned NumElts = ValTy->getNumElements();
if (CIdx->uge(NumElts))
- return PoisonValue::get(Val->getType());
+ return UndefValue::get(Val->getType());
SmallVector<Constant*, 16> Result;
Result.reserve(NumElts);
@@ -1151,21 +1151,23 @@ Constant *llvm::ConstantFoldBinaryInstruction(unsigned Opcode, Constant *C1,
}
case Instruction::SDiv:
case Instruction::UDiv:
- // X / undef -> poison
- // X / 0 -> poison
- if (match(C2, m_CombineOr(m_Undef(), m_Zero())))
- return PoisonValue::get(C2->getType());
+ // X / undef -> undef
+ if (isa<UndefValue>(C2))
+ return C2;
+ // undef / 0 -> undef
// undef / 1 -> undef
- if (match(C2, m_One()))
+ if (match(C2, m_Zero()) || match(C2, m_One()))
return C1;
// undef / X -> 0 otherwise
return Constant::getNullValue(C1->getType());
case Instruction::URem:
case Instruction::SRem:
- // X % undef -> poison
- // X % 0 -> poison
- if (match(C2, m_CombineOr(m_Undef(), m_Zero())))
- return PoisonValue::get(C2->getType());
+ // X % undef -> undef
+ if (match(C2, m_Undef()))
+ return C2;
+ // undef % 0 -> undef
+ if (match(C2, m_Zero()))
+ return C1;
// undef % X -> 0 otherwise
return Constant::getNullValue(C1->getType());
case Instruction::Or: // X | undef -> -1
@@ -1173,28 +1175,28 @@ Constant *llvm::ConstantFoldBinaryInstruction(unsigned Opcode, Constant *C1,
return C1;
return Constant::getAllOnesValue(C1->getType()); // undef | X -> ~0
case Instruction::LShr:
- // X >>l undef -> poison
+ // X >>l undef -> undef
if (isa<UndefValue>(C2))
- return PoisonValue::get(C2->getType());
+ return C2;
// undef >>l 0 -> undef
if (match(C2, m_Zero()))
return C1;
// undef >>l X -> 0
return Constant::getNullValue(C1->getType());
case Instruction::AShr:
- // X >>a undef -> poison
+ // X >>a undef -> undef
if (isa<UndefValue>(C2))
- return PoisonValue::get(C2->getType());
+ return C2;
// undef >>a 0 -> undef
if (match(C2, m_Zero()))
return C1;
- // TODO: undef >>a X -> poison if the shift is exact
+ // TODO: undef >>a X -> undef if the shift is exact
// undef >>a X -> 0
return Constant::getNullValue(C1->getType());
case Instruction::Shl:
// X << undef -> undef
if (isa<UndefValue>(C2))
- return PoisonValue::get(C2->getType());
+ return C2;
// undef << 0 -> undef
if (match(C2, m_Zero()))
return C1;
@@ -1247,14 +1249,14 @@ Constant *llvm::ConstantFoldBinaryInstruction(unsigned Opcode, Constant *C1,
if (CI2->isOne())
return C1; // X / 1 == X
if (CI2->isZero())
- return PoisonValue::get(CI2->getType()); // X / 0 == poison
+ return UndefValue::get(CI2->getType()); // X / 0 == undef
break;
case Instruction::URem:
case Instruction::SRem:
if (CI2->isOne())
return Constant::getNullValue(CI2->getType()); // X % 1 == 0
if (CI2->isZero())
- return PoisonValue::get(CI2->getType()); // X % 0 == poison
+ return UndefValue::get(CI2->getType()); // X % 0 == undef
break;
case Instruction::And:
if (CI2->isZero()) return C2; // X & 0 == 0
@@ -1368,7 +1370,7 @@ Constant *llvm::ConstantFoldBinaryInstruction(unsigned Opcode, Constant *C1,
case Instruction::SDiv:
assert(!CI2->isZero() && "Div by zero handled above");
if (C2V.isAllOnesValue() && C1V.isMinSignedValue())
- return PoisonValue::get(CI1->getType()); // MIN_INT / -1 -> poison
+ return UndefValue::get(CI1->getType()); // MIN_INT / -1 -> undef
return ConstantInt::get(CI1->getContext(), C1V.sdiv(C2V));
case Instruction::URem:
assert(!CI2->isZero() && "Div by zero handled above");
@@ -1376,7 +1378,7 @@ Constant *llvm::ConstantFoldBinaryInstruction(unsigned Opcode, Constant *C1,
case Instruction::SRem:
assert(!CI2->isZero() && "Div by zero handled above");
if (C2V.isAllOnesValue() && C1V.isMinSignedValue())
- return PoisonValue::get(CI1->getType()); // MIN_INT % -1 -> poison
+ return UndefValue::get(CI1->getType()); // MIN_INT % -1 -> undef
return ConstantInt::get(CI1->getContext(), C1V.srem(C2V));
case Instruction::And:
return ConstantInt::get(CI1->getContext(), C1V & C2V);
@@ -1387,15 +1389,15 @@ Constant *llvm::ConstantFoldBinaryInstruction(unsigned Opcode, Constant *C1,
case Instruction::Shl:
if (C2V.ult(C1V.getBitWidth()))
return ConstantInt::get(CI1->getContext(), C1V.shl(C2V));
- return PoisonValue::get(C1->getType()); // too big shift is poison
+ return UndefValue::get(C1->getType()); // too big shift is undef
case Instruction::LShr:
if (C2V.ult(C1V.getBitWidth()))
return ConstantInt::get(CI1->getContext(), C1V.lshr(C2V));
- return PoisonValue::get(C1->getType()); // too big shift is poison
+ return UndefValue::get(C1->getType()); // too big shift is undef
case Instruction::AShr:
if (C2V.ult(C1V.getBitWidth()))
return ConstantInt::get(CI1->getContext(), C1V.ashr(C2V));
- return PoisonValue::get(C1->getType()); // too big shift is poison
+ return UndefValue::get(C1->getType()); // too big shift is undef
}
}
@@ -1441,7 +1443,7 @@ Constant *llvm::ConstantFoldBinaryInstruction(unsigned Opcode, Constant *C1,
// Fast path for splatted constants.
if (Constant *C2Splat = C2->getSplatValue()) {
if (Instruction::isIntDivRem(Opcode) && C2Splat->isNullValue())
- return PoisonValue::get(VTy);
+ return UndefValue::get(VTy);
if (Constant *C1Splat = C1->getSplatValue()) {
return ConstantVector::getSplat(
VTy->getElementCount(),
@@ -1458,9 +1460,9 @@ Constant *llvm::ConstantFoldBinaryInstruction(unsigned Opcode, Constant *C1,
Constant *LHS = ConstantExpr::getExtractElement(C1, ExtractIdx);
Constant *RHS = ConstantExpr::getExtractElement(C2, ExtractIdx);
- // If any element of a divisor vector is zero, the whole op is poison.
+ // If any element of a divisor vector is zero, the whole op is undef.
if (Instruction::isIntDivRem(Opcode) && RHS->isNullValue())
- return PoisonValue::get(VTy);
+ return UndefValue::get(VTy);
Result.push_back(ConstantExpr::get(Opcode, LHS, RHS));
}
@@ -2343,8 +2345,7 @@ Constant *llvm::ConstantFoldGetElementPtr(Type *PointeeTy, Constant *C,
return PoisonValue::get(GEPTy);
if (isa<UndefValue>(C))
- // If inbounds, we can choose an out-of-bounds pointer as a base pointer.
- return InBounds ? PoisonValue::get(GEPTy) : UndefValue::get(GEPTy);
+ return UndefValue::get(GEPTy);
Constant *Idx0 = cast<Constant>(Idxs[0]);
if (Idxs.size() == 1 && (Idx0->isNullValue() || isa<UndefValue>(Idx0)))
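The ConstantFold hunks above revert a series of folds from poison back to undef for the 12.x release branch (division/remainder by zero or undef, INT_MIN / -1, over-wide shifts, and undef GEP bases). A self-contained illustration of the resulting behavior, written against this branch's API (the function itself is illustrative, not from the source):

#include "llvm/IR/Constants.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Type.h"
using namespace llvm;

static bool divByZeroFoldsToUndef() {
  LLVMContext Ctx;
  Type *I32 = Type::getInt32Ty(Ctx);
  Constant *One = ConstantInt::get(I32, 1);
  Constant *Zero = ConstantInt::get(I32, 0);
  // ConstantExpr::get* folds eagerly; after this change 1 udiv 0 yields
  // undef rather than poison on release/12.
  Constant *R = ConstantExpr::getUDiv(One, Zero);
  return isa<UndefValue>(R);
}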
diff --git a/llvm/lib/IR/PseudoProbe.cpp b/llvm/lib/IR/PseudoProbe.cpp
index 804214f06e7a..80d2963938d4 100644
--- a/llvm/lib/IR/PseudoProbe.cpp
+++ b/llvm/lib/IR/PseudoProbe.cpp
@@ -35,6 +35,9 @@ Optional<PseudoProbe> extractProbeFromDiscriminator(const Instruction &Inst) {
PseudoProbeDwarfDiscriminator::extractProbeType(Discriminator);
Probe.Attr =
PseudoProbeDwarfDiscriminator::extractProbeAttributes(Discriminator);
+ Probe.Factor =
+ PseudoProbeDwarfDiscriminator::extractProbeFactor(Discriminator) /
+ (float)PseudoProbeDwarfDiscriminator::FullDistributionFactor;
return Probe;
}
}
@@ -47,6 +50,8 @@ Optional<PseudoProbe> extractProbe(const Instruction &Inst) {
Probe.Id = II->getIndex()->getZExtValue();
Probe.Type = (uint32_t)PseudoProbeType::Block;
Probe.Attr = II->getAttributes()->getZExtValue();
+ Probe.Factor = II->getFactor()->getZExtValue() /
+ (float)PseudoProbeFullDistributionFactor;
return Probe;
}
@@ -55,4 +60,40 @@ Optional<PseudoProbe> extractProbe(const Instruction &Inst) {
return None;
}
+
+void setProbeDistributionFactor(Instruction &Inst, float Factor) {
+ assert(Factor >= 0 && Factor <= 1 &&
+ "Distribution factor must be in [0, 1.0]");
+ if (auto *II = dyn_cast<PseudoProbeInst>(&Inst)) {
+ IRBuilder<> Builder(&Inst);
+ uint64_t IntFactor = PseudoProbeFullDistributionFactor;
+ if (Factor < 1)
+ IntFactor *= Factor;
+ auto OrigFactor = II->getFactor()->getZExtValue();
+ if (IntFactor != OrigFactor)
+ II->replaceUsesOfWith(II->getFactor(), Builder.getInt64(IntFactor));
+ } else if (isa<CallBase>(&Inst) && !isa<IntrinsicInst>(&Inst)) {
+ if (const DebugLoc &DLoc = Inst.getDebugLoc()) {
+ const DILocation *DIL = DLoc;
+ auto Discriminator = DIL->getDiscriminator();
+ if (DILocation::isPseudoProbeDiscriminator(Discriminator)) {
+ auto Index =
+ PseudoProbeDwarfDiscriminator::extractProbeIndex(Discriminator);
+ auto Type =
+ PseudoProbeDwarfDiscriminator::extractProbeType(Discriminator);
+ auto Attr = PseudoProbeDwarfDiscriminator::extractProbeAttributes(
+ Discriminator);
+ // Round small factors to 0 to avoid over-counting.
+ uint32_t IntFactor =
+ PseudoProbeDwarfDiscriminator::FullDistributionFactor;
+ if (Factor < 1)
+ IntFactor *= Factor;
+ uint32_t V = PseudoProbeDwarfDiscriminator::packProbeData(
+ Index, Type, Attr, IntFactor);
+ DIL = DIL->cloneWithDiscriminator(V);
+ Inst.setDebugLoc(DIL);
+ }
+ }
+ }
+}
} // namespace llvm
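Both extraction paths above recover the distribution factor as a float in [0, 1] by dividing the stored integer by the relevant full-distribution constant, and setProbeDistributionFactor applies the inverse scaling before re-packing. A hedged round-trip sketch using the constant referenced above (the free functions are illustrative):

static uint64_t encodeFactor(float Factor) {
  uint64_t IntFactor = PseudoProbeFullDistributionFactor;
  if (Factor < 1)
    IntFactor *= Factor; // scales and truncates, matching the code above
  return IntFactor;
}

static float decodeFactor(uint64_t IntFactor) {
  return IntFactor / (float)PseudoProbeFullDistributionFactor;
}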
diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp
index 100e881c8fa8..6dd299ee9845 100644
--- a/llvm/lib/IR/Verifier.cpp
+++ b/llvm/lib/IR/Verifier.cpp
@@ -1070,12 +1070,6 @@ void Verifier::visitDICompositeType(const DICompositeType &N) {
if (auto *Params = N.getRawTemplateParams())
visitTemplateParams(N, *Params);
- if (N.getTag() == dwarf::DW_TAG_class_type ||
- N.getTag() == dwarf::DW_TAG_union_type) {
- AssertDI(N.getFile() && !N.getFile()->getFilename().empty(),
- "class/union requires a filename", &N, N.getFile());
- }
-
if (auto *D = N.getRawDiscriminator()) {
AssertDI(isa<DIDerivedType>(D) && N.getTag() == dwarf::DW_TAG_variant_part,
"discriminator can only appear on variant part");
diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp
index d4c4c6e01ef5..6c1a7c75d30a 100644
--- a/llvm/lib/Passes/PassBuilder.cpp
+++ b/llvm/lib/Passes/PassBuilder.cpp
@@ -1423,6 +1423,9 @@ PassBuilder::buildPerModuleDefaultPipeline(OptimizationLevel Level,
// Now add the optimization pipeline.
MPM.addPass(buildModuleOptimizationPipeline(Level, LTOPreLink));
+ if (PGOOpt && PGOOpt->PseudoProbeForProfiling)
+ MPM.addPass(PseudoProbeUpdatePass());
+
// Emit annotation remarks.
addAnnotationRemarksPass(MPM);
@@ -1477,6 +1480,9 @@ PassBuilder::buildThinLTOPreLinkDefaultPipeline(OptimizationLevel Level) {
if (PTO.Coroutines)
MPM.addPass(createModuleToFunctionPassAdaptor(CoroCleanupPass()));
+ if (PGOOpt && PGOOpt->PseudoProbeForProfiling)
+ MPM.addPass(PseudoProbeUpdatePass());
+
// Emit annotation remarks.
addAnnotationRemarksPass(MPM);
diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def
index 860bfade733d..877cb9ed13b3 100644
--- a/llvm/lib/Passes/PassRegistry.def
+++ b/llvm/lib/Passes/PassRegistry.def
@@ -119,6 +119,7 @@ MODULE_PASS("kasan-module", ModuleAddressSanitizerPass(/*CompileKernel=*/true, f
MODULE_PASS("sancov-module", ModuleSanitizerCoveragePass())
MODULE_PASS("memprof-module", ModuleMemProfilerPass())
MODULE_PASS("poison-checking", PoisonCheckingPass())
+MODULE_PASS("pseudo-probe-update", PseudoProbeUpdatePass())
#undef MODULE_PASS
#ifndef CGSCC_ANALYSIS
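With this registration the pass is reachable through the new pass manager's textual pipeline, which is how it would typically be exercised in a lit test, e.g.:

  opt -passes=pseudo-probe-update input.ll -S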
diff --git a/llvm/lib/Passes/StandardInstrumentations.cpp b/llvm/lib/Passes/StandardInstrumentations.cpp
index a8bfe02d4432..6795aed7b04e 100644
--- a/llvm/lib/Passes/StandardInstrumentations.cpp
+++ b/llvm/lib/Passes/StandardInstrumentations.cpp
@@ -882,6 +882,7 @@ void StandardInstrumentations::registerCallbacks(
OptBisect.registerCallbacks(PIC);
PreservedCFGChecker.registerCallbacks(PIC);
PrintChangedIR.registerCallbacks(PIC);
+ PseudoProbeVerification.registerCallbacks(PIC);
if (VerifyEach)
Verify.registerCallbacks(PIC);
}
diff --git a/llvm/lib/ProfileData/SampleProfReader.cpp b/llvm/lib/ProfileData/SampleProfReader.cpp
index c42931174bc0..370ffc8e2885 100644
--- a/llvm/lib/ProfileData/SampleProfReader.cpp
+++ b/llvm/lib/ProfileData/SampleProfReader.cpp
@@ -222,8 +222,6 @@ std::error_code SampleProfileReaderText::readImpl() {
sampleprof_error Result = sampleprof_error::success;
InlineCallStack InlineStack;
- int CSProfileCount = 0;
- int RegularProfileCount = 0;
uint32_t ProbeProfileCount = 0;
// SeenMetadata tracks whether we have processed metadata for the current
@@ -257,11 +255,9 @@ std::error_code SampleProfileReaderText::readImpl() {
SampleContext FContext(FName);
if (FContext.hasContext())
++CSProfileCount;
- else
- ++RegularProfileCount;
Profiles[FContext] = FunctionSamples();
FunctionSamples &FProfile = Profiles[FContext];
- FProfile.setName(FContext.getName());
+ FProfile.setName(FContext.getNameWithoutContext());
FProfile.setContext(FContext);
MergeResult(Result, FProfile.addTotalSamples(NumSamples));
MergeResult(Result, FProfile.addHeadSamples(NumHeadSamples));
@@ -324,13 +320,14 @@ std::error_code SampleProfileReaderText::readImpl() {
}
}
- assert((RegularProfileCount == 0 || CSProfileCount == 0) &&
+ assert((CSProfileCount == 0 || CSProfileCount == Profiles.size()) &&
"Cannot have both context-sensitive and regular profile");
ProfileIsCS = (CSProfileCount > 0);
assert((ProbeProfileCount == 0 || ProbeProfileCount == Profiles.size()) &&
"Cannot have both probe-based profiles and regular profiles");
ProfileIsProbeBased = (ProbeProfileCount > 0);
FunctionSamples::ProfileIsProbeBased = ProfileIsProbeBased;
+ FunctionSamples::ProfileIsCS = ProfileIsCS;
if (Result == sampleprof_error::success)
computeSummary();
@@ -546,12 +543,16 @@ SampleProfileReaderBinary::readFuncProfile(const uint8_t *Start) {
if (std::error_code EC = FName.getError())
return EC;
- Profiles[*FName] = FunctionSamples();
- FunctionSamples &FProfile = Profiles[*FName];
- FProfile.setName(*FName);
-
+ SampleContext FContext(*FName);
+ Profiles[FContext] = FunctionSamples();
+ FunctionSamples &FProfile = Profiles[FContext];
+ FProfile.setName(FContext.getNameWithoutContext());
+ FProfile.setContext(FContext);
FProfile.addHeadSamples(*NumHeadSamples);
+ if (FContext.hasContext())
+ CSProfileCount++;
+
if (std::error_code EC = readProfile(FProfile))
return EC;
return sampleprof_error::success;
@@ -654,40 +655,44 @@ std::error_code SampleProfileReaderExtBinaryBase::readFuncProfiles() {
return EC;
}
assert(Data == End && "More data is read than expected");
- return sampleprof_error::success;
- }
-
- if (Remapper) {
- for (auto Name : FuncsToUse) {
- Remapper->insert(Name);
+ } else {
+ if (Remapper) {
+ for (auto Name : FuncsToUse) {
+ Remapper->insert(Name);
+ }
}
- }
- if (useMD5()) {
- for (auto Name : FuncsToUse) {
- auto GUID = std::to_string(MD5Hash(Name));
- auto iter = FuncOffsetTable.find(StringRef(GUID));
- if (iter == FuncOffsetTable.end())
- continue;
- const uint8_t *FuncProfileAddr = Start + iter->second;
- assert(FuncProfileAddr < End && "out of LBRProfile section");
- if (std::error_code EC = readFuncProfile(FuncProfileAddr))
- return EC;
- }
- } else {
- for (auto NameOffset : FuncOffsetTable) {
- auto FuncName = NameOffset.first;
- if (!FuncsToUse.count(FuncName) &&
- (!Remapper || !Remapper->exist(FuncName)))
- continue;
- const uint8_t *FuncProfileAddr = Start + NameOffset.second;
- assert(FuncProfileAddr < End && "out of LBRProfile section");
- if (std::error_code EC = readFuncProfile(FuncProfileAddr))
- return EC;
+ if (useMD5()) {
+ for (auto Name : FuncsToUse) {
+ auto GUID = std::to_string(MD5Hash(Name));
+ auto iter = FuncOffsetTable.find(StringRef(GUID));
+ if (iter == FuncOffsetTable.end())
+ continue;
+ const uint8_t *FuncProfileAddr = Start + iter->second;
+ assert(FuncProfileAddr < End && "out of LBRProfile section");
+ if (std::error_code EC = readFuncProfile(FuncProfileAddr))
+ return EC;
+ }
+ } else {
+ for (auto NameOffset : FuncOffsetTable) {
+ SampleContext FContext(NameOffset.first);
+ auto FuncName = FContext.getNameWithoutContext();
+ if (!FuncsToUse.count(FuncName) &&
+ (!Remapper || !Remapper->exist(FuncName)))
+ continue;
+ const uint8_t *FuncProfileAddr = Start + NameOffset.second;
+ assert(FuncProfileAddr < End && "out of LBRProfile section");
+ if (std::error_code EC = readFuncProfile(FuncProfileAddr))
+ return EC;
+ }
}
+ Data = End;
}
- Data = End;
+ assert((CSProfileCount == 0 || CSProfileCount == Profiles.size()) &&
+ "Cannot have both context-sensitive and regular profile");
+ ProfileIsCS = (CSProfileCount > 0);
+ FunctionSamples::ProfileIsCS = ProfileIsCS;
return sampleprof_error::success;
}
@@ -878,7 +883,7 @@ std::error_code SampleProfileReaderExtBinaryBase::readNameTableSec(bool IsMD5) {
std::error_code SampleProfileReaderExtBinaryBase::readFuncMetadata() {
if (!ProfileIsProbeBased)
return sampleprof_error::success;
- for (unsigned I = 0; I < Profiles.size(); ++I) {
+ while (Data < End) {
auto FName(readStringFromTable());
if (std::error_code EC = FName.getError())
return EC;
@@ -887,8 +892,14 @@ std::error_code SampleProfileReaderExtBinaryBase::readFuncMetadata() {
if (std::error_code EC = Checksum.getError())
return EC;
- Profiles[*FName].setFunctionHash(*Checksum);
+ SampleContext FContext(*FName);
+ // No need to load metadata for profiles that are not loaded in the current
+ // module.
+ if (Profiles.count(FContext))
+ Profiles[FContext].setFunctionHash(*Checksum);
}
+
+ assert(Data == End && "More data is read than expected");
return sampleprof_error::success;
}
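The reader changes above consistently route raw function names through SampleContext, so context-sensitive (CSSPGO) entries and plain ones take a single code path. A hedged sketch of the recurring pattern; the bracketed context spelling is an assumption here, while the SampleContext calls are the ones used above:

// FName may carry a calling context, e.g. "[main:3 @ foo]", or be plain "foo".
SampleContext FContext(FName);
if (FContext.hasContext())
  ++CSProfileCount;                    // context-sensitive entry
FunctionSamples &FProfile = Profiles[FContext];
FProfile.setName(FContext.getNameWithoutContext()); // "foo" in both cases
FProfile.setContext(FContext);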
diff --git a/llvm/lib/ProfileData/SampleProfWriter.cpp b/llvm/lib/ProfileData/SampleProfWriter.cpp
index 71dba6281f76..d3bc05e06fdf 100644
--- a/llvm/lib/ProfileData/SampleProfWriter.cpp
+++ b/llvm/lib/ProfileData/SampleProfWriter.cpp
@@ -147,7 +147,7 @@ std::error_code SampleProfileWriterExtBinaryBase::write(
std::error_code
SampleProfileWriterExtBinaryBase::writeSample(const FunctionSamples &S) {
uint64_t Offset = OutputStream->tell();
- StringRef Name = S.getName();
+ StringRef Name = S.getNameWithContext(true);
FuncOffsetTable[Name] = Offset - SecLBRProfileStart;
encodeULEB128(S.getHeadSamples(), *OutputStream);
return writeBody(S);
@@ -635,7 +635,7 @@ std::error_code SampleProfileWriterBinary::writeSummary() {
std::error_code SampleProfileWriterBinary::writeBody(const FunctionSamples &S) {
auto &OS = *OutputStream;
- if (std::error_code EC = writeNameIdx(S.getName()))
+ if (std::error_code EC = writeNameIdx(S.getNameWithContext(true)))
return EC;
encodeULEB128(S.getTotalSamples(), OS);
diff --git a/llvm/lib/Support/CommandLine.cpp b/llvm/lib/Support/CommandLine.cpp
index 6d89481bf28a..e2f014d1815b 100644
--- a/llvm/lib/Support/CommandLine.cpp
+++ b/llvm/lib/Support/CommandLine.cpp
@@ -1726,6 +1726,19 @@ void Option::printHelpStr(StringRef HelpStr, size_t Indent,
}
}
+void Option::printEnumValHelpStr(StringRef HelpStr, size_t BaseIndent,
+ size_t FirstLineIndentedBy) {
+ const StringRef ValHelpPrefix = " ";
+ assert(BaseIndent >= FirstLineIndentedBy + ValHelpPrefix.size());
+ std::pair<StringRef, StringRef> Split = HelpStr.split('\n');
+ outs().indent(BaseIndent - FirstLineIndentedBy)
+ << ArgHelpPrefix << ValHelpPrefix << Split.first << "\n";
+ while (!Split.second.empty()) {
+ Split = Split.second.split('\n');
+ outs().indent(BaseIndent + ValHelpPrefix.size()) << Split.first << "\n";
+ }
+}
+
// Print out the option for the alias.
void alias::printOptionInfo(size_t GlobalWidth) const {
outs() << PrintArg(ArgStr);
@@ -1971,17 +1984,17 @@ void generic_parser_base::printOptionInfo(const Option &O,
StringRef Description = getDescription(i);
if (!shouldPrintOption(OptionName, Description, O))
continue;
- assert(GlobalWidth >= OptionName.size() + OptionPrefixesSize);
- size_t NumSpaces = GlobalWidth - OptionName.size() - OptionPrefixesSize;
+ size_t FirstLineIndent = OptionName.size() + OptionPrefixesSize;
outs() << OptionPrefix << OptionName;
if (OptionName.empty()) {
outs() << EmptyOption;
- assert(NumSpaces >= EmptyOption.size());
- NumSpaces -= EmptyOption.size();
+ assert(FirstLineIndent >= EmptyOption.size());
+ FirstLineIndent += EmptyOption.size();
}
if (!Description.empty())
- outs().indent(NumSpaces) << ArgHelpPrefix << " " << Description;
- outs() << '\n';
+ Option::printEnumValHelpStr(Description, GlobalWidth, FirstLineIndent);
+ else
+ outs() << '\n';
}
} else {
if (!O.HelpStr.empty())
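printEnumValHelpStr splits the description on '\n' and indents continuation lines so multi-line per-value help stays aligned under its value name. A minimal usage sketch with made-up option names:

#include "llvm/Support/CommandLine.h"
using namespace llvm;

enum Level { Fast, Safe };
static cl::opt<Level> OptLevel(
    "level", cl::desc("Select a level:"),
    cl::values(
        clEnumValN(Fast, "fast", "fast mode\nskips extra verification"),
        clEnumValN(Safe, "safe", "safe mode")));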
diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp
index 37c924d879b1..68c721cb0d72 100644
--- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp
+++ b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp
@@ -111,7 +111,7 @@ AArch64MCAsmInfoMicrosoftCOFF::AArch64MCAsmInfoMicrosoftCOFF() {
SupportsDebugInformation = true;
CodePointerSize = 8;
- CommentString = ";";
+ CommentString = "//";
ExceptionsType = ExceptionHandling::WinEH;
WinEHEncodingType = WinEH::EncodingType::Itanium;
}
diff --git a/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp b/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp
index 2628070f219c..cdb78aae1c4f 100644
--- a/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp
+++ b/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp
@@ -75,17 +75,19 @@ static bool foldImmediates(MachineInstr &MI, const SIInstrInfo *TII,
MachineOperand &MovSrc = Def->getOperand(1);
bool ConstantFolded = false;
- if (MovSrc.isImm() && (isInt<32>(MovSrc.getImm()) ||
- isUInt<32>(MovSrc.getImm()))) {
- Src0.ChangeToImmediate(MovSrc.getImm());
- ConstantFolded = true;
- } else if (MovSrc.isFI()) {
- Src0.ChangeToFrameIndex(MovSrc.getIndex());
- ConstantFolded = true;
- } else if (MovSrc.isGlobal()) {
- Src0.ChangeToGA(MovSrc.getGlobal(), MovSrc.getOffset(),
- MovSrc.getTargetFlags());
- ConstantFolded = true;
+ if (TII->isOperandLegal(MI, Src0Idx, &MovSrc)) {
+ if (MovSrc.isImm() &&
+ (isInt<32>(MovSrc.getImm()) || isUInt<32>(MovSrc.getImm()))) {
+ Src0.ChangeToImmediate(MovSrc.getImm());
+ ConstantFolded = true;
+ } else if (MovSrc.isFI()) {
+ Src0.ChangeToFrameIndex(MovSrc.getIndex());
+ ConstantFolded = true;
+ } else if (MovSrc.isGlobal()) {
+ Src0.ChangeToGA(MovSrc.getGlobal(), MovSrc.getOffset(),
+ MovSrc.getTargetFlags());
+ ConstantFolded = true;
+ }
}
if (ConstantFolded) {
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index 397979b4ab1e..598062672a56 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -18661,6 +18661,8 @@ ARMTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
: AtomicExpansionKind::None;
}
+// Similar to shouldExpandAtomicRMWInIR, ldrex/strex can be used up to 32
+// bits, and up to 64 bits on the non-M profiles.
TargetLowering::AtomicExpansionKind
ARMTargetLowering::shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const {
// At -O0, fast-regalloc cannot cope with the live vregs necessary to
@@ -18668,9 +18670,11 @@ ARMTargetLowering::shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const {
// on the stack and close enough to the spill slot, this can lead to a
// situation where the monitor always gets cleared and the atomic operation
// can never succeed. So at -O0 we need a late-expanded pseudo-inst instead.
+ unsigned Size = AI->getOperand(1)->getType()->getPrimitiveSizeInBits();
bool HasAtomicCmpXchg =
!Subtarget->isThumb() || Subtarget->hasV8MBaselineOps();
- if (getTargetMachine().getOptLevel() != 0 && HasAtomicCmpXchg)
+ if (getTargetMachine().getOptLevel() != 0 && HasAtomicCmpXchg &&
+ Size <= (Subtarget->isMClass() ? 32U : 64U))
return AtomicExpansionKind::LLSC;
return AtomicExpansionKind::None;
}
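The effect of the added width check: a 64-bit cmpxchg on an M-profile core is no longer lowered to LL/SC (M-profile has no ldrexd/strexd) and takes the AtomicExpansionKind::None path instead. Distilled into a standalone predicate (the helper is illustrative):

static bool fitsLLSC(bool IsMClass, unsigned SizeInBits) {
  // M-profile: ldrex/strex handle up to 32 bits; A/R profiles also provide
  // ldrexd/strexd for 64-bit accesses.
  return SizeInBits <= (IsMClass ? 32U : 64U);
}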
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 9215c17cb94b..929a72ac687e 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -8604,16 +8604,20 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
// If it is a splat of a double, check if we can shrink it to a 32 bit
// non-denormal float which when converted back to double gives us the same
- // double. This is to exploit the XXSPLTIDP instruction.+ // If we lose precision, we use XXSPLTI32DX.
+ // double. This is to exploit the XXSPLTIDP instruction.
+ // If we lose precision, we use XXSPLTI32DX.
if (BVNIsConstantSplat && (SplatBitSize == 64) &&
Subtarget.hasPrefixInstrs()) {
- if (convertToNonDenormSingle(APSplatBits) &&
- (Op->getValueType(0) == MVT::v2f64)) {
+ // Check the type first to short-circuit so we don't modify APSplatBits if
+ // this block isn't executed.
+ if ((Op->getValueType(0) == MVT::v2f64) &&
+ convertToNonDenormSingle(APSplatBits)) {
SDValue SplatNode = DAG.getNode(
PPCISD::XXSPLTI_SP_TO_DP, dl, MVT::v2f64,
DAG.getTargetConstant(APSplatBits.getZExtValue(), dl, MVT::i32));
return DAG.getBitcast(Op.getValueType(), SplatNode);
- } else { // We may lose precision, so we have to use XXSPLTI32DX.
+ } else {
+ // We may lose precision, so we have to use XXSPLTI32DX.
uint32_t Hi =
(uint32_t)((APSplatBits.getZExtValue() & 0xFFFFFFFF00000000LL) >> 32);
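The operand swap above leans on &&'s left-to-right short-circuit: convertToNonDenormSingle mutates APSplatBits even when it fails, so the cheap type test must run first. A generic C++ illustration, with mutate standing in for convertToNonDenormSingle:

static bool mutate(unsigned &Bits) { Bits = 42; return false; }

static void fold(bool TypeIsV2F64, unsigned &Bits) {
  if (TypeIsV2F64 && mutate(Bits)) {
    // XXSPLTIDP path
  }
  // With the old operand order, mutate(Bits) ran even for non-v2f64 types,
  // corrupting Bits for the XXSPLTI32DX fallback.
}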
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h
index 477105bd03ac..0dda2c181572 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.h
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h
@@ -987,6 +987,9 @@ namespace llvm {
shouldExpandBuildVectorWithShuffles(EVT VT,
unsigned DefinedValues) const override;
+ // Keep the zero-extensions for arguments to libcalls.
+ bool shouldKeepZExtForFP16Conv() const override { return true; }
+
/// createFastISel - This method returns a target-specific FastISel object,
/// or null if the target does not support "fast" instruction selection.
FastISel *createFastISel(FunctionLoweringInfo &FuncInfo,
diff --git a/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp b/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
index e7e590153605..dcf7525d7458 100644
--- a/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
+++ b/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
@@ -2126,7 +2126,7 @@ bool RISCVAsmParser::parseDirectiveAttribute() {
if (getFeatureBits(RISCV::FeatureStdExtB))
formalArchStr = (Twine(formalArchStr) + "_b0p93").str();
if (getFeatureBits(RISCV::FeatureStdExtV))
- formalArchStr = (Twine(formalArchStr) + "_v1p0").str();
+ formalArchStr = (Twine(formalArchStr) + "_v0p10").str();
if (getFeatureBits(RISCV::FeatureExtZfh))
formalArchStr = (Twine(formalArchStr) + "_zfh0p1").str();
if (getFeatureBits(RISCV::FeatureExtZba))
@@ -2152,9 +2152,9 @@ bool RISCVAsmParser::parseDirectiveAttribute() {
if (getFeatureBits(RISCV::FeatureExtZbt))
formalArchStr = (Twine(formalArchStr) + "_zbt0p93").str();
if (getFeatureBits(RISCV::FeatureExtZvamo))
- formalArchStr = (Twine(formalArchStr) + "_zvamo1p0").str();
+ formalArchStr = (Twine(formalArchStr) + "_zvamo0p10").str();
if (getFeatureBits(RISCV::FeatureStdExtZvlsseg))
- formalArchStr = (Twine(formalArchStr) + "_zvlsseg1p0").str();
+ formalArchStr = (Twine(formalArchStr) + "_zvlsseg0p10").str();
getTargetStreamer().emitTextAttribute(Tag, formalArchStr);
}
diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVTargetStreamer.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVTargetStreamer.cpp
index 72434a15bedb..13c4b84aa300 100644
--- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVTargetStreamer.cpp
+++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVTargetStreamer.cpp
@@ -63,7 +63,7 @@ void RISCVTargetStreamer::emitTargetAttributes(const MCSubtargetInfo &STI) {
if (STI.hasFeature(RISCV::FeatureStdExtB))
Arch += "_b0p93";
if (STI.hasFeature(RISCV::FeatureStdExtV))
- Arch += "_v1p0";
+ Arch += "_v0p10";
if (STI.hasFeature(RISCV::FeatureExtZfh))
Arch += "_zfh0p1";
if (STI.hasFeature(RISCV::FeatureExtZba))
@@ -89,9 +89,9 @@ void RISCVTargetStreamer::emitTargetAttributes(const MCSubtargetInfo &STI) {
if (STI.hasFeature(RISCV::FeatureExtZbt))
Arch += "_zbt0p93";
if (STI.hasFeature(RISCV::FeatureExtZvamo))
- Arch += "_zvamo1p0";
+ Arch += "_zvamo0p10";
if (STI.hasFeature(RISCV::FeatureStdExtZvlsseg))
- Arch += "_zvlsseg1p0";
+ Arch += "_zvlsseg0p10";
emitTextAttribute(RISCVAttrs::ARCH, Arch);
}
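Both the assembler and the streamer now advertise the draft version actually implemented: "_v0p10" encodes 'V' extension version 0.10, replacing the incorrect 1.0 claim, and the zvamo/zvlsseg sub-extensions follow suit. An emitted attribute would look like (illustrative string):

  .attribute arch, "rv64i2p0_v0p10"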
diff --git a/llvm/lib/Target/RISCV/RISCVCleanupVSETVLI.cpp b/llvm/lib/Target/RISCV/RISCVCleanupVSETVLI.cpp
index 6a12f99b8903..ae32cbd1ae59 100644
--- a/llvm/lib/Target/RISCV/RISCVCleanupVSETVLI.cpp
+++ b/llvm/lib/Target/RISCV/RISCVCleanupVSETVLI.cpp
@@ -59,7 +59,8 @@ bool RISCVCleanupVSETVLI::runOnMachineBasicBlock(MachineBasicBlock &MBB) {
for (auto MII = MBB.begin(), MIE = MBB.end(); MII != MIE;) {
MachineInstr &MI = *MII++;
- if (MI.getOpcode() != RISCV::PseudoVSETVLI) {
+ if (MI.getOpcode() != RISCV::PseudoVSETVLI &&
+ MI.getOpcode() != RISCV::PseudoVSETIVLI) {
if (PrevVSETVLI &&
(MI.isCall() || MI.modifiesRegister(RISCV::VL) ||
MI.modifiesRegister(RISCV::VTYPE))) {
@@ -69,26 +70,48 @@ bool RISCVCleanupVSETVLI::runOnMachineBasicBlock(MachineBasicBlock &MBB) {
continue;
}
- // If we don't have a previous VSETVLI or the VL output isn't dead, we
+ // If we don't have a previous VSET{I}VLI or the VL output isn't dead, we
// can't remove this VSETVLI.
if (!PrevVSETVLI || !MI.getOperand(0).isDead()) {
PrevVSETVLI = &MI;
continue;
}
- Register PrevAVLReg = PrevVSETVLI->getOperand(1).getReg();
- Register AVLReg = MI.getOperand(1).getReg();
+ // If the previous "set vl" instruction's opcode differs from this one's, we
+ // can't compare the AVL operands.
+ if (PrevVSETVLI->getOpcode() != MI.getOpcode()) {
+ PrevVSETVLI = &MI;
+ continue;
+ }
+
+ // The remaining two cases are
+ // 1. PrevVSETVLI = PseudoVSETVLI
+ // MI = PseudoVSETVLI
+ //
+ // 2. PrevVSETVLI = PseudoVSETIVLI
+ // MI = PseudoVSETIVLI
+ Register AVLReg;
+ bool SameAVL = false;
+ if (MI.getOpcode() == RISCV::PseudoVSETVLI) {
+ AVLReg = MI.getOperand(1).getReg();
+ SameAVL = PrevVSETVLI->getOperand(1).getReg() == AVLReg;
+ } else { // RISCV::PseudoVSETIVLI
+ SameAVL =
+ PrevVSETVLI->getOperand(1).getImm() == MI.getOperand(1).getImm();
+ }
int64_t PrevVTYPEImm = PrevVSETVLI->getOperand(2).getImm();
int64_t VTYPEImm = MI.getOperand(2).getImm();
- // Does this VSETVLI use the same AVL register and VTYPE immediate?
- if (PrevAVLReg != AVLReg || PrevVTYPEImm != VTYPEImm) {
+ // Does this VSET{I}VLI use the same AVL register/value and VTYPE immediate?
+ if (!SameAVL || PrevVTYPEImm != VTYPEImm) {
PrevVSETVLI = &MI;
continue;
}
// If the AVLReg is X0 we need to look at the output VL of both VSETVLIs.
- if (AVLReg == RISCV::X0) {
+ if ((MI.getOpcode() == RISCV::PseudoVSETVLI) && (AVLReg == RISCV::X0)) {
+ assert((PrevVSETVLI->getOpcode() == RISCV::PseudoVSETVLI) &&
+ "Unexpected vsetvli opcode.");
Register PrevOutVL = PrevVSETVLI->getOperand(0).getReg();
Register OutVL = MI.getOperand(0).getReg();
// We can't remove if the previous VSETVLI left VL unchanged and the
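A hedged distillation of the updated redundancy test: with PseudoVSETIVLI in the mix, the second vset instruction is removable only when the opcodes match (so the AVL operands are comparable), the AVL and VTYPE operands are equal, and its VL result is dead. The helper below is illustrative; the X0-AVL case additionally needs the output-VL check shown above:

static bool isRedundantVSet(const MachineInstr &Prev, const MachineInstr &MI) {
  if (Prev.getOpcode() != MI.getOpcode())
    return false; // a register AVL and an immediate AVL are not comparable
  bool SameAVL =
      MI.getOpcode() == RISCV::PseudoVSETVLI
          ? Prev.getOperand(1).getReg() == MI.getOperand(1).getReg()
          : Prev.getOperand(1).getImm() == MI.getOperand(1).getImm();
  return SameAVL &&
         Prev.getOperand(2).getImm() == MI.getOperand(2).getImm() &&
         MI.getOperand(0).isDead(); // the VL output must be unused
}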
diff --git a/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp b/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp
index 5f50892ca886..ec9a39569952 100644
--- a/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp
+++ b/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp
@@ -103,6 +103,7 @@ bool RISCVExpandPseudo::expandMI(MachineBasicBlock &MBB,
case RISCV::PseudoLA_TLS_GD:
return expandLoadTLSGDAddress(MBB, MBBI, NextMBBI);
case RISCV::PseudoVSETVLI:
+ case RISCV::PseudoVSETIVLI:
return expandVSetVL(MBB, MBBI);
case RISCV::PseudoVMCLR_M_B1:
case RISCV::PseudoVMCLR_M_B2:
@@ -217,9 +218,15 @@ bool RISCVExpandPseudo::expandVSetVL(MachineBasicBlock &MBB,
DebugLoc DL = MBBI->getDebugLoc();
- assert(MBBI->getOpcode() == RISCV::PseudoVSETVLI &&
+ assert((MBBI->getOpcode() == RISCV::PseudoVSETVLI ||
+ MBBI->getOpcode() == RISCV::PseudoVSETIVLI) &&
"Unexpected pseudo instruction");
- const MCInstrDesc &Desc = TII->get(RISCV::VSETVLI);
+ unsigned Opcode;
+ if (MBBI->getOpcode() == RISCV::PseudoVSETVLI)
+ Opcode = RISCV::VSETVLI;
+ else
+ Opcode = RISCV::VSETIVLI;
+ const MCInstrDesc &Desc = TII->get(Opcode);
assert(Desc.getNumOperands() == 3 && "Unexpected instruction format");
Register DstReg = MBBI->getOperand(0).getReg();
diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
index 7b0f38671f06..43bf16c53a62 100644
--- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
@@ -569,12 +569,14 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
SDValue VLOperand = Node->getOperand(2);
if (auto *C = dyn_cast<ConstantSDNode>(VLOperand)) {
- if (C->isNullValue()) {
- VLOperand = SDValue(
- CurDAG->getMachineNode(RISCV::ADDI, DL, XLenVT,
- CurDAG->getRegister(RISCV::X0, XLenVT),
- CurDAG->getTargetConstant(0, DL, XLenVT)),
- 0);
+ uint64_t AVL = C->getZExtValue();
+ if (isUInt<5>(AVL)) {
+ SDValue VLImm = CurDAG->getTargetConstant(AVL, DL, XLenVT);
+ ReplaceNode(Node,
+ CurDAG->getMachineNode(RISCV::PseudoVSETIVLI, DL, XLenVT,
+ MVT::Other, VLImm, VTypeIOp,
+ /* Chain */ Node->getOperand(0)));
+ return;
}
}
@@ -824,93 +826,6 @@ bool RISCVDAGToDAGISel::MatchSRLIW(SDNode *N) const {
return (Mask | maskTrailingOnes<uint64_t>(ShAmt)) == 0xffffffff;
}
-// Check that it is a SLOI (Shift Left Ones Immediate). A PatFrag has already
-// determined it has the right structure:
-//
-// (OR (SHL RS1, VC2), VC1)
-//
-// Check that VC1, the mask used to fill with ones, is compatible
-// with VC2, the shamt:
-//
-// VC1 == maskTrailingOnes(VC2)
-//
-bool RISCVDAGToDAGISel::MatchSLOI(SDNode *N) const {
- assert(N->getOpcode() == ISD::OR);
- assert(N->getOperand(0).getOpcode() == ISD::SHL);
- assert(isa<ConstantSDNode>(N->getOperand(1)));
- assert(isa<ConstantSDNode>(N->getOperand(0).getOperand(1)));
-
- SDValue Shl = N->getOperand(0);
- if (Subtarget->is64Bit()) {
- uint64_t VC1 = N->getConstantOperandVal(1);
- uint64_t VC2 = Shl.getConstantOperandVal(1);
- return VC1 == maskTrailingOnes<uint64_t>(VC2);
- }
-
- uint32_t VC1 = N->getConstantOperandVal(1);
- uint32_t VC2 = Shl.getConstantOperandVal(1);
- return VC1 == maskTrailingOnes<uint32_t>(VC2);
-}
-
-// Check that it is a SROI (Shift Right Ones Immediate). A PatFrag has already
-// determined it has the right structure:
-//
-// (OR (SRL RS1, VC2), VC1)
-//
-// Check that VC1, the mask used to fill with ones, is compatible
-// with VC2, the shamt:
-//
-// VC1 == maskLeadingOnes(VC2)
-//
-bool RISCVDAGToDAGISel::MatchSROI(SDNode *N) const {
- assert(N->getOpcode() == ISD::OR);
- assert(N->getOperand(0).getOpcode() == ISD::SRL);
- assert(isa<ConstantSDNode>(N->getOperand(1)));
- assert(isa<ConstantSDNode>(N->getOperand(0).getOperand(1)));
-
- SDValue Srl = N->getOperand(0);
- if (Subtarget->is64Bit()) {
- uint64_t VC1 = N->getConstantOperandVal(1);
- uint64_t VC2 = Srl.getConstantOperandVal(1);
- return VC1 == maskLeadingOnes<uint64_t>(VC2);
- }
-
- uint32_t VC1 = N->getConstantOperandVal(1);
- uint32_t VC2 = Srl.getConstantOperandVal(1);
- return VC1 == maskLeadingOnes<uint32_t>(VC2);
-}
-
-// Check that it is a SROIW (Shift Right Ones Immediate i32 on RV64). A PatFrag
-// has already determined it has the right structure:
-//
-// (OR (SRL RS1, VC2), VC1)
-//
-// and then we check that VC1, the mask used to fill with ones, is compatible
-// with VC2, the shamt:
-//
-// VC2 < 32
-// VC1 == maskTrailingZeros<uint64_t>(32 - VC2)
-//
-bool RISCVDAGToDAGISel::MatchSROIW(SDNode *N) const {
- assert(N->getOpcode() == ISD::OR);
- assert(N->getOperand(0).getOpcode() == ISD::SRL);
- assert(isa<ConstantSDNode>(N->getOperand(1)));
- assert(isa<ConstantSDNode>(N->getOperand(0).getOperand(1)));
-
- // The IsRV64 predicate is checked after PatFrag predicates so we can get
- // here even on RV32.
- if (!Subtarget->is64Bit())
- return false;
-
- SDValue Srl = N->getOperand(0);
- uint64_t VC1 = N->getConstantOperandVal(1);
- uint64_t VC2 = Srl.getConstantOperandVal(1);
-
- // Immediate range should be enforced by uimm5 predicate.
- assert(VC2 < 32 && "Unexpected immediate");
- return VC1 == maskTrailingZeros<uint64_t>(32 - VC2);
-}
-
// Check that it is a SLLIUW (Shift Logical Left Immediate Unsigned i32
// on RV64).
// SLLIUW is the same as SLLI except for the fact that it clears the bits
@@ -946,6 +861,23 @@ bool RISCVDAGToDAGISel::MatchSLLIUW(SDNode *N) const {
return (VC1 >> VC2) == UINT64_C(0xFFFFFFFF);
}
+// X0 has special meaning for vsetvl/vsetvli.
+// rd | rs1 | AVL value | Effect on vl
+//--------------------------------------------------------------
+// !X0 | X0 | VLMAX | Set vl to VLMAX
+// X0 | X0 | Value in vl | Keep current vl, just change vtype.
+bool RISCVDAGToDAGISel::selectVLOp(SDValue N, SDValue &VL) {
+ // If the VL value is a constant 0, manually select it to an ADDI with 0
+ // immediate to prevent the default selection path from matching it to X0.
+ auto *C = dyn_cast<ConstantSDNode>(N);
+ if (C && C->isNullValue())
+ VL = SDValue(selectImm(CurDAG, SDLoc(N), 0, Subtarget->getXLenVT()), 0);
+ else
+ VL = N;
+
+ return true;
+}
+
bool RISCVDAGToDAGISel::selectVSplat(SDValue N, SDValue &SplatVal) {
if (N.getOpcode() != ISD::SPLAT_VECTOR &&
N.getOpcode() != RISCVISD::SPLAT_VECTOR_I64)
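selectVLOp replaces the NoX0 SDNodeXForm removed later in this patch. The point is that an X0 AVL operand never means "zero elements": per the table above, it selects VLMAX (rd != X0) or keeps the current vl (rd == X0). A constant-zero AVL is therefore pinned to a real register, e.g. "addi a0, zero, 0; vsetvli a1, a0, e32,m1" sets vl to 0, whereas "vsetvli a1, zero, e32,m1" would set vl to VLMAX (illustrative assembly).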
diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h
index 23601c3b8f06..6099586d049d 100644
--- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h
+++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h
@@ -46,11 +46,10 @@ public:
bool SelectAddrFI(SDValue Addr, SDValue &Base);
bool MatchSRLIW(SDNode *N) const;
- bool MatchSLOI(SDNode *N) const;
- bool MatchSROI(SDNode *N) const;
- bool MatchSROIW(SDNode *N) const;
bool MatchSLLIUW(SDNode *N) const;
+ bool selectVLOp(SDValue N, SDValue &VL);
+
bool selectVSplat(SDValue N, SDValue &SplatVal);
bool selectVSplatSimm5(SDValue N, SDValue &SplatVal);
bool selectVSplatUimm5(SDValue N, SDValue &SplatVal);
diff --git a/llvm/lib/Target/RISCV/RISCVInstrFormatsV.td b/llvm/lib/Target/RISCV/RISCVInstrFormatsV.td
index 147993127e78..80f46b73bfd7 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrFormatsV.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrFormatsV.td
@@ -38,9 +38,11 @@ class RISCVLSUMOP<bits<5> val> {
bits<5> Value = val;
}
def LUMOPUnitStride : RISCVLSUMOP<0b00000>;
+def LUMOPUnitStrideMask : RISCVLSUMOP<0b01011>;
def LUMOPUnitStrideWholeReg : RISCVLSUMOP<0b01000>;
def LUMOPUnitStrideFF: RISCVLSUMOP<0b10000>;
def SUMOPUnitStride : RISCVLSUMOP<0b00000>;
+def SUMOPUnitStrideMask : RISCVLSUMOP<0b01011>;
def SUMOPUnitStrideWholeReg : RISCVLSUMOP<0b01000>;
class RISCVAMOOP<bits<5> val> {
@@ -63,10 +65,23 @@ def LSWidth8 : RISCVWidth<0b0000>;
def LSWidth16 : RISCVWidth<0b0101>;
def LSWidth32 : RISCVWidth<0b0110>;
def LSWidth64 : RISCVWidth<0b0111>;
-def LSWidth128 : RISCVWidth<0b1000>;
-def LSWidth256 : RISCVWidth<0b1101>;
-def LSWidth512 : RISCVWidth<0b1110>;
-def LSWidth1024 : RISCVWidth<0b1111>;
+
+class RVInstSetiVLi<dag outs, dag ins, string opcodestr, string argstr>
+ : RVInst<outs, ins, opcodestr, argstr, [], InstFormatI> {
+ bits<5> uimm;
+ bits<5> rd;
+ bits<10> vtypei;
+
+ let Inst{31} = 1;
+ let Inst{30} = 1;
+ let Inst{29-20} = vtypei{9-0};
+ let Inst{19-15} = uimm;
+ let Inst{14-12} = 0b111;
+ let Inst{11-7} = rd;
+ let Opcode = OPC_OP_V.Value;
+
+ let Defs = [VTYPE, VL];
+}
class RVInstSetVLi<dag outs, dag ins, string opcodestr, string argstr>
: RVInst<outs, ins, opcodestr, argstr, [], InstFormatI> {
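The RVInstSetiVLi record above fixes Inst{31:30} to 0b11, places the 10-bit vtypei in bits 29:20, the 5-bit immediate AVL in 19:15, funct3 0b111 in 14:12, and rd in 11:7, all on the OP-V major opcode. A hedged encoder sketch of that layout (the free function is illustrative):

static uint32_t encodeVSETIVLI(unsigned Rd, unsigned Uimm, unsigned VTypeI,
                               unsigned OpcOpV) {
  return (0b11u << 30) | ((VTypeI & 0x3FFu) << 20) | ((Uimm & 0x1Fu) << 15) |
         (0b111u << 12) | ((Rd & 0x1Fu) << 7) | (OpcOpV & 0x7Fu);
}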
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoB.td b/llvm/lib/Target/RISCV/RISCVInstrInfoB.td
index 1bc288b5177c..7888ac7bac8e 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoB.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoB.td
@@ -45,25 +45,6 @@ def shfl_uimm : Operand<XLenVT>, ImmLeaf<XLenVT, [{
}];
}
-
-// Check that it is a SLOI (Shift Left Ones Immediate).
-def SLOIPat : PatFrag<(ops node:$A, node:$B),
- (or (shl node:$A, node:$B), imm), [{
- return MatchSLOI(N);
-}]>;
-
-// Check that it is a SROI (Shift Right Ones Immediate).
-def SROIPat : PatFrag<(ops node:$A, node:$B),
- (or (srl node:$A, node:$B), imm), [{
- return MatchSROI(N);
-}]>;
-
-// Check that it is a SROIW (Shift Right Ones Immediate i32 on RV64).
-def SROIWPat : PatFrag<(ops node:$A, node:$B),
- (or (srl node:$A, node:$B), imm), [{
- return MatchSROIW(N);
-}]>;
-
// Checks if this mask has a single 0 bit and cannot be used with ANDI.
def BCLRMask : ImmLeaf<XLenVT, [{
if (Subtarget->is64Bit())
@@ -210,11 +191,6 @@ def SH2ADD : ALU_rr<0b0010000, 0b100, "sh2add">, Sched<[]>;
def SH3ADD : ALU_rr<0b0010000, 0b110, "sh3add">, Sched<[]>;
} // Predicates = [HasStdExtZba]
-let Predicates = [HasStdExtZbp] in {
-def SLO : ALU_rr<0b0010000, 0b001, "slo">, Sched<[]>;
-def SRO : ALU_rr<0b0010000, 0b101, "sro">, Sched<[]>;
-} // Predicates = [HasStdExtZbp]
-
let Predicates = [HasStdExtZbbOrZbp] in {
def ROL : ALU_rr<0b0110000, 0b001, "rol">, Sched<[]>;
def ROR : ALU_rr<0b0110000, 0b101, "ror">, Sched<[]>;
@@ -238,11 +214,6 @@ def XPERMB : ALU_rr<0b0010100, 0b100, "xperm.b">, Sched<[]>;
def XPERMH : ALU_rr<0b0010100, 0b110, "xperm.h">, Sched<[]>;
} // Predicates = [HasStdExtZbp]
-let Predicates = [HasStdExtZbp] in {
-def SLOI : RVBShift_ri<0b00100, 0b001, OPC_OP_IMM, "sloi">, Sched<[]>;
-def SROI : RVBShift_ri<0b00100, 0b101, OPC_OP_IMM, "sroi">, Sched<[]>;
-} // Predicates = [HasStdExtZbp]
-
let Predicates = [HasStdExtZbbOrZbp] in
def RORI : RVBShift_ri<0b01100, 0b101, OPC_OP_IMM, "rori">, Sched<[]>;
@@ -369,11 +340,6 @@ def SH2ADDUW : ALUW_rr<0b0010000, 0b100, "sh2add.uw">, Sched<[]>;
def SH3ADDUW : ALUW_rr<0b0010000, 0b110, "sh3add.uw">, Sched<[]>;
} // Predicates = [HasStdExtZbb, IsRV64]
-let Predicates = [HasStdExtZbp, IsRV64] in {
-def SLOW : ALUW_rr<0b0010000, 0b001, "slow">, Sched<[]>;
-def SROW : ALUW_rr<0b0010000, 0b101, "srow">, Sched<[]>;
-} // Predicates = [HasStdExtZbp, IsRV64]
-
let Predicates = [HasStdExtZbbOrZbp, IsRV64] in {
def ROLW : ALUW_rr<0b0110000, 0b001, "rolw">, Sched<[]>;
def RORW : ALUW_rr<0b0110000, 0b101, "rorw">, Sched<[]>;
@@ -395,11 +361,6 @@ let Predicates = [HasStdExtZbp, IsRV64] in {
def XPERMW : ALU_rr<0b0010100, 0b000, "xperm.w">, Sched<[]>;
} // Predicates = [HasStdExtZbp, IsRV64]
-let Predicates = [HasStdExtZbp, IsRV64] in {
-def SLOIW : RVBShiftW_ri<0b0010000, 0b001, OPC_OP_IMM_32, "sloiw">, Sched<[]>;
-def SROIW : RVBShiftW_ri<0b0010000, 0b101, OPC_OP_IMM_32, "sroiw">, Sched<[]>;
-} // Predicates = [HasStdExtZbp, IsRV64]
-
let Predicates = [HasStdExtZbbOrZbp, IsRV64] in
def RORIW : RVBShiftW_ri<0b0110000, 0b101, OPC_OP_IMM_32, "roriw">, Sched<[]>;
@@ -673,13 +634,6 @@ def : Pat<(or GPR:$rs1, (not GPR:$rs2)), (ORN GPR:$rs1, GPR:$rs2)>;
def : Pat<(xor GPR:$rs1, (not GPR:$rs2)), (XNOR GPR:$rs1, GPR:$rs2)>;
} // Predicates = [HasStdExtZbbOrZbp]
-let Predicates = [HasStdExtZbp] in {
-def : Pat<(not (shiftop<shl> (not GPR:$rs1), GPR:$rs2)),
- (SLO GPR:$rs1, GPR:$rs2)>;
-def : Pat<(not (shiftop<srl> (not GPR:$rs1), GPR:$rs2)),
- (SRO GPR:$rs1, GPR:$rs2)>;
-} // Predicates = [HasStdExtZbp]
-
let Predicates = [HasStdExtZbbOrZbp] in {
def : Pat<(rotl GPR:$rs1, GPR:$rs2), (ROL GPR:$rs1, GPR:$rs2)>;
def : Pat<(rotr GPR:$rs1, GPR:$rs2), (ROR GPR:$rs1, GPR:$rs2)>;
@@ -710,13 +664,6 @@ def : Pat<(and (srl GPR:$rs1, uimmlog2xlen:$shamt), (XLenVT 1)),
(BEXTI GPR:$rs1, uimmlog2xlen:$shamt)>;
}
-let Predicates = [HasStdExtZbp] in {
-def : Pat<(SLOIPat GPR:$rs1, uimmlog2xlen:$shamt),
- (SLOI GPR:$rs1, uimmlog2xlen:$shamt)>;
-def : Pat<(SROIPat GPR:$rs1, uimmlog2xlen:$shamt),
- (SROI GPR:$rs1, uimmlog2xlen:$shamt)>;
-} // Predicates = [HasStdExtZbp]
-
// There's no encoding for roli in the 'B' extension as it can be
// implemented with rori by negating the immediate.
let Predicates = [HasStdExtZbbOrZbp] in {
@@ -936,13 +883,6 @@ def : Pat<(add (SLLIUWPat GPR:$rs1, (XLenVT 3)), GPR:$rs2),
(SH3ADDUW GPR:$rs1, GPR:$rs2)>;
} // Predicates = [HasStdExtZba, IsRV64]
-let Predicates = [HasStdExtZbp, IsRV64] in {
-def : Pat<(not (shiftopw<riscv_sllw> (not GPR:$rs1), GPR:$rs2)),
- (SLOW GPR:$rs1, GPR:$rs2)>;
-def : Pat<(not (shiftopw<riscv_srlw> (not GPR:$rs1), GPR:$rs2)),
- (SROW GPR:$rs1, GPR:$rs2)>;
-} // Predicates = [HasStdExtZbp, IsRV64]
-
let Predicates = [HasStdExtZbbOrZbp, IsRV64] in {
def : Pat<(riscv_rolw GPR:$rs1, GPR:$rs2),
(ROLW GPR:$rs1, GPR:$rs2)>;
@@ -983,13 +923,6 @@ def : Pat<(xor (assertsexti32 GPR:$rs1), BSETINVWMask:$mask),
} // Predicates = [HasStdExtZbs, IsRV64]
let Predicates = [HasStdExtZbp, IsRV64] in {
-def : Pat<(sext_inreg (SLOIPat GPR:$rs1, uimm5:$shamt), i32),
- (SLOIW GPR:$rs1, uimm5:$shamt)>;
-def : Pat<(SROIWPat GPR:$rs1, uimm5:$shamt),
- (SROIW GPR:$rs1, uimm5:$shamt)>;
-} // Predicates = [HasStdExtZbp, IsRV64]
-
-let Predicates = [HasStdExtZbp, IsRV64] in {
def : Pat<(riscv_rorw (riscv_greviw GPR:$rs1, 24), (i64 16)), (GREVIW GPR:$rs1, 8)>;
def : Pat<(riscv_rolw (riscv_greviw GPR:$rs1, 24), (i64 16)), (GREVIW GPR:$rs1, 8)>;
def : Pat<(riscv_greviw GPR:$rs1, timm:$shamt), (GREVIW GPR:$rs1, timm:$shamt)>;
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoV.td b/llvm/lib/Target/RISCV/RISCVInstrInfoV.td
index 4f9e9cfbdb98..86fbc73d81d5 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoV.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoV.td
@@ -7,7 +7,7 @@
//===----------------------------------------------------------------------===//
///
/// This file describes the RISC-V instructions from the standard 'V' Vector
-/// extension, version 0.9.
+/// extension, version 0.10.
/// This version is still experimental as the 'V' extension hasn't been
/// ratified yet.
///
@@ -82,6 +82,12 @@ def simm5_plus1 : Operand<XLenVT>, ImmLeaf<XLenVT,
//===----------------------------------------------------------------------===//
let hasSideEffects = 0, mayLoad = 1, mayStore = 0 in {
+// load vd, (rs1)
+class VUnitStrideLoadMask<string opcodestr>
+ : RVInstVLU<0b000, LSWidth8.Value{3}, LUMOPUnitStrideMask, LSWidth8.Value{2-0},
+ (outs VR:$vd),
+ (ins GPR:$rs1), opcodestr, "$vd, (${rs1})">;
+
// load vd, (rs1), vm
class VUnitStrideLoad<RISCVLSUMOP lumop, RISCVWidth width,
string opcodestr>
@@ -138,6 +144,12 @@ class VIndexedSegmentLoad<bits<3> nf, RISCVMOP mop, RISCVWidth width,
let hasSideEffects = 0, mayLoad = 0, mayStore = 1 in {
// store vd, vs3, (rs1), vm
+class VUnitStrideStoreMask<string opcodestr>
+ : RVInstVSU<0b000, LSWidth8.Value{3}, SUMOPUnitStrideMask, LSWidth8.Value{2-0},
+ (outs), (ins VR:$vs3, GPR:$rs1), opcodestr,
+ "$vs3, (${rs1})">;
+
+// store vd, vs3, (rs1), vm
class VUnitStrideStore<RISCVLSUMOP sumop, RISCVWidth width,
string opcodestr>
: RVInstVSU<0b000, width.Value{3}, sumop, width.Value{2-0},
@@ -423,10 +435,6 @@ multiclass VWholeLoad<bits<3> nf, string opcodestr> {
def E16_V : VWholeLoad<nf, LSWidth16, opcodestr # "e16.v">;
def E32_V : VWholeLoad<nf, LSWidth32, opcodestr # "e32.v">;
def E64_V : VWholeLoad<nf, LSWidth64, opcodestr # "e64.v">;
- def E128_V : VWholeLoad<nf, LSWidth128, opcodestr # "e128.v">;
- def E256_V : VWholeLoad<nf, LSWidth256, opcodestr # "e256.v">;
- def E512_V : VWholeLoad<nf, LSWidth512, opcodestr # "e512.v">;
- def E1024_V : VWholeLoad<nf, LSWidth1024, opcodestr # "e1024.v">;
}
//===----------------------------------------------------------------------===//
@@ -438,6 +446,9 @@ let hasSideEffects = 1, mayLoad = 0, mayStore = 0 in {
def VSETVLI : RVInstSetVLi<(outs GPR:$rd), (ins GPR:$rs1, VTypeIOp:$vtypei),
"vsetvli", "$rd, $rs1, $vtypei">;
+def VSETIVLI : RVInstSetiVLi<(outs GPR:$rd), (ins uimm5:$uimm, VTypeIOp:$vtypei),
+ "vsetivli", "$rd, $uimm, $vtypei">;
+
def VSETVL : RVInstSetVL<(outs GPR:$rd), (ins GPR:$rs1, GPR:$rs2),
"vsetvl", "$rd, $rs1, $rs2">;
} // hasSideEffects = 1, mayLoad = 0, mayStore = 0
@@ -447,47 +458,30 @@ def VLE8_V : VUnitStrideLoad<LUMOPUnitStride, LSWidth8, "vle8.v">;
def VLE16_V : VUnitStrideLoad<LUMOPUnitStride, LSWidth16, "vle16.v">;
def VLE32_V : VUnitStrideLoad<LUMOPUnitStride, LSWidth32, "vle32.v">;
def VLE64_V : VUnitStrideLoad<LUMOPUnitStride, LSWidth64, "vle64.v">;
-def VLE128_V : VUnitStrideLoad<LUMOPUnitStride, LSWidth128, "vle128.v">;
-def VLE256_V : VUnitStrideLoad<LUMOPUnitStride, LSWidth256, "vle256.v">;
-def VLE512_V : VUnitStrideLoad<LUMOPUnitStride, LSWidth512, "vle512.v">;
-def VLE1024_V : VUnitStrideLoad<LUMOPUnitStride, LSWidth1024, "vle1024.v">;
def VLE8FF_V : VUnitStrideLoad<LUMOPUnitStrideFF, LSWidth8, "vle8ff.v">;
def VLE16FF_V : VUnitStrideLoad<LUMOPUnitStrideFF, LSWidth16, "vle16ff.v">;
def VLE32FF_V : VUnitStrideLoad<LUMOPUnitStrideFF, LSWidth32, "vle32ff.v">;
def VLE64FF_V : VUnitStrideLoad<LUMOPUnitStrideFF, LSWidth64, "vle64ff.v">;
-def VLE128FF_V : VUnitStrideLoad<LUMOPUnitStrideFF, LSWidth128, "vle128ff.v">;
-def VLE256FF_V : VUnitStrideLoad<LUMOPUnitStrideFF, LSWidth256, "vle256ff.v">;
-def VLE512FF_V : VUnitStrideLoad<LUMOPUnitStrideFF, LSWidth512, "vle512ff.v">;
-def VLE1024FF_V : VUnitStrideLoad<LUMOPUnitStrideFF, LSWidth1024, "vle1024ff.v">;
+
+def VLE1_V : VUnitStrideLoadMask<"vle1.v">;
+def VSE1_V : VUnitStrideStoreMask<"vse1.v">;
def VSE8_V : VUnitStrideStore<SUMOPUnitStride, LSWidth8, "vse8.v">;
def VSE16_V : VUnitStrideStore<SUMOPUnitStride, LSWidth16, "vse16.v">;
def VSE32_V : VUnitStrideStore<SUMOPUnitStride, LSWidth32, "vse32.v">;
def VSE64_V : VUnitStrideStore<SUMOPUnitStride, LSWidth64, "vse64.v">;
-def VSE128_V : VUnitStrideStore<SUMOPUnitStride, LSWidth128, "vse128.v">;
-def VSE256_V : VUnitStrideStore<SUMOPUnitStride, LSWidth256, "vse256.v">;
-def VSE512_V : VUnitStrideStore<SUMOPUnitStride, LSWidth512, "vse512.v">;
-def VSE1024_V : VUnitStrideStore<SUMOPUnitStride, LSWidth1024, "vse1024.v">;
// Vector Strided Instructions
def VLSE8_V : VStridedLoad<LSWidth8, "vlse8.v">;
def VLSE16_V : VStridedLoad<LSWidth16, "vlse16.v">;
def VLSE32_V : VStridedLoad<LSWidth32, "vlse32.v">;
def VLSE64_V : VStridedLoad<LSWidth64, "vlse64.v">;
-def VLSE128_V : VStridedLoad<LSWidth128, "vlse128.v">;
-def VLSE256_V : VStridedLoad<LSWidth256, "vlse256.v">;
-def VLSE512_V : VStridedLoad<LSWidth512, "vlse512.v">;
-def VLSE1024_V : VStridedLoad<LSWidth1024, "vlse1024.v">;
def VSSE8_V : VStridedStore<LSWidth8, "vsse8.v">;
def VSSE16_V : VStridedStore<LSWidth16, "vsse16.v">;
def VSSE32_V : VStridedStore<LSWidth32, "vsse32.v">;
def VSSE64_V : VStridedStore<LSWidth64, "vsse64.v">;
-def VSSE128_V : VStridedStore<LSWidth128, "vsse128.v">;
-def VSSE256_V : VStridedStore<LSWidth256, "vsse256.v">;
-def VSSE512_V : VStridedStore<LSWidth512, "vsse512.v">;
-def VSSE1024_V : VStridedStore<LSWidth1024, "vsse1024.v">;
// Vector Indexed Instructions
def VLUXEI8_V : VIndexedLoad<MOPLDIndexedUnord, LSWidth8, "vluxei8.v">;
@@ -806,8 +800,8 @@ defm VFWNMSAC_V : VALUr_FV_V_F<"vfwnmsac", 0b111111>;
// Vector Floating-Point Square-Root Instruction
defm VFSQRT_V : VALU_FV_VS2<"vfsqrt.v", 0b010011, 0b00000>;
-defm VFRSQRTE7_V : VALU_FV_VS2<"vfrsqrte7.v", 0b010011, 0b00100>;
-defm VFRECE7_V : VALU_FV_VS2<"vfrece7.v", 0b010011, 0b00101>;
+defm VFRSQRT7_V : VALU_FV_VS2<"vfrsqrt7.v", 0b010011, 0b00100>;
+defm VFREC7_V : VALU_FV_VS2<"vfrec7.v", 0b010011, 0b00101>;
// Vector Floating-Point MIN/MAX Instructions
defm VFMIN_V : VALU_FV_V_F<"vfmin", 0b000100>;
@@ -1058,47 +1052,27 @@ let Predicates = [HasStdExtZvlsseg] in {
def VLSEG#nf#E16_V : VUnitStrideSegmentLoad<!add(nf, -1), LUMOPUnitStride, LSWidth16, "vlseg"#nf#"e16.v">;
def VLSEG#nf#E32_V : VUnitStrideSegmentLoad<!add(nf, -1), LUMOPUnitStride, LSWidth32, "vlseg"#nf#"e32.v">;
def VLSEG#nf#E64_V : VUnitStrideSegmentLoad<!add(nf, -1), LUMOPUnitStride, LSWidth64, "vlseg"#nf#"e64.v">;
- def VLSEG#nf#E128_V : VUnitStrideSegmentLoad<!add(nf, -1), LUMOPUnitStride, LSWidth128, "vlseg"#nf#"e128.v">;
- def VLSEG#nf#E256_V : VUnitStrideSegmentLoad<!add(nf, -1), LUMOPUnitStride, LSWidth256, "vlseg"#nf#"e256.v">;
- def VLSEG#nf#E512_V : VUnitStrideSegmentLoad<!add(nf, -1), LUMOPUnitStride, LSWidth512, "vlseg"#nf#"e512.v">;
- def VLSEG#nf#E1024_V : VUnitStrideSegmentLoad<!add(nf, -1), LUMOPUnitStride, LSWidth1024, "vlseg"#nf#"e1024.v">;
def VLSEG#nf#E8FF_V : VUnitStrideSegmentLoad<!add(nf, -1), LUMOPUnitStrideFF, LSWidth8, "vlseg"#nf#"e8ff.v">;
def VLSEG#nf#E16FF_V : VUnitStrideSegmentLoad<!add(nf, -1), LUMOPUnitStrideFF, LSWidth16, "vlseg"#nf#"e16ff.v">;
def VLSEG#nf#E32FF_V : VUnitStrideSegmentLoad<!add(nf, -1), LUMOPUnitStrideFF, LSWidth32, "vlseg"#nf#"e32ff.v">;
def VLSEG#nf#E64FF_V : VUnitStrideSegmentLoad<!add(nf, -1), LUMOPUnitStrideFF, LSWidth64, "vlseg"#nf#"e64ff.v">;
- def VLSEG#nf#E128FF_V : VUnitStrideSegmentLoad<!add(nf, -1), LUMOPUnitStrideFF, LSWidth128, "vlseg"#nf#"e128ff.v">;
- def VLSEG#nf#E256FF_V : VUnitStrideSegmentLoad<!add(nf, -1), LUMOPUnitStrideFF, LSWidth256, "vlseg"#nf#"e256ff.v">;
- def VLSEG#nf#E512FF_V : VUnitStrideSegmentLoad<!add(nf, -1), LUMOPUnitStrideFF, LSWidth512, "vlseg"#nf#"e512ff.v">;
- def VLSEG#nf#E1024FF_V : VUnitStrideSegmentLoad<!add(nf, -1), LUMOPUnitStrideFF, LSWidth1024, "vlseg"#nf#"e1024ff.v">;
def VSSEG#nf#E8_V : VUnitStrideSegmentStore<!add(nf, -1), LSWidth8, "vsseg"#nf#"e8.v">;
def VSSEG#nf#E16_V : VUnitStrideSegmentStore<!add(nf, -1), LSWidth16, "vsseg"#nf#"e16.v">;
def VSSEG#nf#E32_V : VUnitStrideSegmentStore<!add(nf, -1), LSWidth32, "vsseg"#nf#"e32.v">;
def VSSEG#nf#E64_V : VUnitStrideSegmentStore<!add(nf, -1), LSWidth64, "vsseg"#nf#"e64.v">;
- def VSSEG#nf#E128_V : VUnitStrideSegmentStore<!add(nf, -1), LSWidth128, "vsseg"#nf#"e128.v">;
- def VSSEG#nf#E256_V : VUnitStrideSegmentStore<!add(nf, -1), LSWidth256, "vsseg"#nf#"e256.v">;
- def VSSEG#nf#E512_V : VUnitStrideSegmentStore<!add(nf, -1), LSWidth512, "vsseg"#nf#"e512.v">;
- def VSSEG#nf#E1024_V : VUnitStrideSegmentStore<!add(nf, -1), LSWidth1024, "vsseg"#nf#"e1024.v">;
// Vector Strided Instructions
def VLSSEG#nf#E8_V : VStridedSegmentLoad<!add(nf, -1), LSWidth8, "vlsseg"#nf#"e8.v">;
def VLSSEG#nf#E16_V : VStridedSegmentLoad<!add(nf, -1), LSWidth16, "vlsseg"#nf#"e16.v">;
def VLSSEG#nf#E32_V : VStridedSegmentLoad<!add(nf, -1), LSWidth32, "vlsseg"#nf#"e32.v">;
def VLSSEG#nf#E64_V : VStridedSegmentLoad<!add(nf, -1), LSWidth64, "vlsseg"#nf#"e64.v">;
- def VLSSEG#nf#E128_V : VStridedSegmentLoad<!add(nf, -1), LSWidth128, "vlsseg"#nf#"e128.v">;
- def VLSSEG#nf#E256_V : VStridedSegmentLoad<!add(nf, -1), LSWidth256, "vlsseg"#nf#"e256.v">;
- def VLSSEG#nf#E512_V : VStridedSegmentLoad<!add(nf, -1), LSWidth512, "vlsseg"#nf#"e512.v">;
- def VLSSEG#nf#E1024_V : VStridedSegmentLoad<!add(nf, -1), LSWidth1024, "vlsseg"#nf#"e1024.v">;
def VSSSEG#nf#E8_V : VStridedSegmentStore<!add(nf, -1), LSWidth8, "vssseg"#nf#"e8.v">;
def VSSSEG#nf#E16_V : VStridedSegmentStore<!add(nf, -1), LSWidth16, "vssseg"#nf#"e16.v">;
def VSSSEG#nf#E32_V : VStridedSegmentStore<!add(nf, -1), LSWidth32, "vssseg"#nf#"e32.v">;
def VSSSEG#nf#E64_V : VStridedSegmentStore<!add(nf, -1), LSWidth64, "vssseg"#nf#"e64.v">;
- def VSSSEG#nf#E128_V : VStridedSegmentStore<!add(nf, -1), LSWidth128, "vssseg"#nf#"e128.v">;
- def VSSSEG#nf#E256_V : VStridedSegmentStore<!add(nf, -1), LSWidth256, "vssseg"#nf#"e256.v">;
- def VSSSEG#nf#E512_V : VStridedSegmentStore<!add(nf, -1), LSWidth512, "vssseg"#nf#"e512.v">;
- def VSSSEG#nf#E1024_V : VStridedSegmentStore<!add(nf, -1), LSWidth1024, "vssseg"#nf#"e1024.v">;
// Vector Indexed Instructions
def VLUXSEG#nf#EI8_V : VIndexedSegmentLoad<!add(nf, -1), MOPLDIndexedUnord,
@@ -1109,14 +1083,6 @@ let Predicates = [HasStdExtZvlsseg] in {
LSWidth32, "vluxseg"#nf#"ei32.v">;
def VLUXSEG#nf#EI64_V : VIndexedSegmentLoad<!add(nf, -1), MOPLDIndexedUnord,
LSWidth64, "vluxseg"#nf#"ei64.v">;
- def VLUXSEG#nf#EI128_V : VIndexedSegmentLoad<!add(nf, -1), MOPLDIndexedUnord,
- LSWidth128, "vluxseg"#nf#"ei128.v">;
- def VLUXSEG#nf#EI256_V : VIndexedSegmentLoad<!add(nf, -1), MOPLDIndexedUnord,
- LSWidth256, "vluxseg"#nf#"ei256.v">;
- def VLUXSEG#nf#EI512_V : VIndexedSegmentLoad<!add(nf, -1), MOPLDIndexedUnord,
- LSWidth512, "vluxseg"#nf#"ei512.v">;
- def VLUXSEG#nf#EI1024_V : VIndexedSegmentLoad<!add(nf, -1), MOPLDIndexedUnord,
- LSWidth1024, "vluxseg"#nf#"ei1024.v">;
def VLOXSEG#nf#EI8_V : VIndexedSegmentLoad<!add(nf, -1), MOPLDIndexedOrder,
LSWidth8, "vloxseg"#nf#"ei8.v">;
@@ -1126,14 +1092,6 @@ let Predicates = [HasStdExtZvlsseg] in {
LSWidth32, "vloxseg"#nf#"ei32.v">;
def VLOXSEG#nf#EI64_V : VIndexedSegmentLoad<!add(nf, -1), MOPLDIndexedOrder,
LSWidth64, "vloxseg"#nf#"ei64.v">;
- def VLOXSEG#nf#EI128_V : VIndexedSegmentLoad<!add(nf, -1), MOPLDIndexedOrder,
- LSWidth128, "vloxseg"#nf#"ei128.v">;
- def VLOXSEG#nf#EI256_V : VIndexedSegmentLoad<!add(nf, -1), MOPLDIndexedOrder,
- LSWidth256, "vloxseg"#nf#"ei256.v">;
- def VLOXSEG#nf#EI512_V : VIndexedSegmentLoad<!add(nf, -1), MOPLDIndexedOrder,
- LSWidth512, "vloxseg"#nf#"ei512.v">;
- def VLOXSEG#nf#EI1024_V : VIndexedSegmentLoad<!add(nf, -1), MOPLDIndexedOrder,
- LSWidth1024, "vloxseg"#nf#"ei1024.v">;
def VSUXSEG#nf#EI8_V : VIndexedSegmentStore<!add(nf, -1), MOPSTIndexedUnord,
LSWidth8, "vsuxseg"#nf#"ei8.v">;
@@ -1143,14 +1101,6 @@ let Predicates = [HasStdExtZvlsseg] in {
LSWidth32, "vsuxseg"#nf#"ei32.v">;
def VSUXSEG#nf#EI64_V : VIndexedSegmentStore<!add(nf, -1), MOPSTIndexedUnord,
LSWidth64, "vsuxseg"#nf#"ei64.v">;
- def VSUXSEG#nf#EI128_V : VIndexedSegmentStore<!add(nf, -1), MOPSTIndexedUnord,
- LSWidth128, "vsuxseg"#nf#"ei128.v">;
- def VSUXSEG#nf#EI256_V : VIndexedSegmentStore<!add(nf, -1), MOPSTIndexedUnord,
- LSWidth256, "vsuxseg"#nf#"ei256.v">;
- def VSUXSEG#nf#EI512_V : VIndexedSegmentStore<!add(nf, -1), MOPSTIndexedUnord,
- LSWidth512, "vsuxseg"#nf#"ei512.v">;
- def VSUXSEG#nf#EI1024_V : VIndexedSegmentStore<!add(nf, -1), MOPSTIndexedUnord,
- LSWidth1024, "vsuxseg"#nf#"ei1024.v">;
def VSOXSEG#nf#EI8_V : VIndexedSegmentStore<!add(nf, -1), MOPSTIndexedOrder,
LSWidth8, "vsoxseg"#nf#"ei8.v">;
@@ -1160,14 +1110,6 @@ let Predicates = [HasStdExtZvlsseg] in {
LSWidth32, "vsoxseg"#nf#"ei32.v">;
def VSOXSEG#nf#EI64_V : VIndexedSegmentStore<!add(nf, -1), MOPSTIndexedOrder,
LSWidth64, "vsoxseg"#nf#"ei64.v">;
- def VSOXSEG#nf#EI128_V : VIndexedSegmentStore<!add(nf, -1), MOPSTIndexedOrder,
- LSWidth128, "vsoxseg"#nf#"ei128.v">;
- def VSOXSEG#nf#EI256_V : VIndexedSegmentStore<!add(nf, -1), MOPSTIndexedOrder,
- LSWidth256, "vsoxseg"#nf#"ei256.v">;
- def VSOXSEG#nf#EI512_V : VIndexedSegmentStore<!add(nf, -1), MOPSTIndexedOrder,
- LSWidth512, "vsoxseg"#nf#"ei512.v">;
- def VSOXSEG#nf#EI1024_V : VIndexedSegmentStore<!add(nf, -1), MOPSTIndexedOrder,
- LSWidth1024, "vsoxseg"#nf#"ei1024.v">;
}
} // Predicates = [HasStdExtZvlsseg]
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
index 06e4d053d5d7..60bd1b24cab8 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
@@ -7,7 +7,7 @@
//===----------------------------------------------------------------------===//
///
/// This file contains the required infrastructure to support code generation
-/// for the standard 'V' (Vector) extension, version 0.9. This version is still
+/// for the standard 'V' (Vector) extension, version 0.10. This version is still
/// experimental as the 'V' extension hasn't been ratified yet.
///
/// This file is included from RISCVInstrInfoV.td
@@ -42,17 +42,7 @@ def riscv_read_vl : SDNode<"RISCVISD::READ_VL",
//--------------------------------------------------------------
// !X0 | X0 | VLMAX | Set vl to VLMAX
// X0 | X0 | Value in vl | Keep current vl, just change vtype.
-def NoX0 : SDNodeXForm<undef,
-[{
- auto *C = dyn_cast<ConstantSDNode>(N);
- if (C && C->isNullValue()) {
- SDLoc DL(N);
- return SDValue(CurDAG->getMachineNode(RISCV::ADDI, DL, Subtarget->getXLenVT(),
- CurDAG->getRegister(RISCV::X0, Subtarget->getXLenVT()),
- CurDAG->getTargetConstant(0, DL, Subtarget->getXLenVT())), 0);
- }
- return SDValue(N, 0);
-}]>;
+def VLOp : ComplexPattern<XLenVT, 1, "selectVLOp">;
def DecImm : SDNodeXForm<imm, [{
return CurDAG->getTargetConstant(N->getSExtValue() - 1, SDLoc(N),
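
The hunk above swaps the NoX0 SDNodeXForm for a ComplexPattern, so the AVL operand is fixed up during instruction selection rather than at pattern-application time. A minimal sketch of such a selector, assuming it simply mirrors the removed NoX0 logic (the actual selectVLOp body in RISCVISelDAGToDAG.cpp may differ):

// Hypothetical sketch of the ComplexPattern selector: materialize a
// constant-zero AVL as an ADDI from X0 so that X0 itself is never used
// directly as the VL operand.
bool RISCVDAGToDAGISel::selectVLOp(SDValue N, SDValue &VL) {
  auto *C = dyn_cast<ConstantSDNode>(N);
  if (C && C->isNullValue()) {
    SDLoc DL(N);
    VL = SDValue(CurDAG->getMachineNode(
                     RISCV::ADDI, DL, Subtarget->getXLenVT(),
                     CurDAG->getRegister(RISCV::X0, Subtarget->getXLenVT()),
                     CurDAG->getTargetConstant(0, DL, Subtarget->getXLenVT())),
                 0);
  } else {
    VL = N;
  }
  return true; // A ComplexPattern selector reports whether it matched.
}
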
@@ -1228,6 +1218,14 @@ multiclass VPseudoUSLoad {
}
}
+multiclass VPseudoLoadMask {
+ foreach mti = AllMasks in {
+ let VLMul = mti.LMul.value in {
+ def "_V_" # mti.BX : VPseudoUSLoadNoMask<VR>;
+ }
+ }
+}
+
multiclass VPseudoSLoad {
foreach lmul = MxList.m in {
defvar LInfo = lmul.MX;
@@ -1264,6 +1262,14 @@ multiclass VPseudoUSStore {
}
}
+multiclass VPseudoStoreMask {
+ foreach mti = AllMasks in {
+ let VLMul = mti.LMul.value in {
+ def "_V_" # mti.BX : VPseudoUSStoreNoMask<VR>;
+ }
+ }
+}
+
multiclass VPseudoSStore {
foreach lmul = MxList.m in {
defvar LInfo = lmul.MX;
@@ -1951,10 +1957,10 @@ class VPatUnaryNoMask<string intrinsic_name,
VReg op2_reg_class> :
Pat<(result_type (!cast<Intrinsic>(intrinsic_name)
(op2_type op2_reg_class:$rs2),
- (XLenVT GPR:$vl))),
+ (XLenVT (VLOp GPR:$vl)))),
(!cast<Instruction>(inst#"_"#kind#"_"#vlmul.MX)
(op2_type op2_reg_class:$rs2),
- (NoX0 GPR:$vl), sew)>;
+ GPR:$vl, sew)>;
class VPatUnaryMask<string intrinsic_name,
string inst,
@@ -1970,21 +1976,21 @@ class VPatUnaryMask<string intrinsic_name,
(result_type result_reg_class:$merge),
(op2_type op2_reg_class:$rs2),
(mask_type V0),
- (XLenVT GPR:$vl))),
+ (XLenVT (VLOp GPR:$vl)))),
(!cast<Instruction>(inst#"_"#kind#"_"#vlmul.MX#"_MASK")
(result_type result_reg_class:$merge),
(op2_type op2_reg_class:$rs2),
- (mask_type V0), (NoX0 GPR:$vl), sew)>;
+ (mask_type V0), GPR:$vl, sew)>;
class VPatMaskUnaryNoMask<string intrinsic_name,
string inst,
MTypeInfo mti> :
Pat<(mti.Mask (!cast<Intrinsic>(intrinsic_name)
(mti.Mask VR:$rs2),
- (XLenVT GPR:$vl))),
+ (XLenVT (VLOp GPR:$vl)))),
(!cast<Instruction>(inst#"_M_"#mti.BX)
(mti.Mask VR:$rs2),
- (NoX0 GPR:$vl), mti.SEW)>;
+ GPR:$vl, mti.SEW)>;
class VPatMaskUnaryMask<string intrinsic_name,
string inst,
@@ -1993,11 +1999,11 @@ class VPatMaskUnaryMask<string intrinsic_name,
(mti.Mask VR:$merge),
(mti.Mask VR:$rs2),
(mti.Mask V0),
- (XLenVT GPR:$vl))),
+ (XLenVT (VLOp GPR:$vl)))),
(!cast<Instruction>(inst#"_M_"#mti.BX#"_MASK")
(mti.Mask VR:$merge),
(mti.Mask VR:$rs2),
- (mti.Mask V0), (NoX0 GPR:$vl), mti.SEW)>;
+ (mti.Mask V0), GPR:$vl, mti.SEW)>;
class VPatUnaryAnyMask<string intrinsic,
string inst,
@@ -2013,12 +2019,12 @@ class VPatUnaryAnyMask<string intrinsic,
(result_type result_reg_class:$merge),
(op1_type op1_reg_class:$rs1),
(mask_type VR:$rs2),
- (XLenVT GPR:$vl))),
+ (XLenVT (VLOp GPR:$vl)))),
(!cast<Instruction>(inst#"_"#kind#"_"#vlmul.MX)
(result_type result_reg_class:$merge),
(op1_type op1_reg_class:$rs1),
(mask_type VR:$rs2),
- (NoX0 GPR:$vl), sew)>;
+ GPR:$vl, sew)>;
class VPatBinaryNoMask<string intrinsic_name,
string inst,
@@ -2031,11 +2037,11 @@ class VPatBinaryNoMask<string intrinsic_name,
Pat<(result_type (!cast<Intrinsic>(intrinsic_name)
(op1_type op1_reg_class:$rs1),
(op2_type op2_kind:$rs2),
- (XLenVT GPR:$vl))),
+ (XLenVT (VLOp GPR:$vl)))),
(!cast<Instruction>(inst)
(op1_type op1_reg_class:$rs1),
(op2_type op2_kind:$rs2),
- (NoX0 GPR:$vl), sew)>;
+ GPR:$vl, sew)>;
class VPatBinaryMask<string intrinsic_name,
string inst,
@@ -2052,12 +2058,12 @@ class VPatBinaryMask<string intrinsic_name,
(op1_type op1_reg_class:$rs1),
(op2_type op2_kind:$rs2),
(mask_type V0),
- (XLenVT GPR:$vl))),
+ (XLenVT (VLOp GPR:$vl)))),
(!cast<Instruction>(inst#"_MASK")
(result_type result_reg_class:$merge),
(op1_type op1_reg_class:$rs1),
(op2_type op2_kind:$rs2),
- (mask_type V0), (NoX0 GPR:$vl), sew)>;
+ (mask_type V0), GPR:$vl, sew)>;
class VPatTernaryNoMask<string intrinsic,
string inst,
@@ -2075,12 +2081,12 @@ class VPatTernaryNoMask<string intrinsic,
(result_type result_reg_class:$rs3),
(op1_type op1_reg_class:$rs1),
(op2_type op2_kind:$rs2),
- (XLenVT GPR:$vl))),
+ (XLenVT (VLOp GPR:$vl)))),
(!cast<Instruction>(inst#"_"#kind#"_"#vlmul.MX)
result_reg_class:$rs3,
(op1_type op1_reg_class:$rs1),
op2_kind:$rs2,
- (NoX0 GPR:$vl), sew)>;
+ GPR:$vl, sew)>;
class VPatTernaryMask<string intrinsic,
string inst,
@@ -2099,13 +2105,13 @@ class VPatTernaryMask<string intrinsic,
(op1_type op1_reg_class:$rs1),
(op2_type op2_kind:$rs2),
(mask_type V0),
- (XLenVT GPR:$vl))),
+ (XLenVT (VLOp GPR:$vl)))),
(!cast<Instruction>(inst#"_"#kind#"_"#vlmul.MX # "_MASK")
result_reg_class:$rs3,
(op1_type op1_reg_class:$rs1),
op2_kind:$rs2,
(mask_type V0),
- (NoX0 GPR:$vl), sew)>;
+ GPR:$vl, sew)>;
class VPatAMOWDNoMask<string intrinsic_name,
string inst,
@@ -2119,10 +2125,10 @@ class VPatAMOWDNoMask<string intrinsic_name,
GPR:$rs1,
(op1_type op1_reg_class:$vs2),
(result_type vlmul.vrclass:$vd),
- (XLenVT GPR:$vl))),
+ (XLenVT (VLOp GPR:$vl)))),
(!cast<Instruction>(inst # "_WD_" # vlmul.MX # "_" # emul.MX)
$rs1, $vs2, $vd,
- (NoX0 GPR:$vl), sew)>;
+ GPR:$vl, sew)>;
class VPatAMOWDMask<string intrinsic_name,
string inst,
@@ -2138,10 +2144,10 @@ class VPatAMOWDMask<string intrinsic_name,
(op1_type op1_reg_class:$vs2),
(result_type vlmul.vrclass:$vd),
(mask_type V0),
- (XLenVT GPR:$vl))),
+ (XLenVT (VLOp GPR:$vl)))),
(!cast<Instruction>(inst # "_WD_" # vlmul.MX # "_" # emul.MX # "_MASK")
$rs1, $vs2, $vd,
- (mask_type V0), (NoX0 GPR:$vl), sew)>;
+ (mask_type V0), GPR:$vl, sew)>;
multiclass VPatUSLoad<string intrinsic,
string inst,
@@ -2153,14 +2159,14 @@ multiclass VPatUSLoad<string intrinsic,
{
defvar Intr = !cast<Intrinsic>(intrinsic);
defvar Pseudo = !cast<Instruction>(inst#"_V_"#vlmul.MX);
- def : Pat<(type (Intr GPR:$rs1, GPR:$vl)),
- (Pseudo $rs1, (NoX0 GPR:$vl), sew)>;
+ def : Pat<(type (Intr GPR:$rs1, (XLenVT (VLOp GPR:$vl)))),
+ (Pseudo $rs1, GPR:$vl, sew)>;
defvar IntrMask = !cast<Intrinsic>(intrinsic # "_mask");
defvar PseudoMask = !cast<Instruction>(inst#"_V_"#vlmul.MX#"_MASK");
def : Pat<(type (IntrMask (type GetVRegNoV0<reg_class>.R:$merge),
- GPR:$rs1, (mask_type V0), GPR:$vl)),
+ GPR:$rs1, (mask_type V0), (XLenVT (VLOp GPR:$vl)))),
(PseudoMask $merge,
- $rs1, (mask_type V0), (NoX0 GPR:$vl), sew)>;
+ $rs1, (mask_type V0), GPR:$vl, sew)>;
}
multiclass VPatUSLoadFF<string inst,
@@ -2171,13 +2177,13 @@ multiclass VPatUSLoadFF<string inst,
VReg reg_class>
{
defvar Pseudo = !cast<Instruction>(inst#"_V_"#vlmul.MX);
- def : Pat<(type (riscv_vleff GPR:$rs1, GPR:$vl)),
- (Pseudo $rs1, (NoX0 GPR:$vl), sew)>;
+ def : Pat<(type (riscv_vleff GPR:$rs1, (XLenVT (VLOp GPR:$vl)))),
+ (Pseudo $rs1, GPR:$vl, sew)>;
defvar PseudoMask = !cast<Instruction>(inst#"_V_"#vlmul.MX#"_MASK");
def : Pat<(type (riscv_vleff_mask (type GetVRegNoV0<reg_class>.R:$merge),
- GPR:$rs1, (mask_type V0), GPR:$vl)),
+ GPR:$rs1, (mask_type V0), (XLenVT (VLOp GPR:$vl)))),
(PseudoMask $merge,
- $rs1, (mask_type V0), (NoX0 GPR:$vl), sew)>;
+ $rs1, (mask_type V0), GPR:$vl, sew)>;
}
multiclass VPatSLoad<string intrinsic,
@@ -2190,14 +2196,14 @@ multiclass VPatSLoad<string intrinsic,
{
defvar Intr = !cast<Intrinsic>(intrinsic);
defvar Pseudo = !cast<Instruction>(inst#"_V_"#vlmul.MX);
- def : Pat<(type (Intr GPR:$rs1, GPR:$rs2, GPR:$vl)),
- (Pseudo $rs1, $rs2, (NoX0 GPR:$vl), sew)>;
+ def : Pat<(type (Intr GPR:$rs1, GPR:$rs2, (XLenVT (VLOp GPR:$vl)))),
+ (Pseudo $rs1, $rs2, GPR:$vl, sew)>;
defvar IntrMask = !cast<Intrinsic>(intrinsic # "_mask");
defvar PseudoMask = !cast<Instruction>(inst#"_V_"#vlmul.MX#"_MASK");
def : Pat<(type (IntrMask (type GetVRegNoV0<reg_class>.R:$merge),
- GPR:$rs1, GPR:$rs2, (mask_type V0), GPR:$vl)),
+ GPR:$rs1, GPR:$rs2, (mask_type V0), (XLenVT (VLOp GPR:$vl)))),
(PseudoMask $merge,
- $rs1, $rs2, (mask_type V0), (NoX0 GPR:$vl), sew)>;
+ $rs1, $rs2, (mask_type V0), GPR:$vl, sew)>;
}
multiclass VPatILoad<string intrinsic,
@@ -2213,16 +2219,16 @@ multiclass VPatILoad<string intrinsic,
{
defvar Intr = !cast<Intrinsic>(intrinsic);
defvar Pseudo = !cast<Instruction>(inst#"_V_"#idx_vlmul.MX#"_"#vlmul.MX);
- def : Pat<(type (Intr GPR:$rs1, (idx_type idx_reg_class:$rs2), GPR:$vl)),
- (Pseudo $rs1, $rs2, (NoX0 GPR:$vl), sew)>;
+ def : Pat<(type (Intr GPR:$rs1, (idx_type idx_reg_class:$rs2), (XLenVT (VLOp GPR:$vl)))),
+ (Pseudo $rs1, $rs2, GPR:$vl, sew)>;
defvar IntrMask = !cast<Intrinsic>(intrinsic # "_mask");
defvar PseudoMask = !cast<Instruction>(inst#"_V_"#idx_vlmul.MX#"_"#vlmul.MX#"_MASK");
def : Pat<(type (IntrMask (type GetVRegNoV0<reg_class>.R:$merge),
GPR:$rs1, (idx_type idx_reg_class:$rs2),
- (mask_type V0), GPR:$vl)),
+ (mask_type V0), (XLenVT (VLOp GPR:$vl)))),
(PseudoMask $merge,
- $rs1, $rs2, (mask_type V0), (NoX0 GPR:$vl), sew)>;
+ $rs1, $rs2, (mask_type V0), GPR:$vl, sew)>;
}
multiclass VPatUSStore<string intrinsic,
@@ -2235,12 +2241,12 @@ multiclass VPatUSStore<string intrinsic,
{
defvar Intr = !cast<Intrinsic>(intrinsic);
defvar Pseudo = !cast<Instruction>(inst#"_V_"#vlmul.MX);
- def : Pat<(Intr (type reg_class:$rs3), GPR:$rs1, GPR:$vl),
- (Pseudo $rs3, $rs1, (NoX0 GPR:$vl), sew)>;
+ def : Pat<(Intr (type reg_class:$rs3), GPR:$rs1, (XLenVT (VLOp GPR:$vl))),
+ (Pseudo $rs3, $rs1, GPR:$vl, sew)>;
defvar IntrMask = !cast<Intrinsic>(intrinsic # "_mask");
defvar PseudoMask = !cast<Instruction>(inst#"_V_"#vlmul.MX#"_MASK");
- def : Pat<(IntrMask (type reg_class:$rs3), GPR:$rs1, (mask_type V0), GPR:$vl),
- (PseudoMask $rs3, $rs1, (mask_type V0), (NoX0 GPR:$vl), sew)>;
+ def : Pat<(IntrMask (type reg_class:$rs3), GPR:$rs1, (mask_type V0), (XLenVT (VLOp GPR:$vl))),
+ (PseudoMask $rs3, $rs1, (mask_type V0), GPR:$vl, sew)>;
}
multiclass VPatSStore<string intrinsic,
@@ -2253,12 +2259,12 @@ multiclass VPatSStore<string intrinsic,
{
defvar Intr = !cast<Intrinsic>(intrinsic);
defvar Pseudo = !cast<Instruction>(inst#"_V_"#vlmul.MX);
- def : Pat<(Intr (type reg_class:$rs3), GPR:$rs1, GPR:$rs2, GPR:$vl),
- (Pseudo $rs3, $rs1, $rs2, (NoX0 GPR:$vl), sew)>;
+ def : Pat<(Intr (type reg_class:$rs3), GPR:$rs1, GPR:$rs2, (XLenVT (VLOp GPR:$vl))),
+ (Pseudo $rs3, $rs1, $rs2, GPR:$vl, sew)>;
defvar IntrMask = !cast<Intrinsic>(intrinsic # "_mask");
defvar PseudoMask = !cast<Instruction>(inst#"_V_"#vlmul.MX#"_MASK");
- def : Pat<(IntrMask (type reg_class:$rs3), GPR:$rs1, GPR:$rs2, (mask_type V0), GPR:$vl),
- (PseudoMask $rs3, $rs1, $rs2, (mask_type V0), (NoX0 GPR:$vl), sew)>;
+ def : Pat<(IntrMask (type reg_class:$rs3), GPR:$rs1, GPR:$rs2, (mask_type V0), (XLenVT (VLOp GPR:$vl))),
+ (PseudoMask $rs3, $rs1, $rs2, (mask_type V0), GPR:$vl, sew)>;
}
multiclass VPatIStore<string intrinsic,
@@ -2275,13 +2281,13 @@ multiclass VPatIStore<string intrinsic,
defvar Intr = !cast<Intrinsic>(intrinsic);
defvar Pseudo = !cast<Instruction>(inst#"_V_"#idx_vlmul.MX#"_"#vlmul.MX);
def : Pat<(Intr (type reg_class:$rs3), GPR:$rs1,
- (idx_type idx_reg_class:$rs2), GPR:$vl),
- (Pseudo $rs3, $rs1, $rs2, (NoX0 GPR:$vl), sew)>;
+ (idx_type idx_reg_class:$rs2), (XLenVT (VLOp GPR:$vl))),
+ (Pseudo $rs3, $rs1, $rs2, GPR:$vl, sew)>;
defvar IntrMask = !cast<Intrinsic>(intrinsic # "_mask");
defvar PseudoMask = !cast<Instruction>(inst#"_V_"#idx_vlmul.MX#"_"#vlmul.MX#"_MASK");
def : Pat<(IntrMask (type reg_class:$rs3), GPR:$rs1,
- (idx_type idx_reg_class:$rs2), (mask_type V0), GPR:$vl),
- (PseudoMask $rs3, $rs1, $rs2, (mask_type V0), (NoX0 GPR:$vl), sew)>;
+ (idx_type idx_reg_class:$rs2), (mask_type V0), (XLenVT (VLOp GPR:$vl))),
+ (PseudoMask $rs3, $rs1, $rs2, (mask_type V0), GPR:$vl, sew)>;
}
multiclass VPatUnaryS_M<string intrinsic_name,
@@ -2289,13 +2295,13 @@ multiclass VPatUnaryS_M<string intrinsic_name,
{
foreach mti = AllMasks in {
def : Pat<(XLenVT (!cast<Intrinsic>(intrinsic_name)
- (mti.Mask VR:$rs1), GPR:$vl)),
+ (mti.Mask VR:$rs1), (XLenVT (VLOp GPR:$vl)))),
(!cast<Instruction>(inst#"_M_"#mti.BX) $rs1,
- (NoX0 GPR:$vl), mti.SEW)>;
+ GPR:$vl, mti.SEW)>;
def : Pat<(XLenVT (!cast<Intrinsic>(intrinsic_name # "_mask")
- (mti.Mask VR:$rs1), (mti.Mask V0), GPR:$vl)),
+ (mti.Mask VR:$rs1), (mti.Mask V0), (XLenVT (VLOp GPR:$vl)))),
(!cast<Instruction>(inst#"_M_"#mti.BX#"_MASK") $rs1,
- (mti.Mask V0), (NoX0 GPR:$vl), mti.SEW)>;
+ (mti.Mask V0), GPR:$vl, mti.SEW)>;
}
}
@@ -2360,24 +2366,24 @@ multiclass VPatNullaryV<string intrinsic, string instruction>
{
foreach vti = AllIntegerVectors in {
def : Pat<(vti.Vector (!cast<Intrinsic>(intrinsic)
- (XLenVT GPR:$vl))),
+ (XLenVT (VLOp GPR:$vl)))),
(!cast<Instruction>(instruction#"_V_" # vti.LMul.MX)
- (NoX0 GPR:$vl), vti.SEW)>;
+ GPR:$vl, vti.SEW)>;
def : Pat<(vti.Vector (!cast<Intrinsic>(intrinsic # "_mask")
(vti.Vector vti.RegClass:$merge),
- (vti.Mask V0), (XLenVT GPR:$vl))),
+ (vti.Mask V0), (XLenVT (VLOp GPR:$vl)))),
(!cast<Instruction>(instruction#"_V_" # vti.LMul.MX # "_MASK")
vti.RegClass:$merge, (vti.Mask V0),
- (NoX0 GPR:$vl), vti.SEW)>;
+ GPR:$vl, vti.SEW)>;
}
}
multiclass VPatNullaryM<string intrinsic, string inst> {
foreach mti = AllMasks in
def : Pat<(mti.Mask (!cast<Intrinsic>(intrinsic)
- (XLenVT GPR:$vl))),
+ (XLenVT (VLOp GPR:$vl)))),
(!cast<Instruction>(inst#"_M_"#mti.BX)
- (NoX0 GPR:$vl), mti.SEW)>;
+ GPR:$vl, mti.SEW)>;
}
multiclass VPatBinary<string intrinsic,
@@ -2414,11 +2420,11 @@ multiclass VPatBinaryCarryIn<string intrinsic,
(op1_type op1_reg_class:$rs1),
(op2_type op2_kind:$rs2),
(mask_type V0),
- (XLenVT GPR:$vl))),
+ (XLenVT (VLOp GPR:$vl)))),
(!cast<Instruction>(inst#"_"#kind#"_"#vlmul.MX)
(op1_type op1_reg_class:$rs1),
(op2_type op2_kind:$rs2),
- (mask_type V0), (NoX0 GPR:$vl), sew)>;
+ (mask_type V0), GPR:$vl, sew)>;
}
multiclass VPatBinaryMaskOut<string intrinsic,
@@ -2435,11 +2441,11 @@ multiclass VPatBinaryMaskOut<string intrinsic,
def : Pat<(result_type (!cast<Intrinsic>(intrinsic)
(op1_type op1_reg_class:$rs1),
(op2_type op2_kind:$rs2),
- (XLenVT GPR:$vl))),
+ (XLenVT (VLOp GPR:$vl)))),
(!cast<Instruction>(inst#"_"#kind#"_"#vlmul.MX)
(op1_type op1_reg_class:$rs1),
(op2_type op2_kind:$rs2),
- (NoX0 GPR:$vl), sew)>;
+ GPR:$vl, sew)>;
}
multiclass VPatConversion<string intrinsic,
@@ -3125,7 +3131,7 @@ def PseudoReadVL : Pseudo<(outs GPR:$rd), (ins),
// Pseudos.
let hasSideEffects = 1, mayLoad = 0, mayStore = 0, Defs = [VL, VTYPE] in {
def PseudoVSETVLI : Pseudo<(outs GPR:$rd), (ins GPR:$rs1, VTypeIOp:$vtypei), []>;
-
+def PseudoVSETIVLI : Pseudo<(outs GPR:$rd), (ins uimm5:$rs1, VTypeIOp:$vtypei), []>;
}
//===----------------------------------------------------------------------===//
@@ -3142,6 +3148,9 @@ foreach eew = EEWList in {
defm PseudoVSE # eew : VPseudoUSStore;
}
+defm PseudoVLE1 : VPseudoLoadMask;
+defm PseudoVSE1 : VPseudoStoreMask;
+
//===----------------------------------------------------------------------===//
// 7.5 Vector Strided Instructions
//===----------------------------------------------------------------------===//
@@ -3437,12 +3446,12 @@ defm PseudoVFSQRT : VPseudoUnaryV_V;
//===----------------------------------------------------------------------===//
// 14.9. Vector Floating-Point Reciprocal Square-Root Estimate Instruction
//===----------------------------------------------------------------------===//
-defm PseudoVFRSQRTE7 : VPseudoUnaryV_V;
+defm PseudoVFRSQRT7 : VPseudoUnaryV_V;
//===----------------------------------------------------------------------===//
// 14.10. Vector Floating-Point Reciprocal Estimate Instruction
//===----------------------------------------------------------------------===//
-defm PseudoVFRECE7 : VPseudoUnaryV_V;
+defm PseudoVFREC7 : VPseudoUnaryV_V;
//===----------------------------------------------------------------------===//
// 14.11. Vector Floating-Point Min/Max Instructions
@@ -3719,6 +3728,15 @@ foreach vti = AllVectors in
vti.Vector, vti.Mask, vti.SEW, vti.LMul, vti.RegClass>;
}
+foreach vti = AllMasks in {
+ defvar PseudoVLE1 = !cast<Instruction>("PseudoVLE1_V_"#vti.BX);
+ def : Pat<(vti.Mask (int_riscv_vle1 GPR:$rs1, (XLenVT (VLOp GPR:$vl)))),
+ (PseudoVLE1 $rs1, GPR:$vl, vti.SEW)>;
+ defvar PseudoVSE1 = !cast<Instruction>("PseudoVSE1_V_"#vti.BX);
+ def : Pat<(int_riscv_vse1 (vti.Mask VR:$rs3), GPR:$rs1, (XLenVT (VLOp GPR:$vl))),
+ (PseudoVSE1 $rs3, $rs1, GPR:$vl, vti.SEW)>;
+}
+
//===----------------------------------------------------------------------===//
// 7.5 Vector Strided Instructions
//===----------------------------------------------------------------------===//
@@ -3886,62 +3904,63 @@ defm "" : VPatBinaryM_VX_VI<"int_riscv_vmsgt", "PseudoVMSGT", AllIntegerVectors>
// instruction.
foreach vti = AllIntegerVectors in {
def : Pat<(vti.Mask (int_riscv_vmslt (vti.Vector vti.RegClass:$rs1),
- (vti.Scalar simm5_plus1:$rs2), GPR:$vl)),
+ (vti.Scalar simm5_plus1:$rs2), (XLenVT (VLOp GPR:$vl)))),
(!cast<Instruction>("PseudoVMSLE_VI_"#vti.LMul.MX) vti.RegClass:$rs1,
(DecImm simm5_plus1:$rs2),
- (NoX0 GPR:$vl),
+ GPR:$vl,
vti.SEW)>;
def : Pat<(vti.Mask (int_riscv_vmslt_mask (vti.Mask V0),
(vti.Vector vti.RegClass:$rs1),
(vti.Scalar simm5_plus1:$rs2),
(vti.Mask VR:$merge),
- GPR:$vl)),
+ (XLenVT (VLOp GPR:$vl)))),
(!cast<Instruction>("PseudoVMSLE_VI_"#vti.LMul.MX#"_MASK")
VR:$merge,
vti.RegClass:$rs1,
(DecImm simm5_plus1:$rs2),
(vti.Mask V0),
- (NoX0 GPR:$vl),
+ GPR:$vl,
vti.SEW)>;
- def : Pat<(vti.Mask (int_riscv_vmsltu (vti.Vector vti.RegClass:$rs1),
- (vti.Scalar simm5_plus1:$rs2), GPR:$vl)),
+ def : Pat<(vti.Mask (int_riscv_vmsltu (vti.Vector vti.RegClass:$rs1),
+ (vti.Scalar simm5_plus1:$rs2),
+ (XLenVT (VLOp GPR:$vl)))),
(!cast<Instruction>("PseudoVMSLEU_VI_"#vti.LMul.MX) vti.RegClass:$rs1,
(DecImm simm5_plus1:$rs2),
- (NoX0 GPR:$vl),
+ GPR:$vl,
vti.SEW)>;
def : Pat<(vti.Mask (int_riscv_vmsltu_mask (vti.Mask V0),
(vti.Vector vti.RegClass:$rs1),
(vti.Scalar simm5_plus1:$rs2),
(vti.Mask VR:$merge),
- GPR:$vl)),
+ (XLenVT (VLOp GPR:$vl)))),
(!cast<Instruction>("PseudoVMSLEU_VI_"#vti.LMul.MX#"_MASK")
VR:$merge,
vti.RegClass:$rs1,
(DecImm simm5_plus1:$rs2),
(vti.Mask V0),
- (NoX0 GPR:$vl),
+ GPR:$vl,
vti.SEW)>;
// Special cases to avoid matching vmsltu.vi 0 (always false) to
// vmsleu.vi -1 (always true). Instead match to vmsne.vv.
def : Pat<(vti.Mask (int_riscv_vmsltu (vti.Vector vti.RegClass:$rs1),
- (vti.Scalar 0), GPR:$vl)),
+ (vti.Scalar 0), (XLenVT (VLOp GPR:$vl)))),
(!cast<Instruction>("PseudoVMSNE_VV_"#vti.LMul.MX) vti.RegClass:$rs1,
vti.RegClass:$rs1,
- (NoX0 GPR:$vl),
+ GPR:$vl,
vti.SEW)>;
def : Pat<(vti.Mask (int_riscv_vmsltu_mask (vti.Mask V0),
(vti.Vector vti.RegClass:$rs1),
(vti.Scalar 0),
(vti.Mask VR:$merge),
- GPR:$vl)),
+ (XLenVT (VLOp GPR:$vl)))),
(!cast<Instruction>("PseudoVMSNE_VV_"#vti.LMul.MX#"_MASK")
VR:$merge,
vti.RegClass:$rs1,
vti.RegClass:$rs1,
(vti.Mask V0),
- (NoX0 GPR:$vl),
+ GPR:$vl,
vti.SEW)>;
}
@@ -4002,18 +4021,18 @@ defm "" : VPatBinaryV_VM_XM_IM<"int_riscv_vmerge", "PseudoVMERGE">;
//===----------------------------------------------------------------------===//
foreach vti = AllVectors in {
def : Pat<(vti.Vector (int_riscv_vmv_v_v (vti.Vector vti.RegClass:$rs1),
- GPR:$vl)),
+ (XLenVT (VLOp GPR:$vl)))),
(!cast<Instruction>("PseudoVMV_V_V_"#vti.LMul.MX)
- $rs1, (NoX0 GPR:$vl), vti.SEW)>;
+ $rs1, GPR:$vl, vti.SEW)>;
}
foreach vti = AllIntegerVectors in {
- def : Pat<(vti.Vector (int_riscv_vmv_v_x GPR:$rs2, GPR:$vl)),
+ def : Pat<(vti.Vector (int_riscv_vmv_v_x GPR:$rs2, (XLenVT (VLOp GPR:$vl)))),
(!cast<Instruction>("PseudoVMV_V_X_"#vti.LMul.MX)
- $rs2, (NoX0 GPR:$vl), vti.SEW)>;
- def : Pat<(vti.Vector (int_riscv_vmv_v_x simm5:$imm5, GPR:$vl)),
+ $rs2, GPR:$vl, vti.SEW)>;
+ def : Pat<(vti.Vector (int_riscv_vmv_v_x simm5:$imm5, (XLenVT (VLOp GPR:$vl)))),
(!cast<Instruction>("PseudoVMV_V_I_"#vti.LMul.MX)
- simm5:$imm5, (NoX0 GPR:$vl), vti.SEW)>;
+ simm5:$imm5, GPR:$vl, vti.SEW)>;
}
//===----------------------------------------------------------------------===//
@@ -4109,12 +4128,12 @@ defm "" : VPatUnaryV_V<"int_riscv_vfsqrt", "PseudoVFSQRT", AllFloatVectors>;
//===----------------------------------------------------------------------===//
// 14.9. Vector Floating-Point Reciprocal Square-Root Estimate Instruction
//===----------------------------------------------------------------------===//
-defm "" : VPatUnaryV_V<"int_riscv_vfrsqrte7", "PseudoVFRSQRTE7", AllFloatVectors>;
+defm "" : VPatUnaryV_V<"int_riscv_vfrsqrt7", "PseudoVFRSQRT7", AllFloatVectors>;
//===----------------------------------------------------------------------===//
// 14.10. Vector Floating-Point Reciprocal Estimate Instruction
//===----------------------------------------------------------------------===//
-defm "" : VPatUnaryV_V<"int_riscv_vfrece7", "PseudoVFRECE7", AllFloatVectors>;
+defm "" : VPatUnaryV_V<"int_riscv_vfrec7", "PseudoVFREC7", AllFloatVectors>;
//===----------------------------------------------------------------------===//
// 14.11. Vector Floating-Point Min/Max Instructions
@@ -4157,8 +4176,8 @@ foreach fvti = AllFloatVectors in {
defvar instr = !cast<Instruction>("PseudoVMERGE_VIM_"#fvti.LMul.MX);
def : Pat<(fvti.Vector (int_riscv_vfmerge (fvti.Vector fvti.RegClass:$rs2),
(fvti.Scalar (fpimm0)),
- (fvti.Mask V0), (XLenVT GPR:$vl))),
- (instr fvti.RegClass:$rs2, 0, (fvti.Mask V0), (NoX0 GPR:$vl), fvti.SEW)>;
+ (fvti.Mask V0), (XLenVT (VLOp GPR:$vl)))),
+ (instr fvti.RegClass:$rs2, 0, (fvti.Mask V0), GPR:$vl, fvti.SEW)>;
}
//===----------------------------------------------------------------------===//
@@ -4167,16 +4186,16 @@ foreach fvti = AllFloatVectors in {
foreach fvti = AllFloatVectors in {
// If we're splatting fpimm0, use vmv.v.x vd, x0.
def : Pat<(fvti.Vector (int_riscv_vfmv_v_f
- (fvti.Scalar (fpimm0)), GPR:$vl)),
+ (fvti.Scalar (fpimm0)), (XLenVT (VLOp GPR:$vl)))),
(!cast<Instruction>("PseudoVMV_V_I_"#fvti.LMul.MX)
- 0, (NoX0 GPR:$vl), fvti.SEW)>;
+ 0, GPR:$vl, fvti.SEW)>;
def : Pat<(fvti.Vector (int_riscv_vfmv_v_f
- (fvti.Scalar fvti.ScalarRegClass:$rs2), GPR:$vl)),
+ (fvti.Scalar fvti.ScalarRegClass:$rs2), (XLenVT (VLOp GPR:$vl)))),
(!cast<Instruction>("PseudoVFMV_V_" # fvti.ScalarSuffix # "_" #
fvti.LMul.MX)
(fvti.Scalar fvti.ScalarRegClass:$rs2),
- (NoX0 GPR:$vl), fvti.SEW)>;
+ GPR:$vl, fvti.SEW)>;
}
//===----------------------------------------------------------------------===//
@@ -4321,9 +4340,9 @@ foreach vti = AllIntegerVectors in {
def : Pat<(riscv_vmv_x_s (vti.Vector vti.RegClass:$rs2)),
(!cast<Instruction>("PseudoVMV_X_S_" # vti.LMul.MX) $rs2, vti.SEW)>;
def : Pat<(vti.Vector (int_riscv_vmv_s_x (vti.Vector vti.RegClass:$rs1),
- GPR:$rs2, GPR:$vl)),
+ GPR:$rs2, (XLenVT (VLOp GPR:$vl)))),
(!cast<Instruction>("PseudoVMV_S_X_" # vti.LMul.MX)
- (vti.Vector $rs1), $rs2, (NoX0 GPR:$vl), vti.SEW)>;
+ (vti.Vector $rs1), $rs2, GPR:$vl, vti.SEW)>;
}
} // Predicates = [HasStdExtV]
@@ -4339,12 +4358,12 @@ foreach fvti = AllFloatVectors in {
(instr $rs2, fvti.SEW)>;
def : Pat<(fvti.Vector (int_riscv_vfmv_s_f (fvti.Vector fvti.RegClass:$rs1),
- (fvti.Scalar fvti.ScalarRegClass:$rs2), GPR:$vl)),
+ (fvti.Scalar fvti.ScalarRegClass:$rs2), (XLenVT (VLOp GPR:$vl)))),
(!cast<Instruction>("PseudoVFMV_S_"#fvti.ScalarSuffix#"_" #
fvti.LMul.MX)
(fvti.Vector $rs1),
(fvti.Scalar fvti.ScalarRegClass:$rs2),
- (NoX0 GPR:$vl), fvti.SEW)>;
+ GPR:$vl, fvti.SEW)>;
}
} // Predicates = [HasStdExtV, HasStdExtF]
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td
index aea3d0e17ccc..dee67708bed1 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td
@@ -8,7 +8,7 @@
///
/// This file contains the required infrastructure and SDNode patterns to
/// support code generation for the standard 'V' (Vector) extension, version
-/// 0.9. This version is still experimental as the 'V' extension hasn't been
+/// 0.10. This version is still experimental as the 'V' extension hasn't been
/// ratified yet.
///
/// This file is included from and depends upon RISCVInstrInfoVPseudos.td
@@ -384,8 +384,8 @@ defm "" : VPatBinarySDNode_VV_VX<mulhs, "PseudoVMULH">;
defm "" : VPatBinarySDNode_VV_VX<mulhu, "PseudoVMULHU">;
// 12.11. Vector Integer Divide Instructions
-defm "" : VPatBinarySDNode_VV_VX<sdiv, "PseudoVDIVU">;
-defm "" : VPatBinarySDNode_VV_VX<udiv, "PseudoVDIV">;
+defm "" : VPatBinarySDNode_VV_VX<udiv, "PseudoVDIVU">;
+defm "" : VPatBinarySDNode_VV_VX<sdiv, "PseudoVDIV">;
defm "" : VPatBinarySDNode_VV_VX<urem, "PseudoVREMU">;
defm "" : VPatBinarySDNode_VV_VX<srem, "PseudoVREM">;
diff --git a/llvm/lib/Target/VE/VE.h b/llvm/lib/Target/VE/VE.h
index a404f7ced70a..8c1fa840f19c 100644
--- a/llvm/lib/Target/VE/VE.h
+++ b/llvm/lib/Target/VE/VE.h
@@ -334,7 +334,7 @@ inline static bool isMImmVal(uint64_t Val) {
return true;
}
// (m)1 patterns
- return (Val & (1UL << 63)) && isShiftedMask_64(Val);
+ return (Val & (UINT64_C(1) << 63)) && isShiftedMask_64(Val);
}
inline static bool isMImm32Val(uint32_t Val) {
@@ -347,14 +347,14 @@ inline static bool isMImm32Val(uint32_t Val) {
return true;
}
// (m)1 patterns
- return (Val & (1 << 31)) && isShiftedMask_32(Val);
+ return (Val & (UINT32_C(1) << 31)) && isShiftedMask_32(Val);
}
/// val2MImm - Convert an integer immediate value to target MImm immediate.
inline static uint64_t val2MImm(uint64_t Val) {
if (Val == 0)
return 0; // (0)1
- if (Val & (1UL << 63))
+ if (Val & (UINT64_C(1) << 63))
return countLeadingOnes(Val); // (m)1
return countLeadingZeros(Val) | 0x40; // (m)0
}
@@ -364,8 +364,8 @@ inline static uint64_t mimm2Val(uint64_t Val) {
if (Val == 0)
return 0; // (0)1
if ((Val & 0x40) == 0)
- return (uint64_t)((1L << 63) >> (Val & 0x3f)); // (m)1
- return ((uint64_t)(-1L) >> (Val & 0x3f)); // (m)0
+ return (uint64_t)((INT64_C(1) << 63) >> (Val & 0x3f)); // (m)1
+ return ((uint64_t)INT64_C(-1) >> (Val & 0x3f)); // (m)0
}
inline unsigned M0(unsigned Val) { return Val + 64; }
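
The UINT64_C/INT64_C changes above are portability fixes: on LLP64 platforms such as 64-bit Windows, unsigned long is only 32 bits, so 1UL << 63 shifts past the width of the type and is undefined behavior. A small standalone illustration (not part of the patch):

#include <cstdint>
#include <cstdio>

int main() {
  // UINT64_C guarantees a 64-bit operand regardless of how wide
  // 'unsigned long' happens to be on the host.
  uint64_t Bit63 = UINT64_C(1) << 63;
  // mimm2Val relies on an arithmetic right shift replicating the sign
  // bit: (INT64_C(1) << 63) >> 2 yields three leading one bits.
  uint64_t M1 = (uint64_t)((INT64_C(1) << 63) >> 2); // 0xe000000000000000
  std::printf("%llx %llx\n", (unsigned long long)Bit63,
              (unsigned long long)M1);
  return 0;
}
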
diff --git a/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp b/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp
index 05e482a6b66e..4e6d8e8e1a54 100644
--- a/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp
+++ b/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp
@@ -492,6 +492,7 @@ static int readPrefixes(struct InternalInstruction *insn) {
insn->addressSize = (insn->hasAdSize ? 4 : 8);
insn->displacementSize = 4;
insn->immediateSize = 4;
+ insn->hasOpSize = false;
} else {
insn->registerSize = (insn->hasOpSize ? 2 : 4);
insn->addressSize = (insn->hasAdSize ? 4 : 8);
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 0dd20235aa3c..6b816c710f98 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -36916,11 +36916,18 @@ static SDValue canonicalizeLaneShuffleWithRepeatedOps(SDValue V,
Res = DAG.getNode(SrcOpc0, DL, SrcVT0, DAG.getBitcast(SrcVT0, Res));
return DAG.getBitcast(VT, Res);
}
+ case X86ISD::VPERMILPI:
+ // TODO: Handle v4f64 permutes with different low/high lane masks.
+ if (SrcVT0 == MVT::v4f64) {
+ uint64_t Mask = Src0.getConstantOperandVal(1);
+ if ((Mask & 0x3) != ((Mask >> 2) & 0x3))
+ break;
+ }
+ LLVM_FALLTHROUGH;
case X86ISD::VSHLI:
case X86ISD::VSRLI:
case X86ISD::VSRAI:
case X86ISD::PSHUFD:
- case X86ISD::VPERMILPI:
if (Src1.isUndef() || Src0.getOperand(1) == Src1.getOperand(1)) {
SDValue LHS = DAG.getBitcast(VT, Src0.getOperand(0));
SDValue RHS =
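
For v4f64, the VPERMILPI immediate carries one select bit per element, i.e. two bits per 128-bit lane, so Mask & 0x3 versus (Mask >> 2) & 0x3 compares the permutes the two lanes perform. A worked example of the new guard (the immediate values are illustrative):

// Mask = 0b0101: low lane 0b01, high lane 0b01 -> lanes match, so the
// shuffle repeats per lane and the combine may fall through.
// Mask = 0b0110: low lane 0b10, high lane 0b01 -> lanes differ, so the
// combine bails out via the break above.
static_assert((0b0101u & 0x3) == ((0b0101u >> 2) & 0x3), "lanes match");
static_assert((0b0110u & 0x3) != ((0b0110u >> 2) & 0x3), "lanes differ");
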
diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td
index 0c2b278fdd7b..19012797ae9a 100644
--- a/llvm/lib/Target/X86/X86InstrAVX512.td
+++ b/llvm/lib/Target/X86/X86InstrAVX512.td
@@ -1123,10 +1123,10 @@ defm : vextract_for_mask_cast<"VEXTRACTI64x4Z", v64i8_info, v32i8x_info,
EXTRACT_get_vextract256_imm, [HasAVX512]>;
// vextractps - extract 32 bits from XMM
-def VEXTRACTPSZrr : AVX512AIi8<0x17, MRMDestReg, (outs GR32:$dst),
+def VEXTRACTPSZrr : AVX512AIi8<0x17, MRMDestReg, (outs GR32orGR64:$dst),
(ins VR128X:$src1, u8imm:$src2),
"vextractps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- [(set GR32:$dst, (extractelt (bc_v4i32 (v4f32 VR128X:$src1)), imm:$src2))]>,
+ [(set GR32orGR64:$dst, (extractelt (bc_v4i32 (v4f32 VR128X:$src1)), imm:$src2))]>,
EVEX, VEX_WIG, Sched<[WriteVecExtract]>;
def VEXTRACTPSZmr : AVX512AIi8<0x17, MRMDestMem, (outs),
diff --git a/llvm/lib/Transforms/IPO/SampleContextTracker.cpp b/llvm/lib/Transforms/IPO/SampleContextTracker.cpp
index 37fc27e91100..41d7f363e1a4 100644
--- a/llvm/lib/Transforms/IPO/SampleContextTracker.cpp
+++ b/llvm/lib/Transforms/IPO/SampleContextTracker.cpp
@@ -30,7 +30,7 @@ namespace llvm {
ContextTrieNode *ContextTrieNode::getChildContext(const LineLocation &CallSite,
StringRef CalleeName) {
if (CalleeName.empty())
- return getChildContext(CallSite);
+ return getHottestChildContext(CallSite);
uint32_t Hash = nodeHash(CalleeName, CallSite);
auto It = AllChildContext.find(Hash);
@@ -40,18 +40,22 @@ ContextTrieNode *ContextTrieNode::getChildContext(const LineLocation &CallSite,
}
ContextTrieNode *
-ContextTrieNode::getChildContext(const LineLocation &CallSite) {
+ContextTrieNode::getHottestChildContext(const LineLocation &CallSite) {
// CSFDO-TODO: This could be slow, change AllChildContext so we can
// do point look up for child node by call site alone.
- // CSFDO-TODO: Return the child with max count for indirect call
+  // Retrieve the child node with the max count for an indirect call.
ContextTrieNode *ChildNodeRet = nullptr;
+ uint64_t MaxCalleeSamples = 0;
for (auto &It : AllChildContext) {
ContextTrieNode &ChildNode = It.second;
- if (ChildNode.CallSiteLoc == CallSite) {
- if (ChildNodeRet)
- return nullptr;
- else
- ChildNodeRet = &ChildNode;
+ if (ChildNode.CallSiteLoc != CallSite)
+ continue;
+ FunctionSamples *Samples = ChildNode.getFunctionSamples();
+ if (!Samples)
+ continue;
+ if (Samples->getTotalSamples() > MaxCalleeSamples) {
+ ChildNodeRet = &ChildNode;
+ MaxCalleeSamples = Samples->getTotalSamples();
}
}
@@ -179,7 +183,7 @@ SampleContextTracker::SampleContextTracker(
SampleContext Context(FuncSample.first(), RawContext);
LLVM_DEBUG(dbgs() << "Tracking Context for function: " << Context << "\n");
if (!Context.isBaseContext())
- FuncToCtxtProfileSet[Context.getName()].insert(FSamples);
+ FuncToCtxtProfileSet[Context.getNameWithoutContext()].insert(FSamples);
ContextTrieNode *NewNode = getOrCreateContextPath(Context, true);
assert(!NewNode->getFunctionSamples() &&
"New node can't have sample profile");
@@ -191,12 +195,12 @@ FunctionSamples *
SampleContextTracker::getCalleeContextSamplesFor(const CallBase &Inst,
StringRef CalleeName) {
LLVM_DEBUG(dbgs() << "Getting callee context for instr: " << Inst << "\n");
- // CSFDO-TODO: We use CalleeName to differentiate indirect call
- // We need to get sample for indirect callee too.
DILocation *DIL = Inst.getDebugLoc();
if (!DIL)
return nullptr;
+ // For indirect call, CalleeName will be empty, in which case the context
+  // profile for the callee with the largest total samples will be returned.
ContextTrieNode *CalleeContext = getCalleeContextFor(DIL, CalleeName);
if (CalleeContext) {
FunctionSamples *FSamples = CalleeContext->getFunctionSamples();
@@ -209,6 +213,26 @@ SampleContextTracker::getCalleeContextSamplesFor(const CallBase &Inst,
return nullptr;
}
+std::vector<const FunctionSamples *>
+SampleContextTracker::getIndirectCalleeContextSamplesFor(
+ const DILocation *DIL) {
+ std::vector<const FunctionSamples *> R;
+ if (!DIL)
+ return R;
+
+ ContextTrieNode *CallerNode = getContextFor(DIL);
+ LineLocation CallSite = FunctionSamples::getCallSiteIdentifier(DIL);
+ for (auto &It : CallerNode->getAllChildContext()) {
+ ContextTrieNode &ChildNode = It.second;
+ if (ChildNode.getCallSiteLoc() != CallSite)
+ continue;
+ if (FunctionSamples *CalleeSamples = ChildNode.getFunctionSamples())
+ R.push_back(CalleeSamples);
+ }
+
+ return R;
+}
+
FunctionSamples *
SampleContextTracker::getContextSamplesFor(const DILocation *DIL) {
assert(DIL && "Expect non-null location");
@@ -295,11 +319,6 @@ void SampleContextTracker::promoteMergeContextSamplesTree(
const Instruction &Inst, StringRef CalleeName) {
LLVM_DEBUG(dbgs() << "Promoting and merging context tree for instr: \n"
<< Inst << "\n");
- // CSFDO-TODO: We also need to promote context profile from indirect
- // calls. We won't have callee names from those from call instr.
- if (CalleeName.empty())
- return;
-
// Get the caller context for the call instruction, we don't use callee
// name from call because there can be context from indirect calls too.
DILocation *DIL = Inst.getDebugLoc();
@@ -308,8 +327,23 @@ void SampleContextTracker::promoteMergeContextSamplesTree(
return;
// Get the context that needs to be promoted
- LineLocation CallSite(FunctionSamples::getOffset(DIL),
- DIL->getBaseDiscriminator());
+ LineLocation CallSite = FunctionSamples::getCallSiteIdentifier(DIL);
+ // For indirect call, CalleeName will be empty, in which case we need to
+ // promote all non-inlined child context profiles.
+ if (CalleeName.empty()) {
+ for (auto &It : CallerNode->getAllChildContext()) {
+ ContextTrieNode *NodeToPromo = &It.second;
+ if (CallSite != NodeToPromo->getCallSiteLoc())
+ continue;
+ FunctionSamples *FromSamples = NodeToPromo->getFunctionSamples();
+ if (FromSamples && FromSamples->getContext().hasState(InlinedContext))
+ continue;
+ promoteMergeContextSamplesTree(*NodeToPromo);
+ }
+ return;
+ }
+
+ // Get the context for the given callee that needs to be promoted
ContextTrieNode *NodeToPromo =
CallerNode->getChildContext(CallSite, CalleeName);
if (!NodeToPromo)
@@ -329,6 +363,8 @@ ContextTrieNode &SampleContextTracker::promoteMergeContextSamplesTree(
LLVM_DEBUG(dbgs() << " Found context tree root to promote: "
<< FromSamples->getContext() << "\n");
+ assert(!FromSamples->getContext().hasState(InlinedContext) &&
+ "Shouldn't promote inlined context profile");
StringRef ContextStrToRemove = FromSamples->getContext().getCallingContext();
return promoteMergeContextSamplesTree(NodeToPromo, RootContext,
ContextStrToRemove);
@@ -361,18 +397,14 @@ SampleContextTracker::getCalleeContextFor(const DILocation *DIL,
StringRef CalleeName) {
assert(DIL && "Expect non-null location");
- // CSSPGO-TODO: need to support indirect callee
- if (CalleeName.empty())
- return nullptr;
-
ContextTrieNode *CallContext = getContextFor(DIL);
if (!CallContext)
return nullptr;
+  // When CalleeName is empty, the child context profile with the max
+ // total samples will be returned.
return CallContext->getChildContext(
- LineLocation(FunctionSamples::getOffset(DIL),
- DIL->getBaseDiscriminator()),
- CalleeName);
+ FunctionSamples::getCallSiteIdentifier(DIL), CalleeName);
}
ContextTrieNode *SampleContextTracker::getContextFor(const DILocation *DIL) {
@@ -386,8 +418,8 @@ ContextTrieNode *SampleContextTracker::getContextFor(const DILocation *DIL) {
if (Name.empty())
Name = PrevDIL->getScope()->getSubprogram()->getName();
S.push_back(
- std::make_pair(LineLocation(FunctionSamples::getOffset(DIL),
- DIL->getBaseDiscriminator()), Name));
+ std::make_pair(FunctionSamples::getCallSiteIdentifier(DIL),
+ PrevDIL->getScope()->getSubprogram()->getLinkageName()));
PrevDIL = DIL;
}
diff --git a/llvm/lib/Transforms/IPO/SampleProfile.cpp b/llvm/lib/Transforms/IPO/SampleProfile.cpp
index 264ac4065e8c..b2a9127773c3 100644
--- a/llvm/lib/Transforms/IPO/SampleProfile.cpp
+++ b/llvm/lib/Transforms/IPO/SampleProfile.cpp
@@ -26,6 +26,7 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/None.h"
+#include "llvm/ADT/PriorityQueue.h"
#include "llvm/ADT/SCCIterator.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
@@ -107,6 +108,16 @@ STATISTIC(NumCSNotInlined,
STATISTIC(NumMismatchedProfile,
"Number of functions with CFG mismatched profile");
STATISTIC(NumMatchedProfile, "Number of functions with CFG matched profile");
+STATISTIC(NumDuplicatedInlinesite,
+ "Number of inlined callsites with a partial distribution factor");
+
+STATISTIC(NumCSInlinedHitMinLimit,
+ "Number of functions with FDO inline stopped due to min size limit");
+STATISTIC(NumCSInlinedHitMaxLimit,
+ "Number of functions with FDO inline stopped due to max size limit");
+STATISTIC(
+ NumCSInlinedHitGrowthLimit,
+ "Number of functions with FDO inline stopped due to growth size limit");
// Command line option to specify the file to read samples from. This is
// mainly used for debugging.
@@ -171,6 +182,38 @@ static cl::opt<bool> ProfileSizeInline(
cl::desc("Inline cold call sites in profile loader if it's beneficial "
"for code size."));
+static cl::opt<int> ProfileInlineGrowthLimit(
+ "sample-profile-inline-growth-limit", cl::Hidden, cl::init(12),
+ cl::desc("The size growth ratio limit for proirity-based sample profile "
+ "loader inlining."));
+
+static cl::opt<int> ProfileInlineLimitMin(
+ "sample-profile-inline-limit-min", cl::Hidden, cl::init(100),
+ cl::desc("The lower bound of size growth limit for "
+ "proirity-based sample profile loader inlining."));
+
+static cl::opt<int> ProfileInlineLimitMax(
+ "sample-profile-inline-limit-max", cl::Hidden, cl::init(10000),
+ cl::desc("The upper bound of size growth limit for "
+ "proirity-based sample profile loader inlining."));
+
+static cl::opt<int> ProfileICPThreshold(
+ "sample-profile-icp-threshold", cl::Hidden, cl::init(5),
+ cl::desc(
+ "Relative hotness threshold for indirect "
+ "call promotion in proirity-based sample profile loader inlining."));
+
+static cl::opt<int> SampleHotCallSiteThreshold(
+ "sample-profile-hot-inline-threshold", cl::Hidden, cl::init(3000),
+ cl::desc("Hot callsite threshold for proirity-based sample profile loader "
+ "inlining."));
+
+static cl::opt<bool> CallsitePrioritizedInline(
+ "sample-profile-prioritized-inline", cl::Hidden, cl::ZeroOrMore,
+ cl::init(false),
+ cl::desc("Use call site prioritized inlining for sample profile loader."
+ "Currently only CSSPGO is supported."));
+
static cl::opt<int> SampleColdCallSiteThreshold(
"sample-profile-cold-inline-threshold", cl::Hidden, cl::init(45),
cl::desc("Threshold for inlining cold callsites"));
@@ -313,6 +356,38 @@ private:
DenseMap<uint64_t, StringRef> &CurrentGUIDToFuncNameMap;
};
+// Inline candidate used by iterative callsite prioritized inliner
+struct InlineCandidate {
+ CallBase *CallInstr;
+ const FunctionSamples *CalleeSamples;
+ // Prorated callsite count, which will be used to guide inlining. For example,
+ // if a callsite is duplicated in LTO prelink, then in LTO postlink the two
+ // copies will get their own distribution factors and their prorated counts
+ // will be used to decide if they should be inlined independently.
+ uint64_t CallsiteCount;
+ // Call site distribution factor to prorate the profile samples for a
+ // duplicated callsite. Default value is 1.0.
+ float CallsiteDistribution;
+};
+
+// Inline candidate comparer using call site weight
+struct CandidateComparer {
+ bool operator()(const InlineCandidate &LHS, const InlineCandidate &RHS) {
+ if (LHS.CallsiteCount != RHS.CallsiteCount)
+ return LHS.CallsiteCount < RHS.CallsiteCount;
+
+ // Tie breaker using GUID so we have stable/deterministic inlining order
+ assert(LHS.CalleeSamples && RHS.CalleeSamples &&
+ "Expect non-null FunctionSamples");
+ return LHS.CalleeSamples->getGUID(LHS.CalleeSamples->getName()) <
+ RHS.CalleeSamples->getGUID(RHS.CalleeSamples->getName());
+ }
+};
+
+using CandidateQueue =
+ PriorityQueue<InlineCandidate, std::vector<InlineCandidate>,
+ CandidateComparer>;
+
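
CandidateComparer feeds llvm::PriorityQueue, an adapter over std::priority_queue, which always pops its largest element; returning less-than on CallsiteCount therefore makes the hottest call site come out of top() first, with GUIDs breaking ties so the inlining order is deterministic. A toy standalone illustration (the struct, counts, and GUIDs are made up):

#include <cstdint>
#include <cstdio>
#include <queue>
#include <vector>

// Stand-in for InlineCandidate with just the fields the comparer uses.
struct Cand { uint64_t Count, GUID; };
struct Cmp {
  bool operator()(const Cand &L, const Cand &R) const {
    if (L.Count != R.Count)
      return L.Count < R.Count; // max-heap: hottest call site on top
    return L.GUID < R.GUID;     // deterministic tie-break
  }
};

int main() {
  std::priority_queue<Cand, std::vector<Cand>, Cmp> Q;
  for (Cand C : {Cand{10, 3}, Cand{50, 1}, Cand{10, 7}})
    Q.push(C);
  while (!Q.empty()) { // pops 50/1, then 10/7, then 10/3
    std::printf("%llu/%llu\n", (unsigned long long)Q.top().Count,
                (unsigned long long)Q.top().GUID);
    Q.pop();
  }
  return 0;
}
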
/// Sample profile pass.
///
/// This pass reads profile data from the file specified by
@@ -350,9 +425,21 @@ protected:
findIndirectCallFunctionSamples(const Instruction &I, uint64_t &Sum) const;
mutable DenseMap<const DILocation *, const FunctionSamples *> DILocation2SampleMap;
const FunctionSamples *findFunctionSamples(const Instruction &I) const;
- bool inlineCallInstruction(CallBase &CB);
+ // Attempt to promote indirect call and also inline the promoted call
+ bool tryPromoteAndInlineCandidate(
+ Function &F, InlineCandidate &Candidate, uint64_t SumOrigin,
+ uint64_t &Sum, DenseSet<Instruction *> &PromotedInsns,
+ SmallVector<CallBase *, 8> *InlinedCallSites = nullptr);
bool inlineHotFunctions(Function &F,
DenseSet<GlobalValue::GUID> &InlinedGUIDs);
+ InlineCost shouldInlineCandidate(InlineCandidate &Candidate);
+ bool getInlineCandidate(InlineCandidate *NewCandidate, CallBase *CB);
+ bool
+ tryInlineCandidate(InlineCandidate &Candidate,
+ SmallVector<CallBase *, 8> *InlinedCallSites = nullptr);
+ bool
+ inlineHotFunctionsWithPriority(Function &F,
+ DenseSet<GlobalValue::GUID> &InlinedGUIDs);
// Inline cold/small functions in addition to hot ones
bool shouldInlineColdCallee(CallBase &CallInst);
void emitOptimizationRemarksForInlineCandidates(
@@ -808,7 +895,7 @@ ErrorOr<uint64_t> SampleProfileLoader::getProbeWeight(const Instruction &Inst) {
const ErrorOr<uint64_t> &R = FS->findSamplesAt(Probe->Id, 0);
if (R) {
- uint64_t Samples = R.get();
+ uint64_t Samples = R.get() * Probe->Factor;
bool FirstMark = CoverageTracker.markSamplesUsed(FS, Probe->Id, 0, Samples);
if (FirstMark) {
ORE->emit([&]() {
@@ -816,13 +903,17 @@ ErrorOr<uint64_t> SampleProfileLoader::getProbeWeight(const Instruction &Inst) {
Remark << "Applied " << ore::NV("NumSamples", Samples);
Remark << " samples from profile (ProbeId=";
Remark << ore::NV("ProbeId", Probe->Id);
+ Remark << ", Factor=";
+ Remark << ore::NV("Factor", Probe->Factor);
+ Remark << ", OriginalSamples=";
+ Remark << ore::NV("OriginalSamples", R.get());
Remark << ")";
return Remark;
});
}
-
LLVM_DEBUG(dbgs() << " " << Probe->Id << ":" << Inst
- << " - weight: " << R.get() << ")\n");
+ << " - weight: " << R.get() << " - factor: "
+ << format("%0.2f", Probe->Factor) << ")\n");
return Samples;
}
return R;
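
With pseudo probes, the raw profile count is now prorated by the probe's distribution factor before it is used as a weight. A worked example with illustrative values:

#include <cstdint>

// A probe recorded 100 samples, but its enclosing callsite was
// duplicated and this copy carries a 0.5 distribution factor, so only
// half of the recorded samples are attributed here.
uint64_t Raw = 100;              // R.get()
float Factor = 0.5f;             // Probe->Factor
uint64_t Samples = Raw * Factor; // == 50, the weight actually applied
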
@@ -918,6 +1009,31 @@ SampleProfileLoader::findIndirectCallFunctionSamples(
return R;
}
+ auto FSCompare = [](const FunctionSamples *L, const FunctionSamples *R) {
+ assert(L && R && "Expect non-null FunctionSamples");
+ if (L->getEntrySamples() != R->getEntrySamples())
+ return L->getEntrySamples() > R->getEntrySamples();
+ return FunctionSamples::getGUID(L->getName()) <
+ FunctionSamples::getGUID(R->getName());
+ };
+
+ if (ProfileIsCS) {
+ auto CalleeSamples =
+ ContextTracker->getIndirectCalleeContextSamplesFor(DIL);
+ if (CalleeSamples.empty())
+ return R;
+
+      // For CSSPGO, we only use the target context profile's entry count
+      // as that already includes both inlined callees and non-inlined ones.
+ Sum = 0;
+ for (const auto *const FS : CalleeSamples) {
+ Sum += FS->getEntrySamples();
+ R.push_back(FS);
+ }
+ llvm::sort(R, FSCompare);
+ return R;
+ }
+
const FunctionSamples *FS = findFunctionSamples(Inst);
if (FS == nullptr)
return R;
@@ -935,12 +1051,7 @@ SampleProfileLoader::findIndirectCallFunctionSamples(
Sum += NameFS.second.getEntrySamples();
R.push_back(&NameFS.second);
}
- llvm::sort(R, [](const FunctionSamples *L, const FunctionSamples *R) {
- if (L->getEntrySamples() != R->getEntrySamples())
- return L->getEntrySamples() > R->getEntrySamples();
- return FunctionSamples::getGUID(L->getName()) <
- FunctionSamples::getGUID(R->getName());
- });
+ llvm::sort(R, FSCompare);
}
return R;
}
@@ -977,42 +1088,64 @@ SampleProfileLoader::findFunctionSamples(const Instruction &Inst) const {
return it.first->second;
}
-bool SampleProfileLoader::inlineCallInstruction(CallBase &CB) {
- if (ExternalInlineAdvisor) {
- auto Advice = ExternalInlineAdvisor->getAdvice(CB);
- if (!Advice->isInliningRecommended()) {
- Advice->recordUnattemptedInlining();
- return false;
+/// Attempt to promote indirect call and also inline the promoted call.
+///
+/// \param F Caller function.
+/// \param Candidate ICP and inline candidate.
+/// \param SumOrigin Original sum of target counts for the indirect call.
+/// \param Sum Remaining sum of target counts for the indirect call.
+/// \param PromotedInsns Map to keep track of indirect calls already processed.
+/// \param InlinedCallSite Output vector for new call sites exposed after
+/// inlining.
+bool SampleProfileLoader::tryPromoteAndInlineCandidate(
+ Function &F, InlineCandidate &Candidate, uint64_t SumOrigin, uint64_t &Sum,
+ DenseSet<Instruction *> &PromotedInsns,
+ SmallVector<CallBase *, 8> *InlinedCallSite) {
+ const char *Reason = "Callee function not available";
+ // R->getValue() != &F is to prevent promoting a recursive call.
+ // If it is a recursive call, we do not inline it as it could bloat
+  // the code exponentially. There is a way to better handle this, e.g.
+  // clone the caller first, and inline the cloned caller if it is
+  // recursive. As LLVM does not inline recursive calls, we will
+ // simply ignore it instead of handling it explicitly.
+ auto R = SymbolMap.find(Candidate.CalleeSamples->getFuncName());
+ if (R != SymbolMap.end() && R->getValue() &&
+ !R->getValue()->isDeclaration() && R->getValue()->getSubprogram() &&
+ R->getValue()->hasFnAttribute("use-sample-profile") &&
+ R->getValue() != &F &&
+ isLegalToPromote(*Candidate.CallInstr, R->getValue(), &Reason)) {
+ auto *DI =
+ &pgo::promoteIndirectCall(*Candidate.CallInstr, R->getValue(),
+ Candidate.CallsiteCount, Sum, false, ORE);
+ if (DI) {
+ Sum -= Candidate.CallsiteCount;
+ // Prorate the indirect callsite distribution.
+ // Do not update the promoted direct callsite distribution at this
+ // point since the original distribution combined with the callee
+ // profile will be used to prorate callsites from the callee if
+ // inlined. Once not inlined, the direct callsite distribution should
+      // be prorated so that it will reflect the real callsite counts.
+ setProbeDistributionFactor(*Candidate.CallInstr,
+ Candidate.CallsiteDistribution * Sum /
+ SumOrigin);
+ PromotedInsns.insert(Candidate.CallInstr);
+ Candidate.CallInstr = DI;
+ if (isa<CallInst>(DI) || isa<InvokeInst>(DI)) {
+ bool Inlined = tryInlineCandidate(Candidate, InlinedCallSite);
+ if (!Inlined) {
+ // Prorate the direct callsite distribution so that it reflects real
+ // callsite counts.
+ setProbeDistributionFactor(*DI, Candidate.CallsiteDistribution *
+ Candidate.CallsiteCount /
+ SumOrigin);
+ }
+ return Inlined;
+ }
}
- // Dummy record, we don't use it for replay.
- Advice->recordInlining();
- }
-
- Function *CalledFunction = CB.getCalledFunction();
- assert(CalledFunction);
- DebugLoc DLoc = CB.getDebugLoc();
- BasicBlock *BB = CB.getParent();
- InlineParams Params = getInlineParams();
- Params.ComputeFullInlineCost = true;
- // Checks if there is anything in the reachable portion of the callee at
- // this callsite that makes this inlining potentially illegal. Need to
- // set ComputeFullInlineCost, otherwise getInlineCost may return early
- // when cost exceeds threshold without checking all IRs in the callee.
- // The acutal cost does not matter because we only checks isNever() to
- // see if it is legal to inline the callsite.
- InlineCost Cost =
- getInlineCost(CB, Params, GetTTI(*CalledFunction), GetAC, GetTLI);
- if (Cost.isNever()) {
- ORE->emit(OptimizationRemarkAnalysis(CSINLINE_DEBUG, "InlineFail", DLoc, BB)
- << "incompatible inlining");
- return false;
- }
- InlineFunctionInfo IFI(nullptr, GetAC);
- if (InlineFunction(CB, IFI).isSuccess()) {
- // The call to InlineFunction erases I, so we can't pass it here.
- emitInlinedInto(*ORE, DLoc, BB, *CalledFunction, *BB->getParent(), Cost,
- true, CSINLINE_DEBUG);
- return true;
+ } else {
+ LLVM_DEBUG(dbgs() << "\nFailed to promote indirect call to "
+ << Candidate.CalleeSamples->getFuncName() << " because "
+ << Reason << "\n");
}
return false;
}
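
The two setProbeDistributionFactor calls above split the original callsite distribution between the residual indirect call and the promoted direct call. A worked example with illustrative numbers:

// SumOrigin = 1000 total indirect-call samples, the promoted target has
// CallsiteCount = 600, and the callsite starts with distribution 1.0:
//   residual indirect site:                1.0 * (1000 - 600) / 1000 = 0.4
//   promoted direct site (if not inlined): 1.0 * 600 / 1000         = 0.6
// The two factors still sum to the callsite's original distribution.
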
@@ -1078,10 +1211,11 @@ bool SampleProfileLoader::inlineHotFunctions(
"ProfAccForSymsInList should be false when profile-sample-accurate "
"is enabled");
- DenseMap<CallBase *, const FunctionSamples *> localNotInlinedCallSites;
+ DenseMap<CallBase *, const FunctionSamples *> LocalNotInlinedCallSites;
bool Changed = false;
- while (true) {
- bool LocalChanged = false;
+ bool LocalChanged = true;
+ while (LocalChanged) {
+ LocalChanged = false;
SmallVector<CallBase *, 10> CIS;
for (auto &BB : F) {
bool Hot = false;
@@ -1095,7 +1229,7 @@ bool SampleProfileLoader::inlineHotFunctions(
"GUIDToFuncNameMap has to be populated");
AllCandidates.push_back(CB);
if (FS->getEntrySamples() > 0 || ProfileIsCS)
- localNotInlinedCallSites.try_emplace(CB, FS);
+ LocalNotInlinedCallSites.try_emplace(CB, FS);
if (callsiteIsHot(FS, PSI))
Hot = true;
else if (shouldInlineColdCallee(*CB))
@@ -1113,6 +1247,11 @@ bool SampleProfileLoader::inlineHotFunctions(
}
for (CallBase *I : CIS) {
Function *CalledFunction = I->getCalledFunction();
+ InlineCandidate Candidate = {
+ I,
+ LocalNotInlinedCallSites.count(I) ? LocalNotInlinedCallSites[I]
+ : nullptr,
+ 0 /* dummy count */, 1.0 /* dummy distribution factor */};
// Do not inline recursive calls.
if (CalledFunction == &F)
continue;
@@ -1121,6 +1260,7 @@ bool SampleProfileLoader::inlineHotFunctions(
continue;
uint64_t Sum;
for (const auto *FS : findIndirectCallFunctionSamples(*I, Sum)) {
+ uint64_t SumOrigin = Sum;
if (LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink) {
FS->findInlinedFunctions(InlinedGUIDs, F.getParent(),
PSI->getOrCompHotCountThreshold());
@@ -1129,65 +1269,34 @@ bool SampleProfileLoader::inlineHotFunctions(
if (!callsiteIsHot(FS, PSI))
continue;
- const char *Reason = "Callee function not available";
- // R->getValue() != &F is to prevent promoting a recursive call.
- // If it is a recursive call, we do not inline it as it could bloat
- // the code exponentially. There is way to better handle this, e.g.
- // clone the caller first, and inline the cloned caller if it is
- // recursive. As llvm does not inline recursive calls, we will
- // simply ignore it instead of handling it explicitly.
- auto CalleeFunctionName = FS->getFuncName();
- auto R = SymbolMap.find(CalleeFunctionName);
- if (R != SymbolMap.end() && R->getValue() &&
- !R->getValue()->isDeclaration() &&
- R->getValue()->getSubprogram() &&
- R->getValue()->hasFnAttribute("use-sample-profile") &&
- R->getValue() != &F &&
- isLegalToPromote(*I, R->getValue(), &Reason)) {
- uint64_t C = FS->getEntrySamples();
- auto &DI =
- pgo::promoteIndirectCall(*I, R->getValue(), C, Sum, false, ORE);
- Sum -= C;
- PromotedInsns.insert(I);
- // If profile mismatches, we should not attempt to inline DI.
- if ((isa<CallInst>(DI) || isa<InvokeInst>(DI)) &&
- inlineCallInstruction(cast<CallBase>(DI))) {
- if (ProfileIsCS)
- ContextTracker->markContextSamplesInlined(FS);
- localNotInlinedCallSites.erase(I);
- LocalChanged = true;
- ++NumCSInlined;
- }
- } else {
- LLVM_DEBUG(dbgs()
- << "\nFailed to promote indirect call to "
- << CalleeFunctionName << " because " << Reason << "\n");
+ Candidate = {I, FS, FS->getEntrySamples(), 1.0};
+ if (tryPromoteAndInlineCandidate(F, Candidate, SumOrigin, Sum,
+ PromotedInsns)) {
+ LocalNotInlinedCallSites.erase(I);
+ LocalChanged = true;
}
}
} else if (CalledFunction && CalledFunction->getSubprogram() &&
!CalledFunction->isDeclaration()) {
- if (inlineCallInstruction(*I)) {
- if (ProfileIsCS)
- ContextTracker->markContextSamplesInlined(
- localNotInlinedCallSites[I]);
- localNotInlinedCallSites.erase(I);
+ if (tryInlineCandidate(Candidate)) {
+ LocalNotInlinedCallSites.erase(I);
LocalChanged = true;
- ++NumCSInlined;
}
} else if (LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink) {
findCalleeFunctionSamples(*I)->findInlinedFunctions(
InlinedGUIDs, F.getParent(), PSI->getOrCompHotCountThreshold());
}
}
- if (LocalChanged) {
- Changed = true;
- } else {
- break;
- }
+ Changed |= LocalChanged;
}
+  // For CS profile, the profile for a not-inlined context will be merged
+  // when the base profile is retrieved.
+ if (ProfileIsCS)
+ return Changed;
+
// Accumulate not inlined callsite information into notInlinedSamples
- for (const auto &Pair : localNotInlinedCallSites) {
+ for (const auto &Pair : LocalNotInlinedCallSites) {
CallBase *I = Pair.getFirst();
Function *Callee = I->getCalledFunction();
if (!Callee || Callee->isDeclaration())
@@ -1232,6 +1341,266 @@ bool SampleProfileLoader::inlineHotFunctions(
return Changed;
}
+bool SampleProfileLoader::tryInlineCandidate(
+ InlineCandidate &Candidate, SmallVector<CallBase *, 8> *InlinedCallSites) {
+
+ CallBase &CB = *Candidate.CallInstr;
+ Function *CalledFunction = CB.getCalledFunction();
+ assert(CalledFunction && "Expect a callee with definition");
+ DebugLoc DLoc = CB.getDebugLoc();
+ BasicBlock *BB = CB.getParent();
+
+ InlineCost Cost = shouldInlineCandidate(Candidate);
+ if (Cost.isNever()) {
+ ORE->emit(OptimizationRemarkAnalysis(CSINLINE_DEBUG, "InlineFail", DLoc, BB)
+ << "incompatible inlining");
+ return false;
+ }
+
+ if (!Cost)
+ return false;
+
+ InlineFunctionInfo IFI(nullptr, GetAC);
+ if (InlineFunction(CB, IFI).isSuccess()) {
+ // The call to InlineFunction erases I, so we can't pass it here.
+ emitInlinedInto(*ORE, DLoc, BB, *CalledFunction, *BB->getParent(), Cost,
+ true, CSINLINE_DEBUG);
+
+ // Now populate the list of newly exposed call sites.
+ if (InlinedCallSites) {
+ InlinedCallSites->clear();
+ for (auto &I : IFI.InlinedCallSites)
+ InlinedCallSites->push_back(I);
+ }
+
+ if (ProfileIsCS)
+ ContextTracker->markContextSamplesInlined(Candidate.CalleeSamples);
+ ++NumCSInlined;
+
+ // Prorate inlined probes for a duplicated inlining callsite which probably
+ // has a distribution less than 100%. Samples for an inlinee should be
+ // distributed among the copies of the original callsite based on each
+ // callsite's distribution factor for counts accuracy. Note that an inlined
+ // probe may come with its own distribution factor if it has been duplicated
+    // in the inlinee body. The two factors are multiplied to reflect the
+ // aggregation of duplication.
+ if (Candidate.CallsiteDistribution < 1) {
+ for (auto &I : IFI.InlinedCallSites) {
+ if (Optional<PseudoProbe> Probe = extractProbe(*I))
+ setProbeDistributionFactor(*I, Probe->Factor *
+ Candidate.CallsiteDistribution);
+ }
+ NumDuplicatedInlinesite++;
+ }
+
+ return true;
+ }
+ return false;
+}
+
+bool SampleProfileLoader::getInlineCandidate(InlineCandidate *NewCandidate,
+ CallBase *CB) {
+ assert(CB && "Expect non-null call instruction");
+
+ if (isa<IntrinsicInst>(CB))
+ return false;
+
+ // Find the callee's profile. For indirect call, find hottest target profile.
+ const FunctionSamples *CalleeSamples = findCalleeFunctionSamples(*CB);
+ if (!CalleeSamples)
+ return false;
+
+ float Factor = 1.0;
+ if (Optional<PseudoProbe> Probe = extractProbe(*CB))
+ Factor = Probe->Factor;
+
+ uint64_t CallsiteCount = 0;
+ ErrorOr<uint64_t> Weight = getBlockWeight(CB->getParent());
+ if (Weight)
+ CallsiteCount = Weight.get();
+ if (CalleeSamples)
+ CallsiteCount = std::max(
+ CallsiteCount, uint64_t(CalleeSamples->getEntrySamples() * Factor));
+
+ *NewCandidate = {CB, CalleeSamples, CallsiteCount, Factor};
+ return true;
+}
+
+InlineCost
+SampleProfileLoader::shouldInlineCandidate(InlineCandidate &Candidate) {
+ std::unique_ptr<InlineAdvice> Advice = nullptr;
+ if (ExternalInlineAdvisor) {
+ Advice = ExternalInlineAdvisor->getAdvice(*Candidate.CallInstr);
+ if (!Advice->isInliningRecommended()) {
+ Advice->recordUnattemptedInlining();
+ return InlineCost::getNever("not previously inlined");
+ }
+ Advice->recordInlining();
+ return InlineCost::getAlways("previously inlined");
+ }
+
+  // Adjust the threshold based on call site hotness; only do this for the
+  // callsite-prioritized inliner because otherwise the cost-benefit check is
+  // done earlier.
+ int SampleThreshold = SampleColdCallSiteThreshold;
+ if (CallsitePrioritizedInline) {
+ if (Candidate.CallsiteCount > PSI->getHotCountThreshold())
+ SampleThreshold = SampleHotCallSiteThreshold;
+ else if (!ProfileSizeInline)
+ return InlineCost::getNever("cold callsite");
+ }
+
+ Function *Callee = Candidate.CallInstr->getCalledFunction();
+ assert(Callee && "Expect a definition for inline candidate of direct call");
+
+ InlineParams Params = getInlineParams();
+ Params.ComputeFullInlineCost = true;
+ // Checks if there is anything in the reachable portion of the callee at
+ // this callsite that makes this inlining potentially illegal. Need to
+ // set ComputeFullInlineCost, otherwise getInlineCost may return early
+ // when cost exceeds threshold without checking all IRs in the callee.
+  // The actual cost does not matter because we only check isNever() to
+ // see if it is legal to inline the callsite.
+ InlineCost Cost = getInlineCost(*Candidate.CallInstr, Callee, Params,
+ GetTTI(*Callee), GetAC, GetTLI);
+
+ // Honor always inline and never inline from call analyzer
+ if (Cost.isNever() || Cost.isAlways())
+ return Cost;
+
+ // For old FDO inliner, we inline the call site as long as cost is not
+ // "Never". The cost-benefit check is done earlier.
+ if (!CallsitePrioritizedInline) {
+ return InlineCost::get(Cost.getCost(), INT_MAX);
+ }
+
+  // Otherwise only use the cost from the call analyzer, but overwrite the
+  // threshold with the Sample PGO threshold.
+ return InlineCost::get(Cost.getCost(), SampleThreshold);
+}
+
+bool SampleProfileLoader::inlineHotFunctionsWithPriority(
+ Function &F, DenseSet<GlobalValue::GUID> &InlinedGUIDs) {
+ DenseSet<Instruction *> PromotedInsns;
+ assert(ProfileIsCS && "Priority-based inliner only works with CSSPGO now");
+
+ // ProfAccForSymsInList is used in callsiteIsHot. The assertion makes sure
+ // the profile symbol list is ignored when profile-sample-accurate is on.
+ assert((!ProfAccForSymsInList ||
+ (!ProfileSampleAccurate &&
+ !F.hasFnAttribute("profile-sample-accurate"))) &&
+ "ProfAccForSymsInList should be false when profile-sample-accurate "
+ "is enabled");
+
+ // Populate the worklist with the initial call sites from the root
+ // inliner, along with their call site weights.
+ CandidateQueue CQueue;
+ InlineCandidate NewCandidate;
+ for (auto &BB : F) {
+ for (auto &I : BB.getInstList()) {
+ auto *CB = dyn_cast<CallBase>(&I);
+ if (!CB)
+ continue;
+ if (getInlineCandidate(&NewCandidate, CB))
+ CQueue.push(NewCandidate);
+ }
+ }
+
+ // Cap the size growth from profile-guided inlining. This is needed even
+ // though the cost of each inline candidate already accounts for callee
+ // size, because with top-down inlining we can grow the inlined size
+ // significantly with a large number of smaller inlinees that each pass
+ // the cost check. (A numeric sketch follows this function.)
+ assert(ProfileInlineLimitMax >= ProfileInlineLimitMin &&
+ "Max inline size limit should not be smaller than min inline size "
+ "limit.");
+ unsigned SizeLimit = F.getInstructionCount() * ProfileInlineGrowthLimit;
+ SizeLimit = std::min(SizeLimit, (unsigned)ProfileInlineLimitMax);
+ SizeLimit = std::max(SizeLimit, (unsigned)ProfileInlineLimitMin);
+ if (ExternalInlineAdvisor)
+ SizeLimit = std::numeric_limits<unsigned>::max();
+
+ // Perform iterative BFS-order, call-site-prioritized inlining.
+ bool Changed = false;
+ while (!CQueue.empty() && F.getInstructionCount() < SizeLimit) {
+ InlineCandidate Candidate = CQueue.top();
+ CQueue.pop();
+ CallBase *I = Candidate.CallInstr;
+ Function *CalledFunction = I->getCalledFunction();
+
+ if (CalledFunction == &F)
+ continue;
+ if (I->isIndirectCall()) {
+ if (PromotedInsns.count(I))
+ continue;
+ uint64_t Sum;
+ auto CalleeSamples = findIndirectCallFunctionSamples(*I, Sum);
+ uint64_t SumOrigin = Sum;
+ Sum *= Candidate.CallsiteDistribution;
+ for (const auto *FS : CalleeSamples) {
+ // TODO: Consider disabling pre-LTO ICP for MonoLTO as well.
+ if (LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink) {
+ FS->findInlinedFunctions(InlinedGUIDs, F.getParent(),
+ PSI->getOrCompHotCountThreshold());
+ continue;
+ }
+ uint64_t EntryCountDistributed =
+ FS->getEntrySamples() * Candidate.CallsiteDistribution;
+ // In addition to the regular inline cost check, we also need to make
+ // sure ICP isn't introducing excessive speculative checks even if an
+ // individual target looks beneficial to promote and inline. That means
+ // we should only do ICP when there is a small number of dominant targets.
+ if (EntryCountDistributed < SumOrigin / ProfileICPThreshold)
+ break;
+ // TODO: Fix CallAnalyzer to handle all indirect calls.
+ // For indirect calls, we don't run CallAnalyzer to get an InlineCost
+ // before actual inlining. This is because we could see two different
+ // types from the same definition, which makes CallAnalyzer choke as it
+ // expects matching parameter types on both the caller and callee sides.
+ // See the example from PR18962 for the triggering cases (the bug was
+ // fixed, but we still generate different types).
+ if (!PSI->isHotCount(EntryCountDistributed))
+ break;
+ SmallVector<CallBase *, 8> InlinedCallSites;
+ // Attach function profile for promoted indirect callee, and update
+ // call site count for the promoted inline candidate too.
+ Candidate = {I, FS, EntryCountDistributed,
+ Candidate.CallsiteDistribution};
+ if (tryPromoteAndInlineCandidate(F, Candidate, SumOrigin, Sum,
+ PromotedInsns, &InlinedCallSites)) {
+ for (auto *CB : InlinedCallSites) {
+ if (getInlineCandidate(&NewCandidate, CB))
+ CQueue.emplace(NewCandidate);
+ }
+ Changed = true;
+ }
+ }
+ } else if (CalledFunction && CalledFunction->getSubprogram() &&
+ !CalledFunction->isDeclaration()) {
+ SmallVector<CallBase *, 8> InlinedCallSites;
+ if (tryInlineCandidate(Candidate, &InlinedCallSites)) {
+ for (auto *CB : InlinedCallSites) {
+ if (getInlineCandidate(&NewCandidate, CB))
+ CQueue.emplace(NewCandidate);
+ }
+ Changed = true;
+ }
+ } else if (LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink) {
+ findCalleeFunctionSamples(*I)->findInlinedFunctions(
+ InlinedGUIDs, F.getParent(), PSI->getOrCompHotCountThreshold());
+ }
+ }
+
+ if (!CQueue.empty()) {
+ if (SizeLimit == (unsigned)ProfileInlineLimitMax)
+ ++NumCSInlinedHitMaxLimit;
+ else if (SizeLimit == (unsigned)ProfileInlineLimitMin)
+ ++NumCSInlinedHitMinLimit;
+ else
+ ++NumCSInlinedHitGrowthLimit;
+ }
+
+ return Changed;
+}
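
A numeric sketch of the size cap computed at the top of inlineHotFunctionsWithPriority, with assumed flag values; the growth/min/max numbers below are illustrative, not the options' documented defaults:

    // Assumed: ProfileInlineGrowthLimit = 12, ProfileInlineLimitMin = 100,
    // ProfileInlineLimitMax = 10000, and a 50-instruction function.
    unsigned FuncSize = 50;                   // F.getInstructionCount()
    unsigned SizeLimit = FuncSize * 12;       // 600
    SizeLimit = std::min(SizeLimit, 10000u);  // max cap: still 600
    SizeLimit = std::max(SizeLimit, 100u);    // min floor: still 600
    // A 4-instruction wrapper would instead be floored at 100, so tiny
    // functions still get meaningful inlining headroom.
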
+
/// Find equivalence classes for the given block.
///
/// This finds all the blocks that are guaranteed to execute the same
@@ -1654,6 +2023,14 @@ void SampleProfileLoader::propagateWeights(Function &F) {
auto T = FS->findCallTargetMapAt(CallSite);
if (!T || T.get().empty())
continue;
+ // Prorate the callsite counts to reflect what has already been done to
+ // the callsite, such as ICP or callsite cloning.
+ if (FunctionSamples::ProfileIsProbeBased) {
+ if (Optional<PseudoProbe> Probe = extractProbe(I)) {
+ if (Probe->Factor < 1)
+ T = SampleRecord::adjustCallTargets(T.get(), Probe->Factor);
+ }
+ }
SmallVector<InstrProfValueData, 2> SortedCallTargets =
GetSortedValueDataFromCallTargets(T.get());
uint64_t Sum;
@@ -1833,7 +2210,10 @@ bool SampleProfileLoader::emitAnnotations(Function &F) {
}
DenseSet<GlobalValue::GUID> InlinedGUIDs;
- Changed |= inlineHotFunctions(F, InlinedGUIDs);
+ if (ProfileIsCS && CallsitePrioritizedInline)
+ Changed |= inlineHotFunctionsWithPriority(F, InlinedGUIDs);
+ else
+ Changed |= inlineHotFunctions(F, InlinedGUIDs);
// Compute basic block weights.
Changed |= computeBlockWeights(F);
@@ -1978,6 +2358,12 @@ bool SampleProfileLoader::doInitialization(Module &M,
ProfileIsCS = true;
FunctionSamples::ProfileIsCS = true;
+ // Enable the priority-based inliner and size inlining by default for CSSPGO.
+ if (!ProfileSizeInline.getNumOccurrences())
+ ProfileSizeInline = true;
+ if (!CallsitePrioritizedInline.getNumOccurrences())
+ CallsitePrioritizedInline = true;
+
// Tracker for profiles under different context
ContextTracker =
std::make_unique<SampleContextTracker>(Reader->getProfiles());
diff --git a/llvm/lib/Transforms/IPO/SampleProfileProbe.cpp b/llvm/lib/Transforms/IPO/SampleProfileProbe.cpp
index 7cecd20b78d8..a885c3ee4ded 100644
--- a/llvm/lib/Transforms/IPO/SampleProfileProbe.cpp
+++ b/llvm/lib/Transforms/IPO/SampleProfileProbe.cpp
@@ -12,6 +12,7 @@
#include "llvm/Transforms/IPO/SampleProfileProbe.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
@@ -25,8 +26,10 @@
#include "llvm/IR/MDBuilder.h"
#include "llvm/ProfileData/SampleProf.h"
#include "llvm/Support/CRC.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Transforms/Instrumentation.h"
#include "llvm/Transforms/Utils/ModuleUtils.h"
+#include <unordered_set>
#include <vector>
using namespace llvm;
@@ -35,6 +38,115 @@ using namespace llvm;
STATISTIC(ArtificialDbgLine,
"Number of probes that have an artificial debug line");
+static cl::opt<bool>
+ VerifyPseudoProbe("verify-pseudo-probe", cl::init(false), cl::Hidden,
+ cl::desc("Do pseudo probe verification"));
+
+static cl::list<std::string> VerifyPseudoProbeFuncList(
+ "verify-pseudo-probe-funcs", cl::Hidden,
+ cl::desc("The option to specify the name of the functions to verify."));
+
+static cl::opt<bool>
+ UpdatePseudoProbe("update-pseudo-probe", cl::init(true), cl::Hidden,
+ cl::desc("Update pseudo probe distribution factor"));
+
+bool PseudoProbeVerifier::shouldVerifyFunction(const Function *F) {
+ // Skip function declarations.
+ if (F->isDeclaration())
+ return false;
+ // Skip functions that will not be emitted into the object file. The
+ // prevailing definition will be verified instead.
+ if (F->hasAvailableExternallyLinkage())
+ return false;
+ // Match against the user-specified function list, if any.
+ static std::unordered_set<std::string> VerifyFuncNames(
+ VerifyPseudoProbeFuncList.begin(), VerifyPseudoProbeFuncList.end());
+ return VerifyFuncNames.empty() || VerifyFuncNames.count(F->getName().str());
+}
+
+void PseudoProbeVerifier::registerCallbacks(PassInstrumentationCallbacks &PIC) {
+ if (VerifyPseudoProbe) {
+ PIC.registerAfterPassCallback(
+ [this](StringRef P, Any IR, const PreservedAnalyses &) {
+ this->runAfterPass(P, IR);
+ });
+ }
+}
+
+// Callback to run after each transformation for the new pass manager.
+void PseudoProbeVerifier::runAfterPass(StringRef PassID, Any IR) {
+ std::string Banner =
+ "\n*** Pseudo Probe Verification After " + PassID.str() + " ***\n";
+ dbgs() << Banner;
+ if (any_isa<const Module *>(IR))
+ runAfterPass(any_cast<const Module *>(IR));
+ else if (any_isa<const Function *>(IR))
+ runAfterPass(any_cast<const Function *>(IR));
+ else if (any_isa<const LazyCallGraph::SCC *>(IR))
+ runAfterPass(any_cast<const LazyCallGraph::SCC *>(IR));
+ else if (any_isa<const Loop *>(IR))
+ runAfterPass(any_cast<const Loop *>(IR));
+ else
+ llvm_unreachable("Unknown IR unit");
+}
+
+void PseudoProbeVerifier::runAfterPass(const Module *M) {
+ for (const Function &F : *M)
+ runAfterPass(&F);
+}
+
+void PseudoProbeVerifier::runAfterPass(const LazyCallGraph::SCC *C) {
+ for (const LazyCallGraph::Node &N : *C)
+ runAfterPass(&N.getFunction());
+}
+
+void PseudoProbeVerifier::runAfterPass(const Function *F) {
+ if (!shouldVerifyFunction(F))
+ return;
+ ProbeFactorMap ProbeFactors;
+ for (const auto &BB : *F)
+ collectProbeFactors(&BB, ProbeFactors);
+ verifyProbeFactors(F, ProbeFactors);
+}
+
+void PseudoProbeVerifier::runAfterPass(const Loop *L) {
+ const Function *F = L->getHeader()->getParent();
+ runAfterPass(F);
+}
+
+void PseudoProbeVerifier::collectProbeFactors(const BasicBlock *Block,
+ ProbeFactorMap &ProbeFactors) {
+ for (const auto &I : *Block) {
+ if (Optional<PseudoProbe> Probe = extractProbe(I))
+ ProbeFactors[Probe->Id] += Probe->Factor;
+ }
+}
+
+void PseudoProbeVerifier::verifyProbeFactors(
+ const Function *F, const ProbeFactorMap &ProbeFactors) {
+ bool BannerPrinted = false;
+ auto &PrevProbeFactors = FunctionProbeFactors[F->getName()];
+ for (const auto &I : ProbeFactors) {
+ float CurProbeFactor = I.second;
+ if (PrevProbeFactors.count(I.first)) {
+ float PrevProbeFactor = PrevProbeFactors[I.first];
+ if (std::abs(CurProbeFactor - PrevProbeFactor) >
+ DistributionFactorVariance) {
+ if (!BannerPrinted) {
+ dbgs() << "Function " << F->getName() << ":\n";
+ BannerPrinted = true;
+ }
+ dbgs() << "Probe " << I.first << "\tprevious factor "
+ << format("%0.2f", PrevProbeFactor) << "\tcurrent factor "
+ << format("%0.2f", CurProbeFactor) << "\n";
+ }
+ }
+
+ // Record the current factor as the baseline for the next pass.
+ PrevProbeFactors[I.first] = I.second;
+ }
+}
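
A self-contained sketch of what verifyProbeFactors flags, using invented numbers and a hypothetical variance bound of 0.1 (DistributionFactorVariance is assumed to be a small constant; its actual value is not shown in this hunk):

    #include <cmath>
    #include <cstdint>
    #include <cstdio>
    #include <unordered_map>

    int main() {
      // Probe 7 summed to 1.0 after the previous pass and 2.0 after this
      // one, e.g. because a duplicating transform forgot to split factors.
      std::unordered_map<uint64_t, float> Prev{{7, 1.0f}}, Cur{{7, 2.0f}};
      for (const auto &KV : Cur)
        if (Prev.count(KV.first) &&
            std::abs(KV.second - Prev[KV.first]) > 0.1f)
          std::printf("Probe %llu\tprevious factor %0.2f\tcurrent %0.2f\n",
                      (unsigned long long)KV.first, Prev[KV.first], KV.second);
    }
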
+
PseudoProbeManager::PseudoProbeManager(const Module &M) {
if (NamedMDNode *FuncInfo = M.getNamedMetadata(PseudoProbeDescMetadataName)) {
for (const auto *Operand : FuncInfo->operands()) {
@@ -201,7 +313,8 @@ void SampleProfileProber::instrumentOneFunc(Function &F, TargetMachine *TM) {
Function *ProbeFn =
llvm::Intrinsic::getDeclaration(M, Intrinsic::pseudoprobe);
Value *Args[] = {Builder.getInt64(Guid), Builder.getInt64(Index),
- Builder.getInt32(0)};
+ Builder.getInt32(0),
+ Builder.getInt64(PseudoProbeFullDistributionFactor)};
auto *Probe = Builder.CreateCall(ProbeFn, Args);
AssignDebugLoc(Probe);
}
@@ -219,7 +332,8 @@ void SampleProfileProber::instrumentOneFunc(Function &F, TargetMachine *TM) {
// Leverage the 32-bit discriminator field of debug data to store the ID and
// type of a callsite probe. This gets rid of the dependency on plumbing
// customized metadata through the codegen pipeline.
- uint32_t V = PseudoProbeDwarfDiscriminator::packProbeData(Index, Type);
+ uint32_t V = PseudoProbeDwarfDiscriminator::packProbeData(
+ Index, Type, 0, PseudoProbeDwarfDiscriminator::FullDistributionFactor);
if (auto DIL = Call->getDebugLoc()) {
DIL = DIL->cloneWithDiscriminator(V);
Call->setDebugLoc(DIL);
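
A conceptual sketch of packing probe data into the 32-bit discriminator; the field widths below are hypothetical, the real layout is defined by PseudoProbeDwarfDiscriminator and is not reproduced here:

    #include <cstdint>

    // Assumed layout for illustration only: 16-bit index, 4-bit type,
    // 4-bit attributes, 8-bit distribution factor.
    uint32_t packProbeDataSketch(uint32_t Index, uint32_t Type,
                                 uint32_t Attr, uint32_t Factor) {
      return (Index << 16) | ((Type & 0xF) << 12) | ((Attr & 0xF) << 8) |
             (Factor & 0xFF);
    }
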
@@ -274,3 +388,47 @@ PreservedAnalyses SampleProfileProbePass::run(Module &M,
return PreservedAnalyses::none();
}
+
+void PseudoProbeUpdatePass::runOnFunction(Function &F,
+ FunctionAnalysisManager &FAM) {
+ BlockFrequencyInfo &BFI = FAM.getResult<BlockFrequencyAnalysis>(F);
+ auto BBProfileCount = [&BFI](BasicBlock *BB) {
+ return BFI.getBlockProfileCount(BB)
+ ? BFI.getBlockProfileCount(BB).getValue()
+ : 0;
+ };
+
+ // Collect the sum of execution weight for each probe.
+ ProbeFactorMap ProbeFactors;
+ for (auto &Block : F) {
+ for (auto &I : Block) {
+ if (Optional<PseudoProbe> Probe = extractProbe(I))
+ ProbeFactors[Probe->Id] += BBProfileCount(&Block);
+ }
+ }
+
+ // Fix up over-counted probes.
+ for (auto &Block : F) {
+ for (auto &I : Block) {
+ if (Optional<PseudoProbe> Probe = extractProbe(I)) {
+ float Sum = ProbeFactors[Probe->Id];
+ if (Sum != 0)
+ setProbeDistributionFactor(I, BBProfileCount(&Block) / Sum);
+ }
+ }
+ }
+}
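
A worked example of the fix-up arithmetic in runOnFunction, with invented block counts:

    // A probe with the same id appears in two blocks after duplication,
    // with profile counts 30 and 70. The first loop sums them to 100; the
    // second then assigns factors 0.3 and 0.7, so the two copies jointly
    // account for exactly one execution of the original probe.
    float Sum = 30.0f + 70.0f;        // ProbeFactors[Probe->Id]
    float FactorA = 30.0f / Sum;      // 0.3
    float FactorB = 70.0f / Sum;      // 0.7
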
+
+PreservedAnalyses PseudoProbeUpdatePass::run(Module &M,
+ ModuleAnalysisManager &AM) {
+ if (UpdatePseudoProbe) {
+ for (auto &F : M) {
+ if (F.isDeclaration())
+ continue;
+ FunctionAnalysisManager &FAM =
+ AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
+ runOnFunction(F, FAM);
+ }
+ }
+ return PreservedAnalyses::none();
+}
diff --git a/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp b/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp
index 18717394d384..822a786fc7c7 100644
--- a/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp
@@ -1114,12 +1114,16 @@ void LoopUnswitch::emitPreheaderBranchOnCondition(
Loop *L = LI->getLoopFor(I->getParent());
auto *DefiningAccess = MemA->getDefiningAccess();
- // If the defining access is a MemoryPhi in the header, get the incoming
- // value for the pre-header as defining access.
- if (DefiningAccess->getBlock() == I->getParent()) {
+ // Walk up to the first defining access that lies outside the loop.
+ while (L->contains(DefiningAccess->getBlock())) {
+ // If the defining access is a MemoryPhi, get the incoming
+ // value for the pre-header as defining access.
if (auto *MemPhi = dyn_cast<MemoryPhi>(DefiningAccess)) {
DefiningAccess =
MemPhi->getIncomingValueForBlock(L->getLoopPreheader());
+ } else {
+ DefiningAccess =
+ cast<MemoryDef>(DefiningAccess)->getDefiningAccess();
}
}
MSSAU->createMemoryAccessInBB(New, DefiningAccess, New->getParent(),
diff --git a/llvm/lib/Transforms/Utils/BuildLibCalls.cpp b/llvm/lib/Transforms/Utils/BuildLibCalls.cpp
index f4afa3ad4623..dba5403f272a 100644
--- a/llvm/lib/Transforms/Utils/BuildLibCalls.cpp
+++ b/llvm/lib/Transforms/Utils/BuildLibCalls.cpp
@@ -170,16 +170,6 @@ static bool setRetAndArgsNoUndef(Function &F) {
return setRetNoUndef(F) | setArgsNoUndef(F);
}
-static bool setRetNonNull(Function &F) {
- assert(F.getReturnType()->isPointerTy() &&
- "nonnull applies only to pointers");
- if (F.hasAttribute(AttributeList::ReturnIndex, Attribute::NonNull))
- return false;
- F.addAttribute(AttributeList::ReturnIndex, Attribute::NonNull);
- ++NumNonNull;
- return true;
-}
-
static bool setReturnedArg(Function &F, unsigned ArgNo) {
if (F.hasParamAttribute(ArgNo, Attribute::Returned))
return false;
@@ -1005,63 +995,6 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) {
Changed |= setDoesNotCapture(F, 0);
Changed |= setDoesNotCapture(F, 1);
return Changed;
- case LibFunc_ZdlPvRKSt9nothrow_t: // delete(void*, nothrow)
- case LibFunc_ZdlPvSt11align_val_tRKSt9nothrow_t: // delete(void*, align_val_t, nothrow)
- case LibFunc_ZdaPvRKSt9nothrow_t: // delete[](void*, nothrow)
- case LibFunc_ZdaPvSt11align_val_tRKSt9nothrow_t: // delete[](void*, align_val_t, nothrow)
- Changed |= setDoesNotThrow(F);
- LLVM_FALLTHROUGH;
- case LibFunc_ZdlPv: // delete(void*)
- case LibFunc_ZdlPvj: // delete(void*, unsigned int)
- case LibFunc_ZdlPvm: // delete(void*, unsigned long)
- case LibFunc_ZdaPv: // delete[](void*)
- case LibFunc_ZdaPvj: // delete[](void*, unsigned int)
- case LibFunc_ZdaPvm: // delete[](void*, unsigned long)
- case LibFunc_ZdlPvSt11align_val_t: // delete(void*, align_val_t)
- case LibFunc_ZdlPvjSt11align_val_t: // delete(void*, unsigned int, align_val_t)
- case LibFunc_ZdlPvmSt11align_val_t: // delete(void*, unsigned long, align_val_t)
- case LibFunc_ZdaPvSt11align_val_t: // delete[](void*, align_val_t)
- case LibFunc_ZdaPvjSt11align_val_t: // delete[](void*, unsigned int, align_val_t)
- case LibFunc_ZdaPvmSt11align_val_t: // delete[](void*, unsigned long, align_val_t);
- Changed |= setOnlyAccessesInaccessibleMemOrArgMem(F);
- Changed |= setArgsNoUndef(F);
- Changed |= setWillReturn(F);
- Changed |= setDoesNotCapture(F, 0);
- return Changed;
- case LibFunc_ZnwjRKSt9nothrow_t: // new(unsigned int, nothrow)
- case LibFunc_ZnwmRKSt9nothrow_t: // new(unsigned long, nothrow)
- case LibFunc_ZnajRKSt9nothrow_t: // new[](unsigned int, nothrow)
- case LibFunc_ZnamRKSt9nothrow_t: // new[](unsigned long, nothrow)
- case LibFunc_ZnwjSt11align_val_tRKSt9nothrow_t: // new(unsigned int, align_val_t, nothrow)
- case LibFunc_ZnwmSt11align_val_tRKSt9nothrow_t: // new(unsigned long, align_val_t, nothrow)
- case LibFunc_ZnajSt11align_val_tRKSt9nothrow_t: // new[](unsigned int, align_val_t, nothrow)
- case LibFunc_ZnamSt11align_val_tRKSt9nothrow_t: // new[](unsigned long, align_val_t, nothrow)
- // Nothrow operator new may return null pointer
- Changed |= setDoesNotThrow(F);
- Changed |= setOnlyAccessesInaccessibleMemory(F);
- Changed |= setRetNoUndef(F);
- Changed |= setRetDoesNotAlias(F);
- Changed |= setWillReturn(F);
- return Changed;
- case LibFunc_Znwj: // new(unsigned int)
- case LibFunc_Znwm: // new(unsigned long)
- case LibFunc_Znaj: // new[](unsigned int)
- case LibFunc_Znam: // new[](unsigned long)
- case LibFunc_ZnwjSt11align_val_t: // new(unsigned int, align_val_t)
- case LibFunc_ZnwmSt11align_val_t: // new(unsigned long, align_val_t)
- case LibFunc_ZnajSt11align_val_t: // new[](unsigned int, align_val_t)
- case LibFunc_ZnamSt11align_val_t: // new[](unsigned long, align_val_t)
- case LibFunc_msvc_new_int: // new(unsigned int)
- case LibFunc_msvc_new_longlong: // new(unsigned long long)
- case LibFunc_msvc_new_array_int: // new[](unsigned int)
- case LibFunc_msvc_new_array_longlong: // new[](unsigned long long)
- Changed |= setOnlyAccessesInaccessibleMemory(F);
- // Operator new always returns a nonnull noalias pointer
- Changed |= setRetNoUndef(F);
- Changed |= setRetNonNull(F);
- Changed |= setRetDoesNotAlias(F);
- Changed |= setWillReturn(F);
- return Changed;
// TODO: add LibFunc entries for:
// case LibFunc_memset_pattern4:
// case LibFunc_memset_pattern8:
diff --git a/llvm/lib/Transforms/Utils/InlineFunction.cpp b/llvm/lib/Transforms/Utils/InlineFunction.cpp
index 0ac8fa537f4e..3026342cc4a6 100644
--- a/llvm/lib/Transforms/Utils/InlineFunction.cpp
+++ b/llvm/lib/Transforms/Utils/InlineFunction.cpp
@@ -921,14 +921,20 @@ void ScopedAliasMetadataDeepCloner::remap(ValueToValueMapTy &VMap) {
if (!I)
continue;
+ // Only update scopes when we find them in the map. If they are not
+ // present, it is because we already handled that instruction before.
+ // This is faster than tracking which instructions we already updated.
if (MDNode *M = I->getMetadata(LLVMContext::MD_alias_scope))
- I->setMetadata(LLVMContext::MD_alias_scope, MDMap[M]);
+ if (MDNode *MNew = MDMap.lookup(M))
+ I->setMetadata(LLVMContext::MD_alias_scope, MNew);
if (MDNode *M = I->getMetadata(LLVMContext::MD_noalias))
- I->setMetadata(LLVMContext::MD_noalias, MDMap[M]);
+ if (MDNode *MNew = MDMap.lookup(M))
+ I->setMetadata(LLVMContext::MD_noalias, MNew);
if (auto *Decl = dyn_cast<NoAliasScopeDeclInst>(I))
- Decl->setScopeList(MDMap[Decl->getScopeList()]);
+ if (MDNode *MNew = MDMap.lookup(Decl->getScopeList()))
+ Decl->setScopeList(MNew);
}
}
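
The reason lookup() is used above rather than operator[]: on a DenseMap, lookup() returns a value-initialized result (nullptr for pointer values) without inserting, whereas operator[] inserts a default entry for a missing key. A minimal sketch:

    #include "llvm/ADT/DenseMap.h"
    #include "llvm/IR/Metadata.h"

    void lookupVsSubscript() {
      llvm::DenseMap<int, llvm::MDNode *> Map;
      llvm::MDNode *Found = Map.lookup(42); // nullptr; Map stays empty
      llvm::MDNode *&Slot = Map[42];        // inserts {42, nullptr}
      (void)Found;
      (void)Slot;
    }
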
diff --git a/llvm/lib/Transforms/Utils/LoopPeel.cpp b/llvm/lib/Transforms/Utils/LoopPeel.cpp
index cb5fee7d28e6..befacb591762 100644
--- a/llvm/lib/Transforms/Utils/LoopPeel.cpp
+++ b/llvm/lib/Transforms/Utils/LoopPeel.cpp
@@ -509,7 +509,7 @@ static void cloneLoopBlocks(
SmallVectorImpl<std::pair<BasicBlock *, BasicBlock *>> &ExitEdges,
SmallVectorImpl<BasicBlock *> &NewBlocks, LoopBlocksDFS &LoopBlocks,
ValueToValueMapTy &VMap, ValueToValueMapTy &LVMap, DominatorTree *DT,
- LoopInfo *LI) {
+ LoopInfo *LI, ArrayRef<MDNode *> LoopLocalNoAliasDeclScopes) {
BasicBlock *Header = L->getHeader();
BasicBlock *Latch = L->getLoopLatch();
BasicBlock *PreHeader = L->getLoopPreheader();
@@ -545,6 +545,15 @@ static void cloneLoopBlocks(
}
}
+ {
+ // Identify what other metadata depends on the cloned version. After
+ // cloning, replace the metadata with the corrected version for both
+ // memory instructions and noalias intrinsics.
+ std::string Ext = (Twine("Peel") + Twine(IterNumber)).str();
+ cloneAndAdaptNoAliasScopes(LoopLocalNoAliasDeclScopes, NewBlocks,
+ Header->getContext(), Ext);
+ }
+
// Recursively create the new Loop objects for nested loops, if any,
// to preserve LoopInfo.
for (Loop *ChildLoop : *L) {
@@ -769,13 +778,19 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI,
uint64_t ExitWeight = 0, FallThroughWeight = 0;
initBranchWeights(Header, LatchBR, ExitWeight, FallThroughWeight);
+ // Identify the noalias metadata that is defined inside the loop; such
+ // metadata must be cloned for each peeled iteration.
+ SmallVector<MDNode *, 6> LoopLocalNoAliasDeclScopes;
+ identifyNoAliasScopesToClone(L->getBlocks(), LoopLocalNoAliasDeclScopes);
+
// For each peeled-off iteration, make a copy of the loop.
for (unsigned Iter = 0; Iter < PeelCount; ++Iter) {
SmallVector<BasicBlock *, 8> NewBlocks;
ValueToValueMapTy VMap;
cloneLoopBlocks(L, Iter, InsertTop, InsertBot, ExitEdges, NewBlocks,
- LoopBlocks, VMap, LVMap, DT, LI);
+ LoopBlocks, VMap, LVMap, DT, LI,
+ LoopLocalNoAliasDeclScopes);
// Remap to use values from the current iteration instead of the
// previous one.
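
The cloned scopes are disambiguated per peeled iteration by the "Peel<N>" suffix built in cloneLoopBlocks above; a sketch of the naming only:

    #include "llvm/ADT/Twine.h"
    #include <string>

    // For peeled iteration 2 the cloned noalias scope names end in "Peel2".
    std::string peelSuffix(unsigned IterNumber) {
      return (llvm::Twine("Peel") + llvm::Twine(IterNumber)).str();
    }
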
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index ea0d7673edf6..47635dbdda02 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -5504,11 +5504,9 @@ LoopVectorizationCostModel::computeMaxVF(ElementCount UserVF, unsigned UserIC) {
return None;
}
- ElementCount MaxVF = computeFeasibleMaxVF(TC, UserVF);
-
switch (ScalarEpilogueStatus) {
case CM_ScalarEpilogueAllowed:
- return MaxVF;
+ return computeFeasibleMaxVF(TC, UserVF);
case CM_ScalarEpilogueNotAllowedUsePredicate:
LLVM_FALLTHROUGH;
case CM_ScalarEpilogueNotNeededUsePredicate:
@@ -5546,7 +5544,7 @@ LoopVectorizationCostModel::computeMaxVF(ElementCount UserVF, unsigned UserIC) {
LLVM_DEBUG(dbgs() << "LV: Cannot fold tail by masking: vectorize with a "
"scalar epilogue instead.\n");
ScalarEpilogueStatus = CM_ScalarEpilogueAllowed;
- return MaxVF;
+ return computeFeasibleMaxVF(TC, UserVF);
}
return None;
}
@@ -5563,6 +5561,7 @@ LoopVectorizationCostModel::computeMaxVF(ElementCount UserVF, unsigned UserIC) {
InterleaveInfo.invalidateGroupsRequiringScalarEpilogue();
}
+ ElementCount MaxVF = computeFeasibleMaxVF(TC, UserVF);
assert(!MaxVF.isScalable() &&
"Scalable vectors do not yet support tail folding");
assert((UserVF.isNonZero() || isPowerOf2_32(MaxVF.getFixedValue())) &&
diff --git a/llvm/tools/llvm-profdata/llvm-profdata.cpp b/llvm/tools/llvm-profdata/llvm-profdata.cpp
index 8dc43924c067..7e53c30c7579 100644
--- a/llvm/tools/llvm-profdata/llvm-profdata.cpp
+++ b/llvm/tools/llvm-profdata/llvm-profdata.cpp
@@ -696,7 +696,7 @@ mergeSampleProfile(const WeightedFileVector &Inputs, SymbolRemapper *Remapper,
Remapper ? remapSamples(I->second, *Remapper, Result)
: FunctionSamples();
FunctionSamples &Samples = Remapper ? Remapped : I->second;
- StringRef FName = Samples.getName();
+ StringRef FName = Samples.getNameWithContext(true);
MergeResult(Result, ProfileMap[FName].merge(Samples, Input.Weight));
if (Result != sampleprof_error::success) {
std::error_code EC = make_error_code(Result);
diff --git a/llvm/utils/TableGen/IntrinsicEmitter.cpp b/llvm/utils/TableGen/IntrinsicEmitter.cpp
index 4be0d90a45d2..978d24c8300d 100644
--- a/llvm/utils/TableGen/IntrinsicEmitter.cpp
+++ b/llvm/utils/TableGen/IntrinsicEmitter.cpp
@@ -638,13 +638,13 @@ void IntrinsicEmitter::EmitAttributes(const CodeGenIntrinsicTable &Ints,
std::max(maxArgAttrs, unsigned(intrinsic.ArgumentAttributes.size()));
unsigned &N = UniqAttributes[&intrinsic];
if (N) continue;
- assert(AttrNum < 256 && "Too many unique attributes for table!");
N = ++AttrNum;
+ assert(N < 65536 && "Too many unique attributes for table!");
}
// Emit an array of AttributeList. Most intrinsics will have at least one
// entry, for the function itself (index ~1), which is usually nounwind.
- OS << " static const uint8_t IntrinsicsToAttributesMap[] = {\n";
+ OS << " static const uint16_t IntrinsicsToAttributesMap[] = {\n";
for (unsigned i = 0, e = Ints.size(); i != e; ++i) {
const CodeGenIntrinsic &intrinsic = Ints[i];
diff --git a/openmp/runtime/src/kmp_config.h.cmake b/openmp/runtime/src/kmp_config.h.cmake
index 3d682c690fc7..f6aee7197ee8 100644
--- a/openmp/runtime/src/kmp_config.h.cmake
+++ b/openmp/runtime/src/kmp_config.h.cmake
@@ -44,8 +44,8 @@
#define OMPT_DEBUG LIBOMP_OMPT_DEBUG
#cmakedefine01 LIBOMP_OMPT_SUPPORT
#define OMPT_SUPPORT LIBOMP_OMPT_SUPPORT
-#cmakedefine01 LIBOMPTARGET_PROFILING_SUPPORT
-#define OMPTARGET_PROFILING_SUPPORT LIBOMPTARGET_PROFILING_SUPPORT
+#cmakedefine01 LIBOMP_PROFILING_SUPPORT
+#define OMP_PROFILING_SUPPORT LIBOMP_PROFILING_SUPPORT
#cmakedefine01 LIBOMP_OMPT_OPTIONAL
#define OMPT_OPTIONAL LIBOMP_OMPT_OPTIONAL
#cmakedefine01 LIBOMP_USE_ADAPTIVE_LOCKS
diff --git a/openmp/runtime/src/kmp_runtime.cpp b/openmp/runtime/src/kmp_runtime.cpp
index 4a0634d59cff..a6e32bd008e1 100644
--- a/openmp/runtime/src/kmp_runtime.cpp
+++ b/openmp/runtime/src/kmp_runtime.cpp
@@ -32,7 +32,7 @@
#include "ompt-specific.h"
#endif
-#if OMPTARGET_PROFILING_SUPPORT
+#if OMP_PROFILING_SUPPORT
#include "llvm/Support/TimeProfiler.h"
static char *ProfileTraceFile = nullptr;
#endif
@@ -5740,7 +5740,7 @@ void __kmp_free_thread(kmp_info_t *this_th) {
/* ------------------------------------------------------------------------ */
void *__kmp_launch_thread(kmp_info_t *this_thr) {
-#if OMPTARGET_PROFILING_SUPPORT
+#if OMP_PROFILING_SUPPORT
ProfileTraceFile = getenv("LIBOMPTARGET_PROFILE");
// TODO: add a configuration option for time granularity
if (ProfileTraceFile)
@@ -5848,7 +5848,7 @@ void *__kmp_launch_thread(kmp_info_t *this_thr) {
KA_TRACE(10, ("__kmp_launch_thread: T#%d done\n", gtid));
KMP_MB();
-#if OMPTARGET_PROFILING_SUPPORT
+#if OMP_PROFILING_SUPPORT
llvm::timeTraceProfilerFinishThread();
#endif
return this_thr;
diff --git a/openmp/runtime/src/kmp_settings.cpp b/openmp/runtime/src/kmp_settings.cpp
index a8522130f972..b477edbbfb42 100644
--- a/openmp/runtime/src/kmp_settings.cpp
+++ b/openmp/runtime/src/kmp_settings.cpp
@@ -3355,7 +3355,8 @@ static void __kmp_stg_parse_allocator(char const *name, char const *value,
ntraits++;
}
}
- omp_alloctrait_t traits[ntraits];
+ omp_alloctrait_t *traits =
+ (omp_alloctrait_t *)KMP_ALLOCA(ntraits * sizeof(omp_alloctrait_t));
// Helper macros
#define IS_POWER_OF_TWO(n) (((n) & ((n)-1)) == 0)
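
A hedged note on the last hunk: omp_alloctrait_t traits[ntraits] is a variable-length array, a compiler extension that standard C++ (and MSVC in particular) rejects; the replacement keeps the storage on the stack explicitly. KMP_ALLOCA is assumed here to be the runtime's portability wrapper over _alloca/__builtin_alloca:

    // Before (VLA, non-standard C++):
    //   omp_alloctrait_t traits[ntraits];
    // After (explicit stack allocation, same lifetime):
    //   omp_alloctrait_t *traits =
    //       (omp_alloctrait_t *)KMP_ALLOCA(ntraits * sizeof(omp_alloctrait_t));
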