-rw-r--r--  clang/lib/Basic/Targets/PPC.cpp | 1
-rw-r--r--  clang/lib/CodeGen/CGOpenMPRuntime.cpp | 32
-rw-r--r--  clang/lib/CodeGen/CodeGenModule.cpp | 12
-rwxr-xr-x  clang/lib/Format/TokenAnnotator.cpp | 12
-rw-r--r--  clang/lib/Format/UnwrappedLineFormatter.cpp | 7
-rw-r--r--  clang/lib/Format/UnwrappedLineParser.cpp | 89
-rw-r--r--  clang/lib/Format/UnwrappedLineParser.h | 12
-rw-r--r--  clang/lib/Headers/ppc_wrappers/xmmintrin.h | 9
-rw-r--r--  clang/lib/Sema/SemaChecking.cpp | 5
-rw-r--r--  libcxx/include/memory | 32
-rw-r--r--  lld/ELF/SyntheticSections.cpp | 4
-rw-r--r--  lldb/docs/man/lldb.rst | 4
-rw-r--r--  lldb/tools/driver/Driver.cpp | 4
-rw-r--r--  llvm/include/llvm/ADT/Any.h | 7
-rw-r--r--  llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h | 1
-rw-r--r--  llvm/include/llvm/IR/Constant.h | 4
-rw-r--r--  llvm/lib/Analysis/ConstantFolding.cpp | 15
-rw-r--r--  llvm/lib/Analysis/InstructionSimplify.cpp | 8
-rw-r--r--  llvm/lib/Analysis/MemoryDependenceAnalysis.cpp | 4
-rw-r--r--  llvm/lib/Analysis/ScalarEvolution.cpp | 4
-rw-r--r--  llvm/lib/Analysis/ValueTracking.cpp | 15
-rw-r--r--  llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp | 5
-rw-r--r--  llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp | 44
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 24
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp | 33
-rw-r--r--  llvm/lib/CodeGen/StackProtector.cpp | 24
-rw-r--r--  llvm/lib/IR/Constants.cpp | 12
-rw-r--r--  llvm/lib/MC/ELFObjectWriter.cpp | 11
-rw-r--r--  llvm/lib/Support/CommandLine.cpp | 2
-rw-r--r--  llvm/lib/Target/AArch64/AArch64ISelLowering.cpp | 43
-rw-r--r--  llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp | 2
-rw-r--r--  llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp | 22
-rw-r--r--  llvm/lib/Target/BPF/BPFMIPeephole.cpp | 3
-rw-r--r--  llvm/lib/Target/BPF/BPFPreserveDIType.cpp | 9
-rw-r--r--  llvm/lib/Target/BPF/BPFTargetMachine.cpp | 6
-rw-r--r--  llvm/lib/Target/BPF/BPFTargetMachine.h | 2
-rw-r--r--  llvm/lib/Target/BPF/BPFTargetTransformInfo.h | 61
-rw-r--r--  llvm/lib/Target/BPF/BTF.def | 1
-rw-r--r--  llvm/lib/Target/BPF/BTFDebug.cpp | 62
-rw-r--r--  llvm/lib/Target/BPF/BTFDebug.h | 11
-rw-r--r--  llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp | 6
-rw-r--r--  llvm/lib/Target/PowerPC/PPCFrameLowering.cpp | 365
-rw-r--r--  llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 62
-rw-r--r--  llvm/lib/Target/PowerPC/PPCISelLowering.h | 1
-rw-r--r--  llvm/lib/Target/PowerPC/PPCRegisterInfo.td | 43
-rw-r--r--  llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp | 21
-rw-r--r--  llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h | 3
-rw-r--r--  llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td | 20
-rw-r--r--  llvm/lib/Target/SystemZ/SystemZISelLowering.cpp | 5
-rw-r--r--  llvm/lib/Target/WebAssembly/WebAssemblyLowerEmscriptenEHSjLj.cpp | 13
-rw-r--r--  llvm/lib/Target/X86/X86ISelLowering.cpp | 2
-rw-r--r--  llvm/lib/Target/X86/X86InstrCompiler.td | 11
-rw-r--r--  llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp | 17
-rw-r--r--  llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp | 5
-rw-r--r--  llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp | 58
-rw-r--r--  llvm/lib/Transforms/Scalar/LoopRerollPass.cpp | 6
-rw-r--r--  llvm/lib/Transforms/Scalar/LowerConstantIntrinsics.cpp | 8
-rw-r--r--  llvm/lib/Transforms/Scalar/SCCP.cpp | 7
-rw-r--r--  llvm/lib/Transforms/Utils/InlineFunction.cpp | 106
59 files changed, 913 insertions(+), 504 deletions(-)
diff --git a/clang/lib/Basic/Targets/PPC.cpp b/clang/lib/Basic/Targets/PPC.cpp
index ff09c0fa2a23..6c3036836c6d 100644
--- a/clang/lib/Basic/Targets/PPC.cpp
+++ b/clang/lib/Basic/Targets/PPC.cpp
@@ -57,6 +57,7 @@ bool PPCTargetInfo::handleTargetFeatures(std::vector<std::string> &Features,
} else if (Feature == "+pcrelative-memops") {
HasPCRelativeMemops = true;
} else if (Feature == "+spe" || Feature == "+efpu2") {
+ HasStrictFP = false;
HasSPE = true;
LongDoubleWidth = LongDoubleAlign = 64;
LongDoubleFormat = &llvm::APFloat::IEEEdouble();
diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
index 83dfa0780547..caa5291ff6fa 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
@@ -409,6 +409,7 @@ class InlinedOpenMPRegionRAII {
llvm::DenseMap<const VarDecl *, FieldDecl *> LambdaCaptureFields;
FieldDecl *LambdaThisCaptureField = nullptr;
const CodeGen::CGBlockInfo *BlockInfo = nullptr;
+ bool NoInheritance = false;
public:
/// Constructs region for combined constructs.
@@ -416,16 +417,19 @@ public:
/// a list of functions used for code generation of implicitly inlined
/// regions.
InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen,
- OpenMPDirectiveKind Kind, bool HasCancel)
- : CGF(CGF) {
+ OpenMPDirectiveKind Kind, bool HasCancel,
+ bool NoInheritance = true)
+ : CGF(CGF), NoInheritance(NoInheritance) {
// Start emission for the construct.
CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo(
CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel);
- std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
- LambdaThisCaptureField = CGF.LambdaThisCaptureField;
- CGF.LambdaThisCaptureField = nullptr;
- BlockInfo = CGF.BlockInfo;
- CGF.BlockInfo = nullptr;
+ if (NoInheritance) {
+ std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
+ LambdaThisCaptureField = CGF.LambdaThisCaptureField;
+ CGF.LambdaThisCaptureField = nullptr;
+ BlockInfo = CGF.BlockInfo;
+ CGF.BlockInfo = nullptr;
+ }
}
~InlinedOpenMPRegionRAII() {
@@ -434,9 +438,11 @@ public:
cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI();
delete CGF.CapturedStmtInfo;
CGF.CapturedStmtInfo = OldCSI;
- std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
- CGF.LambdaThisCaptureField = LambdaThisCaptureField;
- CGF.BlockInfo = BlockInfo;
+ if (NoInheritance) {
+ std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
+ CGF.LambdaThisCaptureField = LambdaThisCaptureField;
+ CGF.BlockInfo = BlockInfo;
+ }
}
};
@@ -3853,7 +3859,7 @@ static void emitPrivatesInit(CodeGenFunction &CGF,
// Processing for implicitly captured variables.
InlinedOpenMPRegionRAII Region(
CGF, [](CodeGenFunction &, PrePostActionTy &) {}, OMPD_unknown,
- /*HasCancel=*/false);
+ /*HasCancel=*/false, /*NoInheritance=*/true);
SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
}
if (Type->isArrayType()) {
@@ -6214,7 +6220,9 @@ void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF,
bool HasCancel) {
if (!CGF.HaveInsertPoint())
return;
- InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel);
+ InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel,
+ InnerKind != OMPD_critical &&
+ InnerKind != OMPD_master);
CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr);
}
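A minimal sketch of the conditional save/restore pattern introduced above, using hypothetical names rather than the real CodeGenFunction members: regions such as 'master' and 'critical' are emitted inline in the enclosing function, so they pass NoInheritance=false and keep seeing the enclosing lambda captures and block info.

    #include <map>
    #include <string>
    #include <utility>

    // Hypothetical stand-in for the CodeGenFunction state being saved.
    struct EmissionState { std::map<std::string, int> Captures; };

    class ScopedCaptures {
      EmissionState &S;
      std::map<std::string, int> Saved;
      bool NoInheritance;

    public:
      ScopedCaptures(EmissionState &S, bool NoInheritance)
          : S(S), NoInheritance(NoInheritance) {
        if (NoInheritance)
          std::swap(S.Captures, Saved); // hide the enclosing captures
      }
      ~ScopedCaptures() {
        if (NoInheritance)
          std::swap(S.Captures, Saved); // restore them on scope exit
      }
    };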
diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp
index 31afbc6b4262..9c9bd4e374af 100644
--- a/clang/lib/CodeGen/CodeGenModule.cpp
+++ b/clang/lib/CodeGen/CodeGenModule.cpp
@@ -6215,15 +6215,17 @@ llvm::SanitizerStatReport &CodeGenModule::getSanStats() {
return *SanStats;
}
+
llvm::Value *
CodeGenModule::createOpenCLIntToSamplerConversion(const Expr *E,
CodeGenFunction &CGF) {
llvm::Constant *C = ConstantEmitter(CGF).emitAbstract(E, E->getType());
- auto SamplerT = getOpenCLRuntime().getSamplerType(E->getType().getTypePtr());
- auto FTy = llvm::FunctionType::get(SamplerT, {C->getType()}, false);
- return CGF.Builder.CreateCall(CreateRuntimeFunction(FTy,
- "__translate_sampler_initializer"),
- {C});
+ auto *SamplerT = getOpenCLRuntime().getSamplerType(E->getType().getTypePtr());
+ auto *FTy = llvm::FunctionType::get(SamplerT, {C->getType()}, false);
+ auto *Call = CGF.Builder.CreateCall(
+ CreateRuntimeFunction(FTy, "__translate_sampler_initializer"), {C});
+ Call->setCallingConv(Call->getCalledFunction()->getCallingConv());
+ return Call;
}
CharUnits CodeGenModule::getNaturalPointeeTypeAlignment(
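The functional change above is the setCallingConv call: the sampler initializer may use a non-default calling convention, and a convention mismatch between a call site and its callee is undefined behavior in LLVM IR. A hedged sketch of the general pattern, with an illustrative helper name:

    #include "llvm/IR/Function.h"
    #include "llvm/IR/IRBuilder.h"

    // Emit a call to a runtime helper and copy the callee's calling
    // convention onto the call site.
    llvm::CallInst *emitRuntimeCall(llvm::IRBuilder<> &B,
                                    llvm::FunctionCallee FC,
                                    llvm::ArrayRef<llvm::Value *> Args) {
      llvm::CallInst *Call = B.CreateCall(FC, Args);
      if (auto *F = llvm::dyn_cast<llvm::Function>(FC.getCallee()))
        Call->setCallingConv(F->getCallingConv());
      return Call;
    }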
diff --git a/clang/lib/Format/TokenAnnotator.cpp b/clang/lib/Format/TokenAnnotator.cpp
index 34c291ecc492..82d6cfed308d 100755
--- a/clang/lib/Format/TokenAnnotator.cpp
+++ b/clang/lib/Format/TokenAnnotator.cpp
@@ -1917,12 +1917,12 @@ private:
if (Tok.Next->isOneOf(tok::identifier, tok::kw_this))
return true;
- if (Tok.Next->is(tok::l_paren) &&
- !(Tok.Previous && Tok.Previous->is(tok::identifier) &&
- Tok.Previous->Previous &&
- Tok.Previous->Previous->isOneOf(tok::arrowstar, tok::arrow,
- tok::star)))
- return true;
+ // Look for a cast `( x ) (`.
+ if (Tok.Next->is(tok::l_paren) && Tok.Previous && Tok.Previous->Previous) {
+ if (Tok.Previous->is(tok::identifier) &&
+ Tok.Previous->Previous->is(tok::l_paren))
+ return true;
+ }
if (!Tok.Next->Next)
return false;
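An illustrative snippet (not from the patch) of what the narrowed check matches; the new condition requires the identifier to be directly parenthesized, so only the `( x ) (` shape is annotated as a cast:

    using T = int;
    int expr = 0;
    auto a = (T)(expr);     // `( T ) (` -> treated as a C-style cast
    struct S { void f(); };
    void call(S &obj, void (S::*memfn)()) {
      (obj.*memfn)();       // `.*` before the identifier -> not a cast
    }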
diff --git a/clang/lib/Format/UnwrappedLineFormatter.cpp b/clang/lib/Format/UnwrappedLineFormatter.cpp
index 5dd0ccdfa6fd..7d197310e65b 100644
--- a/clang/lib/Format/UnwrappedLineFormatter.cpp
+++ b/clang/lib/Format/UnwrappedLineFormatter.cpp
@@ -1281,13 +1281,6 @@ void UnwrappedLineFormatter::formatFirstToken(
if (Newlines)
Indent = NewlineIndent;
- // If in Whitemsmiths mode, indent start and end of blocks
- if (Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths) {
- if (RootToken.isOneOf(tok::l_brace, tok::r_brace, tok::kw_case,
- tok::kw_default))
- Indent += Style.IndentWidth;
- }
-
// Preprocessor directives get indented before the hash only if specified
if (Style.IndentPPDirectives != FormatStyle::PPDIS_BeforeHash &&
(Line.Type == LT_PreprocessorDirective ||
diff --git a/clang/lib/Format/UnwrappedLineParser.cpp b/clang/lib/Format/UnwrappedLineParser.cpp
index f689a6361a3a..bec18bd5d8df 100644
--- a/clang/lib/Format/UnwrappedLineParser.cpp
+++ b/clang/lib/Format/UnwrappedLineParser.cpp
@@ -579,17 +579,23 @@ size_t UnwrappedLineParser::computePPHash() const {
return h;
}
-void UnwrappedLineParser::parseBlock(bool MustBeDeclaration, bool AddLevel,
- bool MunchSemi) {
+void UnwrappedLineParser::parseBlock(bool MustBeDeclaration, unsigned AddLevels,
+ bool MunchSemi,
+ bool UnindentWhitesmithsBraces) {
assert(FormatTok->isOneOf(tok::l_brace, TT_MacroBlockBegin) &&
"'{' or macro block token expected");
const bool MacroBlock = FormatTok->is(TT_MacroBlockBegin);
FormatTok->setBlockKind(BK_Block);
+ // For Whitesmiths mode, jump to the next level prior to skipping over the
+ // braces.
+ if (AddLevels > 0 && Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths)
+ ++Line->Level;
+
size_t PPStartHash = computePPHash();
unsigned InitialLevel = Line->Level;
- nextToken(/*LevelDifference=*/AddLevel ? 1 : 0);
+ nextToken(/*LevelDifference=*/AddLevels);
if (MacroBlock && FormatTok->is(tok::l_paren))
parseParens();
@@ -602,10 +608,16 @@ void UnwrappedLineParser::parseBlock(bool MustBeDeclaration, bool AddLevel,
? (UnwrappedLine::kInvalidIndex)
: (CurrentLines->size() - 1 - NbPreprocessorDirectives);
+ // Whitesmiths is weird here. The brace needs to be indented for the namespace
+ // block, but the block itself may not be indented depending on the style
+ // settings. This allows the format to back up one level in those cases.
+ if (UnindentWhitesmithsBraces)
+ --Line->Level;
+
ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
MustBeDeclaration);
- if (AddLevel)
- ++Line->Level;
+ if (AddLevels > 0u && Style.BreakBeforeBraces != FormatStyle::BS_Whitesmiths)
+ Line->Level += AddLevels;
parseLevel(/*HasOpeningBrace=*/true);
if (eof())
@@ -621,7 +633,7 @@ void UnwrappedLineParser::parseBlock(bool MustBeDeclaration, bool AddLevel,
size_t PPEndHash = computePPHash();
// Munch the closing brace.
- nextToken(/*LevelDifference=*/AddLevel ? -1 : 0);
+ nextToken(/*LevelDifference=*/-AddLevels);
if (MacroBlock && FormatTok->is(tok::l_paren))
parseParens();
@@ -637,6 +649,7 @@ void UnwrappedLineParser::parseBlock(bool MustBeDeclaration, bool AddLevel,
nextToken();
Line->Level = InitialLevel;
+ FormatTok->setBlockKind(BK_Block);
if (PPStartHash == PPEndHash) {
Line->MatchingOpeningBlockLineIndex = OpeningLineIndex;
@@ -2128,15 +2141,34 @@ void UnwrappedLineParser::parseNamespace() {
if (ShouldBreakBeforeBrace(Style, InitialToken))
addUnwrappedLine();
- bool AddLevel = Style.NamespaceIndentation == FormatStyle::NI_All ||
- (Style.NamespaceIndentation == FormatStyle::NI_Inner &&
- DeclarationScopeStack.size() > 1);
- parseBlock(/*MustBeDeclaration=*/true, AddLevel);
+ unsigned AddLevels =
+ Style.NamespaceIndentation == FormatStyle::NI_All ||
+ (Style.NamespaceIndentation == FormatStyle::NI_Inner &&
+ DeclarationScopeStack.size() > 1)
+ ? 1u
+ : 0u;
+ bool ManageWhitesmithsBraces =
+ AddLevels == 0u &&
+ Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths;
+
+ // If we're in Whitesmiths mode, indent the brace if we're not indenting
+ // the whole block.
+ if (ManageWhitesmithsBraces)
+ ++Line->Level;
+
+ parseBlock(/*MustBeDeclaration=*/true, AddLevels,
+ /*MunchSemi=*/true,
+ /*UnindentWhitesmithsBraces=*/ManageWhitesmithsBraces);
+
// Munch the semicolon after a namespace. This is more common than one would
// think. Putting the semicolon into its own line is very ugly.
if (FormatTok->Tok.is(tok::semi))
nextToken();
- addUnwrappedLine();
+
+ addUnwrappedLine(AddLevels > 0 ? LineLevel::Remove : LineLevel::Keep);
+
+ if (ManageWhitesmithsBraces)
+ --Line->Level;
}
// FIXME: Add error handling.
}
@@ -2222,6 +2254,11 @@ void UnwrappedLineParser::parseDoWhile() {
return;
}
+ // If in Whitesmiths mode, the line with the while() needs to be indented
+ // to the same level as the block.
+ if (Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths)
+ ++Line->Level;
+
nextToken();
parseStructuralElement();
}
@@ -2234,25 +2271,19 @@ void UnwrappedLineParser::parseLabel(bool LeftAlignLabel) {
if (LeftAlignLabel)
Line->Level = 0;
- bool RemoveWhitesmithsCaseIndent =
- (!Style.IndentCaseBlocks &&
- Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths);
-
- if (RemoveWhitesmithsCaseIndent)
- --Line->Level;
-
if (!Style.IndentCaseBlocks && CommentsBeforeNextToken.empty() &&
FormatTok->Tok.is(tok::l_brace)) {
- CompoundStatementIndenter Indenter(
- this, Line->Level, Style.BraceWrapping.AfterCaseLabel,
- Style.BraceWrapping.IndentBraces || RemoveWhitesmithsCaseIndent);
+ CompoundStatementIndenter Indenter(this, Line->Level,
+ Style.BraceWrapping.AfterCaseLabel,
+ Style.BraceWrapping.IndentBraces);
parseBlock(/*MustBeDeclaration=*/false);
if (FormatTok->Tok.is(tok::kw_break)) {
if (Style.BraceWrapping.AfterControlStatement ==
FormatStyle::BWACS_Always) {
addUnwrappedLine();
- if (RemoveWhitesmithsCaseIndent) {
+ if (!Style.IndentCaseBlocks &&
+ Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths) {
Line->Level++;
}
}
@@ -2920,17 +2951,29 @@ LLVM_ATTRIBUTE_UNUSED static void printDebugInfo(const UnwrappedLine &Line,
llvm::dbgs() << "\n";
}
-void UnwrappedLineParser::addUnwrappedLine() {
+void UnwrappedLineParser::addUnwrappedLine(LineLevel AdjustLevel) {
if (Line->Tokens.empty())
return;
LLVM_DEBUG({
if (CurrentLines == &Lines)
printDebugInfo(*Line);
});
+
+ // If this line closes a block when in Whitesmiths mode, remember that
+ // information so that the level can be decreased after the line is added.
+ // This has to happen after the addition of the line since the line itself
+ // needs to be indented.
+ bool ClosesWhitesmithsBlock =
+ Line->MatchingOpeningBlockLineIndex != UnwrappedLine::kInvalidIndex &&
+ Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths;
+
CurrentLines->push_back(std::move(*Line));
Line->Tokens.clear();
Line->MatchingOpeningBlockLineIndex = UnwrappedLine::kInvalidIndex;
Line->FirstStartColumn = 0;
+
+ if (ClosesWhitesmithsBlock && AdjustLevel == LineLevel::Remove)
+ --Line->Level;
if (CurrentLines == &Lines && !PreprocessorDirectives.empty()) {
CurrentLines->append(
std::make_move_iterator(PreprocessorDirectives.begin()),
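For orientation, a small example (illustrative, not from the patch) of the Whitesmiths layout these level adjustments produce: braces go on their own lines, indented to the level of the statements they enclose, and with NamespaceIndentation: None the namespace braces are still indented one level while the body is not.

    namespace N
        {
    void f(bool b)
        {
        int x = 0;
        if (b)
            {
            x = 1;
            }
        }
        }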
diff --git a/clang/lib/Format/UnwrappedLineParser.h b/clang/lib/Format/UnwrappedLineParser.h
index 02b328cb72de..ce135fac5e57 100644
--- a/clang/lib/Format/UnwrappedLineParser.h
+++ b/clang/lib/Format/UnwrappedLineParser.h
@@ -85,8 +85,9 @@ private:
void reset();
void parseFile();
void parseLevel(bool HasOpeningBrace);
- void parseBlock(bool MustBeDeclaration, bool AddLevel = true,
- bool MunchSemi = true);
+ void parseBlock(bool MustBeDeclaration, unsigned AddLevels = 1u,
+ bool MunchSemi = true,
+ bool UnindentWhitesmithsBraces = false);
void parseChildBlock();
void parsePPDirective();
void parsePPDefine();
@@ -140,7 +141,12 @@ private:
bool tryToParsePropertyAccessor();
void tryToParseJSFunction();
bool tryToParseSimpleAttribute();
- void addUnwrappedLine();
+
+ // Used by addUnwrappedLine to denote whether to keep or remove a level
+ // when resetting the line state.
+ enum class LineLevel { Remove, Keep };
+
+ void addUnwrappedLine(LineLevel AdjustLevel = LineLevel::Remove);
bool eof() const;
// LevelDifference is the difference of levels after and before the current
// token. For example:
diff --git a/clang/lib/Headers/ppc_wrappers/xmmintrin.h b/clang/lib/Headers/ppc_wrappers/xmmintrin.h
index 0f429fa04081..0e45b96769f8 100644
--- a/clang/lib/Headers/ppc_wrappers/xmmintrin.h
+++ b/clang/lib/Headers/ppc_wrappers/xmmintrin.h
@@ -28,7 +28,7 @@
Most SSE scalar float intrinsic operations can be performed more
efficiently as C language float scalar operations or optimized to
use vector SIMD operations. We recommend this for new applications. */
-#error "Please read comment above. Use -DNO_WARN_X86_INTRINSICS to disable this error."
+#error "Please read comment above. Use -DNO_WARN_X86_INTRINSICS to disable this error."
#endif
#ifndef _XMMINTRIN_H_INCLUDED
@@ -62,14 +62,13 @@
/* The Intel API is flexible enough that we must allow aliasing with other
vector types, and their scalar components. */
-typedef float __m128 __attribute__ ((__vector_size__ (16), __may_alias__));
+typedef vector float __m128 __attribute__((__may_alias__));
/* Unaligned version of the same type. */
-typedef float __m128_u __attribute__ ((__vector_size__ (16), __may_alias__,
- __aligned__ (1)));
+typedef vector float __m128_u __attribute__((__may_alias__, __aligned__(1)));
/* Internal data types for implementing the intrinsics. */
-typedef float __v4sf __attribute__ ((__vector_size__ (16)));
+typedef vector float __v4sf;
/* Create an undefined vector. */
extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp
index 2d3d36f4adad..2b55712d44c2 100644
--- a/clang/lib/Sema/SemaChecking.cpp
+++ b/clang/lib/Sema/SemaChecking.cpp
@@ -2623,7 +2623,10 @@ static bool isValidBPFPreserveEnumValueArg(Expr *Arg) {
return false;
const auto *CE = dyn_cast<CStyleCastExpr>(UO->getSubExpr());
- if (!CE || CE->getCastKind() != CK_IntegralToPointer)
+ if (!CE)
+ return false;
+ if (CE->getCastKind() != CK_IntegralToPointer &&
+ CE->getCastKind() != CK_NullToPointer)
return false;
// The integer must be from an EnumConstantDecl.
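A hedged usage sketch (BPF target; the flag value is illustrative): CO-RE's enum-value helpers pass an enumerator through a C-style cast to a pointer, and an enumerator whose value is 0 produces a CK_NullToPointer cast rather than CK_IntegralToPointer, which the relaxed check now accepts.

    enum E { FIRST = 0, SECOND = 1 };
    /* FIRST is 0, so the cast below is CK_NullToPointer. */
    long v = __builtin_preserve_enum_value(*(__typeof__(enum E) *)FIRST, 0);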
diff --git a/libcxx/include/memory b/libcxx/include/memory
index 39d0f5bee6a5..efb10c8fd25b 100644
--- a/libcxx/include/memory
+++ b/libcxx/include/memory
@@ -99,14 +99,14 @@ struct allocator_traits
};
template <>
-class allocator<void> // deprecated in C++17, removed in C++20
+class allocator<void> // removed in C++20
{
public:
- typedef void* pointer;
- typedef const void* const_pointer;
- typedef void value_type;
+ typedef void* pointer; // deprecated in C++17
+ typedef const void* const_pointer; // deprecated in C++17
+ typedef void value_type; // deprecated in C++17
- template <class _Up> struct rebind {typedef allocator<_Up> other;};
+ template <class _Up> struct rebind {typedef allocator<_Up> other;}; // deprecated in C++17
};
template <class T>
@@ -786,27 +786,27 @@ to_address(const _Pointer& __p) _NOEXCEPT
template <class _Tp> class allocator;
-#if _LIBCPP_STD_VER <= 17 || defined(_LIBCPP_ENABLE_CXX20_REMOVED_ALLOCATOR_MEMBERS)
+#if _LIBCPP_STD_VER <= 17
template <>
-class _LIBCPP_TEMPLATE_VIS _LIBCPP_DEPRECATED_IN_CXX17 allocator<void>
+class _LIBCPP_TEMPLATE_VIS allocator<void>
{
public:
- typedef void* pointer;
- typedef const void* const_pointer;
- typedef void value_type;
+ _LIBCPP_DEPRECATED_IN_CXX17 typedef void* pointer;
+ _LIBCPP_DEPRECATED_IN_CXX17 typedef const void* const_pointer;
+ _LIBCPP_DEPRECATED_IN_CXX17 typedef void value_type;
- template <class _Up> struct rebind {typedef allocator<_Up> other;};
+ template <class _Up> struct _LIBCPP_DEPRECATED_IN_CXX17 rebind {typedef allocator<_Up> other;};
};
template <>
-class _LIBCPP_TEMPLATE_VIS _LIBCPP_DEPRECATED_IN_CXX17 allocator<const void>
+class _LIBCPP_TEMPLATE_VIS allocator<const void>
{
public:
- typedef const void* pointer;
- typedef const void* const_pointer;
- typedef const void value_type;
+ _LIBCPP_DEPRECATED_IN_CXX17 typedef const void* pointer;
+ _LIBCPP_DEPRECATED_IN_CXX17 typedef const void* const_pointer;
+ _LIBCPP_DEPRECATED_IN_CXX17 typedef const void value_type;
- template <class _Up> struct rebind {typedef allocator<_Up> other;};
+ template <class _Up> struct _LIBCPP_DEPRECATED_IN_CXX17 rebind {typedef allocator<_Up> other;};
};
#endif
diff --git a/lld/ELF/SyntheticSections.cpp b/lld/ELF/SyntheticSections.cpp
index 9a875bd7ec3e..70c36c63d101 100644
--- a/lld/ELF/SyntheticSections.cpp
+++ b/lld/ELF/SyntheticSections.cpp
@@ -3110,7 +3110,9 @@ size_t VersionTableSection::getSize() const {
void VersionTableSection::writeTo(uint8_t *buf) {
buf += 2;
for (const SymbolTableEntry &s : getPartition().dynSymTab->getSymbols()) {
- write16(buf, s.sym->versionId);
+ // Use the original versionId for an unfetched lazy symbol (undefined weak),
+ // which must be VER_NDX_GLOBAL (an undefined versioned symbol is an error).
+ write16(buf, s.sym->isLazy() ? VER_NDX_GLOBAL : s.sym->versionId);
buf += 2;
}
}
diff --git a/lldb/docs/man/lldb.rst b/lldb/docs/man/lldb.rst
index 6dca15fa35dc..b75288db380d 100644
--- a/lldb/docs/man/lldb.rst
+++ b/lldb/docs/man/lldb.rst
@@ -256,11 +256,11 @@ executable. To disambiguate between arguments passed to lldb and arguments
passed to the debugged executable, arguments starting with a - must be passed
after --.
- lldb --arch x86_64 /path/to/program program argument -- --arch arvm7
+ lldb --arch x86_64 /path/to/program program argument -- --arch armv7
For convenience, passing the executable after -- is also supported.
- lldb --arch x86_64 -- /path/to/program program argument --arch arvm7
+ lldb --arch x86_64 -- /path/to/program program argument --arch armv7
Passing one of the attach options causes :program:`lldb` to immediately attach
to the given process.
diff --git a/lldb/tools/driver/Driver.cpp b/lldb/tools/driver/Driver.cpp
index e4a60127b65e..210a712f9741 100644
--- a/lldb/tools/driver/Driver.cpp
+++ b/lldb/tools/driver/Driver.cpp
@@ -751,11 +751,11 @@ EXAMPLES:
arguments passed to the debugged executable, arguments starting with a - must
be passed after --.
- lldb --arch x86_64 /path/to/program program argument -- --arch arvm7
+ lldb --arch x86_64 /path/to/program program argument -- --arch armv7
For convenience, passing the executable after -- is also supported.
- lldb --arch x86_64 -- /path/to/program program argument --arch arvm7
+ lldb --arch x86_64 -- /path/to/program program argument --arch armv7
Passing one of the attach options causes lldb to immediately attach to the
given process.
diff --git a/llvm/include/llvm/ADT/Any.h b/llvm/include/llvm/ADT/Any.h
index 0aded628cda4..1e3abca70679 100644
--- a/llvm/include/llvm/ADT/Any.h
+++ b/llvm/include/llvm/ADT/Any.h
@@ -23,7 +23,12 @@
namespace llvm {
-class Any {
+class LLVM_EXTERNAL_VISIBILITY Any {
+
+ // The `TypeId<T>::Id` static data member below is a globally unique
+ // identifier for the type `T`. It is explicitly marked with default
+ // visibility so that when `-fvisibility=hidden` is used, the loader still
+ // merges duplicate definitions across DSO boundaries.
template <typename T> struct TypeId { static const char Id; };
struct StorageBase {
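A self-contained sketch of why the annotation matters, simplified from llvm::Any rather than copied from it: type identity is decided by comparing the address of a static data member, so the dynamic loader must merge every DSO's copy into one definition.

    // One Id per distinct T; its address is the runtime type identifier.
    template <typename T> struct TypeId { static const char Id; };
    template <typename T> const char TypeId<T>::Id = 0;

    template <typename T> bool holds(const void *RuntimeId) {
      // If -fvisibility=hidden gave each shared library its own copy of
      // TypeId<int>::Id, this comparison could fail across a DSO boundary.
      return RuntimeId == &TypeId<T>::Id;
    }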
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h
index c3b494e94ff1..4a982b00125d 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h
@@ -316,6 +316,7 @@ public:
LegalizeResult narrowScalarShift(MachineInstr &MI, unsigned TypeIdx, LLT Ty);
LegalizeResult narrowScalarMul(MachineInstr &MI, LLT Ty);
+ LegalizeResult narrowScalarFPTOI(MachineInstr &MI, unsigned TypeIdx, LLT Ty);
LegalizeResult narrowScalarExtract(MachineInstr &MI, unsigned TypeIdx, LLT Ty);
LegalizeResult narrowScalarInsert(MachineInstr &MI, unsigned TypeIdx, LLT Ty);
diff --git a/llvm/include/llvm/IR/Constant.h b/llvm/include/llvm/IR/Constant.h
index 0190aca27b72..71692c746015 100644
--- a/llvm/include/llvm/IR/Constant.h
+++ b/llvm/include/llvm/IR/Constant.h
@@ -214,6 +214,10 @@ public:
/// both must either be scalars or vectors with the same element count. If no
/// changes are made, the constant C is returned.
static Constant *mergeUndefsWith(Constant *C, Constant *Other);
+
+ /// Return true if a constant is ConstantData or a ConstantAggregate or
+ /// ConstantExpr that contains only ConstantData.
+ bool isManifestConstant() const;
};
} // end namespace llvm
diff --git a/llvm/lib/Analysis/ConstantFolding.cpp b/llvm/lib/Analysis/ConstantFolding.cpp
index f73890d548f0..cc1ce4c65821 100644
--- a/llvm/lib/Analysis/ConstantFolding.cpp
+++ b/llvm/lib/Analysis/ConstantFolding.cpp
@@ -1808,19 +1808,6 @@ double getValueAsDouble(ConstantFP *Op) {
return APF.convertToDouble();
}
-static bool isManifestConstant(const Constant *c) {
- if (isa<ConstantData>(c)) {
- return true;
- } else if (isa<ConstantAggregate>(c) || isa<ConstantExpr>(c)) {
- for (const Value *subc : c->operand_values()) {
- if (!isManifestConstant(cast<Constant>(subc)))
- return false;
- }
- return true;
- }
- return false;
-}
-
static bool getConstIntOrUndef(Value *Op, const APInt *&C) {
if (auto *CI = dyn_cast<ConstantInt>(Op)) {
C = &CI->getValue();
@@ -1845,7 +1832,7 @@ static Constant *ConstantFoldScalarCall1(StringRef Name,
// We know we have a "Constant" argument. But we want to only
// return true for manifest constants, not those that depend on
// constants with unknowable values, e.g. GlobalValue or BlockAddress.
- if (isManifestConstant(Operands[0]))
+ if (Operands[0]->isManifestConstant())
return ConstantInt::getTrue(Ty->getContext());
return nullptr;
}
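A source-level illustration of what "manifest" buys (hedged; the final result also depends on later passes such as LowerConstantIntrinsics): __builtin_constant_p lowers to llvm.is.constant, which this helper folds to true only when the argument bottoms out in ConstantData.

    extern int g;
    int f(void) {
      int a = __builtin_constant_p(42); /* ConstantInt: folds to 1 */
      int b = __builtin_constant_p(&g); /* GlobalValue address is not
                                           manifest; not folded to true */
      return a + b;
    }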
diff --git a/llvm/lib/Analysis/InstructionSimplify.cpp b/llvm/lib/Analysis/InstructionSimplify.cpp
index c40e5c36cdc7..a12816885c40 100644
--- a/llvm/lib/Analysis/InstructionSimplify.cpp
+++ b/llvm/lib/Analysis/InstructionSimplify.cpp
@@ -4127,10 +4127,12 @@ static Value *simplifySelectWithICmpCond(Value *CondVal, Value *TrueVal,
TrueVal, FalseVal))
return V;
- // If we have an equality comparison, then we know the value in one of the
- // arms of the select. See if substituting this value into the arm and
+ // If we have a scalar equality comparison, then we know the value in one of
+ // the arms of the select. See if substituting this value into the arm and
// simplifying the result yields the same value as the other arm.
- if (Pred == ICmpInst::ICMP_EQ) {
+ // Note that the equivalence/replacement opportunity does not hold for vectors
+ // because each element of a vector select is chosen independently.
+ if (Pred == ICmpInst::ICMP_EQ && !CondVal->getType()->isVectorTy()) {
if (SimplifyWithOpReplaced(FalseVal, CmpLHS, CmpRHS, Q,
/* AllowRefinement */ false, MaxRecurse) ==
TrueVal ||
diff --git a/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp b/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp
index 895936d47175..886b5bf4acd3 100644
--- a/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp
+++ b/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp
@@ -344,7 +344,9 @@ MemoryDependenceResults::getInvariantGroupPointerDependency(LoadInst *LI,
// If we hit load/store with the same invariant.group metadata (and the
// same pointer operand) we can assume that value pointed by pointer
// operand didn't change.
- if ((isa<LoadInst>(U) || isa<StoreInst>(U)) &&
+ if ((isa<LoadInst>(U) ||
+ (isa<StoreInst>(U) &&
+ cast<StoreInst>(U)->getPointerOperand() == Ptr)) &&
U->hasMetadata(LLVMContext::MD_invariant_group))
ClosestDependency = GetClosestDependency(ClosestDependency, U);
}
diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp
index fe9d8297d679..1a9ae68573e9 100644
--- a/llvm/lib/Analysis/ScalarEvolution.cpp
+++ b/llvm/lib/Analysis/ScalarEvolution.cpp
@@ -10622,6 +10622,10 @@ bool ScalarEvolution::isImpliedViaMerge(ICmpInst::Predicate Pred,
if (!dominates(RHS, IncBB))
return false;
const SCEV *L = getSCEV(LPhi->getIncomingValueForBlock(IncBB));
+ // Make sure L does not refer to a value from a potentially previous
+ // iteration of a loop.
+ if (!properlyDominates(L, IncBB))
+ return false;
if (!ProvedEasily(L, RHS))
return false;
}
diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp
index e174c5efe424..75486d3c80e7 100644
--- a/llvm/lib/Analysis/ValueTracking.cpp
+++ b/llvm/lib/Analysis/ValueTracking.cpp
@@ -5150,6 +5150,9 @@ static bool programUndefinedIfUndefOrPoison(const Value *V,
return false;
}
+ // Limit number of instructions we look at, to avoid scanning through large
+ // blocks. The current limit is chosen arbitrarily.
+ unsigned ScanLimit = 32;
BasicBlock::const_iterator End = BB->end();
if (!PoisonOnly) {
@@ -5160,6 +5163,11 @@ static bool programUndefinedIfUndefOrPoison(const Value *V,
// For example, 'udiv x, (undef | 1)' isn't UB.
for (auto &I : make_range(Begin, End)) {
+ if (isa<DbgInfoIntrinsic>(I))
+ continue;
+ if (--ScanLimit == 0)
+ break;
+
if (const auto *CB = dyn_cast<CallBase>(&I)) {
for (unsigned i = 0; i < CB->arg_size(); ++i) {
if (CB->paramHasAttr(i, Attribute::NoUndef) &&
@@ -5186,9 +5194,12 @@ static bool programUndefinedIfUndefOrPoison(const Value *V,
for_each(V->users(), Propagate);
Visited.insert(BB);
- unsigned Iter = 0;
- while (Iter++ < MaxAnalysisRecursionDepth) {
+ while (true) {
for (auto &I : make_range(Begin, End)) {
+ if (isa<DbgInfoIntrinsic>(I))
+ continue;
+ if (--ScanLimit == 0)
+ return false;
if (mustTriggerUB(&I, YieldsPoison))
return true;
if (!isGuaranteedToTransferExecutionToSuccessor(&I))
diff --git a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
index b97c369b832d..b7883cbc3120 100644
--- a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
@@ -840,9 +840,8 @@ void IRTranslator::emitSwitchCase(SwitchCG::CaseBlock &CB,
// For conditional branch lowering, we might try to do something silly like
// emit an G_ICMP to compare an existing G_ICMP i1 result with true. If so,
// just re-use the existing condition vreg.
- if (CI && CI->getZExtValue() == 1 &&
- MRI->getType(CondLHS).getSizeInBits() == 1 &&
- CB.PredInfo.Pred == CmpInst::ICMP_EQ) {
+ if (MRI->getType(CondLHS).getSizeInBits() == 1 && CI &&
+ CI->getZExtValue() == 1 && CB.PredInfo.Pred == CmpInst::ICMP_EQ) {
Cond = CondLHS;
} else {
Register CondRHS = getOrCreateVReg(*CB.CmpRHS);
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index 3178ee16af2b..66871ca3b926 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -1257,22 +1257,9 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
Observer.changedInstr(MI);
return Legalized;
}
- case TargetOpcode::G_FPTOUI: {
- if (TypeIdx != 0)
- return UnableToLegalize;
- Observer.changingInstr(MI);
- narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_ZEXT);
- Observer.changedInstr(MI);
- return Legalized;
- }
- case TargetOpcode::G_FPTOSI: {
- if (TypeIdx != 0)
- return UnableToLegalize;
- Observer.changingInstr(MI);
- narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_SEXT);
- Observer.changedInstr(MI);
- return Legalized;
- }
+ case TargetOpcode::G_FPTOUI:
+ case TargetOpcode::G_FPTOSI:
+ return narrowScalarFPTOI(MI, TypeIdx, NarrowTy);
case TargetOpcode::G_FPEXT:
if (TypeIdx != 0)
return UnableToLegalize;
@@ -4497,6 +4484,31 @@ LegalizerHelper::narrowScalarMul(MachineInstr &MI, LLT NarrowTy) {
}
LegalizerHelper::LegalizeResult
+LegalizerHelper::narrowScalarFPTOI(MachineInstr &MI, unsigned TypeIdx,
+ LLT NarrowTy) {
+ if (TypeIdx != 0)
+ return UnableToLegalize;
+
+ bool IsSigned = MI.getOpcode() == TargetOpcode::G_FPTOSI;
+
+ Register Src = MI.getOperand(1).getReg();
+ LLT SrcTy = MRI.getType(Src);
+
+ // If all finite floats fit into the narrowed integer type, we can just swap
+ // out the result type. This is practically only useful for conversions from
+ // half to at least 16-bits, so just handle the one case.
+ if (SrcTy.getScalarType() != LLT::scalar(16) ||
+ NarrowTy.getScalarSizeInBits() < (IsSigned ? 17 : 16))
+ return UnableToLegalize;
+
+ Observer.changingInstr(MI);
+ narrowScalarDst(MI, NarrowTy, 0,
+ IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT);
+ Observer.changedInstr(MI);
+ return Legalized;
+}
+
+LegalizerHelper::LegalizeResult
LegalizerHelper::narrowScalarExtract(MachineInstr &MI, unsigned TypeIdx,
LLT NarrowTy) {
if (TypeIdx != 1)
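A standalone check of the bit-width reasoning above (the constants are IEEE-754 facts, not taken from the patch): the largest finite half-precision value is 65504, so an unsigned result fits in 16 bits while a signed result needs a 17th bit for the sign.

    #include <cstdint>

    static_assert(65504 <= UINT16_MAX, "G_FPTOUI from half fits in i16");
    // A signed 17-bit integer spans [-65536, 65535], covering +/-65504.
    static_assert(65504 <= 65535 && -65504 >= -65536,
                  "G_FPTOSI from half fits in i17");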
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 6a6f83827f72..7f2add81e80d 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -7105,14 +7105,22 @@ SDValue DAGCombiner::mergeTruncStores(StoreSDNode *N) {
if (LegalOperations)
return SDValue();
- // Collect all the stores in the chain.
- SDValue Chain;
- SmallVector<StoreSDNode *, 8> Stores;
- for (StoreSDNode *Store = N; Store; Store = dyn_cast<StoreSDNode>(Chain)) {
- // TODO: Allow unordered atomics when wider type is legal (see D66309)
- EVT MemVT = Store->getMemoryVT();
- if (!(MemVT == MVT::i8 || MemVT == MVT::i16 || MemVT == MVT::i32) ||
- !Store->isSimple() || Store->isIndexed())
+ // We only handle merging simple stores of 1-4 bytes.
+ // TODO: Allow unordered atomics when wider type is legal (see D66309)
+ EVT MemVT = N->getMemoryVT();
+ if (!(MemVT == MVT::i8 || MemVT == MVT::i16 || MemVT == MVT::i32) ||
+ !N->isSimple() || N->isIndexed())
+ return SDValue();
+
+ // Collect all of the stores in the chain.
+ SDValue Chain = N->getChain();
+ SmallVector<StoreSDNode *, 8> Stores = {N};
+ while (auto *Store = dyn_cast<StoreSDNode>(Chain)) {
+ // All stores must be the same size to ensure that we are writing all of the
+ // bytes in the wide value.
+ // TODO: We could allow multiple sizes by tracking each stored byte.
+ if (Store->getMemoryVT() != MemVT || !Store->isSimple() ||
+ Store->isIndexed())
return SDValue();
Stores.push_back(Store);
Chain = Store->getChain();
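The source pattern this combine targets, as a hedged example: a chain of same-width truncating stores to adjacent addresses, which can merge into one wide store (with a byte swap when the store order is the opposite endianness).

    void store_le32(unsigned char *p, unsigned v) {
      /* Four i8 truncstores on one chain; because they all have the same
         memory VT they can merge into a single 32-bit store. */
      p[0] = (unsigned char)(v);
      p[1] = (unsigned char)(v >> 8);
      p[2] = (unsigned char)(v >> 16);
      p[3] = (unsigned char)(v >> 24);
    }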
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
index 7bae5048fc0e..d17dd1c5eccb 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
@@ -1691,9 +1691,9 @@ static bool MIIsInTerminatorSequence(const MachineInstr &MI) {
/// terminator, but additionally the copies that move the vregs into the
/// physical registers.
static MachineBasicBlock::iterator
-FindSplitPointForStackProtector(MachineBasicBlock *BB) {
+FindSplitPointForStackProtector(MachineBasicBlock *BB,
+ const TargetInstrInfo &TII) {
MachineBasicBlock::iterator SplitPoint = BB->getFirstTerminator();
- //
if (SplitPoint == BB->begin())
return SplitPoint;
@@ -1701,6 +1701,31 @@ FindSplitPointForStackProtector(MachineBasicBlock *BB) {
MachineBasicBlock::iterator Previous = SplitPoint;
--Previous;
+ if (TII.isTailCall(*SplitPoint) &&
+ Previous->getOpcode() == TII.getCallFrameDestroyOpcode()) {
+ // Call frames cannot be nested, so if this frame is describing the tail
+ // call itself, then we must insert before the sequence even starts. For
+ // example:
+ // <split point>
+ // ADJCALLSTACKDOWN ...
+ // <Moves>
+ // ADJCALLSTACKUP ...
+ // TAILJMP somewhere
+ // On the other hand, it could be an unrelated call in which case this tail call
+ // has to register moves of its own and should be the split point. For example:
+ // ADJCALLSTACKDOWN
+ // CALL something_else
+ // ADJCALLSTACKUP
+ // <split point>
+ // TAILJMP somewhere
+ do {
+ --Previous;
+ if (Previous->isCall())
+ return SplitPoint;
+ } while (Previous->getOpcode() != TII.getCallFrameSetupOpcode());
+
+ return Previous;
+ }
+
while (MIIsInTerminatorSequence(*Previous)) {
SplitPoint = Previous;
if (Previous == Start)
@@ -1740,7 +1765,7 @@ SelectionDAGISel::FinishBasicBlock() {
// Add load and check to the basicblock.
FuncInfo->MBB = ParentMBB;
FuncInfo->InsertPt =
- FindSplitPointForStackProtector(ParentMBB);
+ FindSplitPointForStackProtector(ParentMBB, *TII);
SDB->visitSPDescriptorParent(SDB->SPDescriptor, ParentMBB);
CurDAG->setRoot(SDB->getRoot());
SDB->clear();
@@ -1759,7 +1784,7 @@ SelectionDAGISel::FinishBasicBlock() {
// register allocation issues caused by us splitting the parent mbb. The
// register allocator will clean up said virtual copies later on.
MachineBasicBlock::iterator SplitPoint =
- FindSplitPointForStackProtector(ParentMBB);
+ FindSplitPointForStackProtector(ParentMBB, *TII);
// Splice the terminator of ParentMBB into SuccessMBB.
SuccessMBB->splice(SuccessMBB->end(), ParentMBB,
diff --git a/llvm/lib/CodeGen/StackProtector.cpp b/llvm/lib/CodeGen/StackProtector.cpp
index 8d91afb6e99d..10c6dcbdb049 100644
--- a/llvm/lib/CodeGen/StackProtector.cpp
+++ b/llvm/lib/CodeGen/StackProtector.cpp
@@ -470,21 +470,36 @@ bool StackProtector::InsertStackProtectors() {
// instrumentation has already been generated.
HasIRCheck = true;
+ // If we're instrumenting a block with a musttail call, the check has to be
+ // inserted before the call rather than between it and the return. The
+ // verifier guarantees that a musttail call is either directly before the
+ // return or with a single correct bitcast of the return value in between so
+ // we don't need to worry about many situations here.
+ Instruction *CheckLoc = RI;
+ Instruction *Prev = RI->getPrevNonDebugInstruction();
+ if (Prev && isa<CallInst>(Prev) && cast<CallInst>(Prev)->isMustTailCall())
+ CheckLoc = Prev;
+ else if (Prev) {
+ Prev = Prev->getPrevNonDebugInstruction();
+ if (Prev && isa<CallInst>(Prev) && cast<CallInst>(Prev)->isMustTailCall())
+ CheckLoc = Prev;
+ }
+
// Generate epilogue instrumentation. The epilogue instrumentation can be
// function-based or inlined depending on which mechanism the target is
// providing.
if (Function *GuardCheck = TLI->getSSPStackGuardCheck(*M)) {
// Generate the function-based epilogue instrumentation.
// The target provides a guard check function, generate a call to it.
- IRBuilder<> B(RI);
+ IRBuilder<> B(CheckLoc);
LoadInst *Guard = B.CreateLoad(B.getInt8PtrTy(), AI, true, "Guard");
CallInst *Call = B.CreateCall(GuardCheck, {Guard});
Call->setAttributes(GuardCheck->getAttributes());
Call->setCallingConv(GuardCheck->getCallingConv());
} else {
// Generate the epilogue with inline instrumentation.
- // If we do not support SelectionDAG based tail calls, generate IR level
- // tail calls.
+ // If we do not support SelectionDAG based calls, generate IR level
+ // calls.
//
// For each block with a return instruction, convert this:
//
@@ -514,7 +529,8 @@ bool StackProtector::InsertStackProtectors() {
BasicBlock *FailBB = CreateFailBB();
// Split the basic block before the return instruction.
- BasicBlock *NewBB = BB->splitBasicBlock(RI->getIterator(), "SP_return");
+ BasicBlock *NewBB =
+ BB->splitBasicBlock(CheckLoc->getIterator(), "SP_return");
// Update the dominator tree if we need to.
if (DT && DT->isReachableFromEntry(BB)) {
diff --git a/llvm/lib/IR/Constants.cpp b/llvm/lib/IR/Constants.cpp
index 6fd205c654a8..9f05917cf7cc 100644
--- a/llvm/lib/IR/Constants.cpp
+++ b/llvm/lib/IR/Constants.cpp
@@ -803,6 +803,18 @@ Constant *Constant::mergeUndefsWith(Constant *C, Constant *Other) {
return C;
}
+bool Constant::isManifestConstant() const {
+ if (isa<ConstantData>(this))
+ return true;
+ if (isa<ConstantAggregate>(this) || isa<ConstantExpr>(this)) {
+ for (const Value *Op : operand_values())
+ if (!cast<Constant>(Op)->isManifestConstant())
+ return false;
+ return true;
+ }
+ return false;
+}
+
//===----------------------------------------------------------------------===//
// ConstantInt
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/MC/ELFObjectWriter.cpp b/llvm/lib/MC/ELFObjectWriter.cpp
index 69307b617552..2d810ffd350b 100644
--- a/llvm/lib/MC/ELFObjectWriter.cpp
+++ b/llvm/lib/MC/ELFObjectWriter.cpp
@@ -1397,6 +1397,17 @@ bool ELFObjectWriter::shouldRelocateWithSymbol(const MCAssembler &Asm,
if (TargetObjectWriter->getEMachine() == ELF::EM_386 &&
Type == ELF::R_386_GOTOFF)
return true;
+
+ // ld.lld handles R_MIPS_HI16/R_MIPS_LO16 separately, not as a whole, so
+ // it doesn't know that an R_MIPS_HI16 with implicit addend 1 and an
+ // R_MIPS_LO16 with implicit addend -32768 represents 32768, which is in
+ // range of a MergeInputSection. We could introduce a new RelExpr member
+ // (like R_RISCV_PC_INDIRECT for R_RISCV_PCREL_HI20 / R_RISCV_PCREL_LO12)
+ // but the complexity is unnecessary given that GNU as keeps the original
+ // symbol for this case as well.
+ if (TargetObjectWriter->getEMachine() == ELF::EM_MIPS &&
+ !hasRelocationAddend())
+ return true;
}
// Most TLS relocations use a got, so they need the symbol. Even those that
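The arithmetic behind that comment, as a standalone sketch: the low half is sign-extended at link time, so an offset of 32768 travels as hi = 1, lo = -32768.

    #include <assert.h>
    #include <stdint.h>

    int main(void) {
      int32_t hi = 1, lo = -32768; /* R_MIPS_HI16 / R_MIPS_LO16 addends */
      assert((hi << 16) + lo == 32768);
      return 0;
    }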
diff --git a/llvm/lib/Support/CommandLine.cpp b/llvm/lib/Support/CommandLine.cpp
index e2f014d1815b..123a23a5242c 100644
--- a/llvm/lib/Support/CommandLine.cpp
+++ b/llvm/lib/Support/CommandLine.cpp
@@ -1729,7 +1729,7 @@ void Option::printHelpStr(StringRef HelpStr, size_t Indent,
void Option::printEnumValHelpStr(StringRef HelpStr, size_t BaseIndent,
size_t FirstLineIndentedBy) {
const StringRef ValHelpPrefix = " ";
- assert(BaseIndent >= FirstLineIndentedBy + ValHelpPrefix.size());
+ assert(BaseIndent >= FirstLineIndentedBy);
std::pair<StringRef, StringRef> Split = HelpStr.split('\n');
outs().indent(BaseIndent - FirstLineIndentedBy)
<< ArgHelpPrefix << ValHelpPrefix << Split.first << "\n";
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 1451151f4dc5..c522ee76626d 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -16335,25 +16335,36 @@ AArch64TargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
unsigned Size = AI->getType()->getPrimitiveSizeInBits();
if (Size > 128) return AtomicExpansionKind::None;
- // Nand not supported in LSE.
- if (AI->getOperation() == AtomicRMWInst::Nand) return AtomicExpansionKind::LLSC;
- // Leave 128 bits to LLSC.
- if (Subtarget->hasLSE() && Size < 128)
- return AtomicExpansionKind::None;
- if (Subtarget->outlineAtomics() && Size < 128) {
- // [U]Min/[U]Max RWM atomics are used in __sync_fetch_ libcalls so far.
- // Don't outline them unless
- // (1) high level <atomic> support approved:
- // http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2020/p0493r1.pdf
- // (2) low level libgcc and compiler-rt support implemented by:
- // min/max outline atomics helpers
- if (AI->getOperation() != AtomicRMWInst::Min &&
- AI->getOperation() != AtomicRMWInst::Max &&
- AI->getOperation() != AtomicRMWInst::UMin &&
- AI->getOperation() != AtomicRMWInst::UMax) {
+
+ // Nand is not supported in LSE.
+ // Leave 128 bits to LLSC or CmpXChg.
+ if (AI->getOperation() != AtomicRMWInst::Nand && Size < 128) {
+ if (Subtarget->hasLSE())
return AtomicExpansionKind::None;
+ if (Subtarget->outlineAtomics()) {
+ // [U]Min/[U]Max RWM atomics are used in __sync_fetch_ libcalls so far.
+ // Don't outline them unless
+ // (1) high level <atomic> support approved:
+ // http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2020/p0493r1.pdf
+ // (2) low level libgcc and compiler-rt support implemented by:
+ // min/max outline atomics helpers
+ if (AI->getOperation() != AtomicRMWInst::Min &&
+ AI->getOperation() != AtomicRMWInst::Max &&
+ AI->getOperation() != AtomicRMWInst::UMin &&
+ AI->getOperation() != AtomicRMWInst::UMax) {
+ return AtomicExpansionKind::None;
+ }
}
}
+
+ // At -O0, fast-regalloc cannot cope with the live vregs necessary to
+ // implement atomicrmw without spilling. If the target address is also on the
+ // stack and close enough to the spill slot, this can lead to a situation
+ // where the monitor always gets cleared and the atomic operation can never
+ // succeed. So at -O0 lower this operation to a CAS loop.
+ if (getTargetMachine().getOptLevel() == CodeGenOpt::None)
+ return AtomicExpansionKind::CmpXChg;
+
return AtomicExpansionKind::LLSC;
}
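Roughly what AtomicExpansionKind::CmpXChg means at the source level (an illustration, not the backend's actual expansion): the read-modify-write becomes a compare-exchange retry loop, which keeps making progress even when -O0 register allocation spills between the load and the store, whereas a spill between LL and SC can clear the exclusive monitor every iteration.

    #include <atomic>

    int fetch_add_via_cas(std::atomic<int> &a, int v) {
      int old = a.load(std::memory_order_relaxed);
      // compare_exchange_weak reloads 'old' on failure, so the loop makes
      // progress regardless of spills around the operation.
      while (!a.compare_exchange_weak(old, old + v)) {
      }
      return old;
    }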
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
index 5259f4f5a4d0..fc5ef02e8457 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
@@ -1791,7 +1791,7 @@ bool AArch64InstructionSelector::selectVectorAshrLshr(
NegOpc = AArch64::NEGv8i16;
} else if (Ty == LLT::vector(16, 8)) {
Opc = IsASHR ? AArch64::SSHLv16i8 : AArch64::USHLv16i8;
- NegOpc = AArch64::NEGv8i16;
+ NegOpc = AArch64::NEGv16i8;
} else if (Ty == LLT::vector(8, 8)) {
Opc = IsASHR ? AArch64::SSHLv8i8 : AArch64::USHLv8i8;
NegOpc = AArch64::NEGv8i8;
diff --git a/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp b/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
index 112eb59e173d..e418d53b56a4 100644
--- a/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -5934,6 +5934,9 @@ bool ARMBaseInstrInfo::checkAndUpdateStackOffset(MachineInstr *MI,
|| AddrMode == ARMII::AddrModeT2_so // SP can't be used as based register
|| AddrMode == ARMII::AddrModeT2_pc // PCrel access
|| AddrMode == ARMII::AddrMode2 // Used by PRE and POST indexed LD/ST
+ || AddrMode == ARMII::AddrModeT2_i7 // v8.1-M MVE
+ || AddrMode == ARMII::AddrModeT2_i7s2 // v8.1-M MVE
+ || AddrMode == ARMII::AddrModeT2_i7s4 // v8.1-M sys regs VLDR/VSTR
|| AddrMode == ARMII::AddrModeNone)
return false;
@@ -5976,6 +5979,10 @@ bool ARMBaseInstrInfo::checkAndUpdateStackOffset(MachineInstr *MI,
NumBits = 8;
break;
case ARMII::AddrModeT2_i8s4:
+ // FIXME: Values are already scaled in this addressing mode.
+ assert((Fixup & 3) == 0 && "Can't encode this offset!");
+ NumBits = 10;
+ break;
case ARMII::AddrModeT2_ldrex:
NumBits = 8;
Scale = 4;
@@ -5984,17 +5991,6 @@ bool ARMBaseInstrInfo::checkAndUpdateStackOffset(MachineInstr *MI,
case ARMII::AddrMode_i12:
NumBits = 12;
break;
- case ARMII::AddrModeT2_i7:
- NumBits = 7;
- break;
- case ARMII::AddrModeT2_i7s2:
- NumBits = 7;
- Scale = 2;
- break;
- case ARMII::AddrModeT2_i7s4:
- NumBits = 7;
- Scale = 4;
- break;
case ARMII::AddrModeT1_s: // SP-relative LD/ST
NumBits = 8;
Scale = 4;
@@ -6004,8 +6000,8 @@ bool ARMBaseInstrInfo::checkAndUpdateStackOffset(MachineInstr *MI,
}
// Make sure the offset is encodable for instructions that scale the
// immediate.
- if (((OffVal * Scale + Fixup) & (Scale - 1)) != 0)
- return false;
+ assert(((OffVal * Scale + Fixup) & (Scale - 1)) == 0 &&
+ "Can't encode this offset!");
OffVal += Fixup / Scale;
unsigned Mask = (1 << NumBits) - 1;
diff --git a/llvm/lib/Target/BPF/BPFMIPeephole.cpp b/llvm/lib/Target/BPF/BPFMIPeephole.cpp
index df870314fffe..354980e4bf3c 100644
--- a/llvm/lib/Target/BPF/BPFMIPeephole.cpp
+++ b/llvm/lib/Target/BPF/BPFMIPeephole.cpp
@@ -475,6 +475,9 @@ bool BPFMIPeepholeTruncElim::eliminateTruncSeq(void) {
if (MI.getOpcode() == BPF::SRL_ri &&
MI.getOperand(2).getImm() == 32) {
SrcReg = MI.getOperand(1).getReg();
+ if (!MRI->hasOneNonDBGUse(SrcReg))
+ continue;
+
MI2 = MRI->getVRegDef(SrcReg);
DstReg = MI.getOperand(0).getReg();
diff --git a/llvm/lib/Target/BPF/BPFPreserveDIType.cpp b/llvm/lib/Target/BPF/BPFPreserveDIType.cpp
index 18a4f60c171a..0348e2200acb 100644
--- a/llvm/lib/Target/BPF/BPFPreserveDIType.cpp
+++ b/llvm/lib/Target/BPF/BPFPreserveDIType.cpp
@@ -85,8 +85,17 @@ static bool BPFPreserveDITypeImpl(Function &F) {
} else {
Reloc = BPFCoreSharedInfo::BTF_TYPE_ID_REMOTE;
DIType *Ty = cast<DIType>(MD);
+ while (auto *DTy = dyn_cast<DIDerivedType>(Ty)) {
+ unsigned Tag = DTy->getTag();
+ if (Tag != dwarf::DW_TAG_const_type &&
+ Tag != dwarf::DW_TAG_volatile_type)
+ break;
+ Ty = DTy->getBaseType();
+ }
+
if (Ty->getName().empty())
report_fatal_error("Empty type name for BTF_TYPE_ID_REMOTE reloc");
+ MD = Ty;
}
BasicBlock *BB = Call->getParent();
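A hedged source-level illustration (BPF target; the flag value is illustrative): qualifiers on the pointee used to leave the relocation pointing at an unnamed DIDerivedType, and the loop above now peels them to reach the named type.

    struct s { int a; };
    unsigned long id(const volatile struct s *p) {
      /* DW_TAG_const_type / DW_TAG_volatile_type wrap the struct type;
         the reloc now resolves to the named type "s". */
      return __builtin_btf_type_id(*p, 1);
    }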
diff --git a/llvm/lib/Target/BPF/BPFTargetMachine.cpp b/llvm/lib/Target/BPF/BPFTargetMachine.cpp
index c0244b9f2c74..a8fef2517b03 100644
--- a/llvm/lib/Target/BPF/BPFTargetMachine.cpp
+++ b/llvm/lib/Target/BPF/BPFTargetMachine.cpp
@@ -12,6 +12,7 @@
#include "BPFTargetMachine.h"
#include "BPF.h"
+#include "BPFTargetTransformInfo.h"
#include "MCTargetDesc/BPFMCAsmInfo.h"
#include "TargetInfo/BPFTargetInfo.h"
#include "llvm/CodeGen/Passes.h"
@@ -145,6 +146,11 @@ void BPFPassConfig::addIRPasses() {
TargetPassConfig::addIRPasses();
}
+TargetTransformInfo
+BPFTargetMachine::getTargetTransformInfo(const Function &F) {
+ return TargetTransformInfo(BPFTTIImpl(this, F));
+}
+
// Install an instruction selector pass using
// the ISelDag to gen BPF code.
bool BPFPassConfig::addInstSelector() {
diff --git a/llvm/lib/Target/BPF/BPFTargetMachine.h b/llvm/lib/Target/BPF/BPFTargetMachine.h
index 5243a15eb7b0..61c8a44cc402 100644
--- a/llvm/lib/Target/BPF/BPFTargetMachine.h
+++ b/llvm/lib/Target/BPF/BPFTargetMachine.h
@@ -34,6 +34,8 @@ public:
TargetPassConfig *createPassConfig(PassManagerBase &PM) override;
+ TargetTransformInfo getTargetTransformInfo(const Function &F) override;
+
TargetLoweringObjectFile *getObjFileLowering() const override {
return TLOF.get();
}
diff --git a/llvm/lib/Target/BPF/BPFTargetTransformInfo.h b/llvm/lib/Target/BPF/BPFTargetTransformInfo.h
new file mode 100644
index 000000000000..62055497e685
--- /dev/null
+++ b/llvm/lib/Target/BPF/BPFTargetTransformInfo.h
@@ -0,0 +1,61 @@
+//===------ BPFTargetTransformInfo.h - BPF specific TTI ---------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file uses the target's specific information to
+// provide more precise answers to certain TTI queries, while letting the
+// target independent and default TTI implementations handle the rest.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_BPF_BPFTARGETTRANSFORMINFO_H
+#define LLVM_LIB_TARGET_BPF_BPFTARGETTRANSFORMINFO_H
+
+#include "BPFTargetMachine.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/CodeGen/BasicTTIImpl.h"
+#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
+
+namespace llvm {
+class BPFTTIImpl : public BasicTTIImplBase<BPFTTIImpl> {
+ typedef BasicTTIImplBase<BPFTTIImpl> BaseT;
+ typedef TargetTransformInfo TTI;
+ friend BaseT;
+
+ const BPFSubtarget *ST;
+ const BPFTargetLowering *TLI;
+
+ const BPFSubtarget *getST() const { return ST; }
+ const BPFTargetLowering *getTLI() const { return TLI; }
+
+public:
+ explicit BPFTTIImpl(const BPFTargetMachine *TM, const Function &F)
+ : BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl(F)),
+ TLI(ST->getTargetLowering()) {}
+
+ int getIntImmCost(const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind) {
+ if (Imm.getBitWidth() <= 64 && isInt<32>(Imm.getSExtValue()))
+ return TTI::TCC_Free;
+
+ return TTI::TCC_Basic;
+ }
+
+ int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
+ CmpInst::Predicate VecPred,
+ TTI::TargetCostKind CostKind,
+ const llvm::Instruction *I = nullptr) {
+ if (Opcode == Instruction::Select)
+ return SCEVCheapExpansionBudget;
+
+ return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind,
+ I);
+ }
+};
+
+} // end namespace llvm
+
+#endif // LLVM_LIB_TARGET_BPF_BPFTARGETTRANSFORMINFO_H
diff --git a/llvm/lib/Target/BPF/BTF.def b/llvm/lib/Target/BPF/BTF.def
index 2d2e9a04aa6d..66cf2c90ead4 100644
--- a/llvm/lib/Target/BPF/BTF.def
+++ b/llvm/lib/Target/BPF/BTF.def
@@ -30,5 +30,6 @@ HANDLE_BTF_KIND(12, FUNC)
HANDLE_BTF_KIND(13, FUNC_PROTO)
HANDLE_BTF_KIND(14, VAR)
HANDLE_BTF_KIND(15, DATASEC)
+HANDLE_BTF_KIND(16, FLOAT)
#undef HANDLE_BTF_KIND
diff --git a/llvm/lib/Target/BPF/BTFDebug.cpp b/llvm/lib/Target/BPF/BTFDebug.cpp
index f9bdffe7cbae..9249d679c7bd 100644
--- a/llvm/lib/Target/BPF/BTFDebug.cpp
+++ b/llvm/lib/Target/BPF/BTFDebug.cpp
@@ -371,6 +371,21 @@ void BTFKindDataSec::emitType(MCStreamer &OS) {
}
}
+BTFTypeFloat::BTFTypeFloat(uint32_t SizeInBits, StringRef TypeName)
+ : Name(TypeName) {
+ Kind = BTF::BTF_KIND_FLOAT;
+ BTFType.Info = Kind << 24;
+ BTFType.Size = roundupToBytes(SizeInBits);
+}
+
+void BTFTypeFloat::completeType(BTFDebug &BDebug) {
+ if (IsCompleted)
+ return;
+ IsCompleted = true;
+
+ BTFType.NameOff = BDebug.addString(Name);
+}
+
uint32_t BTFStringTable::addString(StringRef S) {
// Check whether the string already exists.
for (auto &OffsetM : OffsetToIdMap) {
@@ -409,18 +424,28 @@ uint32_t BTFDebug::addType(std::unique_ptr<BTFTypeBase> TypeEntry) {
}
void BTFDebug::visitBasicType(const DIBasicType *BTy, uint32_t &TypeId) {
- // Only int types are supported in BTF.
+ // Only int and binary floating point types are supported in BTF.
uint32_t Encoding = BTy->getEncoding();
- if (Encoding != dwarf::DW_ATE_boolean && Encoding != dwarf::DW_ATE_signed &&
- Encoding != dwarf::DW_ATE_signed_char &&
- Encoding != dwarf::DW_ATE_unsigned &&
- Encoding != dwarf::DW_ATE_unsigned_char)
+ std::unique_ptr<BTFTypeBase> TypeEntry;
+ switch (Encoding) {
+ case dwarf::DW_ATE_boolean:
+ case dwarf::DW_ATE_signed:
+ case dwarf::DW_ATE_signed_char:
+ case dwarf::DW_ATE_unsigned:
+ case dwarf::DW_ATE_unsigned_char:
+ // Create a BTF type instance for this DIBasicType and put it into
+ // DIToIdMap for cross-type reference check.
+ TypeEntry = std::make_unique<BTFTypeInt>(
+ Encoding, BTy->getSizeInBits(), BTy->getOffsetInBits(), BTy->getName());
+ break;
+ case dwarf::DW_ATE_float:
+ TypeEntry =
+ std::make_unique<BTFTypeFloat>(BTy->getSizeInBits(), BTy->getName());
+ break;
+ default:
return;
+ }
- // Create a BTF type instance for this DIBasicType and put it into
- // DIToIdMap for cross-type reference check.
- auto TypeEntry = std::make_unique<BTFTypeInt>(
- Encoding, BTy->getSizeInBits(), BTy->getOffsetInBits(), BTy->getName());
TypeId = addType(std::move(TypeEntry), BTy);
}
@@ -1171,6 +1196,7 @@ void BTFDebug::processGlobals(bool ProcessingMapDef) {
if (Linkage != GlobalValue::InternalLinkage &&
Linkage != GlobalValue::ExternalLinkage &&
Linkage != GlobalValue::WeakAnyLinkage &&
+ Linkage != GlobalValue::WeakODRLinkage &&
Linkage != GlobalValue::ExternalWeakLinkage)
continue;
@@ -1199,8 +1225,8 @@ void BTFDebug::processGlobals(bool ProcessingMapDef) {
const DataLayout &DL = Global.getParent()->getDataLayout();
uint32_t Size = DL.getTypeAllocSize(Global.getType()->getElementType());
- DataSecEntries[std::string(SecName)]->addVar(VarId, Asm->getSymbol(&Global),
- Size);
+ DataSecEntries[std::string(SecName)]->addDataSecEntry(VarId,
+ Asm->getSymbol(&Global), Size);
}
}
@@ -1278,7 +1304,19 @@ void BTFDebug::processFuncPrototypes(const Function *F) {
uint8_t Scope = BTF::FUNC_EXTERN;
auto FuncTypeEntry =
std::make_unique<BTFTypeFunc>(SP->getName(), ProtoTypeId, Scope);
- addType(std::move(FuncTypeEntry));
+ uint32_t FuncId = addType(std::move(FuncTypeEntry));
+ if (F->hasSection()) {
+ StringRef SecName = F->getSection();
+
+ if (DataSecEntries.find(std::string(SecName)) == DataSecEntries.end()) {
+ DataSecEntries[std::string(SecName)] =
+ std::make_unique<BTFKindDataSec>(Asm, std::string(SecName));
+ }
+
+ // We really don't know func size, set it to 0.
+ DataSecEntries[std::string(SecName)]->addDataSecEntry(FuncId,
+ Asm->getSymbol(F), 0);
+ }
}
void BTFDebug::endModule() {
diff --git a/llvm/lib/Target/BPF/BTFDebug.h b/llvm/lib/Target/BPF/BTFDebug.h
index 1bad0d11fee4..76f1901779bb 100644
--- a/llvm/lib/Target/BPF/BTFDebug.h
+++ b/llvm/lib/Target/BPF/BTFDebug.h
@@ -187,7 +187,7 @@ public:
uint32_t getSize() override {
return BTFTypeBase::getSize() + BTF::BTFDataSecVarSize * Vars.size();
}
- void addVar(uint32_t Id, const MCSymbol *Sym, uint32_t Size) {
+ void addDataSecEntry(uint32_t Id, const MCSymbol *Sym, uint32_t Size) {
Vars.push_back(std::make_tuple(Id, Sym, Size));
}
std::string getName() { return Name; }
@@ -195,6 +195,15 @@ public:
void emitType(MCStreamer &OS) override;
};
+/// Handle binary floating point type.
+class BTFTypeFloat : public BTFTypeBase {
+ StringRef Name;
+
+public:
+ BTFTypeFloat(uint32_t SizeInBits, StringRef TypeName);
+ void completeType(BTFDebug &BDebug) override;
+};
+
/// String table.
class BTFStringTable {
/// String table size in bytes.
diff --git a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
index cce21f32414a..6257709731b9 100644
--- a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
+++ b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
@@ -321,6 +321,12 @@ bool PPCAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo,
O << "0, ";
printOperand(MI, OpNo, O);
return false;
+ case 'I':
+ // Write 'i' if an integer constant, otherwise nothing. Used to print
+ // addi vs add, etc.
+ if (MI->getOperand(OpNo).isImm())
+ O << "i";
+ return false;
case 'U': // Print 'u' for update form.
case 'X': // Print 'x' for indexed form.
// FIXME: Currently for PowerPC memory operands are always loaded
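A hedged usage sketch of the new 'I' output modifier, mirroring the GCC convention it implements: with an "ri" constraint the mnemonic becomes add or addi depending on whether the operand is selected as a register or an immediate:

long add_ri(long a, long b) {
  long r;
  // %I2 prints "i" when operand 2 is an immediate, selecting addi over add.
  asm("add%I2 %0, %1, %2" : "=r"(r) : "r"(a), "ri"(b));
  return r;
}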
diff --git a/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp b/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp
index 50ce11b8374f..16536bf23deb 100644
--- a/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp
@@ -859,15 +859,15 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF,
BuildMI(MBB, MBBI, dl,
TII.get(isPPC64 ? PPC::PROBED_STACKALLOC_64
: PPC::PROBED_STACKALLOC_32))
- .addDef(ScratchReg)
- .addDef(TempReg) // TempReg stores the old sp.
+ .addDef(TempReg)
+ .addDef(ScratchReg) // ScratchReg stores the old sp.
.addImm(NegFrameSize);
// FIXME: HasSTUX is only read if HasRedZone is not set, in such case, we
// update the ScratchReg to meet the assumption that ScratchReg contains
// the NegFrameSize. This solution is rather tricky.
if (!HasRedZone) {
BuildMI(MBB, MBBI, dl, TII.get(PPC::SUBF), ScratchReg)
- .addReg(TempReg)
+ .addReg(ScratchReg)
.addReg(SPReg);
HasSTUX = true;
}
@@ -1187,7 +1187,6 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF,
void PPCFrameLowering::inlineStackProbe(MachineFunction &MF,
MachineBasicBlock &PrologMBB) const {
- // TODO: Generate CFI instructions.
bool isPPC64 = Subtarget.isPPC64();
const PPCTargetLowering &TLI = *Subtarget.getTargetLowering();
const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
@@ -1219,6 +1218,7 @@ void PPCFrameLowering::inlineStackProbe(MachineFunction &MF,
bool HasBP = RegInfo->hasBasePointer(MF);
Register BPReg = RegInfo->getBaseRegister(MF);
Align MaxAlign = MFI.getMaxAlign();
+ bool HasRedZone = Subtarget.isPPC64() || !Subtarget.isSVR4ABI();
const MCInstrDesc &CopyInst = TII.get(isPPC64 ? PPC::OR8 : PPC::OR);
// Subroutines to generate .cfi_* directives.
auto buildDefCFAReg = [&](MachineBasicBlock &MBB,
@@ -1272,212 +1272,221 @@ void PPCFrameLowering::inlineStackProbe(MachineFunction &MF,
.addReg(SPReg)
.addReg(NegSizeReg);
};
- // Used to probe realignment gap [stackptr - (stackptr % align), stackptr)
- // when HasBP && isPPC64. In such scenario, normally we have r0, r1, r12, r30
- // available and r1 is already copied to r30 which is BPReg. So BPReg stores
- // the value of stackptr.
- // First we have to probe tail interval whose size is less than probesize,
- // i.e., [stackptr - (stackptr % align) % probesize, stackptr). At this stage,
- // ScratchReg stores the value of ((stackptr % align) % probesize). Then we
- // probe each block sized probesize until stackptr meets
- // (stackptr - (stackptr % align)). At this stage, ScratchReg is materialized
- // as negprobesize. At both stages, TempReg stores the value of
- // (stackptr - (stackptr % align)).
- auto dynamicProbe = [&](MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MBBI, Register ScratchReg,
- Register TempReg) {
- assert(HasBP && isPPC64 && "Probe alignment part not available");
+ // Used to probe the stack when realignment is required.
+ // Note that, per the ABI's requirements, *sp must always equal the value
+ // of the back-chain pointer, so only st(w|d)u(x) can be used to update sp.
+ // The following is pseudocode:
+ // final_sp = (sp & align) + negframesize;
+ // neg_gap = final_sp - sp;
+ // while (neg_gap < negprobesize) {
+ // stdu fp, negprobesize(sp);
+ // neg_gap -= negprobesize;
+ // }
+ // stdux fp, sp, neg_gap
+ //
+ // When HasBP && HasRedzone, the back-chain pointer is already saved in
+ // BPReg before the probe code runs, so we don't need to save it again; this
+ // frees one additional register that can be used to materialize the probe
+ // size when the XForm is needed. Otherwise we cannot materialize the probe
+ // size, so only the DForm can be used for now.
+ //
+ // The allocations are:
+ // if (HasBP && HasRedzone) {
+ // r0: materialize the probe size if needed so that we can use the XForm.
+ // r12: `neg_gap`
+ // } else {
+ // r0: back-chain pointer
+ // r12: `neg_gap`.
+ // }
+ auto probeRealignedStack = [&](MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ Register ScratchReg, Register TempReg) {
+ assert(HasBP && "The function is supposed to have base pointer when its "
+ "stack is realigned.");
assert(isPowerOf2_64(ProbeSize) && "Probe size should be power of 2");
- // ScratchReg = stackptr % align
- BuildMI(MBB, MBBI, DL, TII.get(PPC::RLDICL), ScratchReg)
- .addReg(BPReg)
- .addImm(0)
- .addImm(64 - Log2(MaxAlign));
- // TempReg = stackptr - (stackptr % align)
- BuildMI(MBB, MBBI, DL, TII.get(PPC::SUBFC8), TempReg)
- .addReg(ScratchReg)
- .addReg(BPReg);
- // ScratchReg = (stackptr % align) % probesize
- BuildMI(MBB, MBBI, DL, TII.get(PPC::RLDICL), ScratchReg)
- .addReg(ScratchReg)
- .addImm(0)
- .addImm(64 - Log2(ProbeSize));
+
+ // FIXME: We can eliminate this limitation if we get more information about
+ // which parts of the red zone are already used. A used red zone can be
+ // treated as probed, but there might be `holes' in the probed red zone,
+ // which could complicate the implementation.
+ assert(ProbeSize >= Subtarget.getRedZoneSize() &&
+ "Probe size should be larger than or equal to the size of the red zone "
+ "so that the red zone is not clobbered by probing.");
+
+ Register &FinalStackPtr = TempReg;
+ // FIXME: We currently only support a NegProbeSize that is materializable
+ // by the DForm. When HasBP && HasRedzone, we could use the XForm given an
+ // additional idle register.
+ NegProbeSize = std::max(NegProbeSize, -((int64_t)1 << 15));
+ assert(isInt<16>(NegProbeSize) &&
+ "NegProbeSize should be materializable by DForm");
Register CRReg = PPC::CR0;
- // If (stackptr % align) % probesize == 0, we should not generate probe
- // code. Layout of output assembly kinda like:
+ // The layout of the output assembly is roughly:
// bb.0:
// ...
- // cmpldi $scratchreg, 0
- // beq bb.2
- // bb.1: # Probe tail interval
- // neg $scratchreg, $scratchreg
- // stdux $bpreg, r1, $scratchreg
+ // sub $scratchreg, $finalsp, r1
+ // cmpdi $scratchreg, <negprobesize>
+ // bge bb.2
+ // bb.1:
+ // stdu <backchain>, <negprobesize>(r1)
+ // sub $scratchreg, $scratchreg, negprobesize
+ // cmpdi $scratchreg, <negprobesize>
+ // blt bb.1
// bb.2:
- // <materialize negprobesize into $scratchreg>
- // cmpd r1, $tempreg
- // beq bb.4
- // bb.3: # Loop to probe each block
- // stdux $bpreg, r1, $scratchreg
- // cmpd r1, $tempreg
- // bne bb.3
- // bb.4:
- // ...
+ // stdux <backchain>, r1, $scratchreg
MachineFunction::iterator MBBInsertPoint = std::next(MBB.getIterator());
- MachineBasicBlock *ProbeResidualMBB = MF.CreateMachineBasicBlock(ProbedBB);
- MF.insert(MBBInsertPoint, ProbeResidualMBB);
- MachineBasicBlock *ProbeLoopPreHeaderMBB =
- MF.CreateMachineBasicBlock(ProbedBB);
- MF.insert(MBBInsertPoint, ProbeLoopPreHeaderMBB);
MachineBasicBlock *ProbeLoopBodyMBB = MF.CreateMachineBasicBlock(ProbedBB);
MF.insert(MBBInsertPoint, ProbeLoopBodyMBB);
MachineBasicBlock *ProbeExitMBB = MF.CreateMachineBasicBlock(ProbedBB);
MF.insert(MBBInsertPoint, ProbeExitMBB);
- // bb.4
- ProbeExitMBB->splice(ProbeExitMBB->end(), &MBB, MBBI, MBB.end());
- ProbeExitMBB->transferSuccessorsAndUpdatePHIs(&MBB);
+ // bb.2
+ {
+ Register BackChainPointer = HasRedZone ? BPReg : TempReg;
+ allocateAndProbe(*ProbeExitMBB, ProbeExitMBB->end(), 0, ScratchReg, false,
+ BackChainPointer);
+ if (HasRedZone)
+ // PROBED_STACKALLOC_64 assumes Operand(1) stores the old sp, so copy
+ // BPReg to TempReg to satisfy it.
+ BuildMI(*ProbeExitMBB, ProbeExitMBB->end(), DL, CopyInst, TempReg)
+ .addReg(BPReg)
+ .addReg(BPReg);
+ ProbeExitMBB->splice(ProbeExitMBB->end(), &MBB, MBBI, MBB.end());
+ ProbeExitMBB->transferSuccessorsAndUpdatePHIs(&MBB);
+ }
// bb.0
- BuildMI(&MBB, DL, TII.get(PPC::CMPDI), CRReg).addReg(ScratchReg).addImm(0);
- BuildMI(&MBB, DL, TII.get(PPC::BCC))
- .addImm(PPC::PRED_EQ)
- .addReg(CRReg)
- .addMBB(ProbeLoopPreHeaderMBB);
- MBB.addSuccessor(ProbeResidualMBB);
- MBB.addSuccessor(ProbeLoopPreHeaderMBB);
+ {
+ BuildMI(&MBB, DL, TII.get(isPPC64 ? PPC::SUBF8 : PPC::SUBF), ScratchReg)
+ .addReg(SPReg)
+ .addReg(FinalStackPtr);
+ if (!HasRedZone)
+ BuildMI(&MBB, DL, CopyInst, TempReg).addReg(SPReg).addReg(SPReg);
+ BuildMI(&MBB, DL, TII.get(isPPC64 ? PPC::CMPDI : PPC::CMPWI), CRReg)
+ .addReg(ScratchReg)
+ .addImm(NegProbeSize);
+ BuildMI(&MBB, DL, TII.get(PPC::BCC))
+ .addImm(PPC::PRED_GE)
+ .addReg(CRReg)
+ .addMBB(ProbeExitMBB);
+ MBB.addSuccessor(ProbeLoopBodyMBB);
+ MBB.addSuccessor(ProbeExitMBB);
+ }
// bb.1
- BuildMI(ProbeResidualMBB, DL, TII.get(PPC::NEG8), ScratchReg)
- .addReg(ScratchReg);
- allocateAndProbe(*ProbeResidualMBB, ProbeResidualMBB->end(), 0, ScratchReg,
- false, BPReg);
- ProbeResidualMBB->addSuccessor(ProbeLoopPreHeaderMBB);
- // bb.2
- MaterializeImm(*ProbeLoopPreHeaderMBB, ProbeLoopPreHeaderMBB->end(),
- NegProbeSize, ScratchReg);
- BuildMI(ProbeLoopPreHeaderMBB, DL, TII.get(PPC::CMPD), CRReg)
- .addReg(SPReg)
- .addReg(TempReg);
- BuildMI(ProbeLoopPreHeaderMBB, DL, TII.get(PPC::BCC))
- .addImm(PPC::PRED_EQ)
- .addReg(CRReg)
- .addMBB(ProbeExitMBB);
- ProbeLoopPreHeaderMBB->addSuccessor(ProbeLoopBodyMBB);
- ProbeLoopPreHeaderMBB->addSuccessor(ProbeExitMBB);
- // bb.3
- allocateAndProbe(*ProbeLoopBodyMBB, ProbeLoopBodyMBB->end(), 0, ScratchReg,
- false, BPReg);
- BuildMI(ProbeLoopBodyMBB, DL, TII.get(PPC::CMPD), CRReg)
- .addReg(SPReg)
- .addReg(TempReg);
- BuildMI(ProbeLoopBodyMBB, DL, TII.get(PPC::BCC))
- .addImm(PPC::PRED_NE)
- .addReg(CRReg)
- .addMBB(ProbeLoopBodyMBB);
- ProbeLoopBodyMBB->addSuccessor(ProbeExitMBB);
- ProbeLoopBodyMBB->addSuccessor(ProbeLoopBodyMBB);
+ {
+ Register BackChainPointer = HasRedZone ? BPReg : TempReg;
+ allocateAndProbe(*ProbeLoopBodyMBB, ProbeLoopBodyMBB->end(), NegProbeSize,
+ 0, true /*UseDForm*/, BackChainPointer);
+ BuildMI(ProbeLoopBodyMBB, DL, TII.get(isPPC64 ? PPC::ADDI8 : PPC::ADDI),
+ ScratchReg)
+ .addReg(ScratchReg)
+ .addImm(-NegProbeSize);
+ BuildMI(ProbeLoopBodyMBB, DL, TII.get(isPPC64 ? PPC::CMPDI : PPC::CMPWI),
+ CRReg)
+ .addReg(ScratchReg)
+ .addImm(NegProbeSize);
+ BuildMI(ProbeLoopBodyMBB, DL, TII.get(PPC::BCC))
+ .addImm(PPC::PRED_LT)
+ .addReg(CRReg)
+ .addMBB(ProbeLoopBodyMBB);
+ ProbeLoopBodyMBB->addSuccessor(ProbeExitMBB);
+ ProbeLoopBodyMBB->addSuccessor(ProbeLoopBodyMBB);
+ }
// Update liveins.
- recomputeLiveIns(*ProbeResidualMBB);
- recomputeLiveIns(*ProbeLoopPreHeaderMBB);
recomputeLiveIns(*ProbeLoopBodyMBB);
recomputeLiveIns(*ProbeExitMBB);
return ProbeExitMBB;
};
// For case HasBP && MaxAlign > 1, we have to realign the SP by performing
- // SP = SP - SP % MaxAlign.
+ // SP = SP - SP % MaxAlign, which makes the probe behave more like a dynamic
+ // probe, since the offset subtracted from SP is determined by SP's runtime
+ // value.
if (HasBP && MaxAlign > 1) {
- // FIXME: Currently only probe the gap [stackptr & alignmask, stackptr) in
- // 64-bit mode.
- if (isPPC64) {
- // Use BPReg to calculate CFA.
- if (needsCFI)
- buildDefCFA(*CurrentMBB, {MI}, BPReg, 0);
- // Since we have SPReg copied to BPReg at the moment, FPReg can be used as
- // TempReg.
- Register TempReg = FPReg;
- CurrentMBB = dynamicProbe(*CurrentMBB, {MI}, ScratchReg, TempReg);
- // Copy BPReg to FPReg to meet the definition of PROBED_STACKALLOC_64.
- BuildMI(*CurrentMBB, {MI}, DL, CopyInst, FPReg)
- .addReg(BPReg)
- .addReg(BPReg);
- } else {
- // Initialize current frame pointer.
- BuildMI(*CurrentMBB, {MI}, DL, CopyInst, FPReg)
+ // Calculate final stack pointer.
+ if (isPPC64)
+ BuildMI(*CurrentMBB, {MI}, DL, TII.get(PPC::RLDICL), ScratchReg)
.addReg(SPReg)
- .addReg(SPReg);
- // Use FPReg to calculate CFA.
- if (needsCFI)
- buildDefCFA(*CurrentMBB, {MI}, FPReg, 0);
+ .addImm(0)
+ .addImm(64 - Log2(MaxAlign));
+ else
BuildMI(*CurrentMBB, {MI}, DL, TII.get(PPC::RLWINM), ScratchReg)
- .addReg(FPReg)
+ .addReg(SPReg)
.addImm(0)
.addImm(32 - Log2(MaxAlign))
.addImm(31);
- BuildMI(*CurrentMBB, {MI}, DL, TII.get(PPC::SUBFC), SPReg)
- .addReg(ScratchReg)
- .addReg(SPReg);
- }
+ BuildMI(*CurrentMBB, {MI}, DL, TII.get(isPPC64 ? PPC::SUBF8 : PPC::SUBF),
+ FPReg)
+ .addReg(ScratchReg)
+ .addReg(SPReg);
+ MaterializeImm(*CurrentMBB, {MI}, NegFrameSize, ScratchReg);
+ BuildMI(*CurrentMBB, {MI}, DL, TII.get(isPPC64 ? PPC::ADD8 : PPC::ADD4),
+ FPReg)
+ .addReg(ScratchReg)
+ .addReg(FPReg);
+ CurrentMBB = probeRealignedStack(*CurrentMBB, {MI}, ScratchReg, FPReg);
+ if (needsCFI)
+ buildDefCFAReg(*CurrentMBB, {MI}, FPReg);
} else {
// Initialize current frame pointer.
BuildMI(*CurrentMBB, {MI}, DL, CopyInst, FPReg).addReg(SPReg).addReg(SPReg);
// Use FPReg to calculate CFA.
if (needsCFI)
buildDefCFA(*CurrentMBB, {MI}, FPReg, 0);
- }
- // Probe residual part.
- if (NegResidualSize) {
- bool ResidualUseDForm = CanUseDForm(NegResidualSize);
- if (!ResidualUseDForm)
- MaterializeImm(*CurrentMBB, {MI}, NegResidualSize, ScratchReg);
- allocateAndProbe(*CurrentMBB, {MI}, NegResidualSize, ScratchReg,
- ResidualUseDForm, FPReg);
- }
- bool UseDForm = CanUseDForm(NegProbeSize);
- // If number of blocks is small, just probe them directly.
- if (NumBlocks < 3) {
- if (!UseDForm)
- MaterializeImm(*CurrentMBB, {MI}, NegProbeSize, ScratchReg);
- for (int i = 0; i < NumBlocks; ++i)
- allocateAndProbe(*CurrentMBB, {MI}, NegProbeSize, ScratchReg, UseDForm,
- FPReg);
- if (needsCFI) {
- // Restore using SPReg to calculate CFA.
- buildDefCFAReg(*CurrentMBB, {MI}, SPReg);
+ // Probe residual part.
+ if (NegResidualSize) {
+ bool ResidualUseDForm = CanUseDForm(NegResidualSize);
+ if (!ResidualUseDForm)
+ MaterializeImm(*CurrentMBB, {MI}, NegResidualSize, ScratchReg);
+ allocateAndProbe(*CurrentMBB, {MI}, NegResidualSize, ScratchReg,
+ ResidualUseDForm, FPReg);
}
- } else {
- // Since CTR is a volatile register and current shrinkwrap implementation
- // won't choose an MBB in a loop as the PrologMBB, it's safe to synthesize a
- // CTR loop to probe.
- // Calculate trip count and stores it in CTRReg.
- MaterializeImm(*CurrentMBB, {MI}, NumBlocks, ScratchReg);
- BuildMI(*CurrentMBB, {MI}, DL, TII.get(isPPC64 ? PPC::MTCTR8 : PPC::MTCTR))
- .addReg(ScratchReg, RegState::Kill);
- if (!UseDForm)
- MaterializeImm(*CurrentMBB, {MI}, NegProbeSize, ScratchReg);
- // Create MBBs of the loop.
- MachineFunction::iterator MBBInsertPoint =
- std::next(CurrentMBB->getIterator());
- MachineBasicBlock *LoopMBB = MF.CreateMachineBasicBlock(ProbedBB);
- MF.insert(MBBInsertPoint, LoopMBB);
- MachineBasicBlock *ExitMBB = MF.CreateMachineBasicBlock(ProbedBB);
- MF.insert(MBBInsertPoint, ExitMBB);
- // Synthesize the loop body.
- allocateAndProbe(*LoopMBB, LoopMBB->end(), NegProbeSize, ScratchReg,
- UseDForm, FPReg);
- BuildMI(LoopMBB, DL, TII.get(isPPC64 ? PPC::BDNZ8 : PPC::BDNZ))
- .addMBB(LoopMBB);
- LoopMBB->addSuccessor(ExitMBB);
- LoopMBB->addSuccessor(LoopMBB);
- // Synthesize the exit MBB.
- ExitMBB->splice(ExitMBB->end(), CurrentMBB,
- std::next(MachineBasicBlock::iterator(MI)),
- CurrentMBB->end());
- ExitMBB->transferSuccessorsAndUpdatePHIs(CurrentMBB);
- CurrentMBB->addSuccessor(LoopMBB);
- if (needsCFI) {
- // Restore using SPReg to calculate CFA.
- buildDefCFAReg(*ExitMBB, ExitMBB->begin(), SPReg);
+ bool UseDForm = CanUseDForm(NegProbeSize);
+ // If number of blocks is small, just probe them directly.
+ if (NumBlocks < 3) {
+ if (!UseDForm)
+ MaterializeImm(*CurrentMBB, {MI}, NegProbeSize, ScratchReg);
+ for (int i = 0; i < NumBlocks; ++i)
+ allocateAndProbe(*CurrentMBB, {MI}, NegProbeSize, ScratchReg, UseDForm,
+ FPReg);
+ if (needsCFI) {
+ // Restore using SPReg to calculate CFA.
+ buildDefCFAReg(*CurrentMBB, {MI}, SPReg);
+ }
+ } else {
+ // Since CTR is a volatile register and the current shrink-wrapping
+ // implementation won't choose an MBB inside a loop as the PrologMBB, it's
+ // safe to synthesize a CTR loop to probe.
+ // Calculate the trip count and store it in CTRReg.
+ MaterializeImm(*CurrentMBB, {MI}, NumBlocks, ScratchReg);
+ BuildMI(*CurrentMBB, {MI}, DL, TII.get(isPPC64 ? PPC::MTCTR8 : PPC::MTCTR))
+ .addReg(ScratchReg, RegState::Kill);
+ if (!UseDForm)
+ MaterializeImm(*CurrentMBB, {MI}, NegProbeSize, ScratchReg);
+ // Create MBBs of the loop.
+ MachineFunction::iterator MBBInsertPoint =
+ std::next(CurrentMBB->getIterator());
+ MachineBasicBlock *LoopMBB = MF.CreateMachineBasicBlock(ProbedBB);
+ MF.insert(MBBInsertPoint, LoopMBB);
+ MachineBasicBlock *ExitMBB = MF.CreateMachineBasicBlock(ProbedBB);
+ MF.insert(MBBInsertPoint, ExitMBB);
+ // Synthesize the loop body.
+ allocateAndProbe(*LoopMBB, LoopMBB->end(), NegProbeSize, ScratchReg,
+ UseDForm, FPReg);
+ BuildMI(LoopMBB, DL, TII.get(isPPC64 ? PPC::BDNZ8 : PPC::BDNZ))
+ .addMBB(LoopMBB);
+ LoopMBB->addSuccessor(ExitMBB);
+ LoopMBB->addSuccessor(LoopMBB);
+ // Synthesize the exit MBB.
+ ExitMBB->splice(ExitMBB->end(), CurrentMBB,
+ std::next(MachineBasicBlock::iterator(MI)),
+ CurrentMBB->end());
+ ExitMBB->transferSuccessorsAndUpdatePHIs(CurrentMBB);
+ CurrentMBB->addSuccessor(LoopMBB);
+ if (needsCFI) {
+ // Restore using SPReg to calculate CFA.
+ buildDefCFAReg(*ExitMBB, ExitMBB->begin(), SPReg);
+ }
+ // Update liveins.
+ recomputeLiveIns(*LoopMBB);
+ recomputeLiveIns(*ExitMBB);
}
- // Update liveins.
- recomputeLiveIns(*LoopMBB);
- recomputeLiveIns(*ExitMBB);
}
++NumPrologProbed;
MI.eraseFromParent();
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 929a72ac687e..7833bfc1d1b6 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -167,6 +167,10 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
// Sub-word ATOMIC_CMP_SWAP need to ensure that the input is zero-extended.
setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, Custom);
+ // Custom lower inline assembly to check for special registers.
+ setOperationAction(ISD::INLINEASM, MVT::Other, Custom);
+ setOperationAction(ISD::INLINEASM_BR, MVT::Other, Custom);
+
// PowerPC has an i16 but no i8 (or i1) SEXTLOAD.
for (MVT VT : MVT::integer_valuetypes()) {
setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
@@ -3461,6 +3465,57 @@ SDValue PPCTargetLowering::LowerADJUST_TRAMPOLINE(SDValue Op,
return Op.getOperand(0);
}
+SDValue PPCTargetLowering::LowerINLINEASM(SDValue Op, SelectionDAG &DAG) const {
+ MachineFunction &MF = DAG.getMachineFunction();
+ PPCFunctionInfo &MFI = *MF.getInfo<PPCFunctionInfo>();
+
+ assert((Op.getOpcode() == ISD::INLINEASM ||
+ Op.getOpcode() == ISD::INLINEASM_BR) &&
+ "Expecting Inline ASM node.");
+
+ // If an LR store is already known to be required, then there is no point
+ // in checking this ASM as well.
+ if (MFI.isLRStoreRequired())
+ return Op;
+
+ // Inline ASM nodes have an optional last operand that is an incoming Flag of
+ // type MVT::Glue. We want to ignore this last operand if that is the case.
+ unsigned NumOps = Op.getNumOperands();
+ if (Op.getOperand(NumOps - 1).getValueType() == MVT::Glue)
+ --NumOps;
+
+ // Check all operands that may contain the LR.
+ for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) {
+ unsigned Flags = cast<ConstantSDNode>(Op.getOperand(i))->getZExtValue();
+ unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags);
+ ++i; // Skip the ID value.
+
+ switch (InlineAsm::getKind(Flags)) {
+ default:
+ llvm_unreachable("Bad flags!");
+ case InlineAsm::Kind_RegUse:
+ case InlineAsm::Kind_Imm:
+ case InlineAsm::Kind_Mem:
+ i += NumVals;
+ break;
+ case InlineAsm::Kind_Clobber:
+ case InlineAsm::Kind_RegDef:
+ case InlineAsm::Kind_RegDefEarlyClobber: {
+ for (; NumVals; --NumVals, ++i) {
+ Register Reg = cast<RegisterSDNode>(Op.getOperand(i))->getReg();
+ if (Reg != PPC::LR && Reg != PPC::LR8)
+ continue;
+ MFI.setLRStoreRequired();
+ return Op;
+ }
+ break;
+ }
+ }
+ }
+
+ return Op;
+}
+
SDValue PPCTargetLowering::LowerINIT_TRAMPOLINE(SDValue Op,
SelectionDAG &DAG) const {
if (Subtarget.isAIXABI())
@@ -10316,6 +10371,8 @@ SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::INIT_TRAMPOLINE: return LowerINIT_TRAMPOLINE(Op, DAG);
case ISD::ADJUST_TRAMPOLINE: return LowerADJUST_TRAMPOLINE(Op, DAG);
+ case ISD::INLINEASM:
+ case ISD::INLINEASM_BR: return LowerINLINEASM(Op, DAG);
// Variable argument lowering.
case ISD::VASTART: return LowerVASTART(Op, DAG);
case ISD::VAARG: return LowerVAARG(Op, DAG);
@@ -15090,6 +15147,11 @@ PPCTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
return std::make_pair(0U, &PPC::VSSRCRegClass);
else
return std::make_pair(0U, &PPC::VSFRCRegClass);
+ } else if (Constraint == "lr") {
+ if (VT == MVT::i64)
+ return std::make_pair(0U, &PPC::LR8RCRegClass);
+ else
+ return std::make_pair(0U, &PPC::LRRCRegClass);
}
// If we name a VSX register, we can't defer to the base class because it
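A hedged sketch of the inline asm this lowering is concerned with: once LR (or LR8) appears as a clobber or a def, the function must save and restore the link register, which LowerINLINEASM now records via setLRStoreRequired (illustrative only):

void call_via_asm(void (*fn)(void)) {
  // bctrl writes the link register, so LR must be declared clobbered; the
  // caller's prologue/epilogue must therefore save and restore LR.
  asm volatile("mtctr %0\n\tbctrl" : : "r"(fn) : "lr", "ctr");
}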
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h
index 0dda2c181572..836c52bdff95 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.h
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h
@@ -1128,6 +1128,7 @@ namespace llvm {
SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerADJUST_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerINLINEASM(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerVACOPY(SDValue Op, SelectionDAG &DAG) const;
diff --git a/llvm/lib/Target/PowerPC/PPCRegisterInfo.td b/llvm/lib/Target/PowerPC/PPCRegisterInfo.td
index e03617aa75ff..551735c85b51 100644
--- a/llvm/lib/Target/PowerPC/PPCRegisterInfo.td
+++ b/llvm/lib/Target/PowerPC/PPCRegisterInfo.td
@@ -173,7 +173,7 @@ let SubRegIndices = [sub_vsx0, sub_vsx1] in {
foreach Index = { 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30 } in {
def VSRp#!srl(Index, 1) : VSRPair<!srl(Index, 1), "vsp"#Index,
[!cast<VSRL>("VSL"#Index), !cast<VSRL>("VSL"#!add(Index, 1))]>,
- DwarfRegNum<[0, 0]>;
+ DwarfRegNum<[-1, -1]>;
}
// VSR pairs 16 - 31 (corresponding to VSRs 32 - 62 paired with 33 - 63).
@@ -181,7 +181,7 @@ let SubRegIndices = [sub_vsx0, sub_vsx1] in {
def VSRp#!add(!srl(Index, 1), 16) :
VSRPair<!add(!srl(Index, 1), 16), "vsp"#!add(Index, 32),
[!cast<VR>("V"#Index), !cast<VR>("V"#!add(Index, 1))]>,
- DwarfRegNum<[0, 0]>;
+ DwarfRegNum<[-1, -1]>;
}
}
@@ -409,20 +409,27 @@ def CTRRC8 : RegisterClass<"PPC", [i64], 64, (add CTR8)> {
let isAllocatable = 0;
}
+def LRRC : RegisterClass<"PPC", [i32], 32, (add LR)> {
+ let isAllocatable = 0;
+}
+def LR8RC : RegisterClass<"PPC", [i64], 64, (add LR8)> {
+ let isAllocatable = 0;
+}
+
def VRSAVERC : RegisterClass<"PPC", [i32], 32, (add VRSAVE)>;
def CARRYRC : RegisterClass<"PPC", [i32], 32, (add CARRY, XER)> {
let CopyCost = -1;
}
let SubRegIndices = [sub_pair0, sub_pair1] in {
- def ACC0 : ACC<0, "acc0", [VSRp0, VSRp1]>, DwarfRegNum<[0, 0]>;
- def ACC1 : ACC<1, "acc1", [VSRp2, VSRp3]>, DwarfRegNum<[0, 0]>;
- def ACC2 : ACC<2, "acc2", [VSRp4, VSRp5]>, DwarfRegNum<[0, 0]>;
- def ACC3 : ACC<3, "acc3", [VSRp6, VSRp7]>, DwarfRegNum<[0, 0]>;
- def ACC4 : ACC<4, "acc4", [VSRp8, VSRp9]>, DwarfRegNum<[0, 0]>;
- def ACC5 : ACC<5, "acc5", [VSRp10, VSRp11]>, DwarfRegNum<[0, 0]>;
- def ACC6 : ACC<6, "acc6", [VSRp12, VSRp13]>, DwarfRegNum<[0, 0]>;
- def ACC7 : ACC<7, "acc7", [VSRp14, VSRp15]>, DwarfRegNum<[0, 0]>;
+ def ACC0 : ACC<0, "acc0", [VSRp0, VSRp1]>, DwarfRegNum<[-1, -1]>;
+ def ACC1 : ACC<1, "acc1", [VSRp2, VSRp3]>, DwarfRegNum<[-1, -1]>;
+ def ACC2 : ACC<2, "acc2", [VSRp4, VSRp5]>, DwarfRegNum<[-1, -1]>;
+ def ACC3 : ACC<3, "acc3", [VSRp6, VSRp7]>, DwarfRegNum<[-1, -1]>;
+ def ACC4 : ACC<4, "acc4", [VSRp8, VSRp9]>, DwarfRegNum<[-1, -1]>;
+ def ACC5 : ACC<5, "acc5", [VSRp10, VSRp11]>, DwarfRegNum<[-1, -1]>;
+ def ACC6 : ACC<6, "acc6", [VSRp12, VSRp13]>, DwarfRegNum<[-1, -1]>;
+ def ACC7 : ACC<7, "acc7", [VSRp14, VSRp15]>, DwarfRegNum<[-1, -1]>;
}
def ACCRC : RegisterClass<"PPC", [v512i1], 128, (add ACC0, ACC1, ACC2, ACC3,
ACC4, ACC5, ACC6, ACC7)> {
@@ -430,14 +437,14 @@ def ACCRC : RegisterClass<"PPC", [v512i1], 128, (add ACC0, ACC1, ACC2, ACC3,
}
let SubRegIndices = [sub_pair0, sub_pair1] in {
- def UACC0 : UACC<0, "acc0", [VSRp0, VSRp1]>, DwarfRegNum<[0, 0]>;
- def UACC1 : UACC<1, "acc1", [VSRp2, VSRp3]>, DwarfRegNum<[0, 0]>;
- def UACC2 : UACC<2, "acc2", [VSRp4, VSRp5]>, DwarfRegNum<[0, 0]>;
- def UACC3 : UACC<3, "acc3", [VSRp6, VSRp7]>, DwarfRegNum<[0, 0]>;
- def UACC4 : UACC<4, "acc4", [VSRp8, VSRp9]>, DwarfRegNum<[0, 0]>;
- def UACC5 : UACC<5, "acc5", [VSRp10, VSRp11]>, DwarfRegNum<[0, 0]>;
- def UACC6 : UACC<6, "acc6", [VSRp12, VSRp13]>, DwarfRegNum<[0, 0]>;
- def UACC7 : UACC<7, "acc7", [VSRp14, VSRp15]>, DwarfRegNum<[0, 0]>;
+ def UACC0 : UACC<0, "acc0", [VSRp0, VSRp1]>, DwarfRegNum<[-1, -1]>;
+ def UACC1 : UACC<1, "acc1", [VSRp2, VSRp3]>, DwarfRegNum<[-1, -1]>;
+ def UACC2 : UACC<2, "acc2", [VSRp4, VSRp5]>, DwarfRegNum<[-1, -1]>;
+ def UACC3 : UACC<3, "acc3", [VSRp6, VSRp7]>, DwarfRegNum<[-1, -1]>;
+ def UACC4 : UACC<4, "acc4", [VSRp8, VSRp9]>, DwarfRegNum<[-1, -1]>;
+ def UACC5 : UACC<5, "acc5", [VSRp10, VSRp11]>, DwarfRegNum<[-1, -1]>;
+ def UACC6 : UACC<6, "acc6", [VSRp12, VSRp13]>, DwarfRegNum<[-1, -1]>;
+ def UACC7 : UACC<7, "acc7", [VSRp14, VSRp15]>, DwarfRegNum<[-1, -1]>;
}
def UACCRC : RegisterClass<"PPC", [v512i1], 128,
(add UACC0, UACC1, UACC2, UACC3,
diff --git a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
index b3d8100fe016..c90ff8b7d59d 100644
--- a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
@@ -1212,6 +1212,27 @@ unsigned PPCTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
return BaseT::getIntrinsicInstrCost(ICA, CostKind);
}
+bool PPCTTIImpl::areFunctionArgsABICompatible(
+ const Function *Caller, const Function *Callee,
+ SmallPtrSetImpl<Argument *> &Args) const {
+
+ // We need to ensure that argument promotion does not
+ // attempt to promote pointers to MMA types (__vector_pair
+ // and __vector_quad) since these types explicitly cannot be
+ // passed as arguments. Both of these types are larger than
+ // the 128-bit Altivec vectors and have a scalar size of 1 bit.
+ if (!BaseT::areFunctionArgsABICompatible(Caller, Callee, Args))
+ return false;
+
+ return llvm::none_of(Args, [](Argument *A) {
+ auto *EltTy = cast<PointerType>(A->getType())->getElementType();
+ if (EltTy->isSized())
+ return (EltTy->isIntOrIntVectorTy(1) &&
+ EltTy->getPrimitiveSizeInBits() > 128);
+ return false;
+ });
+}
+
bool PPCTTIImpl::canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE,
LoopInfo *LI, DominatorTree *DT,
AssumptionCache *AC, TargetLibraryInfo *LibInfo) {
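A hedged sketch of what this check rejects (requires a Power10 target with MMA enabled; purely illustrative): __vector_quad values are only usable behind a pointer, so argument promotion must not rewrite the pointer parameter into a by-value argument:

// MMA accumulators cannot be passed by value, so a callee like this must
// keep its pointer parameter un-promoted.
static void zero_acc(__vector_quad *acc) { __builtin_mma_xxsetaccz(acc); }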
diff --git a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h
index bc946715156f..c38ae90bc7dc 100644
--- a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h
+++ b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h
@@ -129,6 +129,9 @@ public:
unsigned getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
TTI::TargetCostKind CostKind);
+ bool areFunctionArgsABICompatible(const Function *Caller,
+ const Function *Callee,
+ SmallPtrSetImpl<Argument *> &Args) const;
/// @}
};
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
index 60bd1b24cab8..5c228820f0cc 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
@@ -3909,10 +3909,10 @@ foreach vti = AllIntegerVectors in {
(DecImm simm5_plus1:$rs2),
GPR:$vl,
vti.SEW)>;
- def : Pat<(vti.Mask (int_riscv_vmslt_mask (vti.Mask V0),
+ def : Pat<(vti.Mask (int_riscv_vmslt_mask (vti.Mask VR:$merge),
(vti.Vector vti.RegClass:$rs1),
(vti.Scalar simm5_plus1:$rs2),
- (vti.Mask VR:$merge),
+ (vti.Mask V0),
(XLenVT (VLOp GPR:$vl)))),
(!cast<Instruction>("PseudoVMSLE_VI_"#vti.LMul.MX#"_MASK")
VR:$merge,
@@ -3922,17 +3922,17 @@ foreach vti = AllIntegerVectors in {
GPR:$vl,
vti.SEW)>;
- def : Pat<(vti.Mask (int_riscv_vmsltu (vti.Vector vti.RegClass:$rs1),
+ def : Pat<(vti.Mask (int_riscv_vmsltu (vti.Vector vti.RegClass:$rs1),
(vti.Scalar simm5_plus1:$rs2),
(XLenVT (VLOp GPR:$vl)))),
(!cast<Instruction>("PseudoVMSLEU_VI_"#vti.LMul.MX) vti.RegClass:$rs1,
(DecImm simm5_plus1:$rs2),
GPR:$vl,
vti.SEW)>;
- def : Pat<(vti.Mask (int_riscv_vmsltu_mask (vti.Mask V0),
+ def : Pat<(vti.Mask (int_riscv_vmsltu_mask (vti.Mask VR:$merge),
(vti.Vector vti.RegClass:$rs1),
(vti.Scalar simm5_plus1:$rs2),
- (vti.Mask VR:$merge),
+ (vti.Mask V0),
(XLenVT (VLOp GPR:$vl)))),
(!cast<Instruction>("PseudoVMSLEU_VI_"#vti.LMul.MX#"_MASK")
VR:$merge,
@@ -3950,11 +3950,11 @@ foreach vti = AllIntegerVectors in {
vti.RegClass:$rs1,
GPR:$vl,
vti.SEW)>;
- def : Pat<(vti.Mask (int_riscv_vmsltu_mask (vti.Mask V0),
- (vti.Vector vti.RegClass:$rs1),
- (vti.Scalar 0),
- (vti.Mask VR:$merge),
- (XLenVT (VLOp GPR:$vl)))),
+ def : Pat<(vti.Mask (int_riscv_vmsltu_mask (vti.Mask VR:$merge),
+ (vti.Vector vti.RegClass:$rs1),
+ (vti.Scalar 0),
+ (vti.Mask V0),
+ (XLenVT (VLOp GPR:$vl)))),
(!cast<Instruction>("PseudoVMSNE_VV_"#vti.LMul.MX#"_MASK")
VR:$merge,
vti.RegClass:$rs1,
diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
index 603446755aaf..9ace36f344a5 100644
--- a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -285,10 +285,13 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM,
// Give LowerOperation the chance to replace 64-bit ORs with subregs.
setOperationAction(ISD::OR, MVT::i64, Custom);
- // FIXME: Can we support these natively?
+ // Expand 128-bit shifts without using a libcall.
setOperationAction(ISD::SRL_PARTS, MVT::i64, Expand);
setOperationAction(ISD::SHL_PARTS, MVT::i64, Expand);
setOperationAction(ISD::SRA_PARTS, MVT::i64, Expand);
+ setLibcallName(RTLIB::SRL_I128, nullptr);
+ setLibcallName(RTLIB::SHL_I128, nullptr);
+ setLibcallName(RTLIB::SRA_I128, nullptr);
// We have native instructions for i8, i16 and i32 extensions, but not i1.
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
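A hedged illustration of the affected pattern: a variable 128-bit shift, which previously lowered to a __ashlti3/__lshrti3/__ashrti3 libcall on SystemZ and is now expanded inline:

unsigned __int128 shl128(unsigned __int128 x, unsigned n) {
  return x << (n & 127); // expanded inline, no runtime library call
}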
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyLowerEmscriptenEHSjLj.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyLowerEmscriptenEHSjLj.cpp
index d3bbadf27478..ff6404c30971 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyLowerEmscriptenEHSjLj.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyLowerEmscriptenEHSjLj.cpp
@@ -885,16 +885,9 @@ bool WebAssemblyLowerEmscriptenEHSjLj::runEHOnFunction(Function &F) {
SmallVector<Value *, 16> FMCArgs;
for (unsigned I = 0, E = LPI->getNumClauses(); I < E; ++I) {
Constant *Clause = LPI->getClause(I);
- // As a temporary workaround for the lack of aggregate varargs support
- // in the interface between JS and wasm, break out filter operands into
- // their component elements.
- if (LPI->isFilter(I)) {
- auto *ATy = cast<ArrayType>(Clause->getType());
- for (unsigned J = 0, E = ATy->getNumElements(); J < E; ++J) {
- Value *EV = IRB.CreateExtractValue(Clause, makeArrayRef(J), "filter");
- FMCArgs.push_back(EV);
- }
- } else
+ // TODO: Handle filters (= exception specifications).
+ // https://bugs.llvm.org/show_bug.cgi?id=50396
+ if (LPI->isCatch(I))
FMCArgs.push_back(Clause);
}
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 6b816c710f98..1e2407c7e7f6 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -37889,6 +37889,8 @@ static SDValue foldShuffleOfHorizOp(SDNode *N, SelectionDAG &DAG) {
// replicating low and high halves (and without changing the type/length of
// the vector), we don't need the shuffle.
if (Opcode == X86ISD::MOVDDUP || Opcode == X86ISD::VBROADCAST) {
+ if (Opcode == X86ISD::VBROADCAST && !VT.is128BitVector())
+ return SDValue();
if (HOp.getScalarValueSizeInBits() == 64 && HOp.getValueType() == VT) {
// movddup (hadd X, X) --> hadd X, X
// broadcast (extract_vec_elt (hadd X, X), 0) --> hadd X, X
diff --git a/llvm/lib/Target/X86/X86InstrCompiler.td b/llvm/lib/Target/X86/X86InstrCompiler.td
index 7a2facf226d8..dc6361aecc60 100644
--- a/llvm/lib/Target/X86/X86InstrCompiler.td
+++ b/llvm/lib/Target/X86/X86InstrCompiler.td
@@ -1344,15 +1344,18 @@ def : Pat<(i32 (anyext_sdiv GR8:$src)), (MOVSX32rr8 GR8:$src)>;
// Any instruction that defines a 32-bit result leaves the high half of the
// register. Truncate can be lowered to EXTRACT_SUBREG. CopyFromReg may
-// be copying from a truncate. Any other 32-bit operation will zero-extend
-// up to 64 bits. AssertSext/AssertZext aren't saying anything about the upper
-// 32 bits, they're probably just qualifying a CopyFromReg.
+// be copying from a truncate. AssertSext/AssertZext/AssertAlign aren't saying
+// anything about the upper 32 bits; they're probably just qualifying a
+// CopyFromReg. FREEZE may be coming from a truncate. Any other 32-bit
+// operation will zero-extend up to 64 bits.
def def32 : PatLeaf<(i32 GR32:$src), [{
return N->getOpcode() != ISD::TRUNCATE &&
N->getOpcode() != TargetOpcode::EXTRACT_SUBREG &&
N->getOpcode() != ISD::CopyFromReg &&
N->getOpcode() != ISD::AssertSext &&
- N->getOpcode() != ISD::AssertZext;
+ N->getOpcode() != ISD::AssertZext &&
+ N->getOpcode() != ISD::AssertAlign &&
+ N->getOpcode() != ISD::FREEZE;
}]>;
// In the case of a 32-bit def that is known to implicitly zero-extend,
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
index 68c4156af2c4..85a7abe211b3 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -3221,11 +3221,6 @@ Instruction *InstCombinerImpl::visitXor(BinaryOperator &I) {
}
}
- // ~(X - Y) --> ~X + Y
- if (match(NotVal, m_Sub(m_Value(X), m_Value(Y))))
- if (isa<Constant>(X) || NotVal->hasOneUse())
- return BinaryOperator::CreateAdd(Builder.CreateNot(X), Y);
-
// ~(~X >>s Y) --> (X >>s Y)
if (match(NotVal, m_AShr(m_Not(m_Value(X)), m_Value(Y))))
return BinaryOperator::CreateAShr(X, Y);
@@ -3256,9 +3251,15 @@ Instruction *InstCombinerImpl::visitXor(BinaryOperator &I) {
return BinaryOperator::CreateAShr(ConstantExpr::getNot(C), Y);
}
- // ~(X + C) --> -(C + 1) - X
- if (match(Op0, m_Add(m_Value(X), m_Constant(C))))
- return BinaryOperator::CreateSub(ConstantExpr::getNeg(AddOne(C)), X);
+ // ~(X + C) --> ~C - X
+ if (match(NotVal, m_c_Add(m_Value(X), m_ImmConstant(C))))
+ return BinaryOperator::CreateSub(ConstantExpr::getNot(C), X);
+
+ // ~(X - Y) --> ~X + Y
+ // FIXME: is it really beneficial to sink the `not` here?
+ if (match(NotVal, m_Sub(m_Value(X), m_Value(Y))))
+ if (isa<Constant>(X) || NotVal->hasOneUse())
+ return BinaryOperator::CreateAdd(Builder.CreateNot(X), Y);
// ~(~X + Y) --> X - Y
if (match(NotVal, m_c_Add(m_Not(m_Value(X)), m_Value(Y))))
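A standalone sanity check of the rewritten fold, using the two's-complement identity ~a == -a - 1, so ~(X + C) == (-C - 1) - X == ~C - X (a hypothetical test harness, not part of the patch):

#include <cassert>
#include <cstdint>

int main() {
  for (int x = -128; x < 128; ++x) {
    int8_t X = static_cast<int8_t>(x);
    int8_t C = 5;
    // ~(X + C) --> ~C - X
    assert(static_cast<int8_t>(~(X + C)) == static_cast<int8_t>(~C - X));
  }
  return 0;
}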
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
index f26c194d31b9..5f174aae09ec 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -1095,7 +1095,10 @@ static Instruction *canonicalizeAbsNabs(SelectInst &Sel, ICmpInst &Cmp,
/// TODO: Wrapping flags could be preserved in some cases with better analysis.
Instruction *InstCombinerImpl::foldSelectValueEquivalence(SelectInst &Sel,
ICmpInst &Cmp) {
- if (!Cmp.isEquality())
+ // Value equivalence substitution requires an all-or-nothing replacement.
+ // It does not make sense for a vector compare where each lane is chosen
+ // independently.
+ if (!Cmp.isEquality() || Cmp.getType()->isVectorTy())
return nullptr;
// Canonicalize the pattern to ICMP_EQ by swapping the select operands.
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp b/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp
index 7295369365c4..127bf8080959 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp
@@ -21,6 +21,30 @@ using namespace PatternMatch;
#define DEBUG_TYPE "instcombine"
+bool canTryToConstantAddTwoShiftAmounts(Value *Sh0, Value *ShAmt0, Value *Sh1,
+ Value *ShAmt1) {
+ // We have two shift amounts from two different shifts. The types of those
+ // shift amounts may not match. If that's the case, let's bail out now.
+ if (ShAmt0->getType() != ShAmt1->getType())
+ return false;
+
+ // As input, we have the following pattern:
+ // Sh0 (Sh1 X, Q), K
+ // We want to rewrite that as:
+ // Sh x, (Q+K) iff (Q+K) u< bitwidth(x)
+ // While we know that originally (Q+K) would not overflow
+ // (because 2 * (N-1) u<= iN - 1), we have looked past extensions of
+ // shift amounts, so it may now overflow in a smaller bitwidth.
+ // To ensure that does not happen, we need to ensure that the total maximal
+ // shift amount is still representable in that smaller bit width.
+ unsigned MaximalPossibleTotalShiftAmount =
+ (Sh0->getType()->getScalarSizeInBits() - 1) +
+ (Sh1->getType()->getScalarSizeInBits() - 1);
+ APInt MaximalRepresentableShiftAmount =
+ APInt::getAllOnesValue(ShAmt0->getType()->getScalarSizeInBits());
+ return MaximalRepresentableShiftAmount.uge(MaximalPossibleTotalShiftAmount);
+}
+
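A worked instance of the guard above (standalone sketch with a hypothetical helper, assuming AmtBits < 64): two i64 shifts whose amounts were zero-extended from i8 have a maximal total of 63 + 63 = 126, which fits in 8 bits (max 255), so the fold may proceed; two i256 shifts with i8 amounts give 255 + 255 = 510, which does not fit, so we must bail out:

#include <cstdint>

// (N0 - 1) + (N1 - 1) must be representable in the (possibly narrower)
// shift-amount type of AmtBits bits.
static bool canAddShiftAmounts(unsigned N0, unsigned N1, unsigned AmtBits) {
  uint64_t MaxTotal = uint64_t(N0 - 1) + (N1 - 1);
  uint64_t MaxRepresentable = (uint64_t(1) << AmtBits) - 1;
  return MaxTotal <= MaxRepresentable;
}
// canAddShiftAmounts(64, 64, 8)   -> true  (126 <= 255)
// canAddShiftAmounts(256, 256, 8) -> false (510 >  255)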
// Given pattern:
// (x shiftopcode Q) shiftopcode K
// we should rewrite it as
@@ -57,26 +81,8 @@ Value *InstCombinerImpl::reassociateShiftAmtsOfTwoSameDirectionShifts(
if (!match(Sh1, m_Shift(m_Value(X), m_ZExtOrSelf(m_Value(ShAmt1)))))
return nullptr;
- // We have two shift amounts from two different shifts. The types of those
- // shift amounts may not match. If that's the case let's bailout now..
- if (ShAmt0->getType() != ShAmt1->getType())
- return nullptr;
-
- // As input, we have the following pattern:
- // Sh0 (Sh1 X, Q), K
- // We want to rewrite that as:
- // Sh x, (Q+K) iff (Q+K) u< bitwidth(x)
- // While we know that originally (Q+K) would not overflow
- // (because 2 * (N-1) u<= iN -1), we have looked past extensions of
- // shift amounts. so it may now overflow in smaller bitwidth.
- // To ensure that does not happen, we need to ensure that the total maximal
- // shift amount is still representable in that smaller bit width.
- unsigned MaximalPossibleTotalShiftAmount =
- (Sh0->getType()->getScalarSizeInBits() - 1) +
- (Sh1->getType()->getScalarSizeInBits() - 1);
- APInt MaximalRepresentableShiftAmount =
- APInt::getAllOnesValue(ShAmt0->getType()->getScalarSizeInBits());
- if (MaximalRepresentableShiftAmount.ult(MaximalPossibleTotalShiftAmount))
+ // Verify that it would be safe to try to add those two shift amounts.
+ if (!canTryToConstantAddTwoShiftAmounts(Sh0, ShAmt0, Sh1, ShAmt1))
return nullptr;
// We are only looking for signbit extraction if we have two right shifts.
@@ -220,9 +226,9 @@ dropRedundantMaskingOfLeftShiftInput(BinaryOperator *OuterShift,
// Peek through an optional zext of the shift amount.
match(MaskShAmt, m_ZExtOrSelf(m_Value(MaskShAmt)));
- // We have two shift amounts from two different shifts. The types of those
- // shift amounts may not match. If that's the case let's bailout now.
- if (MaskShAmt->getType() != ShiftShAmt->getType())
+ // Verify that it would be safe to try to add those two shift amounts.
+ if (!canTryToConstantAddTwoShiftAmounts(OuterShift, ShiftShAmt, Masked,
+ MaskShAmt))
return nullptr;
// Can we simplify (MaskShAmt+ShiftShAmt) ?
@@ -252,9 +258,9 @@ dropRedundantMaskingOfLeftShiftInput(BinaryOperator *OuterShift,
// Peek through an optional zext of the shift amount.
match(MaskShAmt, m_ZExtOrSelf(m_Value(MaskShAmt)));
- // We have two shift amounts from two different shifts. The types of those
- // shift amounts may not match. If that's the case let's bailout now.
- if (MaskShAmt->getType() != ShiftShAmt->getType())
+ // Verify that it would be safe to try to add those two shift amounts.
+ if (!canTryToConstantAddTwoShiftAmounts(OuterShift, ShiftShAmt, Masked,
+ MaskShAmt))
return nullptr;
// Can we simplify (ShiftShAmt-MaskShAmt) ?
diff --git a/llvm/lib/Transforms/Scalar/LoopRerollPass.cpp b/llvm/lib/Transforms/Scalar/LoopRerollPass.cpp
index b3bae47e96de..65a6205f0302 100644
--- a/llvm/lib/Transforms/Scalar/LoopRerollPass.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopRerollPass.cpp
@@ -1081,6 +1081,12 @@ bool LoopReroll::DAGRootTracker::collectUsedInstructions(SmallInstructionSet &Po
DenseSet<Instruction*> V;
collectInLoopUserSet(LoopIncs, Exclude, PossibleRedSet, V);
for (auto *I : V) {
+ if (I->mayHaveSideEffects()) {
+ LLVM_DEBUG(dbgs() << "LRR: Aborting - "
+ << "An instruction which does not belong to any root "
+ << "sets must not have side effects: " << *I);
+ return false;
+ }
Uses[I].set(IL_All);
}
diff --git a/llvm/lib/Transforms/Scalar/LowerConstantIntrinsics.cpp b/llvm/lib/Transforms/Scalar/LowerConstantIntrinsics.cpp
index bfe8db83b027..bb30c48127a0 100644
--- a/llvm/lib/Transforms/Scalar/LowerConstantIntrinsics.cpp
+++ b/llvm/lib/Transforms/Scalar/LowerConstantIntrinsics.cpp
@@ -43,10 +43,10 @@ STATISTIC(ObjectSizeIntrinsicsHandled,
"Number of 'objectsize' intrinsic calls handled");
static Value *lowerIsConstantIntrinsic(IntrinsicInst *II) {
- Value *Op = II->getOperand(0);
-
- return isa<Constant>(Op) ? ConstantInt::getTrue(II->getType())
- : ConstantInt::getFalse(II->getType());
+ if (auto *C = dyn_cast<Constant>(II->getOperand(0)))
+ if (C->isManifestConstant())
+ return ConstantInt::getTrue(II->getType());
+ return ConstantInt::getFalse(II->getType());
}
static bool replaceConditionalBranchesOnConstant(Instruction *II,
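A hedged illustration of the behavioral change, via __builtin_constant_p, which lowers to llvm.is.constant: a literal still folds to true, while a value that is a Constant in the IR but not a manifest constant, such as a global's address, no longer does (assuming that is what isManifestConstant distinguishes):

int g;
bool literal_is_const() { return __builtin_constant_p(42); }     // folds to true
bool global_addr_is_const() { return __builtin_constant_p(&g); } // now false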
diff --git a/llvm/lib/Transforms/Scalar/SCCP.cpp b/llvm/lib/Transforms/Scalar/SCCP.cpp
index de6be52adf21..8feed9e9ebfe 100644
--- a/llvm/lib/Transforms/Scalar/SCCP.cpp
+++ b/llvm/lib/Transforms/Scalar/SCCP.cpp
@@ -542,9 +542,14 @@ private:
auto Iter = AdditionalUsers.find(I);
if (Iter != AdditionalUsers.end()) {
+ // Copy additional users before notifying them of changes, because new
+ // users may be added, potentially invalidating the iterator.
+ SmallVector<Instruction *, 2> ToNotify;
for (User *U : Iter->second)
if (auto *UI = dyn_cast<Instruction>(U))
- OperandChangedState(UI);
+ ToNotify.push_back(UI);
+ for (Instruction *UI : ToNotify)
+ OperandChangedState(UI);
}
}
void handleCallOverdefined(CallBase &CB);
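The fix in miniature (a generic C++ sketch, not the LLVM API): snapshot the users before notifying them, because a notification may insert new users into the same container and invalidate the live iterator:

#include <functional>
#include <set>
#include <vector>

void notifyUsers(std::set<int> &Users, const std::function<void(int)> &Visit) {
  // Copy first: Visit may add elements to Users, invalidating any iterator
  // that a direct range-for over Users would hold.
  std::vector<int> Snapshot(Users.begin(), Users.end());
  for (int U : Snapshot)
    Visit(U);
}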
diff --git a/llvm/lib/Transforms/Utils/InlineFunction.cpp b/llvm/lib/Transforms/Utils/InlineFunction.cpp
index 3026342cc4a6..fb271a2118ba 100644
--- a/llvm/lib/Transforms/Utils/InlineFunction.cpp
+++ b/llvm/lib/Transforms/Utils/InlineFunction.cpp
@@ -780,7 +780,8 @@ static void HandleInlinedEHPad(InvokeInst *II, BasicBlock *FirstNewBlock,
/// When inlining a call site that has !llvm.mem.parallel_loop_access,
/// !llvm.access.group, !alias.scope or !noalias metadata, that metadata should
/// be propagated to all memory-accessing cloned instructions.
-static void PropagateCallSiteMetadata(CallBase &CB, ValueToValueMapTy &VMap) {
+static void PropagateCallSiteMetadata(CallBase &CB, Function::iterator FStart,
+ Function::iterator FEnd) {
MDNode *MemParallelLoopAccess =
CB.getMetadata(LLVMContext::MD_mem_parallel_loop_access);
MDNode *AccessGroup = CB.getMetadata(LLVMContext::MD_access_group);
@@ -789,41 +790,33 @@ static void PropagateCallSiteMetadata(CallBase &CB, ValueToValueMapTy &VMap) {
if (!MemParallelLoopAccess && !AccessGroup && !AliasScope && !NoAlias)
return;
- for (ValueToValueMapTy::iterator VMI = VMap.begin(), VMIE = VMap.end();
- VMI != VMIE; ++VMI) {
- // Check that key is an instruction, to skip the Argument mapping, which
- // points to an instruction in the original function, not the inlined one.
- if (!VMI->second || !isa<Instruction>(VMI->first))
- continue;
-
- Instruction *NI = dyn_cast<Instruction>(VMI->second);
- if (!NI)
- continue;
-
- // This metadata is only relevant for instructions that access memory.
- if (!NI->mayReadOrWriteMemory())
- continue;
+ for (BasicBlock &BB : make_range(FStart, FEnd)) {
+ for (Instruction &I : BB) {
+ // This metadata is only relevant for instructions that access memory.
+ if (!I.mayReadOrWriteMemory())
+ continue;
- if (MemParallelLoopAccess) {
- // TODO: This probably should not overwrite MemParalleLoopAccess.
- MemParallelLoopAccess = MDNode::concatenate(
- NI->getMetadata(LLVMContext::MD_mem_parallel_loop_access),
- MemParallelLoopAccess);
- NI->setMetadata(LLVMContext::MD_mem_parallel_loop_access,
+ if (MemParallelLoopAccess) {
+ // TODO: This probably should not overwrite MemParallelLoopAccess.
+ MemParallelLoopAccess = MDNode::concatenate(
+ I.getMetadata(LLVMContext::MD_mem_parallel_loop_access),
+ MemParallelLoopAccess);
+ I.setMetadata(LLVMContext::MD_mem_parallel_loop_access,
MemParallelLoopAccess);
- }
+ }
- if (AccessGroup)
- NI->setMetadata(LLVMContext::MD_access_group, uniteAccessGroups(
- NI->getMetadata(LLVMContext::MD_access_group), AccessGroup));
+ if (AccessGroup)
+ I.setMetadata(LLVMContext::MD_access_group, uniteAccessGroups(
+ I.getMetadata(LLVMContext::MD_access_group), AccessGroup));
- if (AliasScope)
- NI->setMetadata(LLVMContext::MD_alias_scope, MDNode::concatenate(
- NI->getMetadata(LLVMContext::MD_alias_scope), AliasScope));
+ if (AliasScope)
+ I.setMetadata(LLVMContext::MD_alias_scope, MDNode::concatenate(
+ I.getMetadata(LLVMContext::MD_alias_scope), AliasScope));
- if (NoAlias)
- NI->setMetadata(LLVMContext::MD_noalias, MDNode::concatenate(
- NI->getMetadata(LLVMContext::MD_noalias), NoAlias));
+ if (NoAlias)
+ I.setMetadata(LLVMContext::MD_noalias, MDNode::concatenate(
+ I.getMetadata(LLVMContext::MD_noalias), NoAlias));
+ }
}
}
@@ -844,9 +837,9 @@ public:
/// subsequent remap() calls.
void clone();
- /// Remap instructions in the given VMap from the original to the cloned
+ /// Remap instructions in the given range from the original to the cloned
/// metadata.
- void remap(ValueToValueMapTy &VMap);
+ void remap(Function::iterator FStart, Function::iterator FEnd);
};
ScopedAliasMetadataDeepCloner::ScopedAliasMetadataDeepCloner(
@@ -907,34 +900,27 @@ void ScopedAliasMetadataDeepCloner::clone() {
}
}
-void ScopedAliasMetadataDeepCloner::remap(ValueToValueMapTy &VMap) {
+void ScopedAliasMetadataDeepCloner::remap(Function::iterator FStart,
+ Function::iterator FEnd) {
if (MDMap.empty())
return; // Nothing to do.
- for (auto Entry : VMap) {
- // Check that key is an instruction, to skip the Argument mapping, which
- // points to an instruction in the original function, not the inlined one.
- if (!Entry->second || !isa<Instruction>(Entry->first))
- continue;
-
- Instruction *I = dyn_cast<Instruction>(Entry->second);
- if (!I)
- continue;
-
- // Only update scopes when we find them in the map. If they are not, it is
- // because we already handled that instruction before. This is faster than
- // tracking which instructions we already updated.
- if (MDNode *M = I->getMetadata(LLVMContext::MD_alias_scope))
- if (MDNode *MNew = MDMap.lookup(M))
- I->setMetadata(LLVMContext::MD_alias_scope, MNew);
-
- if (MDNode *M = I->getMetadata(LLVMContext::MD_noalias))
- if (MDNode *MNew = MDMap.lookup(M))
- I->setMetadata(LLVMContext::MD_noalias, MNew);
-
- if (auto *Decl = dyn_cast<NoAliasScopeDeclInst>(I))
- if (MDNode *MNew = MDMap.lookup(Decl->getScopeList()))
- Decl->setScopeList(MNew);
+ for (BasicBlock &BB : make_range(FStart, FEnd)) {
+ for (Instruction &I : BB) {
+ // TODO: The null checks for the MDMap.lookup() results should no longer
+ // be necessary.
+ if (MDNode *M = I.getMetadata(LLVMContext::MD_alias_scope))
+ if (MDNode *MNew = MDMap.lookup(M))
+ I.setMetadata(LLVMContext::MD_alias_scope, MNew);
+
+ if (MDNode *M = I.getMetadata(LLVMContext::MD_noalias))
+ if (MDNode *MNew = MDMap.lookup(M))
+ I.setMetadata(LLVMContext::MD_noalias, MNew);
+
+ if (auto *Decl = dyn_cast<NoAliasScopeDeclInst>(&I))
+ if (MDNode *MNew = MDMap.lookup(Decl->getScopeList()))
+ Decl->setScopeList(MNew);
+ }
}
}
@@ -1926,7 +1912,7 @@ llvm::InlineResult llvm::InlineFunction(CallBase &CB, InlineFunctionInfo &IFI,
// Now clone the inlined noalias scope metadata.
SAMetadataCloner.clone();
- SAMetadataCloner.remap(VMap);
+ SAMetadataCloner.remap(FirstNewBlock, Caller->end());
// Add noalias metadata if necessary.
AddAliasScopeMetadata(CB, VMap, DL, CalleeAAR);
@@ -1936,7 +1922,7 @@ llvm::InlineResult llvm::InlineFunction(CallBase &CB, InlineFunctionInfo &IFI,
AddReturnAttributes(CB, VMap);
// Propagate metadata on the callsite if necessary.
- PropagateCallSiteMetadata(CB, VMap);
+ PropagateCallSiteMetadata(CB, FirstNewBlock, Caller->end());
// Register any cloned assumptions.
if (IFI.GetAssumptionCache)