-rw-r--r--  clang/lib/CodeGen/CGOpenMPRuntime.cpp | 3
-rw-r--r--  clang/lib/Format/TokenAnnotator.cpp | 17
-rw-r--r--  clang/lib/Format/UnwrappedLineParser.cpp | 41
-rw-r--r--  clang/lib/Headers/openmp_wrappers/complex | 2
-rw-r--r--  clang/lib/Sema/SemaStmt.cpp | 3
-rw-r--r--  compiler-rt/lib/builtins/clear_cache.c | 9
-rw-r--r--  lld/ELF/InputSection.cpp | 47
-rw-r--r--  lld/ELF/InputSection.h | 1
-rw-r--r--  lldb/source/Plugins/Process/Utility/RegisterInfoPOSIX_arm64.h | 1
-rw-r--r--  llvm/include/llvm/MC/MCContext.h | 16
-rw-r--r--  llvm/include/llvm/Transforms/Scalar/MemCpyOptimizer.h | 2
-rw-r--r--  llvm/lib/IR/Constants.cpp | 8
-rw-r--r--  llvm/lib/MC/MCContext.cpp | 2
-rw-r--r--  llvm/lib/Target/AArch64/AArch64ISelLowering.cpp | 3
-rw-r--r--  llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp | 45
-rw-r--r--  llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.h | 2
-rw-r--r--  llvm/lib/Target/X86/X86FrameLowering.cpp | 8
-rw-r--r--  llvm/lib/Transforms/IPO/OpenMPOpt.cpp | 23
-rw-r--r--  llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp | 40
-rw-r--r--  llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp | 43
-rw-r--r--  llvm/lib/Transforms/Utils/SimplifyCFG.cpp | 36
-rw-r--r--  llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp | 7
22 files changed, 209 insertions(+), 150 deletions(-)
diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
index c09797e91b99..ca98c7a57446 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
@@ -2120,11 +2120,12 @@ void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
// Ensure we do not inline the function. This is trivially true for the ones
- // passed to __kmpc_fork_call but the ones calles in serialized regions
+ // passed to __kmpc_fork_call but the ones called in serialized regions
// could be inlined. This is not perfect but it is closer to the invariant
// we want, namely, every data environment starts with a new function.
// TODO: We should pass the if condition to the runtime function and do the
// handling there. Much cleaner code.
+ OutlinedFn->removeFnAttr(llvm::Attribute::AlwaysInline);
OutlinedFn->addFnAttr(llvm::Attribute::NoInline);
RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);
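
The attribute swap above is order-sensitive: LLVM's verifier rejects a function carrying both alwaysinline and noinline, so AlwaysInline has to be dropped before NoInline is added. A minimal standalone sketch of the same idiom (the helper name is illustrative, not from the patch):

#include "llvm/IR/Attributes.h"
#include "llvm/IR/Function.h"

// Keep an outlined function out-of-line. Removing AlwaysInline first is
// required: the verifier rejects noinline + alwaysinline on one function.
static void forceNoInline(llvm::Function &F) {
  F.removeFnAttr(llvm::Attribute::AlwaysInline); // no-op if absent
  F.addFnAttr(llvm::Attribute::NoInline);
}
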
diff --git a/clang/lib/Format/TokenAnnotator.cpp b/clang/lib/Format/TokenAnnotator.cpp
index 54e6c7d38e7d..11dc661abc24 100644
--- a/clang/lib/Format/TokenAnnotator.cpp
+++ b/clang/lib/Format/TokenAnnotator.cpp
@@ -2398,7 +2398,7 @@ void TokenAnnotator::annotate(AnnotatedLine &Line) {
// This function heuristically determines whether 'Current' starts the name of a
// function declaration.
-static bool isFunctionDeclarationName(const FormatToken &Current,
+static bool isFunctionDeclarationName(bool IsCpp, const FormatToken &Current,
const AnnotatedLine &Line) {
auto skipOperatorName = [](const FormatToken *Next) -> const FormatToken * {
for (; Next; Next = Next->Next) {
@@ -2476,14 +2476,21 @@ static bool isFunctionDeclarationName(const FormatToken &Current,
if (Next->MatchingParen->Next &&
Next->MatchingParen->Next->is(TT_PointerOrReference))
return true;
- // Check for K&R C function definitions, e.g.:
+
+ // Check for K&R C function definitions (and C++ function definitions with
+ // unnamed parameters), e.g.:
// int f(i)
// {
// return i + 1;
// }
- if (Next->Next && Next->Next->is(tok::identifier) &&
- !(Next->MatchingParen->Next && Next->MatchingParen->Next->is(tok::semi)))
+ // bool g(size_t = 0, bool b = false)
+ // {
+ // return !b;
+ // }
+ if (IsCpp && Next->Next && Next->Next->is(tok::identifier) &&
+ !Line.endsWith(tok::semi))
return true;
+
for (const FormatToken *Tok = Next->Next; Tok && Tok != Next->MatchingParen;
Tok = Tok->Next) {
if (Tok->is(TT_TypeDeclarationParen))
@@ -2544,7 +2551,7 @@ void TokenAnnotator::calculateFormattingInformation(AnnotatedLine &Line) {
calculateArrayInitializerColumnList(Line);
while (Current) {
- if (isFunctionDeclarationName(*Current, Line))
+ if (isFunctionDeclarationName(Style.isCpp(), *Current, Line))
Current->setType(TT_FunctionDeclarationName);
if (Current->is(TT_LineComment)) {
if (Current->Previous->is(BK_BracedInit) &&
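
A rough regression test for the two shapes the new comment names, written in the style of clang/unittests/Format/FormatTest.cpp (the test name is hypothetical; verifyFormat asserts the snippet is stable under formatting and would only compile inside that fixture):

TEST_F(FormatTest, KnRAndUnnamedDefaultedParameters) {
  // K&R C definition: the parameter declaration starts its own line.
  verifyFormat("int f(i)\n"
               "int i;\n"
               "{\n"
               "  return i + 1;\n"
               "}");
  // C++ definition with unnamed, defaulted parameters: recognized as a
  // function declaration name rather than misparsed as a K&R list.
  verifyFormat("bool g(size_t = 0, bool b = false) {\n"
               "  return !b;\n"
               "}");
}
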
diff --git a/clang/lib/Format/UnwrappedLineParser.cpp b/clang/lib/Format/UnwrappedLineParser.cpp
index 103e3559b120..673986d16af2 100644
--- a/clang/lib/Format/UnwrappedLineParser.cpp
+++ b/clang/lib/Format/UnwrappedLineParser.cpp
@@ -994,6 +994,13 @@ static bool isJSDeclOrStmt(const AdditionalKeywords &Keywords,
Keywords.kw_import, tok::kw_export);
}
+// Checks whether a token is a type in K&R C (aka C78).
+static bool isC78Type(const FormatToken &Tok) {
+ return Tok.isOneOf(tok::kw_char, tok::kw_short, tok::kw_int, tok::kw_long,
+ tok::kw_unsigned, tok::kw_float, tok::kw_double,
+ tok::identifier);
+}
+
// This function checks whether a token starts the first parameter declaration
// in a K&R C (aka C78) function definition, e.g.:
// int f(a, b)
@@ -1001,13 +1008,24 @@ static bool isJSDeclOrStmt(const AdditionalKeywords &Keywords,
// {
// return a + b;
// }
-static bool isC78ParameterDecl(const FormatToken *Tok) {
- if (!Tok)
+static bool isC78ParameterDecl(const FormatToken *Tok, const FormatToken *Next,
+ const FormatToken *FuncName) {
+ assert(Tok);
+ assert(Next);
+ assert(FuncName);
+
+ if (FuncName->isNot(tok::identifier))
return false;
- if (!Tok->isOneOf(tok::kw_int, tok::kw_char, tok::kw_float, tok::kw_double,
- tok::kw_struct, tok::kw_union, tok::kw_long, tok::kw_short,
- tok::kw_unsigned, tok::kw_register, tok::identifier))
+ const FormatToken *Prev = FuncName->Previous;
+ if (!Prev || (Prev->isNot(tok::star) && !isC78Type(*Prev)))
+ return false;
+
+ if (!isC78Type(*Tok) &&
+ !Tok->isOneOf(tok::kw_register, tok::kw_struct, tok::kw_union))
+ return false;
+
+ if (Next->isNot(tok::star) && !Next->Tok.getIdentifierInfo())
return false;
Tok = Tok->Previous;
@@ -1368,21 +1386,20 @@ void UnwrappedLineParser::parseStructuralElement(bool IsTopLevel) {
case tok::r_brace:
addUnwrappedLine();
return;
- case tok::l_paren:
+ case tok::l_paren: {
parseParens();
// Break the unwrapped line if a K&R C function definition has a parameter
// declaration.
- if (!IsTopLevel || !Style.isCpp())
- break;
- if (!Previous || Previous->isNot(tok::identifier))
+ if (!IsTopLevel || !Style.isCpp() || !Previous || FormatTok->is(tok::eof))
break;
- if (Previous->Previous && Previous->Previous->is(tok::at))
- break;
- if (isC78ParameterDecl(FormatTok)) {
+ const unsigned Position = Tokens->getPosition() + 1;
+ assert(Position < AllTokens.size());
+ if (isC78ParameterDecl(FormatTok, AllTokens[Position], Previous)) {
addUnwrappedLine();
return;
}
break;
+ }
case tok::kw_operator:
nextToken();
if (FormatTok->isBinaryOperator())
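
For a caller's-eye view of the parser change, here is a small driver that pushes a K&R definition through libFormat's public entry points. A sketch only, with error handling reduced to the minimum the Expected API requires:

#include "clang/Format/Format.h"
#include "clang/Tooling/Core/Replacement.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/raw_ostream.h"

// Reformat a one-line K&R C definition; with the fix, the parameter
// declaration is broken onto its own unwrapped line.
int main() {
  llvm::StringRef Code = "int f(a, b) int a, b; { return a + b; }";
  clang::format::FormatStyle Style = clang::format::getLLVMStyle();
  clang::tooling::Range WholeFile(0, Code.size());
  clang::tooling::Replacements Fixes =
      clang::format::reformat(Style, Code, {WholeFile}, "test.c");
  llvm::Expected<std::string> Result =
      clang::tooling::applyAllReplacements(Code, Fixes);
  if (Result)
    llvm::outs() << *Result << "\n";
  else
    llvm::consumeError(Result.takeError());
}
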
diff --git a/clang/lib/Headers/openmp_wrappers/complex b/clang/lib/Headers/openmp_wrappers/complex
index dfd6193c97cb..eb1ead207d58 100644
--- a/clang/lib/Headers/openmp_wrappers/complex
+++ b/clang/lib/Headers/openmp_wrappers/complex
@@ -36,7 +36,7 @@
#ifndef _LIBCPP_STD_VER
#pragma omp begin declare variant match( \
- device = {arch(nvptx, nvptx64)}, \
+ device = {arch(amdgcn, nvptx, nvptx64)}, \
implementation = {extension(match_any, allow_templates)})
#include <complex_cmath.h>
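
The selector set above controls which device overloads the wrapper exposes; with amdgcn added, the same variant block now also applies when compiling OpenMP device code for AMD GPUs. An illustrative variant block using the widened selector (the function is a made-up example, not part of the wrapper):

#pragma omp begin declare variant match(                                       \
    device = {arch(amdgcn, nvptx, nvptx64)},                                   \
    implementation = {extension(match_any, allow_templates)})

template <typename T> T clamp01(T x) {
  return x < T(0) ? T(0) : (x > T(1) ? T(1) : x);
}

#pragma omp end declare variant
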
diff --git a/clang/lib/Sema/SemaStmt.cpp b/clang/lib/Sema/SemaStmt.cpp
index 3baccec2d7bb..f7e4110e6110 100644
--- a/clang/lib/Sema/SemaStmt.cpp
+++ b/clang/lib/Sema/SemaStmt.cpp
@@ -3481,7 +3481,8 @@ VerifyInitializationSequenceCXX98(const Sema &S,
ExprResult Sema::PerformMoveOrCopyInitialization(
const InitializedEntity &Entity, const NamedReturnInfo &NRInfo, Expr *Value,
bool SupressSimplerImplicitMoves) {
- if ((!getLangOpts().CPlusPlus2b || SupressSimplerImplicitMoves) &&
+ if (getLangOpts().CPlusPlus &&
+ (!getLangOpts().CPlusPlus2b || SupressSimplerImplicitMoves) &&
NRInfo.isMoveEligible()) {
ImplicitCastExpr AsRvalue(ImplicitCastExpr::OnStack, Value->getType(),
CK_NoOp, Value, VK_XValue, FPOptionsOverride());
diff --git a/compiler-rt/lib/builtins/clear_cache.c b/compiler-rt/lib/builtins/clear_cache.c
index 3c12b74e8fa6..da0715914b41 100644
--- a/compiler-rt/lib/builtins/clear_cache.c
+++ b/compiler-rt/lib/builtins/clear_cache.c
@@ -35,7 +35,7 @@ uintptr_t GetCurrentProcess(void);
#include <machine/sysarch.h>
#endif
-#if defined(__OpenBSD__) && (defined(__arm__) || defined(__mips__))
+#if defined(__OpenBSD__) && (defined(__arm__) || defined(__mips__) || defined(__riscv))
// clang-format off
#include <sys/types.h>
#include <machine/sysarch.h>
@@ -166,6 +166,13 @@ void __clear_cache(void *start, void *end) {
: "=r"(start_reg)
: "r"(start_reg), "r"(end_reg), "r"(flags), "r"(syscall_nr));
assert(start_reg == 0 && "Cache flush syscall failed.");
+#elif defined(__riscv) && defined(__OpenBSD__)
+ struct riscv_sync_icache_args arg;
+
+ arg.addr = (uintptr_t)start;
+ arg.len = (uintptr_t)end - (uintptr_t)start;
+
+ sysarch(RISCV_SYNC_ICACHE, &arg);
#else
#if __APPLE__
// On Darwin, sys_icache_invalidate() provides this functionality
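
Callers normally reach __clear_cache through the compiler builtin; a minimal sketch of the JIT-style pattern that the new OpenBSD/riscv branch ends up serving:

#include <cstring>

// Copy code into an executable buffer, then invalidate the instruction
// cache for that range before jumping to it. On OpenBSD/riscv this now
// reaches the sysarch(RISCV_SYNC_ICACHE, ...) path above.
void publish_code(char *buf, const void *code, size_t n) {
  std::memcpy(buf, code, n);
  __builtin___clear_cache(buf, buf + n);
}
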
diff --git a/lld/ELF/InputSection.cpp b/lld/ELF/InputSection.cpp
index 1f9fa961fc26..7d952e9037f1 100644
--- a/lld/ELF/InputSection.cpp
+++ b/lld/ELF/InputSection.cpp
@@ -88,7 +88,22 @@ InputSectionBase::InputSectionBase(InputFile *file, uint64_t flags,
if (!zlib::isAvailable())
error(toString(file) + ": contains a compressed section, " +
"but zlib is not available");
- parseCompressedHeader();
+ switch (config->ekind) {
+ case ELF32LEKind:
+ parseCompressedHeader<ELF32LE>();
+ break;
+ case ELF32BEKind:
+ parseCompressedHeader<ELF32BE>();
+ break;
+ case ELF64LEKind:
+ parseCompressedHeader<ELF64LE>();
+ break;
+ case ELF64BEKind:
+ parseCompressedHeader<ELF64BE>();
+ break;
+ default:
+ llvm_unreachable("unknown ELFT");
+ }
}
}
@@ -210,10 +225,7 @@ OutputSection *SectionBase::getOutputSection() {
// When a section is compressed, `rawData` consists of a header followed
// by zlib-compressed data. This function parses a header to initialize
// `uncompressedSize` member and remove the header from `rawData`.
-void InputSectionBase::parseCompressedHeader() {
- using Chdr64 = typename ELF64LE::Chdr;
- using Chdr32 = typename ELF32LE::Chdr;
-
+template <typename ELFT> void InputSectionBase::parseCompressedHeader() {
// Old-style header
if (name.startswith(".zdebug")) {
if (!toStringRef(rawData).startswith("ZLIB")) {
@@ -239,32 +251,13 @@ void InputSectionBase::parseCompressedHeader() {
assert(flags & SHF_COMPRESSED);
flags &= ~(uint64_t)SHF_COMPRESSED;
- // New-style 64-bit header
- if (config->is64) {
- if (rawData.size() < sizeof(Chdr64)) {
- error(toString(this) + ": corrupted compressed section");
- return;
- }
-
- auto *hdr = reinterpret_cast<const Chdr64 *>(rawData.data());
- if (hdr->ch_type != ELFCOMPRESS_ZLIB) {
- error(toString(this) + ": unsupported compression type");
- return;
- }
-
- uncompressedSize = hdr->ch_size;
- alignment = std::max<uint32_t>(hdr->ch_addralign, 1);
- rawData = rawData.slice(sizeof(*hdr));
- return;
- }
-
- // New-style 32-bit header
- if (rawData.size() < sizeof(Chdr32)) {
+ // New-style header
+ if (rawData.size() < sizeof(typename ELFT::Chdr)) {
error(toString(this) + ": corrupted compressed section");
return;
}
- auto *hdr = reinterpret_cast<const Chdr32 *>(rawData.data());
+ auto *hdr = reinterpret_cast<const typename ELFT::Chdr *>(rawData.data());
if (hdr->ch_type != ELFCOMPRESS_ZLIB) {
error(toString(this) + ": unsupported compression type");
return;
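
The switch added to the constructor is the usual way to recover a compile-time ELFT from the runtime ELF kind so that a non-template member can call the new template. A self-contained sketch of the pattern, with stand-in types for llvm::object::ELF32LE and friends:

#include <cstdint>
#include <cstdlib>

enum ELFKind { ELF32LEKind, ELF32BEKind, ELF64LEKind, ELF64BEKind };
struct ELF32LE { using Word = uint32_t; };
struct ELF32BE { using Word = uint32_t; };
struct ELF64LE { using Word = uint64_t; };
struct ELF64BE { using Word = uint64_t; };

template <class ELFT> void parseHeader(const uint8_t *data) {
  // Real code reads a typename ELFT::Chdr here; the layout differs per
  // instantiation, which is the point of the dispatch.
  (void)data;
}

void dispatch(ELFKind kind, const uint8_t *data) {
  switch (kind) {
  case ELF32LEKind: return parseHeader<ELF32LE>(data);
  case ELF32BEKind: return parseHeader<ELF32BE>(data);
  case ELF64LEKind: return parseHeader<ELF64LE>(data);
  case ELF64BEKind: return parseHeader<ELF64BE>(data);
  }
  std::abort(); // unknown ELFT
}
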
diff --git a/lld/ELF/InputSection.h b/lld/ELF/InputSection.h
index 5b91c1c90bd2..c914d0b42155 100644
--- a/lld/ELF/InputSection.h
+++ b/lld/ELF/InputSection.h
@@ -238,6 +238,7 @@ public:
}
protected:
+ template <typename ELFT>
void parseCompressedHeader();
void uncompress() const;
diff --git a/lldb/source/Plugins/Process/Utility/RegisterInfoPOSIX_arm64.h b/lldb/source/Plugins/Process/Utility/RegisterInfoPOSIX_arm64.h
index ba873ba4436b..96cab49d5ac8 100644
--- a/lldb/source/Plugins/Process/Utility/RegisterInfoPOSIX_arm64.h
+++ b/lldb/source/Plugins/Process/Utility/RegisterInfoPOSIX_arm64.h
@@ -110,6 +110,7 @@ public:
bool IsSVEEnabled() const { return m_opt_regsets.AnySet(eRegsetMaskSVE); }
bool IsPAuthEnabled() const { return m_opt_regsets.AnySet(eRegsetMaskPAuth); }
+ bool IsMTEEnabled() const { return m_opt_regsets.AnySet(eRegsetMaskMTE); }
bool IsSVEReg(unsigned reg) const;
bool IsSVEZReg(unsigned reg) const;
diff --git a/llvm/include/llvm/MC/MCContext.h b/llvm/include/llvm/MC/MCContext.h
index 877b2dc4ac92..2ff9c967e848 100644
--- a/llvm/include/llvm/MC/MCContext.h
+++ b/llvm/include/llvm/MC/MCContext.h
@@ -374,17 +374,17 @@ namespace llvm {
bool operator<(const ELFEntrySizeKey &Other) const {
if (SectionName != Other.SectionName)
return SectionName < Other.SectionName;
- if (Flags != Other.Flags)
- return Flags < Other.Flags;
+ if ((Flags & ELF::SHF_STRINGS) != (Other.Flags & ELF::SHF_STRINGS))
+ return Other.Flags & ELF::SHF_STRINGS;
return EntrySize < Other.EntrySize;
}
};
- // Symbols must be assigned to a section with a compatible entry size and
- // flags. This map is used to assign unique IDs to sections to distinguish
- // between sections with identical names but incompatible entry sizes and/or
- // flags. This can occur when a symbol is explicitly assigned to a section,
- // e.g. via __attribute__((section("myname"))).
+ // Symbols must be assigned to a section with a compatible entry
+ // size. This map is used to assign unique IDs to sections to
+ // distinguish between sections with identical names but incompatible entry
+ // sizes. This can occur when a symbol is explicitly assigned to a
+ // section, e.g. via __attribute__((section("myname"))).
std::map<ELFEntrySizeKey, unsigned> ELFEntrySizeMap;
// This set is used to record the generic mergeable section names seen.
@@ -592,8 +592,6 @@ namespace llvm {
bool isELFGenericMergeableSection(StringRef Name);
- /// Return the unique ID of the section with the given name, flags and entry
- /// size, if it exists.
Optional<unsigned> getELFUniqueIDForEntsize(StringRef SectionName,
unsigned Flags,
unsigned EntrySize);
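
The comparator now keys only on the SHF_STRINGS bit rather than the whole flag word, so sections whose flags differ only in bits irrelevant to entry-size compatibility map to the same unique-ID entry. A standalone restatement of the ordering (0x20 is SHF_STRINGS per the ELF spec):

#include <cstdint>
#include <string>

constexpr uint64_t kSHF_STRINGS = 0x20;

struct EntrySizeKey {
  std::string SectionName;
  uint64_t Flags;
  unsigned EntrySize;

  // Keys compare equal unless the name, the SHF_STRINGS bit, or the entry
  // size differs; this is still a strict weak ordering, as std::map needs.
  bool operator<(const EntrySizeKey &Other) const {
    if (SectionName != Other.SectionName)
      return SectionName < Other.SectionName;
    bool S = Flags & kSHF_STRINGS;
    bool OtherS = Other.Flags & kSHF_STRINGS;
    if (S != OtherS)
      return OtherS; // the key without SHF_STRINGS orders first
    return EntrySize < Other.EntrySize;
  }
};
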
diff --git a/llvm/include/llvm/Transforms/Scalar/MemCpyOptimizer.h b/llvm/include/llvm/Transforms/Scalar/MemCpyOptimizer.h
index 635b706d0bef..6203f37ebb01 100644
--- a/llvm/include/llvm/Transforms/Scalar/MemCpyOptimizer.h
+++ b/llvm/include/llvm/Transforms/Scalar/MemCpyOptimizer.h
@@ -65,7 +65,7 @@ private:
bool processMemCpy(MemCpyInst *M, BasicBlock::iterator &BBI);
bool processMemMove(MemMoveInst *M);
bool performCallSlotOptzn(Instruction *cpyLoad, Instruction *cpyStore,
- Value *cpyDst, Value *cpySrc, uint64_t cpyLen,
+ Value *cpyDst, Value *cpySrc, TypeSize cpyLen,
Align cpyAlign, CallInst *C);
bool processMemCpyMemCpyDependence(MemCpyInst *M, MemCpyInst *MDep);
bool processMemSetMemCpyDependence(MemCpyInst *MemCpy, MemSetInst *MemSet);
diff --git a/llvm/lib/IR/Constants.cpp b/llvm/lib/IR/Constants.cpp
index 6c75085a6678..1e72cb4d3a66 100644
--- a/llvm/lib/IR/Constants.cpp
+++ b/llvm/lib/IR/Constants.cpp
@@ -315,9 +315,11 @@ containsUndefinedElement(const Constant *C,
return false;
for (unsigned i = 0, e = cast<FixedVectorType>(VTy)->getNumElements();
- i != e; ++i)
- if (HasFn(C->getAggregateElement(i)))
- return true;
+ i != e; ++i) {
+ if (Constant *Elem = C->getAggregateElement(i))
+ if (HasFn(Elem))
+ return true;
+ }
}
return false;
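
Constant::getAggregateElement can return null when no Constant can be materialized for an element, which is the crash the added guard avoids. A self-contained restatement of the defensive loop using the same API:

#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"

// Every use of getAggregateElement must tolerate a null result; it returns
// nullptr when the element cannot be produced as a Constant.
static bool anyElementUndef(const llvm::Constant *C,
                            llvm::FixedVectorType *VTy) {
  for (unsigned I = 0, E = VTy->getNumElements(); I != E; ++I)
    if (llvm::Constant *Elem = C->getAggregateElement(I))
      if (llvm::isa<llvm::UndefValue>(Elem))
        return true;
  return false;
}
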
diff --git a/llvm/lib/MC/MCContext.cpp b/llvm/lib/MC/MCContext.cpp
index aa4051aa2400..cc349af6393b 100644
--- a/llvm/lib/MC/MCContext.cpp
+++ b/llvm/lib/MC/MCContext.cpp
@@ -586,7 +586,7 @@ void MCContext::recordELFMergeableSectionInfo(StringRef SectionName,
unsigned Flags, unsigned UniqueID,
unsigned EntrySize) {
bool IsMergeable = Flags & ELF::SHF_MERGE;
- if (UniqueID == GenericSectionID)
+ if (IsMergeable && (UniqueID == GenericSectionID))
ELFSeenGenericMergeableSections.insert(SectionName);
// For mergeable sections or non-mergeable sections with a generic mergeable
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 60c00f47859b..494554ae7b33 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -4161,7 +4161,8 @@ bool AArch64TargetLowering::shouldExtendGSIndex(EVT VT, EVT &EltTy) const {
bool AArch64TargetLowering::shouldRemoveExtendFromGSIndex(EVT VT) const {
if (VT.getVectorElementType() == MVT::i32 &&
- VT.getVectorElementCount().getKnownMinValue() >= 4)
+ VT.getVectorElementCount().getKnownMinValue() >= 4 &&
+ !VT.isFixedLengthVector())
return true;
return false;
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
index 08e4a119127c..edf4d06d4d59 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -103,8 +103,7 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
getActionDefinitionsBuilder(G_BSWAP)
.legalFor({s32, s64, v4s32, v2s32, v2s64})
.clampScalar(0, s32, s64)
- .widenScalarToNextPow2(0)
- .customIf(typeIs(0, v2s16)); // custom lower as G_REV32 + G_LSHR
+ .widenScalarToNextPow2(0);
getActionDefinitionsBuilder({G_ADD, G_SUB, G_MUL, G_AND, G_OR, G_XOR})
.legalFor({s32, s64, v2s32, v4s32, v4s16, v8s16, v16s8, v8s8})
@@ -799,8 +798,6 @@ bool AArch64LegalizerInfo::legalizeCustom(LegalizerHelper &Helper,
case TargetOpcode::G_LOAD:
case TargetOpcode::G_STORE:
return legalizeLoadStore(MI, MRI, MIRBuilder, Observer);
- case TargetOpcode::G_BSWAP:
- return legalizeBSwap(MI, MRI, MIRBuilder);
case TargetOpcode::G_SHL:
case TargetOpcode::G_ASHR:
case TargetOpcode::G_LSHR:
@@ -1015,46 +1012,6 @@ bool AArch64LegalizerInfo::legalizeLoadStore(
return true;
}
-bool AArch64LegalizerInfo::legalizeBSwap(MachineInstr &MI,
- MachineRegisterInfo &MRI,
- MachineIRBuilder &MIRBuilder) const {
- assert(MI.getOpcode() == TargetOpcode::G_BSWAP);
-
- // The <2 x half> case needs special lowering because there isn't an
- // instruction that does that directly. Instead, we widen to <8 x i8>
- // and emit a G_REV32 followed by a G_LSHR knowing that instruction selection
- // will later match them as:
- //
- // rev32.8b v0, v0
- // ushr.2s v0, v0, #16
- //
- // We could emit those here directly, but it seems better to keep things as
- // generic as possible through legalization, and avoid committing layering
- // violations by legalizing & selecting here at the same time.
-
- Register ValReg = MI.getOperand(1).getReg();
- assert(LLT::fixed_vector(2, 16) == MRI.getType(ValReg));
- const LLT v2s32 = LLT::fixed_vector(2, 32);
- const LLT v8s8 = LLT::fixed_vector(8, 8);
- const LLT s32 = LLT::scalar(32);
-
- auto Undef = MIRBuilder.buildUndef(v8s8);
- auto Insert =
- MIRBuilder
- .buildInstr(TargetOpcode::INSERT_SUBREG, {v8s8}, {Undef, ValReg})
- .addImm(AArch64::ssub);
- auto Rev32 = MIRBuilder.buildInstr(AArch64::G_REV32, {v8s8}, {Insert});
- auto Bitcast = MIRBuilder.buildBitcast(v2s32, Rev32);
- auto Amt = MIRBuilder.buildConstant(v2s32, 16);
- auto UShr =
- MIRBuilder.buildInstr(TargetOpcode::G_LSHR, {v2s32}, {Bitcast, Amt});
- auto Zero = MIRBuilder.buildConstant(s32, 0);
- auto Extract = MIRBuilder.buildExtractVectorElement(s32, UShr, Zero);
- MIRBuilder.buildBitcast({MI.getOperand(0).getReg()}, Extract);
- MI.eraseFromParent();
- return true;
-}
-
bool AArch64LegalizerInfo::legalizeVaArg(MachineInstr &MI,
MachineRegisterInfo &MRI,
MachineIRBuilder &MIRBuilder) const {
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.h b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.h
index 78fc24559d71..35456d95dc2b 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.h
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.h
@@ -35,8 +35,6 @@ public:
MachineInstr &MI) const override;
private:
- bool legalizeBSwap(MachineInstr &MI, MachineRegisterInfo &MRI,
- MachineIRBuilder &MIRBuilder) const;
bool legalizeVaArg(MachineInstr &MI, MachineRegisterInfo &MRI,
MachineIRBuilder &MIRBuilder) const;
bool legalizeLoadStore(MachineInstr &MI, MachineRegisterInfo &MRI,
diff --git a/llvm/lib/Target/X86/X86FrameLowering.cpp b/llvm/lib/Target/X86/X86FrameLowering.cpp
index 4cde7971e597..86cb86b19d62 100644
--- a/llvm/lib/Target/X86/X86FrameLowering.cpp
+++ b/llvm/lib/Target/X86/X86FrameLowering.cpp
@@ -671,7 +671,9 @@ void X86FrameLowering::emitStackProbeInlineGenericLoop(
MF.insert(MBBIter, testMBB);
MF.insert(MBBIter, tailMBB);
- Register FinalStackProbed = Uses64BitFramePtr ? X86::R11 : X86::R11D;
+ Register FinalStackProbed = Uses64BitFramePtr ? X86::R11
+ : Is64Bit ? X86::R11D
+ : X86::EAX;
BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::COPY), FinalStackProbed)
.addReg(StackPtr)
.setMIFlag(MachineInstr::FrameSetup);
@@ -1092,7 +1094,9 @@ void X86FrameLowering::BuildStackAlignAND(MachineBasicBlock &MBB,
MF.insert(MBBIter, bodyMBB);
MF.insert(MBBIter, footMBB);
const unsigned MovMIOpc = Is64Bit ? X86::MOV64mi32 : X86::MOV32mi;
- Register FinalStackProbed = Uses64BitFramePtr ? X86::R11 : X86::R11D;
+ Register FinalStackProbed = Uses64BitFramePtr ? X86::R11
+ : Is64Bit ? X86::R11D
+ : X86::EAX;
// Setup entry block
{
diff --git a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
index d6b97915ede6..75eec25f5807 100644
--- a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
+++ b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
@@ -1996,7 +1996,8 @@ bool OpenMPOpt::rewriteDeviceCodeStateMachine() {
UndefValue::get(Int8Ty), F->getName() + ".ID");
for (Use *U : ToBeReplacedStateMachineUses)
- U->set(ConstantExpr::getBitCast(ID, U->get()->getType()));
+ U->set(ConstantExpr::getPointerBitCastOrAddrSpaceCast(
+ ID, U->get()->getType()));
++NumOpenMPParallelRegionsReplacedInGPUStateMachine;
@@ -3183,10 +3184,14 @@ struct AAKernelInfoFunction : AAKernelInfo {
IsWorker->setDebugLoc(DLoc);
BranchInst::Create(StateMachineBeginBB, UserCodeEntryBB, IsWorker, InitBB);
+ Module &M = *Kernel->getParent();
+
// Create local storage for the work function pointer.
+ const DataLayout &DL = M.getDataLayout();
Type *VoidPtrTy = Type::getInt8PtrTy(Ctx);
- AllocaInst *WorkFnAI = new AllocaInst(VoidPtrTy, 0, "worker.work_fn.addr",
- &Kernel->getEntryBlock().front());
+ Instruction *WorkFnAI =
+ new AllocaInst(VoidPtrTy, DL.getAllocaAddrSpace(), nullptr,
+ "worker.work_fn.addr", &Kernel->getEntryBlock().front());
WorkFnAI->setDebugLoc(DLoc);
auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());
@@ -3199,13 +3204,23 @@ struct AAKernelInfoFunction : AAKernelInfo {
Value *Ident = KernelInitCB->getArgOperand(0);
Value *GTid = KernelInitCB;
- Module &M = *Kernel->getParent();
FunctionCallee BarrierFn =
OMPInfoCache.OMPBuilder.getOrCreateRuntimeFunction(
M, OMPRTL___kmpc_barrier_simple_spmd);
CallInst::Create(BarrierFn, {Ident, GTid}, "", StateMachineBeginBB)
->setDebugLoc(DLoc);
+ if (WorkFnAI->getType()->getPointerAddressSpace() !=
+ (unsigned int)AddressSpace::Generic) {
+ WorkFnAI = new AddrSpaceCastInst(
+ WorkFnAI,
+ PointerType::getWithSamePointeeType(
+ cast<PointerType>(WorkFnAI->getType()),
+ (unsigned int)AddressSpace::Generic),
+ WorkFnAI->getName() + ".generic", StateMachineBeginBB);
+ WorkFnAI->setDebugLoc(DLoc);
+ }
+
FunctionCallee KernelParallelFn =
OMPInfoCache.OMPBuilder.getOrCreateRuntimeFunction(
M, OMPRTL___kmpc_kernel_parallel);
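
The alloca/addrspacecast dance above matters on AMDGPU, where the DataLayout's alloca address space is 5 while the runtime call expects a generic (address space 0) pointer. A minimal sketch of the same pattern; the helper name and insertion point are illustrative:

#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"

// Allocate in the DataLayout's alloca address space and hand back a generic
// pointer, casting only when the two spaces differ.
static llvm::Value *makeGenericSlot(llvm::Module &M, llvm::Type *Ty,
                                    llvm::Instruction *InsertBefore) {
  const llvm::DataLayout &DL = M.getDataLayout();
  llvm::Instruction *AI = new llvm::AllocaInst(
      Ty, DL.getAllocaAddrSpace(), nullptr, "slot", InsertBefore);
  if (AI->getType()->getPointerAddressSpace() == 0)
    return AI; // already generic
  return new llvm::AddrSpaceCastInst(
      AI,
      llvm::PointerType::getWithSamePointeeType(
          llvm::cast<llvm::PointerType>(AI->getType()), /*AddrSpace=*/0),
      "slot.generic", InsertBefore);
}
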
diff --git a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
index 2e36c50b75fc..9afbe0e9a2a5 100644
--- a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
+++ b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
@@ -178,9 +178,9 @@ public:
}
void addStore(int64_t OffsetFromFirst, StoreInst *SI) {
- int64_t StoreSize = DL.getTypeStoreSize(SI->getOperand(0)->getType());
-
- addRange(OffsetFromFirst, StoreSize, SI->getPointerOperand(),
+ TypeSize StoreSize = DL.getTypeStoreSize(SI->getOperand(0)->getType());
+ assert(!StoreSize.isScalable() && "Can't track scalable-typed stores");
+ addRange(OffsetFromFirst, StoreSize.getFixedSize(), SI->getPointerOperand(),
SI->getAlign().value(), SI);
}
@@ -371,6 +371,11 @@ Instruction *MemCpyOptPass::tryMergingIntoMemset(Instruction *StartInst,
Value *ByteVal) {
const DataLayout &DL = StartInst->getModule()->getDataLayout();
+ // We can't track scalable types
+ if (StoreInst *SI = dyn_cast<StoreInst>(StartInst))
+ if (DL.getTypeStoreSize(SI->getOperand(0)->getType()).isScalable())
+ return nullptr;
+
// Okay, so we now have a single store that can be splatable. Scan to find
// all subsequent stores of the same value to offset from the same pointer.
// Join these together into ranges, so we can decide whether contiguous blocks
@@ -426,6 +431,10 @@ Instruction *MemCpyOptPass::tryMergingIntoMemset(Instruction *StartInst,
if (DL.isNonIntegralPointerType(StoredVal->getType()->getScalarType()))
break;
+ // We can't track ranges involving scalable types.
+ if (DL.getTypeStoreSize(StoredVal->getType()).isScalable())
+ break;
+
// Check to see if this stored value is of the same byte-splattable value.
Value *StoredByte = isBytewiseValue(StoredVal, DL);
if (isa<UndefValue>(ByteVal) && StoredByte)
@@ -859,7 +868,7 @@ bool MemCpyOptPass::processMemSet(MemSetInst *MSI, BasicBlock::iterator &BBI) {
/// the call write its result directly into the destination of the memcpy.
bool MemCpyOptPass::performCallSlotOptzn(Instruction *cpyLoad,
Instruction *cpyStore, Value *cpyDest,
- Value *cpySrc, uint64_t cpyLen,
+ Value *cpySrc, TypeSize cpySize,
Align cpyAlign, CallInst *C) {
// The general transformation to keep in mind is
//
@@ -875,6 +884,10 @@ bool MemCpyOptPass::performCallSlotOptzn(Instruction *cpyLoad,
// src only holds uninitialized values at the moment of the call, meaning that
// the memcpy can be discarded rather than moved.
+ // We can't optimize scalable types.
+ if (cpySize.isScalable())
+ return false;
+
// Lifetime marks shouldn't be operated on.
if (Function *F = C->getCalledFunction())
if (F->isIntrinsic() && F->getIntrinsicID() == Intrinsic::lifetime_start)
@@ -893,13 +906,13 @@ bool MemCpyOptPass::performCallSlotOptzn(Instruction *cpyLoad,
uint64_t srcSize = DL.getTypeAllocSize(srcAlloca->getAllocatedType()) *
srcArraySize->getZExtValue();
- if (cpyLen < srcSize)
+ if (cpySize < srcSize)
return false;
// Check that accessing the first srcSize bytes of dest will not cause a
// trap. Otherwise the transform is invalid since it might cause a trap
// to occur earlier than it otherwise would.
- if (!isDereferenceableAndAlignedPointer(cpyDest, Align(1), APInt(64, cpyLen),
+ if (!isDereferenceableAndAlignedPointer(cpyDest, Align(1), APInt(64, cpySize),
DL, C, DT))
return false;
@@ -1452,9 +1465,10 @@ bool MemCpyOptPass::processMemCpy(MemCpyInst *M, BasicBlock::iterator &BBI) {
// of conservatively taking the minimum?
Align Alignment = std::min(M->getDestAlign().valueOrOne(),
M->getSourceAlign().valueOrOne());
- if (performCallSlotOptzn(M, M, M->getDest(), M->getSource(),
- CopySize->getZExtValue(), Alignment,
- C)) {
+ if (performCallSlotOptzn(
+ M, M, M->getDest(), M->getSource(),
+ TypeSize::getFixed(CopySize->getZExtValue()), Alignment,
+ C)) {
LLVM_DEBUG(dbgs() << "Performed call slot optimization:\n"
<< " call: " << *C << "\n"
<< " memcpy: " << *M << "\n");
@@ -1509,7 +1523,8 @@ bool MemCpyOptPass::processMemCpy(MemCpyInst *M, BasicBlock::iterator &BBI) {
Align Alignment = std::min(M->getDestAlign().valueOrOne(),
M->getSourceAlign().valueOrOne());
if (performCallSlotOptzn(M, M, M->getDest(), M->getSource(),
- CopySize->getZExtValue(), Alignment, C)) {
+ TypeSize::getFixed(CopySize->getZExtValue()),
+ Alignment, C)) {
eraseInstruction(M);
++NumMemCpyInstr;
return true;
@@ -1584,7 +1599,7 @@ bool MemCpyOptPass::processByValArgument(CallBase &CB, unsigned ArgNo) {
// Find out what feeds this byval argument.
Value *ByValArg = CB.getArgOperand(ArgNo);
Type *ByValTy = CB.getParamByValType(ArgNo);
- uint64_t ByValSize = DL.getTypeAllocSize(ByValTy);
+ TypeSize ByValSize = DL.getTypeAllocSize(ByValTy);
MemoryLocation Loc(ByValArg, LocationSize::precise(ByValSize));
MemCpyInst *MDep = nullptr;
if (EnableMemorySSA) {
@@ -1612,7 +1627,8 @@ bool MemCpyOptPass::processByValArgument(CallBase &CB, unsigned ArgNo) {
// The length of the memcpy must be larger than or equal to the size of the byval.
ConstantInt *C1 = dyn_cast<ConstantInt>(MDep->getLength());
- if (!C1 || C1->getValue().getZExtValue() < ByValSize)
+ if (!C1 || !TypeSize::isKnownGE(
+ TypeSize::getFixed(C1->getValue().getZExtValue()), ByValSize))
return false;
// Get the alignment of the byval. If the call doesn't specify the alignment,
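
The common thread in these hunks: DataLayout size queries return a TypeSize, which asserts on implicit conversion to uint64_t when the size is scalable (e.g. SVE's <vscale x 4 x i32>). A small sketch of the guard pattern, under that assumption:

#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Instructions.h"
#include "llvm/Support/TypeSize.h"

// A scalable store size has no compile-time byte count, so byte-offset
// bookkeeping must bail out instead of implicitly converting.
static bool getFixedStoreSize(const llvm::DataLayout &DL, llvm::StoreInst *SI,
                              uint64_t &SizeOut) {
  llvm::TypeSize Sz = DL.getTypeStoreSize(SI->getOperand(0)->getType());
  if (Sz.isScalable())
    return false; // unknown multiple of vscale
  SizeOut = Sz.getFixedSize();
  return true;
}
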
diff --git a/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp b/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
index b9cccc2af309..b1c105258027 100644
--- a/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
+++ b/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
@@ -1587,10 +1587,12 @@ deleteDeadClonedBlocks(Loop &L, ArrayRef<BasicBlock *> ExitBlocks,
BB->eraseFromParent();
}
-static void deleteDeadBlocksFromLoop(Loop &L,
- SmallVectorImpl<BasicBlock *> &ExitBlocks,
- DominatorTree &DT, LoopInfo &LI,
- MemorySSAUpdater *MSSAU) {
+static void
+deleteDeadBlocksFromLoop(Loop &L,
+ SmallVectorImpl<BasicBlock *> &ExitBlocks,
+ DominatorTree &DT, LoopInfo &LI,
+ MemorySSAUpdater *MSSAU,
+ function_ref<void(Loop &, StringRef)> DestroyLoopCB) {
// Find all the dead blocks tied to this loop, and remove them from their
// successors.
SmallSetVector<BasicBlock *, 8> DeadBlockSet;
@@ -1640,6 +1642,7 @@ static void deleteDeadBlocksFromLoop(Loop &L,
}) &&
"If the child loop header is dead all blocks in the child loop must "
"be dead as well!");
+ DestroyLoopCB(*ChildL, ChildL->getName());
LI.destroy(ChildL);
return true;
});
@@ -1980,6 +1983,8 @@ static bool rebuildLoopAfterUnswitch(Loop &L, ArrayRef<BasicBlock *> ExitBlocks,
ParentL->removeChildLoop(llvm::find(*ParentL, &L));
else
LI.removeLoop(llvm::find(LI, &L));
+ // markLoopAsDeleted for L should be triggered by the caller (it is typically
+ // done by using the UnswitchCB callback).
LI.destroy(&L);
return false;
}
@@ -2019,7 +2024,8 @@ static void unswitchNontrivialInvariants(
SmallVectorImpl<BasicBlock *> &ExitBlocks, IVConditionInfo &PartialIVInfo,
DominatorTree &DT, LoopInfo &LI, AssumptionCache &AC,
function_ref<void(bool, bool, ArrayRef<Loop *>)> UnswitchCB,
- ScalarEvolution *SE, MemorySSAUpdater *MSSAU) {
+ ScalarEvolution *SE, MemorySSAUpdater *MSSAU,
+ function_ref<void(Loop &, StringRef)> DestroyLoopCB) {
auto *ParentBB = TI.getParent();
BranchInst *BI = dyn_cast<BranchInst>(&TI);
SwitchInst *SI = BI ? nullptr : cast<SwitchInst>(&TI);
@@ -2319,7 +2325,7 @@ static void unswitchNontrivialInvariants(
// Now that our cloned loops have been built, we can update the original loop.
// First we delete the dead blocks from it and then we rebuild the loop
// structure taking these deletions into account.
- deleteDeadBlocksFromLoop(L, ExitBlocks, DT, LI, MSSAU);
+ deleteDeadBlocksFromLoop(L, ExitBlocks, DT, LI, MSSAU, DestroyLoopCB);
if (MSSAU && VerifyMemorySSA)
MSSAU->getMemorySSA()->verifyMemorySSA();
@@ -2670,7 +2676,8 @@ static bool unswitchBestCondition(
Loop &L, DominatorTree &DT, LoopInfo &LI, AssumptionCache &AC,
AAResults &AA, TargetTransformInfo &TTI,
function_ref<void(bool, bool, ArrayRef<Loop *>)> UnswitchCB,
- ScalarEvolution *SE, MemorySSAUpdater *MSSAU) {
+ ScalarEvolution *SE, MemorySSAUpdater *MSSAU,
+ function_ref<void(Loop &, StringRef)> DestroyLoopCB) {
// Collect all invariant conditions within this loop (as opposed to an inner
// loop which would be handled when visiting that inner loop).
SmallVector<std::pair<Instruction *, TinyPtrVector<Value *>>, 4>
@@ -2958,7 +2965,7 @@ static bool unswitchBestCondition(
<< "\n");
unswitchNontrivialInvariants(L, *BestUnswitchTI, BestUnswitchInvariants,
ExitBlocks, PartialIVInfo, DT, LI, AC,
- UnswitchCB, SE, MSSAU);
+ UnswitchCB, SE, MSSAU, DestroyLoopCB);
return true;
}
@@ -2988,7 +2995,8 @@ unswitchLoop(Loop &L, DominatorTree &DT, LoopInfo &LI, AssumptionCache &AC,
AAResults &AA, TargetTransformInfo &TTI, bool Trivial,
bool NonTrivial,
function_ref<void(bool, bool, ArrayRef<Loop *>)> UnswitchCB,
- ScalarEvolution *SE, MemorySSAUpdater *MSSAU) {
+ ScalarEvolution *SE, MemorySSAUpdater *MSSAU,
+ function_ref<void(Loop &, StringRef)> DestroyLoopCB) {
assert(L.isRecursivelyLCSSAForm(DT, LI) &&
"Loops must be in LCSSA form before unswitching.");
@@ -3036,7 +3044,8 @@ unswitchLoop(Loop &L, DominatorTree &DT, LoopInfo &LI, AssumptionCache &AC,
// Try to unswitch the best invariant condition. We prefer this full unswitch to
// a partial unswitch when possible below the threshold.
- if (unswitchBestCondition(L, DT, LI, AC, AA, TTI, UnswitchCB, SE, MSSAU))
+ if (unswitchBestCondition(L, DT, LI, AC, AA, TTI, UnswitchCB, SE, MSSAU,
+ DestroyLoopCB))
return true;
// No other opportunities to unswitch.
@@ -3083,6 +3092,10 @@ PreservedAnalyses SimpleLoopUnswitchPass::run(Loop &L, LoopAnalysisManager &AM,
U.markLoopAsDeleted(L, LoopName);
};
+ auto DestroyLoopCB = [&U](Loop &L, StringRef Name) {
+ U.markLoopAsDeleted(L, Name);
+ };
+
Optional<MemorySSAUpdater> MSSAU;
if (AR.MSSA) {
MSSAU = MemorySSAUpdater(AR.MSSA);
@@ -3091,7 +3104,8 @@ PreservedAnalyses SimpleLoopUnswitchPass::run(Loop &L, LoopAnalysisManager &AM,
}
if (!unswitchLoop(L, AR.DT, AR.LI, AR.AC, AR.AA, AR.TTI, Trivial, NonTrivial,
UnswitchCB, &AR.SE,
- MSSAU.hasValue() ? MSSAU.getPointer() : nullptr))
+ MSSAU.hasValue() ? MSSAU.getPointer() : nullptr,
+ DestroyLoopCB))
return PreservedAnalyses::all();
if (AR.MSSA && VerifyMemorySSA)
@@ -3179,12 +3193,17 @@ bool SimpleLoopUnswitchLegacyPass::runOnLoop(Loop *L, LPPassManager &LPM) {
LPM.markLoopAsDeleted(*L);
};
+ auto DestroyLoopCB = [&LPM](Loop &L, StringRef /* Name */) {
+ LPM.markLoopAsDeleted(L);
+ };
+
if (MSSA && VerifyMemorySSA)
MSSA->verifyMemorySSA();
bool Changed =
unswitchLoop(*L, DT, LI, AC, AA, TTI, true, NonTrivial, UnswitchCB, SE,
- MSSAU.hasValue() ? MSSAU.getPointer() : nullptr);
+ MSSAU.hasValue() ? MSSAU.getPointer() : nullptr,
+ DestroyLoopCB);
if (MSSA && VerifyMemorySSA)
MSSA->verifyMemorySSA();
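
DestroyLoopCB is threaded through as an llvm::function_ref, a non-owning view of a callable: cheap to pass by value through the chain of static helpers, and safe here because the lambdas it points at (capturing &U or &LPM) outlive every call. A minimal sketch of the shape, with illustrative names:

#include "llvm/ADT/STLExtras.h" // llvm::function_ref
#include "llvm/ADT/StringRef.h"

struct Loop; // stand-in for llvm::Loop

// The callback must fire while the Loop object is still alive: pass
// managers key their bookkeeping off the object (and, in the new PM, its
// name), so notify before LI.destroy(&L) frees it.
static void destroyLoop(Loop &L, llvm::StringRef Name,
                        llvm::function_ref<void(Loop &, llvm::StringRef)> CB) {
  CB(L, Name);
}
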
diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
index 583bb379488e..d86ecbb6db00 100644
--- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -1094,17 +1094,24 @@ static void CloneInstructionsIntoPredecessorBlockAndUpdateSSAUses(
// Update (liveout) uses of bonus instructions,
// now that the bonus instruction has been cloned into predecessor.
- SSAUpdater SSAUpdate;
- SSAUpdate.Initialize(BonusInst.getType(),
- (NewBonusInst->getName() + ".merge").str());
- SSAUpdate.AddAvailableValue(BB, &BonusInst);
- SSAUpdate.AddAvailableValue(PredBlock, NewBonusInst);
+ // Note that we expect to be in a block-closed SSA form for this to work!
for (Use &U : make_early_inc_range(BonusInst.uses())) {
auto *UI = cast<Instruction>(U.getUser());
- if (UI->getParent() != PredBlock)
- SSAUpdate.RewriteUseAfterInsertions(U);
- else // Use is in the same block as, and comes before, NewBonusInst.
- SSAUpdate.RewriteUse(U);
+ auto *PN = dyn_cast<PHINode>(UI);
+ if (!PN) {
+ assert(UI->getParent() == BB && BonusInst.comesBefore(UI) &&
+ "If the user is not a PHI node, then it should be in the same "
+ "block as, and come after, the original bonus instruction.");
+ continue; // Keep using the original bonus instruction.
+ }
+ // Is this the block-closed SSA form PHI node?
+ if (PN->getIncomingBlock(U) == BB)
+ continue; // Great, keep using the original bonus instruction.
+ // The only other alternative is a "use" when coming from
+ // the predecessor block - here we should refer to the cloned bonus instr.
+ assert(PN->getIncomingBlock(U) == PredBlock &&
+ "Not in block-closed SSA form?");
+ U.set(NewBonusInst);
}
}
}
@@ -3207,6 +3214,17 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI, DomTreeUpdater *DTU,
// Early exits once we reach the limit.
if (NumBonusInsts > BonusInstThreshold)
return false;
+
+ auto IsBCSSAUse = [BB, &I](Use &U) {
+ auto *UI = cast<Instruction>(U.getUser());
+ if (auto *PN = dyn_cast<PHINode>(UI))
+ return PN->getIncomingBlock(U) == BB;
+ return UI->getParent() == BB && I.comesBefore(UI);
+ };
+
+ // Does this instruction require rewriting of uses?
+ if (!all_of(I.uses(), IsBCSSAUse))
+ return false;
}
// Ok, we have the budget. Perform the transformation.
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 52b5ae083d0e..c05a8408e1fd 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -234,12 +234,15 @@ bool VPlanTransforms::mergeReplicateRegions(VPlan &Plan) {
for (VPRecipeBase &Phi1ToMove : make_early_inc_range(reverse(*Merge1))) {
VPValue *PredInst1 =
cast<VPPredInstPHIRecipe>(&Phi1ToMove)->getOperand(0);
- for (VPUser *U : Phi1ToMove.getVPSingleValue()->users()) {
+ VPValue *Phi1ToMoveV = Phi1ToMove.getVPSingleValue();
+ SmallVector<VPUser *> Users(Phi1ToMoveV->user_begin(),
+ Phi1ToMoveV->user_end());
+ for (VPUser *U : Users) {
auto *UI = dyn_cast<VPRecipeBase>(U);
if (!UI || UI->getParent() != Then2)
continue;
for (unsigned I = 0, E = U->getNumOperands(); I != E; ++I) {
- if (Phi1ToMove.getVPSingleValue() != U->getOperand(I))
+ if (Phi1ToMoveV != U->getOperand(I))
continue;
U->setOperand(I, PredInst1);
}
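
The fix above snapshots users() before the loop because setOperand() edits the very use-list being walked. The same rule applies to LLVM IR def-use chains; a generic sketch of the idiom:

#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"

// Never mutate a def-use list while range-iterating it: copy the users
// first, then edit. Iterating the copy stays valid across the rewrites.
static void redirectUsers(llvm::Value &Old, llvm::Value &New) {
  llvm::SmallVector<llvm::User *, 8> Users(Old.user_begin(), Old.user_end());
  for (llvm::User *U : Users)
    U->replaceUsesOfWith(&Old, &New);
}
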