aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Dimitry Andric <dim@FreeBSD.org> 2019-08-20 21:35:15 +0000
committer Dimitry Andric <dim@FreeBSD.org> 2019-08-20 21:35:15 +0000
commit 464f838b7b7a19b95ae4b33010858de341c620a5 (patch)
tree f662da7fb9ec842d5689f6a8611d098b3c4fd75a
parent e6d1592492a3a379186bfb02bd0f4eda0669c0d5 (diff)
Vendor import of llvm release_90 branch r369369:vendor/llvm/llvm-release_90-r369369
Notes
Notes: svn path=/vendor/llvm/dist-release_90/; revision=351303 svn path=/vendor/llvm/llvm-release_90-r369369/; revision=351304; tag=vendor/llvm/llvm-release_90-r369369
-rw-r--r-- include/llvm/Analysis/AliasAnalysis.h | 2
-rw-r--r-- include/llvm/CodeGen/SelectionDAG.h | 28
-rw-r--r-- include/llvm/CodeGen/TargetLowering.h | 1
-rw-r--r-- include/llvm/ExecutionEngine/Orc/LambdaResolver.h | 1
-rw-r--r-- include/llvm/MC/MCContext.h | 13
-rw-r--r-- include/llvm/Support/AArch64TargetParser.def | 58
-rw-r--r-- include/llvm/Support/AArch64TargetParser.h | 2
-rw-r--r-- include/llvm/Support/ARMTargetParser.h | 20
-rw-r--r-- include/llvm/Transforms/Utils/BypassSlowDivision.h | 11
-rw-r--r-- lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp | 1
-rw-r--r-- lib/CodeGen/CodeGenPrepare.cpp | 3
-rw-r--r-- lib/CodeGen/LiveDebugValues.cpp | 17
-rw-r--r-- lib/CodeGen/MachineCSE.cpp | 25
-rw-r--r-- lib/CodeGen/MachineModuleInfo.cpp | 2
-rw-r--r-- lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp | 10
-rw-r--r-- lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 1
-rw-r--r-- lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp | 35
-rw-r--r-- lib/CodeGen/SelectionDAG/TargetLowering.cpp | 18
-rw-r--r-- lib/MC/MCContext.cpp | 6
-rw-r--r-- lib/MC/MCParser/AsmParser.cpp | 4
-rw-r--r-- lib/Object/RelocationResolver.cpp | 4
-rw-r--r-- lib/Support/AArch64TargetParser.cpp | 4
-rw-r--r-- lib/Support/Unix/Path.inc | 2
-rw-r--r-- lib/Target/AArch64/AArch64.td | 2
-rw-r--r-- lib/Target/AArch64/AArch64ISelLowering.cpp | 16
-rw-r--r-- lib/Target/AArch64/AArch64InstrInfo.td | 2
-rw-r--r-- lib/Target/AArch64/AArch64SVEInstrInfo.td | 164
-rw-r--r-- lib/Target/AArch64/AArch64TargetTransformInfo.cpp | 13
-rw-r--r-- lib/Target/AArch64/AArch64TargetTransformInfo.h | 3
-rw-r--r-- lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp | 2
-rw-r--r-- lib/Target/AArch64/SVEInstrFormats.td | 271
-rw-r--r-- lib/Target/ARM/ARMISelLowering.cpp | 11
-rw-r--r-- lib/Target/ARM/ARMInstrThumb.td | 3
-rw-r--r-- lib/Target/AVR/AVRISelLowering.cpp | 6
-rw-r--r-- lib/Target/BPF/BPFAbstractMemberAccess.cpp | 22
-rw-r--r-- lib/Target/BPF/BTFDebug.cpp | 52
-rw-r--r-- lib/Target/BPF/BTFDebug.h | 6
-rw-r--r-- lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp | 18
-rw-r--r-- lib/Target/RISCV/RISCVFrameLowering.cpp | 48
-rw-r--r-- lib/Target/RISCV/RISCVISelLowering.cpp | 31
-rw-r--r-- lib/Target/RISCV/RISCVISelLowering.h | 1
-rw-r--r-- lib/Target/Sparc/SparcISelLowering.cpp | 2
-rw-r--r-- lib/Target/SystemZ/SystemZISelLowering.cpp | 2
-rw-r--r-- lib/Target/X86/X86.td | 3
-rw-r--r-- lib/Target/X86/X86ISelDAGToDAG.cpp | 39
-rw-r--r-- lib/Target/X86/X86ISelLowering.cpp | 46
-rw-r--r-- lib/Transforms/InstCombine/InstCombineCompares.cpp | 25
-rw-r--r-- lib/Transforms/Scalar/DivRemPairs.cpp | 114
-rw-r--r-- lib/Transforms/Scalar/SpeculateAroundPHIs.cpp | 6
49 files changed, 836 insertions, 340 deletions
diff --git a/include/llvm/Analysis/AliasAnalysis.h b/include/llvm/Analysis/AliasAnalysis.h
index 948341554f23..282142f51bb3 100644
--- a/include/llvm/Analysis/AliasAnalysis.h
+++ b/include/llvm/Analysis/AliasAnalysis.h
@@ -949,7 +949,7 @@ template <typename DerivedT> class AAResultBase {
/// A pointer to the AAResults object that this AAResult is
/// aggregated within. May be null if not aggregated.
- AAResults *AAR;
+ AAResults *AAR = nullptr;
/// Helper to dispatch calls back through the derived type.
DerivedT &derived() { return static_cast<DerivedT &>(*this); }
diff --git a/include/llvm/CodeGen/SelectionDAG.h b/include/llvm/CodeGen/SelectionDAG.h
index 12a970847021..45a598c898c8 100644
--- a/include/llvm/CodeGen/SelectionDAG.h
+++ b/include/llvm/CodeGen/SelectionDAG.h
@@ -269,7 +269,13 @@ class SelectionDAG {
using CallSiteInfo = MachineFunction::CallSiteInfo;
using CallSiteInfoImpl = MachineFunction::CallSiteInfoImpl;
- DenseMap<const SDNode *, CallSiteInfo> SDCallSiteInfo;
+
+ struct CallSiteDbgInfo {
+ CallSiteInfo CSInfo;
+ MDNode *HeapAllocSite = nullptr;
+ };
+
+ DenseMap<const SDNode *, CallSiteDbgInfo> SDCallSiteDbgInfo;
uint16_t NextPersistentId = 0;
@@ -1664,16 +1670,28 @@ public:
}
void addCallSiteInfo(const SDNode *CallNode, CallSiteInfoImpl &&CallInfo) {
- SDCallSiteInfo[CallNode] = std::move(CallInfo);
+ SDCallSiteDbgInfo[CallNode].CSInfo = std::move(CallInfo);
}
CallSiteInfo getSDCallSiteInfo(const SDNode *CallNode) {
- auto I = SDCallSiteInfo.find(CallNode);
- if (I != SDCallSiteInfo.end())
- return std::move(I->second);
+ auto I = SDCallSiteDbgInfo.find(CallNode);
+ if (I != SDCallSiteDbgInfo.end())
+ return std::move(I->second).CSInfo;
return CallSiteInfo();
}
+ void addHeapAllocSite(const SDNode *Node, MDNode *MD) {
+ SDCallSiteDbgInfo[Node].HeapAllocSite = MD;
+ }
+
+ /// Return the HeapAllocSite type associated with the SDNode, if it exists.
+ MDNode *getHeapAllocSite(const SDNode *Node) {
+ auto It = SDCallSiteDbgInfo.find(Node);
+ if (It == SDCallSiteDbgInfo.end())
+ return nullptr;
+ return It->second.HeapAllocSite;
+ }
+
private:
void InsertNode(SDNode *N);
bool RemoveNodeFromCSEMaps(SDNode *N);
diff --git a/include/llvm/CodeGen/TargetLowering.h b/include/llvm/CodeGen/TargetLowering.h
index d5cca60bb1b2..ca7548cd8d6f 100644
--- a/include/llvm/CodeGen/TargetLowering.h
+++ b/include/llvm/CodeGen/TargetLowering.h
@@ -3665,6 +3665,7 @@ public:
C_Register, // Constraint represents specific register(s).
C_RegisterClass, // Constraint represents any of register(s) in class.
C_Memory, // Memory constraint.
+ C_Immediate, // Requires an immediate.
C_Other, // Something else.
C_Unknown // Unsupported constraint.
};
diff --git a/include/llvm/ExecutionEngine/Orc/LambdaResolver.h b/include/llvm/ExecutionEngine/Orc/LambdaResolver.h
index 855e31b33549..84cbc53b73a5 100644
--- a/include/llvm/ExecutionEngine/Orc/LambdaResolver.h
+++ b/include/llvm/ExecutionEngine/Orc/LambdaResolver.h
@@ -16,6 +16,7 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/ExecutionEngine/JITSymbol.h"
+#include "llvm/ExecutionEngine/OrcV1Deprecation.h"
#include <memory>
namespace llvm {
diff --git a/include/llvm/MC/MCContext.h b/include/llvm/MC/MCContext.h
index 5c2124cc0d15..c40cd7c2c257 100644
--- a/include/llvm/MC/MCContext.h
+++ b/include/llvm/MC/MCContext.h
@@ -112,6 +112,9 @@ namespace llvm {
/// number of section symbols with the same name).
StringMap<bool, BumpPtrAllocator &> UsedNames;
+ /// Keeps track of labels that are used in inline assembly.
+ SymbolTable InlineAsmUsedLabelNames;
+
/// The next ID to dole out to an unnamed assembler temporary symbol with
/// a given prefix.
StringMap<unsigned> NextID;
@@ -377,6 +380,16 @@ namespace llvm {
/// APIs.
const SymbolTable &getSymbols() const { return Symbols; }
+ /// getInlineAsmLabel - Return the symbol for a label referenced in inline
+ /// assembly, or null if the name is not such a label.
+ MCSymbol *getInlineAsmLabel(StringRef Name) const {
+ return InlineAsmUsedLabelNames.lookup(Name);
+ }
+
+ /// registerInlineAsmLabel - Records that the name is a label referenced in
+ /// inline assembly.
+ void registerInlineAsmLabel(MCSymbol *Sym);
+
/// @}
/// \name Section Management
diff --git a/include/llvm/Support/AArch64TargetParser.def b/include/llvm/Support/AArch64TargetParser.def
index e152f383b3ec..5cdf190a9f19 100644
--- a/include/llvm/Support/AArch64TargetParser.def
+++ b/include/llvm/Support/AArch64TargetParser.def
@@ -50,35 +50,35 @@ AARCH64_ARCH("armv8.5-a", ARMV8_5A, "8.5-A", "v8.5a",
#define AARCH64_ARCH_EXT_NAME(NAME, ID, FEATURE, NEGFEATURE)
#endif
// FIXME: This would be nicer were it tablegen
-AARCH64_ARCH_EXT_NAME("invalid", AArch64::AEK_INVALID, nullptr, nullptr)
-AARCH64_ARCH_EXT_NAME("none", AArch64::AEK_NONE, nullptr, nullptr)
-AARCH64_ARCH_EXT_NAME("crc", AArch64::AEK_CRC, "+crc", "-crc")
-AARCH64_ARCH_EXT_NAME("lse", AArch64::AEK_LSE, "+lse", "-lse")
-AARCH64_ARCH_EXT_NAME("rdm", AArch64::AEK_RDM, "+rdm", "-rdm")
-AARCH64_ARCH_EXT_NAME("crypto", AArch64::AEK_CRYPTO, "+crypto","-crypto")
-AARCH64_ARCH_EXT_NAME("sm4", AArch64::AEK_SM4, "+sm4", "-sm4")
-AARCH64_ARCH_EXT_NAME("sha3", AArch64::AEK_SHA3, "+sha3", "-sha3")
-AARCH64_ARCH_EXT_NAME("sha2", AArch64::AEK_SHA2, "+sha2", "-sha2")
-AARCH64_ARCH_EXT_NAME("aes", AArch64::AEK_AES, "+aes", "-aes")
-AARCH64_ARCH_EXT_NAME("dotprod", AArch64::AEK_DOTPROD, "+dotprod","-dotprod")
-AARCH64_ARCH_EXT_NAME("fp", AArch64::AEK_FP, "+fp-armv8", "-fp-armv8")
-AARCH64_ARCH_EXT_NAME("simd", AArch64::AEK_SIMD, "+neon", "-neon")
-AARCH64_ARCH_EXT_NAME("fp16", AArch64::AEK_FP16, "+fullfp16", "-fullfp16")
-AARCH64_ARCH_EXT_NAME("fp16fml", AArch64::AEK_FP16FML, "+fp16fml", "-fp16fml")
-AARCH64_ARCH_EXT_NAME("profile", AArch64::AEK_PROFILE, "+spe", "-spe")
-AARCH64_ARCH_EXT_NAME("ras", AArch64::AEK_RAS, "+ras", "-ras")
-AARCH64_ARCH_EXT_NAME("sve", AArch64::AEK_SVE, "+sve", "-sve")
-AARCH64_ARCH_EXT_NAME("sve2", AArch64::AEK_SVE2, "+sve2", "-sve2")
-AARCH64_ARCH_EXT_NAME("sve2-aes", AArch64::AEK_SVE2AES, "+sve2-aes", "-sve2-aes")
-AARCH64_ARCH_EXT_NAME("sve2-sm4", AArch64::AEK_SVE2SM4, "+sve2-sm4", "-sve2-sm4")
-AARCH64_ARCH_EXT_NAME("sve2-sha3", AArch64::AEK_SVE2SHA3, "+sve2-sha3", "-sve2-sha3")
-AARCH64_ARCH_EXT_NAME("bitperm", AArch64::AEK_BITPERM, "+bitperm", "-bitperm")
-AARCH64_ARCH_EXT_NAME("rcpc", AArch64::AEK_RCPC, "+rcpc", "-rcpc")
-AARCH64_ARCH_EXT_NAME("rng", AArch64::AEK_RAND, "+rand", "-rand")
-AARCH64_ARCH_EXT_NAME("memtag", AArch64::AEK_MTE, "+mte", "-mte")
-AARCH64_ARCH_EXT_NAME("ssbs", AArch64::AEK_SSBS, "+ssbs", "-ssbs")
-AARCH64_ARCH_EXT_NAME("sb", AArch64::AEK_SB, "+sb", "-sb")
-AARCH64_ARCH_EXT_NAME("predres", AArch64::AEK_PREDRES, "+predres", "-predres")
+AARCH64_ARCH_EXT_NAME("invalid", AArch64::AEK_INVALID, nullptr, nullptr)
+AARCH64_ARCH_EXT_NAME("none", AArch64::AEK_NONE, nullptr, nullptr)
+AARCH64_ARCH_EXT_NAME("crc", AArch64::AEK_CRC, "+crc", "-crc")
+AARCH64_ARCH_EXT_NAME("lse", AArch64::AEK_LSE, "+lse", "-lse")
+AARCH64_ARCH_EXT_NAME("rdm", AArch64::AEK_RDM, "+rdm", "-rdm")
+AARCH64_ARCH_EXT_NAME("crypto", AArch64::AEK_CRYPTO, "+crypto","-crypto")
+AARCH64_ARCH_EXT_NAME("sm4", AArch64::AEK_SM4, "+sm4", "-sm4")
+AARCH64_ARCH_EXT_NAME("sha3", AArch64::AEK_SHA3, "+sha3", "-sha3")
+AARCH64_ARCH_EXT_NAME("sha2", AArch64::AEK_SHA2, "+sha2", "-sha2")
+AARCH64_ARCH_EXT_NAME("aes", AArch64::AEK_AES, "+aes", "-aes")
+AARCH64_ARCH_EXT_NAME("dotprod", AArch64::AEK_DOTPROD, "+dotprod","-dotprod")
+AARCH64_ARCH_EXT_NAME("fp", AArch64::AEK_FP, "+fp-armv8", "-fp-armv8")
+AARCH64_ARCH_EXT_NAME("simd", AArch64::AEK_SIMD, "+neon", "-neon")
+AARCH64_ARCH_EXT_NAME("fp16", AArch64::AEK_FP16, "+fullfp16", "-fullfp16")
+AARCH64_ARCH_EXT_NAME("fp16fml", AArch64::AEK_FP16FML, "+fp16fml", "-fp16fml")
+AARCH64_ARCH_EXT_NAME("profile", AArch64::AEK_PROFILE, "+spe", "-spe")
+AARCH64_ARCH_EXT_NAME("ras", AArch64::AEK_RAS, "+ras", "-ras")
+AARCH64_ARCH_EXT_NAME("sve", AArch64::AEK_SVE, "+sve", "-sve")
+AARCH64_ARCH_EXT_NAME("sve2", AArch64::AEK_SVE2, "+sve2", "-sve2")
+AARCH64_ARCH_EXT_NAME("sve2-aes", AArch64::AEK_SVE2AES, "+sve2-aes", "-sve2-aes")
+AARCH64_ARCH_EXT_NAME("sve2-sm4", AArch64::AEK_SVE2SM4, "+sve2-sm4", "-sve2-sm4")
+AARCH64_ARCH_EXT_NAME("sve2-sha3", AArch64::AEK_SVE2SHA3, "+sve2-sha3", "-sve2-sha3")
+AARCH64_ARCH_EXT_NAME("sve2-bitperm", AArch64::AEK_SVE2BITPERM, "+sve2-bitperm", "-sve2-bitperm")
+AARCH64_ARCH_EXT_NAME("rcpc", AArch64::AEK_RCPC, "+rcpc", "-rcpc")
+AARCH64_ARCH_EXT_NAME("rng", AArch64::AEK_RAND, "+rand", "-rand")
+AARCH64_ARCH_EXT_NAME("memtag", AArch64::AEK_MTE, "+mte", "-mte")
+AARCH64_ARCH_EXT_NAME("ssbs", AArch64::AEK_SSBS, "+ssbs", "-ssbs")
+AARCH64_ARCH_EXT_NAME("sb", AArch64::AEK_SB, "+sb", "-sb")
+AARCH64_ARCH_EXT_NAME("predres", AArch64::AEK_PREDRES, "+predres", "-predres")
#undef AARCH64_ARCH_EXT_NAME
#ifndef AARCH64_CPU_NAME
diff --git a/include/llvm/Support/AArch64TargetParser.h b/include/llvm/Support/AArch64TargetParser.h
index 965d38535e74..a2d2cf32d715 100644
--- a/include/llvm/Support/AArch64TargetParser.h
+++ b/include/llvm/Support/AArch64TargetParser.h
@@ -53,7 +53,7 @@ enum ArchExtKind : unsigned {
AEK_SVE2AES = 1 << 24,
AEK_SVE2SM4 = 1 << 25,
AEK_SVE2SHA3 = 1 << 26,
- AEK_BITPERM = 1 << 27,
+ AEK_SVE2BITPERM = 1 << 27,
};
enum class ArchKind {
diff --git a/include/llvm/Support/ARMTargetParser.h b/include/llvm/Support/ARMTargetParser.h
index 4b9070dea596..02d4c975129f 100644
--- a/include/llvm/Support/ARMTargetParser.h
+++ b/include/llvm/Support/ARMTargetParser.h
@@ -39,19 +39,13 @@ enum ArchExtKind : unsigned {
AEK_DSP = 1 << 10,
AEK_FP16 = 1 << 11,
AEK_RAS = 1 << 12,
- AEK_SVE = 1 << 13,
- AEK_DOTPROD = 1 << 14,
- AEK_SHA2 = 1 << 15,
- AEK_AES = 1 << 16,
- AEK_FP16FML = 1 << 17,
- AEK_SB = 1 << 18,
- AEK_SVE2 = 1 << 19,
- AEK_SVE2AES = 1 << 20,
- AEK_SVE2SM4 = 1 << 21,
- AEK_SVE2SHA3 = 1 << 22,
- AEK_BITPERM = 1 << 23,
- AEK_FP_DP = 1 << 24,
- AEK_LOB = 1 << 25,
+ AEK_DOTPROD = 1 << 13,
+ AEK_SHA2 = 1 << 14,
+ AEK_AES = 1 << 15,
+ AEK_FP16FML = 1 << 16,
+ AEK_SB = 1 << 17,
+ AEK_FP_DP = 1 << 18,
+ AEK_LOB = 1 << 19,
// Unsupported extensions.
AEK_OS = 0x8000000,
AEK_IWMMXT = 0x10000000,
diff --git a/include/llvm/Transforms/Utils/BypassSlowDivision.h b/include/llvm/Transforms/Utils/BypassSlowDivision.h
index 471055921fa8..994b6ec9c229 100644
--- a/include/llvm/Transforms/Utils/BypassSlowDivision.h
+++ b/include/llvm/Transforms/Utils/BypassSlowDivision.h
@@ -19,6 +19,7 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseMapInfo.h"
+#include "llvm/IR/ValueHandle.h"
#include <cstdint>
namespace llvm {
@@ -28,8 +29,8 @@ class Value;
struct DivRemMapKey {
bool SignedOp;
- Value *Dividend;
- Value *Divisor;
+ AssertingVH<Value> Dividend;
+ AssertingVH<Value> Divisor;
DivRemMapKey(bool InSignedOp, Value *InDividend, Value *InDivisor)
: SignedOp(InSignedOp), Dividend(InDividend), Divisor(InDivisor) {}
@@ -50,8 +51,10 @@ template <> struct DenseMapInfo<DivRemMapKey> {
}
static unsigned getHashValue(const DivRemMapKey &Val) {
- return (unsigned)(reinterpret_cast<uintptr_t>(Val.Dividend) ^
- reinterpret_cast<uintptr_t>(Val.Divisor)) ^
+ return (unsigned)(reinterpret_cast<uintptr_t>(
+ static_cast<Value *>(Val.Dividend)) ^
+ reinterpret_cast<uintptr_t>(
+ static_cast<Value *>(Val.Divisor))) ^
(unsigned)Val.SignedOp;
}
};
diff --git a/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp b/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
index 7721e996aca5..5e49fec9c053 100644
--- a/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
+++ b/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
@@ -432,6 +432,7 @@ static void EmitGCCInlineAsmStr(const char *AsmStr, const MachineInstr *MI,
const BlockAddress *BA = MI->getOperand(OpNo).getBlockAddress();
MCSymbol *Sym = AP->GetBlockAddressSymbol(BA);
Sym->print(OS, AP->MAI);
+ MMI->getContext().registerInlineAsmLabel(Sym);
} else if (MI->getOperand(OpNo).isMBB()) {
const MCSymbol *Sym = MI->getOperand(OpNo).getMBB()->getSymbol();
Sym->print(OS, AP->MAI);
diff --git a/lib/CodeGen/CodeGenPrepare.cpp b/lib/CodeGen/CodeGenPrepare.cpp
index 52b4bbea012b..e6f2aa9ef930 100644
--- a/lib/CodeGen/CodeGenPrepare.cpp
+++ b/lib/CodeGen/CodeGenPrepare.cpp
@@ -1682,10 +1682,11 @@ static bool OptimizeExtractBits(BinaryOperator *ShiftI, ConstantInt *CI,
TheUse = InsertedShift;
}
- // If we removed all uses, nuke the shift.
+ // If we removed all uses, or there are none, nuke the shift.
if (ShiftI->use_empty()) {
salvageDebugInfo(*ShiftI);
ShiftI->eraseFromParent();
+ MadeChange = true;
}
return MadeChange;
diff --git a/lib/CodeGen/LiveDebugValues.cpp b/lib/CodeGen/LiveDebugValues.cpp
index a669e64692b9..05e994c9eb51 100644
--- a/lib/CodeGen/LiveDebugValues.cpp
+++ b/lib/CodeGen/LiveDebugValues.cpp
@@ -691,9 +691,17 @@ void LiveDebugValues::insertTransferDebugPair(
"No register supplied when handling a restore of a debug value");
MachineFunction *MF = MI.getMF();
DIBuilder DIB(*const_cast<Function &>(MF->getFunction()).getParent());
+
+ const DIExpression *NewExpr;
+ if (auto Fragment = DebugInstr->getDebugExpression()->getFragmentInfo())
+ NewExpr = *DIExpression::createFragmentExpression(DIB.createExpression(),
+ Fragment->OffsetInBits, Fragment->SizeInBits);
+ else
+ NewExpr = DIB.createExpression();
+
NewDebugInstr =
BuildMI(*MF, DebugInstr->getDebugLoc(), DebugInstr->getDesc(), false,
- NewReg, DebugInstr->getDebugVariable(), DIB.createExpression());
+ NewReg, DebugInstr->getDebugVariable(), NewExpr);
VarLoc VL(*NewDebugInstr, LS);
ProcessVarLoc(VL, NewDebugInstr);
LLVM_DEBUG(dbgs() << "Creating DBG_VALUE inst for register restore: ";
@@ -848,9 +856,14 @@ void LiveDebugValues::transferSpillOrRestoreInst(MachineInstr &MI,
<< "\n");
}
// Check if the register or spill location is the location of a debug value.
+ // FIXME: Don't create a spill transfer if there is a complex expression,
+ // because we currently cannot recover the original expression on restore.
for (unsigned ID : OpenRanges.getVarLocs()) {
+ const MachineInstr *DebugInstr = &VarLocIDs[ID].MI;
+
if (TKind == TransferKind::TransferSpill &&
- VarLocIDs[ID].isDescribedByReg() == Reg) {
+ VarLocIDs[ID].isDescribedByReg() == Reg &&
+ !DebugInstr->getDebugExpression()->isComplex()) {
LLVM_DEBUG(dbgs() << "Spilling Register " << printReg(Reg, TRI) << '('
<< VarLocIDs[ID].Var.getVar()->getName() << ")\n");
} else if (TKind == TransferKind::TransferRestore &&
diff --git a/lib/CodeGen/MachineCSE.cpp b/lib/CodeGen/MachineCSE.cpp
index 2df6d40d9293..a5af5cb72df9 100644
--- a/lib/CodeGen/MachineCSE.cpp
+++ b/lib/CodeGen/MachineCSE.cpp
@@ -21,6 +21,7 @@
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/CFG.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
@@ -66,6 +67,7 @@ namespace {
AliasAnalysis *AA;
MachineDominatorTree *DT;
MachineRegisterInfo *MRI;
+ MachineBlockFrequencyInfo *MBFI;
public:
static char ID; // Pass identification
@@ -83,6 +85,8 @@ namespace {
AU.addPreservedID(MachineLoopInfoID);
AU.addRequired<MachineDominatorTree>();
AU.addPreserved<MachineDominatorTree>();
+ AU.addRequired<MachineBlockFrequencyInfo>();
+ AU.addPreserved<MachineBlockFrequencyInfo>();
}
void releaseMemory() override {
@@ -133,6 +137,11 @@ namespace {
bool isPRECandidate(MachineInstr *MI);
bool ProcessBlockPRE(MachineDominatorTree *MDT, MachineBasicBlock *MBB);
bool PerformSimplePRE(MachineDominatorTree *DT);
+ /// Heuristics to see if it's beneficial to move common computations of MBB
+ /// and MBB1 to CandidateBB.
+ bool isBeneficalToHoistInto(MachineBasicBlock *CandidateBB,
+ MachineBasicBlock *MBB,
+ MachineBasicBlock *MBB1);
};
} // end anonymous namespace
@@ -802,6 +811,9 @@ bool MachineCSE::ProcessBlockPRE(MachineDominatorTree *DT,
if (!CMBB->isLegalToHoistInto())
continue;
+ if (!isBeneficalToHoistInto(CMBB, MBB, MBB1))
+ continue;
+
// Two instrs are partial redundant if their basic blocks are reachable
// from one to another but one doesn't dominate another.
if (CMBB != MBB1) {
@@ -854,6 +866,18 @@ bool MachineCSE::PerformSimplePRE(MachineDominatorTree *DT) {
return Changed;
}
+bool MachineCSE::isBeneficalToHoistInto(MachineBasicBlock *CandidateBB,
+ MachineBasicBlock *MBB,
+ MachineBasicBlock *MBB1) {
+ if (CandidateBB->getParent()->getFunction().hasMinSize())
+ return true;
+ assert(DT->dominates(CandidateBB, MBB) && "CandidateBB should dominate MBB");
+ assert(DT->dominates(CandidateBB, MBB1) &&
+ "CandidateBB should dominate MBB1");
+ return MBFI->getBlockFreq(CandidateBB) <=
+ MBFI->getBlockFreq(MBB) + MBFI->getBlockFreq(MBB1);
+}
+
bool MachineCSE::runOnMachineFunction(MachineFunction &MF) {
if (skipFunction(MF.getFunction()))
return false;
@@ -863,6 +887,7 @@ bool MachineCSE::runOnMachineFunction(MachineFunction &MF) {
MRI = &MF.getRegInfo();
AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
DT = &getAnalysis<MachineDominatorTree>();
+ MBFI = &getAnalysis<MachineBlockFrequencyInfo>();
LookAheadLimit = TII->getMachineCSELookAheadLimit();
bool ChangedPRE, ChangedCSE;
ChangedPRE = PerformSimplePRE(DT);
diff --git a/lib/CodeGen/MachineModuleInfo.cpp b/lib/CodeGen/MachineModuleInfo.cpp
index aadcd7319799..2e720018262c 100644
--- a/lib/CodeGen/MachineModuleInfo.cpp
+++ b/lib/CodeGen/MachineModuleInfo.cpp
@@ -121,7 +121,7 @@ ArrayRef<MCSymbol *> MMIAddrLabelMap::getAddrLabelSymbolToEmit(BasicBlock *BB) {
BBCallbacks.back().setMap(this);
Entry.Index = BBCallbacks.size() - 1;
Entry.Fn = BB->getParent();
- Entry.Symbols.push_back(Context.createTempSymbol());
+ Entry.Symbols.push_back(Context.createTempSymbol(!BB->hasAddressTaken()));
return Entry.Symbols;
}
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
index 568c6191e512..e09f2e760f55 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
@@ -909,6 +909,12 @@ EmitSchedule(MachineBasicBlock::iterator &InsertPos) {
// Remember the source order of the inserted instruction.
if (HasDbg)
ProcessSourceNode(N, DAG, Emitter, VRBaseMap, Orders, Seen, NewInsn);
+
+ if (MDNode *MD = DAG->getHeapAllocSite(N)) {
+ if (NewInsn && NewInsn->isCall())
+ MF.addCodeViewHeapAllocSite(NewInsn, MD);
+ }
+
GluedNodes.pop_back();
}
auto NewInsn =
@@ -917,6 +923,10 @@ EmitSchedule(MachineBasicBlock::iterator &InsertPos) {
if (HasDbg)
ProcessSourceNode(SU->getNode(), DAG, Emitter, VRBaseMap, Orders, Seen,
NewInsn);
+ if (MDNode *MD = DAG->getHeapAllocSite(SU->getNode())) {
+ if (NewInsn && NewInsn->isCall())
+ MF.addCodeViewHeapAllocSite(NewInsn, MD);
+ }
}
// Insert all the dbg_values which have not already been inserted in source
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 5852e693fa9f..6b0245dfd380 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -1084,6 +1084,7 @@ void SelectionDAG::clear() {
ExternalSymbols.clear();
TargetExternalSymbols.clear();
MCSymbols.clear();
+ SDCallSiteDbgInfo.clear();
std::fill(CondCodeNodes.begin(), CondCodeNodes.end(),
static_cast<CondCodeSDNode*>(nullptr));
std::fill(ValueTypeNodes.begin(), ValueTypeNodes.end(),
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index e818dd27c05e..3c02c36a7d26 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -8021,6 +8021,14 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
// Compute the constraint code and ConstraintType to use.
TLI.ComputeConstraintToUse(T, SDValue());
+ if (T.ConstraintType == TargetLowering::C_Immediate &&
+ OpInfo.CallOperand && !isa<ConstantSDNode>(OpInfo.CallOperand))
+ // We've delayed emitting a diagnostic for constraints like "n" because
+ // inlining could cause an integer constant to show up.
+ return emitInlineAsmError(
+ CS, "constraint '" + Twine(T.ConstraintCode) + "' expects an "
+ "integer constant expression");
+
ExtraInfo.update(T);
}
@@ -8105,7 +8113,8 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
switch (OpInfo.Type) {
case InlineAsm::isOutput:
if (OpInfo.ConstraintType == TargetLowering::C_Memory ||
- (OpInfo.ConstraintType == TargetLowering::C_Other &&
+ ((OpInfo.ConstraintType == TargetLowering::C_Immediate ||
+ OpInfo.ConstraintType == TargetLowering::C_Other) &&
OpInfo.isIndirect)) {
unsigned ConstraintID =
TLI.getInlineAsmMemConstraint(OpInfo.ConstraintCode);
@@ -8119,13 +8128,14 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
MVT::i32));
AsmNodeOperands.push_back(OpInfo.CallOperand);
break;
- } else if ((OpInfo.ConstraintType == TargetLowering::C_Other &&
+ } else if (((OpInfo.ConstraintType == TargetLowering::C_Immediate ||
+ OpInfo.ConstraintType == TargetLowering::C_Other) &&
!OpInfo.isIndirect) ||
OpInfo.ConstraintType == TargetLowering::C_Register ||
OpInfo.ConstraintType == TargetLowering::C_RegisterClass) {
// Otherwise, this outputs to a register (directly for C_Register /
- // C_RegisterClass, and a target-defined fashion for C_Other). Find a
- // register that we can use.
+ // C_RegisterClass, and a target-defined fashion for
+ // C_Immediate/C_Other). Find a register that we can use.
if (OpInfo.AssignedRegs.Regs.empty()) {
emitInlineAsmError(
CS, "couldn't allocate output register for constraint '" +
@@ -8205,15 +8215,24 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
}
// Treat indirect 'X' constraint as memory.
- if (OpInfo.ConstraintType == TargetLowering::C_Other &&
+ if ((OpInfo.ConstraintType == TargetLowering::C_Immediate ||
+ OpInfo.ConstraintType == TargetLowering::C_Other) &&
OpInfo.isIndirect)
OpInfo.ConstraintType = TargetLowering::C_Memory;
- if (OpInfo.ConstraintType == TargetLowering::C_Other) {
+ if (OpInfo.ConstraintType == TargetLowering::C_Immediate ||
+ OpInfo.ConstraintType == TargetLowering::C_Other) {
std::vector<SDValue> Ops;
TLI.LowerAsmOperandForConstraint(InOperandVal, OpInfo.ConstraintCode,
Ops, DAG);
if (Ops.empty()) {
+ if (OpInfo.ConstraintType == TargetLowering::C_Immediate)
+ if (isa<ConstantSDNode>(InOperandVal)) {
+ emitInlineAsmError(CS, "value out of range for constraint '" +
+ Twine(OpInfo.ConstraintCode) + "'");
+ return;
+ }
+
emitInlineAsmError(CS, "invalid operand for inline asm constraint '" +
Twine(OpInfo.ConstraintCode) + "'");
return;
@@ -8250,7 +8269,8 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
}
assert((OpInfo.ConstraintType == TargetLowering::C_RegisterClass ||
- OpInfo.ConstraintType == TargetLowering::C_Register) &&
+ OpInfo.ConstraintType == TargetLowering::C_Register ||
+ OpInfo.ConstraintType == TargetLowering::C_Immediate) &&
"Unknown constraint type!");
// TODO: Support this.
@@ -8356,6 +8376,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
Val = OpInfo.AssignedRegs.getCopyFromRegs(
DAG, FuncInfo, getCurSDLoc(), Chain, &Flag, CS.getInstruction());
break;
+ case TargetLowering::C_Immediate:
case TargetLowering::C_Other:
Val = TLI.LowerAsmOutputForConstraint(Chain, Flag, getCurSDLoc(),
OpInfo, DAG);
diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index b260cd91d468..2d90dcba12b6 100644
--- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -3567,15 +3567,17 @@ TargetLowering::getConstraintType(StringRef Constraint) const {
if (S == 1) {
switch (Constraint[0]) {
default: break;
- case 'r': return C_RegisterClass;
+ case 'r':
+ return C_RegisterClass;
case 'm': // memory
case 'o': // offsetable
case 'V': // not offsetable
return C_Memory;
- case 'i': // Simple Integer or Relocatable Constant
case 'n': // Simple Integer
case 'E': // Floating Point Constant
case 'F': // Floating Point Constant
+ return C_Immediate;
+ case 'i': // Simple Integer or Relocatable Constant
case 's': // Relocatable Constant
case 'p': // Address.
case 'X': // Allow ANY value.
@@ -3950,6 +3952,7 @@ TargetLowering::ParseConstraints(const DataLayout &DL,
/// Return an integer indicating how general CT is.
static unsigned getConstraintGenerality(TargetLowering::ConstraintType CT) {
switch (CT) {
+ case TargetLowering::C_Immediate:
case TargetLowering::C_Other:
case TargetLowering::C_Unknown:
return 0;
@@ -4069,11 +4072,12 @@ static void ChooseConstraint(TargetLowering::AsmOperandInfo &OpInfo,
TargetLowering::ConstraintType CType =
TLI.getConstraintType(OpInfo.Codes[i]);
- // If this is an 'other' constraint, see if the operand is valid for it.
- // For example, on X86 we might have an 'rI' constraint. If the operand
- // is an integer in the range [0..31] we want to use I (saving a load
- // of a register), otherwise we must use 'r'.
- if (CType == TargetLowering::C_Other && Op.getNode()) {
+ // If this is an 'other' or 'immediate' constraint, see if the operand is
+ // valid for it. For example, on X86 we might have an 'rI' constraint. If
+ // the operand is an integer in the range [0..31] we want to use I (saving a
+ // load of a register), otherwise we must use 'r'.
+ if ((CType == TargetLowering::C_Other ||
+ CType == TargetLowering::C_Immediate) && Op.getNode()) {
assert(OpInfo.Codes[i].size() == 1 &&
"Unhandled multi-letter 'other' constraint");
std::vector<SDValue> ResultOps;
diff --git a/lib/MC/MCContext.cpp b/lib/MC/MCContext.cpp
index 0dc2e2d37caf..6f9efec36361 100644
--- a/lib/MC/MCContext.cpp
+++ b/lib/MC/MCContext.cpp
@@ -61,6 +61,7 @@ MCContext::MCContext(const MCAsmInfo *mai, const MCRegisterInfo *mri,
bool DoAutoReset)
: SrcMgr(mgr), InlineSrcMgr(nullptr), MAI(mai), MRI(mri), MOFI(mofi),
Symbols(Allocator), UsedNames(Allocator),
+ InlineAsmUsedLabelNames(Allocator),
CurrentDwarfLoc(0, 0, 0, DWARF2_FLAG_IS_STMT, 0, 0),
AutoReset(DoAutoReset) {
SecureLogFile = AsSecureLogFileName;
@@ -90,6 +91,7 @@ void MCContext::reset() {
XCOFFAllocator.DestroyAll();
MCSubtargetAllocator.DestroyAll();
+ InlineAsmUsedLabelNames.clear();
UsedNames.clear();
Symbols.clear();
Allocator.Reset();
@@ -272,6 +274,10 @@ void MCContext::setSymbolValue(MCStreamer &Streamer,
Streamer.EmitAssignment(Symbol, MCConstantExpr::create(Val, *this));
}
+void MCContext::registerInlineAsmLabel(MCSymbol *Sym) {
+ InlineAsmUsedLabelNames[Sym->getName()] = Sym;
+}
+
//===----------------------------------------------------------------------===//
// Section Management
//===----------------------------------------------------------------------===//
diff --git a/lib/MC/MCParser/AsmParser.cpp b/lib/MC/MCParser/AsmParser.cpp
index 084f6a7a2e14..c2cbca2177be 100644
--- a/lib/MC/MCParser/AsmParser.cpp
+++ b/lib/MC/MCParser/AsmParser.cpp
@@ -1142,7 +1142,9 @@ bool AsmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) {
}
}
- MCSymbol *Sym = getContext().getOrCreateSymbol(SymbolName);
+ MCSymbol *Sym = getContext().getInlineAsmLabel(SymbolName);
+ if (!Sym)
+ Sym = getContext().getOrCreateSymbol(SymbolName);
// If this is an absolute variable reference, substitute it now to preserve
// semantics in the face of reassignment.
diff --git a/lib/Object/RelocationResolver.cpp b/lib/Object/RelocationResolver.cpp
index 0a243f32e12c..41a0ac7fbd10 100644
--- a/lib/Object/RelocationResolver.cpp
+++ b/lib/Object/RelocationResolver.cpp
@@ -90,9 +90,9 @@ static bool supportsBPF(uint64_t Type) {
static uint64_t resolveBPF(RelocationRef R, uint64_t S, uint64_t A) {
switch (R.getType()) {
case ELF::R_BPF_64_32:
- return S & 0xFFFFFFFF;
+ return (S + A) & 0xFFFFFFFF;
case ELF::R_BPF_64_64:
- return S;
+ return S + A;
default:
llvm_unreachable("Invalid relocation type");
}
diff --git a/lib/Support/AArch64TargetParser.cpp b/lib/Support/AArch64TargetParser.cpp
index df4caa1f07fd..6f1d6d50eee2 100644
--- a/lib/Support/AArch64TargetParser.cpp
+++ b/lib/Support/AArch64TargetParser.cpp
@@ -96,8 +96,8 @@ bool AArch64::getExtensionFeatures(unsigned Extensions,
Features.push_back("+sve2-sm4");
if (Extensions & AEK_SVE2SHA3)
Features.push_back("+sve2-sha3");
- if (Extensions & AEK_BITPERM)
- Features.push_back("+bitperm");
+ if (Extensions & AEK_SVE2BITPERM)
+ Features.push_back("+sve2-bitperm");
if (Extensions & AEK_RCPC)
Features.push_back("+rcpc");
diff --git a/lib/Support/Unix/Path.inc b/lib/Support/Unix/Path.inc
index e80880c6b3cb..27c8a1bc9b74 100644
--- a/lib/Support/Unix/Path.inc
+++ b/lib/Support/Unix/Path.inc
@@ -1200,7 +1200,7 @@ namespace fs {
/// implementation.
std::error_code copy_file(const Twine &From, const Twine &To) {
uint32_t Flag = COPYFILE_DATA;
-#if __has_builtin(__builtin_available)
+#if __has_builtin(__builtin_available) && defined(COPYFILE_CLONE)
if (__builtin_available(macos 10.12, *)) {
bool IsSymlink;
if (std::error_code Error = is_symlink_file(From, IsSymlink))
diff --git a/lib/Target/AArch64/AArch64.td b/lib/Target/AArch64/AArch64.td
index e39c6995e367..f54db0aa03b2 100644
--- a/lib/Target/AArch64/AArch64.td
+++ b/lib/Target/AArch64/AArch64.td
@@ -115,7 +115,7 @@ def FeatureSVE2SM4 : SubtargetFeature<"sve2-sm4", "HasSVE2SM4", "true",
def FeatureSVE2SHA3 : SubtargetFeature<"sve2-sha3", "HasSVE2SHA3", "true",
"Enable SHA3 SVE2 instructions", [FeatureSVE2, FeatureSHA3]>;
-def FeatureSVE2BitPerm : SubtargetFeature<"bitperm", "HasSVE2BitPerm", "true",
+def FeatureSVE2BitPerm : SubtargetFeature<"sve2-bitperm", "HasSVE2BitPerm", "true",
"Enable bit permutation SVE2 instructions", [FeatureSVE2]>;
def FeatureZCRegMove : SubtargetFeature<"zcm", "HasZeroCycleRegMove", "true",
diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp
index 7becc99fb5c7..6c250aea39f0 100644
--- a/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -606,6 +606,10 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
MaxStoresPerMemmoveOptSize = MaxStoresPerMemmove = 4;
+ MaxLoadsPerMemcmpOptSize = 4;
+ MaxLoadsPerMemcmp = Subtarget->requiresStrictAlign()
+ ? MaxLoadsPerMemcmpOptSize : 8;
+
setStackPointerRegisterToSaveRestore(AArch64::SP);
setSchedulingPreference(Sched::Hybrid);
@@ -5661,8 +5665,6 @@ AArch64TargetLowering::getConstraintType(StringRef Constraint) const {
switch (Constraint[0]) {
default:
break;
- case 'z':
- return C_Other;
case 'x':
case 'w':
return C_RegisterClass;
@@ -5670,6 +5672,16 @@ AArch64TargetLowering::getConstraintType(StringRef Constraint) const {
// currently handle addresses it is the same as 'r'.
case 'Q':
return C_Memory;
+ case 'I':
+ case 'J':
+ case 'K':
+ case 'L':
+ case 'M':
+ case 'N':
+ case 'Y':
+ case 'Z':
+ return C_Immediate;
+ case 'z':
case 'S': // A symbolic address
return C_Other;
}
diff --git a/lib/Target/AArch64/AArch64InstrInfo.td b/lib/Target/AArch64/AArch64InstrInfo.td
index eed53f36d574..020035c7f6c3 100644
--- a/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/lib/Target/AArch64/AArch64InstrInfo.td
@@ -116,7 +116,7 @@ def HasSVE2SM4 : Predicate<"Subtarget->hasSVE2SM4()">,
def HasSVE2SHA3 : Predicate<"Subtarget->hasSVE2SHA3()">,
AssemblerPredicate<"FeatureSVE2SHA3", "sve2-sha3">;
def HasSVE2BitPerm : Predicate<"Subtarget->hasSVE2BitPerm()">,
- AssemblerPredicate<"FeatureSVE2BitPerm", "bitperm">;
+ AssemblerPredicate<"FeatureSVE2BitPerm", "sve2-bitperm">;
def HasRCPC : Predicate<"Subtarget->hasRCPC()">,
AssemblerPredicate<"FeatureRCPC", "rcpc">;
def HasAltNZCV : Predicate<"Subtarget->hasAlternativeNZCV()">,
diff --git a/lib/Target/AArch64/AArch64SVEInstrInfo.td b/lib/Target/AArch64/AArch64SVEInstrInfo.td
index 79ab42f4c080..8e1ff999bd57 100644
--- a/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -1164,6 +1164,13 @@ let Predicates = [HasSVE2] in {
defm SQRSHLR_ZPmZ : sve2_int_arith_pred<0b011100, "sqrshlr">;
defm UQRSHLR_ZPmZ : sve2_int_arith_pred<0b011110, "uqrshlr">;
+ // SVE2 predicated shifts
+ defm SQSHL_ZPmI : sve_int_bin_pred_shift_imm_left< 0b0110, "sqshl">;
+ defm UQSHL_ZPmI : sve_int_bin_pred_shift_imm_left< 0b0111, "uqshl">;
+ defm SRSHR_ZPmI : sve_int_bin_pred_shift_imm_right<0b1100, "srshr">;
+ defm URSHR_ZPmI : sve_int_bin_pred_shift_imm_right<0b1101, "urshr">;
+ defm SQSHLU_ZPmI : sve_int_bin_pred_shift_imm_left< 0b1111, "sqshlu">;
+
// SVE2 integer add/subtract long
defm SADDLB_ZZZ : sve2_wide_int_arith_long<0b00000, "saddlb">;
defm SADDLT_ZZZ : sve2_wide_int_arith_long<0b00001, "saddlt">;
@@ -1199,14 +1206,14 @@ let Predicates = [HasSVE2] in {
defm PMULLT_ZZZ : sve2_pmul_long<0b1, "pmullt">;
// SVE2 bitwise shift and insert
- defm SRI_ZZI : sve2_int_bin_cons_shift_imm_right<0b0, "sri">;
- defm SLI_ZZI : sve2_int_bin_cons_shift_imm_left< 0b1, "sli">;
+ defm SRI_ZZI : sve2_int_bin_shift_imm_right<0b0, "sri">;
+ defm SLI_ZZI : sve2_int_bin_shift_imm_left< 0b1, "sli">;
// SVE2 bitwise shift right and accumulate
- defm SSRA_ZZI : sve2_int_bin_accum_cons_shift_imm_right<0b00, "ssra">;
- defm USRA_ZZI : sve2_int_bin_accum_cons_shift_imm_right<0b01, "usra">;
- defm SRSRA_ZZI : sve2_int_bin_accum_cons_shift_imm_right<0b10, "srsra">;
- defm URSRA_ZZI : sve2_int_bin_accum_cons_shift_imm_right<0b11, "ursra">;
+ defm SSRA_ZZI : sve2_int_bin_accum_shift_imm_right<0b00, "ssra">;
+ defm USRA_ZZI : sve2_int_bin_accum_shift_imm_right<0b01, "usra">;
+ defm SRSRA_ZZI : sve2_int_bin_accum_shift_imm_right<0b10, "srsra">;
+ defm URSRA_ZZI : sve2_int_bin_accum_shift_imm_right<0b11, "ursra">;
// SVE2 complex integer add
defm CADD_ZZI : sve2_int_cadd<0b0, "cadd">;
@@ -1228,41 +1235,47 @@ let Predicates = [HasSVE2] in {
defm SBCLB_ZZZ : sve2_int_addsub_long_carry<0b10, "sbclb">;
defm SBCLT_ZZZ : sve2_int_addsub_long_carry<0b11, "sbclt">;
- // SVE2 bitwise shift right narrow
- defm SQSHRUNB_ZZI : sve2_int_bin_cons_shift_imm_right_narrow<0b0000, "sqshrunb">;
- defm SQSHRUNT_ZZI : sve2_int_bin_cons_shift_imm_right_narrow<0b0001, "sqshrunt">;
- defm SQRSHRUNB_ZZI : sve2_int_bin_cons_shift_imm_right_narrow<0b0010, "sqrshrunb">;
- defm SQRSHRUNT_ZZI : sve2_int_bin_cons_shift_imm_right_narrow<0b0011, "sqrshrunt">;
- defm SHRNB_ZZI : sve2_int_bin_cons_shift_imm_right_narrow<0b0100, "shrnb">;
- defm SHRNT_ZZI : sve2_int_bin_cons_shift_imm_right_narrow<0b0101, "shrnt">;
- defm RSHRNB_ZZI : sve2_int_bin_cons_shift_imm_right_narrow<0b0110, "rshrnb">;
- defm RSHRNT_ZZI : sve2_int_bin_cons_shift_imm_right_narrow<0b0111, "rshrnt">;
- defm SQSHRNB_ZZI : sve2_int_bin_cons_shift_imm_right_narrow<0b1000, "sqshrnb">;
- defm SQSHRNT_ZZI : sve2_int_bin_cons_shift_imm_right_narrow<0b1001, "sqshrnt">;
- defm SQRSHRNB_ZZI : sve2_int_bin_cons_shift_imm_right_narrow<0b1010, "sqrshrnb">;
- defm SQRSHRNT_ZZI : sve2_int_bin_cons_shift_imm_right_narrow<0b1011, "sqrshrnt">;
- defm UQSHRNB_ZZI : sve2_int_bin_cons_shift_imm_right_narrow<0b1100, "uqshrnb">;
- defm UQSHRNT_ZZI : sve2_int_bin_cons_shift_imm_right_narrow<0b1101, "uqshrnt">;
- defm UQRSHRNB_ZZI : sve2_int_bin_cons_shift_imm_right_narrow<0b1110, "uqrshrnb">;
- defm UQRSHRNT_ZZI : sve2_int_bin_cons_shift_imm_right_narrow<0b1111, "uqrshrnt">;
-
- // SVE2 integer add/subtract narrow high part
- defm ADDHNB_ZZZ : sve2_int_addsub_narrow_high<0b000, "addhnb">;
- defm ADDHNT_ZZZ : sve2_int_addsub_narrow_high<0b001, "addhnt">;
- defm RADDHNB_ZZZ : sve2_int_addsub_narrow_high<0b010, "raddhnb">;
- defm RADDHNT_ZZZ : sve2_int_addsub_narrow_high<0b011, "raddhnt">;
- defm SUBHNB_ZZZ : sve2_int_addsub_narrow_high<0b100, "subhnb">;
- defm SUBHNT_ZZZ : sve2_int_addsub_narrow_high<0b101, "subhnt">;
- defm RSUBHNB_ZZZ : sve2_int_addsub_narrow_high<0b110, "rsubhnb">;
- defm RSUBHNT_ZZZ : sve2_int_addsub_narrow_high<0b111, "rsubhnt">;
-
- // SVE2 saturating extract narrow
- defm SQXTNB_ZZ : sve2_int_sat_extract_narrow<0b000, "sqxtnb">;
- defm SQXTNT_ZZ : sve2_int_sat_extract_narrow<0b001, "sqxtnt">;
- defm UQXTNB_ZZ : sve2_int_sat_extract_narrow<0b010, "uqxtnb">;
- defm UQXTNT_ZZ : sve2_int_sat_extract_narrow<0b011, "uqxtnt">;
- defm SQXTUNB_ZZ : sve2_int_sat_extract_narrow<0b100, "sqxtunb">;
- defm SQXTUNT_ZZ : sve2_int_sat_extract_narrow<0b101, "sqxtunt">;
+ // SVE2 bitwise shift right narrow (bottom)
+ defm SQSHRUNB_ZZI : sve2_int_bin_shift_imm_right_narrow_bottom<0b000, "sqshrunb">;
+ defm SQRSHRUNB_ZZI : sve2_int_bin_shift_imm_right_narrow_bottom<0b001, "sqrshrunb">;
+ defm SHRNB_ZZI : sve2_int_bin_shift_imm_right_narrow_bottom<0b010, "shrnb">;
+ defm RSHRNB_ZZI : sve2_int_bin_shift_imm_right_narrow_bottom<0b011, "rshrnb">;
+ defm SQSHRNB_ZZI : sve2_int_bin_shift_imm_right_narrow_bottom<0b100, "sqshrnb">;
+ defm SQRSHRNB_ZZI : sve2_int_bin_shift_imm_right_narrow_bottom<0b101, "sqrshrnb">;
+ defm UQSHRNB_ZZI : sve2_int_bin_shift_imm_right_narrow_bottom<0b110, "uqshrnb">;
+ defm UQRSHRNB_ZZI : sve2_int_bin_shift_imm_right_narrow_bottom<0b111, "uqrshrnb">;
+
+ // SVE2 bitwise shift right narrow (top)
+ defm SQSHRUNT_ZZI : sve2_int_bin_shift_imm_right_narrow_top<0b000, "sqshrunt">;
+ defm SQRSHRUNT_ZZI : sve2_int_bin_shift_imm_right_narrow_top<0b001, "sqrshrunt">;
+ defm SHRNT_ZZI : sve2_int_bin_shift_imm_right_narrow_top<0b010, "shrnt">;
+ defm RSHRNT_ZZI : sve2_int_bin_shift_imm_right_narrow_top<0b011, "rshrnt">;
+ defm SQSHRNT_ZZI : sve2_int_bin_shift_imm_right_narrow_top<0b100, "sqshrnt">;
+ defm SQRSHRNT_ZZI : sve2_int_bin_shift_imm_right_narrow_top<0b101, "sqrshrnt">;
+ defm UQSHRNT_ZZI : sve2_int_bin_shift_imm_right_narrow_top<0b110, "uqshrnt">;
+ defm UQRSHRNT_ZZI : sve2_int_bin_shift_imm_right_narrow_top<0b111, "uqrshrnt">;
+
+ // SVE2 integer add/subtract narrow high part (bottom)
+ defm ADDHNB_ZZZ : sve2_int_addsub_narrow_high_bottom<0b00, "addhnb">;
+ defm RADDHNB_ZZZ : sve2_int_addsub_narrow_high_bottom<0b01, "raddhnb">;
+ defm SUBHNB_ZZZ : sve2_int_addsub_narrow_high_bottom<0b10, "subhnb">;
+ defm RSUBHNB_ZZZ : sve2_int_addsub_narrow_high_bottom<0b11, "rsubhnb">;
+
+ // SVE2 integer add/subtract narrow high part (top)
+ defm ADDHNT_ZZZ : sve2_int_addsub_narrow_high_top<0b00, "addhnt">;
+ defm RADDHNT_ZZZ : sve2_int_addsub_narrow_high_top<0b01, "raddhnt">;
+ defm SUBHNT_ZZZ : sve2_int_addsub_narrow_high_top<0b10, "subhnt">;
+ defm RSUBHNT_ZZZ : sve2_int_addsub_narrow_high_top<0b11, "rsubhnt">;
+
+ // SVE2 saturating extract narrow (bottom)
+ defm SQXTNB_ZZ : sve2_int_sat_extract_narrow_bottom<0b00, "sqxtnb">;
+ defm UQXTNB_ZZ : sve2_int_sat_extract_narrow_bottom<0b01, "uqxtnb">;
+ defm SQXTUNB_ZZ : sve2_int_sat_extract_narrow_bottom<0b10, "sqxtunb">;
+
+ // SVE2 saturating extract narrow (top)
+ defm SQXTNT_ZZ : sve2_int_sat_extract_narrow_top<0b00, "sqxtnt">;
+ defm UQXTNT_ZZ : sve2_int_sat_extract_narrow_top<0b01, "uqxtnt">;
+ defm SQXTUNT_ZZ : sve2_int_sat_extract_narrow_top<0b10, "sqxtunt">;
// SVE2 character match
defm MATCH_PPzZZ : sve2_char_match<0b0, "match">;
@@ -1289,10 +1302,14 @@ let Predicates = [HasSVE2] in {
// SVE2 histogram generation (vector)
defm HISTCNT_ZPzZZ : sve2_hist_gen_vector<"histcnt">;
+ // SVE2 floating-point base 2 logarithm as integer
+ defm FLOGB_ZPmZ : sve2_fp_flogb<"flogb">;
+
// SVE2 floating-point convert precision
defm FCVTXNT_ZPmZ : sve2_fp_convert_down_odd_rounding<"fcvtxnt">;
defm FCVTNT_ZPmZ : sve2_fp_convert_down_narrow<"fcvtnt">;
defm FCVTLT_ZPmZ : sve2_fp_convert_up_long<"fcvtlt">;
+ def FCVTX_ZPmZ_DtoS : sve_fp_2op_p_zd<0b0001010, "fcvtx", ZPR64, ZPR32, ElementSizeD>;
// SVE2 floating-point pairwise operations
defm FADDP_ZPmZZ : sve2_fp_pairwise_pred<0b000, "faddp">;
@@ -1321,58 +1338,45 @@ let Predicates = [HasSVE2] in {
def BSL2N_ZZZZ_D : sve2_int_bitwise_ternary_op_d<0b101, "bsl2n">;
def NBSL_ZZZZ_D : sve2_int_bitwise_ternary_op_d<0b111, "nbsl">;
- // sve_int_rotate_imm
+ // SVE2 bitwise xor and rotate right by immediate
defm XAR_ZZZI : sve2_int_rotate_right_imm<"xar">;
// SVE2 extract vector (immediate offset, constructive)
def EXT_ZZI_B : sve2_int_perm_extract_i_cons<"ext">;
- // SVE floating-point convert precision
- def FCVTX_ZPmZ_DtoS : sve_fp_2op_p_zd<0b0001010, "fcvtx", ZPR64, ZPR32, ElementSizeD>;
-
- // SVE floating-point convert to integer
- defm FLOGB_ZPmZ : sve2_fp_flogb<"flogb">;
-
- // Non-temporal contiguous loads (vector + register)
- defm LDNT1SB_ZZR_S : sve2_mem_cldnt_vs<0b00000, "ldnt1sb", Z_s, ZPR32>;
- defm LDNT1B_ZZR_S : sve2_mem_cldnt_vs<0b00001, "ldnt1b", Z_s, ZPR32>;
- defm LDNT1SH_ZZR_S : sve2_mem_cldnt_vs<0b00100, "ldnt1sh", Z_s, ZPR32>;
- defm LDNT1H_ZZR_S : sve2_mem_cldnt_vs<0b00101, "ldnt1h", Z_s, ZPR32>;
- defm LDNT1W_ZZR_S : sve2_mem_cldnt_vs<0b01001, "ldnt1w", Z_s, ZPR32>;
-
- defm LDNT1SB_ZZR_D : sve2_mem_cldnt_vs<0b10000, "ldnt1sb", Z_d, ZPR64>;
- defm LDNT1B_ZZR_D : sve2_mem_cldnt_vs<0b10010, "ldnt1b", Z_d, ZPR64>;
- defm LDNT1SH_ZZR_D : sve2_mem_cldnt_vs<0b10100, "ldnt1sh", Z_d, ZPR64>;
- defm LDNT1H_ZZR_D : sve2_mem_cldnt_vs<0b10110, "ldnt1h", Z_d, ZPR64>;
- defm LDNT1SW_ZZR_D : sve2_mem_cldnt_vs<0b11000, "ldnt1sw", Z_d, ZPR64>;
- defm LDNT1W_ZZR_D : sve2_mem_cldnt_vs<0b11010, "ldnt1w", Z_d, ZPR64>;
- defm LDNT1D_ZZR_D : sve2_mem_cldnt_vs<0b11110, "ldnt1d", Z_d, ZPR64>;
+ // SVE2 non-temporal gather loads
+ defm LDNT1SB_ZZR_S : sve2_mem_gldnt_vs<0b00000, "ldnt1sb", Z_s, ZPR32>;
+ defm LDNT1B_ZZR_S : sve2_mem_gldnt_vs<0b00001, "ldnt1b", Z_s, ZPR32>;
+ defm LDNT1SH_ZZR_S : sve2_mem_gldnt_vs<0b00100, "ldnt1sh", Z_s, ZPR32>;
+ defm LDNT1H_ZZR_S : sve2_mem_gldnt_vs<0b00101, "ldnt1h", Z_s, ZPR32>;
+ defm LDNT1W_ZZR_S : sve2_mem_gldnt_vs<0b01001, "ldnt1w", Z_s, ZPR32>;
+
+ defm LDNT1SB_ZZR_D : sve2_mem_gldnt_vs<0b10000, "ldnt1sb", Z_d, ZPR64>;
+ defm LDNT1B_ZZR_D : sve2_mem_gldnt_vs<0b10010, "ldnt1b", Z_d, ZPR64>;
+ defm LDNT1SH_ZZR_D : sve2_mem_gldnt_vs<0b10100, "ldnt1sh", Z_d, ZPR64>;
+ defm LDNT1H_ZZR_D : sve2_mem_gldnt_vs<0b10110, "ldnt1h", Z_d, ZPR64>;
+ defm LDNT1SW_ZZR_D : sve2_mem_gldnt_vs<0b11000, "ldnt1sw", Z_d, ZPR64>;
+ defm LDNT1W_ZZR_D : sve2_mem_gldnt_vs<0b11010, "ldnt1w", Z_d, ZPR64>;
+ defm LDNT1D_ZZR_D : sve2_mem_gldnt_vs<0b11110, "ldnt1d", Z_d, ZPR64>;
// SVE2 vector splice (constructive)
defm SPLICE_ZPZZ : sve2_int_perm_splice_cons<"splice">;
- // Predicated shifts
- defm SQSHL_ZPmI : sve_int_bin_pred_shift_imm_left< 0b0110, "sqshl">;
- defm UQSHL_ZPmI : sve_int_bin_pred_shift_imm_left< 0b0111, "uqshl">;
- defm SRSHR_ZPmI : sve_int_bin_pred_shift_imm_right<0b1100, "srshr">;
- defm URSHR_ZPmI : sve_int_bin_pred_shift_imm_right<0b1101, "urshr">;
- defm SQSHLU_ZPmI : sve_int_bin_pred_shift_imm_left< 0b1111, "sqshlu">;
-
- // Non-temporal contiguous stores (vector + register)
- defm STNT1B_ZZR_S : sve2_mem_cstnt_vs<0b001, "stnt1b", Z_s, ZPR32>;
- defm STNT1H_ZZR_S : sve2_mem_cstnt_vs<0b011, "stnt1h", Z_s, ZPR32>;
- defm STNT1W_ZZR_S : sve2_mem_cstnt_vs<0b101, "stnt1w", Z_s, ZPR32>;
+ // SVE2 non-temporal scatter stores
+ defm STNT1B_ZZR_S : sve2_mem_sstnt_vs<0b001, "stnt1b", Z_s, ZPR32>;
+ defm STNT1H_ZZR_S : sve2_mem_sstnt_vs<0b011, "stnt1h", Z_s, ZPR32>;
+ defm STNT1W_ZZR_S : sve2_mem_sstnt_vs<0b101, "stnt1w", Z_s, ZPR32>;
- defm STNT1B_ZZR_D : sve2_mem_cstnt_vs<0b000, "stnt1b", Z_d, ZPR64>;
- defm STNT1H_ZZR_D : sve2_mem_cstnt_vs<0b010, "stnt1h", Z_d, ZPR64>;
- defm STNT1W_ZZR_D : sve2_mem_cstnt_vs<0b100, "stnt1w", Z_d, ZPR64>;
- defm STNT1D_ZZR_D : sve2_mem_cstnt_vs<0b110, "stnt1d", Z_d, ZPR64>;
+ defm STNT1B_ZZR_D : sve2_mem_sstnt_vs<0b000, "stnt1b", Z_d, ZPR64>;
+ defm STNT1H_ZZR_D : sve2_mem_sstnt_vs<0b010, "stnt1h", Z_d, ZPR64>;
+ defm STNT1W_ZZR_D : sve2_mem_sstnt_vs<0b100, "stnt1w", Z_d, ZPR64>;
+ defm STNT1D_ZZR_D : sve2_mem_sstnt_vs<0b110, "stnt1d", Z_d, ZPR64>;
- // SVE table lookup (three sources)
+ // SVE2 table lookup (three sources)
defm TBL_ZZZZ : sve2_int_perm_tbl<"tbl">;
defm TBX_ZZZ : sve2_int_perm_tbx<"tbx">;
- // SVE integer compare scalar count and limit
+ // SVE2 integer compare scalar count and limit
defm WHILEGE_PWW : sve_int_while4_rr<0b000, "whilege">;
defm WHILEGT_PWW : sve_int_while4_rr<0b001, "whilegt">;
defm WHILEHS_PWW : sve_int_while4_rr<0b100, "whilehs">;
@@ -1383,7 +1387,7 @@ let Predicates = [HasSVE2] in {
defm WHILEHS_PXX : sve_int_while8_rr<0b100, "whilehs">;
defm WHILEHI_PXX : sve_int_while8_rr<0b101, "whilehi">;
- // SVE pointer conflict compare
+ // SVE2 pointer conflict compare
defm WHILEWR_PXX : sve2_int_while_rr<0b0, "whilewr">;
defm WHILERW_PXX : sve2_int_while_rr<0b1, "whilerw">;
}
diff --git a/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index a4b78f2a7d6b..301bf72d5239 100644
--- a/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -618,6 +618,19 @@ int AArch64TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, I);
}
+AArch64TTIImpl::TTI::MemCmpExpansionOptions
+AArch64TTIImpl::enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const {
+ TTI::MemCmpExpansionOptions Options;
+ Options.AllowOverlappingLoads = !ST->requiresStrictAlign();
+ Options.MaxNumLoads = TLI->getMaxExpandSizeMemcmp(OptSize);
+ Options.NumLoadsPerBlock = Options.MaxNumLoads;
+ // TODO: Though vector loads usually perform well on AArch64, in some targets
+ // they may wake up the FP unit, which raises the power consumption. Perhaps
+ // they could be used with no holds barred (-O3).
+ Options.LoadSizes = {8, 4, 2, 1};
+ return Options;
+}
+
int AArch64TTIImpl::getMemoryOpCost(unsigned Opcode, Type *Ty,
unsigned Alignment, unsigned AddressSpace,
const Instruction *I) {
diff --git a/lib/Target/AArch64/AArch64TargetTransformInfo.h b/lib/Target/AArch64/AArch64TargetTransformInfo.h
index 10c15a139b4c..95cda63b0174 100644
--- a/lib/Target/AArch64/AArch64TargetTransformInfo.h
+++ b/lib/Target/AArch64/AArch64TargetTransformInfo.h
@@ -130,6 +130,9 @@ public:
int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
const Instruction *I = nullptr);
+ TTI::MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize,
+ bool IsZeroCmp) const;
+
int getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
unsigned AddressSpace, const Instruction *I = nullptr);
diff --git a/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp b/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
index f4c55d48d215..09b42811f786 100644
--- a/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
+++ b/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
@@ -2840,7 +2840,7 @@ static const struct Extension {
{"sve2-aes", {AArch64::FeatureSVE2AES}},
{"sve2-sm4", {AArch64::FeatureSVE2SM4}},
{"sve2-sha3", {AArch64::FeatureSVE2SHA3}},
- {"bitperm", {AArch64::FeatureSVE2BitPerm}},
+ {"sve2-bitperm", {AArch64::FeatureSVE2BitPerm}},
// FIXME: Unsupported extensions
{"pan", {}},
{"lor", {}},
diff --git a/lib/Target/AArch64/SVEInstrFormats.td b/lib/Target/AArch64/SVEInstrFormats.td
index 808e59467081..dfd6c576e99b 100644
--- a/lib/Target/AArch64/SVEInstrFormats.td
+++ b/lib/Target/AArch64/SVEInstrFormats.td
@@ -403,12 +403,12 @@ multiclass sve_int_count_r_x64<bits<5> opc, string asm> {
}
class sve_int_count_v<bits<2> sz8_64, bits<5> opc, string asm,
- ZPRRegOp zprty>
-: I<(outs zprty:$Zdn), (ins zprty:$_Zdn, PPRAny:$Pg),
- asm, "\t$Zdn, $Pg",
+ ZPRRegOp zprty, PPRRegOp pprty>
+: I<(outs zprty:$Zdn), (ins zprty:$_Zdn, pprty:$Pm),
+ asm, "\t$Zdn, $Pm",
"",
[]>, Sched<[]> {
- bits<4> Pg;
+ bits<4> Pm;
bits<5> Zdn;
let Inst{31-24} = 0b00100101;
let Inst{23-22} = sz8_64;
@@ -416,7 +416,7 @@ class sve_int_count_v<bits<2> sz8_64, bits<5> opc, string asm,
let Inst{18-16} = opc{4-2};
let Inst{15-11} = 0b10000;
let Inst{10-9} = opc{1-0};
- let Inst{8-5} = Pg;
+ let Inst{8-5} = Pm;
let Inst{4-0} = Zdn;
let Constraints = "$Zdn = $_Zdn";
@@ -425,9 +425,16 @@ class sve_int_count_v<bits<2> sz8_64, bits<5> opc, string asm,
}
multiclass sve_int_count_v<bits<5> opc, string asm> {
- def _H : sve_int_count_v<0b01, opc, asm, ZPR16>;
- def _S : sve_int_count_v<0b10, opc, asm, ZPR32>;
- def _D : sve_int_count_v<0b11, opc, asm, ZPR64>;
+ def _H : sve_int_count_v<0b01, opc, asm, ZPR16, PPR16>;
+ def _S : sve_int_count_v<0b10, opc, asm, ZPR32, PPR32>;
+ def _D : sve_int_count_v<0b11, opc, asm, ZPR64, PPR64>;
+
+ def : InstAlias<asm # "\t$Zdn, $Pm",
+ (!cast<Instruction>(NAME # "_H") ZPR16:$Zdn, PPRAny:$Pm), 0>;
+ def : InstAlias<asm # "\t$Zdn, $Pm",
+ (!cast<Instruction>(NAME # "_S") ZPR32:$Zdn, PPRAny:$Pm), 0>;
+ def : InstAlias<asm # "\t$Zdn, $Pm",
+ (!cast<Instruction>(NAME # "_D") ZPR64:$Zdn, PPRAny:$Pm), 0>;
}
class sve_int_pcount_pred<bits<2> sz8_64, bits<4> opc, string asm,
@@ -744,7 +751,7 @@ multiclass sve2_int_perm_tbl<string asm> {
}
class sve2_int_perm_tbx<bits<2> sz8_64, string asm, ZPRRegOp zprty>
-: I<(outs zprty:$Zd), (ins zprty:$Zn, zprty:$Zm),
+: I<(outs zprty:$Zd), (ins zprty:$_Zd, zprty:$Zn, zprty:$Zm),
asm, "\t$Zd, $Zn, $Zm",
"",
[]>, Sched<[]> {
@@ -758,6 +765,8 @@ class sve2_int_perm_tbx<bits<2> sz8_64, string asm, ZPRRegOp zprty>
let Inst{15-10} = 0b001011;
let Inst{9-5} = Zn;
let Inst{4-0} = Zd;
+
+ let Constraints = "$Zd = $_Zd";
}
multiclass sve2_int_perm_tbx<string asm> {
@@ -1489,7 +1498,7 @@ multiclass sve_fp_fcadd<string asm> {
class sve2_fp_convert_precision<bits<4> opc, string asm,
ZPRRegOp zprty1, ZPRRegOp zprty2>
-: I<(outs zprty1:$Zd), (ins PPR3bAny:$Pg, zprty2:$Zn),
+: I<(outs zprty1:$Zd), (ins zprty1:$_Zd, PPR3bAny:$Pg, zprty2:$Zn),
asm, "\t$Zd, $Pg/m, $Zn",
"",
[]>, Sched<[]> {
@@ -1504,6 +1513,8 @@ class sve2_fp_convert_precision<bits<4> opc, string asm,
let Inst{12-10} = Pg;
let Inst{9-5} = Zn;
let Inst{4-0} = Zd;
+
+ let Constraints = "$Zd = $_Zd";
}
multiclass sve2_fp_convert_down_narrow<string asm> {
@@ -2399,21 +2410,40 @@ multiclass sve2_misc_bitwise<bits<4> opc, string asm> {
def _D : sve2_misc<0b11, opc, asm, ZPR64, ZPR64>;
}
-multiclass sve2_bitwise_xor_interleaved<bit opc, string asm> {
- let DestructiveInstType = Destructive, ElementSize = ElementSizeNone in {
- def _B : sve2_misc<0b00, { 0b010, opc }, asm, ZPR8, ZPR8>;
- def _H : sve2_misc<0b01, { 0b010, opc }, asm, ZPR16, ZPR16>;
- def _S : sve2_misc<0b10, { 0b010, opc }, asm, ZPR32, ZPR32>;
- def _D : sve2_misc<0b11, { 0b010, opc }, asm, ZPR64, ZPR64>;
- }
-}
-
multiclass sve2_misc_int_addsub_long_interleaved<bits<2> opc, string asm> {
def _H : sve2_misc<0b01, { 0b00, opc }, asm, ZPR16, ZPR8>;
def _S : sve2_misc<0b10, { 0b00, opc }, asm, ZPR32, ZPR16>;
def _D : sve2_misc<0b11, { 0b00, opc }, asm, ZPR64, ZPR32>;
}
+class sve2_bitwise_xor_interleaved<bits<2> sz, bits<1> opc, string asm,
+ ZPRRegOp zprty1, ZPRRegOp zprty2>
+: I<(outs zprty1:$Zd), (ins zprty1:$_Zd, zprty2:$Zn, zprty2:$Zm),
+ asm, "\t$Zd, $Zn, $Zm", "", []>, Sched<[]> {
+ bits<5> Zd;
+ bits<5> Zn;
+ bits<5> Zm;
+ let Inst{31-24} = 0b01000101;
+ let Inst{23-22} = sz;
+ let Inst{21} = 0b0;
+ let Inst{20-16} = Zm;
+ let Inst{15-11} = 0b10010;
+ let Inst{10} = opc;
+ let Inst{9-5} = Zn;
+ let Inst{4-0} = Zd;
+
+ let Constraints = "$Zd = $_Zd";
+ let DestructiveInstType = Destructive;
+ let ElementSize = ElementSizeNone;
+}
+
+multiclass sve2_bitwise_xor_interleaved<bit opc, string asm> {
+ def _B : sve2_bitwise_xor_interleaved<0b00, opc, asm, ZPR8, ZPR8>;
+ def _H : sve2_bitwise_xor_interleaved<0b01, opc, asm, ZPR16, ZPR16>;
+ def _S : sve2_bitwise_xor_interleaved<0b10, opc, asm, ZPR32, ZPR32>;
+ def _D : sve2_bitwise_xor_interleaved<0b11, opc, asm, ZPR64, ZPR64>;
+}
+
class sve2_bitwise_shift_left_long<bits<3> tsz8_64, bits<2> opc, string asm,
ZPRRegOp zprty1, ZPRRegOp zprty2,
Operand immtype>
@@ -2451,9 +2481,9 @@ multiclass sve2_bitwise_shift_left_long<bits<2> opc, string asm> {
// SVE2 Accumulate Group
//===----------------------------------------------------------------------===//
-class sve2_int_bin_cons_shift_imm<bits<4> tsz8_64, bit opc, string asm,
- ZPRRegOp zprty, Operand immtype>
-: I<(outs zprty:$Zd), (ins zprty:$Zn, immtype:$imm),
+class sve2_int_bin_shift_imm<bits<4> tsz8_64, bit opc, string asm,
+ ZPRRegOp zprty, Operand immtype>
+: I<(outs zprty:$Zd), (ins zprty:$_Zd, zprty:$Zn, immtype:$imm),
asm, "\t$Zd, $Zn, $imm",
"", []>, Sched<[]> {
bits<5> Zd;
@@ -2468,38 +2498,40 @@ class sve2_int_bin_cons_shift_imm<bits<4> tsz8_64, bit opc, string asm,
let Inst{10} = opc;
let Inst{9-5} = Zn;
let Inst{4-0} = Zd;
+
+ let Constraints = "$Zd = $_Zd";
}
-multiclass sve2_int_bin_cons_shift_imm_left<bit opc, string asm> {
- def _B : sve2_int_bin_cons_shift_imm<{0,0,0,1}, opc, asm, ZPR8, vecshiftL8>;
- def _H : sve2_int_bin_cons_shift_imm<{0,0,1,?}, opc, asm, ZPR16, vecshiftL16> {
+multiclass sve2_int_bin_shift_imm_left<bit opc, string asm> {
+ def _B : sve2_int_bin_shift_imm<{0,0,0,1}, opc, asm, ZPR8, vecshiftL8>;
+ def _H : sve2_int_bin_shift_imm<{0,0,1,?}, opc, asm, ZPR16, vecshiftL16> {
let Inst{19} = imm{3};
}
- def _S : sve2_int_bin_cons_shift_imm<{0,1,?,?}, opc, asm, ZPR32, vecshiftL32> {
+ def _S : sve2_int_bin_shift_imm<{0,1,?,?}, opc, asm, ZPR32, vecshiftL32> {
let Inst{20-19} = imm{4-3};
}
- def _D : sve2_int_bin_cons_shift_imm<{1,?,?,?}, opc, asm, ZPR64, vecshiftL64> {
+ def _D : sve2_int_bin_shift_imm<{1,?,?,?}, opc, asm, ZPR64, vecshiftL64> {
let Inst{22} = imm{5};
let Inst{20-19} = imm{4-3};
}
}
-multiclass sve2_int_bin_cons_shift_imm_right<bit opc, string asm> {
- def _B : sve2_int_bin_cons_shift_imm<{0,0,0,1}, opc, asm, ZPR8, vecshiftR8>;
- def _H : sve2_int_bin_cons_shift_imm<{0,0,1,?}, opc, asm, ZPR16, vecshiftR16> {
+multiclass sve2_int_bin_shift_imm_right<bit opc, string asm> {
+ def _B : sve2_int_bin_shift_imm<{0,0,0,1}, opc, asm, ZPR8, vecshiftR8>;
+ def _H : sve2_int_bin_shift_imm<{0,0,1,?}, opc, asm, ZPR16, vecshiftR16> {
let Inst{19} = imm{3};
}
- def _S : sve2_int_bin_cons_shift_imm<{0,1,?,?}, opc, asm, ZPR32, vecshiftR32> {
+ def _S : sve2_int_bin_shift_imm<{0,1,?,?}, opc, asm, ZPR32, vecshiftR32> {
let Inst{20-19} = imm{4-3};
}
- def _D : sve2_int_bin_cons_shift_imm<{1,?,?,?}, opc, asm, ZPR64, vecshiftR64> {
+ def _D : sve2_int_bin_shift_imm<{1,?,?,?}, opc, asm, ZPR64, vecshiftR64> {
let Inst{22} = imm{5};
let Inst{20-19} = imm{4-3};
}
}
-class sve2_int_bin_accum_cons_shift_imm<bits<4> tsz8_64, bits<2> opc, string asm,
- ZPRRegOp zprty, Operand immtype>
+class sve2_int_bin_accum_shift_imm<bits<4> tsz8_64, bits<2> opc, string asm,
+ ZPRRegOp zprty, Operand immtype>
: I<(outs zprty:$Zda), (ins zprty:$_Zda, zprty:$Zn, immtype:$imm),
asm, "\t$Zda, $Zn, $imm",
"", []>, Sched<[]> {
@@ -2521,15 +2553,15 @@ class sve2_int_bin_accum_cons_shift_imm<bits<4> tsz8_64, bits<2> opc, string asm
let ElementSize = ElementSizeNone;
}
-multiclass sve2_int_bin_accum_cons_shift_imm_right<bits<2> opc, string asm> {
- def _B : sve2_int_bin_accum_cons_shift_imm<{0,0,0,1}, opc, asm, ZPR8, vecshiftR8>;
- def _H : sve2_int_bin_accum_cons_shift_imm<{0,0,1,?}, opc, asm, ZPR16, vecshiftR16> {
+multiclass sve2_int_bin_accum_shift_imm_right<bits<2> opc, string asm> {
+ def _B : sve2_int_bin_accum_shift_imm<{0,0,0,1}, opc, asm, ZPR8, vecshiftR8>;
+ def _H : sve2_int_bin_accum_shift_imm<{0,0,1,?}, opc, asm, ZPR16, vecshiftR16> {
let Inst{19} = imm{3};
}
- def _S : sve2_int_bin_accum_cons_shift_imm<{0,1,?,?}, opc, asm, ZPR32, vecshiftR32> {
+ def _S : sve2_int_bin_accum_shift_imm<{0,1,?,?}, opc, asm, ZPR32, vecshiftR32> {
let Inst{20-19} = imm{4-3};
}
- def _D : sve2_int_bin_accum_cons_shift_imm<{1,?,?,?}, opc, asm, ZPR64, vecshiftR64> {
+ def _D : sve2_int_bin_accum_shift_imm<{1,?,?,?}, opc, asm, ZPR64, vecshiftR64> {
let Inst{22} = imm{5};
let Inst{20-19} = imm{4-3};
}
@@ -2607,9 +2639,9 @@ multiclass sve2_int_addsub_long_carry<bits<2> opc, string asm> {
// SVE2 Narrowing Group
//===----------------------------------------------------------------------===//
-class sve2_int_bin_cons_shift_imm_narrow<bits<3> tsz8_64, bits<4> opc,
- string asm, ZPRRegOp zprty1,
- ZPRRegOp zprty2, Operand immtype>
+class sve2_int_bin_shift_imm_narrow_bottom<bits<3> tsz8_64, bits<3> opc,
+ string asm, ZPRRegOp zprty1,
+ ZPRRegOp zprty2, Operand immtype>
: I<(outs zprty1:$Zd), (ins zprty2:$Zn, immtype:$imm),
asm, "\t$Zd, $Zn, $imm",
"", []>, Sched<[]> {
@@ -2622,26 +2654,63 @@ class sve2_int_bin_cons_shift_imm_narrow<bits<3> tsz8_64, bits<4> opc,
let Inst{20-19} = tsz8_64{1-0};
let Inst{18-16} = imm{2-0}; // imm3
let Inst{15-14} = 0b00;
- let Inst{13-10} = opc;
+ let Inst{13-11} = opc;
+ let Inst{10} = 0b0;
+ let Inst{9-5} = Zn;
+ let Inst{4-0} = Zd;
+}
+
+multiclass sve2_int_bin_shift_imm_right_narrow_bottom<bits<3> opc, string asm> {
+ def _B : sve2_int_bin_shift_imm_narrow_bottom<{0,0,1}, opc, asm, ZPR8, ZPR16,
+ vecshiftR8>;
+ def _H : sve2_int_bin_shift_imm_narrow_bottom<{0,1,?}, opc, asm, ZPR16, ZPR32,
+ vecshiftR16> {
+ let Inst{19} = imm{3};
+ }
+ def _S : sve2_int_bin_shift_imm_narrow_bottom<{1,?,?}, opc, asm, ZPR32, ZPR64,
+ vecshiftR32> {
+ let Inst{20-19} = imm{4-3};
+ }
+}
+
+class sve2_int_bin_shift_imm_narrow_top<bits<3> tsz8_64, bits<3> opc,
+ string asm, ZPRRegOp zprty1,
+ ZPRRegOp zprty2, Operand immtype>
+: I<(outs zprty1:$Zd), (ins zprty1:$_Zd, zprty2:$Zn, immtype:$imm),
+ asm, "\t$Zd, $Zn, $imm",
+ "", []>, Sched<[]> {
+ bits<5> Zd;
+ bits<5> Zn;
+ bits<5> imm;
+ let Inst{31-23} = 0b010001010;
+ let Inst{22} = tsz8_64{2};
+ let Inst{21} = 0b1;
+ let Inst{20-19} = tsz8_64{1-0};
+ let Inst{18-16} = imm{2-0}; // imm3
+ let Inst{15-14} = 0b00;
+ let Inst{13-11} = opc;
+ let Inst{10} = 0b1;
let Inst{9-5} = Zn;
let Inst{4-0} = Zd;
+
+ let Constraints = "$Zd = $_Zd";
}
-multiclass sve2_int_bin_cons_shift_imm_right_narrow<bits<4> opc, string asm> {
- def _B : sve2_int_bin_cons_shift_imm_narrow<{0,0,1}, opc, asm, ZPR8, ZPR16,
- vecshiftR8>;
- def _H : sve2_int_bin_cons_shift_imm_narrow<{0,1,?}, opc, asm, ZPR16, ZPR32,
- vecshiftR16> {
+multiclass sve2_int_bin_shift_imm_right_narrow_top<bits<3> opc, string asm> {
+ def _B : sve2_int_bin_shift_imm_narrow_top<{0,0,1}, opc, asm, ZPR8, ZPR16,
+ vecshiftR8>;
+ def _H : sve2_int_bin_shift_imm_narrow_top<{0,1,?}, opc, asm, ZPR16, ZPR32,
+ vecshiftR16> {
let Inst{19} = imm{3};
}
- def _S : sve2_int_bin_cons_shift_imm_narrow<{1,?,?}, opc, asm, ZPR32, ZPR64,
- vecshiftR32> {
+ def _S : sve2_int_bin_shift_imm_narrow_top<{1,?,?}, opc, asm, ZPR32, ZPR64,
+ vecshiftR32> {
let Inst{20-19} = imm{4-3};
}
}
-class sve2_int_addsub_narrow_high<bits<2> sz, bits<3> opc, string asm,
- ZPRRegOp zprty1, ZPRRegOp zprty2>
+class sve2_int_addsub_narrow_high_bottom<bits<2> sz, bits<2> opc, string asm,
+ ZPRRegOp zprty1, ZPRRegOp zprty2>
: I<(outs zprty1:$Zd), (ins zprty2:$Zn, zprty2:$Zm),
asm, "\t$Zd, $Zn, $Zm", "", []>, Sched<[]> {
bits<5> Zd;
@@ -2652,19 +2721,46 @@ class sve2_int_addsub_narrow_high<bits<2> sz, bits<3> opc, string asm,
let Inst{21} = 0b1;
let Inst{20-16} = Zm;
let Inst{15-13} = 0b011;
- let Inst{12-10} = opc; // S, R, T
+ let Inst{12-11} = opc; // S, R
+ let Inst{10} = 0b0; // Top
+ let Inst{9-5} = Zn;
+ let Inst{4-0} = Zd;
+}
+
+multiclass sve2_int_addsub_narrow_high_bottom<bits<2> opc, string asm> {
+ def _B : sve2_int_addsub_narrow_high_bottom<0b01, opc, asm, ZPR8, ZPR16>;
+ def _H : sve2_int_addsub_narrow_high_bottom<0b10, opc, asm, ZPR16, ZPR32>;
+ def _S : sve2_int_addsub_narrow_high_bottom<0b11, opc, asm, ZPR32, ZPR64>;
+}
+
+class sve2_int_addsub_narrow_high_top<bits<2> sz, bits<2> opc, string asm,
+ ZPRRegOp zprty1, ZPRRegOp zprty2>
+: I<(outs zprty1:$Zd), (ins zprty1:$_Zd, zprty2:$Zn, zprty2:$Zm),
+ asm, "\t$Zd, $Zn, $Zm", "", []>, Sched<[]> {
+ bits<5> Zd;
+ bits<5> Zn;
+ bits<5> Zm;
+ let Inst{31-24} = 0b01000101;
+ let Inst{23-22} = sz;
+ let Inst{21} = 0b1;
+ let Inst{20-16} = Zm;
+ let Inst{15-13} = 0b011;
+ let Inst{12-11} = opc; // S, R
+ let Inst{10} = 0b1; // Top
let Inst{9-5} = Zn;
let Inst{4-0} = Zd;
+
+ let Constraints = "$Zd = $_Zd";
}
-multiclass sve2_int_addsub_narrow_high<bits<3> opc, string asm> {
- def _B : sve2_int_addsub_narrow_high<0b01, opc, asm, ZPR8, ZPR16>;
- def _H : sve2_int_addsub_narrow_high<0b10, opc, asm, ZPR16, ZPR32>;
- def _S : sve2_int_addsub_narrow_high<0b11, opc, asm, ZPR32, ZPR64>;
+multiclass sve2_int_addsub_narrow_high_top<bits<2> opc, string asm> {
+ def _B : sve2_int_addsub_narrow_high_top<0b01, opc, asm, ZPR8, ZPR16>;
+ def _H : sve2_int_addsub_narrow_high_top<0b10, opc, asm, ZPR16, ZPR32>;
+ def _S : sve2_int_addsub_narrow_high_top<0b11, opc, asm, ZPR32, ZPR64>;
}
-class sve2_int_sat_extract_narrow<bits<3> tsz8_64, bits<3> opc, string asm,
- ZPRRegOp zprty1, ZPRRegOp zprty2>
+class sve2_int_sat_extract_narrow_bottom<bits<3> tsz8_64, bits<2> opc, string asm,
+ ZPRRegOp zprty1, ZPRRegOp zprty2>
: I<(outs zprty1:$Zd), (ins zprty2:$Zn),
asm, "\t$Zd, $Zn", "", []>, Sched<[]> {
bits<5> Zd;
@@ -2674,15 +2770,41 @@ class sve2_int_sat_extract_narrow<bits<3> tsz8_64, bits<3> opc, string asm,
let Inst{21} = 0b1;
let Inst{20-19} = tsz8_64{1-0};
let Inst{18-13} = 0b000010;
- let Inst{12-10} = opc;
+ let Inst{12-11} = opc;
+ let Inst{10} = 0b0;
+ let Inst{9-5} = Zn;
+ let Inst{4-0} = Zd;
+}
+
+multiclass sve2_int_sat_extract_narrow_bottom<bits<2> opc, string asm> {
+ def _B : sve2_int_sat_extract_narrow_bottom<0b001, opc, asm, ZPR8, ZPR16>;
+ def _H : sve2_int_sat_extract_narrow_bottom<0b010, opc, asm, ZPR16, ZPR32>;
+ def _S : sve2_int_sat_extract_narrow_bottom<0b100, opc, asm, ZPR32, ZPR64>;
+}
+
+class sve2_int_sat_extract_narrow_top<bits<3> tsz8_64, bits<2> opc, string asm,
+ ZPRRegOp zprty1, ZPRRegOp zprty2>
+: I<(outs zprty1:$Zd), (ins zprty1:$_Zd, zprty2:$Zn),
+ asm, "\t$Zd, $Zn", "", []>, Sched<[]> {
+ bits<5> Zd;
+ bits<5> Zn;
+ let Inst{31-23} = 0b010001010;
+ let Inst{22} = tsz8_64{2};
+ let Inst{21} = 0b1;
+ let Inst{20-19} = tsz8_64{1-0};
+ let Inst{18-13} = 0b000010;
+ let Inst{12-11} = opc;
+ let Inst{10} = 0b1;
let Inst{9-5} = Zn;
let Inst{4-0} = Zd;
+
+ let Constraints = "$Zd = $_Zd";
}
-multiclass sve2_int_sat_extract_narrow<bits<3> opc, string asm> {
- def _B : sve2_int_sat_extract_narrow<0b001, opc, asm, ZPR8, ZPR16>;
- def _H : sve2_int_sat_extract_narrow<0b010, opc, asm, ZPR16, ZPR32>;
- def _S : sve2_int_sat_extract_narrow<0b100, opc, asm, ZPR32, ZPR64>;
+multiclass sve2_int_sat_extract_narrow_top<bits<2> opc, string asm> {
+ def _B : sve2_int_sat_extract_narrow_top<0b001, opc, asm, ZPR8, ZPR16>;
+ def _H : sve2_int_sat_extract_narrow_top<0b010, opc, asm, ZPR16, ZPR32>;
+ def _S : sve2_int_sat_extract_narrow_top<0b100, opc, asm, ZPR32, ZPR64>;
}
//===----------------------------------------------------------------------===//
@@ -3886,9 +4008,9 @@ multiclass sve_mem_cstnt_ss<bits<2> msz, string asm, RegisterOperand listty,
(!cast<Instruction>(NAME) zprty:$Zt, PPR3bAny:$Pg, GPR64sp:$Rn, gprty:$Rm), 0>;
}
-class sve2_mem_cstnt_vs_base<bits<3> opc, dag iops, string asm,
- RegisterOperand VecList>
-: I<(outs VecList:$Zt), iops,
+class sve2_mem_sstnt_vs_base<bits<3> opc, string asm,
+ RegisterOperand listty, ZPRRegOp zprty>
+: I<(outs), (ins listty:$Zt, PPR3bAny:$Pg, zprty:$Zn, GPR64:$Rm),
asm, "\t$Zt, $Pg, [$Zn, $Rm]",
"",
[]>, Sched<[]> {
@@ -3908,17 +4030,14 @@ class sve2_mem_cstnt_vs_base<bits<3> opc, dag iops, string asm,
let mayStore = 1;
}
-multiclass sve2_mem_cstnt_vs<bits<3> opc, string asm,
+multiclass sve2_mem_sstnt_vs<bits<3> opc, string asm,
RegisterOperand listty, ZPRRegOp zprty> {
- def _REAL : sve2_mem_cstnt_vs_base<opc, (ins PPR3bAny:$Pg, zprty:$Zn, GPR64:$Rm),
- asm, listty>;
+ def _REAL : sve2_mem_sstnt_vs_base<opc, asm, listty, zprty>;
def : InstAlias<asm # "\t$Zt, $Pg, [$Zn, $Rm]",
(!cast<Instruction>(NAME # _REAL) zprty:$Zt, PPR3bAny:$Pg, zprty:$Zn, GPR64:$Rm), 0>;
def : InstAlias<asm # "\t$Zt, $Pg, [$Zn]",
(!cast<Instruction>(NAME # _REAL) zprty:$Zt, PPR3bAny:$Pg, zprty:$Zn, XZR), 0>;
- def : InstAlias<asm # "\t$Zt, $Pg, [$Zn, $Rm]",
- (!cast<Instruction>(NAME # _REAL) listty:$Zt, PPR3bAny:$Pg, zprty:$Zn, GPR64:$Rm), 0>;
def : InstAlias<asm # "\t$Zt, $Pg, [$Zn]",
(!cast<Instruction>(NAME # _REAL) listty:$Zt, PPR3bAny:$Pg, zprty:$Zn, XZR), 1>;
}
@@ -5094,7 +5213,7 @@ multiclass sve_mem_p_fill<string asm> {
(!cast<Instruction>(NAME) PPRAny:$Pt, GPR64sp:$Rn, 0), 1>;
}
-class sve2_mem_cldnt_vs_base<bits<5> opc, dag iops, string asm,
+class sve2_mem_gldnt_vs_base<bits<5> opc, dag iops, string asm,
RegisterOperand VecList>
: I<(outs VecList:$Zt), iops,
asm, "\t$Zt, $Pg/z, [$Zn, $Rm]",
@@ -5119,17 +5238,15 @@ class sve2_mem_cldnt_vs_base<bits<5> opc, dag iops, string asm,
let mayLoad = 1;
}
-multiclass sve2_mem_cldnt_vs<bits<5> opc, string asm,
+multiclass sve2_mem_gldnt_vs<bits<5> opc, string asm,
RegisterOperand listty, ZPRRegOp zprty> {
- def _REAL : sve2_mem_cldnt_vs_base<opc, (ins PPR3bAny:$Pg, zprty:$Zn, GPR64:$Rm),
+ def _REAL : sve2_mem_gldnt_vs_base<opc, (ins PPR3bAny:$Pg, zprty:$Zn, GPR64:$Rm),
asm, listty>;
def : InstAlias<asm # "\t$Zt, $Pg/z, [$Zn, $Rm]",
(!cast<Instruction>(NAME # _REAL) zprty:$Zt, PPR3bAny:$Pg, zprty:$Zn, GPR64:$Rm), 0>;
def : InstAlias<asm # "\t$Zt, $Pg/z, [$Zn]",
(!cast<Instruction>(NAME # _REAL) zprty:$Zt, PPR3bAny:$Pg, zprty:$Zn, XZR), 0>;
- def : InstAlias<asm # "\t$Zt, $Pg/z, [$Zn, $Rm]",
- (!cast<Instruction>(NAME # _REAL) listty:$Zt, PPR3bAny:$Pg, zprty:$Zn, GPR64:$Rm), 0>;
def : InstAlias<asm # "\t$Zt, $Pg/z, [$Zn]",
(!cast<Instruction>(NAME # _REAL) listty:$Zt, PPR3bAny:$Pg, zprty:$Zn, XZR), 1>;
}
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
index 18bb9bf3eccc..d390c9e237e6 100644
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -14369,7 +14369,8 @@ const char *ARMTargetLowering::LowerXConstraint(EVT ConstraintVT) const {
/// constraint it is for this target.
ARMTargetLowering::ConstraintType
ARMTargetLowering::getConstraintType(StringRef Constraint) const {
- if (Constraint.size() == 1) {
+ unsigned S = Constraint.size();
+ if (S == 1) {
switch (Constraint[0]) {
default: break;
case 'l': return C_RegisterClass;
@@ -14377,12 +14378,12 @@ ARMTargetLowering::getConstraintType(StringRef Constraint) const {
case 'h': return C_RegisterClass;
case 'x': return C_RegisterClass;
case 't': return C_RegisterClass;
- case 'j': return C_Other; // Constant for movw.
- // An address with a single base register. Due to the way we
- // currently handle addresses it is the same as an 'r' memory constraint.
+ case 'j': return C_Immediate; // Constant for movw.
+ // An address with a single base register. Due to the way we
+ // currently handle addresses it is the same as an 'r' memory constraint.
case 'Q': return C_Memory;
}
- } else if (Constraint.size() == 2) {
+ } else if (S == 2) {
switch (Constraint[0]) {
default: break;
case 'T': return C_RegisterClass;
diff --git a/lib/Target/ARM/ARMInstrThumb.td b/lib/Target/ARM/ARMInstrThumb.td
index cfeb13c6acb6..fa266c41080c 100644
--- a/lib/Target/ARM/ARMInstrThumb.td
+++ b/lib/Target/ARM/ARMInstrThumb.td
@@ -592,6 +592,7 @@ let isBranch = 1, isTerminator = 1, isBarrier = 1 in {
[(ARMbrjt tGPR:$target, tjumptable:$jt)]>,
Sched<[WriteBrTbl]> {
let Size = 2;
+ let isNotDuplicable = 1;
list<Predicate> Predicates = [IsThumb, IsThumb1Only];
}
}
@@ -1465,7 +1466,7 @@ def tLEApcrelJT : tPseudoInst<(outs tGPR:$Rd),
// Thumb-1 doesn't have the TBB or TBH instructions, but we can synthesize them
// and make use of the same compressed jump table format as Thumb-2.
let Size = 2, isBranch = 1, isTerminator = 1, isBarrier = 1,
- isIndirectBranch = 1 in {
+ isIndirectBranch = 1, isNotDuplicable = 1 in {
def tTBB_JT : tPseudoInst<(outs),
(ins tGPRwithpc:$base, tGPR:$index, i32imm:$jt, i32imm:$pclbl), 0,
IIC_Br, []>, Sched<[WriteBr]>;
diff --git a/lib/Target/AVR/AVRISelLowering.cpp b/lib/Target/AVR/AVRISelLowering.cpp
index b6ba5f22fafb..f159beee9730 100644
--- a/lib/Target/AVR/AVRISelLowering.cpp
+++ b/lib/Target/AVR/AVRISelLowering.cpp
@@ -1689,6 +1689,8 @@ AVRTargetLowering::getConstraintType(StringRef Constraint) const {
if (Constraint.size() == 1) {
// See http://www.nongnu.org/avr-libc/user-manual/inline_asm.html
switch (Constraint[0]) {
+ default:
+ break;
case 'a': // Simple upper registers
case 'b': // Base pointer registers pairs
case 'd': // Upper register
@@ -1715,9 +1717,7 @@ AVRTargetLowering::getConstraintType(StringRef Constraint) const {
case 'O': // Integer constant (Range: 8, 16, 24)
case 'P': // Integer constant (Range: 1)
case 'R': // Integer constant (Range: -6 to 5)x
- return C_Other;
- default:
- break;
+ return C_Immediate;
}
}
diff --git a/lib/Target/BPF/BPFAbstractMemberAccess.cpp b/lib/Target/BPF/BPFAbstractMemberAccess.cpp
index 51d4cbc8a429..509484b71544 100644
--- a/lib/Target/BPF/BPFAbstractMemberAccess.cpp
+++ b/lib/Target/BPF/BPFAbstractMemberAccess.cpp
@@ -116,9 +116,8 @@ private:
void replaceWithGEP(std::vector<CallInst *> &CallList,
uint32_t NumOfZerosIndex, uint32_t DIIndex);
- Value *computeBaseAndAccessStr(CallInst *Call, std::string &AccessStr,
- std::string &AccessKey, uint32_t Kind,
- MDNode *&TypeMeta);
+ Value *computeBaseAndAccessKey(CallInst *Call, std::string &AccessKey,
+ uint32_t Kind, MDNode *&TypeMeta);
bool getAccessIndex(const Value *IndexValue, uint64_t &AccessIndex);
bool transformGEPChain(Module &M, CallInst *Call, uint32_t Kind);
};
@@ -340,8 +339,7 @@ bool BPFAbstractMemberAccess::getAccessIndex(const Value *IndexValue,
/// Compute the base of the whole preserve_*_access_index chains, i.e., the base
/// pointer of the first preserve_*_access_index call, and construct the access
/// string, which will be the name of a global variable.
-Value *BPFAbstractMemberAccess::computeBaseAndAccessStr(CallInst *Call,
- std::string &AccessStr,
+Value *BPFAbstractMemberAccess::computeBaseAndAccessKey(CallInst *Call,
std::string &AccessKey,
uint32_t Kind,
MDNode *&TypeMeta) {
@@ -392,16 +390,16 @@ Value *BPFAbstractMemberAccess::computeBaseAndAccessStr(CallInst *Call,
if (!LastTypeName.size() || AccessIndices.size() > TypeNameIndex + 2)
return nullptr;
- // Construct the type string AccessStr.
+ // Construct the type string AccessKey.
for (unsigned I = 0; I < AccessIndices.size(); ++I)
- AccessStr = std::to_string(AccessIndices[I]) + ":" + AccessStr;
+ AccessKey = std::to_string(AccessIndices[I]) + ":" + AccessKey;
if (TypeNameIndex == AccessIndices.size() - 1)
- AccessStr = "0:" + AccessStr;
+ AccessKey = "0:" + AccessKey;
// Access key is the type name + access string, uniquely identifying
// one kernel memory access.
- AccessKey = LastTypeName + ":" + AccessStr;
+ AccessKey = LastTypeName + ":" + AccessKey;
return Base;
}
@@ -410,10 +408,10 @@ Value *BPFAbstractMemberAccess::computeBaseAndAccessStr(CallInst *Call,
/// transformation to a chain of relocable GEPs.
bool BPFAbstractMemberAccess::transformGEPChain(Module &M, CallInst *Call,
uint32_t Kind) {
- std::string AccessStr, AccessKey;
+ std::string AccessKey;
MDNode *TypeMeta = nullptr;
Value *Base =
- computeBaseAndAccessStr(Call, AccessStr, AccessKey, Kind, TypeMeta);
+ computeBaseAndAccessKey(Call, AccessKey, Kind, TypeMeta);
if (!Base)
return false;
@@ -432,7 +430,7 @@ bool BPFAbstractMemberAccess::transformGEPChain(Module &M, CallInst *Call,
if (GEPGlobals.find(AccessKey) == GEPGlobals.end()) {
GV = new GlobalVariable(M, Type::getInt64Ty(BB->getContext()), false,
- GlobalVariable::ExternalLinkage, NULL, AccessStr);
+ GlobalVariable::ExternalLinkage, NULL, AccessKey);
GV->addAttribute(BPFCoreSharedInfo::AmaAttr);
// Set the metadata (debuginfo types) for the global.
if (TypeMeta)
diff --git a/lib/Target/BPF/BTFDebug.cpp b/lib/Target/BPF/BTFDebug.cpp
index fa35c6619e21..5c542e739088 100644
--- a/lib/Target/BPF/BTFDebug.cpp
+++ b/lib/Target/BPF/BTFDebug.cpp
@@ -30,6 +30,18 @@ static const char *BTFKindStr[] = {
#include "BTF.def"
};
+static const DIType * stripQualifiers(const DIType *Ty) {
+ while (const auto *DTy = dyn_cast<DIDerivedType>(Ty)) {
+ unsigned Tag = DTy->getTag();
+ if (Tag != dwarf::DW_TAG_typedef && Tag != dwarf::DW_TAG_const_type &&
+ Tag != dwarf::DW_TAG_volatile_type && Tag != dwarf::DW_TAG_restrict_type)
+ break;
+ Ty = DTy->getBaseType();
+ }
+
+ return Ty;
+}
+
/// Emit a BTF common type.
void BTFTypeBase::emitType(MCStreamer &OS) {
OS.AddComment(std::string(BTFKindStr[Kind]) + "(id = " + std::to_string(Id) +
@@ -184,9 +196,9 @@ void BTFTypeEnum::emitType(MCStreamer &OS) {
}
}
-BTFTypeArray::BTFTypeArray(uint32_t ElemTypeId, uint32_t ElemSize,
- uint32_t NumElems)
- : ElemSize(ElemSize) {
+BTFTypeArray::BTFTypeArray(const DIType *Ty, uint32_t ElemTypeId,
+ uint32_t ElemSize, uint32_t NumElems)
+ : ElemTyNoQual(Ty), ElemSize(ElemSize) {
Kind = BTF::BTF_KIND_ARRAY;
BTFType.NameOff = 0;
BTFType.Info = Kind << 24;
@@ -207,6 +219,9 @@ void BTFTypeArray::completeType(BTFDebug &BDebug) {
// created during initial type traversal. Just
// retrieve that type id.
ArrayInfo.IndexType = BDebug.getArrayIndexTypeId();
+
+ ElemTypeNoQual = ElemTyNoQual ? BDebug.getTypeId(ElemTyNoQual)
+ : ArrayInfo.ElemType;
}
void BTFTypeArray::emitType(MCStreamer &OS) {
@@ -218,7 +233,7 @@ void BTFTypeArray::emitType(MCStreamer &OS) {
void BTFTypeArray::getLocInfo(uint32_t Loc, uint32_t &LocOffset,
uint32_t &ElementTypeId) {
- ElementTypeId = ArrayInfo.ElemType;
+ ElementTypeId = ElemTypeNoQual;
LocOffset = Loc * ElemSize;
}
@@ -251,7 +266,9 @@ void BTFTypeStruct::completeType(BTFDebug &BDebug) {
} else {
BTFMember.Offset = DDTy->getOffsetInBits();
}
- BTFMember.Type = BDebug.getTypeId(DDTy->getBaseType());
+ const auto *BaseTy = DDTy->getBaseType();
+ BTFMember.Type = BDebug.getTypeId(BaseTy);
+ MemberTypeNoQual.push_back(BDebug.getTypeId(stripQualifiers(BaseTy)));
Members.push_back(BTFMember);
}
}
@@ -270,7 +287,7 @@ std::string BTFTypeStruct::getName() { return STy->getName(); }
void BTFTypeStruct::getMemberInfo(uint32_t Loc, uint32_t &MemberOffset,
uint32_t &MemberType) {
- MemberType = Members[Loc].Type;
+ MemberType = MemberTypeNoQual[Loc];
MemberOffset =
HasBitField ? Members[Loc].Offset & 0xffffff : Members[Loc].Offset;
}
@@ -492,10 +509,13 @@ void BTFDebug::visitArrayType(const DICompositeType *CTy, uint32_t &TypeId) {
uint32_t ElemTypeId, ElemSize;
const DIType *ElemType = CTy->getBaseType();
visitTypeEntry(ElemType, ElemTypeId, false, false);
+
+ // Strip qualifiers from element type to get accurate element size.
+ ElemType = stripQualifiers(ElemType);
ElemSize = ElemType->getSizeInBits() >> 3;
if (!CTy->getSizeInBits()) {
- auto TypeEntry = llvm::make_unique<BTFTypeArray>(ElemTypeId, 0, 0);
+ auto TypeEntry = llvm::make_unique<BTFTypeArray>(ElemType, ElemTypeId, 0, 0);
ArrayTypes.push_back(TypeEntry.get());
ElemTypeId = addType(std::move(TypeEntry), CTy);
} else {
@@ -507,9 +527,11 @@ void BTFDebug::visitArrayType(const DICompositeType *CTy, uint32_t &TypeId) {
const DISubrange *SR = cast<DISubrange>(Element);
auto *CI = SR->getCount().dyn_cast<ConstantInt *>();
int64_t Count = CI->getSExtValue();
+ const DIType *ArrayElemTy = (I == 0) ? ElemType : nullptr;
auto TypeEntry =
- llvm::make_unique<BTFTypeArray>(ElemTypeId, ElemSize, Count);
+ llvm::make_unique<BTFTypeArray>(ArrayElemTy, ElemTypeId,
+ ElemSize, Count);
ArrayTypes.push_back(TypeEntry.get());
if (I == 0)
ElemTypeId = addType(std::move(TypeEntry), CTy);
@@ -1006,19 +1028,20 @@ void BTFDebug::generateOffsetReloc(const MachineInstr *MI,
unsigned RootId = populateStructType(RootTy);
setTypeFromId(RootId, &PrevStructType, &PrevArrayType);
unsigned RootTySize = PrevStructType->getStructSize();
+ StringRef IndexPattern = AccessPattern.substr(AccessPattern.find_first_of(':') + 1);
BTFOffsetReloc OffsetReloc;
OffsetReloc.Label = ORSym;
- OffsetReloc.OffsetNameOff = addString(AccessPattern.drop_back());
+ OffsetReloc.OffsetNameOff = addString(IndexPattern.drop_back());
OffsetReloc.TypeID = RootId;
uint32_t Start = 0, End = 0, Offset = 0;
bool FirstAccess = true;
- for (auto C : AccessPattern) {
+ for (auto C : IndexPattern) {
if (C != ':') {
End++;
} else {
- std::string SubStr = AccessPattern.substr(Start, End - Start);
+ std::string SubStr = IndexPattern.substr(Start, End - Start);
int Loc = std::stoi(SubStr);
if (FirstAccess) {
@@ -1038,12 +1061,15 @@ void BTFDebug::generateOffsetReloc(const MachineInstr *MI,
Offset += LocOffset;
PrevArrayType = nullptr;
setTypeFromId(ElementTypeId, &PrevStructType, &PrevArrayType);
+ } else {
+ llvm_unreachable("Internal Error: BTF offset relocation type traversal error");
}
+
Start = End + 1;
End = Start;
}
}
- AccessOffsets[RootTy->getName().str() + ":" + AccessPattern.str()] = Offset;
+ AccessOffsets[AccessPattern.str()] = Offset;
OffsetRelocTable[SecNameOff].push_back(OffsetReloc);
}
@@ -1227,7 +1253,7 @@ bool BTFDebug::InstLower(const MachineInstr *MI, MCInst &OutMI) {
MDNode *MDN = GVar->getMetadata(LLVMContext::MD_preserve_access_index);
DIType *Ty = dyn_cast<DIType>(MDN);
std::string TypeName = Ty->getName();
- int64_t Imm = AccessOffsets[TypeName + ":" + GVar->getName().str()];
+ int64_t Imm = AccessOffsets[GVar->getName().str()];
// Emit "mov ri, <imm>" for abstract member accesses.
OutMI.setOpcode(BPF::MOV_ri);
diff --git a/lib/Target/BPF/BTFDebug.h b/lib/Target/BPF/BTFDebug.h
index 6c0cdde17d9b..e210d18f941e 100644
--- a/lib/Target/BPF/BTFDebug.h
+++ b/lib/Target/BPF/BTFDebug.h
@@ -104,11 +104,14 @@ public:
/// Handle array type.
class BTFTypeArray : public BTFTypeBase {
+ const DIType *ElemTyNoQual;
uint32_t ElemSize;
struct BTF::BTFArray ArrayInfo;
+ uint32_t ElemTypeNoQual;
public:
- BTFTypeArray(uint32_t ElemTypeId, uint32_t ElemSize, uint32_t NumElems);
+ BTFTypeArray(const DIType *Ty, uint32_t ElemTypeId,
+ uint32_t ElemSize, uint32_t NumElems);
uint32_t getSize() { return BTFTypeBase::getSize() + BTF::BTFArraySize; }
void completeType(BTFDebug &BDebug);
void emitType(MCStreamer &OS);
@@ -120,6 +123,7 @@ class BTFTypeStruct : public BTFTypeBase {
const DICompositeType *STy;
bool HasBitField;
std::vector<struct BTF::BTFMember> Members;
+ std::vector<uint32_t> MemberTypeNoQual;
public:
BTFTypeStruct(const DICompositeType *STy, bool IsStruct, bool HasBitField,
diff --git a/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp b/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
index 0172c6298772..f10f7a2b77d6 100644
--- a/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
+++ b/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
@@ -1208,6 +1208,24 @@ OperandMatchResultTy RISCVAsmParser::parseBareSymbol(OperandVector &Operands) {
Res = V;
} else
Res = MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_None, getContext());
+
+ MCBinaryExpr::Opcode Opcode;
+ switch (getLexer().getKind()) {
+ default:
+ Operands.push_back(RISCVOperand::createImm(Res, S, E, isRV64()));
+ return MatchOperand_Success;
+ case AsmToken::Plus:
+ Opcode = MCBinaryExpr::Add;
+ break;
+ case AsmToken::Minus:
+ Opcode = MCBinaryExpr::Sub;
+ break;
+ }
+
+ const MCExpr *Expr;
+ if (getParser().parseExpression(Expr))
+ return MatchOperand_ParseFail;
+ Res = MCBinaryExpr::create(Opcode, Res, Expr, getContext());
Operands.push_back(RISCVOperand::createImm(Res, S, E, isRV64()));
return MatchOperand_Success;
}
diff --git a/lib/Target/RISCV/RISCVFrameLowering.cpp b/lib/Target/RISCV/RISCVFrameLowering.cpp
index 32c3b9684d2c..bbaa16c08634 100644
--- a/lib/Target/RISCV/RISCVFrameLowering.cpp
+++ b/lib/Target/RISCV/RISCVFrameLowering.cpp
@@ -40,8 +40,16 @@ void RISCVFrameLowering::determineFrameLayout(MachineFunction &MF) const {
uint64_t FrameSize = MFI.getStackSize();
// Get the alignment.
- uint64_t StackAlign = RI->needsStackRealignment(MF) ? MFI.getMaxAlignment()
- : getStackAlignment();
+ unsigned StackAlign = getStackAlignment();
+ if (RI->needsStackRealignment(MF)) {
+ unsigned MaxStackAlign = std::max(StackAlign, MFI.getMaxAlignment());
+ FrameSize += (MaxStackAlign - StackAlign);
+ StackAlign = MaxStackAlign;
+ }
+
+ // Set Max Call Frame Size
+ uint64_t MaxCallSize = alignTo(MFI.getMaxCallFrameSize(), StackAlign);
+ MFI.setMaxCallFrameSize(MaxCallSize);
// Make sure the frame is aligned.
FrameSize = alignTo(FrameSize, StackAlign);
@@ -101,6 +109,12 @@ void RISCVFrameLowering::emitPrologue(MachineFunction &MF,
const RISCVInstrInfo *TII = STI.getInstrInfo();
MachineBasicBlock::iterator MBBI = MBB.begin();
+ if (RI->needsStackRealignment(MF) && MFI.hasVarSizedObjects()) {
+ report_fatal_error(
+ "RISC-V backend can't currently handle functions that need stack "
+ "realignment and have variable sized objects");
+ }
+
unsigned FPReg = getFPReg(STI);
unsigned SPReg = getSPReg(STI);
@@ -158,6 +172,29 @@ void RISCVFrameLowering::emitPrologue(MachineFunction &MF,
nullptr, RI->getDwarfRegNum(FPReg, true), 0));
BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
.addCFIIndex(CFIIndex);
+
+ // Realign Stack
+ const RISCVRegisterInfo *RI = STI.getRegisterInfo();
+ if (RI->needsStackRealignment(MF)) {
+ unsigned MaxAlignment = MFI.getMaxAlignment();
+
+ const RISCVInstrInfo *TII = STI.getInstrInfo();
+ if (isInt<12>(-(int)MaxAlignment)) {
+ BuildMI(MBB, MBBI, DL, TII->get(RISCV::ANDI), SPReg)
+ .addReg(SPReg)
+ .addImm(-(int)MaxAlignment);
+ } else {
+ unsigned ShiftAmount = countTrailingZeros(MaxAlignment);
+ unsigned VR =
+ MF.getRegInfo().createVirtualRegister(&RISCV::GPRRegClass);
+ BuildMI(MBB, MBBI, DL, TII->get(RISCV::SRLI), VR)
+ .addReg(SPReg)
+ .addImm(ShiftAmount);
+ BuildMI(MBB, MBBI, DL, TII->get(RISCV::SLLI), SPReg)
+ .addReg(VR)
+ .addImm(ShiftAmount);
+ }
+ }
}
}
@@ -257,6 +294,13 @@ int RISCVFrameLowering::getFrameIndexReference(const MachineFunction &MF,
if (FI >= MinCSFI && FI <= MaxCSFI) {
FrameReg = RISCV::X2;
Offset += MF.getFrameInfo().getStackSize();
+ } else if (RI->needsStackRealignment(MF)) {
+ assert(!MFI.hasVarSizedObjects() &&
+ "Unexpected combination of stack realignment and varsized objects");
+ // If the stack was realigned, the frame pointer is set in order to allow
+ // SP to be restored, but we still access stack objects using SP.
+ FrameReg = RISCV::X2;
+ Offset += MF.getFrameInfo().getStackSize();
} else {
FrameReg = RI->getFrameRegister(MF);
if (hasFP(MF))
diff --git a/lib/Target/RISCV/RISCVISelLowering.cpp b/lib/Target/RISCV/RISCVISelLowering.cpp
index ce7b85911ab6..e695f79f5cf4 100644
--- a/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -1007,12 +1007,14 @@ bool RISCVTargetLowering::isDesirableToCommuteWithShift(
// We can materialise `c1 << c2` into an add immediate, so it's "free",
// and the combine should happen, to potentially allow further combines
// later.
- if (isLegalAddImmediate(ShiftedC1Int.getSExtValue()))
+ if (ShiftedC1Int.getMinSignedBits() <= 64 &&
+ isLegalAddImmediate(ShiftedC1Int.getSExtValue()))
return true;
// We can materialise `c1` in an add immediate, so it's "free", and the
// combine should be prevented.
- if (isLegalAddImmediate(C1Int.getSExtValue()))
+ if (C1Int.getMinSignedBits() <= 64 &&
+ isLegalAddImmediate(C1Int.getSExtValue()))
return false;
// Neither constant will fit into an immediate, so find materialisation
@@ -2397,6 +2399,25 @@ const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const {
return nullptr;
}
+/// getConstraintType - Given a constraint letter, return the type of
+/// constraint it is for this target.
+RISCVTargetLowering::ConstraintType
+RISCVTargetLowering::getConstraintType(StringRef Constraint) const {
+ if (Constraint.size() == 1) {
+ switch (Constraint[0]) {
+ default:
+ break;
+ case 'f':
+ return C_RegisterClass;
+ case 'I':
+ case 'J':
+ case 'K':
+ return C_Immediate;
+ }
+ }
+ return TargetLowering::getConstraintType(Constraint);
+}
+
std::pair<unsigned, const TargetRegisterClass *>
RISCVTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
StringRef Constraint,
@@ -2407,6 +2428,12 @@ RISCVTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
switch (Constraint[0]) {
case 'r':
return std::make_pair(0U, &RISCV::GPRRegClass);
+ case 'f':
+ if (Subtarget.hasStdExtF() && VT == MVT::f32)
+ return std::make_pair(0U, &RISCV::FPR32RegClass);
+ if (Subtarget.hasStdExtD() && VT == MVT::f64)
+ return std::make_pair(0U, &RISCV::FPR64RegClass);
+ break;
default:
break;
}
diff --git a/lib/Target/RISCV/RISCVISelLowering.h b/lib/Target/RISCV/RISCVISelLowering.h
index 17db03bbb69e..f28c4753c1d9 100644
--- a/lib/Target/RISCV/RISCVISelLowering.h
+++ b/lib/Target/RISCV/RISCVISelLowering.h
@@ -92,6 +92,7 @@ public:
// This method returns the name of a target specific DAG node.
const char *getTargetNodeName(unsigned Opcode) const override;
+ ConstraintType getConstraintType(StringRef Constraint) const override;
std::pair<unsigned, const TargetRegisterClass *>
getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
StringRef Constraint, MVT VT) const override;
diff --git a/lib/Target/Sparc/SparcISelLowering.cpp b/lib/Target/Sparc/SparcISelLowering.cpp
index a6d440fa8aa2..804f7ba74edf 100644
--- a/lib/Target/Sparc/SparcISelLowering.cpp
+++ b/lib/Target/Sparc/SparcISelLowering.cpp
@@ -3183,7 +3183,7 @@ SparcTargetLowering::getConstraintType(StringRef Constraint) const {
case 'e':
return C_RegisterClass;
case 'I': // SIMM13
- return C_Other;
+ return C_Immediate;
}
}
diff --git a/lib/Target/SystemZ/SystemZISelLowering.cpp b/lib/Target/SystemZ/SystemZISelLowering.cpp
index 78820f511ab4..e7b7a5b0cd53 100644
--- a/lib/Target/SystemZ/SystemZISelLowering.cpp
+++ b/lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -956,7 +956,7 @@ SystemZTargetLowering::getConstraintType(StringRef Constraint) const {
case 'K': // Signed 16-bit constant
case 'L': // Signed 20-bit displacement (on all targets we support)
case 'M': // 0x7fffffff
- return C_Other;
+ return C_Immediate;
default:
break;
diff --git a/lib/Target/X86/X86.td b/lib/Target/X86/X86.td
index 3112f00c91f2..e20315da55a5 100644
--- a/lib/Target/X86/X86.td
+++ b/lib/Target/X86/X86.td
@@ -95,7 +95,8 @@ def Feature3DNowA : SubtargetFeature<"3dnowa", "X863DNowLevel", "ThreeDNowA",
def Feature64Bit : SubtargetFeature<"64bit", "HasX86_64", "true",
"Support 64-bit instructions">;
def FeatureCMPXCHG16B : SubtargetFeature<"cx16", "HasCmpxchg16b", "true",
- "64-bit with cmpxchg16b">;
+ "64-bit with cmpxchg16b",
+ [FeatureCMPXCHG8B]>;
def FeatureSlowSHLD : SubtargetFeature<"slow-shld", "IsSHLDSlow", "true",
"SHLD instruction is slow">;
def FeatureSlowPMULLD : SubtargetFeature<"slow-pmulld", "IsPMULLDSlow", "true",
diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp
index 95d31e62cafc..34ad589d205f 100644
--- a/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -2464,6 +2464,37 @@ bool X86DAGToDAGISel::selectLEAAddr(SDValue N,
Complexity += 2;
}
+ // Heuristic: try harder to form an LEA from ADD if the operands set flags.
+ // Unlike ADD, LEA does not affect flags, so we will be less likely to require
+ // duplicating flag-producing instructions later in the pipeline.
+ if (N.getOpcode() == ISD::ADD) {
+ auto isMathWithFlags = [](SDValue V) {
+ switch (V.getOpcode()) {
+ case X86ISD::ADD:
+ case X86ISD::SUB:
+ case X86ISD::ADC:
+ case X86ISD::SBB:
+ /* TODO: These opcodes can be added safely, but we may want to justify
+ their inclusion for different reasons (better for reg-alloc).
+ case X86ISD::SMUL:
+ case X86ISD::UMUL:
+ case X86ISD::OR:
+ case X86ISD::XOR:
+ case X86ISD::AND:
+ */
+ // Value 1 is the flag output of the node - verify it's not dead.
+ return !SDValue(V.getNode(), 1).use_empty();
+ default:
+ return false;
+ }
+ };
+ // TODO: This could be an 'or' rather than 'and' to make the transform more
+ // likely to happen. We might want to factor in whether there's a
+ // load folding opportunity for the math op that disappears with LEA.
+ if (isMathWithFlags(N.getOperand(0)) && isMathWithFlags(N.getOperand(1)))
+ Complexity++;
+ }
+
if (AM.Disp)
Complexity++;
@@ -3302,8 +3333,12 @@ bool X86DAGToDAGISel::matchBitExtract(SDNode *Node) {
SDValue ImplDef = SDValue(
CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, MVT::i32), 0);
insertDAGNode(*CurDAG, SDValue(Node, 0), ImplDef);
- NBits = CurDAG->getTargetInsertSubreg(X86::sub_8bit, DL, MVT::i32, ImplDef,
- NBits);
+
+ SDValue SRIdxVal = CurDAG->getTargetConstant(X86::sub_8bit, DL, MVT::i32);
+ insertDAGNode(*CurDAG, SDValue(Node, 0), SRIdxVal);
+ NBits = SDValue(
+ CurDAG->getMachineNode(TargetOpcode::INSERT_SUBREG, DL, MVT::i32, ImplDef,
+ NBits, SRIdxVal), 0);
insertDAGNode(*CurDAG, SDValue(Node, 0), NBits);
if (Subtarget->hasBMI2()) {
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 0b4bf687e6cf..ad68ddbeaa8b 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -4069,6 +4069,11 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
InFlag = Chain.getValue(1);
DAG.addCallSiteInfo(Chain.getNode(), std::move(CSInfo));
+ // Save heapallocsite metadata.
+ if (CLI.CS)
+ if (MDNode *HeapAlloc = CLI.CS->getMetadata("heapallocsite"))
+ DAG.addHeapAllocSite(Chain.getNode(), HeapAlloc);
+
// Create the CALLSEQ_END node.
unsigned NumBytesForCalleeToPop;
if (X86::isCalleePop(CallConv, Is64Bit, isVarArg,
@@ -5500,6 +5505,7 @@ static bool collectConcatOps(SDNode *N, SmallVectorImpl<SDValue> &Ops) {
if (VT.getSizeInBits() == (SubVT.getSizeInBits() * 2) &&
Idx == (VT.getVectorNumElements() / 2) &&
Src.getOpcode() == ISD::INSERT_SUBVECTOR &&
+ Src.getOperand(1).getValueType() == SubVT &&
isNullConstant(Src.getOperand(2))) {
Ops.push_back(Src.getOperand(1));
Ops.push_back(Sub);
@@ -34062,25 +34068,6 @@ bool X86TargetLowering::SimplifyDemandedVectorEltsForTargetNode(
return true;
break;
}
- case X86ISD::SUBV_BROADCAST: {
- // Reduce size of broadcast if we don't need the upper half.
- unsigned HalfElts = NumElts / 2;
- if (DemandedElts.extractBits(HalfElts, HalfElts).isNullValue()) {
- SDValue Src = Op.getOperand(0);
- MVT SrcVT = Src.getSimpleValueType();
-
- SDValue Half = Src;
- if (SrcVT.getVectorNumElements() != HalfElts) {
- MVT HalfVT = MVT::getVectorVT(SrcVT.getScalarType(), HalfElts);
- Half = TLO.DAG.getNode(X86ISD::SUBV_BROADCAST, SDLoc(Op), HalfVT, Src);
- }
-
- return TLO.CombineTo(Op, insertSubVector(TLO.DAG.getUNDEF(VT), Half, 0,
- TLO.DAG, SDLoc(Op),
- Half.getValueSizeInBits()));
- }
- break;
- }
case X86ISD::VPERMV: {
SDValue Mask = Op.getOperand(0);
APInt MaskUndef, MaskZero;
@@ -34135,6 +34122,21 @@ bool X86TargetLowering::SimplifyDemandedVectorEltsForTargetNode(
insertSubVector(UndefVec, ExtOp, 0, TLO.DAG, DL, ExtSizeInBits);
return TLO.CombineTo(Op, Insert);
}
+ // Subvector broadcast.
+ case X86ISD::SUBV_BROADCAST: {
+ SDLoc DL(Op);
+ SDValue Src = Op.getOperand(0);
+ if (Src.getValueSizeInBits() > ExtSizeInBits)
+ Src = extractSubVector(Src, 0, TLO.DAG, DL, ExtSizeInBits);
+ else if (Src.getValueSizeInBits() < ExtSizeInBits) {
+ MVT SrcSVT = Src.getSimpleValueType().getScalarType();
+ MVT SrcVT =
+ MVT::getVectorVT(SrcSVT, ExtSizeInBits / SrcSVT.getSizeInBits());
+ Src = TLO.DAG.getNode(X86ISD::SUBV_BROADCAST, DL, SrcVT, Src);
+ }
+ return TLO.CombineTo(Op, insertSubVector(TLO.DAG.getUNDEF(VT), Src, 0,
+ TLO.DAG, DL, ExtSizeInBits));
+ }
// Byte shifts by immediate.
case X86ISD::VSHLDQ:
case X86ISD::VSRLDQ:
@@ -43839,6 +43841,7 @@ static SDValue combineInsertSubvector(SDNode *N, SelectionDAG &DAG,
Vec.getOpcode() == ISD::INSERT_SUBVECTOR &&
OpVT.getSizeInBits() == SubVecVT.getSizeInBits() * 2 &&
isNullConstant(Vec.getOperand(2)) && !Vec.getOperand(0).isUndef() &&
+ Vec.getOperand(1).getValueSizeInBits() == SubVecVT.getSizeInBits() &&
Vec.hasOneUse()) {
Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, OpVT, DAG.getUNDEF(OpVT),
Vec.getOperand(1), Vec.getOperand(2));
@@ -44660,10 +44663,11 @@ X86TargetLowering::getConstraintType(StringRef Constraint) const {
case 'I':
case 'J':
case 'K':
- case 'L':
- case 'M':
case 'N':
case 'G':
+ case 'L':
+ case 'M':
+ return C_Immediate;
case 'C':
case 'e':
case 'Z':
diff --git a/lib/Transforms/InstCombine/InstCombineCompares.cpp b/lib/Transforms/InstCombine/InstCombineCompares.cpp
index 3a4283ae5406..147af8bc37c9 100644
--- a/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -3288,26 +3288,35 @@ foldShiftIntoShiftInAnotherHandOfAndInICmp(ICmpInst &I, const SimplifyQuery SQ,
// Look for an 'and' of two (opposite) logical shifts.
// Pick the single-use shift as XShift.
- Value *XShift, *YShift;
+ Instruction *XShift, *YShift;
if (!match(I.getOperand(0),
- m_c_And(m_OneUse(m_CombineAnd(m_AnyLogicalShift, m_Value(XShift))),
- m_CombineAnd(m_AnyLogicalShift, m_Value(YShift)))))
+ m_c_And(m_CombineAnd(m_AnyLogicalShift, m_Instruction(XShift)),
+ m_CombineAnd(m_AnyLogicalShift, m_Instruction(YShift)))))
return nullptr;
- // If YShift is a single-use 'lshr', swap the shifts around.
- if (match(YShift, m_OneUse(m_AnyLShr)))
+ // If YShift is a 'lshr', swap the shifts around.
+ if (match(YShift, m_AnyLShr))
std::swap(XShift, YShift);
// The shifts must be in opposite directions.
- Instruction::BinaryOps XShiftOpcode =
- cast<BinaryOperator>(XShift)->getOpcode();
- if (XShiftOpcode == cast<BinaryOperator>(YShift)->getOpcode())
+ auto XShiftOpcode = XShift->getOpcode();
+ if (XShiftOpcode == YShift->getOpcode())
return nullptr; // Do not care about same-direction shifts here.
Value *X, *XShAmt, *Y, *YShAmt;
match(XShift, m_BinOp(m_Value(X), m_Value(XShAmt)));
match(YShift, m_BinOp(m_Value(Y), m_Value(YShAmt)));
+ // If one of the values being shifted is a constant, then we will end with
+ // and+icmp, and shift instr will be constant-folded. If they are not,
+ // however, we will need to ensure that we won't increase instruction count.
+ if (!isa<Constant>(X) && !isa<Constant>(Y)) {
+ // At least one of the hands of the 'and' should be one-use shift.
+ if (!match(I.getOperand(0),
+ m_c_And(m_OneUse(m_AnyLogicalShift), m_Value())))
+ return nullptr;
+ }
+
// Can we fold (XShAmt+YShAmt) ?
Value *NewShAmt = SimplifyBinOp(Instruction::BinaryOps::Add, XShAmt, YShAmt,
SQ.getWithInstruction(&I));
diff --git a/lib/Transforms/Scalar/DivRemPairs.cpp b/lib/Transforms/Scalar/DivRemPairs.cpp
index 876681b4f9de..e64651d97495 100644
--- a/lib/Transforms/Scalar/DivRemPairs.cpp
+++ b/lib/Transforms/Scalar/DivRemPairs.cpp
@@ -23,6 +23,7 @@
#include "llvm/Support/DebugCounter.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/BypassSlowDivision.h"
+
using namespace llvm;
#define DEBUG_TYPE "div-rem-pairs"
@@ -32,24 +33,44 @@ STATISTIC(NumDecomposed, "Number of instructions decomposed");
DEBUG_COUNTER(DRPCounter, "div-rem-pairs-transform",
"Controls transformations in div-rem-pairs pass");
-/// Find matching pairs of integer div/rem ops (they have the same numerator,
-/// denominator, and signedness). If they exist in different basic blocks, bring
-/// them together by hoisting or replace the common division operation that is
-/// implicit in the remainder:
-/// X % Y <--> X - ((X / Y) * Y).
-///
-/// We can largely ignore the normal safety and cost constraints on speculation
-/// of these ops when we find a matching pair. This is because we are already
-/// guaranteed that any exceptions and most cost are already incurred by the
-/// first member of the pair.
-///
-/// Note: This transform could be an oddball enhancement to EarlyCSE, GVN, or
-/// SimplifyCFG, but it's split off on its own because it's different enough
-/// that it doesn't quite match the stated objectives of those passes.
-static bool optimizeDivRem(Function &F, const TargetTransformInfo &TTI,
- const DominatorTree &DT) {
- bool Changed = false;
+/// A thin wrapper to store two values that we matched as div-rem pair.
+/// We want this extra indirection to avoid dealing with RAUW'ing the map keys.
+struct DivRemPairWorklistEntry {
+ /// The actual udiv/sdiv instruction. Source of truth.
+ AssertingVH<Instruction> DivInst;
+
+ /// The instruction that we have matched as a remainder instruction.
+ /// Should only be used as Value, don't introspect it.
+ AssertingVH<Instruction> RemInst;
+
+ DivRemPairWorklistEntry(Instruction *DivInst_, Instruction *RemInst_)
+ : DivInst(DivInst_), RemInst(RemInst_) {
+ assert((DivInst->getOpcode() == Instruction::UDiv ||
+ DivInst->getOpcode() == Instruction::SDiv) &&
+ "Not a division.");
+ assert(DivInst->getType() == RemInst->getType() && "Types should match.");
+ // We can't check anything else about remainder instruction,
+ // it's not strictly required to be a urem/srem.
+ }
+ /// The type for this pair, identical for both the div and rem.
+ Type *getType() const { return DivInst->getType(); }
+
+ /// Is this pair signed or unsigned?
+ bool isSigned() const { return DivInst->getOpcode() == Instruction::SDiv; }
+
+ /// In this pair, what are the dividend and divisor?
+ Value *getDividend() const { return DivInst->getOperand(0); }
+ Value *getDivisor() const { return DivInst->getOperand(1); }
+};
+using DivRemWorklistTy = SmallVector<DivRemPairWorklistEntry, 4>;
+
+/// Find matching pairs of integer div/rem ops (they have the same numerator,
+/// denominator, and signedness). Place those pairs into a worklist for further
+/// processing. This indirection is needed because we will be doing RAUW: if
+/// one of the rem instructions we change happens to be an input to another
+/// div/rem in the maps, we'd have problems. Entries use AssertingVH<> handles.
+static DivRemWorklistTy getWorklist(Function &F) {
// Insert all divide and remainder instructions into maps keyed by their
// operands and opcode (signed or unsigned).
DenseMap<DivRemMapKey, Instruction *> DivMap;
@@ -69,6 +90,9 @@ static bool optimizeDivRem(Function &F, const TargetTransformInfo &TTI,
}
}
+ // We'll accumulate the matching pairs of div-rem instructions here.
+ DivRemWorklistTy Worklist;
+
// We can iterate over either map because we are only looking for matched
// pairs. Choose remainders for efficiency because they are usually even more
// rare than division.
@@ -78,12 +102,45 @@ static bool optimizeDivRem(Function &F, const TargetTransformInfo &TTI,
if (!DivInst)
continue;
- // We have a matching pair of div/rem instructions. If one dominates the
- // other, hoist and/or replace one.
+ // We have a matching pair of div/rem instructions.
NumPairs++;
Instruction *RemInst = RemPair.second;
- bool IsSigned = DivInst->getOpcode() == Instruction::SDiv;
- bool HasDivRemOp = TTI.hasDivRemOp(DivInst->getType(), IsSigned);
+
+ // Place it in the worklist.
+ Worklist.emplace_back(DivInst, RemInst);
+ }
+
+ return Worklist;
+}
+
+/// Find matching pairs of integer div/rem ops (they have the same numerator,
+/// denominator, and signedness). If they exist in different basic blocks, bring
+/// them together by hoisting or replace the common division operation that is
+/// implicit in the remainder:
+/// X % Y <--> X - ((X / Y) * Y).
+///
+/// We can largely ignore the normal safety and cost constraints on speculation
+/// of these ops when we find a matching pair. This is because we are already
+/// guaranteed that any exceptions and most cost are already incurred by the
+/// first member of the pair.
+///
+/// Note: This transform could be an oddball enhancement to EarlyCSE, GVN, or
+/// SimplifyCFG, but it's split off on its own because it's different enough
+/// that it doesn't quite match the stated objectives of those passes.
+static bool optimizeDivRem(Function &F, const TargetTransformInfo &TTI,
+ const DominatorTree &DT) {
+ bool Changed = false;
+
+ // Get the matching pairs of div-rem instructions. We want this extra
+ // indirection to avoid dealing with having to RAUW the keys of the maps.
+ DivRemWorklistTy Worklist = getWorklist(F);
+
+ // Process each entry in the worklist.
+ for (DivRemPairWorklistEntry &E : Worklist) {
+ bool HasDivRemOp = TTI.hasDivRemOp(E.getType(), E.isSigned());
+
+ auto &DivInst = E.DivInst;
+ auto &RemInst = E.RemInst;
// If the target supports div+rem and the instructions are in the same block
// already, there's nothing to do. The backend should handle this. If the
@@ -110,8 +167,8 @@ static bool optimizeDivRem(Function &F, const TargetTransformInfo &TTI,
// The target does not have a single div/rem operation. Decompose the
// remainder calculation as:
// X % Y --> X - ((X / Y) * Y).
- Value *X = RemInst->getOperand(0);
- Value *Y = RemInst->getOperand(1);
+ Value *X = E.getDividend();
+ Value *Y = E.getDivisor();
Instruction *Mul = BinaryOperator::CreateMul(DivInst, Y);
Instruction *Sub = BinaryOperator::CreateSub(X, Mul);
@@ -152,8 +209,13 @@ static bool optimizeDivRem(Function &F, const TargetTransformInfo &TTI,
// Now kill the explicit remainder. We have replaced it with:
// (sub X, (mul (div X, Y), Y)
- RemInst->replaceAllUsesWith(Sub);
- RemInst->eraseFromParent();
+ Sub->setName(RemInst->getName() + ".decomposed");
+ Instruction *OrigRemInst = RemInst;
+ // Update AssertingVH<> with new instruction so it doesn't assert.
+ RemInst = Sub;
+ // And replace the original instruction with the new one.
+ OrigRemInst->replaceAllUsesWith(Sub);
+ OrigRemInst->eraseFromParent();
NumDecomposed++;
}
Changed = true;
@@ -188,7 +250,7 @@ struct DivRemPairsLegacyPass : public FunctionPass {
return optimizeDivRem(F, TTI, DT);
}
};
-}
+} // namespace
char DivRemPairsLegacyPass::ID = 0;
INITIALIZE_PASS_BEGIN(DivRemPairsLegacyPass, "div-rem-pairs",
diff --git a/lib/Transforms/Scalar/SpeculateAroundPHIs.cpp b/lib/Transforms/Scalar/SpeculateAroundPHIs.cpp
index c13fb3e04516..e6db11f47ead 100644
--- a/lib/Transforms/Scalar/SpeculateAroundPHIs.cpp
+++ b/lib/Transforms/Scalar/SpeculateAroundPHIs.cpp
@@ -777,8 +777,10 @@ static bool tryToSpeculatePHIs(SmallVectorImpl<PHINode *> &PNs,
// speculation if the predecessor is an invoke. This doesn't seem
// fundamental and we should probably be splitting critical edges
// differently.
- if (isa<IndirectBrInst>(PredBB->getTerminator()) ||
- isa<InvokeInst>(PredBB->getTerminator())) {
+ const auto *TermInst = PredBB->getTerminator();
+ if (isa<IndirectBrInst>(TermInst) ||
+ isa<InvokeInst>(TermInst) ||
+ isa<CallBrInst>(TermInst)) {
LLVM_DEBUG(dbgs() << " Invalid: predecessor terminator: "
<< PredBB->getName() << "\n");
return false;