aboutsummaryrefslogtreecommitdiff
path: root/lib
diff options
context:
space:
mode:
Diffstat (limited to 'lib')
-rw-r--r--lib/CodeGen/TargetRegisterInfo.cpp6
-rw-r--r--lib/DebugInfo/DWARF/DWARFDebugFrame.cpp4
-rw-r--r--lib/MC/ELFObjectWriter.cpp1
-rw-r--r--lib/MC/MCWin64EH.cpp2
-rw-r--r--lib/MC/WasmObjectWriter.cpp8
-rw-r--r--lib/Object/COFFImportFile.cpp2
-rw-r--r--lib/Target/AArch64/AArch64SchedExynosM4.td36
-rw-r--r--lib/Target/AArch64/AArch64SchedPredExynos.td11
-rw-r--r--lib/Target/AArch64/AArch64SchedPredicates.td53
-rw-r--r--lib/Target/AMDGPU/SIFoldOperands.cpp73
-rw-r--r--lib/Target/AMDGPU/VOP2Instructions.td12
-rw-r--r--lib/Target/ARM/ARMISelLowering.cpp30
-rw-r--r--lib/Target/AVR/AVRISelLowering.cpp28
-rw-r--r--lib/Target/AVR/AVRISelLowering.h8
-rw-r--r--lib/Target/AVR/AVRSubtarget.cpp12
-rw-r--r--lib/Target/AVR/AVRSubtarget.h5
-rw-r--r--lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp7
-rw-r--r--lib/Target/Mips/MicroMips32r6InstrInfo.td4
-rw-r--r--lib/Target/Mips/MicroMipsInstrFPU.td5
-rw-r--r--lib/Target/Mips/MipsAsmPrinter.cpp3
-rw-r--r--lib/Target/Mips/MipsDSPInstrInfo.td4
-rw-r--r--lib/Target/Mips/MipsDelaySlotFiller.cpp1
-rw-r--r--lib/Target/Mips/MipsFastISel.cpp35
-rw-r--r--lib/Target/Mips/MipsSEInstrInfo.cpp3
-rw-r--r--lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp8
-rw-r--r--lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp7
-rw-r--r--lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp30
-rw-r--r--lib/Target/PowerPC/PPCISelDAGToDAG.cpp4
-rw-r--r--lib/Target/PowerPC/PPCInstrInfo.td2
-rw-r--r--lib/Target/PowerPC/PPCSubtarget.cpp3
-rw-r--r--lib/Target/Sparc/SparcRegisterInfo.cpp4
-rw-r--r--lib/Target/WebAssembly/WebAssemblyISelLowering.cpp9
-rw-r--r--lib/Target/X86/X86FastISel.cpp2
-rw-r--r--lib/Target/X86/X86TargetMachine.cpp6
34 files changed, 226 insertions, 202 deletions
diff --git a/lib/CodeGen/TargetRegisterInfo.cpp b/lib/CodeGen/TargetRegisterInfo.cpp
index 661dc18f7a85..d3059280a60f 100644
--- a/lib/CodeGen/TargetRegisterInfo.cpp
+++ b/lib/CodeGen/TargetRegisterInfo.cpp
@@ -14,6 +14,7 @@
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
@@ -398,6 +399,7 @@ TargetRegisterInfo::getRegAllocationHints(unsigned VirtReg,
const std::pair<unsigned, SmallVector<unsigned, 4>> &Hints_MRI =
MRI.getRegAllocationHints(VirtReg);
+ SmallSet<unsigned, 32> HintedRegs;
// First hint may be a target hint.
bool Skip = (Hints_MRI.first != 0);
for (auto Reg : Hints_MRI.second) {
@@ -411,6 +413,10 @@ TargetRegisterInfo::getRegAllocationHints(unsigned VirtReg,
if (VRM && isVirtualRegister(Phys))
Phys = VRM->getPhys(Phys);
+ // Don't add the same reg twice (Hints_MRI may contain multiple virtual
+ // registers allocated to the same physreg).
+ if (!HintedRegs.insert(Phys).second)
+ continue;
// Check that Phys is a valid hint in VirtReg's register class.
if (!isPhysicalRegister(Phys))
continue;
diff --git a/lib/DebugInfo/DWARF/DWARFDebugFrame.cpp b/lib/DebugInfo/DWARF/DWARFDebugFrame.cpp
index ba55ffc28174..8a88a2fa3a09 100644
--- a/lib/DebugInfo/DWARF/DWARFDebugFrame.cpp
+++ b/lib/DebugInfo/DWARF/DWARFDebugFrame.cpp
@@ -301,7 +301,7 @@ void CIE::dump(raw_ostream &OS, const MCRegisterInfo *MRI, bool IsEH) const {
OS << format(" Data alignment factor: %d\n", (int32_t)DataAlignmentFactor);
OS << format(" Return address column: %d\n", (int32_t)ReturnAddressRegister);
if (Personality)
- OS << format(" Personality Address: %08x\n", *Personality);
+ OS << format(" Personality Address: %016" PRIx64 "\n", *Personality);
if (!AugmentationData.empty()) {
OS << " Augmentation data: ";
for (uint8_t Byte : AugmentationData)
@@ -320,7 +320,7 @@ void FDE::dump(raw_ostream &OS, const MCRegisterInfo *MRI, bool IsEH) const {
(uint32_t)InitialLocation,
(uint32_t)InitialLocation + (uint32_t)AddressRange);
if (LSDAAddress)
- OS << format(" LSDA Address: %08x\n", *LSDAAddress);
+ OS << format(" LSDA Address: %016" PRIx64 "\n", *LSDAAddress);
CFIs.dump(OS, MRI, IsEH);
OS << "\n";
}
diff --git a/lib/MC/ELFObjectWriter.cpp b/lib/MC/ELFObjectWriter.cpp
index ade858113a30..1b505776ca19 100644
--- a/lib/MC/ELFObjectWriter.cpp
+++ b/lib/MC/ELFObjectWriter.cpp
@@ -1271,6 +1271,7 @@ void ELFObjectWriter::executePostLayoutBinding(MCAssembler &Asm,
// This is the first place we are able to copy this information.
Alias->setExternal(Symbol.isExternal());
Alias->setBinding(Symbol.getBinding());
+ Alias->setOther(Symbol.getOther());
if (!Symbol.isUndefined() && !Rest.startswith("@@@"))
continue;
diff --git a/lib/MC/MCWin64EH.cpp b/lib/MC/MCWin64EH.cpp
index 8bc1f08c8875..3ef1514455af 100644
--- a/lib/MC/MCWin64EH.cpp
+++ b/lib/MC/MCWin64EH.cpp
@@ -522,7 +522,7 @@ static void ARM64EmitUnwindInfo(MCStreamer &streamer, WinEH::FrameInfo *info) {
if (MatchingEpilog) {
assert(EpilogInfo.find(MatchingEpilog) != EpilogInfo.end() &&
"Duplicate epilog not found");
- EpilogInfo[EpilogStart] = EpilogInfo[MatchingEpilog];
+ EpilogInfo[EpilogStart] = EpilogInfo.lookup(MatchingEpilog);
// Clear the unwind codes in the EpilogMap, so that they don't get output
// in the logic below.
EpilogInstrs.clear();
diff --git a/lib/MC/WasmObjectWriter.cpp b/lib/MC/WasmObjectWriter.cpp
index 333748db9190..b07fe05cad5b 100644
--- a/lib/MC/WasmObjectWriter.cpp
+++ b/lib/MC/WasmObjectWriter.cpp
@@ -368,7 +368,13 @@ void WasmObjectWriter::startCustomSection(SectionBookkeeping &Section,
// Now that the section is complete and we know how big it is, patch up the
// section size field at the start of the section.
void WasmObjectWriter::endSection(SectionBookkeeping &Section) {
- uint64_t Size = W.OS.tell() - Section.PayloadOffset;
+ uint64_t Size = W.OS.tell();
+ // /dev/null doesn't support seek/tell and can report offset of 0.
+ // Simply skip this patching in that case.
+ if (!Size)
+ return;
+
+ Size -= Section.PayloadOffset;
if (uint32_t(Size) != Size)
report_fatal_error("section size does not fit in a uint32_t");
diff --git a/lib/Object/COFFImportFile.cpp b/lib/Object/COFFImportFile.cpp
index dc11cc4bcffe..e7c7efe43676 100644
--- a/lib/Object/COFFImportFile.cpp
+++ b/lib/Object/COFFImportFile.cpp
@@ -496,7 +496,7 @@ NewArchiveMember ObjectFactory::createWeakExternal(StringRef Sym,
// COFF Header
coff_file_header Header{
- u16(0),
+ u16(Machine),
u16(NumberOfSections),
u32(0),
u32(sizeof(Header) + (NumberOfSections * sizeof(coff_section))),
diff --git a/lib/Target/AArch64/AArch64SchedExynosM4.td b/lib/Target/AArch64/AArch64SchedExynosM4.td
index 4d892465b3f2..61652b1d8e3d 100644
--- a/lib/Target/AArch64/AArch64SchedExynosM4.td
+++ b/lib/Target/AArch64/AArch64SchedExynosM4.td
@@ -239,7 +239,6 @@ def M4WriteNEONK : SchedWriteRes<[M4UnitNSHF,
M4UnitS0]> { let Latency = 5;
let NumMicroOps = 2; }
def M4WriteNEONL : SchedWriteRes<[M4UnitNMUL]> { let Latency = 3; }
-def M4WriteNEONM : SchedWriteRes<[M4UnitNMUL]> { let Latency = 3; }
def M4WriteNEONN : SchedWriteRes<[M4UnitNMSC,
M4UnitNMSC]> { let Latency = 5;
let NumMicroOps = 2; }
@@ -480,8 +479,6 @@ def M4WriteCOPY : SchedWriteVariant<[SchedVar<ExynosFPPred, [M4WriteNALU1]>,
SchedVar<NoSchedPred, [M4WriteZ0]>]>;
def M4WriteMOVI : SchedWriteVariant<[SchedVar<IsZeroFPIdiomPred, [M4WriteZ0]>,
SchedVar<NoSchedPred, [M4WriteNALU1]>]>;
-def M4WriteMULL : SchedWriteVariant<[SchedVar<ExynosLongVectorUpperPred, [M4WriteNEONM]>,
- SchedVar<NoSchedPred, [M4WriteNMUL3]>]>;
// Fast forwarding.
def M4ReadAESM1 : SchedReadAdvance<+1, [M4WriteNCRY1]>;
@@ -489,7 +486,8 @@ def M4ReadFMACM1 : SchedReadAdvance<+1, [M4WriteFMAC4,
M4WriteFMAC4H,
M4WriteFMAC5]>;
def M4ReadNMULM1 : SchedReadAdvance<+1, [M4WriteNMUL3]>;
-def M4ReadMULLP2 : SchedReadAdvance<-2, [M4WriteNEONM]>;
+def M4ReadNMULP2 : SchedReadAdvance<-2, [M4WriteNMUL3]>;
+
//===----------------------------------------------------------------------===//
// Coarse scheduling model.
@@ -662,10 +660,8 @@ def : InstRW<[M4WriteNEONK], (instregex "^FMOVDXHighr")>;
def : InstRW<[M4WriteFCVT3H], (instregex "^F(RECP|RSQRT)Ev1f16")>;
def : InstRW<[M4WriteFCVT3], (instregex "^F(RECP|RSQRT)Ev1i(32|64)")>;
def : InstRW<[M4WriteNMSC1], (instregex "^FRECPXv1")>;
-def : InstRW<[M4WriteFMAC4H,
- M4ReadFMACM1], (instregex "^F(RECP|RSQRT)S16")>;
-def : InstRW<[M4WriteFMAC4,
- M4ReadFMACM1], (instregex "^F(RECP|RSQRT)S(32|64)")>;
+def : InstRW<[M4WriteFMAC4H], (instregex "^F(RECP|RSQRT)S16")>;
+def : InstRW<[M4WriteFMAC4], (instregex "^F(RECP|RSQRT)S(32|64)")>;
// FP load instructions.
def : InstRW<[WriteVLD], (instregex "^LDR[SDQ]l")>;
@@ -736,14 +732,20 @@ def : InstRW<[M4WriteNALU1], (instregex "^(AND|BIC|EOR|NOT|ORN|ORR)v")>;
def : InstRW<[M4WriteNMSC1], (instregex "^[SU](MIN|MAX)v")>;
def : InstRW<[M4WriteNMSC2], (instregex "^[SU](MIN|MAX)Pv")>;
def : InstRW<[M4WriteNHAD3], (instregex "^[SU](MIN|MAX)Vv")>;
-def : InstRW<[M4WriteNMUL3], (instregex "^(SQR?D)?MULH?v")>;
def : InstRW<[M4WriteNMUL3,
M4ReadNMULM1], (instregex "^ML[AS]v")>;
-def : InstRW<[M4WriteNMUL3], (instregex "^SQRDML[AS]H")>;
-def : InstRW<[M4WriteMULL,
- M4ReadMULLP2], (instregex "^(S|U|SQD)ML[AS]Lv")>;
-def : InstRW<[M4WriteMULL,
- M4ReadMULLP2], (instregex "^(S|U|SQD)MULLv")>;
+def : InstRW<[M4WriteNMUL3,
+ M4ReadNMULM1], (instregex "^(SQR?D)?MULH?v")>;
+def : InstRW<[M4WriteNMUL3,
+ M4ReadNMULM1], (instregex "^SQRDML[AS]H")>;
+def : InstRW<[M4WriteNMUL3,
+ M4ReadNMULM1], (instregex "^(S|U|SQD)ML[AS]L(v1(i32|i64)|v2i32|v4i16|v8i8)")>;
+def : InstRW<[M4WriteNMUL3,
+ M4ReadNMULP2], (instregex "^(S|U|SQD)ML[AS]L(v4i32|v8i16|v16i8)")>;
+def : InstRW<[M4WriteNMUL3,
+ M4ReadNMULM1], (instregex "^(S|U|SQD)MULL(v1(i32|i64)|v2i32|v4i16|v8i8)")>;
+def : InstRW<[M4WriteNMUL3,
+ M4ReadNMULP2], (instregex "^(S|U|SQD)MULL(v4i32|v8i16|v16i8)")>;
def : InstRW<[M4WriteNMUL3], (instregex "^[SU]DOT(lane)?v")>;
def : InstRW<[M4WriteNHAD3], (instregex "^[SU]ADALPv")>;
def : InstRW<[M4WriteNSHT4A], (instregex "^[SU]R?SRA[dv]")>;
@@ -808,10 +810,8 @@ def : InstRW<[M4WriteNALU1], (instregex "^FMOVv.f(32|64)")>;
def : InstRW<[M4WriteFCVT3H], (instregex "^F(RECP|RSQRT)Ev[248]f16")>;
def : InstRW<[M4WriteFCVT3], (instregex "^F(RECP|RSQRT)Ev[248]f(32|64)")>;
def : InstRW<[M4WriteFCVT3], (instregex "^U(RECP|RSQRT)Ev[24]i32")>;
-def : InstRW<[M4WriteFMAC4H,
- M4ReadFMACM1], (instregex "^F(RECP|RSQRT)Sv.f16")>;
-def : InstRW<[M4WriteFMAC4,
- M4ReadFMACM1], (instregex "^F(RECP|RSQRT)Sv.f(32|64)")>;
+def : InstRW<[M4WriteFMAC4H], (instregex "^F(RECP|RSQRT)Sv.f16")>;
+def : InstRW<[M4WriteFMAC4], (instregex "^F(RECP|RSQRT)Sv.f(32|64)")>;
def : InstRW<[M4WriteNSHF1], (instregex "^REV(16|32|64)v")>;
def : InstRW<[M4WriteNSHFA], (instregex "^TB[LX]v(8|16)i8One")>;
def : InstRW<[M4WriteNSHFB], (instregex "^TB[LX]v(8|16)i8Two")>;
diff --git a/lib/Target/AArch64/AArch64SchedPredExynos.td b/lib/Target/AArch64/AArch64SchedPredExynos.td
index 48c54230e9d8..316036d89406 100644
--- a/lib/Target/AArch64/AArch64SchedPredExynos.td
+++ b/lib/Target/AArch64/AArch64SchedPredExynos.td
@@ -103,17 +103,6 @@ def ExynosScaledIdxPred : MCSchedPredicate<ExynosScaledIdxFn>;
// Identify FP instructions.
def ExynosFPPred : MCSchedPredicate<CheckAny<[CheckDForm, CheckQForm]>>;
-// Identify whether an instruction whose result is a long vector
-// operates on the upper half of the input registers.
-def ExynosLongVectorUpperFn : TIIPredicate<
- "isExynosLongVectorUpper",
- MCOpcodeSwitchStatement<
- [MCOpcodeSwitchCase<
- IsLongVectorUpperOp.ValidOpcodes,
- MCReturnStatement<TruePred>>],
- MCReturnStatement<FalsePred>>>;
-def ExynosLongVectorUpperPred : MCSchedPredicate<ExynosLongVectorUpperFn>;
-
// Identify 128-bit NEON instructions.
def ExynosQFormPred : MCSchedPredicate<CheckQForm>;
diff --git a/lib/Target/AArch64/AArch64SchedPredicates.td b/lib/Target/AArch64/AArch64SchedPredicates.td
index dbaf11fc95dd..b23572b41b9c 100644
--- a/lib/Target/AArch64/AArch64SchedPredicates.td
+++ b/lib/Target/AArch64/AArch64SchedPredicates.td
@@ -268,59 +268,6 @@ def IsStoreRegOffsetOp : CheckOpcode<[STRBBroW, STRBBroX,
def IsLoadStoreRegOffsetOp : CheckOpcode<!listconcat(IsLoadRegOffsetOp.ValidOpcodes,
IsStoreRegOffsetOp.ValidOpcodes)>;
-// Identify whether an instruction whose result is a long vector
-// operates on the upper half of the input registers.
-def IsLongVectorUpperOp : CheckOpcode<[FCVTLv8i16, FCVTLv4i32,
- FCVTNv8i16, FCVTNv4i32,
- FCVTXNv4f32,
- PMULLv16i8, PMULLv2i64,
- RADDHNv8i16_v16i8, RADDHNv4i32_v8i16, RADDHNv2i64_v4i32,
- RSHRNv16i8_shift, RSHRNv8i16_shift, RSHRNv4i32_shift,
- RSUBHNv8i16_v16i8, RSUBHNv4i32_v8i16, RSUBHNv2i64_v4i32,
- SABALv16i8_v8i16, SABALv8i16_v4i32, SABALv4i32_v2i64,
- SABDLv16i8_v8i16, SABDLv8i16_v4i32, SABDLv4i32_v2i64,
- SADDLv16i8_v8i16, SADDLv8i16_v4i32, SADDLv4i32_v2i64,
- SADDWv16i8_v8i16, SADDWv8i16_v4i32, SADDWv4i32_v2i64,
- SHLLv16i8, SHLLv8i16, SHLLv4i32,
- SHRNv16i8_shift, SHRNv8i16_shift, SHRNv4i32_shift,
- SMLALv16i8_v8i16, SMLALv8i16_v4i32, SMLALv4i32_v2i64,
- SMLALv8i16_indexed, SMLALv4i32_indexed,
- SMLSLv16i8_v8i16, SMLSLv8i16_v4i32, SMLSLv4i32_v2i64,
- SMLSLv8i16_indexed, SMLSLv4i32_indexed,
- SMULLv16i8_v8i16, SMULLv8i16_v4i32, SMULLv4i32_v2i64,
- SMULLv8i16_indexed, SMULLv4i32_indexed,
- SQDMLALv8i16_v4i32, SQDMLALv4i32_v2i64,
- SQDMLALv8i16_indexed, SQDMLALv4i32_indexed,
- SQDMLSLv8i16_v4i32, SQDMLSLv4i32_v2i64,
- SQDMLSLv8i16_indexed, SQDMLSLv4i32_indexed,
- SQDMULLv8i16_v4i32, SQDMULLv4i32_v2i64,
- SQDMULLv8i16_indexed, SQDMULLv4i32_indexed,
- SQRSHRNv16i8_shift, SQRSHRNv8i16_shift, SQRSHRNv4i32_shift,
- SQRSHRUNv16i8_shift, SQRSHRUNv8i16_shift, SQRSHRUNv4i32_shift,
- SQSHRNv16i8_shift, SQSHRNv8i16_shift, SQSHRNv4i32_shift,
- SQSHRUNv16i8_shift, SQSHRUNv8i16_shift, SQSHRUNv4i32_shift,
- SQXTNv16i8, SQXTNv8i16, SQXTNv4i32,
- SQXTUNv16i8, SQXTUNv8i16, SQXTUNv4i32,
- SSHLLv16i8_shift, SSHLLv8i16_shift, SSHLLv4i32_shift,
- SSUBLv16i8_v8i16, SSUBLv8i16_v4i32, SSUBLv4i32_v2i64,
- SSUBWv16i8_v8i16, SSUBWv8i16_v4i32, SSUBWv4i32_v2i64,
- UABALv16i8_v8i16, UABALv8i16_v4i32, UABALv4i32_v2i64,
- UABDLv16i8_v8i16, UABDLv8i16_v4i32, UABDLv4i32_v2i64,
- UADDLv16i8_v8i16, UADDLv8i16_v4i32, UADDLv4i32_v2i64,
- UADDWv16i8_v8i16, UADDWv8i16_v4i32, UADDWv4i32_v2i64,
- UMLALv16i8_v8i16, UMLALv8i16_v4i32, UMLALv4i32_v2i64,
- UMLALv8i16_indexed, UMLALv4i32_indexed,
- UMLSLv16i8_v8i16, UMLSLv8i16_v4i32, UMLSLv4i32_v2i64,
- UMLSLv8i16_indexed, UMLSLv4i32_indexed,
- UMULLv16i8_v8i16, UMULLv8i16_v4i32, UMULLv4i32_v2i64,
- UMULLv8i16_indexed, UMULLv4i32_indexed,
- UQSHRNv16i8_shift, UQSHRNv8i16_shift, UQSHRNv4i32_shift,
- UQXTNv16i8, UQXTNv8i16, UQXTNv4i32,
- USHLLv16i8_shift, USHLLv8i16_shift, USHLLv4i32_shift,
- USUBLv16i8_v8i16, USUBLv8i16_v4i32, USUBLv4i32_v2i64,
- USUBWv16i8_v8i16, USUBWv8i16_v4i32, USUBWv4i32_v2i64,
- XTNv16i8, XTNv8i16, XTNv4i32]>;
-
// Target predicates.
// Identify an instruction that effectively transfers a register to another.
diff --git a/lib/Target/AMDGPU/SIFoldOperands.cpp b/lib/Target/AMDGPU/SIFoldOperands.cpp
index f4e866958369..d679abd107d2 100644
--- a/lib/Target/AMDGPU/SIFoldOperands.cpp
+++ b/lib/Target/AMDGPU/SIFoldOperands.cpp
@@ -201,49 +201,55 @@ static bool updateOperand(FoldCandidate &Fold,
Mod.setImm(Mod.getImm() & ~SISrcMods::OP_SEL_1);
}
}
+ }
- if (Fold.needsShrink()) {
- MachineBasicBlock *MBB = MI->getParent();
- auto Liveness = MBB->computeRegisterLiveness(&TRI, AMDGPU::VCC, MI);
- if (Liveness != MachineBasicBlock::LQR_Dead)
- return false;
-
- MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
- int Op32 = Fold.getShrinkOpcode();
- MachineOperand &Dst0 = MI->getOperand(0);
- MachineOperand &Dst1 = MI->getOperand(1);
- assert(Dst0.isDef() && Dst1.isDef());
-
- bool HaveNonDbgCarryUse = !MRI.use_nodbg_empty(Dst1.getReg());
+ if ((Fold.isImm() || Fold.isFI()) && Fold.needsShrink()) {
+ MachineBasicBlock *MBB = MI->getParent();
+ auto Liveness = MBB->computeRegisterLiveness(&TRI, AMDGPU::VCC, MI);
+ if (Liveness != MachineBasicBlock::LQR_Dead)
+ return false;
- const TargetRegisterClass *Dst0RC = MRI.getRegClass(Dst0.getReg());
- unsigned NewReg0 = MRI.createVirtualRegister(Dst0RC);
- const TargetRegisterClass *Dst1RC = MRI.getRegClass(Dst1.getReg());
- unsigned NewReg1 = MRI.createVirtualRegister(Dst1RC);
+ MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
+ int Op32 = Fold.getShrinkOpcode();
+ MachineOperand &Dst0 = MI->getOperand(0);
+ MachineOperand &Dst1 = MI->getOperand(1);
+ assert(Dst0.isDef() && Dst1.isDef());
- MachineInstr *Inst32 = TII.buildShrunkInst(*MI, Op32);
+ bool HaveNonDbgCarryUse = !MRI.use_nodbg_empty(Dst1.getReg());
- if (HaveNonDbgCarryUse) {
- BuildMI(*MBB, MI, MI->getDebugLoc(), TII.get(AMDGPU::COPY), Dst1.getReg())
- .addReg(AMDGPU::VCC, RegState::Kill);
- }
+ const TargetRegisterClass *Dst0RC = MRI.getRegClass(Dst0.getReg());
+ unsigned NewReg0 = MRI.createVirtualRegister(Dst0RC);
- // Keep the old instruction around to avoid breaking iterators, but
- // replace the outputs with dummy registers.
- Dst0.setReg(NewReg0);
- Dst1.setReg(NewReg1);
+ MachineInstr *Inst32 = TII.buildShrunkInst(*MI, Op32);
- if (Fold.isCommuted())
- TII.commuteInstruction(*Inst32, false);
- return true;
+ if (HaveNonDbgCarryUse) {
+ BuildMI(*MBB, MI, MI->getDebugLoc(), TII.get(AMDGPU::COPY), Dst1.getReg())
+ .addReg(AMDGPU::VCC, RegState::Kill);
}
- Old.ChangeToImmediate(Fold.ImmToFold);
+ // Keep the old instruction around to avoid breaking iterators, but
+ // replace it with a dummy instruction to remove uses.
+ //
+ // FIXME: We should not invert how this pass looks at operands to avoid
+ // this. Should track set of foldable movs instead of looking for uses
+ // when looking at a use.
+ Dst0.setReg(NewReg0);
+ for (unsigned I = MI->getNumOperands() - 1; I > 0; --I)
+ MI->RemoveOperand(I);
+ MI->setDesc(TII.get(AMDGPU::IMPLICIT_DEF));
+
+ if (Fold.isCommuted())
+ TII.commuteInstruction(*Inst32, false);
return true;
}
assert(!Fold.needsShrink() && "not handled");
+ if (Fold.isImm()) {
+ Old.ChangeToImmediate(Fold.ImmToFold);
+ return true;
+ }
+
if (Fold.isFI()) {
Old.ChangeToFrameIndex(Fold.FrameIndexToFold);
return true;
@@ -344,7 +350,7 @@ static bool tryAddToFoldList(SmallVectorImpl<FoldCandidate> &FoldList,
if ((Opc == AMDGPU::V_ADD_I32_e64 ||
Opc == AMDGPU::V_SUB_I32_e64 ||
Opc == AMDGPU::V_SUBREV_I32_e64) && // FIXME
- OpToFold->isImm()) {
+ (OpToFold->isImm() || OpToFold->isFI())) {
MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();
// Verify the other operand is a VGPR, otherwise we would violate the
@@ -357,7 +363,10 @@ static bool tryAddToFoldList(SmallVectorImpl<FoldCandidate> &FoldList,
assert(MI->getOperand(1).isDef());
- int Op32 = AMDGPU::getVOPe32(Opc);
+ // Make sure to get the 32-bit version of the commuted opcode.
+ unsigned MaybeCommutedOpc = MI->getOpcode();
+ int Op32 = AMDGPU::getVOPe32(MaybeCommutedOpc);
+
FoldList.push_back(FoldCandidate(MI, CommuteOpNo, OpToFold, true,
Op32));
return true;
diff --git a/lib/Target/AMDGPU/VOP2Instructions.td b/lib/Target/AMDGPU/VOP2Instructions.td
index e3fd7b5f9fad..8cf524a5128d 100644
--- a/lib/Target/AMDGPU/VOP2Instructions.td
+++ b/lib/Target/AMDGPU/VOP2Instructions.td
@@ -515,18 +515,12 @@ let AddedComplexity = 1 in {
}
let SubtargetPredicate = HasAddNoCarryInsts in {
- def : DivergentBinOp<add, V_ADD_U32_e32>;
- def : DivergentBinOp<sub, V_SUB_U32_e32>;
- def : DivergentBinOp<sub, V_SUBREV_U32_e32>;
+ def : DivergentBinOp<add, V_ADD_U32_e64>;
+ def : DivergentBinOp<sub, V_SUB_U32_e64>;
}
-
-def : DivergentBinOp<add, V_ADD_I32_e32>;
-
def : DivergentBinOp<add, V_ADD_I32_e64>;
-def : DivergentBinOp<sub, V_SUB_I32_e32>;
-
-def : DivergentBinOp<sub, V_SUBREV_I32_e32>;
+def : DivergentBinOp<sub, V_SUB_I32_e64>;
def : DivergentBinOp<srl, V_LSHRREV_B32_e32>;
def : DivergentBinOp<sra, V_ASHRREV_I32_e32>;
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
index 21de0f6a7630..7e90edbbdabb 100644
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -1984,32 +1984,10 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// Build a sequence of copy-to-reg nodes chained together with token chain
// and flag operands which copy the outgoing args into the appropriate regs.
SDValue InFlag;
- // Tail call byval lowering might overwrite argument registers so in case of
- // tail call optimization the copies to registers are lowered later.
- if (!isTailCall)
- for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
- Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
- RegsToPass[i].second, InFlag);
- InFlag = Chain.getValue(1);
- }
-
- // For tail calls lower the arguments to the 'real' stack slot.
- if (isTailCall) {
- // Force all the incoming stack arguments to be loaded from the stack
- // before any new outgoing arguments are stored to the stack, because the
- // outgoing stack slots may alias the incoming argument stack slots, and
- // the alias isn't otherwise explicit. This is slightly more conservative
- // than necessary, because it means that each store effectively depends
- // on every argument instead of just those arguments it would clobber.
-
- // Do not flag preceding copytoreg stuff together with the following stuff.
- InFlag = SDValue();
- for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
- Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
- RegsToPass[i].second, InFlag);
- InFlag = Chain.getValue(1);
- }
- InFlag = SDValue();
+ for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
+ Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
+ RegsToPass[i].second, InFlag);
+ InFlag = Chain.getValue(1);
}
// If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
diff --git a/lib/Target/AVR/AVRISelLowering.cpp b/lib/Target/AVR/AVRISelLowering.cpp
index 57fc978b54bb..5db757782322 100644
--- a/lib/Target/AVR/AVRISelLowering.cpp
+++ b/lib/Target/AVR/AVRISelLowering.cpp
@@ -26,19 +26,21 @@
#include "AVR.h"
#include "AVRMachineFunctionInfo.h"
+#include "AVRSubtarget.h"
#include "AVRTargetMachine.h"
#include "MCTargetDesc/AVRMCTargetDesc.h"
namespace llvm {
-AVRTargetLowering::AVRTargetLowering(AVRTargetMachine &tm)
- : TargetLowering(tm) {
+AVRTargetLowering::AVRTargetLowering(const AVRTargetMachine &TM,
+ const AVRSubtarget &STI)
+ : TargetLowering(TM), Subtarget(STI) {
// Set up the register classes.
addRegisterClass(MVT::i8, &AVR::GPR8RegClass);
addRegisterClass(MVT::i16, &AVR::DREGSRegClass);
// Compute derived properties from the register classes.
- computeRegisterProperties(tm.getSubtargetImpl()->getRegisterInfo());
+ computeRegisterProperties(Subtarget.getRegisterInfo());
setBooleanContents(ZeroOrOneBooleanContent);
setBooleanVectorContents(ZeroOrOneBooleanContent);
@@ -163,6 +165,13 @@ AVRTargetLowering::AVRTargetLowering(AVRTargetMachine &tm)
setOperationAction(ISD::SMUL_LOHI, MVT::i16, Expand);
setOperationAction(ISD::UMUL_LOHI, MVT::i16, Expand);
+ // Expand multiplications to libcalls when there is
+ // no hardware MUL.
+ if (!Subtarget.supportsMultiplication()) {
+ setOperationAction(ISD::SMUL_LOHI, MVT::i8, Expand);
+ setOperationAction(ISD::UMUL_LOHI, MVT::i8, Expand);
+ }
+
for (MVT VT : MVT::integer_valuetypes()) {
setOperationAction(ISD::MULHS, VT, Expand);
setOperationAction(ISD::MULHU, VT, Expand);
@@ -1271,7 +1280,7 @@ SDValue AVRTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// Add a register mask operand representing the call-preserved registers.
const AVRTargetMachine &TM = (const AVRTargetMachine &)getTargetMachine();
- const TargetRegisterInfo *TRI = TM.getSubtargetImpl()->getRegisterInfo();
+ const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
const uint32_t *Mask =
TRI->getCallPreservedMask(DAG.getMachineFunction(), CallConv);
assert(Mask && "Missing call preserved mask for calling convention");
@@ -1434,7 +1443,7 @@ MachineBasicBlock *AVRTargetLowering::insertShift(MachineInstr &MI,
MachineFunction *F = BB->getParent();
MachineRegisterInfo &RI = F->getRegInfo();
const AVRTargetMachine &TM = (const AVRTargetMachine &)getTargetMachine();
- const TargetInstrInfo &TII = *TM.getSubtargetImpl()->getInstrInfo();
+ const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
DebugLoc dl = MI.getDebugLoc();
switch (MI.getOpcode()) {
@@ -1575,7 +1584,7 @@ static bool isCopyMulResult(MachineBasicBlock::iterator const &I) {
MachineBasicBlock *AVRTargetLowering::insertMul(MachineInstr &MI,
MachineBasicBlock *BB) const {
const AVRTargetMachine &TM = (const AVRTargetMachine &)getTargetMachine();
- const TargetInstrInfo &TII = *TM.getSubtargetImpl()->getInstrInfo();
+ const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
MachineBasicBlock::iterator I(MI);
++I; // in any case insert *after* the mul instruction
if (isCopyMulResult(I))
@@ -1838,9 +1847,6 @@ std::pair<unsigned, const TargetRegisterClass *>
AVRTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
StringRef Constraint,
MVT VT) const {
- auto STI = static_cast<const AVRTargetMachine &>(this->getTargetMachine())
- .getSubtargetImpl();
-
// We only support i8 and i16.
//
//:FIXME: remove this assert for now since it gets sometimes executed
@@ -1884,8 +1890,8 @@ AVRTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
}
}
- return TargetLowering::getRegForInlineAsmConstraint(STI->getRegisterInfo(),
- Constraint, VT);
+ return TargetLowering::getRegForInlineAsmConstraint(
+ Subtarget.getRegisterInfo(), Constraint, VT);
}
void AVRTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
diff --git a/lib/Target/AVR/AVRISelLowering.h b/lib/Target/AVR/AVRISelLowering.h
index c90c65c81f70..7d77dd8fb018 100644
--- a/lib/Target/AVR/AVRISelLowering.h
+++ b/lib/Target/AVR/AVRISelLowering.h
@@ -64,12 +64,14 @@ enum NodeType {
} // end of namespace AVRISD
+class AVRSubtarget;
class AVRTargetMachine;
/// Performs target lowering for the AVR.
class AVRTargetLowering : public TargetLowering {
public:
- explicit AVRTargetLowering(AVRTargetMachine &TM);
+ explicit AVRTargetLowering(const AVRTargetMachine &TM,
+ const AVRSubtarget &STI);
public:
MVT getScalarShiftAmountTy(const DataLayout &, EVT LHSTy) const override {
@@ -164,6 +166,10 @@ private:
const SDLoc &dl, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals) const;
+protected:
+
+ const AVRSubtarget &Subtarget;
+
private:
MachineBasicBlock *insertShift(MachineInstr &MI, MachineBasicBlock *BB) const;
MachineBasicBlock *insertMul(MachineInstr &MI, MachineBasicBlock *BB) const;
diff --git a/lib/Target/AVR/AVRSubtarget.cpp b/lib/Target/AVR/AVRSubtarget.cpp
index 556d69ec5234..c7c566270f43 100644
--- a/lib/Target/AVR/AVRSubtarget.cpp
+++ b/lib/Target/AVR/AVRSubtarget.cpp
@@ -29,9 +29,9 @@
namespace llvm {
AVRSubtarget::AVRSubtarget(const Triple &TT, const std::string &CPU,
- const std::string &FS, AVRTargetMachine &TM)
+ const std::string &FS, const AVRTargetMachine &TM)
: AVRGenSubtargetInfo(TT, CPU, FS), InstrInfo(), FrameLowering(),
- TLInfo(TM), TSInfo(),
+ TLInfo(TM, initializeSubtargetDependencies(CPU, FS, TM)), TSInfo(),
// Subtarget features
m_hasSRAM(false), m_hasJMPCALL(false), m_hasIJMPCALL(false),
@@ -44,4 +44,12 @@ AVRSubtarget::AVRSubtarget(const Triple &TT, const std::string &CPU,
ParseSubtargetFeatures(CPU, FS);
}
+AVRSubtarget &
+AVRSubtarget::initializeSubtargetDependencies(StringRef CPU, StringRef FS,
+ const TargetMachine &TM) {
+ // Parse features string.
+ ParseSubtargetFeatures(CPU, FS);
+ return *this;
+}
+
} // end of namespace llvm
diff --git a/lib/Target/AVR/AVRSubtarget.h b/lib/Target/AVR/AVRSubtarget.h
index fa26738da190..ba036d5e4061 100644
--- a/lib/Target/AVR/AVRSubtarget.h
+++ b/lib/Target/AVR/AVRSubtarget.h
@@ -37,7 +37,7 @@ public:
//! \param FS The feature string.
//! \param TM The target machine.
AVRSubtarget(const Triple &TT, const std::string &CPU, const std::string &FS,
- AVRTargetMachine &TM);
+ const AVRTargetMachine &TM);
const AVRInstrInfo *getInstrInfo() const override { return &InstrInfo; }
const TargetFrameLowering *getFrameLowering() const override { return &FrameLowering; }
@@ -49,6 +49,9 @@ public:
/// \note Definition of function is auto generated by `tblgen`.
void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
+ AVRSubtarget &initializeSubtargetDependencies(StringRef CPU, StringRef FS,
+ const TargetMachine &TM);
+
// Subtarget feature getters.
// See AVR.td for details.
bool hasSRAM() const { return m_hasSRAM; }
diff --git a/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp b/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp
index 58f9717e1cc6..a46f84bd1c9c 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp
@@ -700,8 +700,11 @@ void MipsTargetAsmStreamer::emitDirectiveCpreturn(unsigned SaveLocation,
}
void MipsTargetAsmStreamer::emitDirectiveModuleFP() {
- OS << "\t.module\tfp=";
- OS << ABIFlagsSection.getFpABIString(ABIFlagsSection.getFpABI()) << "\n";
+ MipsABIFlagsSection::FpABIKind FpABI = ABIFlagsSection.getFpABI();
+ if (FpABI == MipsABIFlagsSection::FpABIKind::SOFT)
+ OS << "\t.module\tsoftfloat\n";
+ else
+ OS << "\t.module\tfp=" << ABIFlagsSection.getFpABIString(FpABI) << "\n";
}
void MipsTargetAsmStreamer::emitDirectiveSetFp(
diff --git a/lib/Target/Mips/MicroMips32r6InstrInfo.td b/lib/Target/Mips/MicroMips32r6InstrInfo.td
index c441aa76ad40..994a8882f942 100644
--- a/lib/Target/Mips/MicroMips32r6InstrInfo.td
+++ b/lib/Target/Mips/MicroMips32r6InstrInfo.td
@@ -1040,7 +1040,7 @@ class TRUNC_L_D_MMR6_DESC : ABSS_FT_MMR6_DESC_BASE<"trunc.l.d", FGR64Opnd,
class TRUNC_W_S_MMR6_DESC : ABSS_FT_MMR6_DESC_BASE<"trunc.w.s", FGR32Opnd,
FGR32Opnd, II_TRUNC>;
class TRUNC_W_D_MMR6_DESC : ABSS_FT_MMR6_DESC_BASE<"trunc.w.d", FGR32Opnd,
- AFGR64Opnd, II_TRUNC>;
+ FGR64Opnd, II_TRUNC>;
class SQRT_S_MMR6_DESC : ABSS_FT_MMR6_DESC_BASE<"sqrt.s", FGR32Opnd, FGR32Opnd,
II_SQRT_S, fsqrt>;
class SQRT_D_MMR6_DESC : ABSS_FT_MMR6_DESC_BASE<"sqrt.d", AFGR64Opnd, AFGR64Opnd,
@@ -1750,6 +1750,8 @@ def : MipsPat<(f32 fpimm0), (MTC1_MMR6 ZERO)>, ISA_MICROMIPS32R6;
def : MipsPat<(f32 fpimm0neg), (FNEG_S_MMR6 (MTC1_MMR6 ZERO))>, ISA_MICROMIPS32R6;
def : MipsPat<(MipsTruncIntFP FGR64Opnd:$src),
(TRUNC_W_D_MMR6 FGR64Opnd:$src)>, ISA_MICROMIPS32R6;
+def : MipsPat<(MipsTruncIntFP FGR32Opnd:$src),
+ (TRUNC_W_S_MMR6 FGR32Opnd:$src)>, ISA_MICROMIPS32R6;
def : MipsPat<(and GPRMM16:$src, immZExtAndi16:$imm),
(ANDI16_MMR6 GPRMM16:$src, immZExtAndi16:$imm)>,
diff --git a/lib/Target/Mips/MicroMipsInstrFPU.td b/lib/Target/Mips/MicroMipsInstrFPU.td
index 1731afc1961f..9e76165e7ad7 100644
--- a/lib/Target/Mips/MicroMipsInstrFPU.td
+++ b/lib/Target/Mips/MicroMipsInstrFPU.td
@@ -425,6 +425,11 @@ def : MipsPat<(f64 (fpextend FGR32Opnd:$src)),
def : MipsPat<(MipsTruncIntFP AFGR64Opnd:$src),
(TRUNC_W_MM AFGR64Opnd:$src)>, ISA_MICROMIPS32_NOT_MIPS32R6,
FGR_32;
+def : MipsPat<(MipsTruncIntFP FGR64Opnd:$src),
+ (CVT_W_D64_MM FGR64Opnd:$src)>, ISA_MICROMIPS32_NOT_MIPS32R6,
+ FGR_64;
+def : MipsPat<(MipsTruncIntFP FGR32Opnd:$src),
+ (TRUNC_W_S_MM FGR32Opnd:$src)>, ISA_MICROMIPS32_NOT_MIPS32R6;
// Selects
defm : MovzPats0<GPR32, FGR32, MOVZ_I_S_MM, SLT_MM, SLTu_MM, SLTi_MM, SLTiu_MM>,
diff --git a/lib/Target/Mips/MipsAsmPrinter.cpp b/lib/Target/Mips/MipsAsmPrinter.cpp
index a7a748b0840e..c35f5beb6880 100644
--- a/lib/Target/Mips/MipsAsmPrinter.cpp
+++ b/lib/Target/Mips/MipsAsmPrinter.cpp
@@ -813,7 +813,8 @@ void MipsAsmPrinter::EmitStartOfAsmFile(Module &M) {
// We should always emit a '.module fp=...' but binutils 2.24 does not accept
// it. We therefore emit it when it contradicts the ABI defaults (-mfpxx or
// -mfp64) and omit it otherwise.
- if (ABI.IsO32() && (STI.isABI_FPXX() || STI.isFP64bit()))
+ if ((ABI.IsO32() && (STI.isABI_FPXX() || STI.isFP64bit())) ||
+ STI.useSoftFloat())
TS.emitDirectiveModuleFP();
// We should always emit a '.module [no]oddspreg' but binutils 2.24 does not
diff --git a/lib/Target/Mips/MipsDSPInstrInfo.td b/lib/Target/Mips/MipsDSPInstrInfo.td
index b9824220b558..a4078026e4f9 100644
--- a/lib/Target/Mips/MipsDSPInstrInfo.td
+++ b/lib/Target/Mips/MipsDSPInstrInfo.td
@@ -1314,7 +1314,9 @@ def PseudoCMPU_LE_QB : PseudoCMP<CMPU_LE_QB>;
def PseudoPICK_PH : PseudoPICK<PICK_PH>;
def PseudoPICK_QB : PseudoPICK<PICK_QB>;
-def PseudoMTLOHI_DSP : PseudoMTLOHI<ACC64DSP, GPR32>;
+let AdditionalPredicates = [HasDSP] in {
+ def PseudoMTLOHI_DSP : PseudoMTLOHI<ACC64DSP, GPR32>;
+}
// Patterns.
class DSPPat<dag pattern, dag result, Predicate pred = HasDSP> :
diff --git a/lib/Target/Mips/MipsDelaySlotFiller.cpp b/lib/Target/Mips/MipsDelaySlotFiller.cpp
index e3823e0dfdb8..61e77fbeea6d 100644
--- a/lib/Target/Mips/MipsDelaySlotFiller.cpp
+++ b/lib/Target/Mips/MipsDelaySlotFiller.cpp
@@ -726,6 +726,7 @@ bool MipsDelaySlotFiller::searchRange(MachineBasicBlock &MBB, IterTy Begin,
// but we don't have enough information to make that decision.
if (InMicroMipsMode && TII->getInstSizeInBytes(*CurrI) == 2 &&
(Opcode == Mips::JR || Opcode == Mips::PseudoIndirectBranch ||
+ Opcode == Mips::PseudoIndirectBranch_MM ||
Opcode == Mips::PseudoReturn || Opcode == Mips::TAILCALL))
continue;
// Instructions LWP/SWP and MOVEP should not be in a delay slot as that
diff --git a/lib/Target/Mips/MipsFastISel.cpp b/lib/Target/Mips/MipsFastISel.cpp
index a18416b9e861..168750b2cba9 100644
--- a/lib/Target/Mips/MipsFastISel.cpp
+++ b/lib/Target/Mips/MipsFastISel.cpp
@@ -954,21 +954,34 @@ bool MipsFastISel::selectBranch(const Instruction *I) {
//
MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)];
- // For now, just try the simplest case where it's fed by a compare.
+
+ // Fold the common case of a conditional branch with a comparison
+ // in the same block.
+ unsigned ZExtCondReg = 0;
if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) {
- MVT CIMVT =
- TLI.getValueType(DL, CI->getOperand(0)->getType(), true).getSimpleVT();
- if (CIMVT == MVT::i1)
+ if (CI->hasOneUse() && CI->getParent() == I->getParent()) {
+ ZExtCondReg = createResultReg(&Mips::GPR32RegClass);
+ if (!emitCmp(ZExtCondReg, CI))
+ return false;
+ }
+ }
+
+ // For the general case, we need to mask with 1.
+ if (ZExtCondReg == 0) {
+ unsigned CondReg = getRegForValue(BI->getCondition());
+ if (CondReg == 0)
return false;
- unsigned CondReg = getRegForValue(CI);
- BuildMI(*BrBB, FuncInfo.InsertPt, DbgLoc, TII.get(Mips::BGTZ))
- .addReg(CondReg)
- .addMBB(TBB);
- finishCondBranch(BI->getParent(), TBB, FBB);
- return true;
+ ZExtCondReg = emitIntExt(MVT::i1, CondReg, MVT::i32, true);
+ if (ZExtCondReg == 0)
+ return false;
}
- return false;
+
+ BuildMI(*BrBB, FuncInfo.InsertPt, DbgLoc, TII.get(Mips::BGTZ))
+ .addReg(ZExtCondReg)
+ .addMBB(TBB);
+ finishCondBranch(BI->getParent(), TBB, FBB);
+ return true;
}
bool MipsFastISel::selectCmp(const Instruction *I) {
diff --git a/lib/Target/Mips/MipsSEInstrInfo.cpp b/lib/Target/Mips/MipsSEInstrInfo.cpp
index c7ab90ed2a3b..2b26caaa9f49 100644
--- a/lib/Target/Mips/MipsSEInstrInfo.cpp
+++ b/lib/Target/Mips/MipsSEInstrInfo.cpp
@@ -447,6 +447,9 @@ bool MipsSEInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
case Mips::PseudoMTLOHI_DSP:
expandPseudoMTLoHi(MBB, MI, Mips::MTLO_DSP, Mips::MTHI_DSP, true);
break;
+ case Mips::PseudoMTLOHI_MM:
+ expandPseudoMTLoHi(MBB, MI, Mips::MTLO_MM, Mips::MTHI_MM, false);
+ break;
case Mips::PseudoCVT_S_W:
expandCvtFPInt(MBB, MI, Mips::CVT_S_W, Mips::MTC1, false);
break;
diff --git a/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp b/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp
index 26869f250823..cce239cac970 100644
--- a/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp
+++ b/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp
@@ -61,6 +61,14 @@ extern "C" void LLVMInitializePowerPCDisassembler() {
createPPCLEDisassembler);
}
+static DecodeStatus DecodePCRel24BranchTarget(MCInst &Inst, unsigned Imm,
+ uint64_t Addr,
+ const void *Decoder) {
+ int32_t Offset = SignExtend32<24>(Imm);
+ Inst.addOperand(MCOperand::createImm(Offset));
+ return MCDisassembler::Success;
+}
+
// FIXME: These can be generated by TableGen from the existing register
// encoding values!
diff --git a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp
index fc29e4effbb1..6824168b890d 100644
--- a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp
+++ b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp
@@ -382,8 +382,11 @@ void PPCInstPrinter::printBranchOperand(const MCInst *MI, unsigned OpNo,
// Branches can take an immediate operand. This is used by the branch
// selection pass to print .+8, an eight byte displacement from the PC.
- O << ".+";
- printAbsBranchOperand(MI, OpNo, O);
+ O << ".";
+ int32_t Imm = SignExtend32<32>((unsigned)MI->getOperand(OpNo).getImm() << 2);
+ if (Imm >= 0)
+ O << "+";
+ O << Imm;
}
void PPCInstPrinter::printAbsBranchOperand(const MCInst *MI, unsigned OpNo,
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
index a1e4e07b25af..78609ef3d4e0 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
@@ -15,6 +15,7 @@
#include "InstPrinter/PPCInstPrinter.h"
#include "MCTargetDesc/PPCMCAsmInfo.h"
#include "PPCTargetStreamer.h"
+#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Triple.h"
#include "llvm/BinaryFormat/ELF.h"
@@ -182,16 +183,33 @@ public:
void emitAssignment(MCSymbol *S, const MCExpr *Value) override {
auto *Symbol = cast<MCSymbolELF>(S);
+
// When encoding an assignment to set symbol A to symbol B, also copy
// the st_other bits encoding the local entry point offset.
- if (Value->getKind() != MCExpr::SymbolRef)
- return;
- const auto &RhsSym = cast<MCSymbolELF>(
- static_cast<const MCSymbolRefExpr *>(Value)->getSymbol());
- unsigned Other = Symbol->getOther();
+ if (copyLocalEntry(Symbol, Value))
+ UpdateOther.insert(Symbol);
+ else
+ UpdateOther.erase(Symbol);
+ }
+
+ void finish() override {
+ for (auto *Sym : UpdateOther)
+ copyLocalEntry(Sym, Sym->getVariableValue());
+ }
+
+private:
+ SmallPtrSet<MCSymbolELF *, 32> UpdateOther;
+
+ bool copyLocalEntry(MCSymbolELF *D, const MCExpr *S) {
+ auto *Ref = dyn_cast<const MCSymbolRefExpr>(S);
+ if (!Ref)
+ return false;
+ const auto &RhsSym = cast<MCSymbolELF>(Ref->getSymbol());
+ unsigned Other = D->getOther();
Other &= ~ELF::STO_PPC64_LOCAL_MASK;
Other |= RhsSym.getOther() & ELF::STO_PPC64_LOCAL_MASK;
- Symbol->setOther(Other);
+ D->setOther(Other);
+ return true;
}
};
diff --git a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
index 31acd0ff870f..70e9049a2ab3 100644
--- a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
+++ b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
@@ -4359,8 +4359,8 @@ void PPCDAGToDAGISel::Select(SDNode *N) {
const Module *M = MF->getFunction().getParent();
if (PPCLowering->getPointerTy(CurDAG->getDataLayout()) != MVT::i32 ||
- !PPCSubTarget->isSecurePlt() || !PPCSubTarget->isTargetELF() ||
- M->getPICLevel() == PICLevel::SmallPIC)
+ (!TM.isPositionIndependent() || !PPCSubTarget->isSecurePlt()) ||
+ !PPCSubTarget->isTargetELF() || M->getPICLevel() == PICLevel::SmallPIC)
break;
SDValue Op = N->getOperand(1);
diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td
index dd3f1ac79089..77aa4fe3d415 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/lib/Target/PowerPC/PPCInstrInfo.td
@@ -737,7 +737,9 @@ def abscondbrtarget : Operand<OtherVT> {
def calltarget : Operand<iPTR> {
let PrintMethod = "printBranchOperand";
let EncoderMethod = "getDirectBrEncoding";
+ let DecoderMethod = "DecodePCRel24BranchTarget";
let ParserMatchClass = PPCDirectBrAsmOperand;
+ let OperandType = "OPERAND_PCREL";
}
def abscalltarget : Operand<iPTR> {
let PrintMethod = "printAbsBranchOperand";
diff --git a/lib/Target/PowerPC/PPCSubtarget.cpp b/lib/Target/PowerPC/PPCSubtarget.cpp
index c0cbfd779cb9..1fdf74549dec 100644
--- a/lib/Target/PowerPC/PPCSubtarget.cpp
+++ b/lib/Target/PowerPC/PPCSubtarget.cpp
@@ -138,6 +138,9 @@ void PPCSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) {
if (isDarwin())
HasLazyResolverStubs = true;
+ if (TargetTriple.isOSNetBSD() || TargetTriple.isOSOpenBSD())
+ SecurePlt = true;
+
if (HasSPE && IsPPC64)
report_fatal_error( "SPE is only supported for 32-bit targets.\n", false);
if (HasSPE && (HasAltivec || HasQPX || HasVSX || HasFPU))
diff --git a/lib/Target/Sparc/SparcRegisterInfo.cpp b/lib/Target/Sparc/SparcRegisterInfo.cpp
index 33caa66154ff..ad6ea3760fee 100644
--- a/lib/Target/Sparc/SparcRegisterInfo.cpp
+++ b/lib/Target/Sparc/SparcRegisterInfo.cpp
@@ -189,7 +189,7 @@ SparcRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
MachineInstr *StMI =
BuildMI(*MI.getParent(), II, dl, TII.get(SP::STDFri))
.addReg(FrameReg).addImm(0).addReg(SrcEvenReg);
- replaceFI(MF, II, *StMI, dl, 0, Offset, FrameReg);
+ replaceFI(MF, *StMI, *StMI, dl, 0, Offset, FrameReg);
MI.setDesc(TII.get(SP::STDFri));
MI.getOperand(2).setReg(SrcOddReg);
Offset += 8;
@@ -201,7 +201,7 @@ SparcRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
MachineInstr *StMI =
BuildMI(*MI.getParent(), II, dl, TII.get(SP::LDDFri), DestEvenReg)
.addReg(FrameReg).addImm(0);
- replaceFI(MF, II, *StMI, dl, 1, Offset, FrameReg);
+ replaceFI(MF, *StMI, *StMI, dl, 1, Offset, FrameReg);
MI.setDesc(TII.get(SP::LDDFri));
MI.getOperand(0).setReg(DestOddReg);
diff --git a/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
index 003848e34227..f7f29d85cbb2 100644
--- a/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
+++ b/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
@@ -669,13 +669,16 @@ WebAssemblyTargetLowering::LowerCall(CallLoweringInfo &CLI,
if (IsVarArg) {
// Outgoing non-fixed arguments are placed in a buffer. First
// compute their offsets and the total amount of buffer space needed.
- for (SDValue Arg :
- make_range(OutVals.begin() + NumFixedArgs, OutVals.end())) {
+ for (unsigned I = NumFixedArgs; I < Outs.size(); ++I) {
+ const ISD::OutputArg &Out = Outs[I];
+ SDValue &Arg = OutVals[I];
EVT VT = Arg.getValueType();
assert(VT != MVT::iPTR && "Legalized args should be concrete");
Type *Ty = VT.getTypeForEVT(*DAG.getContext());
+ unsigned Align = std::max(Out.Flags.getOrigAlign(),
+ Layout.getABITypeAlignment(Ty));
unsigned Offset = CCInfo.AllocateStack(Layout.getTypeAllocSize(Ty),
- Layout.getABITypeAlignment(Ty));
+ Align);
CCInfo.addLoc(CCValAssign::getMem(ArgLocs.size(), VT.getSimpleVT(),
Offset, VT.getSimpleVT(),
CCValAssign::Full));
diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp
index 9dd3f2652543..12cd613c34cb 100644
--- a/lib/Target/X86/X86FastISel.cpp
+++ b/lib/Target/X86/X86FastISel.cpp
@@ -399,7 +399,7 @@ bool X86FastISel::X86FastEmitLoad(EVT VT, X86AddressMode &AM,
case MVT::v2i64:
case MVT::v8i16:
case MVT::v16i8:
- if (IsNonTemporal && Alignment >= 16)
+ if (IsNonTemporal && Alignment >= 16 && HasSSE41)
Opc = HasVLX ? X86::VMOVNTDQAZ128rm :
HasAVX ? X86::VMOVNTDQArm : X86::MOVNTDQArm;
else if (Alignment >= 16)
diff --git a/lib/Target/X86/X86TargetMachine.cpp b/lib/Target/X86/X86TargetMachine.cpp
index afcb49dc2263..217a12ddf896 100644
--- a/lib/Target/X86/X86TargetMachine.cpp
+++ b/lib/Target/X86/X86TargetMachine.cpp
@@ -38,6 +38,7 @@
#include "llvm/IR/Attributes.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Function.h"
+#include "llvm/MC/MCAsmInfo.h"
#include "llvm/Pass.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/CommandLine.h"
@@ -512,6 +513,9 @@ void X86PassConfig::addPreEmitPass2() {
// correct CFA calculation rule where needed by inserting appropriate CFI
// instructions.
const Triple &TT = TM->getTargetTriple();
- if (!TT.isOSDarwin() && !TT.isOSWindows())
+ const MCAsmInfo *MAI = TM->getMCAsmInfo();
+ if (!TT.isOSDarwin() &&
+ (!TT.isOSWindows() ||
+ MAI->getExceptionHandlingType() == ExceptionHandling::DwarfCFI))
addPass(createCFIInstrInserter());
}