diff options
Diffstat (limited to 'lib')
34 files changed, 226 insertions, 202 deletions
diff --git a/lib/CodeGen/TargetRegisterInfo.cpp b/lib/CodeGen/TargetRegisterInfo.cpp index 661dc18f7a85..d3059280a60f 100644 --- a/lib/CodeGen/TargetRegisterInfo.cpp +++ b/lib/CodeGen/TargetRegisterInfo.cpp @@ -14,6 +14,7 @@ #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/BitVector.h" +#include "llvm/ADT/SmallSet.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringExtras.h" #include "llvm/CodeGen/MachineFrameInfo.h" @@ -398,6 +399,7 @@ TargetRegisterInfo::getRegAllocationHints(unsigned VirtReg, const std::pair<unsigned, SmallVector<unsigned, 4>> &Hints_MRI = MRI.getRegAllocationHints(VirtReg); + SmallSet<unsigned, 32> HintedRegs; // First hint may be a target hint. bool Skip = (Hints_MRI.first != 0); for (auto Reg : Hints_MRI.second) { @@ -411,6 +413,10 @@ TargetRegisterInfo::getRegAllocationHints(unsigned VirtReg, if (VRM && isVirtualRegister(Phys)) Phys = VRM->getPhys(Phys); + // Don't add the same reg twice (Hints_MRI may contain multiple virtual + // registers allocated to the same physreg). + if (!HintedRegs.insert(Phys).second) + continue; // Check that Phys is a valid hint in VirtReg's register class. if (!isPhysicalRegister(Phys)) continue; diff --git a/lib/DebugInfo/DWARF/DWARFDebugFrame.cpp b/lib/DebugInfo/DWARF/DWARFDebugFrame.cpp index ba55ffc28174..8a88a2fa3a09 100644 --- a/lib/DebugInfo/DWARF/DWARFDebugFrame.cpp +++ b/lib/DebugInfo/DWARF/DWARFDebugFrame.cpp @@ -301,7 +301,7 @@ void CIE::dump(raw_ostream &OS, const MCRegisterInfo *MRI, bool IsEH) const { OS << format(" Data alignment factor: %d\n", (int32_t)DataAlignmentFactor); OS << format(" Return address column: %d\n", (int32_t)ReturnAddressRegister); if (Personality) - OS << format(" Personality Address: %08x\n", *Personality); + OS << format(" Personality Address: %016" PRIx64 "\n", *Personality); if (!AugmentationData.empty()) { OS << " Augmentation data: "; for (uint8_t Byte : AugmentationData) @@ -320,7 +320,7 @@ void FDE::dump(raw_ostream &OS, const MCRegisterInfo *MRI, bool IsEH) const { (uint32_t)InitialLocation, (uint32_t)InitialLocation + (uint32_t)AddressRange); if (LSDAAddress) - OS << format(" LSDA Address: %08x\n", *LSDAAddress); + OS << format(" LSDA Address: %016" PRIx64 "\n", *LSDAAddress); CFIs.dump(OS, MRI, IsEH); OS << "\n"; } diff --git a/lib/MC/ELFObjectWriter.cpp b/lib/MC/ELFObjectWriter.cpp index ade858113a30..1b505776ca19 100644 --- a/lib/MC/ELFObjectWriter.cpp +++ b/lib/MC/ELFObjectWriter.cpp @@ -1271,6 +1271,7 @@ void ELFObjectWriter::executePostLayoutBinding(MCAssembler &Asm, // This is the first place we are able to copy this information. Alias->setExternal(Symbol.isExternal()); Alias->setBinding(Symbol.getBinding()); + Alias->setOther(Symbol.getOther()); if (!Symbol.isUndefined() && !Rest.startswith("@@@")) continue; diff --git a/lib/MC/MCWin64EH.cpp b/lib/MC/MCWin64EH.cpp index 8bc1f08c8875..3ef1514455af 100644 --- a/lib/MC/MCWin64EH.cpp +++ b/lib/MC/MCWin64EH.cpp @@ -522,7 +522,7 @@ static void ARM64EmitUnwindInfo(MCStreamer &streamer, WinEH::FrameInfo *info) { if (MatchingEpilog) { assert(EpilogInfo.find(MatchingEpilog) != EpilogInfo.end() && "Duplicate epilog not found"); - EpilogInfo[EpilogStart] = EpilogInfo[MatchingEpilog]; + EpilogInfo[EpilogStart] = EpilogInfo.lookup(MatchingEpilog); // Clear the unwind codes in the EpilogMap, so that they don't get output // in the logic below. EpilogInstrs.clear(); diff --git a/lib/MC/WasmObjectWriter.cpp b/lib/MC/WasmObjectWriter.cpp index 333748db9190..b07fe05cad5b 100644 --- a/lib/MC/WasmObjectWriter.cpp +++ b/lib/MC/WasmObjectWriter.cpp @@ -368,7 +368,13 @@ void WasmObjectWriter::startCustomSection(SectionBookkeeping &Section, // Now that the section is complete and we know how big it is, patch up the // section size field at the start of the section. void WasmObjectWriter::endSection(SectionBookkeeping &Section) { - uint64_t Size = W.OS.tell() - Section.PayloadOffset; + uint64_t Size = W.OS.tell(); + // /dev/null doesn't support seek/tell and can report offset of 0. + // Simply skip this patching in that case. + if (!Size) + return; + + Size -= Section.PayloadOffset; if (uint32_t(Size) != Size) report_fatal_error("section size does not fit in a uint32_t"); diff --git a/lib/Object/COFFImportFile.cpp b/lib/Object/COFFImportFile.cpp index dc11cc4bcffe..e7c7efe43676 100644 --- a/lib/Object/COFFImportFile.cpp +++ b/lib/Object/COFFImportFile.cpp @@ -496,7 +496,7 @@ NewArchiveMember ObjectFactory::createWeakExternal(StringRef Sym, // COFF Header coff_file_header Header{ - u16(0), + u16(Machine), u16(NumberOfSections), u32(0), u32(sizeof(Header) + (NumberOfSections * sizeof(coff_section))), diff --git a/lib/Target/AArch64/AArch64SchedExynosM4.td b/lib/Target/AArch64/AArch64SchedExynosM4.td index 4d892465b3f2..61652b1d8e3d 100644 --- a/lib/Target/AArch64/AArch64SchedExynosM4.td +++ b/lib/Target/AArch64/AArch64SchedExynosM4.td @@ -239,7 +239,6 @@ def M4WriteNEONK : SchedWriteRes<[M4UnitNSHF, M4UnitS0]> { let Latency = 5; let NumMicroOps = 2; } def M4WriteNEONL : SchedWriteRes<[M4UnitNMUL]> { let Latency = 3; } -def M4WriteNEONM : SchedWriteRes<[M4UnitNMUL]> { let Latency = 3; } def M4WriteNEONN : SchedWriteRes<[M4UnitNMSC, M4UnitNMSC]> { let Latency = 5; let NumMicroOps = 2; } @@ -480,8 +479,6 @@ def M4WriteCOPY : SchedWriteVariant<[SchedVar<ExynosFPPred, [M4WriteNALU1]>, SchedVar<NoSchedPred, [M4WriteZ0]>]>; def M4WriteMOVI : SchedWriteVariant<[SchedVar<IsZeroFPIdiomPred, [M4WriteZ0]>, SchedVar<NoSchedPred, [M4WriteNALU1]>]>; -def M4WriteMULL : SchedWriteVariant<[SchedVar<ExynosLongVectorUpperPred, [M4WriteNEONM]>, - SchedVar<NoSchedPred, [M4WriteNMUL3]>]>; // Fast forwarding. def M4ReadAESM1 : SchedReadAdvance<+1, [M4WriteNCRY1]>; @@ -489,7 +486,8 @@ def M4ReadFMACM1 : SchedReadAdvance<+1, [M4WriteFMAC4, M4WriteFMAC4H, M4WriteFMAC5]>; def M4ReadNMULM1 : SchedReadAdvance<+1, [M4WriteNMUL3]>; -def M4ReadMULLP2 : SchedReadAdvance<-2, [M4WriteNEONM]>; +def M4ReadNMULP2 : SchedReadAdvance<-2, [M4WriteNMUL3]>; + //===----------------------------------------------------------------------===// // Coarse scheduling model. @@ -662,10 +660,8 @@ def : InstRW<[M4WriteNEONK], (instregex "^FMOVDXHighr")>; def : InstRW<[M4WriteFCVT3H], (instregex "^F(RECP|RSQRT)Ev1f16")>; def : InstRW<[M4WriteFCVT3], (instregex "^F(RECP|RSQRT)Ev1i(32|64)")>; def : InstRW<[M4WriteNMSC1], (instregex "^FRECPXv1")>; -def : InstRW<[M4WriteFMAC4H, - M4ReadFMACM1], (instregex "^F(RECP|RSQRT)S16")>; -def : InstRW<[M4WriteFMAC4, - M4ReadFMACM1], (instregex "^F(RECP|RSQRT)S(32|64)")>; +def : InstRW<[M4WriteFMAC4H], (instregex "^F(RECP|RSQRT)S16")>; +def : InstRW<[M4WriteFMAC4], (instregex "^F(RECP|RSQRT)S(32|64)")>; // FP load instructions. def : InstRW<[WriteVLD], (instregex "^LDR[SDQ]l")>; @@ -736,14 +732,20 @@ def : InstRW<[M4WriteNALU1], (instregex "^(AND|BIC|EOR|NOT|ORN|ORR)v")>; def : InstRW<[M4WriteNMSC1], (instregex "^[SU](MIN|MAX)v")>; def : InstRW<[M4WriteNMSC2], (instregex "^[SU](MIN|MAX)Pv")>; def : InstRW<[M4WriteNHAD3], (instregex "^[SU](MIN|MAX)Vv")>; -def : InstRW<[M4WriteNMUL3], (instregex "^(SQR?D)?MULH?v")>; def : InstRW<[M4WriteNMUL3, M4ReadNMULM1], (instregex "^ML[AS]v")>; -def : InstRW<[M4WriteNMUL3], (instregex "^SQRDML[AS]H")>; -def : InstRW<[M4WriteMULL, - M4ReadMULLP2], (instregex "^(S|U|SQD)ML[AS]Lv")>; -def : InstRW<[M4WriteMULL, - M4ReadMULLP2], (instregex "^(S|U|SQD)MULLv")>; +def : InstRW<[M4WriteNMUL3, + M4ReadNMULM1], (instregex "^(SQR?D)?MULH?v")>; +def : InstRW<[M4WriteNMUL3, + M4ReadNMULM1], (instregex "^SQRDML[AS]H")>; +def : InstRW<[M4WriteNMUL3, + M4ReadNMULM1], (instregex "^(S|U|SQD)ML[AS]L(v1(i32|i64)|v2i32|v4i16|v8i8)")>; +def : InstRW<[M4WriteNMUL3, + M4ReadNMULP2], (instregex "^(S|U|SQD)ML[AS]L(v4i32|v8i16|v16i8)")>; +def : InstRW<[M4WriteNMUL3, + M4ReadNMULM1], (instregex "^(S|U|SQD)MULL(v1(i32|i64)|v2i32|v4i16|v8i8)")>; +def : InstRW<[M4WriteNMUL3, + M4ReadNMULP2], (instregex "^(S|U|SQD)MULL(v4i32|v8i16|v16i8)")>; def : InstRW<[M4WriteNMUL3], (instregex "^[SU]DOT(lane)?v")>; def : InstRW<[M4WriteNHAD3], (instregex "^[SU]ADALPv")>; def : InstRW<[M4WriteNSHT4A], (instregex "^[SU]R?SRA[dv]")>; @@ -808,10 +810,8 @@ def : InstRW<[M4WriteNALU1], (instregex "^FMOVv.f(32|64)")>; def : InstRW<[M4WriteFCVT3H], (instregex "^F(RECP|RSQRT)Ev[248]f16")>; def : InstRW<[M4WriteFCVT3], (instregex "^F(RECP|RSQRT)Ev[248]f(32|64)")>; def : InstRW<[M4WriteFCVT3], (instregex "^U(RECP|RSQRT)Ev[24]i32")>; -def : InstRW<[M4WriteFMAC4H, - M4ReadFMACM1], (instregex "^F(RECP|RSQRT)Sv.f16")>; -def : InstRW<[M4WriteFMAC4, - M4ReadFMACM1], (instregex "^F(RECP|RSQRT)Sv.f(32|64)")>; +def : InstRW<[M4WriteFMAC4H], (instregex "^F(RECP|RSQRT)Sv.f16")>; +def : InstRW<[M4WriteFMAC4], (instregex "^F(RECP|RSQRT)Sv.f(32|64)")>; def : InstRW<[M4WriteNSHF1], (instregex "^REV(16|32|64)v")>; def : InstRW<[M4WriteNSHFA], (instregex "^TB[LX]v(8|16)i8One")>; def : InstRW<[M4WriteNSHFB], (instregex "^TB[LX]v(8|16)i8Two")>; diff --git a/lib/Target/AArch64/AArch64SchedPredExynos.td b/lib/Target/AArch64/AArch64SchedPredExynos.td index 48c54230e9d8..316036d89406 100644 --- a/lib/Target/AArch64/AArch64SchedPredExynos.td +++ b/lib/Target/AArch64/AArch64SchedPredExynos.td @@ -103,17 +103,6 @@ def ExynosScaledIdxPred : MCSchedPredicate<ExynosScaledIdxFn>; // Identify FP instructions. def ExynosFPPred : MCSchedPredicate<CheckAny<[CheckDForm, CheckQForm]>>; -// Identify whether an instruction whose result is a long vector -// operates on the upper half of the input registers. -def ExynosLongVectorUpperFn : TIIPredicate< - "isExynosLongVectorUpper", - MCOpcodeSwitchStatement< - [MCOpcodeSwitchCase< - IsLongVectorUpperOp.ValidOpcodes, - MCReturnStatement<TruePred>>], - MCReturnStatement<FalsePred>>>; -def ExynosLongVectorUpperPred : MCSchedPredicate<ExynosLongVectorUpperFn>; - // Identify 128-bit NEON instructions. def ExynosQFormPred : MCSchedPredicate<CheckQForm>; diff --git a/lib/Target/AArch64/AArch64SchedPredicates.td b/lib/Target/AArch64/AArch64SchedPredicates.td index dbaf11fc95dd..b23572b41b9c 100644 --- a/lib/Target/AArch64/AArch64SchedPredicates.td +++ b/lib/Target/AArch64/AArch64SchedPredicates.td @@ -268,59 +268,6 @@ def IsStoreRegOffsetOp : CheckOpcode<[STRBBroW, STRBBroX, def IsLoadStoreRegOffsetOp : CheckOpcode<!listconcat(IsLoadRegOffsetOp.ValidOpcodes, IsStoreRegOffsetOp.ValidOpcodes)>; -// Identify whether an instruction whose result is a long vector -// operates on the upper half of the input registers. -def IsLongVectorUpperOp : CheckOpcode<[FCVTLv8i16, FCVTLv4i32, - FCVTNv8i16, FCVTNv4i32, - FCVTXNv4f32, - PMULLv16i8, PMULLv2i64, - RADDHNv8i16_v16i8, RADDHNv4i32_v8i16, RADDHNv2i64_v4i32, - RSHRNv16i8_shift, RSHRNv8i16_shift, RSHRNv4i32_shift, - RSUBHNv8i16_v16i8, RSUBHNv4i32_v8i16, RSUBHNv2i64_v4i32, - SABALv16i8_v8i16, SABALv8i16_v4i32, SABALv4i32_v2i64, - SABDLv16i8_v8i16, SABDLv8i16_v4i32, SABDLv4i32_v2i64, - SADDLv16i8_v8i16, SADDLv8i16_v4i32, SADDLv4i32_v2i64, - SADDWv16i8_v8i16, SADDWv8i16_v4i32, SADDWv4i32_v2i64, - SHLLv16i8, SHLLv8i16, SHLLv4i32, - SHRNv16i8_shift, SHRNv8i16_shift, SHRNv4i32_shift, - SMLALv16i8_v8i16, SMLALv8i16_v4i32, SMLALv4i32_v2i64, - SMLALv8i16_indexed, SMLALv4i32_indexed, - SMLSLv16i8_v8i16, SMLSLv8i16_v4i32, SMLSLv4i32_v2i64, - SMLSLv8i16_indexed, SMLSLv4i32_indexed, - SMULLv16i8_v8i16, SMULLv8i16_v4i32, SMULLv4i32_v2i64, - SMULLv8i16_indexed, SMULLv4i32_indexed, - SQDMLALv8i16_v4i32, SQDMLALv4i32_v2i64, - SQDMLALv8i16_indexed, SQDMLALv4i32_indexed, - SQDMLSLv8i16_v4i32, SQDMLSLv4i32_v2i64, - SQDMLSLv8i16_indexed, SQDMLSLv4i32_indexed, - SQDMULLv8i16_v4i32, SQDMULLv4i32_v2i64, - SQDMULLv8i16_indexed, SQDMULLv4i32_indexed, - SQRSHRNv16i8_shift, SQRSHRNv8i16_shift, SQRSHRNv4i32_shift, - SQRSHRUNv16i8_shift, SQRSHRUNv8i16_shift, SQRSHRUNv4i32_shift, - SQSHRNv16i8_shift, SQSHRNv8i16_shift, SQSHRNv4i32_shift, - SQSHRUNv16i8_shift, SQSHRUNv8i16_shift, SQSHRUNv4i32_shift, - SQXTNv16i8, SQXTNv8i16, SQXTNv4i32, - SQXTUNv16i8, SQXTUNv8i16, SQXTUNv4i32, - SSHLLv16i8_shift, SSHLLv8i16_shift, SSHLLv4i32_shift, - SSUBLv16i8_v8i16, SSUBLv8i16_v4i32, SSUBLv4i32_v2i64, - SSUBWv16i8_v8i16, SSUBWv8i16_v4i32, SSUBWv4i32_v2i64, - UABALv16i8_v8i16, UABALv8i16_v4i32, UABALv4i32_v2i64, - UABDLv16i8_v8i16, UABDLv8i16_v4i32, UABDLv4i32_v2i64, - UADDLv16i8_v8i16, UADDLv8i16_v4i32, UADDLv4i32_v2i64, - UADDWv16i8_v8i16, UADDWv8i16_v4i32, UADDWv4i32_v2i64, - UMLALv16i8_v8i16, UMLALv8i16_v4i32, UMLALv4i32_v2i64, - UMLALv8i16_indexed, UMLALv4i32_indexed, - UMLSLv16i8_v8i16, UMLSLv8i16_v4i32, UMLSLv4i32_v2i64, - UMLSLv8i16_indexed, UMLSLv4i32_indexed, - UMULLv16i8_v8i16, UMULLv8i16_v4i32, UMULLv4i32_v2i64, - UMULLv8i16_indexed, UMULLv4i32_indexed, - UQSHRNv16i8_shift, UQSHRNv8i16_shift, UQSHRNv4i32_shift, - UQXTNv16i8, UQXTNv8i16, UQXTNv4i32, - USHLLv16i8_shift, USHLLv8i16_shift, USHLLv4i32_shift, - USUBLv16i8_v8i16, USUBLv8i16_v4i32, USUBLv4i32_v2i64, - USUBWv16i8_v8i16, USUBWv8i16_v4i32, USUBWv4i32_v2i64, - XTNv16i8, XTNv8i16, XTNv4i32]>; - // Target predicates. // Identify an instruction that effectively transfers a register to another. diff --git a/lib/Target/AMDGPU/SIFoldOperands.cpp b/lib/Target/AMDGPU/SIFoldOperands.cpp index f4e866958369..d679abd107d2 100644 --- a/lib/Target/AMDGPU/SIFoldOperands.cpp +++ b/lib/Target/AMDGPU/SIFoldOperands.cpp @@ -201,49 +201,55 @@ static bool updateOperand(FoldCandidate &Fold, Mod.setImm(Mod.getImm() & ~SISrcMods::OP_SEL_1); } } + } - if (Fold.needsShrink()) { - MachineBasicBlock *MBB = MI->getParent(); - auto Liveness = MBB->computeRegisterLiveness(&TRI, AMDGPU::VCC, MI); - if (Liveness != MachineBasicBlock::LQR_Dead) - return false; - - MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo(); - int Op32 = Fold.getShrinkOpcode(); - MachineOperand &Dst0 = MI->getOperand(0); - MachineOperand &Dst1 = MI->getOperand(1); - assert(Dst0.isDef() && Dst1.isDef()); - - bool HaveNonDbgCarryUse = !MRI.use_nodbg_empty(Dst1.getReg()); + if ((Fold.isImm() || Fold.isFI()) && Fold.needsShrink()) { + MachineBasicBlock *MBB = MI->getParent(); + auto Liveness = MBB->computeRegisterLiveness(&TRI, AMDGPU::VCC, MI); + if (Liveness != MachineBasicBlock::LQR_Dead) + return false; - const TargetRegisterClass *Dst0RC = MRI.getRegClass(Dst0.getReg()); - unsigned NewReg0 = MRI.createVirtualRegister(Dst0RC); - const TargetRegisterClass *Dst1RC = MRI.getRegClass(Dst1.getReg()); - unsigned NewReg1 = MRI.createVirtualRegister(Dst1RC); + MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo(); + int Op32 = Fold.getShrinkOpcode(); + MachineOperand &Dst0 = MI->getOperand(0); + MachineOperand &Dst1 = MI->getOperand(1); + assert(Dst0.isDef() && Dst1.isDef()); - MachineInstr *Inst32 = TII.buildShrunkInst(*MI, Op32); + bool HaveNonDbgCarryUse = !MRI.use_nodbg_empty(Dst1.getReg()); - if (HaveNonDbgCarryUse) { - BuildMI(*MBB, MI, MI->getDebugLoc(), TII.get(AMDGPU::COPY), Dst1.getReg()) - .addReg(AMDGPU::VCC, RegState::Kill); - } + const TargetRegisterClass *Dst0RC = MRI.getRegClass(Dst0.getReg()); + unsigned NewReg0 = MRI.createVirtualRegister(Dst0RC); - // Keep the old instruction around to avoid breaking iterators, but - // replace the outputs with dummy registers. - Dst0.setReg(NewReg0); - Dst1.setReg(NewReg1); + MachineInstr *Inst32 = TII.buildShrunkInst(*MI, Op32); - if (Fold.isCommuted()) - TII.commuteInstruction(*Inst32, false); - return true; + if (HaveNonDbgCarryUse) { + BuildMI(*MBB, MI, MI->getDebugLoc(), TII.get(AMDGPU::COPY), Dst1.getReg()) + .addReg(AMDGPU::VCC, RegState::Kill); } - Old.ChangeToImmediate(Fold.ImmToFold); + // Keep the old instruction around to avoid breaking iterators, but + // replace it with a dummy instruction to remove uses. + // + // FIXME: We should not invert how this pass looks at operands to avoid + // this. Should track set of foldable movs instead of looking for uses + // when looking at a use. + Dst0.setReg(NewReg0); + for (unsigned I = MI->getNumOperands() - 1; I > 0; --I) + MI->RemoveOperand(I); + MI->setDesc(TII.get(AMDGPU::IMPLICIT_DEF)); + + if (Fold.isCommuted()) + TII.commuteInstruction(*Inst32, false); return true; } assert(!Fold.needsShrink() && "not handled"); + if (Fold.isImm()) { + Old.ChangeToImmediate(Fold.ImmToFold); + return true; + } + if (Fold.isFI()) { Old.ChangeToFrameIndex(Fold.FrameIndexToFold); return true; @@ -344,7 +350,7 @@ static bool tryAddToFoldList(SmallVectorImpl<FoldCandidate> &FoldList, if ((Opc == AMDGPU::V_ADD_I32_e64 || Opc == AMDGPU::V_SUB_I32_e64 || Opc == AMDGPU::V_SUBREV_I32_e64) && // FIXME - OpToFold->isImm()) { + (OpToFold->isImm() || OpToFold->isFI())) { MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo(); // Verify the other operand is a VGPR, otherwise we would violate the @@ -357,7 +363,10 @@ static bool tryAddToFoldList(SmallVectorImpl<FoldCandidate> &FoldList, assert(MI->getOperand(1).isDef()); - int Op32 = AMDGPU::getVOPe32(Opc); + // Make sure to get the 32-bit version of the commuted opcode. + unsigned MaybeCommutedOpc = MI->getOpcode(); + int Op32 = AMDGPU::getVOPe32(MaybeCommutedOpc); + FoldList.push_back(FoldCandidate(MI, CommuteOpNo, OpToFold, true, Op32)); return true; diff --git a/lib/Target/AMDGPU/VOP2Instructions.td b/lib/Target/AMDGPU/VOP2Instructions.td index e3fd7b5f9fad..8cf524a5128d 100644 --- a/lib/Target/AMDGPU/VOP2Instructions.td +++ b/lib/Target/AMDGPU/VOP2Instructions.td @@ -515,18 +515,12 @@ let AddedComplexity = 1 in { } let SubtargetPredicate = HasAddNoCarryInsts in { - def : DivergentBinOp<add, V_ADD_U32_e32>; - def : DivergentBinOp<sub, V_SUB_U32_e32>; - def : DivergentBinOp<sub, V_SUBREV_U32_e32>; + def : DivergentBinOp<add, V_ADD_U32_e64>; + def : DivergentBinOp<sub, V_SUB_U32_e64>; } - -def : DivergentBinOp<add, V_ADD_I32_e32>; - def : DivergentBinOp<add, V_ADD_I32_e64>; -def : DivergentBinOp<sub, V_SUB_I32_e32>; - -def : DivergentBinOp<sub, V_SUBREV_I32_e32>; +def : DivergentBinOp<sub, V_SUB_I32_e64>; def : DivergentBinOp<srl, V_LSHRREV_B32_e32>; def : DivergentBinOp<sra, V_ASHRREV_I32_e32>; diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index 21de0f6a7630..7e90edbbdabb 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -1984,32 +1984,10 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, // Build a sequence of copy-to-reg nodes chained together with token chain // and flag operands which copy the outgoing args into the appropriate regs. SDValue InFlag; - // Tail call byval lowering might overwrite argument registers so in case of - // tail call optimization the copies to registers are lowered later. - if (!isTailCall) - for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { - Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first, - RegsToPass[i].second, InFlag); - InFlag = Chain.getValue(1); - } - - // For tail calls lower the arguments to the 'real' stack slot. - if (isTailCall) { - // Force all the incoming stack arguments to be loaded from the stack - // before any new outgoing arguments are stored to the stack, because the - // outgoing stack slots may alias the incoming argument stack slots, and - // the alias isn't otherwise explicit. This is slightly more conservative - // than necessary, because it means that each store effectively depends - // on every argument instead of just those arguments it would clobber. - - // Do not flag preceding copytoreg stuff together with the following stuff. - InFlag = SDValue(); - for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { - Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first, - RegsToPass[i].second, InFlag); - InFlag = Chain.getValue(1); - } - InFlag = SDValue(); + for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { + Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first, + RegsToPass[i].second, InFlag); + InFlag = Chain.getValue(1); } // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every diff --git a/lib/Target/AVR/AVRISelLowering.cpp b/lib/Target/AVR/AVRISelLowering.cpp index 57fc978b54bb..5db757782322 100644 --- a/lib/Target/AVR/AVRISelLowering.cpp +++ b/lib/Target/AVR/AVRISelLowering.cpp @@ -26,19 +26,21 @@ #include "AVR.h" #include "AVRMachineFunctionInfo.h" +#include "AVRSubtarget.h" #include "AVRTargetMachine.h" #include "MCTargetDesc/AVRMCTargetDesc.h" namespace llvm { -AVRTargetLowering::AVRTargetLowering(AVRTargetMachine &tm) - : TargetLowering(tm) { +AVRTargetLowering::AVRTargetLowering(const AVRTargetMachine &TM, + const AVRSubtarget &STI) + : TargetLowering(TM), Subtarget(STI) { // Set up the register classes. addRegisterClass(MVT::i8, &AVR::GPR8RegClass); addRegisterClass(MVT::i16, &AVR::DREGSRegClass); // Compute derived properties from the register classes. - computeRegisterProperties(tm.getSubtargetImpl()->getRegisterInfo()); + computeRegisterProperties(Subtarget.getRegisterInfo()); setBooleanContents(ZeroOrOneBooleanContent); setBooleanVectorContents(ZeroOrOneBooleanContent); @@ -163,6 +165,13 @@ AVRTargetLowering::AVRTargetLowering(AVRTargetMachine &tm) setOperationAction(ISD::SMUL_LOHI, MVT::i16, Expand); setOperationAction(ISD::UMUL_LOHI, MVT::i16, Expand); + // Expand multiplications to libcalls when there is + // no hardware MUL. + if (!Subtarget.supportsMultiplication()) { + setOperationAction(ISD::SMUL_LOHI, MVT::i8, Expand); + setOperationAction(ISD::UMUL_LOHI, MVT::i8, Expand); + } + for (MVT VT : MVT::integer_valuetypes()) { setOperationAction(ISD::MULHS, VT, Expand); setOperationAction(ISD::MULHU, VT, Expand); @@ -1271,7 +1280,7 @@ SDValue AVRTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, // Add a register mask operand representing the call-preserved registers. const AVRTargetMachine &TM = (const AVRTargetMachine &)getTargetMachine(); - const TargetRegisterInfo *TRI = TM.getSubtargetImpl()->getRegisterInfo(); + const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo(); const uint32_t *Mask = TRI->getCallPreservedMask(DAG.getMachineFunction(), CallConv); assert(Mask && "Missing call preserved mask for calling convention"); @@ -1434,7 +1443,7 @@ MachineBasicBlock *AVRTargetLowering::insertShift(MachineInstr &MI, MachineFunction *F = BB->getParent(); MachineRegisterInfo &RI = F->getRegInfo(); const AVRTargetMachine &TM = (const AVRTargetMachine &)getTargetMachine(); - const TargetInstrInfo &TII = *TM.getSubtargetImpl()->getInstrInfo(); + const TargetInstrInfo &TII = *Subtarget.getInstrInfo(); DebugLoc dl = MI.getDebugLoc(); switch (MI.getOpcode()) { @@ -1575,7 +1584,7 @@ static bool isCopyMulResult(MachineBasicBlock::iterator const &I) { MachineBasicBlock *AVRTargetLowering::insertMul(MachineInstr &MI, MachineBasicBlock *BB) const { const AVRTargetMachine &TM = (const AVRTargetMachine &)getTargetMachine(); - const TargetInstrInfo &TII = *TM.getSubtargetImpl()->getInstrInfo(); + const TargetInstrInfo &TII = *Subtarget.getInstrInfo(); MachineBasicBlock::iterator I(MI); ++I; // in any case insert *after* the mul instruction if (isCopyMulResult(I)) @@ -1838,9 +1847,6 @@ std::pair<unsigned, const TargetRegisterClass *> AVRTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const { - auto STI = static_cast<const AVRTargetMachine &>(this->getTargetMachine()) - .getSubtargetImpl(); - // We only support i8 and i16. // //:FIXME: remove this assert for now since it gets sometimes executed @@ -1884,8 +1890,8 @@ AVRTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, } } - return TargetLowering::getRegForInlineAsmConstraint(STI->getRegisterInfo(), - Constraint, VT); + return TargetLowering::getRegForInlineAsmConstraint( + Subtarget.getRegisterInfo(), Constraint, VT); } void AVRTargetLowering::LowerAsmOperandForConstraint(SDValue Op, diff --git a/lib/Target/AVR/AVRISelLowering.h b/lib/Target/AVR/AVRISelLowering.h index c90c65c81f70..7d77dd8fb018 100644 --- a/lib/Target/AVR/AVRISelLowering.h +++ b/lib/Target/AVR/AVRISelLowering.h @@ -64,12 +64,14 @@ enum NodeType { } // end of namespace AVRISD +class AVRSubtarget; class AVRTargetMachine; /// Performs target lowering for the AVR. class AVRTargetLowering : public TargetLowering { public: - explicit AVRTargetLowering(AVRTargetMachine &TM); + explicit AVRTargetLowering(const AVRTargetMachine &TM, + const AVRSubtarget &STI); public: MVT getScalarShiftAmountTy(const DataLayout &, EVT LHSTy) const override { @@ -164,6 +166,10 @@ private: const SDLoc &dl, SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const; +protected: + + const AVRSubtarget &Subtarget; + private: MachineBasicBlock *insertShift(MachineInstr &MI, MachineBasicBlock *BB) const; MachineBasicBlock *insertMul(MachineInstr &MI, MachineBasicBlock *BB) const; diff --git a/lib/Target/AVR/AVRSubtarget.cpp b/lib/Target/AVR/AVRSubtarget.cpp index 556d69ec5234..c7c566270f43 100644 --- a/lib/Target/AVR/AVRSubtarget.cpp +++ b/lib/Target/AVR/AVRSubtarget.cpp @@ -29,9 +29,9 @@ namespace llvm { AVRSubtarget::AVRSubtarget(const Triple &TT, const std::string &CPU, - const std::string &FS, AVRTargetMachine &TM) + const std::string &FS, const AVRTargetMachine &TM) : AVRGenSubtargetInfo(TT, CPU, FS), InstrInfo(), FrameLowering(), - TLInfo(TM), TSInfo(), + TLInfo(TM, initializeSubtargetDependencies(CPU, FS, TM)), TSInfo(), // Subtarget features m_hasSRAM(false), m_hasJMPCALL(false), m_hasIJMPCALL(false), @@ -44,4 +44,12 @@ AVRSubtarget::AVRSubtarget(const Triple &TT, const std::string &CPU, ParseSubtargetFeatures(CPU, FS); } +AVRSubtarget & +AVRSubtarget::initializeSubtargetDependencies(StringRef CPU, StringRef FS, + const TargetMachine &TM) { + // Parse features string. + ParseSubtargetFeatures(CPU, FS); + return *this; +} + } // end of namespace llvm diff --git a/lib/Target/AVR/AVRSubtarget.h b/lib/Target/AVR/AVRSubtarget.h index fa26738da190..ba036d5e4061 100644 --- a/lib/Target/AVR/AVRSubtarget.h +++ b/lib/Target/AVR/AVRSubtarget.h @@ -37,7 +37,7 @@ public: //! \param FS The feature string. //! \param TM The target machine. AVRSubtarget(const Triple &TT, const std::string &CPU, const std::string &FS, - AVRTargetMachine &TM); + const AVRTargetMachine &TM); const AVRInstrInfo *getInstrInfo() const override { return &InstrInfo; } const TargetFrameLowering *getFrameLowering() const override { return &FrameLowering; } @@ -49,6 +49,9 @@ public: /// \note Definition of function is auto generated by `tblgen`. void ParseSubtargetFeatures(StringRef CPU, StringRef FS); + AVRSubtarget &initializeSubtargetDependencies(StringRef CPU, StringRef FS, + const TargetMachine &TM); + // Subtarget feature getters. // See AVR.td for details. bool hasSRAM() const { return m_hasSRAM; } diff --git a/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp b/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp index 58f9717e1cc6..a46f84bd1c9c 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp +++ b/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp @@ -700,8 +700,11 @@ void MipsTargetAsmStreamer::emitDirectiveCpreturn(unsigned SaveLocation, } void MipsTargetAsmStreamer::emitDirectiveModuleFP() { - OS << "\t.module\tfp="; - OS << ABIFlagsSection.getFpABIString(ABIFlagsSection.getFpABI()) << "\n"; + MipsABIFlagsSection::FpABIKind FpABI = ABIFlagsSection.getFpABI(); + if (FpABI == MipsABIFlagsSection::FpABIKind::SOFT) + OS << "\t.module\tsoftfloat\n"; + else + OS << "\t.module\tfp=" << ABIFlagsSection.getFpABIString(FpABI) << "\n"; } void MipsTargetAsmStreamer::emitDirectiveSetFp( diff --git a/lib/Target/Mips/MicroMips32r6InstrInfo.td b/lib/Target/Mips/MicroMips32r6InstrInfo.td index c441aa76ad40..994a8882f942 100644 --- a/lib/Target/Mips/MicroMips32r6InstrInfo.td +++ b/lib/Target/Mips/MicroMips32r6InstrInfo.td @@ -1040,7 +1040,7 @@ class TRUNC_L_D_MMR6_DESC : ABSS_FT_MMR6_DESC_BASE<"trunc.l.d", FGR64Opnd, class TRUNC_W_S_MMR6_DESC : ABSS_FT_MMR6_DESC_BASE<"trunc.w.s", FGR32Opnd, FGR32Opnd, II_TRUNC>; class TRUNC_W_D_MMR6_DESC : ABSS_FT_MMR6_DESC_BASE<"trunc.w.d", FGR32Opnd, - AFGR64Opnd, II_TRUNC>; + FGR64Opnd, II_TRUNC>; class SQRT_S_MMR6_DESC : ABSS_FT_MMR6_DESC_BASE<"sqrt.s", FGR32Opnd, FGR32Opnd, II_SQRT_S, fsqrt>; class SQRT_D_MMR6_DESC : ABSS_FT_MMR6_DESC_BASE<"sqrt.d", AFGR64Opnd, AFGR64Opnd, @@ -1750,6 +1750,8 @@ def : MipsPat<(f32 fpimm0), (MTC1_MMR6 ZERO)>, ISA_MICROMIPS32R6; def : MipsPat<(f32 fpimm0neg), (FNEG_S_MMR6 (MTC1_MMR6 ZERO))>, ISA_MICROMIPS32R6; def : MipsPat<(MipsTruncIntFP FGR64Opnd:$src), (TRUNC_W_D_MMR6 FGR64Opnd:$src)>, ISA_MICROMIPS32R6; +def : MipsPat<(MipsTruncIntFP FGR32Opnd:$src), + (TRUNC_W_S_MMR6 FGR32Opnd:$src)>, ISA_MICROMIPS32R6; def : MipsPat<(and GPRMM16:$src, immZExtAndi16:$imm), (ANDI16_MMR6 GPRMM16:$src, immZExtAndi16:$imm)>, diff --git a/lib/Target/Mips/MicroMipsInstrFPU.td b/lib/Target/Mips/MicroMipsInstrFPU.td index 1731afc1961f..9e76165e7ad7 100644 --- a/lib/Target/Mips/MicroMipsInstrFPU.td +++ b/lib/Target/Mips/MicroMipsInstrFPU.td @@ -425,6 +425,11 @@ def : MipsPat<(f64 (fpextend FGR32Opnd:$src)), def : MipsPat<(MipsTruncIntFP AFGR64Opnd:$src), (TRUNC_W_MM AFGR64Opnd:$src)>, ISA_MICROMIPS32_NOT_MIPS32R6, FGR_32; +def : MipsPat<(MipsTruncIntFP FGR64Opnd:$src), + (CVT_W_D64_MM FGR64Opnd:$src)>, ISA_MICROMIPS32_NOT_MIPS32R6, + FGR_64; +def : MipsPat<(MipsTruncIntFP FGR32Opnd:$src), + (TRUNC_W_S_MM FGR32Opnd:$src)>, ISA_MICROMIPS32_NOT_MIPS32R6; // Selects defm : MovzPats0<GPR32, FGR32, MOVZ_I_S_MM, SLT_MM, SLTu_MM, SLTi_MM, SLTiu_MM>, diff --git a/lib/Target/Mips/MipsAsmPrinter.cpp b/lib/Target/Mips/MipsAsmPrinter.cpp index a7a748b0840e..c35f5beb6880 100644 --- a/lib/Target/Mips/MipsAsmPrinter.cpp +++ b/lib/Target/Mips/MipsAsmPrinter.cpp @@ -813,7 +813,8 @@ void MipsAsmPrinter::EmitStartOfAsmFile(Module &M) { // We should always emit a '.module fp=...' but binutils 2.24 does not accept // it. We therefore emit it when it contradicts the ABI defaults (-mfpxx or // -mfp64) and omit it otherwise. - if (ABI.IsO32() && (STI.isABI_FPXX() || STI.isFP64bit())) + if ((ABI.IsO32() && (STI.isABI_FPXX() || STI.isFP64bit())) || + STI.useSoftFloat()) TS.emitDirectiveModuleFP(); // We should always emit a '.module [no]oddspreg' but binutils 2.24 does not diff --git a/lib/Target/Mips/MipsDSPInstrInfo.td b/lib/Target/Mips/MipsDSPInstrInfo.td index b9824220b558..a4078026e4f9 100644 --- a/lib/Target/Mips/MipsDSPInstrInfo.td +++ b/lib/Target/Mips/MipsDSPInstrInfo.td @@ -1314,7 +1314,9 @@ def PseudoCMPU_LE_QB : PseudoCMP<CMPU_LE_QB>; def PseudoPICK_PH : PseudoPICK<PICK_PH>; def PseudoPICK_QB : PseudoPICK<PICK_QB>; -def PseudoMTLOHI_DSP : PseudoMTLOHI<ACC64DSP, GPR32>; +let AdditionalPredicates = [HasDSP] in { + def PseudoMTLOHI_DSP : PseudoMTLOHI<ACC64DSP, GPR32>; +} // Patterns. class DSPPat<dag pattern, dag result, Predicate pred = HasDSP> : diff --git a/lib/Target/Mips/MipsDelaySlotFiller.cpp b/lib/Target/Mips/MipsDelaySlotFiller.cpp index e3823e0dfdb8..61e77fbeea6d 100644 --- a/lib/Target/Mips/MipsDelaySlotFiller.cpp +++ b/lib/Target/Mips/MipsDelaySlotFiller.cpp @@ -726,6 +726,7 @@ bool MipsDelaySlotFiller::searchRange(MachineBasicBlock &MBB, IterTy Begin, // but we don't have enough information to make that decision. if (InMicroMipsMode && TII->getInstSizeInBytes(*CurrI) == 2 && (Opcode == Mips::JR || Opcode == Mips::PseudoIndirectBranch || + Opcode == Mips::PseudoIndirectBranch_MM || Opcode == Mips::PseudoReturn || Opcode == Mips::TAILCALL)) continue; // Instructions LWP/SWP and MOVEP should not be in a delay slot as that diff --git a/lib/Target/Mips/MipsFastISel.cpp b/lib/Target/Mips/MipsFastISel.cpp index a18416b9e861..168750b2cba9 100644 --- a/lib/Target/Mips/MipsFastISel.cpp +++ b/lib/Target/Mips/MipsFastISel.cpp @@ -954,21 +954,34 @@ bool MipsFastISel::selectBranch(const Instruction *I) { // MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)]; MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)]; - // For now, just try the simplest case where it's fed by a compare. + + // Fold the common case of a conditional branch with a comparison + // in the same block. + unsigned ZExtCondReg = 0; if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) { - MVT CIMVT = - TLI.getValueType(DL, CI->getOperand(0)->getType(), true).getSimpleVT(); - if (CIMVT == MVT::i1) + if (CI->hasOneUse() && CI->getParent() == I->getParent()) { + ZExtCondReg = createResultReg(&Mips::GPR32RegClass); + if (!emitCmp(ZExtCondReg, CI)) + return false; + } + } + + // For the general case, we need to mask with 1. + if (ZExtCondReg == 0) { + unsigned CondReg = getRegForValue(BI->getCondition()); + if (CondReg == 0) return false; - unsigned CondReg = getRegForValue(CI); - BuildMI(*BrBB, FuncInfo.InsertPt, DbgLoc, TII.get(Mips::BGTZ)) - .addReg(CondReg) - .addMBB(TBB); - finishCondBranch(BI->getParent(), TBB, FBB); - return true; + ZExtCondReg = emitIntExt(MVT::i1, CondReg, MVT::i32, true); + if (ZExtCondReg == 0) + return false; } - return false; + + BuildMI(*BrBB, FuncInfo.InsertPt, DbgLoc, TII.get(Mips::BGTZ)) + .addReg(ZExtCondReg) + .addMBB(TBB); + finishCondBranch(BI->getParent(), TBB, FBB); + return true; } bool MipsFastISel::selectCmp(const Instruction *I) { diff --git a/lib/Target/Mips/MipsSEInstrInfo.cpp b/lib/Target/Mips/MipsSEInstrInfo.cpp index c7ab90ed2a3b..2b26caaa9f49 100644 --- a/lib/Target/Mips/MipsSEInstrInfo.cpp +++ b/lib/Target/Mips/MipsSEInstrInfo.cpp @@ -447,6 +447,9 @@ bool MipsSEInstrInfo::expandPostRAPseudo(MachineInstr &MI) const { case Mips::PseudoMTLOHI_DSP: expandPseudoMTLoHi(MBB, MI, Mips::MTLO_DSP, Mips::MTHI_DSP, true); break; + case Mips::PseudoMTLOHI_MM: + expandPseudoMTLoHi(MBB, MI, Mips::MTLO_MM, Mips::MTHI_MM, false); + break; case Mips::PseudoCVT_S_W: expandCvtFPInt(MBB, MI, Mips::CVT_S_W, Mips::MTC1, false); break; diff --git a/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp b/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp index 26869f250823..cce239cac970 100644 --- a/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp +++ b/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp @@ -61,6 +61,14 @@ extern "C" void LLVMInitializePowerPCDisassembler() { createPPCLEDisassembler); } +static DecodeStatus DecodePCRel24BranchTarget(MCInst &Inst, unsigned Imm, + uint64_t Addr, + const void *Decoder) { + int32_t Offset = SignExtend32<24>(Imm); + Inst.addOperand(MCOperand::createImm(Offset)); + return MCDisassembler::Success; +} + // FIXME: These can be generated by TableGen from the existing register // encoding values! diff --git a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp index fc29e4effbb1..6824168b890d 100644 --- a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp +++ b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp @@ -382,8 +382,11 @@ void PPCInstPrinter::printBranchOperand(const MCInst *MI, unsigned OpNo, // Branches can take an immediate operand. This is used by the branch // selection pass to print .+8, an eight byte displacement from the PC. - O << ".+"; - printAbsBranchOperand(MI, OpNo, O); + O << "."; + int32_t Imm = SignExtend32<32>((unsigned)MI->getOperand(OpNo).getImm() << 2); + if (Imm >= 0) + O << "+"; + O << Imm; } void PPCInstPrinter::printAbsBranchOperand(const MCInst *MI, unsigned OpNo, diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp index a1e4e07b25af..78609ef3d4e0 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp +++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp @@ -15,6 +15,7 @@ #include "InstPrinter/PPCInstPrinter.h" #include "MCTargetDesc/PPCMCAsmInfo.h" #include "PPCTargetStreamer.h" +#include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Triple.h" #include "llvm/BinaryFormat/ELF.h" @@ -182,16 +183,33 @@ public: void emitAssignment(MCSymbol *S, const MCExpr *Value) override { auto *Symbol = cast<MCSymbolELF>(S); + // When encoding an assignment to set symbol A to symbol B, also copy // the st_other bits encoding the local entry point offset. - if (Value->getKind() != MCExpr::SymbolRef) - return; - const auto &RhsSym = cast<MCSymbolELF>( - static_cast<const MCSymbolRefExpr *>(Value)->getSymbol()); - unsigned Other = Symbol->getOther(); + if (copyLocalEntry(Symbol, Value)) + UpdateOther.insert(Symbol); + else + UpdateOther.erase(Symbol); + } + + void finish() override { + for (auto *Sym : UpdateOther) + copyLocalEntry(Sym, Sym->getVariableValue()); + } + +private: + SmallPtrSet<MCSymbolELF *, 32> UpdateOther; + + bool copyLocalEntry(MCSymbolELF *D, const MCExpr *S) { + auto *Ref = dyn_cast<const MCSymbolRefExpr>(S); + if (!Ref) + return false; + const auto &RhsSym = cast<MCSymbolELF>(Ref->getSymbol()); + unsigned Other = D->getOther(); Other &= ~ELF::STO_PPC64_LOCAL_MASK; Other |= RhsSym.getOther() & ELF::STO_PPC64_LOCAL_MASK; - Symbol->setOther(Other); + D->setOther(Other); + return true; } }; diff --git a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp index 31acd0ff870f..70e9049a2ab3 100644 --- a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp +++ b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp @@ -4359,8 +4359,8 @@ void PPCDAGToDAGISel::Select(SDNode *N) { const Module *M = MF->getFunction().getParent(); if (PPCLowering->getPointerTy(CurDAG->getDataLayout()) != MVT::i32 || - !PPCSubTarget->isSecurePlt() || !PPCSubTarget->isTargetELF() || - M->getPICLevel() == PICLevel::SmallPIC) + (!TM.isPositionIndependent() || !PPCSubTarget->isSecurePlt()) || + !PPCSubTarget->isTargetELF() || M->getPICLevel() == PICLevel::SmallPIC) break; SDValue Op = N->getOperand(1); diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td index dd3f1ac79089..77aa4fe3d415 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.td +++ b/lib/Target/PowerPC/PPCInstrInfo.td @@ -737,7 +737,9 @@ def abscondbrtarget : Operand<OtherVT> { def calltarget : Operand<iPTR> { let PrintMethod = "printBranchOperand"; let EncoderMethod = "getDirectBrEncoding"; + let DecoderMethod = "DecodePCRel24BranchTarget"; let ParserMatchClass = PPCDirectBrAsmOperand; + let OperandType = "OPERAND_PCREL"; } def abscalltarget : Operand<iPTR> { let PrintMethod = "printAbsBranchOperand"; diff --git a/lib/Target/PowerPC/PPCSubtarget.cpp b/lib/Target/PowerPC/PPCSubtarget.cpp index c0cbfd779cb9..1fdf74549dec 100644 --- a/lib/Target/PowerPC/PPCSubtarget.cpp +++ b/lib/Target/PowerPC/PPCSubtarget.cpp @@ -138,6 +138,9 @@ void PPCSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) { if (isDarwin()) HasLazyResolverStubs = true; + if (TargetTriple.isOSNetBSD() || TargetTriple.isOSOpenBSD()) + SecurePlt = true; + if (HasSPE && IsPPC64) report_fatal_error( "SPE is only supported for 32-bit targets.\n", false); if (HasSPE && (HasAltivec || HasQPX || HasVSX || HasFPU)) diff --git a/lib/Target/Sparc/SparcRegisterInfo.cpp b/lib/Target/Sparc/SparcRegisterInfo.cpp index 33caa66154ff..ad6ea3760fee 100644 --- a/lib/Target/Sparc/SparcRegisterInfo.cpp +++ b/lib/Target/Sparc/SparcRegisterInfo.cpp @@ -189,7 +189,7 @@ SparcRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, MachineInstr *StMI = BuildMI(*MI.getParent(), II, dl, TII.get(SP::STDFri)) .addReg(FrameReg).addImm(0).addReg(SrcEvenReg); - replaceFI(MF, II, *StMI, dl, 0, Offset, FrameReg); + replaceFI(MF, *StMI, *StMI, dl, 0, Offset, FrameReg); MI.setDesc(TII.get(SP::STDFri)); MI.getOperand(2).setReg(SrcOddReg); Offset += 8; @@ -201,7 +201,7 @@ SparcRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, MachineInstr *StMI = BuildMI(*MI.getParent(), II, dl, TII.get(SP::LDDFri), DestEvenReg) .addReg(FrameReg).addImm(0); - replaceFI(MF, II, *StMI, dl, 1, Offset, FrameReg); + replaceFI(MF, *StMI, *StMI, dl, 1, Offset, FrameReg); MI.setDesc(TII.get(SP::LDDFri)); MI.getOperand(0).setReg(DestOddReg); diff --git a/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp index 003848e34227..f7f29d85cbb2 100644 --- a/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp +++ b/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp @@ -669,13 +669,16 @@ WebAssemblyTargetLowering::LowerCall(CallLoweringInfo &CLI, if (IsVarArg) { // Outgoing non-fixed arguments are placed in a buffer. First // compute their offsets and the total amount of buffer space needed. - for (SDValue Arg : - make_range(OutVals.begin() + NumFixedArgs, OutVals.end())) { + for (unsigned I = NumFixedArgs; I < Outs.size(); ++I) { + const ISD::OutputArg &Out = Outs[I]; + SDValue &Arg = OutVals[I]; EVT VT = Arg.getValueType(); assert(VT != MVT::iPTR && "Legalized args should be concrete"); Type *Ty = VT.getTypeForEVT(*DAG.getContext()); + unsigned Align = std::max(Out.Flags.getOrigAlign(), + Layout.getABITypeAlignment(Ty)); unsigned Offset = CCInfo.AllocateStack(Layout.getTypeAllocSize(Ty), - Layout.getABITypeAlignment(Ty)); + Align); CCInfo.addLoc(CCValAssign::getMem(ArgLocs.size(), VT.getSimpleVT(), Offset, VT.getSimpleVT(), CCValAssign::Full)); diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp index 9dd3f2652543..12cd613c34cb 100644 --- a/lib/Target/X86/X86FastISel.cpp +++ b/lib/Target/X86/X86FastISel.cpp @@ -399,7 +399,7 @@ bool X86FastISel::X86FastEmitLoad(EVT VT, X86AddressMode &AM, case MVT::v2i64: case MVT::v8i16: case MVT::v16i8: - if (IsNonTemporal && Alignment >= 16) + if (IsNonTemporal && Alignment >= 16 && HasSSE41) Opc = HasVLX ? X86::VMOVNTDQAZ128rm : HasAVX ? X86::VMOVNTDQArm : X86::MOVNTDQArm; else if (Alignment >= 16) diff --git a/lib/Target/X86/X86TargetMachine.cpp b/lib/Target/X86/X86TargetMachine.cpp index afcb49dc2263..217a12ddf896 100644 --- a/lib/Target/X86/X86TargetMachine.cpp +++ b/lib/Target/X86/X86TargetMachine.cpp @@ -38,6 +38,7 @@ #include "llvm/IR/Attributes.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Function.h" +#include "llvm/MC/MCAsmInfo.h" #include "llvm/Pass.h" #include "llvm/Support/CodeGen.h" #include "llvm/Support/CommandLine.h" @@ -512,6 +513,9 @@ void X86PassConfig::addPreEmitPass2() { // correct CFA calculation rule where needed by inserting appropriate CFI // instructions. const Triple &TT = TM->getTargetTriple(); - if (!TT.isOSDarwin() && !TT.isOSWindows()) + const MCAsmInfo *MAI = TM->getMCAsmInfo(); + if (!TT.isOSDarwin() && + (!TT.isOSWindows() || + MAI->getExceptionHandlingType() == ExceptionHandling::DwarfCFI)) addPass(createCFIInstrInserter()); } |