| author | Dimitry Andric <dim@FreeBSD.org> | 2011-05-02 19:34:44 +0000 |
|---|---|---|
| committer | Dimitry Andric <dim@FreeBSD.org> | 2011-05-02 19:34:44 +0000 |
| commit | 6b943ff3a3f8617113ecbf611cf0f8957e4e19d2 (patch) | |
| tree | fc5f365fb9035b2d0c622bbf06c9bbe8627d7279 /lib/Target/ARM | |
| parent | d0e4e96dc17a6c1c6de3340842c80f0e187ba349 (diff) | |
| download | src-6b943ff3a3f8617113ecbf611cf0f8957e4e19d2.tar.gz src-6b943ff3a3f8617113ecbf611cf0f8957e4e19d2.zip | |
Vendor import of llvm trunk r130700:vendor/llvm/llvm-r130700
Notes:
svn path=/vendor/llvm/dist/; revision=221337
svn path=/vendor/llvm/llvm-r130700/; revision=221338; tag=vendor/llvm/llvm-r130700
Diffstat (limited to 'lib/Target/ARM')
56 files changed, 5587 insertions, 2208 deletions
diff --git a/lib/Target/ARM/ARM.td b/lib/Target/ARM/ARM.td index bf4315fc6c3e..6af5f85e8a85 100644 --- a/lib/Target/ARM/ARM.td +++ b/lib/Target/ARM/ARM.td @@ -51,6 +51,12 @@ def FeatureVFPOnlySP : SubtargetFeature<"fp-only-sp", "FPOnlySP", "true", // to just not use them. def FeatureHasSlowFPVMLx : SubtargetFeature<"slowfpvmlx", "SlowFPVMLx", "true", "Disable VFP / NEON MAC instructions">; + +// Cortex-A8 / A9 Advanced SIMD has multiplier accumulator forwarding. +def FeatureVMLxForwarding : SubtargetFeature<"vmlx-forwarding", + "HasVMLxForwarding", "true", + "Has multiplier accumulator forwarding">; + // Some processors benefit from using NEON instructions for scalar // single-precision FP operations. def FeatureNEONForFP : SubtargetFeature<"neonfp", "UseNEONForSinglePrecisionFP", @@ -61,6 +67,14 @@ def FeatureNEONForFP : SubtargetFeature<"neonfp", "UseNEONForSinglePrecisionFP", def FeaturePref32BitThumb : SubtargetFeature<"32bit", "Pref32BitThumb", "true", "Prefer 32-bit Thumb instrs">; +/// Some instructions update CPSR partially, which can add false dependency for +/// out-of-order implementation, e.g. Cortex-A9, unless each individual bit is +/// mapped to a separate physical register. Avoid partial CPSR update for these +/// processors. +def FeatureAvoidPartialCPSR : SubtargetFeature<"avoid-partial-cpsr", + "AvoidCPSRPartialUpdate", "true", + "Avoid CPSR partial update for OOO execution">; + // Multiprocessing extension. def FeatureMP : SubtargetFeature<"mp", "HasMPExtension", "true", "Supports Multiprocessing extension">; @@ -100,11 +114,13 @@ def ProcOthers : SubtargetFeature<"others", "ARMProcFamily", "Others", def ProcA8 : SubtargetFeature<"a8", "ARMProcFamily", "CortexA8", "Cortex-A8 ARM processors", [FeatureSlowFPBrcc, FeatureNEONForFP, - FeatureHasSlowFPVMLx, FeatureT2XtPk]>; + FeatureHasSlowFPVMLx, FeatureVMLxForwarding, + FeatureT2XtPk]>; def ProcA9 : SubtargetFeature<"a9", "ARMProcFamily", "CortexA9", "Cortex-A9 ARM processors", - [FeatureHasSlowFPVMLx, FeatureT2XtPk, - FeatureFP16]>; + [FeatureVMLxForwarding, + FeatureT2XtPk, FeatureFP16, + FeatureAvoidPartialCPSR]>; class ProcNoItin<string Name, list<SubtargetFeature> Features> : Processor<Name, GenericItineraries, Features>; @@ -171,6 +187,8 @@ def : Processor<"cortex-a8", CortexA8Itineraries, [ArchV7A, ProcA8]>; def : Processor<"cortex-a9", CortexA9Itineraries, [ArchV7A, ProcA9]>; +def : Processor<"cortex-a9-mp", CortexA9Itineraries, + [ArchV7A, ProcA9, FeatureMP]>; // V7M Processors. def : ProcNoItin<"cortex-m3", [ArchV7M]>; diff --git a/lib/Target/ARM/ARMAddressingModes.h b/lib/Target/ARM/ARMAddressingModes.h index 19fbf0548b02..595708fa7881 100644 --- a/lib/Target/ARM/ARMAddressingModes.h +++ b/lib/Target/ARM/ARMAddressingModes.h @@ -408,16 +408,18 @@ namespace ARM_AM { // // The first operand is always a Reg. The second operand is a reg if in // reg/reg form, otherwise it's reg#0. The third field encodes the operation - // in bit 12, the immediate in bits 0-11, and the shift op in 13-15. + // in bit 12, the immediate in bits 0-11, and the shift op in 13-15. The + // fourth operand 16-17 encodes the index mode. // // If this addressing mode is a frame index (before prolog/epilog insertion // and code rewriting), this operand will have the form: FI#, reg0, <offs> // with no shift amount for the frame offset. 
// - static inline unsigned getAM2Opc(AddrOpc Opc, unsigned Imm12, ShiftOpc SO) { + static inline unsigned getAM2Opc(AddrOpc Opc, unsigned Imm12, ShiftOpc SO, + unsigned IdxMode = 0) { assert(Imm12 < (1 << 12) && "Imm too large!"); bool isSub = Opc == sub; - return Imm12 | ((int)isSub << 12) | (SO << 13); + return Imm12 | ((int)isSub << 12) | (SO << 13) | (IdxMode << 16) ; } static inline unsigned getAM2Offset(unsigned AM2Opc) { return AM2Opc & ((1 << 12)-1); @@ -426,7 +428,10 @@ namespace ARM_AM { return ((AM2Opc >> 12) & 1) ? sub : add; } static inline ShiftOpc getAM2ShiftOpc(unsigned AM2Opc) { - return (ShiftOpc)(AM2Opc >> 13); + return (ShiftOpc)((AM2Opc >> 13) & 7); + } + static inline unsigned getAM2IdxMode(unsigned AM2Opc) { + return (AM2Opc >> 16); } @@ -441,12 +446,14 @@ namespace ARM_AM { // // The first operand is always a Reg. The second operand is a reg if in // reg/reg form, otherwise it's reg#0. The third field encodes the operation - // in bit 8, the immediate in bits 0-7. + // in bit 8, the immediate in bits 0-7. The fourth operand 9-10 encodes the + // index mode. /// getAM3Opc - This function encodes the addrmode3 opc field. - static inline unsigned getAM3Opc(AddrOpc Opc, unsigned char Offset) { + static inline unsigned getAM3Opc(AddrOpc Opc, unsigned char Offset, + unsigned IdxMode = 0) { bool isSub = Opc == sub; - return ((int)isSub << 8) | Offset; + return ((int)isSub << 8) | Offset | (IdxMode << 9); } static inline unsigned char getAM3Offset(unsigned AM3Opc) { return AM3Opc & 0xFF; @@ -454,6 +461,9 @@ namespace ARM_AM { static inline AddrOpc getAM3Op(unsigned AM3Opc) { return ((AM3Opc >> 8) & 1) ? sub : add; } + static inline unsigned getAM3IdxMode(unsigned AM3Opc) { + return (AM3Opc >> 9); + } //===--------------------------------------------------------------------===// // Addressing Mode #4 diff --git a/lib/Target/ARM/ARMAsmBackend.cpp b/lib/Target/ARM/ARMAsmBackend.cpp index ec23449d7d42..f0628192308f 100644 --- a/lib/Target/ARM/ARMAsmBackend.cpp +++ b/lib/Target/ARM/ARMAsmBackend.cpp @@ -246,7 +246,7 @@ static unsigned adjustFixupValue(unsigned Kind, uint64_t Value) { } uint32_t out = (opc << 21); - out |= (Value & 0x800) << 14; + out |= (Value & 0x800) << 15; out |= (Value & 0x700) << 4; out |= (Value & 0x0FF); @@ -416,21 +416,22 @@ void ELFARMAsmBackend::ApplyFixup(const MCFixup &Fixup, char *Data, // FIXME: This should be in a separate file. class DarwinARMAsmBackend : public ARMAsmBackend { public: - DarwinARMAsmBackend(const Target &T) : ARMAsmBackend(T) { } - - void ApplyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize, - uint64_t Value) const; + const object::mach::CPUSubtypeARM Subtype; + DarwinARMAsmBackend(const Target &T, object::mach::CPUSubtypeARM st) + : ARMAsmBackend(T), Subtype(st) { } MCObjectWriter *createObjectWriter(raw_ostream &OS) const { - // FIXME: Subtarget info should be derived. Force v7 for now. 
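An aside on the widened addrmode2 encoding above: with the index mode now stored in bits 16-17, getAM2ShiftOpc has to mask with `& 7`, since a plain `>> 13` would fold the index-mode bits into the shift opcode. A minimal standalone sketch of the packing, in plain C++ mirroring the bit layout the comments document (the enum values are illustrative stand-ins, not the LLVM definitions):

```cpp
#include <cassert>
#include <cstdio>

// Mirrors ARM_AM addrmode2: imm12 in bits 0-11, add/sub in bit 12,
// shift opcode in bits 13-15, and (new in this change) index mode in
// bits 16-17.
enum AddrOpc { add = 0, sub = 1 };

unsigned getAM2Opc(AddrOpc Opc, unsigned Imm12, unsigned SO,
                   unsigned IdxMode = 0) {
  assert(Imm12 < (1u << 12) && "Imm too large!");
  bool isSub = Opc == sub;
  return Imm12 | ((unsigned)isSub << 12) | (SO << 13) | (IdxMode << 16);
}

int main() {
  // Encode: offset 8, subtract, shift op 2, index mode 1.
  unsigned Opc = getAM2Opc(sub, 8, 2, 1);
  printf("imm=%u sub=%u shift=%u idx=%u\n",
         Opc & ((1u << 12) - 1),   // getAM2Offset
         (Opc >> 12) & 1,          // getAM2Op
         (Opc >> 13) & 7,          // getAM2ShiftOpc, now masked to 3 bits
         Opc >> 16);               // getAM2IdxMode
  // Prints: imm=8 sub=1 shift=2 idx=1
}
```

The addrmode3 change is the same idea in miniature: offset in bits 0-7, sub bit in bit 8, index mode in bits 9-10.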
return createMachObjectWriter(new ARMMachObjectWriter( /*Is64Bit=*/false, object::mach::CTM_ARM, - object::mach::CSARM_V7), + Subtype), OS, /*IsLittleEndian=*/true); } + void ApplyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize, + uint64_t Value) const; + virtual bool doesSectionRequireSymbols(const MCSection &Section) const { return false; } @@ -499,14 +500,17 @@ void DarwinARMAsmBackend::ApplyFixup(const MCFixup &Fixup, char *Data, TargetAsmBackend *llvm::createARMAsmBackend(const Target &T, const std::string &TT) { - switch (Triple(TT).getOS()) { - case Triple::Darwin: - return new DarwinARMAsmBackend(T); - case Triple::MinGW32: - case Triple::Cygwin: - case Triple::Win32: - assert(0 && "Windows not supported on ARM"); - default: - return new ELFARMAsmBackend(T, Triple(TT).getOS()); + Triple TheTriple(TT); + + if (TheTriple.isOSDarwin()) { + if (TheTriple.getArchName() == "armv6" || + TheTriple.getArchName() == "thumbv6") + return new DarwinARMAsmBackend(T, object::mach::CSARM_V6); + return new DarwinARMAsmBackend(T, object::mach::CSARM_V7); } + + if (TheTriple.isOSWindows()) + assert(0 && "Windows not supported on ARM"); + + return new ELFARMAsmBackend(T, Triple(TT).getOS()); } diff --git a/lib/Target/ARM/ARMAsmPrinter.cpp b/lib/Target/ARM/ARMAsmPrinter.cpp index db12b8e4fc2d..c428e1852a46 100644 --- a/lib/Target/ARM/ARMAsmPrinter.cpp +++ b/lib/Target/ARM/ARMAsmPrinter.cpp @@ -88,6 +88,11 @@ namespace { case ARMBuildAttrs::CPU_name: Streamer.EmitRawText(StringRef("\t.cpu ") + LowercaseString(String)); break; + /* GAS requires .fpu to be emitted regardless of EABI attribute */ + case ARMBuildAttrs::Advanced_SIMD_arch: + case ARMBuildAttrs::VFP_arch: + Streamer.EmitRawText(StringRef("\t.fpu ") + LowercaseString(String)); + break; default: assert(0 && "Unsupported Text attribute in ASM Mode"); break; } } @@ -167,6 +172,117 @@ getDebugValueLocation(const MachineInstr *MI) const { return Location; } +/// getDwarfRegOpSize - get size required to emit given machine location using +/// dwarf encoding. +unsigned ARMAsmPrinter::getDwarfRegOpSize(const MachineLocation &MLoc) const { + const TargetRegisterInfo *RI = TM.getRegisterInfo(); + if (RI->getDwarfRegNum(MLoc.getReg(), false) != -1) + return AsmPrinter::getDwarfRegOpSize(MLoc); + else { + unsigned Reg = MLoc.getReg(); + if (Reg >= ARM::S0 && Reg <= ARM::S31) { + assert(ARM::S0 + 31 == ARM::S31 && "Unexpected ARM S register numbering"); + // S registers are described as bit-pieces of a register + // S[2x] = DW_OP_regx(256 + (x>>1)) DW_OP_bit_piece(32, 0) + // S[2x+1] = DW_OP_regx(256 + (x>>1)) DW_OP_bit_piece(32, 32) + + unsigned SReg = Reg - ARM::S0; + unsigned Rx = 256 + (SReg >> 1); + OutStreamer.AddComment("Loc expr size"); + // DW_OP_regx + ULEB + DW_OP_bit_piece + ULEB + ULEB + // 1 + ULEB(Rx) + 1 + 1 + 1 + return 4 + MCAsmInfo::getULEB128Size(Rx); + } + + if (Reg >= ARM::Q0 && Reg <= ARM::Q15) { + assert(ARM::Q0 + 15 == ARM::Q15 && "Unexpected ARM Q register numbering"); + // Q registers Q0-Q15 are described by composing two D registers together. 
+ // Qx = DW_OP_regx(256+2x) DW_OP_piece(8) DW_OP_regx(256+2x+1) DW_OP_piece(8) + + unsigned QReg = Reg - ARM::Q0; + unsigned D1 = 256 + 2 * QReg; + unsigned D2 = D1 + 1; + + OutStreamer.AddComment("Loc expr size"); + // DW_OP_regx + ULEB + DW_OP_piece + ULEB(8) + + // DW_OP_regx + ULEB + DW_OP_piece + ULEB(8); + // 6 + ULEB(D1) + ULEB(D2) + return 6 + MCAsmInfo::getULEB128Size(D1) + MCAsmInfo::getULEB128Size(D2); + } + } + return 0; +} + +/// EmitDwarfRegOp - Emit dwarf register operation. +void ARMAsmPrinter::EmitDwarfRegOp(const MachineLocation &MLoc) const { + const TargetRegisterInfo *RI = TM.getRegisterInfo(); + if (RI->getDwarfRegNum(MLoc.getReg(), false) != -1) + AsmPrinter::EmitDwarfRegOp(MLoc); + else { + unsigned Reg = MLoc.getReg(); + if (Reg >= ARM::S0 && Reg <= ARM::S31) { + assert(ARM::S0 + 31 == ARM::S31 && "Unexpected ARM S register numbering"); + // S registers are described as bit-pieces of a register + // S[2x] = DW_OP_regx(256 + (x>>1)) DW_OP_bit_piece(32, 0) + // S[2x+1] = DW_OP_regx(256 + (x>>1)) DW_OP_bit_piece(32, 32) + + unsigned SReg = Reg - ARM::S0; + bool odd = SReg & 0x1; + unsigned Rx = 256 + (SReg >> 1); + OutStreamer.AddComment("Loc expr size"); + // DW_OP_regx + ULEB + DW_OP_bit_piece + ULEB + ULEB + // 1 + ULEB(Rx) + 1 + 1 + 1 + EmitInt16(4 + MCAsmInfo::getULEB128Size(Rx)); + + OutStreamer.AddComment("DW_OP_regx for S register"); + EmitInt8(dwarf::DW_OP_regx); + + OutStreamer.AddComment(Twine(SReg)); + EmitULEB128(Rx); + + if (odd) { + OutStreamer.AddComment("DW_OP_bit_piece 32 32"); + EmitInt8(dwarf::DW_OP_bit_piece); + EmitULEB128(32); + EmitULEB128(32); + } else { + OutStreamer.AddComment("DW_OP_bit_piece 32 0"); + EmitInt8(dwarf::DW_OP_bit_piece); + EmitULEB128(32); + EmitULEB128(0); + } + } else if (Reg >= ARM::Q0 && Reg <= ARM::Q15) { + assert(ARM::Q0 + 15 == ARM::Q15 && "Unexpected ARM Q register numbering"); + // Q registers Q0-Q15 are described by composing two D registers together. 
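To make the size bookkeeping above concrete, here is the arithmetic for one S and one Q register, as a sketch that assumes (as the comments state) the DWARF regx numbering for D registers starts at 256:

```cpp
#include <cstdio>

// Byte count of an unsigned LEB128 value: 7 payload bits per byte.
unsigned ulebSize(unsigned v) {
  unsigned n = 1;
  while (v >>= 7) ++n;
  return n;
}

int main() {
  // S5: Rx = 256 + (5 >> 1) = 258; the expression is
  // DW_OP_regx ULEB(Rx) DW_OP_bit_piece ULEB(32) ULEB(32)
  printf("S5 loc expr size: %u\n", 4 + ulebSize(258));  // 4 + 2 = 6
  // Q3: D1 = 256 + 2*3 = 262, D2 = 263; the expression is
  // DW_OP_regx ULEB(D1) DW_OP_piece ULEB(8)
  // DW_OP_regx ULEB(D2) DW_OP_piece ULEB(8)
  printf("Q3 loc expr size: %u\n",
         6 + ulebSize(262) + ulebSize(263));             // 6 + 2 + 2 = 10
}
```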
+ // Qx = DW_OP_regx(256+2x) DW_OP_piece(8) DW_OP_regx(256+2x+1) DW_OP_piece(8) + + unsigned QReg = Reg - ARM::Q0; + unsigned D1 = 256 + 2 * QReg; + unsigned D2 = D1 + 1; + + OutStreamer.AddComment("Loc expr size"); + // DW_OP_regx + ULEB + DW_OP_piece + ULEB(8) + + // DW_OP_regx + ULEB + DW_OP_piece + ULEB(8); + // 6 + ULEB(D1) + ULEB(D2) + EmitInt16(6 + MCAsmInfo::getULEB128Size(D1) + MCAsmInfo::getULEB128Size(D2)); + + OutStreamer.AddComment("DW_OP_regx for Q register: D1"); + EmitInt8(dwarf::DW_OP_regx); + EmitULEB128(D1); + OutStreamer.AddComment("DW_OP_piece 8"); + EmitInt8(dwarf::DW_OP_piece); + EmitULEB128(8); + + OutStreamer.AddComment("DW_OP_regx for Q register: D2"); + EmitInt8(dwarf::DW_OP_regx); + EmitULEB128(D2); + OutStreamer.AddComment("DW_OP_piece 8"); + EmitInt8(dwarf::DW_OP_piece); + EmitULEB128(8); + } + } +} + void ARMAsmPrinter::EmitFunctionEntryLabel() { if (AFI->isThumbFunction()) { OutStreamer.EmitAssemblerFlag(MCAF_Code16); @@ -453,10 +569,13 @@ void ARMAsmPrinter::emitAttributes() { emitARMAttributeSection(); + /* GAS expect .fpu to be emitted, regardless of VFP build attribute */ + bool emitFPU = false; AttributeEmitter *AttrEmitter; - if (OutStreamer.hasRawTextSupport()) + if (OutStreamer.hasRawTextSupport()) { AttrEmitter = new AsmAttributeEmitter(OutStreamer); - else { + emitFPU = true; + } else { MCObjectStreamer &O = static_cast<MCObjectStreamer&>(OutStreamer); AttrEmitter = new ObjectAttributeEmitter(O); } @@ -490,10 +609,36 @@ void ARMAsmPrinter::emitAttributes() { ARMBuildAttrs::Allowed); } - // FIXME: Emit FPU type - if (Subtarget->hasVFP2()) + if (Subtarget->hasNEON() && emitFPU) { + /* NEON is not exactly a VFP architecture, but GAS emit one of + * neon/vfpv3/vfpv2 for .fpu parameters */ + AttrEmitter->EmitTextAttribute(ARMBuildAttrs::Advanced_SIMD_arch, "neon"); + /* If emitted for NEON, omit from VFP below, since you can have both + * NEON and VFP in build attributes but only one .fpu */ + emitFPU = false; + } + + /* VFPv3 + .fpu */ + if (Subtarget->hasVFP3()) { + AttrEmitter->EmitAttribute(ARMBuildAttrs::VFP_arch, + ARMBuildAttrs::AllowFPv3A); + if (emitFPU) + AttrEmitter->EmitTextAttribute(ARMBuildAttrs::VFP_arch, "vfpv3"); + + /* VFPv2 + .fpu */ + } else if (Subtarget->hasVFP2()) { AttrEmitter->EmitAttribute(ARMBuildAttrs::VFP_arch, ARMBuildAttrs::AllowFPv2); + if (emitFPU) + AttrEmitter->EmitTextAttribute(ARMBuildAttrs::VFP_arch, "vfpv2"); + } + + /* TODO: ARMBuildAttrs::Allowed is not completely accurate, + * since NEON can have 1 (allowed) or 2 (fused MAC operations) */ + if (Subtarget->hasNEON()) { + AttrEmitter->EmitAttribute(ARMBuildAttrs::Advanced_SIMD_arch, + ARMBuildAttrs::Allowed); + } // Signal various FP modes. if (!UnsafeFPMath) { @@ -777,10 +922,161 @@ void ARMAsmPrinter::EmitPatchedInstruction(const MachineInstr *MI, OutStreamer.EmitInstruction(TmpInst); } +void ARMAsmPrinter::EmitUnwindingInstruction(const MachineInstr *MI) { + assert(MI->getFlag(MachineInstr::FrameSetup) && + "Only instruction which are involved into frame setup code are allowed"); + + const MachineFunction &MF = *MI->getParent()->getParent(); + const TargetRegisterInfo *RegInfo = MF.getTarget().getRegisterInfo(); + const ARMFunctionInfo &AFI = *MF.getInfo<ARMFunctionInfo>(); + + unsigned FramePtr = RegInfo->getFrameRegister(MF); + unsigned Opc = MI->getOpcode(); + unsigned SrcReg, DstReg; + + if (Opc == ARM::tPUSH || Opc == ARM::tLDRpci) { + // Two special cases: + // 1) tPUSH does not have src/dst regs. 
+ // 2) for Thumb1 code we sometimes materialize the constant via constpool + // load. Yes, this is pretty fragile, but for now I don't see better + // way... :( + SrcReg = DstReg = ARM::SP; + } else { + SrcReg = MI->getOperand(1).getReg(); + DstReg = MI->getOperand(0).getReg(); + } + + // Try to figure out the unwinding opcode out of src / dst regs. + if (MI->getDesc().mayStore()) { + // Register saves. + assert(DstReg == ARM::SP && + "Only stack pointer as a destination reg is supported"); + + SmallVector<unsigned, 4> RegList; + // Skip src & dst reg, and pred ops. + unsigned StartOp = 2 + 2; + // Use all the operands. + unsigned NumOffset = 0; + + switch (Opc) { + default: + MI->dump(); + assert(0 && "Unsupported opcode for unwinding information"); + case ARM::tPUSH: + // Special case here: no src & dst reg, but two extra imp ops. + StartOp = 2; NumOffset = 2; + case ARM::STMDB_UPD: + case ARM::t2STMDB_UPD: + case ARM::VSTMDDB_UPD: + assert(SrcReg == ARM::SP && + "Only stack pointer as a source reg is supported"); + for (unsigned i = StartOp, NumOps = MI->getNumOperands() - NumOffset; + i != NumOps; ++i) + RegList.push_back(MI->getOperand(i).getReg()); + break; + case ARM::STR_PRE: + assert(MI->getOperand(2).getReg() == ARM::SP && + "Only stack pointer as a source reg is supported"); + RegList.push_back(SrcReg); + break; + } + OutStreamer.EmitRegSave(RegList, Opc == ARM::VSTMDDB_UPD); + } else { + // Changes of stack / frame pointer. + if (SrcReg == ARM::SP) { + int64_t Offset = 0; + switch (Opc) { + default: + MI->dump(); + assert(0 && "Unsupported opcode for unwinding information"); + case ARM::MOVr: + case ARM::tMOVgpr2gpr: + case ARM::tMOVgpr2tgpr: + Offset = 0; + break; + case ARM::ADDri: + Offset = -MI->getOperand(2).getImm(); + break; + case ARM::SUBri: + case ARM::t2SUBrSPi: + Offset = MI->getOperand(2).getImm(); + break; + case ARM::tSUBspi: + Offset = MI->getOperand(2).getImm()*4; + break; + case ARM::tADDspi: + case ARM::tADDrSPi: + Offset = -MI->getOperand(2).getImm()*4; + break; + case ARM::tLDRpci: { + // Grab the constpool index and check, whether it corresponds to + // original or cloned constpool entry. + unsigned CPI = MI->getOperand(1).getIndex(); + const MachineConstantPool *MCP = MF.getConstantPool(); + if (CPI >= MCP->getConstants().size()) + CPI = AFI.getOriginalCPIdx(CPI); + assert(CPI != -1U && "Invalid constpool index"); + + // Derive the actual offset. + const MachineConstantPoolEntry &CPE = MCP->getConstants()[CPI]; + assert(!CPE.isMachineConstantPoolEntry() && "Invalid constpool entry"); + // FIXME: Check for user, it should be "add" instruction! + Offset = -cast<ConstantInt>(CPE.Val.ConstVal)->getSExtValue(); + break; + } + } + + if (DstReg == FramePtr && FramePtr != ARM::SP) + // Set-up of the frame pointer. Positive values correspond to "add" + // instruction. + OutStreamer.EmitSetFP(FramePtr, ARM::SP, -Offset); + else if (DstReg == ARM::SP) { + // Change of SP by an offset. Positive values correspond to "sub" + // instruction. 
+ OutStreamer.EmitPad(Offset); + } else { + MI->dump(); + assert(0 && "Unsupported opcode for unwinding information"); + } + } else if (DstReg == ARM::SP) { + // FIXME: .movsp goes here + MI->dump(); + assert(0 && "Unsupported opcode for unwinding information"); + } + else { + MI->dump(); + assert(0 && "Unsupported opcode for unwinding information"); + } + } +} + +extern cl::opt<bool> EnableARMEHABI; + void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) { unsigned Opc = MI->getOpcode(); switch (Opc) { default: break; + case ARM::B: { + // B is just a Bcc with an 'always' predicate. + MCInst TmpInst; + LowerARMMachineInstrToMCInst(MI, TmpInst, *this); + TmpInst.setOpcode(ARM::Bcc); + // Add predicate operands. + TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL)); + TmpInst.addOperand(MCOperand::CreateReg(0)); + OutStreamer.EmitInstruction(TmpInst); + return; + } + case ARM::LDMIA_RET: { + // LDMIA_RET is just a normal LDMIA_UPD instruction that targets PC and as + // such has additional code-gen properties and scheduling information. + // To emit it, we just construct as normal and set the opcode to LDMIA_UPD. + MCInst TmpInst; + LowerARMMachineInstrToMCInst(MI, TmpInst, *this); + TmpInst.setOpcode(ARM::LDMIA_UPD); + OutStreamer.EmitInstruction(TmpInst); + return; + } case ARM::t2ADDrSPi: case ARM::t2ADDrSPi12: case ARM::t2SUBrSPi: @@ -850,6 +1146,26 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) { OutStreamer.EmitInstruction(TmpInst); return; } + // Darwin call instructions are just normal call instructions with different + // clobber semantics (they clobber R9). + case ARM::BLr9: + case ARM::BLr9_pred: + case ARM::BLXr9: + case ARM::BLXr9_pred: { + unsigned newOpc; + switch (Opc) { + default: assert(0); + case ARM::BLr9: newOpc = ARM::BL; break; + case ARM::BLr9_pred: newOpc = ARM::BL_pred; break; + case ARM::BLXr9: newOpc = ARM::BLX; break; + case ARM::BLXr9_pred: newOpc = ARM::BLX_pred; break; + } + MCInst TmpInst; + LowerARMMachineInstrToMCInst(MI, TmpInst, *this); + TmpInst.setOpcode(newOpc); + OutStreamer.EmitInstruction(TmpInst); + return; + } case ARM::BXr9_CALL: case ARM::BX_CALL: { { @@ -1502,6 +1818,49 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) { } return; } + // Tail jump branches are really just branch instructions with additional + // code-gen attributes. Convert them to the canonical form here. + case ARM::TAILJMPd: + case ARM::TAILJMPdND: { + MCInst TmpInst, TmpInst2; + // Lower the instruction as-is to get the operands properly converted. + LowerARMMachineInstrToMCInst(MI, TmpInst2, *this); + TmpInst.setOpcode(ARM::Bcc); + TmpInst.addOperand(TmpInst2.getOperand(0)); + // Add predicate operands. + TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL)); + TmpInst.addOperand(MCOperand::CreateReg(0)); + OutStreamer.AddComment("TAILCALL"); + OutStreamer.EmitInstruction(TmpInst); + return; + } + case ARM::tTAILJMPd: + case ARM::tTAILJMPdND: { + MCInst TmpInst, TmpInst2; + LowerARMMachineInstrToMCInst(MI, TmpInst2, *this); + TmpInst.setOpcode(ARM::tB); + TmpInst.addOperand(TmpInst2.getOperand(0)); + OutStreamer.AddComment("TAILCALL"); + OutStreamer.EmitInstruction(TmpInst); + return; + } + case ARM::TAILJMPrND: + case ARM::tTAILJMPrND: + case ARM::TAILJMPr: + case ARM::tTAILJMPr: { + unsigned newOpc = (Opc == ARM::TAILJMPr || Opc == ARM::TAILJMPrND) + ? ARM::BX : ARM::tBX; + MCInst TmpInst; + TmpInst.setOpcode(newOpc); + TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg())); + // Predicate. 
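The sign conventions in EmitUnwindingInstruction above are easy to misread, so here is how a few typical prologue instructions flow through it (the instruction spellings are illustrative):

| prologue instruction | derived Offset | unwind directive emitted |
|---|---|---|
| sub sp, sp, #16 (SUBri) | 16 | EmitPad(16) |
| sub sp, #4 (tSUBspi, imm 1) | 1 * 4 = 4 | EmitPad(4) |
| add r7, sp, #8 (ADDri, r7 is the frame pointer) | -8 | EmitSetFP(r7, sp, 8) |

Positive values mean "sub" for stack adjustments but "add" for frame-pointer setup, hence the negations in the two branches.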
+ TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL)); + TmpInst.addOperand(MCOperand::CreateReg(0)); + OutStreamer.AddComment("TAILCALL"); + OutStreamer.EmitInstruction(TmpInst); + return; + } + // These are the pseudos created to comply with stricter operand restrictions // on ARMv5. Lower them now to "normal" instructions, since all the // restrictions are already satisfied. @@ -1530,6 +1889,11 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) { MCInst TmpInst; LowerARMMachineInstrToMCInst(MI, TmpInst, *this); + + // Emit unwinding stuff for frame-related instructions + if (EnableARMEHABI && MI->getFlag(MachineInstr::FrameSetup)) + EmitUnwindingInstruction(MI); + OutStreamer.EmitInstruction(TmpInst); } @@ -1538,10 +1902,11 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) { //===----------------------------------------------------------------------===// static MCInstPrinter *createARMMCInstPrinter(const Target &T, + TargetMachine &TM, unsigned SyntaxVariant, const MCAsmInfo &MAI) { if (SyntaxVariant == 0) - return new ARMInstPrinter(MAI); + return new ARMInstPrinter(TM, MAI); return 0; } diff --git a/lib/Target/ARM/ARMAsmPrinter.h b/lib/Target/ARM/ARMAsmPrinter.h index 585268442ce4..1ee1b7024d15 100644 --- a/lib/Target/ARM/ARMAsmPrinter.h +++ b/lib/Target/ARM/ARMAsmPrinter.h @@ -82,11 +82,20 @@ private: // Generic helper used to emit e.g. ARMv5 mul pseudos void EmitPatchedInstruction(const MachineInstr *MI, unsigned TargetOpc); + void EmitUnwindingInstruction(const MachineInstr *MI); + public: void PrintDebugValueComment(const MachineInstr *MI, raw_ostream &OS); MachineLocation getDebugValueLocation(const MachineInstr *MI) const; + /// getDwarfRegOpSize - get size required to emit given machine location + /// using dwarf encoding. + virtual unsigned getDwarfRegOpSize(const MachineLocation &MLoc) const; + + /// EmitDwarfRegOp - Emit dwarf register operation. + virtual void EmitDwarfRegOp(const MachineLocation &MLoc) const; + virtual unsigned getISAEncoding() { // ARM/Darwin adds ISA to the DWARF info for each function. 
if (!Subtarget->isTargetDarwin()) diff --git a/lib/Target/ARM/ARMBaseInfo.h b/lib/Target/ARM/ARMBaseInfo.h index a56cc1a9f249..36edbad7a601 100644 --- a/lib/Target/ARM/ARMBaseInfo.h +++ b/lib/Target/ARM/ARMBaseInfo.h @@ -200,6 +200,59 @@ inline static unsigned getARMRegisterNumbering(unsigned Reg) { } namespace ARMII { + + /// ARM Index Modes + enum IndexMode { + IndexModeNone = 0, + IndexModePre = 1, + IndexModePost = 2, + IndexModeUpd = 3 + }; + + /// ARM Addressing Modes + enum AddrMode { + AddrModeNone = 0, + AddrMode1 = 1, + AddrMode2 = 2, + AddrMode3 = 3, + AddrMode4 = 4, + AddrMode5 = 5, + AddrMode6 = 6, + AddrModeT1_1 = 7, + AddrModeT1_2 = 8, + AddrModeT1_4 = 9, + AddrModeT1_s = 10, // i8 * 4 for pc and sp relative data + AddrModeT2_i12 = 11, + AddrModeT2_i8 = 12, + AddrModeT2_so = 13, + AddrModeT2_pc = 14, // +/- i12 for pc relative data + AddrModeT2_i8s4 = 15, // i8 * 4 + AddrMode_i12 = 16 + }; + + inline static const char *AddrModeToString(AddrMode addrmode) { + switch (addrmode) { + default: llvm_unreachable("Unknown memory operation"); + case AddrModeNone: return "AddrModeNone"; + case AddrMode1: return "AddrMode1"; + case AddrMode2: return "AddrMode2"; + case AddrMode3: return "AddrMode3"; + case AddrMode4: return "AddrMode4"; + case AddrMode5: return "AddrMode5"; + case AddrMode6: return "AddrMode6"; + case AddrModeT1_1: return "AddrModeT1_1"; + case AddrModeT1_2: return "AddrModeT1_2"; + case AddrModeT1_4: return "AddrModeT1_4"; + case AddrModeT1_s: return "AddrModeT1_s"; + case AddrModeT2_i12: return "AddrModeT2_i12"; + case AddrModeT2_i8: return "AddrModeT2_i8"; + case AddrModeT2_so: return "AddrModeT2_so"; + case AddrModeT2_pc: return "AddrModeT2_pc"; + case AddrModeT2_i8s4: return "AddrModeT2_i8s4"; + case AddrMode_i12: return "AddrMode_i12"; + } + } + /// Target Operand Flag enum. 
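This hunk only moves the IndexMode and AddrMode enums into the ARMII namespace; the TSFlags packing is unchanged, and the masks stay behind in ARMBaseInstrInfo.h (see further down: AddrModeMask = 0x1f, IndexModeShift = 8). A hedged sketch of how a consumer recovers both fields from an instruction's TSFlags, using those mask values:

```cpp
#include <cstdint>

// Field extraction; masks and shifts copied from ARMBaseInstrInfo.h
// (AddrModeMask = 0x1f, IndexModeShift = 8, IndexModeMask = 3 << 8).
unsigned getAddrMode(uint64_t TSFlags) {
  return TSFlags & 0x1f;      // one of the AddrMode values above
}
unsigned getIndexMode(uint64_t TSFlags) {
  return (TSFlags >> 8) & 3;  // None / Pre / Post / Upd, as above
}
```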
enum TOF { //===------------------------------------------------------------------===// diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp index 2268e59ea7b1..44a397611526 100644 --- a/lib/Target/ARM/ARMBaseInstrInfo.cpp +++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp @@ -1021,7 +1021,7 @@ reMaterialize(MachineBasicBlock &MBB, MachineInstrBuilder MIB = BuildMI(MBB, I, Orig->getDebugLoc(), get(Opcode), DestReg) .addConstantPoolIndex(CPI).addImm(PCLabelId); - (*MIB).setMemRefs(Orig->memoperands_begin(), Orig->memoperands_end()); + MIB->setMemRefs(Orig->memoperands_begin(), Orig->memoperands_end()); break; } } @@ -1080,11 +1080,18 @@ bool ARMBaseInstrInfo::produceSameValue(const MachineInstr *MI0, int CPI1 = MO1.getIndex(); const MachineConstantPoolEntry &MCPE0 = MCP->getConstants()[CPI0]; const MachineConstantPoolEntry &MCPE1 = MCP->getConstants()[CPI1]; - ARMConstantPoolValue *ACPV0 = - static_cast<ARMConstantPoolValue*>(MCPE0.Val.MachineCPVal); - ARMConstantPoolValue *ACPV1 = - static_cast<ARMConstantPoolValue*>(MCPE1.Val.MachineCPVal); - return ACPV0->hasSameValue(ACPV1); + bool isARMCP0 = MCPE0.isMachineConstantPoolEntry(); + bool isARMCP1 = MCPE1.isMachineConstantPoolEntry(); + if (isARMCP0 && isARMCP1) { + ARMConstantPoolValue *ACPV0 = + static_cast<ARMConstantPoolValue*>(MCPE0.Val.MachineCPVal); + ARMConstantPoolValue *ACPV1 = + static_cast<ARMConstantPoolValue*>(MCPE1.Val.MachineCPVal); + return ACPV0->hasSameValue(ACPV1); + } else if (!isARMCP0 && !isARMCP1) { + return MCPE0.Val.ConstVal == MCPE1.Val.ConstVal; + } + return false; } else if (Opcode == ARM::PICLDR) { if (MI1->getOpcode() != Opcode) return false; @@ -1194,7 +1201,7 @@ bool ARMBaseInstrInfo::areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2, } /// shouldScheduleLoadsNear - This is a used by the pre-regalloc scheduler to -/// determine (in conjuction with areLoadsFromSameBasePtr) if two loads should +/// determine (in conjunction with areLoadsFromSameBasePtr) if two loads should /// be scheduled togther. On some targets if two loads are loading from /// addresses in the same cache line, it's better if they are scheduled /// together. This function takes two integers that represent the load offsets @@ -1263,19 +1270,19 @@ bool ARMBaseInstrInfo::isSchedulingBoundary(const MachineInstr *MI, } bool ARMBaseInstrInfo::isProfitableToIfCvt(MachineBasicBlock &MBB, - unsigned NumCyles, + unsigned NumCycles, unsigned ExtraPredCycles, float Probability, float Confidence) const { - if (!NumCyles) + if (!NumCycles) return false; // Attempt to estimate the relative costs of predication versus branching. - float UnpredCost = Probability * NumCyles; + float UnpredCost = Probability * NumCycles; UnpredCost += 1.0; // The branch itself UnpredCost += (1.0 - Confidence) * Subtarget.getMispredictionPenalty(); - return (float)(NumCyles + ExtraPredCycles) < UnpredCost; + return (float)(NumCycles + ExtraPredCycles) < UnpredCost; } bool ARMBaseInstrInfo:: @@ -1328,7 +1335,7 @@ void llvm::emitARMRegPlusImmediate(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, DebugLoc dl, unsigned DestReg, unsigned BaseReg, int NumBytes, ARMCC::CondCodes Pred, unsigned PredReg, - const ARMBaseInstrInfo &TII) { + const ARMBaseInstrInfo &TII, unsigned MIFlags) { bool isSub = NumBytes < 0; if (isSub) NumBytes = -NumBytes; @@ -1346,7 +1353,8 @@ void llvm::emitARMRegPlusImmediate(MachineBasicBlock &MBB, unsigned Opc = isSub ? 
ARM::SUBri : ARM::ADDri; BuildMI(MBB, MBBI, dl, TII.get(Opc), DestReg) .addReg(BaseReg, RegState::Kill).addImm(ThisVal) - .addImm((unsigned)Pred).addReg(PredReg).addReg(0); + .addImm((unsigned)Pred).addReg(PredReg).addReg(0) + .setMIFlags(MIFlags); BaseReg = DestReg; } } @@ -1610,18 +1618,84 @@ OptimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, int CmpMask, // Set the "zero" bit in CPSR. switch (MI->getOpcode()) { default: break; + case ARM::RSBrr: + case ARM::RSBri: + case ARM::RSCrr: + case ARM::RSCri: + case ARM::ADDrr: case ARM::ADDri: - case ARM::ANDri: - case ARM::t2ANDri: + case ARM::ADCrr: + case ARM::ADCri: + case ARM::SUBrr: case ARM::SUBri: + case ARM::SBCrr: + case ARM::SBCri: + case ARM::t2RSBri: + case ARM::t2ADDrr: case ARM::t2ADDri: + case ARM::t2ADCrr: + case ARM::t2ADCri: + case ARM::t2SUBrr: case ARM::t2SUBri: + case ARM::t2SBCrr: + case ARM::t2SBCri: + case ARM::ANDrr: + case ARM::ANDri: + case ARM::t2ANDrr: + case ARM::t2ANDri: + case ARM::ORRrr: + case ARM::ORRri: + case ARM::t2ORRrr: + case ARM::t2ORRri: + case ARM::EORrr: + case ARM::EORri: + case ARM::t2EORrr: + case ARM::t2EORri: { + // Scan forward for the use of CPSR, if it's a conditional code requires + // checking of V bit, then this is not safe to do. If we can't find the + // CPSR use (i.e. used in another block), then it's not safe to perform + // the optimization. + bool isSafe = false; + I = CmpInstr; + E = MI->getParent()->end(); + while (!isSafe && ++I != E) { + const MachineInstr &Instr = *I; + for (unsigned IO = 0, EO = Instr.getNumOperands(); + !isSafe && IO != EO; ++IO) { + const MachineOperand &MO = Instr.getOperand(IO); + if (!MO.isReg() || MO.getReg() != ARM::CPSR) + continue; + if (MO.isDef()) { + isSafe = true; + break; + } + // Condition code is after the operand before CPSR. + ARMCC::CondCodes CC = (ARMCC::CondCodes)Instr.getOperand(IO-1).getImm(); + switch (CC) { + default: + isSafe = true; + break; + case ARMCC::VS: + case ARMCC::VC: + case ARMCC::GE: + case ARMCC::LT: + case ARMCC::GT: + case ARMCC::LE: + return false; + } + } + } + + if (!isSafe) + return false; + // Toggle the optional operand to CPSR. MI->getOperand(5).setReg(ARM::CPSR); MI->getOperand(5).setIsDef(true); CmpInstr->eraseFromParent(); return true; } + } return false; } @@ -1741,9 +1815,7 @@ ARMBaseInstrInfo::getNumMicroOps(const InstrItineraryData *ItinData, llvm_unreachable("Unexpected multi-uops instruction!"); break; case ARM::VLDMQIA: - case ARM::VLDMQDB: case ARM::VSTMQIA: - case ARM::VSTMQDB: return 2; // The number of uOps for load / store multiple are determined by the number @@ -1757,19 +1829,15 @@ ARMBaseInstrInfo::getNumMicroOps(const InstrItineraryData *ItinData, // is not 64-bit aligned, then AGU would take an extra cycle. For VFP / NEON // load / store multiple, the formula is (#reg / 2) + (#reg % 2) + 1. 
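Why the new scan in OptimizeCompareInstr bails on VS/VC/GE/LT/GT/LE: those condition codes read the V flag. A compare against zero (the case this transform targets) computes reg - 0 and so always leaves V clear, whereas the S-form that replaces it either sets V according to its own operation (ADDS/SUBS) or leaves it untouched entirely (ANDS/ORRS/EORS). N and Z depend only on the result value and therefore survive the folding, which is why only V-reading consumers force the optimization off.

As a quick worked example of the uop formula quoted just above: a VFP/NEON load multiple of five D registers costs 5/2 + 5%2 + 1 = 2 + 1 + 1 = 4 uops.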
case ARM::VLDMDIA: - case ARM::VLDMDDB: case ARM::VLDMDIA_UPD: case ARM::VLDMDDB_UPD: case ARM::VLDMSIA: - case ARM::VLDMSDB: case ARM::VLDMSIA_UPD: case ARM::VLDMSDB_UPD: case ARM::VSTMDIA: - case ARM::VSTMDDB: case ARM::VSTMDIA_UPD: case ARM::VSTMDDB_UPD: case ARM::VSTMSIA: - case ARM::VSTMSDB: case ARM::VSTMSIA_UPD: case ARM::VSTMSDB_UPD: { unsigned NumRegs = MI->getNumOperands() - Desc.getNumOperands(); @@ -1859,7 +1927,6 @@ ARMBaseInstrInfo::getVLDMDefCycle(const InstrItineraryData *ItinData, switch (DefTID.getOpcode()) { default: break; case ARM::VLDMSIA: - case ARM::VLDMSDB: case ARM::VLDMSIA_UPD: case ARM::VLDMSDB_UPD: isSLoad = true; @@ -1935,7 +2002,6 @@ ARMBaseInstrInfo::getVSTMUseCycle(const InstrItineraryData *ItinData, switch (UseTID.getOpcode()) { default: break; case ARM::VSTMSIA: - case ARM::VSTMSDB: case ARM::VSTMSIA_UPD: case ARM::VSTMSDB_UPD: isSStore = true; @@ -2006,11 +2072,9 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, break; case ARM::VLDMDIA: - case ARM::VLDMDDB: case ARM::VLDMDIA_UPD: case ARM::VLDMDDB_UPD: case ARM::VLDMSIA: - case ARM::VLDMSDB: case ARM::VLDMSIA_UPD: case ARM::VLDMSDB_UPD: DefCycle = getVLDMDefCycle(ItinData, DefTID, DefClass, DefIdx, DefAlign); @@ -2049,11 +2113,9 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, break; case ARM::VSTMDIA: - case ARM::VSTMDDB: case ARM::VSTMDIA_UPD: case ARM::VSTMDDB_UPD: case ARM::VSTMSIA: - case ARM::VSTMSDB: case ARM::VSTMSIA_UPD: case ARM::VSTMSDB_UPD: UseCycle = getVSTMUseCycle(ItinData, UseTID, UseClass, UseIdx, UseAlign); @@ -2160,6 +2222,101 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, } } + if (DefAlign < 8 && Subtarget.isCortexA9()) + switch (DefTID.getOpcode()) { + default: break; + case ARM::VLD1q8: + case ARM::VLD1q16: + case ARM::VLD1q32: + case ARM::VLD1q64: + case ARM::VLD1q8_UPD: + case ARM::VLD1q16_UPD: + case ARM::VLD1q32_UPD: + case ARM::VLD1q64_UPD: + case ARM::VLD2d8: + case ARM::VLD2d16: + case ARM::VLD2d32: + case ARM::VLD2q8: + case ARM::VLD2q16: + case ARM::VLD2q32: + case ARM::VLD2d8_UPD: + case ARM::VLD2d16_UPD: + case ARM::VLD2d32_UPD: + case ARM::VLD2q8_UPD: + case ARM::VLD2q16_UPD: + case ARM::VLD2q32_UPD: + case ARM::VLD3d8: + case ARM::VLD3d16: + case ARM::VLD3d32: + case ARM::VLD1d64T: + case ARM::VLD3d8_UPD: + case ARM::VLD3d16_UPD: + case ARM::VLD3d32_UPD: + case ARM::VLD1d64T_UPD: + case ARM::VLD3q8_UPD: + case ARM::VLD3q16_UPD: + case ARM::VLD3q32_UPD: + case ARM::VLD4d8: + case ARM::VLD4d16: + case ARM::VLD4d32: + case ARM::VLD1d64Q: + case ARM::VLD4d8_UPD: + case ARM::VLD4d16_UPD: + case ARM::VLD4d32_UPD: + case ARM::VLD1d64Q_UPD: + case ARM::VLD4q8_UPD: + case ARM::VLD4q16_UPD: + case ARM::VLD4q32_UPD: + case ARM::VLD1DUPq8: + case ARM::VLD1DUPq16: + case ARM::VLD1DUPq32: + case ARM::VLD1DUPq8_UPD: + case ARM::VLD1DUPq16_UPD: + case ARM::VLD1DUPq32_UPD: + case ARM::VLD2DUPd8: + case ARM::VLD2DUPd16: + case ARM::VLD2DUPd32: + case ARM::VLD2DUPd8_UPD: + case ARM::VLD2DUPd16_UPD: + case ARM::VLD2DUPd32_UPD: + case ARM::VLD4DUPd8: + case ARM::VLD4DUPd16: + case ARM::VLD4DUPd32: + case ARM::VLD4DUPd8_UPD: + case ARM::VLD4DUPd16_UPD: + case ARM::VLD4DUPd32_UPD: + case ARM::VLD1LNd8: + case ARM::VLD1LNd16: + case ARM::VLD1LNd32: + case ARM::VLD1LNd8_UPD: + case ARM::VLD1LNd16_UPD: + case ARM::VLD1LNd32_UPD: + case ARM::VLD2LNd8: + case ARM::VLD2LNd16: + case ARM::VLD2LNd32: + case ARM::VLD2LNq16: + case ARM::VLD2LNq32: + case ARM::VLD2LNd8_UPD: + case ARM::VLD2LNd16_UPD: + case 
ARM::VLD2LNd32_UPD: + case ARM::VLD2LNq16_UPD: + case ARM::VLD2LNq32_UPD: + case ARM::VLD4LNd8: + case ARM::VLD4LNd16: + case ARM::VLD4LNd32: + case ARM::VLD4LNq16: + case ARM::VLD4LNq32: + case ARM::VLD4LNd8_UPD: + case ARM::VLD4LNd16_UPD: + case ARM::VLD4LNd32_UPD: + case ARM::VLD4LNq16_UPD: + case ARM::VLD4LNq32_UPD: + // If the address is not 64-bit aligned, the latencies of these + // instructions increases by one. + ++Latency; + break; + } + return Latency; } @@ -2226,6 +2383,113 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, } } + if (DefAlign < 8 && Subtarget.isCortexA9()) + switch (DefTID.getOpcode()) { + default: break; + case ARM::VLD1q8Pseudo: + case ARM::VLD1q16Pseudo: + case ARM::VLD1q32Pseudo: + case ARM::VLD1q64Pseudo: + case ARM::VLD1q8Pseudo_UPD: + case ARM::VLD1q16Pseudo_UPD: + case ARM::VLD1q32Pseudo_UPD: + case ARM::VLD1q64Pseudo_UPD: + case ARM::VLD2d8Pseudo: + case ARM::VLD2d16Pseudo: + case ARM::VLD2d32Pseudo: + case ARM::VLD2q8Pseudo: + case ARM::VLD2q16Pseudo: + case ARM::VLD2q32Pseudo: + case ARM::VLD2d8Pseudo_UPD: + case ARM::VLD2d16Pseudo_UPD: + case ARM::VLD2d32Pseudo_UPD: + case ARM::VLD2q8Pseudo_UPD: + case ARM::VLD2q16Pseudo_UPD: + case ARM::VLD2q32Pseudo_UPD: + case ARM::VLD3d8Pseudo: + case ARM::VLD3d16Pseudo: + case ARM::VLD3d32Pseudo: + case ARM::VLD1d64TPseudo: + case ARM::VLD3d8Pseudo_UPD: + case ARM::VLD3d16Pseudo_UPD: + case ARM::VLD3d32Pseudo_UPD: + case ARM::VLD1d64TPseudo_UPD: + case ARM::VLD3q8Pseudo_UPD: + case ARM::VLD3q16Pseudo_UPD: + case ARM::VLD3q32Pseudo_UPD: + case ARM::VLD3q8oddPseudo: + case ARM::VLD3q16oddPseudo: + case ARM::VLD3q32oddPseudo: + case ARM::VLD3q8oddPseudo_UPD: + case ARM::VLD3q16oddPseudo_UPD: + case ARM::VLD3q32oddPseudo_UPD: + case ARM::VLD4d8Pseudo: + case ARM::VLD4d16Pseudo: + case ARM::VLD4d32Pseudo: + case ARM::VLD1d64QPseudo: + case ARM::VLD4d8Pseudo_UPD: + case ARM::VLD4d16Pseudo_UPD: + case ARM::VLD4d32Pseudo_UPD: + case ARM::VLD1d64QPseudo_UPD: + case ARM::VLD4q8Pseudo_UPD: + case ARM::VLD4q16Pseudo_UPD: + case ARM::VLD4q32Pseudo_UPD: + case ARM::VLD4q8oddPseudo: + case ARM::VLD4q16oddPseudo: + case ARM::VLD4q32oddPseudo: + case ARM::VLD4q8oddPseudo_UPD: + case ARM::VLD4q16oddPseudo_UPD: + case ARM::VLD4q32oddPseudo_UPD: + case ARM::VLD1DUPq8Pseudo: + case ARM::VLD1DUPq16Pseudo: + case ARM::VLD1DUPq32Pseudo: + case ARM::VLD1DUPq8Pseudo_UPD: + case ARM::VLD1DUPq16Pseudo_UPD: + case ARM::VLD1DUPq32Pseudo_UPD: + case ARM::VLD2DUPd8Pseudo: + case ARM::VLD2DUPd16Pseudo: + case ARM::VLD2DUPd32Pseudo: + case ARM::VLD2DUPd8Pseudo_UPD: + case ARM::VLD2DUPd16Pseudo_UPD: + case ARM::VLD2DUPd32Pseudo_UPD: + case ARM::VLD4DUPd8Pseudo: + case ARM::VLD4DUPd16Pseudo: + case ARM::VLD4DUPd32Pseudo: + case ARM::VLD4DUPd8Pseudo_UPD: + case ARM::VLD4DUPd16Pseudo_UPD: + case ARM::VLD4DUPd32Pseudo_UPD: + case ARM::VLD1LNq8Pseudo: + case ARM::VLD1LNq16Pseudo: + case ARM::VLD1LNq32Pseudo: + case ARM::VLD1LNq8Pseudo_UPD: + case ARM::VLD1LNq16Pseudo_UPD: + case ARM::VLD1LNq32Pseudo_UPD: + case ARM::VLD2LNd8Pseudo: + case ARM::VLD2LNd16Pseudo: + case ARM::VLD2LNd32Pseudo: + case ARM::VLD2LNq16Pseudo: + case ARM::VLD2LNq32Pseudo: + case ARM::VLD2LNd8Pseudo_UPD: + case ARM::VLD2LNd16Pseudo_UPD: + case ARM::VLD2LNd32Pseudo_UPD: + case ARM::VLD2LNq16Pseudo_UPD: + case ARM::VLD2LNq32Pseudo_UPD: + case ARM::VLD4LNd8Pseudo: + case ARM::VLD4LNd16Pseudo: + case ARM::VLD4LNd32Pseudo: + case ARM::VLD4LNq16Pseudo: + case ARM::VLD4LNq32Pseudo: + case ARM::VLD4LNd8Pseudo_UPD: + case ARM::VLD4LNd16Pseudo_UPD: + case 
ARM::VLD4LNd32Pseudo_UPD: + case ARM::VLD4LNq16Pseudo_UPD: + case ARM::VLD4LNq32Pseudo_UPD: + // If the address is not 64-bit aligned, the latencies of these + // instructions increases by one. + ++Latency; + break; + } + return Latency; } @@ -2264,9 +2528,7 @@ int ARMBaseInstrInfo::getInstrLatency(const InstrItineraryData *ItinData, default: return ItinData->getStageLatency(get(Opcode).getSchedClass()); case ARM::VLDMQIA: - case ARM::VLDMQDB: case ARM::VSTMQIA: - case ARM::VSTMQDB: return 2; } } diff --git a/lib/Target/ARM/ARMBaseInstrInfo.h b/lib/Target/ARM/ARMBaseInstrInfo.h index 7e2183d7cd5e..9a2faf8f9aae 100644 --- a/lib/Target/ARM/ARMBaseInstrInfo.h +++ b/lib/Target/ARM/ARMBaseInstrInfo.h @@ -34,25 +34,7 @@ namespace ARMII { //===------------------------------------------------------------------===// // This four-bit field describes the addressing mode used. - - AddrModeMask = 0x1f, - AddrModeNone = 0, - AddrMode1 = 1, - AddrMode2 = 2, - AddrMode3 = 3, - AddrMode4 = 4, - AddrMode5 = 5, - AddrMode6 = 6, - AddrModeT1_1 = 7, - AddrModeT1_2 = 8, - AddrModeT1_4 = 9, - AddrModeT1_s = 10, // i8 * 4 for pc and sp relative data - AddrModeT2_i12 = 11, - AddrModeT2_i8 = 12, - AddrModeT2_so = 13, - AddrModeT2_pc = 14, // +/- i12 for pc relative data - AddrModeT2_i8s4 = 15, // i8 * 4 - AddrMode_i12 = 16, + AddrModeMask = 0x1f, // The AddrMode enums are declared in ARMBaseInfo.h // Size* - Flags to keep track of the size of an instruction. SizeShift = 5, @@ -64,11 +46,9 @@ namespace ARMII { // IndexMode - Unindex, pre-indexed, or post-indexed are valid for load // and store ops only. Generic "updating" flag is used for ld/st multiple. + // The index mode enums are declared in ARMBaseInfo.h IndexModeShift = 8, IndexModeMask = 3 << IndexModeShift, - IndexModePre = 1, - IndexModePost = 2, - IndexModeUpd = 3, //===------------------------------------------------------------------===// // Instruction encoding formats. @@ -311,7 +291,7 @@ public: int64_t &Offset1, int64_t &Offset2)const; /// shouldScheduleLoadsNear - This is a used by the pre-regalloc scheduler to - /// determine (in conjuction with areLoadsFromSameBasePtr) if two loads should + /// determine (in conjunction with areLoadsFromSameBasePtr) if two loads should /// be scheduled togther. On some targets if two loads are loading from /// addresses in the same cache line, it's better if they are scheduled /// together. 
This function takes two integers that represent the load offsets @@ -327,7 +307,7 @@ public: const MachineFunction &MF) const; virtual bool isProfitableToIfCvt(MachineBasicBlock &MBB, - unsigned NumCyles, unsigned ExtraPredCycles, + unsigned NumCycles, unsigned ExtraPredCycles, float Prob, float Confidence) const; virtual bool isProfitableToIfCvt(MachineBasicBlock &TMBB, @@ -337,10 +317,10 @@ public: float Probability, float Confidence) const; virtual bool isProfitableToDupForIfCvt(MachineBasicBlock &MBB, - unsigned NumCyles, + unsigned NumCycles, float Probability, float Confidence) const { - return NumCyles == 1; + return NumCycles == 1; } /// AnalyzeCompare - For a comparison instruction, return the source register @@ -496,19 +476,19 @@ void emitARMRegPlusImmediate(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, DebugLoc dl, unsigned DestReg, unsigned BaseReg, int NumBytes, ARMCC::CondCodes Pred, unsigned PredReg, - const ARMBaseInstrInfo &TII); + const ARMBaseInstrInfo &TII, unsigned MIFlags = 0); void emitT2RegPlusImmediate(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, DebugLoc dl, unsigned DestReg, unsigned BaseReg, int NumBytes, ARMCC::CondCodes Pred, unsigned PredReg, - const ARMBaseInstrInfo &TII); + const ARMBaseInstrInfo &TII, unsigned MIFlags = 0); void emitThumbRegPlusImmediate(MachineBasicBlock &MBB, - MachineBasicBlock::iterator &MBBI, + MachineBasicBlock::iterator &MBBI, DebugLoc dl, unsigned DestReg, unsigned BaseReg, int NumBytes, const TargetInstrInfo &TII, const ARMBaseRegisterInfo& MRI, - DebugLoc dl); + unsigned MIFlags = 0); /// rewriteARMFrameIndex / rewriteT2FrameIndex - diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/lib/Target/ARM/ARMBaseRegisterInfo.cpp index 67a4b7d49398..ea1f08a7da8d 100644 --- a/lib/Target/ARM/ARMBaseRegisterInfo.cpp +++ b/lib/Target/ARM/ARMBaseRegisterInfo.cpp @@ -88,7 +88,7 @@ BitVector ARMBaseRegisterInfo:: getReservedRegs(const MachineFunction &MF) const { const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); - // FIXME: avoid re-calculating this everytime. + // FIXME: avoid re-calculating this every time. BitVector Reserved(getNumRegs()); Reserved.set(ARM::SP); Reserved.set(ARM::PC); @@ -342,12 +342,51 @@ ARMBaseRegisterInfo::canCombineSubRegIndices(const TargetRegisterClass *RC, return false; } +const TargetRegisterClass* +ARMBaseRegisterInfo::getLargestLegalSuperClass(const TargetRegisterClass *RC) + const { + const TargetRegisterClass *Super = RC; + TargetRegisterClass::sc_iterator I = RC->superclasses_begin(); + do { + switch (Super->getID()) { + case ARM::GPRRegClassID: + case ARM::SPRRegClassID: + case ARM::DPRRegClassID: + case ARM::QPRRegClassID: + case ARM::QQPRRegClassID: + case ARM::QQQQPRRegClassID: + return Super; + } + Super = *I++; + } while (Super); + return RC; +} const TargetRegisterClass * ARMBaseRegisterInfo::getPointerRegClass(unsigned Kind) const { return ARM::GPRRegisterClass; } +unsigned +ARMBaseRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC, + MachineFunction &MF) const { + const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); + + switch (RC->getID()) { + default: + return 0; + case ARM::tGPRRegClassID: + return TFI->hasFP(MF) ? 4 : 5; + case ARM::GPRRegClassID: { + unsigned FP = TFI->hasFP(MF) ? 1 : 0; + return 10 - FP - (STI.isR9Reserved() ? 1 : 0); + } + case ARM::SPRRegClassID: // Currently not used as 'rep' register class. 
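The NumCyles to NumCycles rename here touches the if-conversion cost model whose implementation appears earlier in this diff: UnpredCost = Probability * NumCycles + 1 + (1 - Confidence) * MispredictionPenalty, and the block is predicated when NumCycles + ExtraPredCycles < UnpredCost. For example, with NumCycles = 2, ExtraPredCycles = 1, Probability = 0.9, Confidence = 0.9, and a hypothetical 8-cycle misprediction penalty: UnpredCost = 0.9 * 2 + 1 + 0.1 * 8 = 3.6, so the 3-cycle predicated version wins. Drop Probability to 0.5 and UnpredCost = 2.8, so the branch is kept instead.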
+ case ARM::DPRRegClassID: + return 32 - 10; + } +} + /// getAllocationOrder - Returns the register allocation order for a specified /// register class in the form of a pair of TargetRegisterClass iterators. std::pair<TargetRegisterClass::iterator,TargetRegisterClass::iterator> @@ -428,6 +467,10 @@ ARMBaseRegisterInfo::getAllocationOrder(const TargetRegisterClass *RC, ARM::R0, ARM::R2, ARM::R10,ARM::R12,ARM::LR, ARM::R4, ARM::R6, ARM::R8 }; + // We only support even/odd hints for GPR and rGPR. + if (RC != ARM::GPRRegisterClass && RC != ARM::rGPRRegisterClass) + return std::make_pair(RC->allocation_order_begin(MF), + RC->allocation_order_end(MF)); if (HintType == ARMRI::RegPairEven) { if (isPhysicalRegister(HintReg) && getRegisterPairEven(HintReg, MF) == 0) @@ -530,6 +573,29 @@ ARMBaseRegisterInfo::UpdateRegAllocHint(unsigned Reg, unsigned NewReg, } } +bool +ARMBaseRegisterInfo::avoidWriteAfterWrite(const TargetRegisterClass *RC) const { + // CortexA9 has a Write-after-write hazard for NEON registers. + if (!STI.isCortexA9()) + return false; + + switch (RC->getID()) { + case ARM::DPRRegClassID: + case ARM::DPR_8RegClassID: + case ARM::DPR_VFP2RegClassID: + case ARM::QPRRegClassID: + case ARM::QPR_8RegClassID: + case ARM::QPR_VFP2RegClassID: + case ARM::SPRRegClassID: + case ARM::SPR_8RegClassID: + // Avoid reusing S, D, and Q registers. + // Don't increase register pressure for QQ and QQQQ. + return true; + default: + return false; + } +} + bool ARMBaseRegisterInfo::hasBasePointer(const MachineFunction &MF) const { const MachineFrameInfo *MFI = MF.getFrameInfo(); const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); @@ -806,7 +872,7 @@ emitLoadConstPool(MachineBasicBlock &MBB, DebugLoc dl, unsigned DestReg, unsigned SubIdx, int Val, ARMCC::CondCodes Pred, - unsigned PredReg) const { + unsigned PredReg, unsigned MIFlags) const { MachineFunction &MF = *MBB.getParent(); MachineConstantPool *ConstantPool = MF.getConstantPool(); const Constant *C = @@ -816,7 +882,8 @@ emitLoadConstPool(MachineBasicBlock &MBB, BuildMI(MBB, MBBI, dl, TII.get(ARM::LDRcp)) .addReg(DestReg, getDefRegState(true), SubIdx) .addConstantPoolIndex(Idx) - .addImm(0).addImm(Pred).addReg(PredReg); + .addImm(0).addImm(Pred).addReg(PredReg) + .setMIFlags(MIFlags); } bool ARMBaseRegisterInfo:: diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.h b/lib/Target/ARM/ARMBaseRegisterInfo.h index ba6bd2b32082..9edf72df2158 100644 --- a/lib/Target/ARM/ARMBaseRegisterInfo.h +++ b/lib/Target/ARM/ARMBaseRegisterInfo.h @@ -128,6 +128,12 @@ public: const TargetRegisterClass *getPointerRegClass(unsigned Kind = 0) const; + const TargetRegisterClass* + getLargestLegalSuperClass(const TargetRegisterClass *RC) const; + + unsigned getRegPressureLimit(const TargetRegisterClass *RC, + MachineFunction &MF) const; + std::pair<TargetRegisterClass::iterator,TargetRegisterClass::iterator> getAllocationOrder(const TargetRegisterClass *RC, unsigned HintType, unsigned HintReg, @@ -139,6 +145,8 @@ public: void UpdateRegAllocHint(unsigned Reg, unsigned NewReg, MachineFunction &MF) const; + virtual bool avoidWriteAfterWrite(const TargetRegisterClass *RC) const; + bool hasBasePointer(const MachineFunction &MF) const; bool canRealignStack(const MachineFunction &MF) const; @@ -176,7 +184,8 @@ public: unsigned DestReg, unsigned SubIdx, int Val, ARMCC::CondCodes Pred = ARMCC::AL, - unsigned PredReg = 0) const; + unsigned PredReg = 0, + unsigned MIFlags = MachineInstr::NoFlags)const; /// Code Generation virtual methods... 
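Worked numbers for getRegPressureLimit above: with a frame pointer and R9 reserved, the GPR class gets a limit of 10 - 1 - 1 = 8 registers; tGPR gets 4 with a frame pointer and 5 without; the SPR/DPR cases return a flat 32 - 10 = 22.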
virtual bool isReservedReg(const MachineFunction &MF, unsigned Reg) const; diff --git a/lib/Target/ARM/ARMCallingConv.td b/lib/Target/ARM/ARMCallingConv.td index 426ba13a8e11..d2981c0af8ca 100644 --- a/lib/Target/ARM/ARMCallingConv.td +++ b/lib/Target/ARM/ARMCallingConv.td @@ -22,6 +22,9 @@ class CCIfAlign<string Align, CCAction A>: //===----------------------------------------------------------------------===// def CC_ARM_APCS : CallingConv<[ + // Handles byval parameters. + CCIfByVal<CCPassByVal<4, 4>>, + CCIfType<[i8, i16], CCPromoteToType<i32>>, // Handle all vector types as either f64 or v2f64. diff --git a/lib/Target/ARM/ARMCodeEmitter.cpp b/lib/Target/ARM/ARMCodeEmitter.cpp index 9bbf6a030687..fa7371626f29 100644 --- a/lib/Target/ARM/ARMCodeEmitter.cpp +++ b/lib/Target/ARM/ARMCodeEmitter.cpp @@ -312,6 +312,15 @@ namespace { unsigned getRegisterListOpValue(const MachineInstr &MI, unsigned Op) const { return 0; } + unsigned getShiftRight8Imm(const MachineInstr &MI, unsigned Op) + const { return 0; } + unsigned getShiftRight16Imm(const MachineInstr &MI, unsigned Op) + const { return 0; } + unsigned getShiftRight32Imm(const MachineInstr &MI, unsigned Op) + const { return 0; } + unsigned getShiftRight64Imm(const MachineInstr &MI, unsigned Op) + const { return 0; } + /// getMovi32Value - Return binary encoding of operand for movw/movt. If the /// machine operand requires relocation, record the relocation and return /// zero. @@ -969,7 +978,7 @@ unsigned ARMCodeEmitter::getMachineSoImmOpValue(unsigned SoImm) { unsigned ARMCodeEmitter::getAddrModeSBit(const MachineInstr &MI, const TargetInstrDesc &TID) const { - for (unsigned i = MI.getNumOperands(), e = TID.getNumOperands(); i != e; --i){ + for (unsigned i = MI.getNumOperands(), e = TID.getNumOperands(); i >= e; --i){ const MachineOperand &MO = MI.getOperand(i-1); if (MO.isReg() && MO.isDef() && MO.getReg() == ARM::CPSR) return 1 << ARMII::S_BitShift; diff --git a/lib/Target/ARM/ARMConstantIslandPass.cpp b/lib/Target/ARM/ARMConstantIslandPass.cpp index 13d1b33d1165..baf95a33dd4b 100644 --- a/lib/Target/ARM/ARMConstantIslandPass.cpp +++ b/lib/Target/ARM/ARMConstantIslandPass.cpp @@ -1650,24 +1650,27 @@ bool ARMConstantIslands::OptimizeThumb2Branches(MachineFunction &MF) { unsigned BrOffset = GetOffsetOf(Br.MI) + 4 - 2; unsigned DestOffset = BBOffsets[DestBB->getNumber()]; if (BrOffset < DestOffset && (DestOffset - BrOffset) <= 126) { - MachineBasicBlock::iterator CmpMI = Br.MI; --CmpMI; - if (CmpMI->getOpcode() == ARM::tCMPi8) { - unsigned Reg = CmpMI->getOperand(0).getReg(); - Pred = llvm::getInstrPredicate(CmpMI, PredReg); - if (Pred == ARMCC::AL && - CmpMI->getOperand(1).getImm() == 0 && - isARMLowRegister(Reg)) { - MachineBasicBlock *MBB = Br.MI->getParent(); - MachineInstr *NewBR = - BuildMI(*MBB, CmpMI, Br.MI->getDebugLoc(), TII->get(NewOpc)) - .addReg(Reg).addMBB(DestBB, Br.MI->getOperand(0).getTargetFlags()); - CmpMI->eraseFromParent(); - Br.MI->eraseFromParent(); - Br.MI = NewBR; - BBSizes[MBB->getNumber()] -= 2; - AdjustBBOffsetsAfter(MBB, -2); - ++NumCBZ; - MadeChange = true; + MachineBasicBlock::iterator CmpMI = Br.MI; + if (CmpMI != Br.MI->getParent()->begin()) { + --CmpMI; + if (CmpMI->getOpcode() == ARM::tCMPi8) { + unsigned Reg = CmpMI->getOperand(0).getReg(); + Pred = llvm::getInstrPredicate(CmpMI, PredReg); + if (Pred == ARMCC::AL && + CmpMI->getOperand(1).getImm() == 0 && + isARMLowRegister(Reg)) { + MachineBasicBlock *MBB = Br.MI->getParent(); + MachineInstr *NewBR = + BuildMI(*MBB, CmpMI, Br.MI->getDebugLoc(), 
TII->get(NewOpc)) + .addReg(Reg).addMBB(DestBB,Br.MI->getOperand(0).getTargetFlags()); + CmpMI->eraseFromParent(); + Br.MI->eraseFromParent(); + Br.MI = NewBR; + BBSizes[MBB->getNumber()] -= 2; + AdjustBBOffsetsAfter(MBB, -2); + ++NumCBZ; + MadeChange = true; + } } } } diff --git a/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/lib/Target/ARM/ARMExpandPseudoInsts.cpp index bd753d29abde..b6b3c75943b5 100644 --- a/lib/Target/ARM/ARMExpandPseudoInsts.cpp +++ b/lib/Target/ARM/ARMExpandPseudoInsts.cpp @@ -455,6 +455,10 @@ void ARMExpandPseudo::ExpandVLD(MachineBasicBlock::iterator &MBBI) { // Add an implicit def for the super-register. MIB.addReg(DstReg, RegState::ImplicitDefine | getDeadRegState(DstIsDead)); TransferImpOps(MI, MIB, MIB); + + // Transfer memoperands. + MIB->setMemRefs(MI.memoperands_begin(), MI.memoperands_end()); + MI.eraseFromParent(); } @@ -496,10 +500,13 @@ void ARMExpandPseudo::ExpandVST(MachineBasicBlock::iterator &MBBI) { MIB.addOperand(MI.getOperand(OpIdx++)); MIB.addOperand(MI.getOperand(OpIdx++)); - if (SrcIsKill) - // Add an implicit kill for the super-reg. - (*MIB).addRegisterKilled(SrcReg, TRI, true); + if (SrcIsKill) // Add an implicit kill for the super-reg. + MIB->addRegisterKilled(SrcReg, TRI, true); TransferImpOps(MI, MIB, MIB); + + // Transfer memoperands. + MIB->setMemRefs(MI.memoperands_begin(), MI.memoperands_end()); + MI.eraseFromParent(); } @@ -622,9 +629,8 @@ void ARMExpandPseudo::ExpandVTBL(MachineBasicBlock::iterator &MBBI, MIB.addOperand(MI.getOperand(OpIdx++)); MIB.addOperand(MI.getOperand(OpIdx++)); - if (SrcIsKill) - // Add an implicit kill for the super-reg. - (*MIB).addRegisterKilled(SrcReg, TRI, true); + if (SrcIsKill) // Add an implicit kill for the super-reg. + MIB->addRegisterKilled(SrcReg, TRI, true); TransferImpOps(MI, MIB, MIB); MI.eraseFromParent(); } @@ -655,8 +661,8 @@ void ARMExpandPseudo::ExpandMOV32BitImm(MachineBasicBlock &MBB, unsigned SOImmValV2 = ARM_AM::getSOImmTwoPartSecond(ImmVal); LO16 = LO16.addImm(SOImmValV1); HI16 = HI16.addImm(SOImmValV2); - (*LO16).setMemRefs(MI.memoperands_begin(), MI.memoperands_end()); - (*HI16).setMemRefs(MI.memoperands_begin(), MI.memoperands_end()); + LO16->setMemRefs(MI.memoperands_begin(), MI.memoperands_end()); + HI16->setMemRefs(MI.memoperands_begin(), MI.memoperands_end()); LO16.addImm(Pred).addReg(PredReg).addReg(0); HI16.addImm(Pred).addReg(PredReg).addReg(0); TransferImpOps(MI, LO16, HI16); @@ -692,8 +698,8 @@ void ARMExpandPseudo::ExpandMOV32BitImm(MachineBasicBlock &MBB, HI16 = HI16.addGlobalAddress(GV, MO.getOffset(), TF | ARMII::MO_HI16); } - (*LO16).setMemRefs(MI.memoperands_begin(), MI.memoperands_end()); - (*HI16).setMemRefs(MI.memoperands_begin(), MI.memoperands_end()); + LO16->setMemRefs(MI.memoperands_begin(), MI.memoperands_end()); + HI16->setMemRefs(MI.memoperands_begin(), MI.memoperands_end()); LO16.addImm(Pred).addReg(PredReg); HI16.addImm(Pred).addReg(PredReg); @@ -708,6 +714,78 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, switch (Opcode) { default: return false; + case ARM::VMOVScc: + case ARM::VMOVDcc: { + unsigned newOpc = Opcode == ARM::VMOVScc ? 
ARM::VMOVS : ARM::VMOVD; + BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(newOpc), + MI.getOperand(1).getReg()) + .addReg(MI.getOperand(2).getReg(), + getKillRegState(MI.getOperand(2).isKill())) + .addImm(MI.getOperand(3).getImm()) // 'pred' + .addReg(MI.getOperand(4).getReg()); + + MI.eraseFromParent(); + return true; + } + case ARM::MOVCCr: { + BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MOVr), + MI.getOperand(1).getReg()) + .addReg(MI.getOperand(2).getReg(), + getKillRegState(MI.getOperand(2).isKill())) + .addImm(MI.getOperand(3).getImm()) // 'pred' + .addReg(MI.getOperand(4).getReg()) + .addReg(0); // 's' bit + + MI.eraseFromParent(); + return true; + } + case ARM::MOVCCs: { + BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MOVs), + (MI.getOperand(1).getReg())) + .addReg(MI.getOperand(2).getReg(), + getKillRegState(MI.getOperand(2).isKill())) + .addReg(MI.getOperand(3).getReg(), + getKillRegState(MI.getOperand(3).isKill())) + .addImm(MI.getOperand(4).getImm()) + .addImm(MI.getOperand(5).getImm()) // 'pred' + .addReg(MI.getOperand(6).getReg()) + .addReg(0); // 's' bit + + MI.eraseFromParent(); + return true; + } + case ARM::MOVCCi16: { + BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MOVi16), + MI.getOperand(1).getReg()) + .addImm(MI.getOperand(2).getImm()) + .addImm(MI.getOperand(3).getImm()) // 'pred' + .addReg(MI.getOperand(4).getReg()); + + MI.eraseFromParent(); + return true; + } + case ARM::MOVCCi: { + BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MOVi), + MI.getOperand(1).getReg()) + .addImm(MI.getOperand(2).getImm()) + .addImm(MI.getOperand(3).getImm()) // 'pred' + .addReg(MI.getOperand(4).getReg()) + .addReg(0); // 's' bit + + MI.eraseFromParent(); + return true; + } + case ARM::MVNCCi: { + BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MVNi), + MI.getOperand(1).getReg()) + .addImm(MI.getOperand(2).getImm()) + .addImm(MI.getOperand(3).getImm()) // 'pred' + .addReg(MI.getOperand(4).getReg()) + .addReg(0); // 's' bit + + MI.eraseFromParent(); + return true; + } case ARM::Int_eh_sjlj_dispatchsetup: { MachineFunction &MF = *MI.getParent()->getParent(); const ARMBaseInstrInfo *AII = @@ -726,9 +804,8 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, llvm::emitT2RegPlusImmediate(MBB, MBBI, MI.getDebugLoc(), ARM::R6, FramePtr, -NumBytes, ARMCC::AL, 0, *TII); } else if (AFI->isThumbFunction()) { - llvm::emitThumbRegPlusImmediate(MBB, MBBI, ARM::R6, - FramePtr, -NumBytes, - *TII, RI, MI.getDebugLoc()); + llvm::emitThumbRegPlusImmediate(MBB, MBBI, MI.getDebugLoc(), ARM::R6, + FramePtr, -NumBytes, *TII, RI); } else { llvm::emitARMRegPlusImmediate(MBB, MBBI, MI.getDebugLoc(), ARM::R6, FramePtr, -NumBytes, ARMCC::AL, 0, @@ -785,7 +862,7 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, TII->get(ARM::BL)) .addExternalSymbol("__aeabi_read_tp", 0); - (*MIB).setMemRefs(MI.memoperands_begin(), MI.memoperands_end()); + MIB->setMemRefs(MI.memoperands_begin(), MI.memoperands_end()); TransferImpOps(MI, MIB, MIB); MI.eraseFromParent(); return true; @@ -800,7 +877,7 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, AddDefaultPred(BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(NewLdOpc), DstReg) .addOperand(MI.getOperand(1))); - (*MIB1).setMemRefs(MI.memoperands_begin(), MI.memoperands_end()); + MIB1->setMemRefs(MI.memoperands_begin(), MI.memoperands_end()); MachineInstrBuilder MIB2 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::tPICADD)) .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead)) @@ -823,7 +900,7 @@ bool 
ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, const MachineOperand &MO1 = MI.getOperand(1); const GlobalValue *GV = MO1.getGlobal(); unsigned TF = MO1.getTargetFlags(); - bool isARM = Opcode != ARM::t2MOV_ga_pcrel; + bool isARM = (Opcode != ARM::t2MOV_ga_pcrel && Opcode != ARM::t2MOV_ga_dyn); bool isPIC = (Opcode != ARM::MOV_ga_dyn && Opcode != ARM::t2MOV_ga_dyn); unsigned LO16Opc = isARM ? ARM::MOVi16_ga_pcrel : ARM::t2MOVi16_ga_pcrel; unsigned HI16Opc = isARM ? ARM::MOVTi16_ga_pcrel : ARM::t2MOVTi16_ga_pcrel; @@ -856,7 +933,7 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, if (isARM) { AddDefaultPred(MIB3); if (Opcode == ARM::MOV_ga_pcrel_ldr) - (*MIB2).setMemRefs(MI.memoperands_begin(), MI.memoperands_end()); + MIB2->setMemRefs(MI.memoperands_begin(), MI.memoperands_end()); } TransferImpOps(MI, MIB1, MIB3); MI.eraseFromParent(); @@ -896,9 +973,8 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, return true; } - case ARM::VLDMQIA: - case ARM::VLDMQDB: { - unsigned NewOpc = (Opcode == ARM::VLDMQIA) ? ARM::VLDMDIA : ARM::VLDMDDB; + case ARM::VLDMQIA: { + unsigned NewOpc = ARM::VLDMDIA; MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(NewOpc)); unsigned OpIdx = 0; @@ -927,9 +1003,8 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, return true; } - case ARM::VSTMQIA: - case ARM::VSTMQDB: { - unsigned NewOpc = (Opcode == ARM::VSTMQIA) ? ARM::VSTMDIA : ARM::VSTMDDB; + case ARM::VSTMQIA: { + unsigned NewOpc = ARM::VSTMDIA; MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(NewOpc)); unsigned OpIdx = 0; @@ -950,9 +1025,8 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, unsigned D1 = TRI->getSubReg(SrcReg, ARM::dsub_1); MIB.addReg(D0).addReg(D1); - if (SrcIsKill) - // Add an implicit kill for the Q register. - (*MIB).addRegisterKilled(SrcReg, TRI, true); + if (SrcIsKill) // Add an implicit kill for the Q register. + MIB->addRegisterKilled(SrcReg, TRI, true); TransferImpOps(MI, MIB, MIB); MI.eraseFromParent(); @@ -960,14 +1034,16 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, } case ARM::VDUPfqf: case ARM::VDUPfdf:{ - unsigned NewOpc = Opcode == ARM::VDUPfqf ? ARM::VDUPLNfq : ARM::VDUPLNfd; + unsigned NewOpc = Opcode == ARM::VDUPfqf ? ARM::VDUPLN32q : + ARM::VDUPLN32d; MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(NewOpc)); unsigned OpIdx = 0; unsigned SrcReg = MI.getOperand(1).getReg(); unsigned Lane = getARMRegisterNumbering(SrcReg) & 1; unsigned DReg = TRI->getMatchingSuperReg(SrcReg, - Lane & 1 ? ARM::ssub_1 : ARM::ssub_0, &ARM::DPR_VFP2RegClass); + Lane & 1 ? ARM::ssub_1 : ARM::ssub_0, + &ARM::DPR_VFP2RegClass); // The lane is [0,1] for the containing DReg superregister. // Copy the dst/src register operands. 
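The VDUPfqf/VDUPfdf expansion above leans on the fixed VFP register layout: S registers are packed pairwise into D registers, so the containing D register and the lane to duplicate fall out of simple arithmetic on the S register number. A minimal standalone sketch of that mapping (plain C++, not the LLVM API):

    #include <cstdio>

    // sN lives in d(N/2) and occupies lane N & 1: ssub_0 for even
    // S registers, ssub_1 for odd ones.
    struct DLane { unsigned DReg; unsigned Lane; };

    static DLane containingDRegAndLane(unsigned SReg) {
      return { SReg / 2, SReg & 1 };
    }

    int main() {
      const unsigned SRegs[] = { 0, 1, 6, 7 };
      for (unsigned S : SRegs) {
        DLane D = containingDRegAndLane(S);
        std::printf("s%u -> d%u lane %u\n", S, D.DReg, D.Lane);
      }
    }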
MIB.addOperand(MI.getOperand(OpIdx++)); diff --git a/lib/Target/ARM/ARMFastISel.cpp b/lib/Target/ARM/ARMFastISel.cpp index 26f48b308316..3baf274b76b8 100644 --- a/lib/Target/ARM/ARMFastISel.cpp +++ b/lib/Target/ARM/ARMFastISel.cpp @@ -14,6 +14,7 @@ //===----------------------------------------------------------------------===// #include "ARM.h" +#include "ARMAddressingModes.h" #include "ARMBaseInstrInfo.h" #include "ARMCallingConv.h" #include "ARMRegisterInfo.h" @@ -26,6 +27,7 @@ #include "llvm/Instructions.h" #include "llvm/IntrinsicInst.h" #include "llvm/Module.h" +#include "llvm/Operator.h" #include "llvm/CodeGen/Analysis.h" #include "llvm/CodeGen/FastISel.h" #include "llvm/CodeGen/FunctionLoweringInfo.h" @@ -115,6 +117,11 @@ class ARMFastISel : public FastISel { const TargetRegisterClass *RC, unsigned Op0, bool Op0IsKill, unsigned Op1, bool Op1IsKill); + virtual unsigned FastEmitInst_rrr(unsigned MachineInstOpcode, + const TargetRegisterClass *RC, + unsigned Op0, bool Op0IsKill, + unsigned Op1, bool Op1IsKill, + unsigned Op2, bool Op2IsKill); virtual unsigned FastEmitInst_ri(unsigned MachineInstOpcode, const TargetRegisterClass *RC, unsigned Op0, bool Op0IsKill, @@ -123,14 +130,18 @@ class ARMFastISel : public FastISel { const TargetRegisterClass *RC, unsigned Op0, bool Op0IsKill, const ConstantFP *FPImm); - virtual unsigned FastEmitInst_i(unsigned MachineInstOpcode, - const TargetRegisterClass *RC, - uint64_t Imm); virtual unsigned FastEmitInst_rri(unsigned MachineInstOpcode, const TargetRegisterClass *RC, unsigned Op0, bool Op0IsKill, unsigned Op1, bool Op1IsKill, uint64_t Imm); + virtual unsigned FastEmitInst_i(unsigned MachineInstOpcode, + const TargetRegisterClass *RC, + uint64_t Imm); + virtual unsigned FastEmitInst_ii(unsigned MachineInstOpcode, + const TargetRegisterClass *RC, + uint64_t Imm1, uint64_t Imm2); + virtual unsigned FastEmitInst_extractsubreg(MVT RetVT, unsigned Op0, bool Op0IsKill, uint32_t Idx); @@ -193,6 +204,7 @@ class ARMFastISel : public FastISel { // OptionalDef handling routines. private: + bool isARMNEONPred(const MachineInstr *MI); bool DefinesOptionalPredicate(MachineInstr *MI, bool *CPSR); const MachineInstrBuilder &AddOptionalDefs(const MachineInstrBuilder &MIB); void AddLoadStoreOperands(EVT VT, Address &Addr, @@ -221,6 +233,21 @@ bool ARMFastISel::DefinesOptionalPredicate(MachineInstr *MI, bool *CPSR) { return true; } +bool ARMFastISel::isARMNEONPred(const MachineInstr *MI) { + const TargetInstrDesc &TID = MI->getDesc(); + + // If we're a thumb2 or not NEON function we were handled via isPredicable. + if ((TID.TSFlags & ARMII::DomainMask) != ARMII::DomainNEON || + AFI->isThumb2Function()) + return false; + + for (unsigned i = 0, e = TID.getNumOperands(); i != e; ++i) + if (TID.OpInfo[i].isPredicate()) + return true; + + return false; +} + // If the machine is predicable go ahead and add the predicate operands, if // it needs default CC operands add those. // TODO: If we want to support thumb1 then we'll need to deal with optional @@ -230,8 +257,10 @@ const MachineInstrBuilder & ARMFastISel::AddOptionalDefs(const MachineInstrBuilder &MIB) { MachineInstr *MI = &*MIB; - // Do we use a predicate? - if (TII.isPredicable(MI)) + // Do we use a predicate? or... + // Are we NEON in ARM mode and have a predicate operand? If so, I know + // we're not predicable but add it anyways. + if (TII.isPredicable(MI) || isARMNEONPred(MI)) AddDefaultPred(MIB); // Do we optionally set a predicate? 
Preds is size > 0 iff the predicate @@ -296,6 +325,31 @@ unsigned ARMFastISel::FastEmitInst_rr(unsigned MachineInstOpcode, return ResultReg; } +unsigned ARMFastISel::FastEmitInst_rrr(unsigned MachineInstOpcode, + const TargetRegisterClass *RC, + unsigned Op0, bool Op0IsKill, + unsigned Op1, bool Op1IsKill, + unsigned Op2, bool Op2IsKill) { + unsigned ResultReg = createResultReg(RC); + const TargetInstrDesc &II = TII.get(MachineInstOpcode); + + if (II.getNumDefs() >= 1) + AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg) + .addReg(Op0, Op0IsKill * RegState::Kill) + .addReg(Op1, Op1IsKill * RegState::Kill) + .addReg(Op2, Op2IsKill * RegState::Kill)); + else { + AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II) + .addReg(Op0, Op0IsKill * RegState::Kill) + .addReg(Op1, Op1IsKill * RegState::Kill) + .addReg(Op2, Op2IsKill * RegState::Kill)); + AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + TII.get(TargetOpcode::COPY), ResultReg) + .addReg(II.ImplicitDefs[0])); + } + return ResultReg; +} + unsigned ARMFastISel::FastEmitInst_ri(unsigned MachineInstOpcode, const TargetRegisterClass *RC, unsigned Op0, bool Op0IsKill, @@ -384,6 +438,26 @@ unsigned ARMFastISel::FastEmitInst_i(unsigned MachineInstOpcode, return ResultReg; } +unsigned ARMFastISel::FastEmitInst_ii(unsigned MachineInstOpcode, + const TargetRegisterClass *RC, + uint64_t Imm1, uint64_t Imm2) { + unsigned ResultReg = createResultReg(RC); + const TargetInstrDesc &II = TII.get(MachineInstOpcode); + + if (II.getNumDefs() >= 1) + AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg) + .addImm(Imm1).addImm(Imm2)); + else { + AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II) + .addImm(Imm1).addImm(Imm2)); + AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + TII.get(TargetOpcode::COPY), + ResultReg) + .addReg(II.ImplicitDefs[0])); + } + return ResultReg; +} + unsigned ARMFastISel::FastEmitInst_extractsubreg(MVT RetVT, unsigned Op0, bool Op0IsKill, uint32_t Idx) { @@ -667,24 +741,29 @@ bool ARMFastISel::ARMComputeAddress(const Value *Obj, Address &Addr) { TmpOffset += SL->getElementOffset(Idx); } else { uint64_t S = TD.getTypeAllocSize(GTI.getIndexedType()); - SmallVector<const Value *, 4> Worklist; - Worklist.push_back(Op); - do { - Op = Worklist.pop_back_val(); + for (;;) { if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) { // Constant-offset addressing. TmpOffset += CI->getSExtValue() * S; - } else if (isa<AddOperator>(Op) && - isa<ConstantInt>(cast<AddOperator>(Op)->getOperand(1))) { - // An add with a constant operand. Fold the constant. + break; + } + if (isa<AddOperator>(Op) && + (!isa<Instruction>(Op) || + FuncInfo.MBBMap[cast<Instruction>(Op)->getParent()] + == FuncInfo.MBB) && + isa<ConstantInt>(cast<AddOperator>(Op)->getOperand(1))) { + // An add (in the same block) with a constant operand. Fold the + // constant. ConstantInt *CI = - cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1)); + cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1)); TmpOffset += CI->getSExtValue() * S; - // Add the other operand back to the work list. - Worklist.push_back(cast<AddOperator>(Op)->getOperand(0)); - } else - goto unsupported_gep; - } while (!Worklist.empty()); + // Iterate on the other operand. 
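The rewritten offset walk in ARMComputeAddress replaces the old worklist with a straight loop: a constant index contributes Imm * ElementSize to the running offset, and an add-with-constant is folded only when it lives in the current block, after which the walk continues on its other operand. A toy model of that loop, under those assumptions:

    #include <cstdint>
    #include <cstdio>

    // A node is either a constant index or an add of (AddBase + Value);
    // SameBlock models the new same-basic-block restriction.
    struct Node {
      bool IsConst;
      int64_t Value;
      const Node *AddBase;   // non-null: node is (AddBase + Value)
      bool SameBlock;
    };

    // Accumulate Value * Scale while the chain is foldable, mirroring
    // the loop that replaced the old worklist.
    static bool foldOffset(const Node *Op, int64_t Scale, int64_t &Off) {
      for (;;) {
        if (Op->IsConst) { Off += Op->Value * Scale; return true; }
        if (Op->AddBase && Op->SameBlock) {
          Off += Op->Value * Scale;
          Op = Op->AddBase;      // iterate on the other operand
          continue;
        }
        return false;            // unsupported GEP form
      }
    }

    int main() {
      Node Base = { true, 2, nullptr, true };   // constant index 2
      Node Add  = { false, 3, &Base, true };    // (Base + 3), same block
      int64_t Off = 0;
      bool OK = foldOffset(&Add, 4, Off);       // scale 4: 3*4 + 2*4 = 20
      std::printf("%s offset=%lld\n", OK ? "folded" : "gave up",
                  (long long)Off);
    }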
+ Op = cast<AddOperator>(Op)->getOperand(0); + continue; + } + // Unsupported + goto unsupported_gep; + } } } @@ -767,26 +846,9 @@ void ARMFastISel::ARMSimplifyAddress(Address &Addr, EVT VT) { // Since the offset is too large for the load/store instruction // get the reg+offset into a register. if (needsLowering) { - ARMCC::CondCodes Pred = ARMCC::AL; - unsigned PredReg = 0; - - TargetRegisterClass *RC = isThumb ? ARM::tGPRRegisterClass : - ARM::GPRRegisterClass; - unsigned BaseReg = createResultReg(RC); - - if (!isThumb) - emitARMRegPlusImmediate(*FuncInfo.MBB, FuncInfo.InsertPt, DL, - BaseReg, Addr.Base.Reg, Addr.Offset, - Pred, PredReg, - static_cast<const ARMBaseInstrInfo&>(TII)); - else { - assert(AFI->isThumb2Function()); - emitT2RegPlusImmediate(*FuncInfo.MBB, FuncInfo.InsertPt, DL, - BaseReg, Addr.Base.Reg, Addr.Offset, Pred, PredReg, - static_cast<const ARMBaseInstrInfo&>(TII)); - } + Addr.Base.Reg = FastEmit_ri_(MVT::i32, ISD::ADD, Addr.Base.Reg, + /*Op0IsKill*/false, Addr.Offset, MVT::i32); Addr.Offset = 0; - Addr.Base.Reg = BaseReg; } } @@ -797,7 +859,7 @@ void ARMFastISel::AddLoadStoreOperands(EVT VT, Address &Addr, if (VT.getSimpleVT().SimpleTy == MVT::f32 || VT.getSimpleVT().SimpleTy == MVT::f64) Addr.Offset /= 4; - + // Frame base works a bit differently. Handle it separately. if (Addr.BaseType == Address::FrameIndexBase) { int FI = Addr.Base.FI; @@ -819,7 +881,7 @@ void ARMFastISel::AddLoadStoreOperands(EVT VT, Address &Addr, } else { // Now add the rest of the operands. MIB.addReg(Addr.Base.Reg); - + // ARM halfword load/stores need an additional operand. if (!isThumb && VT.getSimpleVT().SimpleTy == MVT::i16) MIB.addReg(0); @@ -1007,18 +1069,16 @@ bool ARMFastISel::SelectBranch(const Instruction *I) { // behavior. // TODO: Factor this out. if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) { - if (CI->hasOneUse() && (CI->getParent() == I->getParent())) { - MVT VT; - const Type *Ty = CI->getOperand(0)->getType(); - if (!isTypeLegal(Ty, VT)) - return false; - + MVT SourceVT; + const Type *Ty = CI->getOperand(0)->getType(); + if (CI->hasOneUse() && (CI->getParent() == I->getParent()) + && isTypeLegal(Ty, SourceVT)) { bool isFloat = (Ty->isDoubleTy() || Ty->isFloatTy()); if (isFloat && !Subtarget->hasVFP2()) return false; unsigned CmpOpc; - switch (VT.SimpleTy) { + switch (SourceVT.SimpleTy) { default: return false; // TODO: Verify compares. case MVT::f32: @@ -1033,7 +1093,14 @@ bool ARMFastISel::SelectBranch(const Instruction *I) { } // Get the compare predicate. - ARMCC::CondCodes ARMPred = getComparePred(CI->getPredicate()); + // Try to take advantage of fallthrough opportunities. + CmpInst::Predicate Predicate = CI->getPredicate(); + if (FuncInfo.MBB->isLayoutSuccessor(TBB)) { + std::swap(TBB, FBB); + Predicate = CmpInst::getInversePredicate(Predicate); + } + + ARMCC::CondCodes ARMPred = getComparePred(Predicate); // We may not handle every CC for now. if (ARMPred == ARMCC::AL) return false; @@ -1061,19 +1128,55 @@ bool ARMFastISel::SelectBranch(const Instruction *I) { FuncInfo.MBB->addSuccessor(TBB); return true; } + } else if (TruncInst *TI = dyn_cast<TruncInst>(BI->getCondition())) { + MVT SourceVT; + if (TI->hasOneUse() && TI->getParent() == I->getParent() && + (isTypeLegal(TI->getOperand(0)->getType(), SourceVT))) { + unsigned TstOpc = isThumb ? 
ARM::t2TSTri : ARM::TSTri; + unsigned OpReg = getRegForValue(TI->getOperand(0)); + AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + TII.get(TstOpc)) + .addReg(OpReg).addImm(1)); + + unsigned CCMode = ARMCC::NE; + if (FuncInfo.MBB->isLayoutSuccessor(TBB)) { + std::swap(TBB, FBB); + CCMode = ARMCC::EQ; + } + + unsigned BrOpc = isThumb ? ARM::t2Bcc : ARM::Bcc; + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(BrOpc)) + .addMBB(TBB).addImm(CCMode).addReg(ARM::CPSR); + + FastEmitBranch(FBB, DL); + FuncInfo.MBB->addSuccessor(TBB); + return true; + } } unsigned CmpReg = getRegForValue(BI->getCondition()); if (CmpReg == 0) return false; - // Re-set the flags just in case. - unsigned CmpOpc = isThumb ? ARM::t2CMPri : ARM::CMPri; - AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CmpOpc)) - .addReg(CmpReg).addImm(0)); + // We've been divorced from our compare! Our block was split, and + // now our compare lives in a predecessor block. We mustn't + // re-compare here, as the children of the compare aren't guaranteed + // live across the block boundary (we *could* check for this). + // Regardless, the compare has been done in the predecessor block, + // and it left a value for us in a virtual register. Ergo, we test + // the one-bit value left in the virtual register. + unsigned TstOpc = isThumb ? ARM::t2TSTri : ARM::TSTri; + AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TstOpc)) + .addReg(CmpReg).addImm(1)); + + unsigned CCMode = ARMCC::NE; + if (FuncInfo.MBB->isLayoutSuccessor(TBB)) { + std::swap(TBB, FBB); + CCMode = ARMCC::EQ; + } unsigned BrOpc = isThumb ? ARM::t2Bcc : ARM::Bcc; BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(BrOpc)) - .addMBB(TBB).addImm(ARMCC::NE).addReg(ARM::CPSR); + .addMBB(TBB).addImm(CCMode).addReg(ARM::CPSR); FastEmitBranch(FBB, DL); FuncInfo.MBB->addSuccessor(TBB); return true; @@ -1636,17 +1739,9 @@ bool ARMFastISel::SelectRet(const Instruction *I) { unsigned ARMFastISel::ARMSelectCallOp(const GlobalValue *GV) { - // Depend our opcode for thumb on whether or not we're targeting an - // externally callable function. For libcalls we'll just pass a NULL GV - // in here. - bool isExternal = false; - if (!GV || GV->hasExternalLinkage()) isExternal = true; - // Darwin needs the r9 versions of the opcodes. bool isDarwin = Subtarget->isTargetDarwin(); - if (isThumb && isExternal) { - return isDarwin ? ARM::tBLXi_r9 : ARM::tBLXi; - } else if (isThumb) { + if (isThumb) { return isDarwin ? ARM::tBLr9 : ARM::tBL; } else { return isDarwin ? ARM::BLr9 : ARM::BL; @@ -1671,9 +1766,6 @@ bool ARMFastISel::ARMEmitLibcall(const Instruction *I, RTLIB::Libcall Call) { else if (!isTypeLegal(RetTy, RetVT)) return false; - // For now we're using BLX etc on the assumption that we have v5t ops. - if (!Subtarget->hasV5TOps()) return false; - // TODO: For now if we have long calls specified we don't handle the call. if (EnableARMLongCalls) return false; @@ -1711,7 +1803,7 @@ bool ARMFastISel::ARMEmitLibcall(const Instruction *I, RTLIB::Libcall Call) { if (!ProcessCallArgs(Args, ArgRegs, ArgVTs, ArgFlags, RegArgs, CC, NumBytes)) return false; - // Issue the call, BLXr9 for darwin, BLX otherwise. This uses V5 ops. + // Issue the call, BLr9 for darwin, BL otherwise. // TODO: Turn this into the table of arm call ops.
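Both fast-isel branch paths above play the same layout trick: when the taken destination is the next block in layout order, the destinations are swapped and the condition inverted (NE becomes EQ), so the hot edge becomes a fallthrough. A small sketch of the swap, with invented names:

    #include <cstdio>
    #include <utility>

    // Stand-ins for the two condition codes the new code toggles between.
    enum Cond { EQ, NE };

    // If the taken destination is the layout successor, branch to the
    // other destination on the inverted condition and fall through.
    static void pickBranch(Cond &CC, int &TBB, int &FBB, int LayoutNext) {
      if (TBB == LayoutNext) {
        std::swap(TBB, FBB);
        CC = (CC == EQ) ? NE : EQ;
      }
    }

    int main() {
      Cond CC = NE; int TBB = 7, FBB = 8;
      pickBranch(CC, TBB, FBB, /*LayoutNext=*/7);
      std::printf("b%s BB#%d, fall through to BB#%d\n",
                  CC == EQ ? "eq" : "ne", TBB, FBB);  // beq BB#8, fall to 7
    }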
MachineInstrBuilder MIB; unsigned CallOpc = ARMSelectCallOp(NULL); @@ -1772,13 +1864,9 @@ bool ARMFastISel::SelectCall(const Instruction *I) { else if (!isTypeLegal(RetTy, RetVT)) return false; - // For now we're using BLX etc on the assumption that we have v5t ops. - // TODO: Maybe? - if (!Subtarget->hasV5TOps()) return false; - // TODO: For now if we have long calls specified we don't handle the call. if (EnableARMLongCalls) return false; - + // Set up the argument vectors. SmallVector<Value*, 8> Args; SmallVector<unsigned, 8> ArgRegs; @@ -1827,7 +1915,7 @@ bool ARMFastISel::SelectCall(const Instruction *I) { if (!ProcessCallArgs(Args, ArgRegs, ArgVTs, ArgFlags, RegArgs, CC, NumBytes)) return false; - // Issue the call, BLXr9 for darwin, BLX otherwise. This uses V5 ops. + // Issue the call, BLr9 for darwin, BL otherwise. // TODO: Turn this into the table of arm call ops. MachineInstrBuilder MIB; unsigned CallOpc = ARMSelectCallOp(GV); @@ -1842,7 +1930,7 @@ bool ARMFastISel::SelectCall(const Instruction *I) { MIB = AddDefaultPred(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CallOpc)) .addGlobalAddress(GV, 0, 0)); - + // Add implicit physical register uses to the call. for (unsigned i = 0, e = RegArgs.size(); i != e; ++i) MIB.addReg(RegArgs[i]); diff --git a/lib/Target/ARM/ARMFrameLowering.cpp b/lib/Target/ARM/ARMFrameLowering.cpp index 68c33f098ec9..e2e95d47b37b 100644 --- a/lib/Target/ARM/ARMFrameLowering.cpp +++ b/lib/Target/ARM/ARMFrameLowering.cpp @@ -106,14 +106,13 @@ static void emitSPUpdate(bool isARM, MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, DebugLoc dl, const ARMBaseInstrInfo &TII, - int NumBytes, - ARMCC::CondCodes Pred = ARMCC::AL, unsigned PredReg = 0) { + int NumBytes, unsigned MIFlags = MachineInstr::NoFlags) { if (isARM) emitARMRegPlusImmediate(MBB, MBBI, dl, ARM::SP, ARM::SP, NumBytes, - Pred, PredReg, TII); + ARMCC::AL, 0, TII, MIFlags); else emitT2RegPlusImmediate(MBB, MBBI, dl, ARM::SP, ARM::SP, NumBytes, - Pred, PredReg, TII); + ARMCC::AL, 0, TII, MIFlags); } void ARMFrameLowering::emitPrologue(MachineFunction &MF) const { @@ -141,11 +140,13 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const { // Allocate the vararg register save area. This is not counted in NumBytes. if (VARegSaveSize) - emitSPUpdate(isARM, MBB, MBBI, dl, TII, -VARegSaveSize); + emitSPUpdate(isARM, MBB, MBBI, dl, TII, -VARegSaveSize, + MachineInstr::FrameSetup); if (!AFI->hasStackFrame()) { if (NumBytes != 0) - emitSPUpdate(isARM, MBB, MBBI, dl, TII, -NumBytes); + emitSPUpdate(isARM, MBB, MBBI, dl, TII, -NumBytes, + MachineInstr::FrameSetup); return; } @@ -196,7 +197,8 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const { unsigned ADDriOpc = !AFI->isThumbFunction() ? ARM::ADDri : ARM::t2ADDri; MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII.get(ADDriOpc), FramePtr) - .addFrameIndex(FramePtrSpillFI).addImm(0); + .addFrameIndex(FramePtrSpillFI).addImm(0) + .setMIFlag(MachineInstr::FrameSetup); AddDefaultCC(AddDefaultPred(MIB)); } @@ -226,7 +228,8 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const { NumBytes = DPRCSOffset; if (NumBytes) { // Adjust SP after all the callee-save spills. - emitSPUpdate(isARM, MBB, MBBI, dl, TII, -NumBytes); + emitSPUpdate(isARM, MBB, MBBI, dl, TII, -NumBytes, + MachineInstr::FrameSetup); if (HasFP && isARM) // Restore from fp only in ARM mode: e.g. 
sub sp, r7, #24 // Note it's not safe to do this in Thumb2 mode because it would have @@ -282,6 +285,7 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const { // of the stack pointer is at this point. Any variable size objects // will be allocated after this, so we can still use the base pointer // to reference locals. + // FIXME: Clarify FrameSetup flags here. if (RegInfo->hasBasePointer(MF)) { if (isARM) BuildMI(MBB, MBBI, dl, @@ -396,8 +400,8 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF, // Jump to label or value in register. if (RetOpcode == ARM::TCRETURNdi || RetOpcode == ARM::TCRETURNdiND) { unsigned TCOpcode = (RetOpcode == ARM::TCRETURNdi) - ? (STI.isThumb() ? ARM::TAILJMPdt : ARM::TAILJMPd) - : (STI.isThumb() ? ARM::TAILJMPdNDt : ARM::TAILJMPdND); + ? (STI.isThumb() ? ARM::tTAILJMPd : ARM::TAILJMPd) + : (STI.isThumb() ? ARM::tTAILJMPdND : ARM::TAILJMPdND); MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII.get(TCOpcode)); if (JumpTarget.isGlobal()) MIB.addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset(), @@ -408,10 +412,12 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF, JumpTarget.getTargetFlags()); } } else if (RetOpcode == ARM::TCRETURNri) { - BuildMI(MBB, MBBI, dl, TII.get(ARM::TAILJMPr)). + BuildMI(MBB, MBBI, dl, + TII.get(STI.isThumb() ? ARM::tTAILJMPr : ARM::TAILJMPr)). addReg(JumpTarget.getReg(), RegState::Kill); } else if (RetOpcode == ARM::TCRETURNriND) { - BuildMI(MBB, MBBI, dl, TII.get(ARM::TAILJMPrND)). + BuildMI(MBB, MBBI, dl, + TII.get(STI.isThumb() ? ARM::tTAILJMPrND : ARM::TAILJMPrND)). addReg(JumpTarget.getReg(), RegState::Kill); } @@ -439,8 +445,7 @@ ARMFrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI, int ARMFrameLowering::ResolveFrameIndexReference(const MachineFunction &MF, - int FI, - unsigned &FrameReg, + int FI, unsigned &FrameReg, int SPAdj) const { const MachineFrameInfo *MFI = MF.getFrameInfo(); const ARMBaseRegisterInfo *RegInfo = @@ -484,19 +489,23 @@ ARMFrameLowering::ResolveFrameIndexReference(const MachineFunction &MF, return FPOffset; } else if (MFI->hasVarSizedObjects()) { assert(RegInfo->hasBasePointer(MF) && "missing base pointer!"); - // Try to use the frame pointer if we can, else use the base pointer - // since it's available. This is handy for the emergency spill slot, in - // particular. if (AFI->isThumb2Function()) { + // Try to use the frame pointer if we can, else use the base pointer + // since it's available. This is handy for the emergency spill slot, in + // particular. if (FPOffset >= -255 && FPOffset < 0) { FrameReg = RegInfo->getFrameRegister(MF); return FPOffset; } - } else - FrameReg = RegInfo->getBaseRegister(); + } } else if (AFI->isThumb2Function()) { + // Use add <rd>, sp, #<imm8> + // ldr <rd>, [sp, #<imm8>] + // if at all possible to save space. + if (Offset >= 0 && (Offset & 3) == 0 && Offset <= 1020) + return Offset; // In Thumb2 mode, the negative offset is very limited. Try to avoid - // out of range references. + // out of range references. 
ldr <rt>,[<rn>, #-<imm8>] if (FPOffset >= -255 && FPOffset < 0) { FrameReg = RegInfo->getFrameRegister(MF); return FPOffset; @@ -524,7 +533,8 @@ void ARMFrameLowering::emitPushInst(MachineBasicBlock &MBB, const std::vector<CalleeSavedInfo> &CSI, unsigned StmOpc, unsigned StrOpc, bool NoGap, - bool(*Func)(unsigned, bool)) const { + bool(*Func)(unsigned, bool), + unsigned MIFlags) const { MachineFunction &MF = *MBB.getParent(); const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo(); @@ -567,14 +577,14 @@ void ARMFrameLowering::emitPushInst(MachineBasicBlock &MBB, if (Regs.size() > 1 || StrOpc== 0) { MachineInstrBuilder MIB = AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(StmOpc), ARM::SP) - .addReg(ARM::SP)); + .addReg(ARM::SP).setMIFlags(MIFlags)); for (unsigned i = 0, e = Regs.size(); i < e; ++i) MIB.addReg(Regs[i].first, getKillRegState(Regs[i].second)); } else if (Regs.size() == 1) { MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(StrOpc), ARM::SP) .addReg(Regs[0].first, getKillRegState(Regs[0].second)) - .addReg(ARM::SP); + .addReg(ARM::SP).setMIFlags(MIFlags); // ARM mode needs an extra reg0 here due to addrmode2. Will go away once // that refactoring is complete (eventually). if (StrOpc == ARM::STR_PRE) { @@ -676,9 +686,12 @@ bool ARMFrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB, unsigned PushOpc = AFI->isThumbFunction() ? ARM::t2STMDB_UPD : ARM::STMDB_UPD; unsigned PushOneOpc = AFI->isThumbFunction() ? ARM::t2STR_PRE : ARM::STR_PRE; unsigned FltOpc = ARM::VSTMDDB_UPD; - emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false, &isARMArea1Register); - emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false, &isARMArea2Register); - emitPushInst(MBB, MI, CSI, FltOpc, 0, true, &isARMArea3Register); + emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false, &isARMArea1Register, + MachineInstr::FrameSetup); + emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false, &isARMArea2Register, + MachineInstr::FrameSetup); + emitPushInst(MBB, MI, CSI, FltOpc, 0, true, &isARMArea3Register, + MachineInstr::FrameSetup); return true; } diff --git a/lib/Target/ARM/ARMFrameLowering.h b/lib/Target/ARM/ARMFrameLowering.h index 1288b706c599..61bb8afa40f2 100644 --- a/lib/Target/ARM/ARMFrameLowering.h +++ b/lib/Target/ARM/ARMFrameLowering.h @@ -51,7 +51,8 @@ public: bool canSimplifyCallFramePseudos(const MachineFunction &MF) const; int getFrameIndexReference(const MachineFunction &MF, int FI, unsigned &FrameReg) const; - int ResolveFrameIndexReference(const MachineFunction &MF, int FI, + int ResolveFrameIndexReference(const MachineFunction &MF, + int FI, unsigned &FrameReg, int SPAdj) const; int getFrameIndexOffset(const MachineFunction &MF, int FI) const; @@ -62,7 +63,8 @@ public: void emitPushInst(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const std::vector<CalleeSavedInfo> &CSI, unsigned StmOpc, unsigned StrOpc, bool NoGap, - bool(*Func)(unsigned, bool)) const; + bool(*Func)(unsigned, bool), + unsigned MIFlags = 0) const; void emitPopInst(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const std::vector<CalleeSavedInfo> &CSI, unsigned LdmOpc, unsigned LdrOpc, bool isVarArg, bool NoGap, diff --git a/lib/Target/ARM/ARMHazardRecognizer.cpp b/lib/Target/ARM/ARMHazardRecognizer.cpp index e97ce50bc429..517bba8cee8e 100644 --- a/lib/Target/ARM/ARMHazardRecognizer.cpp +++ b/lib/Target/ARM/ARMHazardRecognizer.cpp @@ -49,6 +49,8 @@ ARMHazardRecognizer::getHazardType(SUnit *SU, int Stalls) { const TargetInstrDesc &LastTID = LastMI->getDesc(); // Skip over one non-VFP / NEON 
instruction. if (!LastTID.isBarrier() && + // On A9, AGU and NEON/FPU are muxed. + !(STI.isCortexA9() && (LastTID.mayLoad() || LastTID.mayStore())) && (LastTID.TSFlags & ARMII::DomainMask) == ARMII::DomainGeneral) { MachineBasicBlock::iterator I = LastMI; if (I != LastMI->getParent()->begin()) { diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp index f0d5a7d7c2e7..abe5a316a45b 100644 --- a/lib/Target/ARM/ARMISelDAGToDAG.cpp +++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp @@ -45,7 +45,7 @@ DisableShifterOp("disable-shifter-op", cl::Hidden, static cl::opt<bool> CheckVMLxHazard("check-vmlx-hazard", cl::Hidden, cl::desc("Check fp vmla / vmls hazard at isel time"), - cl::init(false)); + cl::init(true)); //===--------------------------------------------------------------------===// /// ARMDAGToDAGISel - ARM specific code to select ARM machine @@ -91,9 +91,14 @@ public: bool isShifterOpProfitable(const SDValue &Shift, ARM_AM::ShiftOpc ShOpcVal, unsigned ShAmt); bool SelectShifterOperandReg(SDValue N, SDValue &A, - SDValue &B, SDValue &C); + SDValue &B, SDValue &C, + bool CheckProfitability = true); bool SelectShiftShifterOperandReg(SDValue N, SDValue &A, - SDValue &B, SDValue &C); + SDValue &B, SDValue &C) { + // Don't apply the profitability check + return SelectShifterOperandReg(N, A, B, C, false); + } + bool SelectAddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm); bool SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset, SDValue &Opc); @@ -174,16 +179,6 @@ public: return ARM_AM::getT2SOImmVal(~Imm) != -1; } - inline bool Pred_so_imm(SDNode *inN) const { - ConstantSDNode *N = cast<ConstantSDNode>(inN); - return is_so_imm(N->getZExtValue()); - } - - inline bool Pred_t2_so_imm(SDNode *inN) const { - ConstantSDNode *N = cast<ConstantSDNode>(inN); - return is_t2_so_imm(N->getZExtValue()); - } - // Include the pieces autogenerated from the target description. #include "ARMGenDAGISel.inc" @@ -373,7 +368,8 @@ bool ARMDAGToDAGISel::isShifterOpProfitable(const SDValue &Shift, bool ARMDAGToDAGISel::SelectShifterOperandReg(SDValue N, SDValue &BaseReg, SDValue &ShReg, - SDValue &Opc) { + SDValue &Opc, + bool CheckProfitability) { if (DisableShifterOp) return false; @@ -390,7 +386,7 @@ bool ARMDAGToDAGISel::SelectShifterOperandReg(SDValue N, ShImmVal = RHS->getZExtValue() & 31; } else { ShReg = N.getOperand(1); - if (!isShifterOpProfitable(N, ShOpcVal, ShImmVal)) + if (CheckProfitability && !isShifterOpProfitable(N, ShOpcVal, ShImmVal)) return false; } Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal), @@ -398,30 +394,6 @@ bool ARMDAGToDAGISel::SelectShifterOperandReg(SDValue N, return true; } -bool ARMDAGToDAGISel::SelectShiftShifterOperandReg(SDValue N, - SDValue &BaseReg, - SDValue &ShReg, - SDValue &Opc) { - ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N); - - // Don't match base register only case. That is matched to a separate - // lower complexity pattern with explicit register operand. - if (ShOpcVal == ARM_AM::no_shift) return false; - - BaseReg = N.getOperand(0); - unsigned ShImmVal = 0; - // Do not check isShifterOpProfitable. This must return true. 
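The removal above folds SelectShiftShifterOperandReg into SelectShifterOperandReg behind a CheckProfitability flag; the shift-by-register form has no cheaper alternative pattern, so its caller simply disables the check. The shape of that refactor, reduced to a standalone sketch with hypothetical names and simplified signatures:

    #include <cstdio>

    // Stand-in heuristic; the real isShifterOpProfitable consults the
    // subtarget.
    static bool isProfitable(int ShAmt) { return ShAmt != 0; }

    // One selector with a defaulted flag replaces the near-duplicate
    // routine.
    static bool selectShifterOperand(int ShAmt,
                                     bool CheckProfitability = true) {
      if (CheckProfitability && !isProfitable(ShAmt))
        return false;
      return true;   // ...emit the folded shifter operand...
    }

    // The must-match caller: same logic, profitability check disabled.
    static bool selectShiftShifterOperand(int ShAmt) {
      return selectShifterOperand(ShAmt, /*CheckProfitability=*/false);
    }

    int main() {
      std::printf("%d %d\n", selectShifterOperand(0),
                  selectShiftShifterOperand(0));   // 0 1
    }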
- if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) { - ShReg = CurDAG->getRegister(0, MVT::i32); - ShImmVal = RHS->getZExtValue() & 31; - } else { - ShReg = N.getOperand(1); - } - Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal), - MVT::i32); - return true; -} - bool ARMDAGToDAGISel::SelectAddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm) { @@ -437,7 +409,7 @@ bool ARMDAGToDAGISel::SelectAddrModeImm12(SDValue N, OffImm = CurDAG->getTargetConstant(0, MVT::i32); return true; } - + if (N.getOpcode() == ARMISD::Wrapper && !(Subtarget->useMovt() && N.getOperand(0).getOpcode() == ISD::TargetGlobalAddress)) { @@ -1138,7 +1110,7 @@ bool ARMDAGToDAGISel::SelectT2AddrModeImm12(SDValue N, OffImm = CurDAG->getTargetConstant(0, MVT::i32); return true; } - + if (N.getOpcode() == ARMISD::Wrapper && !(Subtarget->useMovt() && N.getOperand(0).getOpcode() == ISD::TargetGlobalAddress)) { @@ -1183,7 +1155,7 @@ bool ARMDAGToDAGISel::SelectT2AddrModeImm8(SDValue N, if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB && !CurDAG->isBaseWithConstantOffset(N)) return false; - + if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) { int RHSC = (int)RHS->getSExtValue(); if (N.getOpcode() == ISD::SUB) @@ -1571,6 +1543,11 @@ SDNode *ARMDAGToDAGISel::SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs, Ops.data(), Ops.size()); } + // Transfer memoperands. + MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); + MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand(); + cast<MachineSDNode>(VLd)->setMemRefs(MemOp, MemOp + 1); + if (NumVecs == 1) return VLd; @@ -1600,6 +1577,9 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs, if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align)) return NULL; + MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); + MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand(); + SDValue Chain = N->getOperand(0); EVT VT = N->getOperand(Vec0Idx).getValueType(); bool is64BitVector = VT.is64BitVector(); @@ -1672,7 +1652,13 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs, Ops.push_back(Pred); Ops.push_back(Reg0); Ops.push_back(Chain); - return CurDAG->getMachineNode(Opc, dl, ResTys, Ops.data(), Ops.size()); + SDNode *VSt = + CurDAG->getMachineNode(Opc, dl, ResTys, Ops.data(), Ops.size()); + + // Transfer memoperands. + cast<MachineSDNode>(VSt)->setMemRefs(MemOp, MemOp + 1); + + return VSt; } // Otherwise, quad registers are stored with two separate instructions, @@ -1693,6 +1679,7 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs, SDNode *VStA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl, MemAddr.getValueType(), MVT::Other, OpsA, 7); + cast<MachineSDNode>(VStA)->setMemRefs(MemOp, MemOp + 1); Chain = SDValue(VStA, 1); // Store the odd D registers. 
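A recurring theme in this hunk is memoperand preservation: SelectVLD and SelectVST now call setMemRefs on every machine node they create, including both halves of a split quad-register store, since a node without memory operands looks like an access to unknown memory to later passes. A toy model of carrying one memory descriptor across a two-instruction expansion:

    #include <cstdio>
    #include <vector>

    struct MemOperand { const char *Loc; unsigned Size; };
    struct Instr { const char *Opcode; const MemOperand *Mem; };

    // One wide store becomes two narrower ones; each half keeps a
    // pointer to the same memory operand, mirroring setMemRefs on both
    // emitted nodes.
    static std::vector<Instr> splitQuadStore(const MemOperand *M) {
      return { {"vstm_even_d", M}, {"vstm_odd_d", M} };
    }

    int main() {
      MemOperand M = { "stack.q0", 16 };
      for (const Instr &I : splitQuadStore(&M))
        std::printf("%s -> %s (%u bytes)\n", I.Opcode, I.Mem->Loc,
                    I.Mem->Size);
    }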
@@ -1709,8 +1696,10 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs, Ops.push_back(Pred); Ops.push_back(Reg0); Ops.push_back(Chain); - return CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys, - Ops.data(), Ops.size()); + SDNode *VStB = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys, + Ops.data(), Ops.size()); + cast<MachineSDNode>(VStB)->setMemRefs(MemOp, MemOp + 1); + return VStB; } SDNode *ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad, @@ -1726,6 +1715,9 @@ SDNode *ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad, if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align)) return NULL; + MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); + MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand(); + SDValue Chain = N->getOperand(0); unsigned Lane = cast<ConstantSDNode>(N->getOperand(Vec0Idx + NumVecs))->getZExtValue(); @@ -1812,6 +1804,7 @@ SDNode *ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad, QOpcodes[OpcodeIndex]); SDNode *VLdLn = CurDAG->getMachineNode(Opc, dl, ResTys, Ops.data(), Ops.size()); + cast<MachineSDNode>(VLdLn)->setMemRefs(MemOp, MemOp + 1); if (!IsLoad) return VLdLn; @@ -1838,6 +1831,9 @@ SDNode *ARMDAGToDAGISel::SelectVLDDup(SDNode *N, bool isUpdating, if (!SelectAddrMode6(N, N->getOperand(1), MemAddr, Align)) return NULL; + MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); + MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand(); + SDValue Chain = N->getOperand(0); EVT VT = N->getValueType(0); @@ -1882,12 +1878,13 @@ SDNode *ARMDAGToDAGISel::SelectVLDDup(SDNode *N, bool isUpdating, unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs; std::vector<EVT> ResTys; - ResTys.push_back(EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, ResTyElts)); + ResTys.push_back(EVT::getVectorVT(*CurDAG->getContext(), MVT::i64,ResTyElts)); if (isUpdating) ResTys.push_back(MVT::i32); ResTys.push_back(MVT::Other); SDNode *VLdDup = CurDAG->getMachineNode(Opc, dl, ResTys, Ops.data(), Ops.size()); + cast<MachineSDNode>(VLdDup)->setMemRefs(MemOp, MemOp + 1); SuperReg = SDValue(VLdDup, 0); // Extract the subregisters. @@ -2168,7 +2165,7 @@ SDNode *ARMDAGToDAGISel::SelectCMOVOp(SDNode *N) { // Emits: (tMOVCCr:i32 GPR:i32:$false, GPR:i32:$true, (imm:i32):$cc) // Pattern complexity = 6 cost = 11 size = 0 // - // Also FCPYScc and FCPYDcc. + // Also VMOVScc and VMOVDcc. 
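The CMOV selection here pairs with the MOVCCr/VMOVScc expansions earlier in this patch: select keeps a pseudo holding (false value, true value, condition), and the post-RA expansion keeps only a move of the true value predicated on the condition, so the false value survives in the destination whenever the predicate fails. A scalar model of that semantics:

    #include <cstdio>

    enum Cond { EQ, NE };

    static int conditionalMove(int FalseVal, int TrueVal, Cond CC,
                               bool ZFlag) {
      int Dst = FalseVal;                  // dst is tied to the false input
      bool Pred = (CC == EQ) ? ZFlag : !ZFlag;
      if (Pred)
        Dst = TrueVal;                     // the predicated MOVr/VMOVS
      return Dst;
    }

    int main() {
      std::printf("%d %d\n", conditionalMove(1, 2, EQ, true),
                  conditionalMove(1, 2, EQ, false));   // 2 1
    }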
SDValue Tmp2 = CurDAG->getTargetConstant(CCVal, MVT::i32); SDValue Ops[] = { FalseVal, TrueVal, Tmp2, CCR, InFlag }; unsigned Opc = 0; @@ -2450,34 +2447,6 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { } case ARMISD::CMOV: return SelectCMOVOp(N); - case ARMISD::CNEG: { - EVT VT = N->getValueType(0); - SDValue N0 = N->getOperand(0); - SDValue N1 = N->getOperand(1); - SDValue N2 = N->getOperand(2); - SDValue N3 = N->getOperand(3); - SDValue InFlag = N->getOperand(4); - assert(N2.getOpcode() == ISD::Constant); - assert(N3.getOpcode() == ISD::Register); - - SDValue Tmp2 = CurDAG->getTargetConstant(((unsigned) - cast<ConstantSDNode>(N2)->getZExtValue()), - MVT::i32); - SDValue Ops[] = { N0, N1, Tmp2, N3, InFlag }; - unsigned Opc = 0; - switch (VT.getSimpleVT().SimpleTy) { - default: assert(false && "Illegal conditional move type!"); - break; - case MVT::f32: - Opc = ARM::VNEGScc; - break; - case MVT::f64: - Opc = ARM::VNEGDcc; - break; - } - return CurDAG->SelectNodeTo(N, Opc, VT, Ops, 5); - } - case ARMISD::VZIP: { unsigned Opc = 0; EVT VT = N->getValueType(0); @@ -2870,6 +2839,35 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { break; } + case ARMISD::VTBL1: { + DebugLoc dl = N->getDebugLoc(); + EVT VT = N->getValueType(0); + SmallVector<SDValue, 6> Ops; + + Ops.push_back(N->getOperand(0)); + Ops.push_back(N->getOperand(1)); + Ops.push_back(getAL(CurDAG)); // Predicate + Ops.push_back(CurDAG->getRegister(0, MVT::i32)); // Predicate Register + return CurDAG->getMachineNode(ARM::VTBL1, dl, VT, Ops.data(), Ops.size()); + } + case ARMISD::VTBL2: { + DebugLoc dl = N->getDebugLoc(); + EVT VT = N->getValueType(0); + + // Form a REG_SEQUENCE to force register allocation. + SDValue V0 = N->getOperand(0); + SDValue V1 = N->getOperand(1); + SDValue RegSeq = SDValue(PairDRegs(MVT::v16i8, V0, V1), 0); + + SmallVector<SDValue, 6> Ops; + Ops.push_back(RegSeq); + Ops.push_back(N->getOperand(2)); + Ops.push_back(getAL(CurDAG)); // Predicate + Ops.push_back(CurDAG->getRegister(0, MVT::i32)); // Predicate Register + return CurDAG->getMachineNode(ARM::VTBL2Pseudo, dl, VT, + Ops.data(), Ops.size()); + } + case ISD::CONCAT_VECTORS: return SelectConcatVector(N); } diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index ab9f9e1571e3..0a31b87c4b56 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -72,6 +72,11 @@ ARMInterworking("arm-interworking", cl::Hidden, cl::desc("Enable / disable ARM interworking (for debugging only)"), cl::init(true)); +// The APCS parameter registers. +static const unsigned GPRArgRegs[] = { + ARM::R0, ARM::R1, ARM::R2, ARM::R3 +}; + void ARMTargetLowering::addTypeForNEON(EVT VT, EVT PromotedLdStVT, EVT PromotedBitwiseVT) { if (VT != PromotedLdStVT) { @@ -393,6 +398,12 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) setLibcallCallingConv(RTLIB::UDIV_I32, CallingConv::ARM_AAPCS); } + // Use divmod iOS compiler-rt calls. 
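The libcall names registered below refer to combined divide/modulo helpers, so an sdiv/srem pair can lower to a single runtime call. A behavioral model, with the signature assumed from compiler-rt's __divmodsi4 convention (quotient returned, remainder through the out-parameter); treat the exact ABI as an assumption, not a quote:

    #include <cstdio>

    // Assumed shape of the combined helper: one call yields both results.
    static int divmodsi4(int A, int B, int *Rem) {
      int Q = A / B;
      *Rem = A - Q * B;
      return Q;
    }

    int main() {
      int R;
      int Q = divmodsi4(7, 3, &R);
      std::printf("7 = %d*3 + %d\n", Q, R);   // 7 = 2*3 + 1
    }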
+ if (Subtarget->getTargetTriple().getOS() == Triple::IOS) { + setLibcallName(RTLIB::SDIVREM_I32, "__divmodsi4"); + setLibcallName(RTLIB::UDIVREM_I32, "__udivmodsi4"); + } + if (Subtarget->isThumb1Only()) addRegisterClass(MVT::i32, ARM::tGPRRegisterClass); else @@ -461,6 +472,10 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) setOperationAction(ISD::UDIV, MVT::v8i8, Custom); setOperationAction(ISD::VSETCC, MVT::v1i64, Expand); setOperationAction(ISD::VSETCC, MVT::v2i64, Expand); + // Neon does not have single instruction SINT_TO_FP and UINT_TO_FP with + // a destination type that is wider than the source. + setOperationAction(ISD::SINT_TO_FP, MVT::v4i16, Custom); + setOperationAction(ISD::UINT_TO_FP, MVT::v4i16, Custom); setTargetDAGCombine(ISD::INTRINSIC_VOID); setTargetDAGCombine(ISD::INTRINSIC_W_CHAIN); @@ -502,18 +517,15 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) } // i64 operation support. + setOperationAction(ISD::MUL, MVT::i64, Expand); + setOperationAction(ISD::MULHU, MVT::i32, Expand); if (Subtarget->isThumb1Only()) { - setOperationAction(ISD::MUL, MVT::i64, Expand); - setOperationAction(ISD::MULHU, MVT::i32, Expand); - setOperationAction(ISD::MULHS, MVT::i32, Expand); setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand); setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand); - } else { - setOperationAction(ISD::MUL, MVT::i64, Expand); - setOperationAction(ISD::MULHU, MVT::i32, Expand); - if (!Subtarget->hasV6Ops()) - setOperationAction(ISD::MULHS, MVT::i32, Expand); } + if (Subtarget->isThumb1Only() || !Subtarget->hasV6Ops()) + setOperationAction(ISD::MULHS, MVT::i32, Expand); + setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom); setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom); setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom); @@ -597,6 +609,18 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i8, Expand); setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i16, Expand); setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i32, Expand); + setOperationAction(ISD::ATOMIC_LOAD_MIN, MVT::i8, Expand); + setOperationAction(ISD::ATOMIC_LOAD_MIN, MVT::i16, Expand); + setOperationAction(ISD::ATOMIC_LOAD_MIN, MVT::i32, Expand); + setOperationAction(ISD::ATOMIC_LOAD_MAX, MVT::i8, Expand); + setOperationAction(ISD::ATOMIC_LOAD_MAX, MVT::i16, Expand); + setOperationAction(ISD::ATOMIC_LOAD_MAX, MVT::i32, Expand); + setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i8, Expand); + setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i16, Expand); + setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i32, Expand); + setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i8, Expand); + setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i16, Expand); + setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i32, Expand); // Since the libcalls include locking, fold in the fences setShouldFoldAtomicFences(true); } @@ -716,7 +740,7 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) // pressure of the register class's representative and all of it's super // classes' representatives transitively. We have not implemented this because // of the difficulty prior to coalescing of modeling operand register classes -// due to the common occurence of cross class copies and subregister insertions +// due to the common occurrence of cross class copies and subregister insertions // and extractions. 
std::pair<const TargetRegisterClass*, uint8_t> ARMTargetLowering::findRepresentativeClass(EVT VT) const{ @@ -778,7 +802,6 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const { case ARMISD::BCC_i64: return "ARMISD::BCC_i64"; case ARMISD::FMSTAT: return "ARMISD::FMSTAT"; case ARMISD::CMOV: return "ARMISD::CMOV"; - case ARMISD::CNEG: return "ARMISD::CNEG"; case ARMISD::RBIT: return "ARMISD::RBIT"; @@ -853,6 +876,8 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const { case ARMISD::VZIP: return "ARMISD::VZIP"; case ARMISD::VUZP: return "ARMISD::VUZP"; case ARMISD::VTRN: return "ARMISD::VTRN"; + case ARMISD::VTBL1: return "ARMISD::VTBL1"; + case ARMISD::VTBL2: return "ARMISD::VTBL2"; case ARMISD::VMULLs: return "ARMISD::VMULLs"; case ARMISD::VMULLu: return "ARMISD::VMULLu"; case ARMISD::BUILD_VECTOR: return "ARMISD::BUILD_VECTOR"; @@ -861,6 +886,7 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const { case ARMISD::BFI: return "ARMISD::BFI"; case ARMISD::VORRIMM: return "ARMISD::VORRIMM"; case ARMISD::VBICIMM: return "ARMISD::VBICIMM"; + case ARMISD::VBSL: return "ARMISD::VBSL"; case ARMISD::VLD2DUP: return "ARMISD::VLD2DUP"; case ARMISD::VLD3DUP: return "ARMISD::VLD3DUP"; case ARMISD::VLD4DUP: return "ARMISD::VLD4DUP"; @@ -946,27 +972,6 @@ Sched::Preference ARMTargetLowering::getSchedulingPreference(SDNode *N) const { return Sched::RegPressure; } -// FIXME: Move to RegInfo -unsigned -ARMTargetLowering::getRegPressureLimit(const TargetRegisterClass *RC, - MachineFunction &MF) const { - const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); - - switch (RC->getID()) { - default: - return 0; - case ARM::tGPRRegClassID: - return TFI->hasFP(MF) ? 4 : 5; - case ARM::GPRRegClassID: { - unsigned FP = TFI->hasFP(MF) ? 1 : 0; - return 10 - FP - (Subtarget->isR9Reserved() ? 1 : 0); - } - case ARM::SPRRegClassID: // Currently not used as 'rep' register class. - case ARM::DPRRegClassID: - return 32 - 10; - } -} - //===----------------------------------------------------------------------===// // Lowering Code //===----------------------------------------------------------------------===// @@ -1130,22 +1135,6 @@ ARMTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag, return Chain; } -/// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified -/// by "Src" to address "Dst" of size "Size". Alignment information is -/// specified by the specific parameter attribute. The copy will be passed as -/// a byval function parameter. -/// Sometimes what we are copying is the end of a larger object, the part that -/// does not fit in registers. -static SDValue -CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain, - ISD::ArgFlagsTy Flags, SelectionDAG &DAG, - DebugLoc dl) { - SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), MVT::i32); - return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(), - /*isVolatile=*/false, /*AlwaysInline=*/false, - MachinePointerInfo(0), MachinePointerInfo(0)); -} - /// LowerMemOpCallTo - Store the argument to the stack. 
SDValue ARMTargetLowering::LowerMemOpCallTo(SDValue Chain, @@ -1156,9 +1145,6 @@ ARMTargetLowering::LowerMemOpCallTo(SDValue Chain, unsigned LocMemOffset = VA.getLocMemOffset(); SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset); PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, PtrOff); - if (Flags.isByVal()) - return CreateCopyOfByValArgument(Arg, PtrOff, Chain, Flags, DAG, dl); - return DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo::getStack(LocMemOffset), false, false, 0); @@ -1224,6 +1210,7 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee, SmallVector<CCValAssign, 16> ArgLocs; CCState CCInfo(CallConv, isVarArg, getTargetMachine(), ArgLocs, *DAG.getContext()); + CCInfo.setCallOrPrologue(Call); CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForNode(CallConv, /* Return*/ false, isVarArg)); @@ -1253,6 +1240,7 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee, CCValAssign &VA = ArgLocs[i]; SDValue Arg = OutVals[realArgIdx]; ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags; + bool isByVal = Flags.isByVal(); // Promote the value if needed. switch (VA.getLocInfo()) { @@ -1299,6 +1287,43 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee, } } else if (VA.isRegLoc()) { RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg)); + } else if (isByVal) { + assert(VA.isMemLoc()); + unsigned offset = 0; + + // True if this byval aggregate will be split between registers + // and memory. + if (CCInfo.isFirstByValRegValid()) { + EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); + unsigned int i, j; + for (i = 0, j = CCInfo.getFirstByValReg(); j < ARM::R4; i++, j++) { + SDValue Const = DAG.getConstant(4*i, MVT::i32); + SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const); + SDValue Load = DAG.getLoad(PtrVT, dl, Chain, AddArg, + MachinePointerInfo(), + false, false, 0); + MemOpChains.push_back(Load.getValue(1)); + RegsToPass.push_back(std::make_pair(j, Load)); + } + offset = ARM::R4 - CCInfo.getFirstByValReg(); + CCInfo.clearFirstByValReg(); + } + + unsigned LocMemOffset = VA.getLocMemOffset(); + SDValue StkPtrOff = DAG.getIntPtrConstant(LocMemOffset); + SDValue Dst = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, + StkPtrOff); + SDValue SrcOffset = DAG.getIntPtrConstant(4*offset); + SDValue Src = DAG.getNode(ISD::ADD, dl, getPointerTy(), Arg, SrcOffset); + SDValue SizeNode = DAG.getConstant(Flags.getByValSize() - 4*offset, + MVT::i32); + MemOpChains.push_back(DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, + Flags.getByValAlign(), + /*isVolatile=*/false, + /*AlwaysInline=*/false, + MachinePointerInfo(0), + MachinePointerInfo(0))); + } else if (!IsSibCall) { assert(VA.isMemLoc()); @@ -1332,7 +1357,7 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee, // than necessary, because it means that each store effectively depends // on every argument instead of just those arguments it would clobber. - // Do not flag preceeding copytoreg stuff together with the following stuff. + // Do not flag preceding copytoreg stuff together with the following stuff. InFlag = SDValue(); for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first, @@ -1492,6 +1517,35 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee, dl, DAG, InVals); } +/// HandleByVal - Every parameter *after* a byval parameter is passed +/// on the stack. Remember the next parameter register to allocate, +/// and then confiscate the rest of the parameter registers to insure +/// this. 
+void +llvm::ARMTargetLowering::HandleByVal(CCState *State, unsigned &size) const { + unsigned reg = State->AllocateReg(GPRArgRegs, 4); + assert((State->getCallOrPrologue() == Prologue || + State->getCallOrPrologue() == Call) && + "unhandled ParmContext"); + if ((!State->isFirstByValRegValid()) && + (ARM::R0 <= reg) && (reg <= ARM::R3)) { + State->setFirstByValReg(reg); + // At a call site, a byval parameter that is split between + // registers and memory needs its size truncated here. In a + // function prologue, such byval parameters are reassembled in + // memory, and are not truncated. + if (State->getCallOrPrologue() == Call) { + unsigned excess = 4 * (ARM::R4 - reg); + assert(size >= excess && "expected larger existing stack allocation"); + size -= excess; + } + } + // Confiscate any remaining parameter registers to preclude their + // assignment to subsequent parameters. + while (State->AllocateReg(GPRArgRegs, 4)) + ; +} + /// MatchingStackOffset - Return true if the given stack call argument is /// already available in the same position (relatively) of the caller's /// incoming argument stack. @@ -1813,6 +1867,16 @@ bool ARMTargetLowering::isUsedByReturnOnly(SDNode *N) const { return HasRet; } +bool ARMTargetLowering::mayBeEmittedAsTailCall(CallInst *CI) const { + if (!EnableARMTailCalls) + return false; + + if (!CI->isTailCall()) + return false; + + return !Subtarget->isThumb1Only(); +} + // ConstantPool, JumpTable, GlobalAddress, and ExternalSymbol are lowered as // their target counterpart wrapped in the ARMISD::Wrapper node. Suppose N is // one of the above mentioned nodes. It has to be wrapped because otherwise @@ -2096,7 +2160,7 @@ ARMTargetLowering::LowerEH_SJLJ_DISPATCHSETUP(SDValue Op, SelectionDAG &DAG) const { DebugLoc dl = Op.getDebugLoc(); return DAG.getNode(ARMISD::EH_SJLJ_DISPATCHSETUP, dl, MVT::Other, - Op.getOperand(0), Op.getOperand(1)); + Op.getOperand(0)); } SDValue @@ -2151,6 +2215,13 @@ ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG, } return Result; } + case Intrinsic::arm_neon_vmulls: + case Intrinsic::arm_neon_vmullu: { + unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vmulls) + ? ARMISD::VMULLs : ARMISD::VMULLu; + return DAG.getNode(NewOpc, Op.getDebugLoc(), Op.getValueType(), + Op.getOperand(1), Op.getOperand(2)); + } } } @@ -2257,6 +2328,88 @@ ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA, CCValAssign &NextVA, return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, ArgValue, ArgValue2); } +void +ARMTargetLowering::computeRegArea(CCState &CCInfo, MachineFunction &MF, + unsigned &VARegSize, unsigned &VARegSaveSize) + const { + unsigned NumGPRs; + if (CCInfo.isFirstByValRegValid()) + NumGPRs = ARM::R4 - CCInfo.getFirstByValReg(); + else { + unsigned int firstUnalloced; + firstUnalloced = CCInfo.getFirstUnallocated(GPRArgRegs, + sizeof(GPRArgRegs) / + sizeof(GPRArgRegs[0])); + NumGPRs = (firstUnalloced <= 3) ? (4 - firstUnalloced) : 0; + } + + unsigned Align = MF.getTarget().getFrameLowering()->getStackAlignment(); + VARegSize = NumGPRs * 4; + VARegSaveSize = (VARegSize + Align - 1) & ~(Align - 1); +} + +// The remaining GPRs hold either the beginning of variable-argument +// data, or the beginning of an aggregate passed by value (usually +// byval). Either way, we allocate stack slots adjacent to the data +// provided by our caller, and store the unallocated registers there.
+// If this is a variadic function, the va_list pointer will begin with +// these values; otherwise, this reassembles a (byval) structure that +// was split between registers and memory. +void +ARMTargetLowering::VarArgStyleRegisters(CCState &CCInfo, SelectionDAG &DAG, + DebugLoc dl, SDValue &Chain, + unsigned ArgOffset) const { + MachineFunction &MF = DAG.getMachineFunction(); + MachineFrameInfo *MFI = MF.getFrameInfo(); + ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); + unsigned firstRegToSaveIndex; + if (CCInfo.isFirstByValRegValid()) + firstRegToSaveIndex = CCInfo.getFirstByValReg() - ARM::R0; + else { + firstRegToSaveIndex = CCInfo.getFirstUnallocated + (GPRArgRegs, sizeof(GPRArgRegs) / sizeof(GPRArgRegs[0])); + } + + unsigned VARegSize, VARegSaveSize; + computeRegArea(CCInfo, MF, VARegSize, VARegSaveSize); + if (VARegSaveSize) { + // If this function is vararg, store any remaining integer argument regs + // to their spots on the stack so that they may be loaded by dereferencing + // the result of va_next. + AFI->setVarArgsRegSaveSize(VARegSaveSize); + AFI->setVarArgsFrameIndex(MFI->CreateFixedObject(VARegSaveSize, + ArgOffset + VARegSaveSize + - VARegSize, + false)); + SDValue FIN = DAG.getFrameIndex(AFI->getVarArgsFrameIndex(), + getPointerTy()); + + SmallVector<SDValue, 4> MemOps; + for (; firstRegToSaveIndex < 4; ++firstRegToSaveIndex) { + TargetRegisterClass *RC; + if (AFI->isThumb1OnlyFunction()) + RC = ARM::tGPRRegisterClass; + else + RC = ARM::GPRRegisterClass; + + unsigned VReg = MF.addLiveIn(GPRArgRegs[firstRegToSaveIndex], RC); + SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32); + SDValue Store = + DAG.getStore(Val.getValue(1), dl, Val, FIN, + MachinePointerInfo::getFixedStack(AFI->getVarArgsFrameIndex()), + false, false, 0); + MemOps.push_back(Store); + FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), FIN, + DAG.getConstant(4, getPointerTy())); + } + if (!MemOps.empty()) + Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, + &MemOps[0], MemOps.size()); + } else + // This will point to the next argument passed via stack. + AFI->setVarArgsFrameIndex(MFI->CreateFixedObject(4, ArgOffset, true)); +} + SDValue ARMTargetLowering::LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, @@ -2265,7 +2418,6 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain, DebugLoc dl, SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const { - MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo *MFI = MF.getFrameInfo(); @@ -2275,12 +2427,15 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain, SmallVector<CCValAssign, 16> ArgLocs; CCState CCInfo(CallConv, isVarArg, getTargetMachine(), ArgLocs, *DAG.getContext()); + CCInfo.setCallOrPrologue(Prologue); CCInfo.AnalyzeFormalArguments(Ins, CCAssignFnForNode(CallConv, /* Return*/ false, isVarArg)); SmallVector<SDValue, 16> ArgValues; + int lastInsIndex = -1; + SDValue ArgValue; for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { CCValAssign &VA = ArgLocs[i]; @@ -2288,7 +2443,6 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain, if (VA.isRegLoc()) { EVT RegVT = VA.getLocVT(); - SDValue ArgValue; if (VA.needsCustom()) { // f64 and vector types are split up into multiple registers or // combinations of registers and stack slots.
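The computeRegArea helper above boils down to a little arithmetic: the GPRs left after r0-r3 allocation (or after the first byval register) occupy NumGPRs * 4 bytes, rounded up to the stack alignment for the save area. Mirrored as a runnable sketch:

    #include <cstdio>

    // firstRegToSave is an index 0..4 into r0-r3 (4 means none left).
    static void regArea(unsigned firstRegToSave, unsigned align,
                        unsigned &varRegSize, unsigned &varRegSaveSize) {
      unsigned numGPRs = firstRegToSave <= 3 ? 4 - firstRegToSave : 0;
      varRegSize = numGPRs * 4;
      varRegSaveSize = (varRegSize + align - 1) & ~(align - 1);
    }

    int main() {
      unsigned sz, save;
      regArea(1, 8, sz, save);        // r1-r3 unallocated, 8-byte alignment
      std::printf("size=%u save=%u\n", sz, save);   // size=12 save=16
    }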
@@ -2364,67 +2518,45 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain, assert(VA.isMemLoc()); assert(VA.getValVT() != MVT::i64 && "i64 should already be lowered"); - unsigned ArgSize = VA.getLocVT().getSizeInBits()/8; - int FI = MFI->CreateFixedObject(ArgSize, VA.getLocMemOffset(), true); + int index = ArgLocs[i].getValNo(); + + // Some Ins[] entries become multiple ArgLoc[] entries. + // Process them only once. + if (index != lastInsIndex) + { + ISD::ArgFlagsTy Flags = Ins[index].Flags; + // FIXME: For now, all byval parameter objects are marked mutable. + // This can be changed with more analysis. + // In case of tail call optimization mark all arguments mutable. + // Since they could be overwritten by lowering of arguments in case of + // a tail call. + if (Flags.isByVal()) { + unsigned VARegSize, VARegSaveSize; + computeRegArea(CCInfo, MF, VARegSize, VARegSaveSize); + VarArgStyleRegisters(CCInfo, DAG, dl, Chain, 0); + unsigned Bytes = Flags.getByValSize() - VARegSize; + if (Bytes == 0) Bytes = 1; // Don't create zero-sized stack objects. + int FI = MFI->CreateFixedObject(Bytes, + VA.getLocMemOffset(), false); + InVals.push_back(DAG.getFrameIndex(FI, getPointerTy())); + } else { + int FI = MFI->CreateFixedObject(VA.getLocVT().getSizeInBits()/8, + VA.getLocMemOffset(), true); - // Create load nodes to retrieve arguments from the stack. - SDValue FIN = DAG.getFrameIndex(FI, getPointerTy()); - InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN, - MachinePointerInfo::getFixedStack(FI), - false, false, 0)); + // Create load nodes to retrieve arguments from the stack. + SDValue FIN = DAG.getFrameIndex(FI, getPointerTy()); + InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN, + MachinePointerInfo::getFixedStack(FI), + false, false, 0)); + } + lastInsIndex = index; + } } } // varargs - if (isVarArg) { - static const unsigned GPRArgRegs[] = { - ARM::R0, ARM::R1, ARM::R2, ARM::R3 - }; - - unsigned NumGPRs = CCInfo.getFirstUnallocated - (GPRArgRegs, sizeof(GPRArgRegs) / sizeof(GPRArgRegs[0])); - - unsigned Align = MF.getTarget().getFrameLowering()->getStackAlignment(); - unsigned VARegSize = (4 - NumGPRs) * 4; - unsigned VARegSaveSize = (VARegSize + Align - 1) & ~(Align - 1); - unsigned ArgOffset = CCInfo.getNextStackOffset(); - if (VARegSaveSize) { - // If this function is vararg, store any remaining integer argument regs - // to their spots on the stack so that they may be loaded by deferencing - // the result of va_next. - AFI->setVarArgsRegSaveSize(VARegSaveSize); - AFI->setVarArgsFrameIndex( - MFI->CreateFixedObject(VARegSaveSize, - ArgOffset + VARegSaveSize - VARegSize, - false)); - SDValue FIN = DAG.getFrameIndex(AFI->getVarArgsFrameIndex(), - getPointerTy()); - - SmallVector<SDValue, 4> MemOps; - for (; NumGPRs < 4; ++NumGPRs) { - TargetRegisterClass *RC; - if (AFI->isThumb1OnlyFunction()) - RC = ARM::tGPRRegisterClass; - else - RC = ARM::GPRRegisterClass; - - unsigned VReg = MF.addLiveIn(GPRArgRegs[NumGPRs], RC); - SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32); - SDValue Store = - DAG.getStore(Val.getValue(1), dl, Val, FIN, - MachinePointerInfo::getFixedStack(AFI->getVarArgsFrameIndex()), - false, false, 0); - MemOps.push_back(Store); - FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), FIN, - DAG.getConstant(4, getPointerTy())); - } - if (!MemOps.empty()) - Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, - &MemOps[0], MemOps.size()); - } else - // This will point to the next argument passed via stack. 
- AFI->setVarArgsFrameIndex(MFI->CreateFixedObject(4, ArgOffset, true)); - } + if (isVarArg) + VarArgStyleRegisters(CCInfo, DAG, dl, Chain, CCInfo.getNextStackOffset()); return Chain; } @@ -2517,6 +2649,27 @@ ARMTargetLowering::getVFPCmp(SDValue LHS, SDValue RHS, SelectionDAG &DAG, return DAG.getNode(ARMISD::FMSTAT, dl, MVT::Glue, Cmp); } +/// duplicateCmp - Glue values can have only one use, so this function +/// duplicates a comparison node. +SDValue +ARMTargetLowering::duplicateCmp(SDValue Cmp, SelectionDAG &DAG) const { + unsigned Opc = Cmp.getOpcode(); + DebugLoc DL = Cmp.getDebugLoc(); + if (Opc == ARMISD::CMP || Opc == ARMISD::CMPZ) + return DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0),Cmp.getOperand(1)); + + assert(Opc == ARMISD::FMSTAT && "unexpected comparison operation"); + Cmp = Cmp.getOperand(0); + Opc = Cmp.getOpcode(); + if (Opc == ARMISD::CMPFP) + Cmp = DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0),Cmp.getOperand(1)); + else { + assert(Opc == ARMISD::CMPFPw0 && "unexpected operand of FMSTAT"); + Cmp = DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0)); + } + return DAG.getNode(ARMISD::FMSTAT, DL, MVT::Glue, Cmp); +} + SDValue ARMTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { SDValue Cond = Op.getOperand(0); SDValue SelectTrue = Op.getOperand(1); @@ -2552,7 +2705,7 @@ SDValue ARMTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { EVT VT = Cond.getValueType(); SDValue ARMcc = Cond.getOperand(2); SDValue CCR = Cond.getOperand(3); - SDValue Cmp = Cond.getOperand(4); + SDValue Cmp = duplicateCmp(Cond.getOperand(4), DAG); return DAG.getNode(ARMISD::CMOV, dl, VT, True, False, ARMcc, CCR, Cmp); } } @@ -2681,8 +2834,8 @@ ARMTargetLowering::OptimizeVFPBrcond(SDValue Op, SelectionDAG &DAG) const { // If one of the operand is zero, it's safe to ignore the NaN case since // we only care about equality comparisons. (SeenZero || (DAG.isKnownNeverNaN(LHS) && DAG.isKnownNeverNaN(RHS)))) { - // If unsafe fp math optimization is enabled and there are no othter uses of - // the CMP operands, and the condition code is EQ oe NE, we can optimize it + // If unsafe fp math optimization is enabled and there are no other uses of + // the CMP operands, and the condition code is EQ or NE, we can optimize it // to an integer comparison. 
if (CC == ISD::SETOEQ) CC = ISD::SETEQ; @@ -2811,8 +2964,39 @@ static SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) { return DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op); } +static SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG) { + EVT VT = Op.getValueType(); + DebugLoc dl = Op.getDebugLoc(); + + EVT OperandVT = Op.getOperand(0).getValueType(); + assert(OperandVT == MVT::v4i16 && "Invalid type for custom lowering!"); + if (VT != MVT::v4f32) + return DAG.UnrollVectorOp(Op.getNode()); + + unsigned CastOpc; + unsigned Opc; + switch (Op.getOpcode()) { + default: + assert(0 && "Invalid opcode!"); + case ISD::SINT_TO_FP: + CastOpc = ISD::SIGN_EXTEND; + Opc = ISD::SINT_TO_FP; + break; + case ISD::UINT_TO_FP: + CastOpc = ISD::ZERO_EXTEND; + Opc = ISD::UINT_TO_FP; + break; + } + + Op = DAG.getNode(CastOpc, dl, MVT::v4i32, Op.getOperand(0)); + return DAG.getNode(Opc, dl, VT, Op); +} + static SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) { EVT VT = Op.getValueType(); + if (VT.isVector()) + return LowerVectorINT_TO_FP(Op, DAG); + DebugLoc dl = Op.getDebugLoc(); unsigned Opc; @@ -2860,7 +3044,10 @@ SDValue ARMTargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const { Tmp1 = DAG.getNode(ARMISD::VSHL, dl, OpVT, DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp1), DAG.getConstant(32, MVT::i32)); - } + } else if (VT == MVT::f32) + Tmp1 = DAG.getNode(ARMISD::VSHRu, dl, MVT::v1i64, + DAG.getNode(ISD::BITCAST, dl, MVT::v1i64, Tmp1), + DAG.getConstant(32, MVT::i32)); Tmp0 = DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp0); Tmp1 = DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp1); @@ -2869,11 +3056,11 @@ SDValue ARMTargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const { AllOnes = DAG.getNode(ARMISD::VMOVIMM, dl, MVT::v8i8, AllOnes); SDValue MaskNot = DAG.getNode(ISD::XOR, dl, OpVT, Mask, DAG.getNode(ISD::BITCAST, dl, OpVT, AllOnes)); - + SDValue Res = DAG.getNode(ISD::OR, dl, OpVT, DAG.getNode(ISD::AND, dl, OpVT, Tmp1, Mask), DAG.getNode(ISD::AND, dl, OpVT, Tmp0, MaskNot)); - if (SrcVT == MVT::f32) { + if (VT == MVT::f32) { Res = DAG.getNode(ISD::BITCAST, dl, MVT::v2f32, Res); Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f32, Res, DAG.getConstant(0, MVT::i32)); @@ -3508,6 +3695,13 @@ static bool isVREVMask(const SmallVectorImpl<int> &M, EVT VT, return true; } +static bool isVTBLMask(const SmallVectorImpl<int> &M, EVT VT) { + // We can handle <8 x i8> vector shuffles. If the index in the mask is out of + // range, then 0 is placed into the resulting vector. So pretty much any mask + // of 8 elements can work here. + return VT == MVT::v8i8 && M.size() == 8; +} + static bool isVTRNMask(const SmallVectorImpl<int> &M, EVT VT, unsigned &WhichResult) { unsigned EltSz = VT.getVectorElementType().getSizeInBits(); @@ -3947,6 +4141,7 @@ ARMTargetLowering::isShuffleMaskLegal(const SmallVectorImpl<int> &M, isVREVMask(M, VT, 32) || isVREVMask(M, VT, 16) || isVEXTMask(M, VT, ReverseVEXT, Imm) || + isVTBLMask(M, VT) || isVTRNMask(M, VT, WhichResult) || isVUZPMask(M, VT, WhichResult) || isVZIPMask(M, VT, WhichResult) || @@ -4024,6 +4219,29 @@ static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS, } } +static SDValue LowerVECTOR_SHUFFLEv8i8(SDValue Op, + SmallVectorImpl<int> &ShuffleMask, + SelectionDAG &DAG) { + // Check to see if we can use the VTBL instruction. 
+ SDValue V1 = Op.getOperand(0); + SDValue V2 = Op.getOperand(1); + DebugLoc DL = Op.getDebugLoc(); + + SmallVector<SDValue, 8> VTBLMask; + for (SmallVectorImpl<int>::iterator + I = ShuffleMask.begin(), E = ShuffleMask.end(); I != E; ++I) + VTBLMask.push_back(DAG.getConstant(*I, MVT::i32)); + + if (V2.getNode()->getOpcode() == ISD::UNDEF) + return DAG.getNode(ARMISD::VTBL1, DL, MVT::v8i8, V1, + DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v8i8, + &VTBLMask[0], 8)); + + return DAG.getNode(ARMISD::VTBL2, DL, MVT::v8i8, V1, V2, + DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v8i8, + &VTBLMask[0], 8)); +} + static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) { SDValue V1 = Op.getOperand(0); SDValue V2 = Op.getOperand(1); @@ -4141,6 +4359,12 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) { return DAG.getNode(ISD::BITCAST, dl, VT, Val); } + if (VT == MVT::v8i8) { + SDValue NewOp = LowerVECTOR_SHUFFLEv8i8(Op, ShuffleMask, DAG); + if (NewOp.getNode()) + return NewOp; + } + return SDValue(); } @@ -4290,6 +4514,28 @@ static SDValue SkipExtension(SDNode *N, SelectionDAG &DAG) { MVT::getVectorVT(TruncVT, NumElts), Ops.data(), NumElts); } +static bool isAddSubSExt(SDNode *N, SelectionDAG &DAG) { + unsigned Opcode = N->getOpcode(); + if (Opcode == ISD::ADD || Opcode == ISD::SUB) { + SDNode *N0 = N->getOperand(0).getNode(); + SDNode *N1 = N->getOperand(1).getNode(); + return N0->hasOneUse() && N1->hasOneUse() && + isSignExtended(N0, DAG) && isSignExtended(N1, DAG); + } + return false; +} + +static bool isAddSubZExt(SDNode *N, SelectionDAG &DAG) { + unsigned Opcode = N->getOpcode(); + if (Opcode == ISD::ADD || Opcode == ISD::SUB) { + SDNode *N0 = N->getOperand(0).getNode(); + SDNode *N1 = N->getOperand(1).getNode(); + return N0->hasOneUse() && N1->hasOneUse() && + isZeroExtended(N0, DAG) && isZeroExtended(N1, DAG); + } + return false; +} + static SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) { // Multiplications are only custom-lowered for 128-bit vectors so that // VMULL can be detected. Otherwise v2i64 multiplications are not legal. @@ -4298,29 +4544,73 @@ static SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) { SDNode *N0 = Op.getOperand(0).getNode(); SDNode *N1 = Op.getOperand(1).getNode(); unsigned NewOpc = 0; - if (isSignExtended(N0, DAG) && isSignExtended(N1, DAG)) + bool isMLA = false; + bool isN0SExt = isSignExtended(N0, DAG); + bool isN1SExt = isSignExtended(N1, DAG); + if (isN0SExt && isN1SExt) NewOpc = ARMISD::VMULLs; - else if (isZeroExtended(N0, DAG) && isZeroExtended(N1, DAG)) - NewOpc = ARMISD::VMULLu; - else if (VT == MVT::v2i64) - // Fall through to expand this. It is not legal. - return SDValue(); - else - // Other vector multiplications are legal. - return Op; + else { + bool isN0ZExt = isZeroExtended(N0, DAG); + bool isN1ZExt = isZeroExtended(N1, DAG); + if (isN0ZExt && isN1ZExt) + NewOpc = ARMISD::VMULLu; + else if (isN1SExt || isN1ZExt) { + // Look for (s/zext A + s/zext B) * (s/zext C). We want to turn these + // into (s/zext A * s/zext C) + (s/zext B * s/zext C) + if (isN1SExt && isAddSubSExt(N0, DAG)) { + NewOpc = ARMISD::VMULLs; + isMLA = true; + } else if (isN1ZExt && isAddSubZExt(N0, DAG)) { + NewOpc = ARMISD::VMULLu; + isMLA = true; + } else if (isN0ZExt && isAddSubZExt(N1, DAG)) { + std::swap(N0, N1); + NewOpc = ARMISD::VMULLu; + isMLA = true; + } + } + + if (!NewOpc) { + if (VT == MVT::v2i64) + // Fall through to expand this. It is not legal. + return SDValue(); + else + // Other vector multiplications are legal. 
+ return Op; + } + } // Legalize to a VMULL instruction. DebugLoc DL = Op.getDebugLoc(); - SDValue Op0 = SkipExtension(N0, DAG); + SDValue Op0; SDValue Op1 = SkipExtension(N1, DAG); - - assert(Op0.getValueType().is64BitVector() && - Op1.getValueType().is64BitVector() && - "unexpected types for extended operands to VMULL"); - return DAG.getNode(NewOpc, DL, VT, Op0, Op1); + if (!isMLA) { + Op0 = SkipExtension(N0, DAG); + assert(Op0.getValueType().is64BitVector() && + Op1.getValueType().is64BitVector() && + "unexpected types for extended operands to VMULL"); + return DAG.getNode(NewOpc, DL, VT, Op0, Op1); + } + + // Optimizing (zext A + zext B) * C, to (VMULL A, C) + (VMULL B, C) during + // isel lowering to take advantage of no-stall back to back vmul + vmla. + // vmull q0, d4, d6 + // vmlal q0, d5, d6 + // is faster than + // vaddl q0, d4, d5 + // vmovl q1, d6 + // vmul q0, q0, q1 + SDValue N00 = SkipExtension(N0->getOperand(0).getNode(), DAG); + SDValue N01 = SkipExtension(N0->getOperand(1).getNode(), DAG); + EVT Op1VT = Op1.getValueType(); + return DAG.getNode(N0->getOpcode(), DL, VT, + DAG.getNode(NewOpc, DL, VT, + DAG.getNode(ISD::BITCAST, DL, Op1VT, N00), Op1), + DAG.getNode(NewOpc, DL, VT, + DAG.getNode(ISD::BITCAST, DL, Op1VT, N01), Op1)); } -static SDValue +static SDValue LowerSDIV_v4i8(SDValue X, SDValue Y, DebugLoc dl, SelectionDAG &DAG) { // Convert to float // float4 xf = vcvt_f32_s32(vmovl_s16(a.lo)); @@ -4331,7 +4621,7 @@ LowerSDIV_v4i8(SDValue X, SDValue Y, DebugLoc dl, SelectionDAG &DAG) { Y = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, Y); // Get reciprocal estimate. // float4 recip = vrecpeq_f32(yf); - Y = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32, + Y = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32, DAG.getConstant(Intrinsic::arm_neon_vrecpe, MVT::i32), Y); // Because char has a smaller range than uchar, we can actually get away // without any newton steps. This requires that we use a weird bias @@ -4349,7 +4639,7 @@ LowerSDIV_v4i8(SDValue X, SDValue Y, DebugLoc dl, SelectionDAG &DAG) { return X; } -static SDValue +static SDValue LowerSDIV_v4i16(SDValue N0, SDValue N1, DebugLoc dl, SelectionDAG &DAG) { SDValue N2; // Convert to float. @@ -4359,13 +4649,13 @@ LowerSDIV_v4i16(SDValue N0, SDValue N1, DebugLoc dl, SelectionDAG &DAG) { N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, N1); N0 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, N0); N1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, N1); - + // Use reciprocal estimate and one refinement step. 
// float4 recip = vrecpeq_f32(yf); // recip *= vrecpsq_f32(yf, recip); - N2 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32, + N2 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32, DAG.getConstant(Intrinsic::arm_neon_vrecpe, MVT::i32), N1); - N1 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32, + N1 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32, DAG.getConstant(Intrinsic::arm_neon_vrecps, MVT::i32), N1, N2); N2 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N1, N2); @@ -4395,15 +4685,15 @@ static SDValue LowerSDIV(SDValue Op, SelectionDAG &DAG) { SDValue N0 = Op.getOperand(0); SDValue N1 = Op.getOperand(1); SDValue N2, N3; - + if (VT == MVT::v8i8) { N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v8i16, N0); N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v8i16, N1); - + N2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0, DAG.getIntPtrConstant(4)); N3 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1, - DAG.getIntPtrConstant(4)); + DAG.getIntPtrConstant(4)); N0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0, DAG.getIntPtrConstant(0)); N1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1, @@ -4414,7 +4704,7 @@ static SDValue LowerSDIV(SDValue Op, SelectionDAG &DAG) { N0 = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v8i16, N0, N2); N0 = LowerCONCAT_VECTORS(N0, DAG); - + N0 = DAG.getNode(ISD::TRUNCATE, dl, MVT::v8i8, N0); return N0; } @@ -4430,32 +4720,32 @@ static SDValue LowerUDIV(SDValue Op, SelectionDAG &DAG) { SDValue N0 = Op.getOperand(0); SDValue N1 = Op.getOperand(1); SDValue N2, N3; - + if (VT == MVT::v8i8) { N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v8i16, N0); N1 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v8i16, N1); - + N2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0, DAG.getIntPtrConstant(4)); N3 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1, - DAG.getIntPtrConstant(4)); + DAG.getIntPtrConstant(4)); N0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0, DAG.getIntPtrConstant(0)); N1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1, DAG.getIntPtrConstant(0)); - + N0 = LowerSDIV_v4i16(N0, N1, dl, DAG); // v4i16 N2 = LowerSDIV_v4i16(N2, N3, dl, DAG); // v4i16 - + N0 = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v8i16, N0, N2); N0 = LowerCONCAT_VECTORS(N0, DAG); - - N0 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v8i8, + + N0 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v8i8, DAG.getConstant(Intrinsic::arm_neon_vqmovnsu, MVT::i32), N0); return N0; } - + // v4i16 sdiv ... Convert to float. 
  // float4 yf = vcvt_f32_s32(vmovl_u16(y));
  // float4 xf = vcvt_f32_s32(vmovl_u16(x));
@@ -4468,13 +4758,13 @@ static SDValue LowerUDIV(SDValue Op, SelectionDAG &DAG) {
   // float4 recip = vrecpeq_f32(yf);
   // recip *= vrecpsq_f32(yf, recip);
   // recip *= vrecpsq_f32(yf, recip);
-  N2 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32, 
+  N2 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
                    DAG.getConstant(Intrinsic::arm_neon_vrecpe, MVT::i32), N1);
-  N1 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32, 
+  N1 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
                    DAG.getConstant(Intrinsic::arm_neon_vrecps, MVT::i32),
                    N1, N2);
   N2 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N1, N2);
-  N1 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32, 
+  N1 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
                    DAG.getConstant(Intrinsic::arm_neon_vrecps, MVT::i32),
                    N1, N2);
   N2 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N1, N2);
@@ -4503,7 +4793,7 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
   case ISD::GlobalAddress:
     return Subtarget->isTargetDarwin() ? LowerGlobalAddressDarwin(Op, DAG) :
       LowerGlobalAddressELF(Op, DAG);
-  case ISD::GlobalTLSAddress:   return LowerGlobalTLSAddress(Op, DAG); 
+  case ISD::GlobalTLSAddress:   return LowerGlobalTLSAddress(Op, DAG);
   case ISD::SELECT:        return LowerSELECT(Op, DAG);
   case ISD::SELECT_CC:     return LowerSELECT_CC(Op, DAG);
   case ISD::BR_CC:         return LowerBR_CC(Op, DAG);
@@ -4524,7 +4814,7 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
   case ISD::EH_SJLJ_DISPATCHSETUP: return LowerEH_SJLJ_DISPATCHSETUP(Op, DAG);
   case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG,
                                                                Subtarget);
-  case ISD::BITCAST:   return ExpandBITCAST(Op.getNode(), DAG); 
+  case ISD::BITCAST:   return ExpandBITCAST(Op.getNode(), DAG);
   case ISD::SHL:
   case ISD::SRL:
   case ISD::SRA:       return LowerShift(Op.getNode(), DAG, Subtarget);
@@ -4754,6 +5044,109 @@ ARMTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
   return BB;
 }
 
+MachineBasicBlock *
+ARMTargetLowering::EmitAtomicBinaryMinMax(MachineInstr *MI,
+                                          MachineBasicBlock *BB,
+                                          unsigned Size,
+                                          bool signExtend,
+                                          ARMCC::CondCodes Cond) const {
+  const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+
+  const BasicBlock *LLVM_BB = BB->getBasicBlock();
+  MachineFunction *MF = BB->getParent();
+  MachineFunction::iterator It = BB;
+  ++It;
+
+  unsigned dest = MI->getOperand(0).getReg();
+  unsigned ptr = MI->getOperand(1).getReg();
+  unsigned incr = MI->getOperand(2).getReg();
+  unsigned oldval = dest;
+  DebugLoc dl = MI->getDebugLoc();
+
+  bool isThumb2 = Subtarget->isThumb2();
+  unsigned ldrOpc, strOpc, extendOpc;
+  switch (Size) {
+  default: llvm_unreachable("unsupported size for AtomicBinaryMinMax!");
+  case 1:
+    ldrOpc = isThumb2 ? ARM::t2LDREXB : ARM::LDREXB;
+    strOpc = isThumb2 ? ARM::t2STREXB : ARM::STREXB;
+    extendOpc = isThumb2 ? ARM::t2SXTBr : ARM::SXTBr;
+    break;
+  case 2:
+    ldrOpc = isThumb2 ? ARM::t2LDREXH : ARM::LDREXH;
+    strOpc = isThumb2 ? ARM::t2STREXH : ARM::STREXH;
+    extendOpc = isThumb2 ? ARM::t2SXTHr : ARM::SXTHr;
+    break;
+  case 4:
+    ldrOpc = isThumb2 ? ARM::t2LDREX : ARM::LDREX;
+    strOpc = isThumb2 ? ARM::t2STREX : ARM::STREX;
+    extendOpc = 0;
+    break;
+  }
+
+  MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB);
+  MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
+  MF->insert(It, loopMBB);
+  MF->insert(It, exitMBB);
+
+  // Transfer the remainder of BB and its successor edges to exitMBB.
+  exitMBB->splice(exitMBB->begin(), BB,
+                  llvm::next(MachineBasicBlock::iterator(MI)),
+                  BB->end());
+  exitMBB->transferSuccessorsAndUpdatePHIs(BB);
+
+  MachineRegisterInfo &RegInfo = MF->getRegInfo();
+  unsigned scratch = RegInfo.createVirtualRegister(ARM::GPRRegisterClass);
+  unsigned scratch2 = RegInfo.createVirtualRegister(ARM::GPRRegisterClass);
+
+  //  thisMBB:
+  //   ...
+  //   fallthrough --> loopMBB
+  BB->addSuccessor(loopMBB);
+
+  //  loopMBB:
+  //   ldrex dest, ptr
+  //   (sign extend dest, if required)
+  //   cmp dest, incr
+  //   cmov.cond scratch2, dest, incr
+  //   strex scratch, scratch2, ptr
+  //   cmp scratch, #0
+  //   bne- loopMBB
+  //   fallthrough --> exitMBB
+  BB = loopMBB;
+  AddDefaultPred(BuildMI(BB, dl, TII->get(ldrOpc), dest).addReg(ptr));
+
+  // Sign extend the value, if necessary.
+  if (signExtend && extendOpc) {
+    oldval = RegInfo.createVirtualRegister(ARM::GPRRegisterClass);
+    AddDefaultPred(BuildMI(BB, dl, TII->get(extendOpc), oldval).addReg(dest));
+  }
+
+  // Build compare and cmov instructions.
+  AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr))
+                 .addReg(oldval).addReg(incr));
+  BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2MOVCCr : ARM::MOVCCr), scratch2)
+    .addReg(oldval).addReg(incr).addImm(Cond).addReg(ARM::CPSR);
+
+  AddDefaultPred(BuildMI(BB, dl, TII->get(strOpc), scratch).addReg(scratch2)
+                 .addReg(ptr));
+  AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
+                 .addReg(scratch).addImm(0));
+  BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc))
+    .addMBB(loopMBB).addImm(ARMCC::NE).addReg(ARM::CPSR);
+
+  BB->addSuccessor(loopMBB);
+  BB->addSuccessor(exitMBB);
+
+  //  exitMBB:
+  //   ...
+  BB = exitMBB;
+
+  MI->eraseFromParent();   // The instruction is gone now.
+
+  return BB;
+}
+
 static
 MachineBasicBlock *OtherSucc(MachineBasicBlock *MBB, MachineBasicBlock *Succ) {
   for (MachineBasicBlock::succ_iterator I = MBB->succ_begin(),
@@ -4763,6 +5156,72 @@ MachineBasicBlock *OtherSucc(MachineBasicBlock *MBB, MachineBasicBlock *Succ) {
   llvm_unreachable("Expecting a BB with two successors!");
 }
 
+// FIXME: This opcode table should obviously be expressed in the target
+// description. We probably just need a "machine opcode" value in the pseudo
+// instruction. But the ideal solution may be to simply remove the "S" version
+// of the opcode altogether.
+struct AddSubFlagsOpcodePair {
+  unsigned PseudoOpc;
+  unsigned MachineOpc;
+};
+
+static AddSubFlagsOpcodePair AddSubFlagsOpcodeMap[] = {
+  {ARM::ADCSri, ARM::ADCri},
+  {ARM::ADCSrr, ARM::ADCrr},
+  {ARM::ADCSrs, ARM::ADCrs},
+  {ARM::SBCSri, ARM::SBCri},
+  {ARM::SBCSrr, ARM::SBCrr},
+  {ARM::SBCSrs, ARM::SBCrs},
+  {ARM::RSBSri, ARM::RSBri},
+  {ARM::RSBSrr, ARM::RSBrr},
+  {ARM::RSBSrs, ARM::RSBrs},
+  {ARM::RSCSri, ARM::RSCri},
+  {ARM::RSCSrs, ARM::RSCrs},
+  {ARM::t2ADCSri, ARM::t2ADCri},
+  {ARM::t2ADCSrr, ARM::t2ADCrr},
+  {ARM::t2ADCSrs, ARM::t2ADCrs},
+  {ARM::t2SBCSri, ARM::t2SBCri},
+  {ARM::t2SBCSrr, ARM::t2SBCrr},
+  {ARM::t2SBCSrs, ARM::t2SBCrs},
+  {ARM::t2RSBSri, ARM::t2RSBri},
+  {ARM::t2RSBSrs, ARM::t2RSBrs},
+};
+
+// Convert an Add or Subtract with Carry and Flags to a generic opcode with
+// CPSR<def> operand. e.g. ADCS (...) -> ADC (... CPSR<def>).
+//
+// FIXME: Somewhere we should assert that CPSR<def> is in the correct
+// position to be recognized by the target description as the 'S' bit.
+bool ARMTargetLowering::RemapAddSubWithFlags(MachineInstr *MI, + MachineBasicBlock *BB) const { + unsigned OldOpc = MI->getOpcode(); + unsigned NewOpc = 0; + + // This is only called for instructions that need remapping, so iterating over + // the tiny opcode table is not costly. + static const int NPairs = + sizeof(AddSubFlagsOpcodeMap) / sizeof(AddSubFlagsOpcodePair); + for (AddSubFlagsOpcodePair *Pair = &AddSubFlagsOpcodeMap[0], + *End = &AddSubFlagsOpcodeMap[NPairs]; Pair != End; ++Pair) { + if (OldOpc == Pair->PseudoOpc) { + NewOpc = Pair->MachineOpc; + break; + } + } + if (!NewOpc) + return false; + + const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); + DebugLoc dl = MI->getDebugLoc(); + MachineInstrBuilder MIB = BuildMI(*BB, MI, dl, TII->get(NewOpc)); + for (unsigned i = 0; i < MI->getNumOperands(); ++i) + MIB.addOperand(MI->getOperand(i)); + AddDefaultPred(MIB); + MIB.addReg(ARM::CPSR, RegState::Define); // S bit + MI->eraseFromParent(); + return true; +} + MachineBasicBlock * ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *BB) const { @@ -4770,10 +5229,13 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, DebugLoc dl = MI->getDebugLoc(); bool isThumb2 = Subtarget->isThumb2(); switch (MI->getOpcode()) { - default: + default: { + if (RemapAddSubWithFlags(MI, BB)) + return BB; + MI->dump(); llvm_unreachable("Unexpected instr type to insert"); - + } case ARM::ATOMIC_LOAD_ADD_I8: return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2ADDrr : ARM::ADDrr); case ARM::ATOMIC_LOAD_ADD_I16: @@ -4816,6 +5278,34 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, case ARM::ATOMIC_LOAD_SUB_I32: return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr); + case ARM::ATOMIC_LOAD_MIN_I8: + return EmitAtomicBinaryMinMax(MI, BB, 1, true, ARMCC::LT); + case ARM::ATOMIC_LOAD_MIN_I16: + return EmitAtomicBinaryMinMax(MI, BB, 2, true, ARMCC::LT); + case ARM::ATOMIC_LOAD_MIN_I32: + return EmitAtomicBinaryMinMax(MI, BB, 4, true, ARMCC::LT); + + case ARM::ATOMIC_LOAD_MAX_I8: + return EmitAtomicBinaryMinMax(MI, BB, 1, true, ARMCC::GT); + case ARM::ATOMIC_LOAD_MAX_I16: + return EmitAtomicBinaryMinMax(MI, BB, 2, true, ARMCC::GT); + case ARM::ATOMIC_LOAD_MAX_I32: + return EmitAtomicBinaryMinMax(MI, BB, 4, true, ARMCC::GT); + + case ARM::ATOMIC_LOAD_UMIN_I8: + return EmitAtomicBinaryMinMax(MI, BB, 1, false, ARMCC::LO); + case ARM::ATOMIC_LOAD_UMIN_I16: + return EmitAtomicBinaryMinMax(MI, BB, 2, false, ARMCC::LO); + case ARM::ATOMIC_LOAD_UMIN_I32: + return EmitAtomicBinaryMinMax(MI, BB, 4, false, ARMCC::LO); + + case ARM::ATOMIC_LOAD_UMAX_I8: + return EmitAtomicBinaryMinMax(MI, BB, 1, false, ARMCC::HI); + case ARM::ATOMIC_LOAD_UMAX_I16: + return EmitAtomicBinaryMinMax(MI, BB, 2, false, ARMCC::HI); + case ARM::ATOMIC_LOAD_UMAX_I32: + return EmitAtomicBinaryMinMax(MI, BB, 4, false, ARMCC::HI); + case ARM::ATOMIC_SWAP_I8: return EmitAtomicBinary(MI, BB, 1, 0); case ARM::ATOMIC_SWAP_I16: return EmitAtomicBinary(MI, BB, 2, 0); case ARM::ATOMIC_SWAP_I32: return EmitAtomicBinary(MI, BB, 4, 0); @@ -5034,6 +5524,42 @@ static SDValue PerformSUBCombine(SDNode *N, return SDValue(); } +/// PerformVMULCombine +/// Distribute (A + B) * C to (A * C) + (B * C) to take advantage of the +/// special multiplier accumulator forwarding. 
+/// vmul d3, d0, d2
+/// vmla d3, d1, d2
+/// is faster than
+/// vadd d3, d0, d1
+/// vmul d3, d3, d2
+static SDValue PerformVMULCombine(SDNode *N,
+                                  TargetLowering::DAGCombinerInfo &DCI,
+                                  const ARMSubtarget *Subtarget) {
+  if (!Subtarget->hasVMLxForwarding())
+    return SDValue();
+
+  SelectionDAG &DAG = DCI.DAG;
+  SDValue N0 = N->getOperand(0);
+  SDValue N1 = N->getOperand(1);
+  unsigned Opcode = N0.getOpcode();
+  if (Opcode != ISD::ADD && Opcode != ISD::SUB &&
+      Opcode != ISD::FADD && Opcode != ISD::FSUB) {
+    Opcode = N1.getOpcode();
+    if (Opcode != ISD::ADD && Opcode != ISD::SUB &&
+        Opcode != ISD::FADD && Opcode != ISD::FSUB)
+      return SDValue();
+    std::swap(N0, N1);
+  }
+
+  EVT VT = N->getValueType(0);
+  DebugLoc DL = N->getDebugLoc();
+  SDValue N00 = N0->getOperand(0);
+  SDValue N01 = N0->getOperand(1);
+  return DAG.getNode(Opcode, DL, VT,
+                     DAG.getNode(ISD::MUL, DL, VT, N00, N1),
+                     DAG.getNode(ISD::MUL, DL, VT, N01, N1));
+}
+
 static SDValue PerformMULCombine(SDNode *N,
                                  TargetLowering::DAGCombinerInfo &DCI,
                                  const ARMSubtarget *Subtarget) {
@@ -5046,6 +5572,8 @@ static SDValue PerformMULCombine(SDNode *N,
     return SDValue();
 
   EVT VT = N->getValueType(0);
+  if (VT.is64BitVector() || VT.is128BitVector())
+    return PerformVMULCombine(N, DCI, Subtarget);
   if (VT != MVT::i32)
     return SDValue();
 
@@ -5088,12 +5616,16 @@ static SDValue PerformMULCombine(SDNode *N,
 
 static SDValue PerformANDCombine(SDNode *N,
                                 TargetLowering::DAGCombinerInfo &DCI) {
+  // Attempt to use immediate-form VBIC
   BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(N->getOperand(1));
   DebugLoc dl = N->getDebugLoc();
   EVT VT = N->getValueType(0);
   SelectionDAG &DAG = DCI.DAG;
 
+  if (!DAG.getTargetLoweringInfo().isTypeLegal(VT))
+    return SDValue();
+
   APInt SplatBits, SplatUndef;
   unsigned SplatBitSize;
   bool HasAnyUndefs;
@@ -5127,6 +5659,9 @@ static SDValue PerformORCombine(SDNode *N,
   EVT VT = N->getValueType(0);
   SelectionDAG &DAG = DCI.DAG;
 
+  if (!DAG.getTargetLoweringInfo().isTypeLegal(VT))
+    return SDValue();
+
   APInt SplatBits, SplatUndef;
   unsigned SplatBitSize;
   bool HasAnyUndefs;
@@ -5147,6 +5682,37 @@ static SDValue PerformORCombine(SDNode *N,
     }
   }
 
+  SDValue N0 = N->getOperand(0);
+  if (N0.getOpcode() != ISD::AND)
+    return SDValue();
+  SDValue N1 = N->getOperand(1);
+
+  // (or (and B, A), (and C, ~A)) => (VBSL A, B, C) when A is a constant.
+  if (Subtarget->hasNEON() && N1.getOpcode() == ISD::AND && VT.isVector() &&
+      DAG.getTargetLoweringInfo().isTypeLegal(VT)) {
+    APInt SplatUndef;
+    unsigned SplatBitSize;
+    bool HasAnyUndefs;
+
+    BuildVectorSDNode *BVN0 = dyn_cast<BuildVectorSDNode>(N0->getOperand(1));
+    APInt SplatBits0;
+    if (BVN0 && BVN0->isConstantSplat(SplatBits0, SplatUndef, SplatBitSize,
+                                      HasAnyUndefs) && !HasAnyUndefs) {
+      BuildVectorSDNode *BVN1 = dyn_cast<BuildVectorSDNode>(N1->getOperand(1));
+      APInt SplatBits1;
+      if (BVN1 && BVN1->isConstantSplat(SplatBits1, SplatUndef, SplatBitSize,
+                                        HasAnyUndefs) && !HasAnyUndefs &&
+          SplatBits0 == ~SplatBits1) {
+        // Canonicalize the vector type to make instruction selection simpler.
+        EVT CanonicalVT = VT.is128BitVector() ? MVT::v4i32 : MVT::v2i32;
+        SDValue Result = DAG.getNode(ARMISD::VBSL, dl, CanonicalVT,
+                                     N0->getOperand(1), N0->getOperand(0),
+                                     N1->getOperand(0));
+        return DAG.getNode(ISD::BITCAST, dl, VT, Result);
+      }
+    }
+  }
+
+  // Try to use the ARM/Thumb2 BFI (bitfield insert) instruction when
+  // reasonable.
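The new (or (and B, A), (and C, ~A)) combine above rests on the bitwise-select identity that ARMISD::VBSL implements per lane: each result bit comes from B where the constant mask A is set and from C where it is clear. A scalar illustration of that identity (not the DAG code itself):

#include <cassert>
#include <cstdint>

// What VBSL computes in each lane: select bits of B or C under mask A.
static uint32_t bsl(uint32_t A, uint32_t B, uint32_t C) {
  return (B & A) | (C & ~A);
}

int main() {
  // High half taken from B, low half from C.
  assert(bsl(0xFFFF0000u, 0x12345678u, 0x9ABCDEF0u) == 0x1234DEF0u);
  return 0;
}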
@@ -5154,19 +5720,16 @@ static SDValue PerformORCombine(SDNode *N, if (Subtarget->isThumb1Only() || !Subtarget->hasV6T2Ops()) return SDValue(); - SDValue N0 = N->getOperand(0), N1 = N->getOperand(1); DebugLoc DL = N->getDebugLoc(); // 1) or (and A, mask), val => ARMbfi A, val, mask // iff (val & mask) == val // // 2) or (and A, mask), (and B, mask2) => ARMbfi A, (lsr B, amt), mask // 2a) iff isBitFieldInvertedMask(mask) && isBitFieldInvertedMask(~mask2) - // && CountPopulation_32(mask) == CountPopulation_32(~mask2) + // && mask == ~mask2 // 2b) iff isBitFieldInvertedMask(~mask) && isBitFieldInvertedMask(mask2) - // && CountPopulation_32(mask) == CountPopulation_32(~mask2) + // && ~mask == mask2 // (i.e., copy a bitfield value into another bitfield of the same width) - if (N0.getOpcode() != ISD::AND) - return SDValue(); if (VT != MVT::i32) return SDValue(); @@ -5209,26 +5772,26 @@ static SDValue PerformORCombine(SDNode *N, return SDValue(); unsigned Mask2 = N11C->getZExtValue(); + // Mask and ~Mask2 (or reverse) must be equivalent for the BFI pattern + // as is to match. if (ARM::isBitFieldInvertedMask(Mask) && - ARM::isBitFieldInvertedMask(~Mask2) && - (CountPopulation_32(Mask) == CountPopulation_32(~Mask2))) { + (Mask == ~Mask2)) { // The pack halfword instruction works better for masks that fit it, // so use that when it's available. if (Subtarget->hasT2ExtractPack() && (Mask == 0xffff || Mask == 0xffff0000)) return SDValue(); // 2a - unsigned lsb = CountTrailingZeros_32(Mask2); + unsigned amt = CountTrailingZeros_32(Mask2); Res = DAG.getNode(ISD::SRL, DL, VT, N1.getOperand(0), - DAG.getConstant(lsb, MVT::i32)); + DAG.getConstant(amt, MVT::i32)); Res = DAG.getNode(ARMISD::BFI, DL, VT, N00, Res, DAG.getConstant(Mask, MVT::i32)); // Do not add new nodes to DAG combiner worklist. DCI.CombineTo(N, Res, false); return SDValue(); } else if (ARM::isBitFieldInvertedMask(~Mask) && - ARM::isBitFieldInvertedMask(Mask2) && - (CountPopulation_32(~Mask) == CountPopulation_32(Mask2))) { + (~Mask == Mask2)) { // The pack halfword instruction works better for masks that fit it, // so use that when it's available. if (Subtarget->hasT2ExtractPack() && @@ -5239,7 +5802,7 @@ static SDValue PerformORCombine(SDNode *N, Res = DAG.getNode(ISD::SRL, DL, VT, N00, DAG.getConstant(lsb, MVT::i32)); Res = DAG.getNode(ARMISD::BFI, DL, VT, N1.getOperand(0), Res, - DAG.getConstant(Mask2, MVT::i32)); + DAG.getConstant(Mask2, MVT::i32)); // Do not add new nodes to DAG combiner worklist. DCI.CombineTo(N, Res, false); return SDValue(); @@ -5294,6 +5857,37 @@ static SDValue PerformVMOVRRDCombine(SDNode *N, SDValue InDouble = N->getOperand(0); if (InDouble.getOpcode() == ARMISD::VMOVDRR) return DCI.CombineTo(N, InDouble.getOperand(0), InDouble.getOperand(1)); + + // vmovrrd(load f64) -> (load i32), (load i32) + SDNode *InNode = InDouble.getNode(); + if (ISD::isNormalLoad(InNode) && InNode->hasOneUse() && + InNode->getValueType(0) == MVT::f64 && + InNode->getOperand(1).getOpcode() == ISD::FrameIndex && + !cast<LoadSDNode>(InNode)->isVolatile()) { + // TODO: Should this be done for non-FrameIndex operands? 
+ LoadSDNode *LD = cast<LoadSDNode>(InNode); + + SelectionDAG &DAG = DCI.DAG; + DebugLoc DL = LD->getDebugLoc(); + SDValue BasePtr = LD->getBasePtr(); + SDValue NewLD1 = DAG.getLoad(MVT::i32, DL, LD->getChain(), BasePtr, + LD->getPointerInfo(), LD->isVolatile(), + LD->isNonTemporal(), LD->getAlignment()); + + SDValue OffsetPtr = DAG.getNode(ISD::ADD, DL, MVT::i32, BasePtr, + DAG.getConstant(4, MVT::i32)); + SDValue NewLD2 = DAG.getLoad(MVT::i32, DL, NewLD1.getValue(1), OffsetPtr, + LD->getPointerInfo(), LD->isVolatile(), + LD->isNonTemporal(), + std::min(4U, LD->getAlignment() / 2)); + + DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), NewLD2.getValue(1)); + SDValue Result = DCI.CombineTo(N, NewLD1, NewLD2); + DCI.RemoveFromWorklist(LD); + DAG.DeleteNode(LD); + return Result; + } + return SDValue(); } @@ -5323,8 +5917,28 @@ static SDValue PerformSTORECombine(SDNode *N, // Otherwise, the i64 value will be legalized to a pair of i32 values. StoreSDNode *St = cast<StoreSDNode>(N); SDValue StVal = St->getValue(); - if (!ISD::isNormalStore(St) || St->isVolatile() || - StVal.getValueType() != MVT::i64 || + if (!ISD::isNormalStore(St) || St->isVolatile()) + return SDValue(); + + if (StVal.getNode()->getOpcode() == ARMISD::VMOVDRR && + StVal.getNode()->hasOneUse() && !St->isVolatile()) { + SelectionDAG &DAG = DCI.DAG; + DebugLoc DL = St->getDebugLoc(); + SDValue BasePtr = St->getBasePtr(); + SDValue NewST1 = DAG.getStore(St->getChain(), DL, + StVal.getNode()->getOperand(0), BasePtr, + St->getPointerInfo(), St->isVolatile(), + St->isNonTemporal(), St->getAlignment()); + + SDValue OffsetPtr = DAG.getNode(ISD::ADD, DL, MVT::i32, BasePtr, + DAG.getConstant(4, MVT::i32)); + return DAG.getStore(NewST1.getValue(0), DL, StVal.getNode()->getOperand(1), + OffsetPtr, St->getPointerInfo(), St->isVolatile(), + St->isNonTemporal(), + std::min(4U, St->getAlignment() / 2)); + } + + if (StVal.getValueType() != MVT::i64 || StVal.getNode()->getOpcode() != ISD::EXTRACT_VECTOR_ELT) return SDValue(); @@ -5553,7 +6167,7 @@ static SDValue CombineBaseUpdate(SDNode *N, EVT VecTy; if (isLoad) VecTy = N->getValueType(0); - else + else VecTy = N->getOperand(AddrOpIdx+1).getValueType(); unsigned NumBytes = NumVecs * VecTy.getSizeInBits() / 8; if (isLaneOp) @@ -5603,7 +6217,7 @@ static SDValue CombineBaseUpdate(SDNode *N, DCI.CombineTo(User, SDValue(UpdN.getNode(), NumResultVecs)); break; - } + } return SDValue(); } diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h index dc400c485ec6..a2e626062ac6 100644 --- a/lib/Target/ARM/ARMISelLowering.h +++ b/lib/Target/ARM/ARMISelLowering.h @@ -57,7 +57,6 @@ namespace llvm { CMPFPw0, // ARM VFP compare against zero instruction, sets FPSCR. FMSTAT, // ARM fmstat instruction. CMOV, // ARM conditional move instructions. - CNEG, // ARM conditional negate instructions. BCC_i64, @@ -89,7 +88,7 @@ namespace llvm { MEMBARRIER_MCR, // Memory barrier (MCR) PRELOAD, // Preload - + VCEQ, // Vector compare equal. VCEQZ, // Vector compare equal to zero. VCGE, // Vector compare greater than or equal. 
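The VMOVRRD combine added in ARMISelLowering.cpp above replaces an f64 load feeding a VMOVRRD with two i32 loads at offsets 0 and 4. A scalar sketch of the layout assumption behind it, namely the little-endian word order the backend targets here (illustration only):

#include <cstdint>
#include <cstring>

// The i32 at offset 0 is the low half of the f64 and the i32 at offset 4
// is the high half, so both halves can be loaded as integers directly
// instead of round-tripping through a VFP D register.
static void splitF64Load(const double *P, uint32_t &Lo, uint32_t &Hi) {
  std::memcpy(&Lo, reinterpret_cast<const char *>(P), 4);
  std::memcpy(&Hi, reinterpret_cast<const char *>(P) + 4, 4);
}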
@@ -154,6 +153,8 @@ namespace llvm { VZIP, // zip (interleave) VUZP, // unzip (deinterleave) VTRN, // transpose + VTBL1, // 1-register shuffle with mask + VTBL2, // 2-register shuffle with mask // Vector multiply long: VMULLs, // ...signed @@ -172,12 +173,15 @@ namespace llvm { // Bit-field insert BFI, - + // Vector OR with immediate VORRIMM, // Vector AND with NOT of immediate VBICIMM, + // Vector bitwise select + VBSL, + // Vector load N-element structure to all lanes: VLD2DUP = ISD::FIRST_TARGET_MEMORY_OPCODE, VLD3DUP, @@ -330,9 +334,6 @@ namespace llvm { Sched::Preference getSchedulingPreference(SDNode *N) const; - unsigned getRegPressureLimit(const TargetRegisterClass *RC, - MachineFunction &MF) const; - bool isShuffleMaskLegal(const SmallVectorImpl<int> &M, EVT VT) const; bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const; @@ -407,7 +408,7 @@ namespace llvm { SDValue LowerShiftRightParts(SDValue Op, SelectionDAG &DAG) const; SDValue LowerShiftLeftParts(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG, + SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *ST) const; SDValue ReconstructShuffle(SDValue Op, SelectionDAG &DAG) const; @@ -425,6 +426,13 @@ namespace llvm { DebugLoc dl, SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const; + void VarArgStyleRegisters(CCState &CCInfo, SelectionDAG &DAG, + DebugLoc dl, SDValue &Chain, unsigned ArgOffset) + const; + + void computeRegArea(CCState &CCInfo, MachineFunction &MF, + unsigned &VARegSize, unsigned &VARegSaveSize) const; + virtual SDValue LowerCall(SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg, @@ -435,6 +443,9 @@ namespace llvm { DebugLoc dl, SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const; + /// HandleByVal - Target-specific cleanup for ByVal support. + virtual void HandleByVal(CCState *, unsigned &) const; + /// IsEligibleForTailCallOptimization - Check whether the call is eligible /// for tail call optimization. Targets which want to do tail call /// optimization should implement this function. 
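The VTBL1/VTBL2 nodes added to the enum above model the NEON table-lookup instruction, and the property isVTBLMask relies on is that out-of-range indices produce 0 rather than an undefined lane, so any 8-entry mask is acceptable for v8i8. A scalar model of the one-register form (illustration only):

#include <array>
#include <cstdint>

// Each result byte is Table[Mask[i]], or 0 when the index falls outside
// the 8-byte table; this is what makes arbitrary v8i8 masks legal.
static std::array<uint8_t, 8> vtbl1(const std::array<uint8_t, 8> &Table,
                                    const std::array<uint8_t, 8> &Mask) {
  std::array<uint8_t, 8> Out{};
  for (unsigned i = 0; i != 8; ++i)
    Out[i] = Mask[i] < 8 ? Table[Mask[i]] : 0;
  return Out;
}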
@@ -456,10 +467,13 @@ namespace llvm { virtual bool isUsedByReturnOnly(SDNode *N) const; + virtual bool mayBeEmittedAsTailCall(CallInst *CI) const; + SDValue getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC, SDValue &ARMcc, SelectionDAG &DAG, DebugLoc dl) const; SDValue getVFPCmp(SDValue LHS, SDValue RHS, SelectionDAG &DAG, DebugLoc dl) const; + SDValue duplicateCmp(SDValue Cmp, SelectionDAG &DAG) const; SDValue OptimizeVFPBrcond(SDValue Op, SelectionDAG &DAG) const; @@ -470,16 +484,22 @@ namespace llvm { MachineBasicBlock *BB, unsigned Size, unsigned BinOpcode) const; + MachineBasicBlock * EmitAtomicBinaryMinMax(MachineInstr *MI, + MachineBasicBlock *BB, + unsigned Size, + bool signExtend, + ARMCC::CondCodes Cond) const; + bool RemapAddSubWithFlags(MachineInstr *MI, MachineBasicBlock *BB) const; }; - + enum NEONModImmType { VMOVModImm, VMVNModImm, OtherModImm }; - - + + namespace ARM { FastISel *createFastISel(FunctionLoweringInfo &funcInfo); } diff --git a/lib/Target/ARM/ARMInstrFormats.td b/lib/Target/ARM/ARMInstrFormats.td index 359ac45cee1d..f5fb98ece4af 100644 --- a/lib/Target/ARM/ARMInstrFormats.td +++ b/lib/Target/ARM/ARMInstrFormats.td @@ -206,19 +206,30 @@ def setend_op : Operand<i32> { let PrintMethod = "printSetendOperand"; } -def cps_opt : Operand<i32> { - let PrintMethod = "printCPSOptionOperand"; -} - def msr_mask : Operand<i32> { let PrintMethod = "printMSRMaskOperand"; let ParserMatchClass = MSRMaskOperand; } -// A8.6.117, A8.6.118. Different instructions are generated for #0 and #-0. -// The neg_zero operand translates -0 to -1, -1 to -2, ..., etc. -def neg_zero : Operand<i32> { - let PrintMethod = "printNegZeroOperand"; +// Shift Right Immediate - A shift right immediate is encoded differently from +// other shift immediates. The imm6 field is encoded like so: +// +// Offset Encoding +// 8 imm6<5:3> = '001', 8 - <imm> is encoded in imm6<2:0> +// 16 imm6<5:4> = '01', 16 - <imm> is encoded in imm6<3:0> +// 32 imm6<5> = '1', 32 - <imm> is encoded in imm6<4:0> +// 64 64 - <imm> is encoded in imm6<5:0> +def shr_imm8 : Operand<i32> { + let EncoderMethod = "getShiftRight8Imm"; +} +def shr_imm16 : Operand<i32> { + let EncoderMethod = "getShiftRight16Imm"; +} +def shr_imm32 : Operand<i32> { + let EncoderMethod = "getShiftRight32Imm"; +} +def shr_imm64 : Operand<i32> { + let EncoderMethod = "getShiftRight64Imm"; } //===----------------------------------------------------------------------===// @@ -279,6 +290,7 @@ class PseudoInst<dag oops, dag iops, InstrItinClass itin, list<dag> pattern> let OutOperandList = oops; let InOperandList = iops; let Pattern = pattern; + let isCodeGenOnly = 1; } // PseudoInst that's ARM-mode only. @@ -422,11 +434,11 @@ class AIstrex<bits<2> opcod, dag oops, dag iops, InstrItinClass itin, opc, asm, "", pattern> { bits<4> Rd; bits<4> Rt; - bits<4> Rn; + bits<4> addr; let Inst{27-23} = 0b00011; let Inst{22-21} = opcod; let Inst{20} = 0; - let Inst{19-16} = Rn; + let Inst{19-16} = addr; let Inst{15-12} = Rd; let Inst{11-4} = 0b11111001; let Inst{3-0} = Rt; @@ -513,6 +525,24 @@ class AI2stridx<bit isByte, bit isPre, dag oops, dag iops, let Inst{19-16} = Rn; let Inst{11-0} = offset{11-0}; } +// FIXME: Merge with the above class when addrmode2 gets used for STR, STRB +// but for now use this class for STRT and STRBT. 
+class AI2stridxT<bit isByte, bit isPre, dag oops, dag iops,
+                 IndexMode im, Format f, InstrItinClass itin, string opc,
+                 string asm, string cstr, list<dag> pattern>
+  : AI2ldstidx<0, isByte, isPre, oops, iops, im, f, itin, opc, asm, cstr,
+               pattern> {
+  // AM2 store w/ two operands: (GPR, am2offset)
+  // {17-14}  Rn
+  // {13}     1 == Rm, 0 == imm12
+  // {12}     isAdd
+  // {11-0}   imm12/Rm
+  bits<18> addr;
+  let Inst{25} = addr{13};
+  let Inst{23} = addr{12};
+  let Inst{19-16} = addr{17-14};
+  let Inst{11-0} = addr{11-0};
+}
 
 // addrmode3 instructions
 class AI3ld<bits<4> op, bit op20, dag oops, dag iops, Format f,
@@ -547,6 +577,34 @@ class AI3ldstidx<bits<4> op, bit op20, bit isLd, bit isPre, dag oops, dag iops,
   let Inst{15-12} = Rt; // Rt
   let Inst{7-4} = op;
 }
+
+// FIXME: Merge with the above class when addrmode2 gets used for LDR, LDRB
+// but for now use this class for LDRSBT, LDRHT, LDRSHT.
+class AI3ldstidxT<bits<4> op, bit op20, bit isLd, bit isPre, dag oops, dag iops,
+                  IndexMode im, Format f, InstrItinClass itin, string opc,
+                  string asm, string cstr, list<dag> pattern>
+  : I<oops, iops, AddrMode3, Size4Bytes, im, f, itin,
+      opc, asm, cstr, pattern> {
+  // {13}     1 == imm8, 0 == Rm
+  // {12-9}   Rn
+  // {8}      isAdd
+  // {7-4}    imm7_4/zero
+  // {3-0}    imm3_0/Rm
+  bits<14> addr;
+  bits<4> Rt;
+  let Inst{27-25} = 0b000;
+  let Inst{24} = isPre;        // P bit
+  let Inst{23} = addr{8};      // U bit
+  let Inst{22} = addr{13};     // 1 == imm8, 0 == Rm
+  let Inst{20} = op20;         // L bit
+  let Inst{19-16} = addr{12-9}; // Rn
+  let Inst{15-12} = Rt;         // Rt
+  let Inst{11-8} = addr{7-4};   // imm7_4/zero
+  let Inst{7-4} = op;
+  let Inst{3-0} = addr{3-0};    // imm3_0/Rm
+  let AsmMatchConverter = "CvtLdWriteBackRegAddrMode3";
+}
+
 class AI3stridx<bits<4> op, bit isByte, bit isPre, dag oops, dag iops,
                 IndexMode im, Format f, InstrItinClass itin, string opc,
                 string asm, string cstr, list<dag> pattern>
@@ -619,12 +677,25 @@ class AI3sthpo<dag oops, dag iops, Format f, InstrItinClass itin,
              string opc, string asm, string cstr, list<dag> pattern>
   : I<oops, iops, AddrMode3, Size4Bytes, IndexModePost, f, itin,
        opc, asm, cstr,pattern> {
+  // {13}     1 == imm8, 0 == Rm
+  // {12-9}   Rn
+  // {8}      isAdd
+  // {7-4}    imm7_4/zero
+  // {3-0}    imm3_0/Rm
+  bits<14> addr;
+  bits<4> Rt;
+  let Inst{3-0} = addr{3-0};    // imm3_0/Rm
   let Inst{4}     = 1;
   let Inst{5}     = 1;          // H bit
   let Inst{6}     = 0;          // S bit
   let Inst{7}     = 1;
+  let Inst{11-8} = addr{7-4};   // imm7_4/zero
+  let Inst{15-12} = Rt;         // Rt
+  let Inst{19-16} = addr{12-9}; // Rn
   let Inst{20}    = 0;          // L bit
   let Inst{21}    = 0;          // W bit
+  let Inst{22} = addr{13};      // 1 == imm8, 0 == Rm
+  let Inst{23} = addr{8};       // U bit
   let Inst{24}    = 0;          // P bit
   let Inst{27-25} = 0b000;
 }
@@ -1670,7 +1741,8 @@ class N2VImm<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6, bit op4,
 }
 
 // NEON 3 vector register format.
-class N3V<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op6, bit op4, + +class N3VCommon<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op6, bit op4, dag oops, dag iops, Format f, InstrItinClass itin, string opc, string dt, string asm, string cstr, list<dag> pattern> : NDataI<oops, iops, f, itin, opc, dt, asm, cstr, pattern> { @@ -1680,6 +1752,13 @@ class N3V<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op6, bit op4, let Inst{11-8} = op11_8; let Inst{6} = op6; let Inst{4} = op4; +} + +class N3V<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op6, bit op4, + dag oops, dag iops, Format f, InstrItinClass itin, + string opc, string dt, string asm, string cstr, list<dag> pattern> + : N3VCommon<op24, op23, op21_20, op11_8, op6, op4, + oops, iops, f, itin, opc, dt, asm, cstr, pattern> { // Instruction operands. bits<5> Vd; @@ -1694,6 +1773,47 @@ class N3V<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op6, bit op4, let Inst{5} = Vm{4}; } +class N3VLane32<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op6, bit op4, + dag oops, dag iops, Format f, InstrItinClass itin, + string opc, string dt, string asm, string cstr, list<dag> pattern> + : N3VCommon<op24, op23, op21_20, op11_8, op6, op4, + oops, iops, f, itin, opc, dt, asm, cstr, pattern> { + + // Instruction operands. + bits<5> Vd; + bits<5> Vn; + bits<5> Vm; + bit lane; + + let Inst{15-12} = Vd{3-0}; + let Inst{22} = Vd{4}; + let Inst{19-16} = Vn{3-0}; + let Inst{7} = Vn{4}; + let Inst{3-0} = Vm{3-0}; + let Inst{5} = lane; +} + +class N3VLane16<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op6, bit op4, + dag oops, dag iops, Format f, InstrItinClass itin, + string opc, string dt, string asm, string cstr, list<dag> pattern> + : N3VCommon<op24, op23, op21_20, op11_8, op6, op4, + oops, iops, f, itin, opc, dt, asm, cstr, pattern> { + + // Instruction operands. + bits<5> Vd; + bits<5> Vn; + bits<5> Vm; + bits<2> lane; + + let Inst{15-12} = Vd{3-0}; + let Inst{22} = Vd{4}; + let Inst{19-16} = Vn{3-0}; + let Inst{7} = Vn{4}; + let Inst{2-0} = Vm{2-0}; + let Inst{5} = lane{1}; + let Inst{3} = lane{0}; +} + // Same as N3V except it doesn't have a data type suffix. 
class N3VX<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op6, bit op4, @@ -1730,6 +1850,8 @@ class NVLaneOp<bits<8> opcod1, bits<4> opcod2, bits<2> opcod3, let Inst{11-8} = opcod2; let Inst{6-5} = opcod3; let Inst{4} = 1; + // A8.6.303, A8.6.328, A8.6.329 + let Inst{3-0} = 0b0000; let OutOperandList = oops; let InOperandList = !con(iops, (ins pred:$p)); diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td index 6e3fe2e039f5..209c1a3fd96a 100644 --- a/lib/Target/ARM/ARMInstrInfo.td +++ b/lib/Target/ARM/ARMInstrInfo.td @@ -58,7 +58,7 @@ def SDT_ARMEH_SJLJ_Setjmp : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisPtrTy<1>, SDTCisInt<2>]>; def SDT_ARMEH_SJLJ_Longjmp: SDTypeProfile<0, 2, [SDTCisPtrTy<0>, SDTCisInt<1>]>; -def SDT_ARMEH_SJLJ_DispatchSetup: SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>; +def SDT_ARMEH_SJLJ_DispatchSetup: SDTypeProfile<0, 0, []>; def SDT_ARMMEMBARRIER : SDTypeProfile<0, 1, [SDTCisInt<0>]>; @@ -93,8 +93,6 @@ def ARMretflag : SDNode<"ARMISD::RET_FLAG", SDTNone, def ARMcmov : SDNode<"ARMISD::CMOV", SDT_ARMCMov, [SDNPInGlue]>; -def ARMcneg : SDNode<"ARMISD::CNEG", SDT_ARMCMov, - [SDNPInGlue]>; def ARMbrcond : SDNode<"ARMISD::BRCOND", SDT_ARMBrcond, [SDNPHasChain, SDNPInGlue, SDNPOutGlue]>; @@ -205,13 +203,13 @@ def so_imm_not_XFORM : SDNodeXForm<imm, [{ }]>; /// imm1_15 predicate - True if the 32-bit immediate is in the range [1,15]. -def imm1_15 : PatLeaf<(i32 imm), [{ - return (int32_t)N->getZExtValue() >= 1 && (int32_t)N->getZExtValue() < 16; +def imm1_15 : ImmLeaf<i32, [{ + return (int32_t)Imm >= 1 && (int32_t)Imm < 16; }]>; /// imm16_31 predicate - True if the 32-bit immediate is in the range [16,31]. -def imm16_31 : PatLeaf<(i32 imm), [{ - return (int32_t)N->getZExtValue() >= 16 && (int32_t)N->getZExtValue() < 32; +def imm16_31 : ImmLeaf<i32, [{ + return (int32_t)Imm >= 16 && (int32_t)Imm < 32; }]>; def so_imm_neg : @@ -241,8 +239,8 @@ def lo16AllZero : PatLeaf<(i32 imm), [{ /// imm0_65535 predicate - True if the 32-bit immediate is in the range /// [0.65535]. -def imm0_65535 : PatLeaf<(i32 imm), [{ - return (uint32_t)N->getZExtValue() < 65536; +def imm0_65535 : ImmLeaf<i32, [{ + return Imm >= 0 && Imm < 65536; }]>; class BinOpFrag<dag res> : PatFrag<(ops node:$LHS, node:$RHS), res>; @@ -377,17 +375,23 @@ def neon_vcvt_imm32 : Operand<i32> { } // rot_imm: An integer that encodes a rotate amount. Must be 8, 16, or 24. -def rot_imm : Operand<i32>, PatLeaf<(i32 imm), [{ - int32_t v = (int32_t)N->getZExtValue(); +def rot_imm : Operand<i32>, ImmLeaf<i32, [{ + int32_t v = (int32_t)Imm; return v == 8 || v == 16 || v == 24; }]> { let EncoderMethod = "getRotImmOpValue"; } +def ShifterAsmOperand : AsmOperandClass { + let Name = "Shifter"; + let SuperClasses = []; +} + // shift_imm: An integer that encodes a shift amount and the type of shift // (currently either asr or lsl) using the same encoding used for the // immediates in so_reg operands. def shift_imm : Operand<i32> { let PrintMethod = "printShiftImmOperand"; + let ParserMatchClass = ShifterAsmOperand; } // shifter_operand operands: so_reg and so_imm. 
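The shr_imm8/16/32/64 operands introduced in ARMInstrFormats.td above all share one imm6 field: the encoder stores Size - <imm>, with the element size recoverable from the leading bits. A worked sketch of that encoding table (the real encoders are the getShiftRight*Imm methods named there; this is an illustration under that reading, not their implementation):

#include <cassert>
#include <cstdint>

// imm6 for NEON shift-right immediates:
//   size 8 : imm6<5:3> = 001, imm6<2:0> = 8  - imm
//   size 16: imm6<5:4> = 01,  imm6<3:0> = 16 - imm
//   size 32: imm6<5>   = 1,   imm6<4:0> = 32 - imm
//   size 64:                  imm6<5:0> = 64 - imm
static uint32_t encodeShrImm(unsigned Size, unsigned Imm) {
  assert(Imm >= 1 && Imm <= Size && "shift amount out of range");
  switch (Size) {
  case 8:  return 0x08 | (8 - Imm);
  case 16: return 0x10 | (16 - Imm);
  case 32: return 0x20 | (32 - Imm);
  default: assert(Size == 64); return 64 - Imm;
  }
}
// e.g. encodeShrImm(16, 5) == 0x1B (imm6<5:4> = 01, imm6<3:0> = 1011).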
@@ -396,19 +400,21 @@ def so_reg : Operand<i32>, // reg reg imm [shl,srl,sra,rotr]> { let EncoderMethod = "getSORegOpValue"; let PrintMethod = "printSORegOperand"; - let MIOperandInfo = (ops GPR, GPR, i32imm); + let MIOperandInfo = (ops GPR, GPR, shift_imm); } def shift_so_reg : Operand<i32>, // reg reg imm ComplexPattern<i32, 3, "SelectShiftShifterOperandReg", [shl,srl,sra,rotr]> { let EncoderMethod = "getSORegOpValue"; let PrintMethod = "printSORegOperand"; - let MIOperandInfo = (ops GPR, GPR, i32imm); + let MIOperandInfo = (ops GPR, GPR, shift_imm); } // so_imm - Match a 32-bit shifter_operand immediate operand, which is an // 8-bit immediate rotated by an arbitrary number of bits. -def so_imm : Operand<i32>, PatLeaf<(imm), [{ return Pred_so_imm(N); }]> { +def so_imm : Operand<i32>, ImmLeaf<i32, [{ + return ARM_AM::getSOImmVal(Imm) != -1; + }]> { let EncoderMethod = "getSOImmOpValue"; let PrintMethod = "printSOImmOperand"; } @@ -429,13 +435,13 @@ def arm_i32imm : PatLeaf<(imm), [{ }]>; /// imm0_31 predicate - True if the 32-bit immediate is in the range [0,31]. -def imm0_31 : Operand<i32>, PatLeaf<(imm), [{ - return (int32_t)N->getZExtValue() < 32; +def imm0_31 : Operand<i32>, ImmLeaf<i32, [{ + return Imm >= 0 && Imm < 32; }]>; /// imm0_31_m1 - Matches and prints like imm0_31, but encodes as 'value - 1'. -def imm0_31_m1 : Operand<i32>, PatLeaf<(imm), [{ - return (int32_t)N->getZExtValue() < 32; +def imm0_31_m1 : Operand<i32>, ImmLeaf<i32, [{ + return Imm >= 0 && Imm < 32; }]> { let EncoderMethod = "getImmMinusOneOpValue"; } @@ -458,19 +464,30 @@ def bf_inv_mask_imm : Operand<i32>, } /// lsb_pos_imm - position of the lsb bit, used by BFI4p and t2BFI4p -def lsb_pos_imm : Operand<i32>, PatLeaf<(imm), [{ - return isInt<5>(N->getSExtValue()); +def lsb_pos_imm : Operand<i32>, ImmLeaf<i32, [{ + return isInt<5>(Imm); }]>; /// width_imm - number of bits to be copied, used by BFI4p and t2BFI4p -def width_imm : Operand<i32>, PatLeaf<(imm), [{ - return N->getSExtValue() > 0 && N->getSExtValue() <= 32; +def width_imm : Operand<i32>, ImmLeaf<i32, [{ + return Imm > 0 && Imm <= 32; }] > { let EncoderMethod = "getMsbOpValue"; } // Define ARM specific addressing modes. +def MemMode2AsmOperand : AsmOperandClass { + let Name = "MemMode2"; + let SuperClasses = []; + let ParserMethod = "tryParseMemMode2Operand"; +} + +def MemMode3AsmOperand : AsmOperandClass { + let Name = "MemMode3"; + let SuperClasses = []; + let ParserMethod = "tryParseMemMode3Operand"; +} // addrmode_imm12 := reg +/- imm12 // @@ -501,6 +518,7 @@ def addrmode2 : Operand<i32>, ComplexPattern<i32, 3, "SelectAddrMode2", []> { let EncoderMethod = "getAddrMode2OpValue"; let PrintMethod = "printAddrMode2Operand"; + let ParserMatchClass = MemMode2AsmOperand; let MIOperandInfo = (ops GPR:$base, GPR:$offsreg, i32imm:$offsimm); } @@ -519,6 +537,7 @@ def addrmode3 : Operand<i32>, ComplexPattern<i32, 3, "SelectAddrMode3", []> { let EncoderMethod = "getAddrMode3OpValue"; let PrintMethod = "printAddrMode3Operand"; + let ParserMatchClass = MemMode3AsmOperand; let MIOperandInfo = (ops GPR:$base, GPR:$offsreg, i32imm:$offsimm); } @@ -586,6 +605,21 @@ def addrmodepc : Operand<i32>, let MIOperandInfo = (ops GPR, i32imm); } +def MemMode7AsmOperand : AsmOperandClass { + let Name = "MemMode7"; + let SuperClasses = []; +} + +// addrmode7 := reg +// Used by load/store exclusive instructions. Useful to enable right assembly +// parsing and printing. Not used for any codegen matching. 
+// +def addrmode7 : Operand<i32> { + let PrintMethod = "printAddrMode7Operand"; + let MIOperandInfo = (ops GPR); + let ParserMatchClass = MemMode7AsmOperand; +} + def nohash_imm : Operand<i32> { let PrintMethod = "printNoHashImmediate"; } @@ -902,52 +936,23 @@ multiclass AI1_adde_sube_irs<bits<4> opcod, string opc, PatFrag opnode, let Inst{19-16} = Rn; } } +} + // Carry setting variants -let isCodeGenOnly = 1, Defs = [CPSR] in { -multiclass AI1_adde_sube_s_irs<bits<4> opcod, string opc, PatFrag opnode, - bit Commutable = 0> { - def Sri : AXI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm), - DPFrm, IIC_iALUi, !strconcat(opc, "\t$Rd, $Rn, $imm"), - [(set GPR:$Rd, (opnode GPR:$Rn, so_imm:$imm))]>, - Requires<[IsARM]> { - bits<4> Rd; - bits<4> Rn; - bits<12> imm; - let Inst{15-12} = Rd; - let Inst{19-16} = Rn; - let Inst{11-0} = imm; - let Inst{20} = 1; - let Inst{25} = 1; - } - def Srr : AXI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), - DPFrm, IIC_iALUr, !strconcat(opc, "\t$Rd, $Rn, $Rm"), - [(set GPR:$Rd, (opnode GPR:$Rn, GPR:$Rm))]>, - Requires<[IsARM]> { - bits<4> Rd; - bits<4> Rn; - bits<4> Rm; - let Inst{11-4} = 0b00000000; +// NOTE: CPSR def omitted because it will be handled by the custom inserter. +let usesCustomInserter = 1 in { +multiclass AI1_adde_sube_s_irs<PatFrag opnode, bit Commutable = 0> { + def ri : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm), + Size4Bytes, IIC_iALUi, + [(set GPR:$Rd, (opnode GPR:$Rn, so_imm:$imm))]>; + def rr : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), + Size4Bytes, IIC_iALUr, + [(set GPR:$Rd, (opnode GPR:$Rn, GPR:$Rm))]> { let isCommutable = Commutable; - let Inst{3-0} = Rm; - let Inst{15-12} = Rd; - let Inst{19-16} = Rn; - let Inst{20} = 1; - let Inst{25} = 0; - } - def Srs : AXI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, so_reg:$shift), - DPSoRegFrm, IIC_iALUsr, !strconcat(opc, "\t$Rd, $Rn, $shift"), - [(set GPR:$Rd, (opnode GPR:$Rn, so_reg:$shift))]>, - Requires<[IsARM]> { - bits<4> Rd; - bits<4> Rn; - bits<12> shift; - let Inst{11-0} = shift; - let Inst{15-12} = Rd; - let Inst{19-16} = Rn; - let Inst{20} = 1; - let Inst{25} = 0; } -} + def rs : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$Rn, so_reg:$shift), + Size4Bytes, IIC_iALUsr, + [(set GPR:$Rd, (opnode GPR:$Rn, so_reg:$shift))]>; } } @@ -972,6 +977,7 @@ multiclass AI_ldr1<bit isByte, string opc, InstrItinClass iii, [(set GPR:$Rt, (opnode ldst_so_reg:$shift))]> { bits<4> Rt; bits<17> shift; + let shift{4} = 0; // Inst{4} = 0 let Inst{23} = shift{12}; // U (add = ('U' == 1)) let Inst{19-16} = shift{16-13}; // Rn let Inst{15-12} = Rt; @@ -1001,6 +1007,7 @@ multiclass AI_str1<bit isByte, string opc, InstrItinClass iii, [(opnode GPR:$Rt, ldst_so_reg:$shift)]> { bits<4> Rt; bits<17> shift; + let shift{4} = 0; // Inst{4} = 0 let Inst{23} = shift{12}; // U (add = ('U' == 1)) let Inst{19-16} = shift{16-13}; // Rn let Inst{15-12} = Rt; @@ -1249,7 +1256,7 @@ let neverHasSideEffects = 1, isReMaterializable = 1 in // The 'adr' mnemonic encodes differently if the label is before or after // the instruction. The {24-21} opcode bits are set by the fixup, as we don't // know until then which form of the instruction will be used. -def ADR : AI1<0, (outs GPR:$Rd), (ins adrlabel:$label), +def ADR : AI1<{0,?,?,0}, (outs GPR:$Rd), (ins adrlabel:$label), MiscFrm, IIC_iALUi, "adr", "\t$Rd, #$label", []> { bits<4> Rd; bits<12> label; @@ -1311,6 +1318,9 @@ let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in { // before calls from potentially appearing dead. 
let isCall = 1, // On non-Darwin platforms R9 is callee-saved. + // FIXME: Do we really need a non-predicated version? If so, it should + // at least be a pseudo instruction expanding to the predicated version + // at MC lowering time. Defs = [R0, R1, R2, R3, R12, LR, D0, D1, D2, D3, D4, D5, D6, D7, D16, D17, D18, D19, D20, D21, D22, D23, @@ -1340,7 +1350,16 @@ let isCall = 1, Requires<[IsARM, HasV5T, IsNotDarwin]> { bits<4> func; let Inst{31-4} = 0b1110000100101111111111110011; - let Inst{3-0} = func; + let Inst{3-0} = func; + } + + def BLX_pred : AI<(outs), (ins GPR:$func, variable_ops), BrMiscFrm, + IIC_Br, "blx", "\t$func", + [(ARMcall_pred GPR:$func)]>, + Requires<[IsARM, HasV5T, IsNotDarwin]> { + bits<4> func; + let Inst{27-4} = 0b000100101111111111110011; + let Inst{3-0} = func; } // ARMv4T @@ -1364,30 +1383,25 @@ let isCall = 1, D16, D17, D18, D19, D20, D21, D22, D23, D24, D25, D26, D27, D28, D29, D30, D31, CPSR, FPSCR], Uses = [R7, SP] in { - def BLr9 : ABXI<0b1011, (outs), (ins bltarget:$func, variable_ops), - IIC_Br, "bl\t$func", - [(ARMcall tglobaladdr:$func)]>, Requires<[IsARM, IsDarwin]> { - let Inst{31-28} = 0b1110; - bits<24> func; - let Inst{23-0} = func; - } + def BLr9 : ARMPseudoInst<(outs), (ins bltarget:$func, variable_ops), + Size4Bytes, IIC_Br, + [(ARMcall tglobaladdr:$func)]>, Requires<[IsARM, IsDarwin]>; - def BLr9_pred : ABI<0b1011, (outs), (ins bltarget:$func, variable_ops), - IIC_Br, "bl", "\t$func", + def BLr9_pred : ARMPseudoInst<(outs), + (ins bltarget:$func, pred:$p, variable_ops), + Size4Bytes, IIC_Br, [(ARMcall_pred tglobaladdr:$func)]>, - Requires<[IsARM, IsDarwin]> { - bits<24> func; - let Inst{23-0} = func; - } + Requires<[IsARM, IsDarwin]>; // ARMv5T and above - def BLXr9 : AXI<(outs), (ins GPR:$func, variable_ops), BrMiscFrm, - IIC_Br, "blx\t$func", - [(ARMcall GPR:$func)]>, Requires<[IsARM, HasV5T, IsDarwin]> { - bits<4> func; - let Inst{31-4} = 0b1110000100101111111111110011; - let Inst{3-0} = func; - } + def BLXr9 : ARMPseudoInst<(outs), (ins GPR:$func, variable_ops), + Size4Bytes, IIC_Br, + [(ARMcall GPR:$func)]>, Requires<[IsARM, HasV5T, IsDarwin]>; + + def BLXr9_pred: ARMPseudoInst<(outs), (ins GPR:$func, pred:$p, variable_ops), + Size4Bytes, IIC_Br, + [(ARMcall_pred GPR:$func)]>, + Requires<[IsARM, HasV5T, IsDarwin]>; // ARMv4T // Note: Restrict $func to the tGPR regclass to prevent it being in LR. @@ -1403,11 +1417,7 @@ let isCall = 1, // Tail calls. -// FIXME: These should probably be xformed into the non-TC versions of the -// instructions as part of MC lowering. -// FIXME: These seem to be used for both Thumb and ARM instruction selection. -// Thumb should have its own version since the instruction is actually -// different, even though the mnemonic is the same. +// FIXME: The Thumb versions of these should live in ARMInstrThumb.td let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in { // Darwin versions. 
let Defs = [R0, R1, R2, R3, R9, R12, @@ -1421,21 +1431,21 @@ let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in { def TCRETURNri : PseudoInst<(outs), (ins tcGPR:$dst, variable_ops), IIC_Br, []>, Requires<[IsDarwin]>; - def TAILJMPd : ABXI<0b1010, (outs), (ins brtarget:$dst, variable_ops), - IIC_Br, "b\t$dst @ TAILCALL", + def TAILJMPd : ARMPseudoInst<(outs), (ins brtarget:$dst, variable_ops), + Size4Bytes, IIC_Br, []>, Requires<[IsARM, IsDarwin]>; - def TAILJMPdt: ABXI<0b1010, (outs), (ins brtarget:$dst, variable_ops), - IIC_Br, "b.w\t$dst @ TAILCALL", + def tTAILJMPd: tPseudoInst<(outs), (ins brtarget:$dst, variable_ops), + Size4Bytes, IIC_Br, []>, Requires<[IsThumb, IsDarwin]>; - def TAILJMPr : AXI<(outs), (ins tcGPR:$dst, variable_ops), - BrMiscFrm, IIC_Br, "bx\t$dst @ TAILCALL", - []>, Requires<[IsDarwin]> { - bits<4> dst; - let Inst{31-4} = 0b1110000100101111111111110001; - let Inst{3-0} = dst; - } + def TAILJMPr : ARMPseudoInst<(outs), (ins tcGPR:$dst, variable_ops), + Size4Bytes, IIC_Br, + []>, Requires<[IsARM, IsDarwin]>; + + def tTAILJMPr : tPseudoInst<(outs), (ins tcGPR:$dst, variable_ops), + Size4Bytes, IIC_Br, + []>, Requires<[IsThumb, IsDarwin]>; } // Non-Darwin versions (the difference is R9). @@ -1450,34 +1460,31 @@ let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in { def TCRETURNriND : PseudoInst<(outs), (ins tcGPR:$dst, variable_ops), IIC_Br, []>, Requires<[IsNotDarwin]>; - def TAILJMPdND : ABXI<0b1010, (outs), (ins brtarget:$dst, variable_ops), - IIC_Br, "b\t$dst @ TAILCALL", + def TAILJMPdND : ARMPseudoInst<(outs), (ins brtarget:$dst, variable_ops), + Size4Bytes, IIC_Br, []>, Requires<[IsARM, IsNotDarwin]>; - def TAILJMPdNDt : ABXI<0b1010, (outs), (ins brtarget:$dst, variable_ops), - IIC_Br, "b.w\t$dst @ TAILCALL", + def tTAILJMPdND : tPseudoInst<(outs), (ins brtarget:$dst, variable_ops), + Size4Bytes, IIC_Br, []>, Requires<[IsThumb, IsNotDarwin]>; - def TAILJMPrND : AXI<(outs), (ins tcGPR:$dst, variable_ops), - BrMiscFrm, IIC_Br, "bx\t$dst @ TAILCALL", - []>, Requires<[IsNotDarwin]> { - bits<4> dst; - let Inst{31-4} = 0b1110000100101111111111110001; - let Inst{3-0} = dst; - } + def TAILJMPrND : ARMPseudoInst<(outs), (ins tcGPR:$dst, variable_ops), + Size4Bytes, IIC_Br, + []>, Requires<[IsARM, IsNotDarwin]>; + def tTAILJMPrND : tPseudoInst<(outs), (ins tcGPR:$dst, variable_ops), + Size4Bytes, IIC_Br, + []>, Requires<[IsThumb, IsNotDarwin]>; } } let isBranch = 1, isTerminator = 1 in { - // B is "predicable" since it can be xformed into a Bcc. + // B is "predicable" since it's just a Bcc with an 'always' condition. let isBarrier = 1 in { let isPredicable = 1 in - def B : ABXI<0b1010, (outs), (ins brtarget:$target), IIC_Br, - "b\t$target", [(br bb:$target)]> { - bits<24> target; - let Inst{31-28} = 0b1110; - let Inst{23-0} = target; - } + // FIXME: We shouldn't need this pseudo at all. Just using Bcc directly + // should be sufficient. 
+ def B : ARMPseudoInst<(outs), (ins brtarget:$target), Size4Bytes, IIC_Br, + [(br bb:$target)]>; let isNotDuplicable = 1, isIndirectBranch = 1 in { def BR_JTr : ARMPseudoInst<(outs), @@ -1509,6 +1516,16 @@ let isBranch = 1, isTerminator = 1 in { } } +// BLX (immediate) -- for disassembly only +def BLXi : AXI<(outs), (ins br_target:$target), BrMiscFrm, NoItinerary, + "blx\t$target", [/* pattern left blank */]>, + Requires<[IsARM, HasV5T]> { + let Inst{31-25} = 0b1111101; + bits<25> target; + let Inst{23-0} = target{24-1}; + let Inst{24} = target{0}; +} + // Branch and Exchange Jazelle -- for disassembly only def BXJ : ABI<0b0001, (outs), (ins GPR:$func), NoItinerary, "bxj", "\t$func", [/* For disassembly only; pattern left blank */]> { @@ -1533,6 +1550,7 @@ def SVC : ABI<0b1111, (outs), (ins i32imm:$svc), IIC_Br, "svc", "\t$svc", let Inst{23-0} = svc; } } +def : MnemonicAlias<"swi", "svc">; // Store Return State is a system instruction -- for disassembly only let isCodeGenOnly = 1 in { // FIXME: This should not use submode! @@ -1541,6 +1559,8 @@ def SRSW : ABXI<{1,0,0,?}, (outs), (ins ldstm_mode:$amode, i32imm:$mode), [/* For disassembly only; pattern left blank */]> { let Inst{31-28} = 0b1111; let Inst{22-20} = 0b110; // W = 1 + let Inst{19-8} = 0xd05; + let Inst{7-5} = 0b000; } def SRS : ABXI<{1,0,0,?}, (outs), (ins ldstm_mode:$amode, i32imm:$mode), @@ -1548,6 +1568,8 @@ def SRS : ABXI<{1,0,0,?}, (outs), (ins ldstm_mode:$amode, i32imm:$mode), [/* For disassembly only; pattern left blank */]> { let Inst{31-28} = 0b1111; let Inst{22-20} = 0b100; // W = 0 + let Inst{19-8} = 0xd05; + let Inst{7-5} = 0b000; } // Return From Exception is a system instruction -- for disassembly only @@ -1556,6 +1578,7 @@ def RFEW : ABXI<{1,0,0,?}, (outs), (ins ldstm_mode:$amode, GPR:$base), [/* For disassembly only; pattern left blank */]> { let Inst{31-28} = 0b1111; let Inst{22-20} = 0b011; // W = 1 + let Inst{15-0} = 0x0a00; } def RFE : ABXI<{1,0,0,?}, (outs), (ins ldstm_mode:$amode, GPR:$base), @@ -1563,6 +1586,7 @@ def RFE : ABXI<{1,0,0,?}, (outs), (ins ldstm_mode:$amode, GPR:$base), [/* For disassembly only; pattern left blank */]> { let Inst{31-28} = 0b1111; let Inst{22-20} = 0b001; // W = 0 + let Inst{15-0} = 0x0a00; } } // isCodeGenOnly = 1 @@ -1610,15 +1634,11 @@ def LDRSB : AI3ld<0b1101, 1, (outs GPR:$Rt), (ins addrmode3:$addr), LdMiscFrm, IIC_iLoad_bh_r, "ldrsb", "\t$Rt, $addr", [(set GPR:$Rt, (sextloadi8 addrmode3:$addr))]>; -let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1, - isCodeGenOnly = 1 in { // $dst2 doesn't exist in asmstring? -// FIXME: $dst2 isn't in the asm string as it's implied by $Rd (dst2 = Rd+1) -// how to represent that such that tblgen is happy and we don't -// mark this codegen only? 
+let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in { // Load doubleword def LDRD : AI3ld<0b1101, 0, (outs GPR:$Rd, GPR:$dst2), (ins addrmode3:$addr), LdMiscFrm, - IIC_iLoad_d_r, "ldrd", "\t$Rd, $addr", + IIC_iLoad_d_r, "ldrd", "\t$Rd, $dst2, $addr", []>, Requires<[IsARM, HasV5TE]>; } @@ -1636,6 +1656,7 @@ multiclass AI2_ldridx<bit isByte, string opc, InstrItinClass itin> { let Inst{23} = addr{12}; let Inst{19-16} = addr{17-14}; let Inst{11-0} = addr{11-0}; + let AsmMatchConverter = "CvtLdWriteBackRegAddrMode2"; } def _POST : AI2ldstidx<1, isByte, 0, (outs GPR:$Rt, GPR:$Rn_wb), (ins GPR:$Rn, am2offset:$offset), @@ -1688,40 +1709,80 @@ let mayLoad = 1, neverHasSideEffects = 1 in { defm LDRH : AI3_ldridx<0b1011, 1, "ldrh", IIC_iLoad_bh_ru>; defm LDRSH : AI3_ldridx<0b1111, 1, "ldrsh", IIC_iLoad_bh_ru>; defm LDRSB : AI3_ldridx<0b1101, 1, "ldrsb", IIC_iLoad_bh_ru>; -let hasExtraDefRegAllocReq = 1, isCodeGenOnly = 1 in -defm LDRD : AI3_ldridx<0b1101, 0, "ldrd", IIC_iLoad_d_ru>; +let hasExtraDefRegAllocReq = 1 in { +def LDRD_PRE : AI3ldstidx<0b1101, 0, 1, 1, (outs GPR:$Rt, GPR:$Rt2, GPR:$Rn_wb), + (ins addrmode3:$addr), IndexModePre, + LdMiscFrm, IIC_iLoad_d_ru, + "ldrd", "\t$Rt, $Rt2, $addr!", + "$addr.base = $Rn_wb", []> { + bits<14> addr; + let Inst{23} = addr{8}; // U bit + let Inst{22} = addr{13}; // 1 == imm8, 0 == Rm + let Inst{19-16} = addr{12-9}; // Rn + let Inst{11-8} = addr{7-4}; // imm7_4/zero + let Inst{3-0} = addr{3-0}; // imm3_0/Rm +} +def LDRD_POST: AI3ldstidx<0b1101, 0, 1, 0, (outs GPR:$Rt, GPR:$Rt2, GPR:$Rn_wb), + (ins GPR:$Rn, am3offset:$offset), IndexModePost, + LdMiscFrm, IIC_iLoad_d_ru, + "ldrd", "\t$Rt, $Rt2, [$Rn], $offset", + "$Rn = $Rn_wb", []> { + bits<10> offset; + bits<4> Rn; + let Inst{23} = offset{8}; // U bit + let Inst{22} = offset{9}; // 1 == imm8, 0 == Rm + let Inst{19-16} = Rn; + let Inst{11-8} = offset{7-4}; // imm7_4/zero + let Inst{3-0} = offset{3-0}; // imm3_0/Rm +} +} // hasExtraDefRegAllocReq = 1 } // mayLoad = 1, neverHasSideEffects = 1 // LDRT, LDRBT, LDRSBT, LDRHT, LDRSHT are for disassembly only. 
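As a usage note for the ldrd changes above, which make the second destination register explicit in the assembly string: the new pre- and post-indexed definitions correspond to assembly of the following shape (registers and offset are arbitrary examples, not taken from the patch):

// ldrd r0, r1, [r2, #8]!    -> LDRD_PRE  (base r2 written back before access)
// ldrd r0, r1, [r2], #8     -> LDRD_POST (base r2 written back after access)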
let mayLoad = 1, neverHasSideEffects = 1 in { -def LDRT : AI2ldstidx<1, 0, 0, (outs GPR:$dst, GPR:$base_wb), - (ins GPR:$base, am2offset:$offset), IndexModeNone, - LdFrm, IIC_iLoad_ru, - "ldrt", "\t$dst, [$base], $offset", "$base = $base_wb", []> { +def LDRT : AI2ldstidx<1, 0, 0, (outs GPR:$Rt, GPR:$base_wb), + (ins addrmode2:$addr), IndexModePost, LdFrm, IIC_iLoad_ru, + "ldrt", "\t$Rt, $addr", "$addr.base = $base_wb", []> { + // {17-14} Rn + // {13} 1 == Rm, 0 == imm12 + // {12} isAdd + // {11-0} imm12/Rm + bits<18> addr; + let Inst{25} = addr{13}; + let Inst{23} = addr{12}; let Inst{21} = 1; // overwrite -} -def LDRBT : AI2ldstidx<1, 1, 0, (outs GPR:$dst, GPR:$base_wb), - (ins GPR:$base, am2offset:$offset), IndexModeNone, - LdFrm, IIC_iLoad_bh_ru, - "ldrbt", "\t$dst, [$base], $offset", "$base = $base_wb", []> { + let Inst{19-16} = addr{17-14}; + let Inst{11-0} = addr{11-0}; + let AsmMatchConverter = "CvtLdWriteBackRegAddrMode2"; +} +def LDRBT : AI2ldstidx<1, 1, 0, (outs GPR:$Rt, GPR:$base_wb), + (ins addrmode2:$addr), IndexModePost, LdFrm, IIC_iLoad_bh_ru, + "ldrbt", "\t$Rt, $addr", "$addr.base = $base_wb", []> { + // {17-14} Rn + // {13} 1 == Rm, 0 == imm12 + // {12} isAdd + // {11-0} imm12/Rm + bits<18> addr; + let Inst{25} = addr{13}; + let Inst{23} = addr{12}; let Inst{21} = 1; // overwrite + let Inst{19-16} = addr{17-14}; + let Inst{11-0} = addr{11-0}; + let AsmMatchConverter = "CvtLdWriteBackRegAddrMode2"; } -def LDRSBT : AI3ldstidx<0b1101, 1, 1, 0, (outs GPR:$dst, GPR:$base_wb), - (ins GPR:$base, am3offset:$offset), IndexModePost, - LdMiscFrm, IIC_iLoad_bh_ru, - "ldrsbt", "\t$dst, [$base], $offset", "$base = $base_wb", []> { +def LDRSBT : AI3ldstidxT<0b1101, 1, 1, 0, (outs GPR:$Rt, GPR:$base_wb), + (ins addrmode3:$addr), IndexModePost, LdMiscFrm, IIC_iLoad_bh_ru, + "ldrsbt", "\t$Rt, $addr", "$addr.base = $base_wb", []> { let Inst{21} = 1; // overwrite } -def LDRHT : AI3ldstidx<0b1011, 1, 1, 0, (outs GPR:$dst, GPR:$base_wb), - (ins GPR:$base, am3offset:$offset), IndexModePost, - LdMiscFrm, IIC_iLoad_bh_ru, - "ldrht", "\t$dst, [$base], $offset", "$base = $base_wb", []> { +def LDRHT : AI3ldstidxT<0b1011, 1, 1, 0, (outs GPR:$Rt, GPR:$base_wb), + (ins addrmode3:$addr), IndexModePost, LdMiscFrm, IIC_iLoad_bh_ru, + "ldrht", "\t$Rt, $addr", "$addr.base = $base_wb", []> { let Inst{21} = 1; // overwrite } -def LDRSHT : AI3ldstidx<0b1111, 1, 1, 0, (outs GPR:$dst, GPR:$base_wb), - (ins GPR:$base, am3offset:$offset), IndexModePost, - LdMiscFrm, IIC_iLoad_bh_ru, - "ldrsht", "\t$dst, [$base], $offset", "$base = $base_wb", []> { +def LDRSHT : AI3ldstidxT<0b1111, 1, 1, 0, (outs GPR:$Rt, GPR:$base_wb), + (ins addrmode3:$addr), IndexModePost, LdMiscFrm, IIC_iLoad_bh_ru, + "ldrsht", "\t$Rt, $addr", "$addr.base = $base_wb", []> { let Inst{21} = 1; // overwrite } } @@ -1734,55 +1795,61 @@ def STRH : AI3str<0b1011, (outs), (ins GPR:$Rt, addrmode3:$addr), StMiscFrm, [(truncstorei16 GPR:$Rt, addrmode3:$addr)]>; // Store doubleword -let mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1, - isCodeGenOnly = 1 in // $src2 doesn't exist in asm string -def STRD : AI3str<0b1111, (outs), (ins GPR:$src1, GPR:$src2, addrmode3:$addr), +let mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in +def STRD : AI3str<0b1111, (outs), (ins GPR:$Rt, GPR:$src2, addrmode3:$addr), StMiscFrm, IIC_iStore_d_r, - "strd", "\t$src1, $addr", []>, Requires<[IsARM, HasV5TE]>; + "strd", "\t$Rt, $src2, $addr", []>, Requires<[IsARM, HasV5TE]>; // Indexed stores def STR_PRE : AI2stridx<0, 1, (outs GPR:$Rn_wb), 
(ins GPR:$Rt, GPR:$Rn, am2offset:$offset), IndexModePre, StFrm, IIC_iStore_ru, - "str", "\t$Rt, [$Rn, $offset]!", "$Rn = $Rn_wb", + "str", "\t$Rt, [$Rn, $offset]!", + "$Rn = $Rn_wb,@earlyclobber $Rn_wb", [(set GPR:$Rn_wb, (pre_store GPR:$Rt, GPR:$Rn, am2offset:$offset))]>; def STR_POST : AI2stridx<0, 0, (outs GPR:$Rn_wb), (ins GPR:$Rt, GPR:$Rn, am2offset:$offset), IndexModePost, StFrm, IIC_iStore_ru, - "str", "\t$Rt, [$Rn], $offset", "$Rn = $Rn_wb", + "str", "\t$Rt, [$Rn], $offset", + "$Rn = $Rn_wb,@earlyclobber $Rn_wb", [(set GPR:$Rn_wb, (post_store GPR:$Rt, GPR:$Rn, am2offset:$offset))]>; def STRB_PRE : AI2stridx<1, 1, (outs GPR:$Rn_wb), (ins GPR:$Rt, GPR:$Rn, am2offset:$offset), IndexModePre, StFrm, IIC_iStore_bh_ru, - "strb", "\t$Rt, [$Rn, $offset]!", "$Rn = $Rn_wb", + "strb", "\t$Rt, [$Rn, $offset]!", + "$Rn = $Rn_wb,@earlyclobber $Rn_wb", [(set GPR:$Rn_wb, (pre_truncsti8 GPR:$Rt, GPR:$Rn, am2offset:$offset))]>; def STRB_POST: AI2stridx<1, 0, (outs GPR:$Rn_wb), (ins GPR:$Rt, GPR:$Rn, am2offset:$offset), IndexModePost, StFrm, IIC_iStore_bh_ru, - "strb", "\t$Rt, [$Rn], $offset", "$Rn = $Rn_wb", + "strb", "\t$Rt, [$Rn], $offset", + "$Rn = $Rn_wb,@earlyclobber $Rn_wb", [(set GPR:$Rn_wb, (post_truncsti8 GPR:$Rt, GPR:$Rn, am2offset:$offset))]>; def STRH_PRE : AI3stridx<0b1011, 0, 1, (outs GPR:$Rn_wb), (ins GPR:$Rt, GPR:$Rn, am3offset:$offset), IndexModePre, StMiscFrm, IIC_iStore_ru, - "strh", "\t$Rt, [$Rn, $offset]!", "$Rn = $Rn_wb", + "strh", "\t$Rt, [$Rn, $offset]!", + "$Rn = $Rn_wb,@earlyclobber $Rn_wb", [(set GPR:$Rn_wb, (pre_truncsti16 GPR:$Rt, GPR:$Rn, am3offset:$offset))]>; def STRH_POST: AI3stridx<0b1011, 0, 0, (outs GPR:$Rn_wb), (ins GPR:$Rt, GPR:$Rn, am3offset:$offset), IndexModePost, StMiscFrm, IIC_iStore_bh_ru, - "strh", "\t$Rt, [$Rn], $offset", "$Rn = $Rn_wb", + "strh", "\t$Rt, [$Rn], $offset", + "$Rn = $Rn_wb,@earlyclobber $Rn_wb", [(set GPR:$Rn_wb, (post_truncsti16 GPR:$Rt, GPR:$Rn, am3offset:$offset))]>; // For disassembly only +let mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in { def STRD_PRE : AI3stdpr<(outs GPR:$base_wb), (ins GPR:$src1, GPR:$src2, GPR:$base, am3offset:$offset), StMiscFrm, IIC_iStore_d_ru, @@ -1795,31 +1862,32 @@ def STRD_POST: AI3stdpo<(outs GPR:$base_wb), StMiscFrm, IIC_iStore_d_ru, "strd", "\t$src1, $src2, [$base], $offset", "$base = $base_wb", []>; +} // mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 // STRT, STRBT, and STRHT are for disassembly only. 
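One clarification on the constraint strings introduced above, offered as a reading of standard LLVM constraint semantics rather than anything stated in the patch itself: the string carries two comma-separated constraints.

// "$Rn = $Rn_wb,@earlyclobber $Rn_wb"
//   "$Rn = $Rn_wb"   ties the writeback result to the incoming base register;
//   "@earlyclobber"  tells the register allocator that the result is written
//                    before all inputs are read, so $Rn_wb may not share a
//                    register with another input such as the stored $Rt.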
-def STRT : AI2stridx<0, 0, (outs GPR:$Rn_wb), - (ins GPR:$Rt, GPR:$Rn,am2offset:$offset), - IndexModeNone, StFrm, IIC_iStore_ru, - "strt", "\t$Rt, [$Rn], $offset", "$Rn = $Rn_wb", - [/* For disassembly only; pattern left blank */]> { +def STRT : AI2stridxT<0, 0, (outs GPR:$Rn_wb), (ins GPR:$Rt, addrmode2:$addr), + IndexModePost, StFrm, IIC_iStore_ru, + "strt", "\t$Rt, $addr", "$addr.base = $Rn_wb", + [/* For disassembly only; pattern left blank */]> { let Inst{21} = 1; // overwrite + let AsmMatchConverter = "CvtStWriteBackRegAddrMode2"; } -def STRBT : AI2stridx<1, 0, (outs GPR:$Rn_wb), - (ins GPR:$Rt, GPR:$Rn, am2offset:$offset), - IndexModeNone, StFrm, IIC_iStore_bh_ru, - "strbt", "\t$Rt, [$Rn], $offset", "$Rn = $Rn_wb", - [/* For disassembly only; pattern left blank */]> { +def STRBT : AI2stridxT<1, 0, (outs GPR:$Rn_wb), (ins GPR:$Rt, addrmode2:$addr), + IndexModePost, StFrm, IIC_iStore_bh_ru, + "strbt", "\t$Rt, $addr", "$addr.base = $Rn_wb", + [/* For disassembly only; pattern left blank */]> { let Inst{21} = 1; // overwrite + let AsmMatchConverter = "CvtStWriteBackRegAddrMode2"; } -def STRHT: AI3sthpo<(outs GPR:$base_wb), - (ins GPR:$src, GPR:$base,am3offset:$offset), +def STRHT: AI3sthpo<(outs GPR:$base_wb), (ins GPR:$Rt, addrmode3:$addr), StMiscFrm, IIC_iStore_bh_ru, - "strht", "\t$src, [$base], $offset", "$base = $base_wb", + "strht", "\t$Rt, $addr", "$addr.base = $base_wb", [/* For disassembly only; pattern left blank */]> { let Inst{21} = 1; // overwrite + let AsmMatchConverter = "CvtStWriteBackRegAddrMode3"; } //===----------------------------------------------------------------------===// @@ -1892,7 +1960,7 @@ multiclass arm_ldst_mult<string asm, bit L_bit, Format f, let Inst{21} = 1; // Writeback let Inst{20} = L_bit; } -} +} let neverHasSideEffects = 1 in { @@ -1912,16 +1980,10 @@ def : MnemonicAlias<"stm", "stmia">; // FIXME: Should pc be an implicit operand like PICADD, etc? let isReturn = 1, isTerminator = 1, isBarrier = 1, mayLoad = 1, hasExtraDefRegAllocReq = 1, isCodeGenOnly = 1 in -// FIXME: Should be a pseudo-instruction. -def LDMIA_RET : AXI4<(outs GPR:$wb), (ins GPR:$Rn, pred:$p, - reglist:$regs, variable_ops), - IndexModeUpd, LdStMulFrm, IIC_iLoad_mBr, - "ldmia${p}\t$Rn!, $regs", - "$Rn = $wb", []> { - let Inst{24-23} = 0b01; // Increment After - let Inst{21} = 1; // Writeback - let Inst{20} = 1; // Load -} +def LDMIA_RET : ARMPseudoInst<(outs GPR:$wb), (ins GPR:$Rn, pred:$p, + reglist:$regs, variable_ops), + Size4Bytes, IIC_iLoad_mBr, []>, + RegConstraint<"$Rn = $wb">; //===----------------------------------------------------------------------===// // Move Instructions. @@ -1933,6 +1995,7 @@ def MOVr : AsI1<0b1101, (outs GPR:$Rd), (ins GPR:$Rm), DPFrm, IIC_iMOVr, bits<4> Rd; bits<4> Rm; + let Inst{19-16} = 0b0000; let Inst{11-4} = 0b00000000; let Inst{25} = 0; let Inst{3-0} = Rm; @@ -1959,6 +2022,7 @@ def MOVs : AsI1<0b1101, (outs GPR:$Rd), (ins shift_so_reg:$src), bits<4> Rd; bits<12> src; let Inst{15-12} = Rd; + let Inst{19-16} = 0b0000; let Inst{11-0} = src; let Inst{25} = 0; } @@ -2145,10 +2209,12 @@ defm SBC : AI1_adde_sube_irs<0b0110, "sbc", BinOpFrag<(sube_dead_carry node:$LHS, node:$RHS)>>; // ADC and SUBC with 's' bit set. 
-defm ADCS : AI1_adde_sube_s_irs<0b0101, "adcs", - BinOpFrag<(adde_live_carry node:$LHS, node:$RHS)>, 1>; -defm SBCS : AI1_adde_sube_s_irs<0b0110, "sbcs", - BinOpFrag<(sube_live_carry node:$LHS, node:$RHS) >>; +let usesCustomInserter = 1 in { +defm ADCS : AI1_adde_sube_s_irs< + BinOpFrag<(adde_live_carry node:$LHS, node:$RHS)>, 1>; +defm SBCS : AI1_adde_sube_s_irs< + BinOpFrag<(sube_live_carry node:$LHS, node:$RHS) >>; +} def RSBri : AsI1<0b0011, (outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm), DPFrm, IIC_iALUi, "rsb", "\t$Rd, $Rn, $imm", @@ -2190,31 +2256,17 @@ def RSBrs : AsI1<0b0011, (outs GPR:$Rd), (ins GPR:$Rn, so_reg:$shift), } // RSB with 's' bit set. -let isCodeGenOnly = 1, Defs = [CPSR] in { -def RSBSri : AI1<0b0011, (outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm), DPFrm, - IIC_iALUi, "rsbs", "\t$Rd, $Rn, $imm", - [(set GPR:$Rd, (subc so_imm:$imm, GPR:$Rn))]> { - bits<4> Rd; - bits<4> Rn; - bits<12> imm; - let Inst{25} = 1; - let Inst{20} = 1; - let Inst{15-12} = Rd; - let Inst{19-16} = Rn; - let Inst{11-0} = imm; -} -def RSBSrs : AI1<0b0011, (outs GPR:$Rd), (ins GPR:$Rn, so_reg:$shift), - DPSoRegFrm, IIC_iALUsr, "rsbs", "\t$Rd, $Rn, $shift", - [(set GPR:$Rd, (subc so_reg:$shift, GPR:$Rn))]> { - bits<4> Rd; - bits<4> Rn; - bits<12> shift; - let Inst{25} = 0; - let Inst{20} = 1; - let Inst{11-0} = shift; - let Inst{15-12} = Rd; - let Inst{19-16} = Rn; -} +// NOTE: CPSR def omitted because it will be handled by the custom inserter. +let usesCustomInserter = 1 in { +def RSBSri : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm), + Size4Bytes, IIC_iALUi, + [(set GPR:$Rd, (subc so_imm:$imm, GPR:$Rn))]>; +def RSBSrr : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), + Size4Bytes, IIC_iALUr, + [/* For disassembly only; pattern left blank */]>; +def RSBSrs : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$Rn, so_reg:$shift), + Size4Bytes, IIC_iALUsr, + [(set GPR:$Rd, (subc so_reg:$shift, GPR:$Rn))]>; } let Uses = [CPSR] in { @@ -2258,34 +2310,14 @@ def RSCrs : AsI1<0b0111, (outs GPR:$Rd), (ins GPR:$Rn, so_reg:$shift), } } -// FIXME: Allow these to be predicated. -let isCodeGenOnly = 1, Defs = [CPSR], Uses = [CPSR] in { -def RSCSri : AXI1<0b0111, (outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm), - DPFrm, IIC_iALUi, "rscs\t$Rd, $Rn, $imm", - [(set GPR:$Rd, (sube_dead_carry so_imm:$imm, GPR:$Rn))]>, - Requires<[IsARM]> { - bits<4> Rd; - bits<4> Rn; - bits<12> imm; - let Inst{25} = 1; - let Inst{20} = 1; - let Inst{15-12} = Rd; - let Inst{19-16} = Rn; - let Inst{11-0} = imm; -} -def RSCSrs : AXI1<0b0111, (outs GPR:$Rd), (ins GPR:$Rn, so_reg:$shift), - DPSoRegFrm, IIC_iALUsr, "rscs\t$Rd, $Rn, $shift", - [(set GPR:$Rd, (sube_dead_carry so_reg:$shift, GPR:$Rn))]>, - Requires<[IsARM]> { - bits<4> Rd; - bits<4> Rn; - bits<12> shift; - let Inst{25} = 0; - let Inst{20} = 1; - let Inst{11-0} = shift; - let Inst{15-12} = Rd; - let Inst{19-16} = Rn; -} +// NOTE: CPSR def omitted because it will be handled by the custom inserter. +let usesCustomInserter = 1, Uses = [CPSR] in { +def RSCSri : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm), + Size4Bytes, IIC_iALUi, + [(set GPR:$Rd, (sube_dead_carry so_imm:$imm, GPR:$Rn))]>; +def RSCSrs : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$Rn, so_reg:$shift), + Size4Bytes, IIC_iALUsr, + [(set GPR:$Rd, (sube_dead_carry so_reg:$shift, GPR:$Rn))]>; } // (sub X, imm) gets canonicalized to (add X, -imm). Match this form. @@ -2300,8 +2332,10 @@ def : ARMPat<(addc GPR:$src, so_imm_neg:$imm), // The with-carry-in form matches bitwise not instead of the negation. 
// Effectively, the inverse interpretation of the carry flag already accounts // for part of the negation. -def : ARMPat<(adde GPR:$src, so_imm_not:$imm), +def : ARMPat<(adde_dead_carry GPR:$src, so_imm_not:$imm), (SBCri GPR:$src, so_imm_not:$imm)>; +def : ARMPat<(adde_live_carry GPR:$src, so_imm_not:$imm), + (SBCSri GPR:$src, so_imm_not:$imm)>; // Note: These are implemented in C++ code, because they have to generate // ADD/SUBrs instructions, which use a complex pattern that a xform function @@ -2617,14 +2651,16 @@ def MULv5: ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm, def MUL : AsMul1I32<0b0000000, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), IIC_iMUL32, "mul", "\t$Rd, $Rn, $Rm", [(set GPR:$Rd, (mul GPR:$Rn, GPR:$Rm))]>, - Requires<[IsARM, HasV6]>; + Requires<[IsARM, HasV6]> { + let Inst{15-12} = 0b0000; +} } let Constraints = "@earlyclobber $Rd" in def MLAv5: ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm, GPR:$Ra, pred:$p, cc_out:$s), - Size4Bytes, IIC_iMAC32, - [(set GPR:$Rd, (add (mul GPR:$Rn, GPR:$Rm), GPR:$Ra))]>, + Size4Bytes, IIC_iMAC32, + [(set GPR:$Rd, (add (mul GPR:$Rn, GPR:$Rm), GPR:$Ra))]>, Requires<[IsARM, NoV6]> { bits<4> Ra; let Inst{15-12} = Ra; @@ -2657,7 +2693,7 @@ let neverHasSideEffects = 1 in { let isCommutable = 1 in { let Constraints = "@earlyclobber $RdLo,@earlyclobber $RdHi" in { def SMULLv5 : ARMPseudoInst<(outs GPR:$RdLo, GPR:$RdHi), - (ins GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s), + (ins GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s), Size4Bytes, IIC_iMUL64, []>, Requires<[IsARM, NoV6]>; @@ -2681,15 +2717,15 @@ def UMULL : AsMul1I64<0b0000100, (outs GPR:$RdLo, GPR:$RdHi), // Multiply + accumulate let Constraints = "@earlyclobber $RdLo,@earlyclobber $RdHi" in { def SMLALv5 : ARMPseudoInst<(outs GPR:$RdLo, GPR:$RdHi), - (ins GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s), + (ins GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s), Size4Bytes, IIC_iMAC64, []>, Requires<[IsARM, NoV6]>; def UMLALv5 : ARMPseudoInst<(outs GPR:$RdLo, GPR:$RdHi), - (ins GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s), + (ins GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s), Size4Bytes, IIC_iMAC64, []>, Requires<[IsARM, NoV6]>; def UMAALv5 : ARMPseudoInst<(outs GPR:$RdLo, GPR:$RdHi), - (ins GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s), + (ins GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s), Size4Bytes, IIC_iMAC64, []>, Requires<[IsARM, NoV6]>; @@ -2970,17 +3006,25 @@ def REVSH : AMiscA1I<0b01101111, 0b1011, (outs GPR:$Rd), (ins GPR:$Rm), IIC_iUNAr, "revsh", "\t$Rd, $Rm", [(set GPR:$Rd, (sext_inreg - (or (srl (and GPR:$Rm, 0xFF00), (i32 8)), + (or (srl GPR:$Rm, (i32 8)), (shl GPR:$Rm, (i32 8))), i16))]>, Requires<[IsARM, HasV6]>; +def : ARMV6Pat<(sext_inreg (or (srl (and GPR:$Rm, 0xFF00), (i32 8)), + (shl GPR:$Rm, (i32 8))), i16), + (REVSH GPR:$Rm)>; + +// Need the AddedComplexity or else MOVs + REV would be chosen. 
+let AddedComplexity = 5 in +def : ARMV6Pat<(sra (bswap GPR:$Rm), (i32 16)), (REVSH GPR:$Rm)>; + def lsl_shift_imm : SDNodeXForm<imm, [{ unsigned Sh = ARM_AM::getSORegOpc(ARM_AM::lsl, N->getZExtValue()); return CurDAG->getTargetConstant(Sh, MVT::i32); }]>; -def lsl_amt : PatLeaf<(i32 imm), [{ - return (N->getZExtValue() < 32); +def lsl_amt : ImmLeaf<i32, [{ + return Imm > 0 && Imm < 32; }], lsl_shift_imm>; def PKHBT : APKHI<0b01101000, 0, (outs GPR:$Rd), @@ -3002,8 +3046,8 @@ def asr_shift_imm : SDNodeXForm<imm, [{ return CurDAG->getTargetConstant(Sh, MVT::i32); }]>; -def asr_amt : PatLeaf<(i32 imm), [{ - return (N->getZExtValue() <= 32); +def asr_amt : ImmLeaf<i32, [{ + return Imm > 0 && Imm <= 32; }], asr_shift_imm>; // Note: Shifts of 1-15 bits will be transformed to srl instead of sra and @@ -3119,88 +3163,43 @@ def BCCZi64 : PseudoInst<(outs), // Conditional moves // FIXME: should be able to write a pattern for ARMcmov, but can't use // a two-value operand where a dag node expects two operands. :( -// FIXME: These should all be pseudo-instructions that get expanded to -// the normal MOV instructions. That would fix the dependency on -// special casing them in tblgen. let neverHasSideEffects = 1 in { -def MOVCCr : AI1<0b1101, (outs GPR:$Rd), (ins GPR:$false, GPR:$Rm), DPFrm, - IIC_iCMOVr, "mov", "\t$Rd, $Rm", - [/*(set GPR:$Rd, (ARMcmov GPR:$false, GPR:$Rm, imm:$cc, CCR:$ccr))*/]>, - RegConstraint<"$false = $Rd">, UnaryDP { - bits<4> Rd; - bits<4> Rm; - let Inst{25} = 0; - let Inst{20} = 0; - let Inst{15-12} = Rd; - let Inst{11-4} = 0b00000000; - let Inst{3-0} = Rm; -} - -def MOVCCs : AI1<0b1101, (outs GPR:$Rd), - (ins GPR:$false, so_reg:$shift), DPSoRegFrm, IIC_iCMOVsr, - "mov", "\t$Rd, $shift", - [/*(set GPR:$Rd, (ARMcmov GPR:$false, so_reg:$shift, imm:$cc, CCR:$ccr))*/]>, - RegConstraint<"$false = $Rd">, UnaryDP { - bits<4> Rd; - bits<12> shift; - let Inst{25} = 0; - let Inst{20} = 0; - let Inst{19-16} = 0; - let Inst{15-12} = Rd; - let Inst{11-0} = shift; -} +def MOVCCr : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$false, GPR:$Rm, pred:$p), + Size4Bytes, IIC_iCMOVr, + [/*(set GPR:$Rd, (ARMcmov GPR:$false, GPR:$Rm, imm:$cc, CCR:$ccr))*/]>, + RegConstraint<"$false = $Rd">; +def MOVCCs : ARMPseudoInst<(outs GPR:$Rd), + (ins GPR:$false, so_reg:$shift, pred:$p), + Size4Bytes, IIC_iCMOVsr, + [/*(set GPR:$Rd, (ARMcmov GPR:$false, so_reg:$shift, imm:$cc, CCR:$ccr))*/]>, + RegConstraint<"$false = $Rd">; let isMoveImm = 1 in -def MOVCCi16 : AI1<0b1000, (outs GPR:$Rd), (ins GPR:$false, i32imm_hilo16:$imm), - DPFrm, IIC_iMOVi, - "movw", "\t$Rd, $imm", - []>, - RegConstraint<"$false = $Rd">, Requires<[IsARM, HasV6T2]>, - UnaryDP { - bits<4> Rd; - bits<16> imm; - let Inst{25} = 1; - let Inst{20} = 0; - let Inst{19-16} = imm{15-12}; - let Inst{15-12} = Rd; - let Inst{11-0} = imm{11-0}; -} +def MOVCCi16 : ARMPseudoInst<(outs GPR:$Rd), + (ins GPR:$false, i32imm_hilo16:$imm, pred:$p), + Size4Bytes, IIC_iMOVi, + []>, + RegConstraint<"$false = $Rd">, Requires<[IsARM, HasV6T2]>; let isMoveImm = 1 in -def MOVCCi : AI1<0b1101, (outs GPR:$Rd), - (ins GPR:$false, so_imm:$imm), DPFrm, IIC_iCMOVi, - "mov", "\t$Rd, $imm", +def MOVCCi : ARMPseudoInst<(outs GPR:$Rd), + (ins GPR:$false, so_imm:$imm, pred:$p), + Size4Bytes, IIC_iCMOVi, [/*(set GPR:$Rd, (ARMcmov GPR:$false, so_imm:$imm, imm:$cc, CCR:$ccr))*/]>, - RegConstraint<"$false = $Rd">, UnaryDP { - bits<4> Rd; - bits<12> imm; - let Inst{25} = 1; - let Inst{20} = 0; - let Inst{19-16} = 0b0000; - let Inst{15-12} = Rd; - let Inst{11-0} = imm; -} + 
RegConstraint<"$false = $Rd">; // Two instruction predicate mov immediate. let isMoveImm = 1 in -def MOVCCi32imm : PseudoInst<(outs GPR:$Rd), - (ins GPR:$false, i32imm:$src, pred:$p), - IIC_iCMOVix2, []>, RegConstraint<"$false = $Rd">; +def MOVCCi32imm : ARMPseudoInst<(outs GPR:$Rd), + (ins GPR:$false, i32imm:$src, pred:$p), + Size8Bytes, IIC_iCMOVix2, []>, RegConstraint<"$false = $Rd">; let isMoveImm = 1 in -def MVNCCi : AI1<0b1111, (outs GPR:$Rd), - (ins GPR:$false, so_imm:$imm), DPFrm, IIC_iCMOVi, - "mvn", "\t$Rd, $imm", +def MVNCCi : ARMPseudoInst<(outs GPR:$Rd), + (ins GPR:$false, so_imm:$imm, pred:$p), + Size4Bytes, IIC_iCMOVi, [/*(set GPR:$Rd, (ARMcmov GPR:$false, so_imm_not:$imm, imm:$cc, CCR:$ccr))*/]>, - RegConstraint<"$false = $Rd">, UnaryDP { - bits<4> Rd; - bits<12> imm; - let Inst{25} = 1; - let Inst{20} = 0; - let Inst{19-16} = 0b0000; - let Inst{15-12} = Rd; - let Inst{11-0} = imm; -} + RegConstraint<"$false = $Rd">; } // neverHasSideEffects //===----------------------------------------------------------------------===// @@ -3221,13 +3220,6 @@ def DMB : AInoP<(outs), (ins memb_opt:$opt), MiscFrm, NoItinerary, let Inst{31-4} = 0xf57ff05; let Inst{3-0} = opt; } - -def DMB_MCR : AInoP<(outs), (ins GPR:$zero), MiscFrm, NoItinerary, - "mcr", "\tp15, 0, $zero, c7, c10, 5", - [(ARMMemBarrierMCR GPR:$zero)]>, - Requires<[IsARM, HasV6]> { - // FIXME: add encoding -} } def DSB : AInoP<(outs), (ins memb_opt:$opt), MiscFrm, NoItinerary, @@ -3266,6 +3258,18 @@ let usesCustomInserter = 1 in { def ATOMIC_LOAD_NAND_I8 : PseudoInst< (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary, [(set GPR:$dst, (atomic_load_nand_8 GPR:$ptr, GPR:$incr))]>; + def ATOMIC_LOAD_MIN_I8 : PseudoInst< + (outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary, + [(set GPR:$dst, (atomic_load_min_8 GPR:$ptr, GPR:$val))]>; + def ATOMIC_LOAD_MAX_I8 : PseudoInst< + (outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary, + [(set GPR:$dst, (atomic_load_max_8 GPR:$ptr, GPR:$val))]>; + def ATOMIC_LOAD_UMIN_I8 : PseudoInst< + (outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary, + [(set GPR:$dst, (atomic_load_min_8 GPR:$ptr, GPR:$val))]>; + def ATOMIC_LOAD_UMAX_I8 : PseudoInst< + (outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary, + [(set GPR:$dst, (atomic_load_max_8 GPR:$ptr, GPR:$val))]>; def ATOMIC_LOAD_ADD_I16 : PseudoInst< (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary, [(set GPR:$dst, (atomic_load_add_16 GPR:$ptr, GPR:$incr))]>; @@ -3284,6 +3288,18 @@ let usesCustomInserter = 1 in { def ATOMIC_LOAD_NAND_I16 : PseudoInst< (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary, [(set GPR:$dst, (atomic_load_nand_16 GPR:$ptr, GPR:$incr))]>; + def ATOMIC_LOAD_MIN_I16 : PseudoInst< + (outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary, + [(set GPR:$dst, (atomic_load_min_16 GPR:$ptr, GPR:$val))]>; + def ATOMIC_LOAD_MAX_I16 : PseudoInst< + (outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary, + [(set GPR:$dst, (atomic_load_max_16 GPR:$ptr, GPR:$val))]>; + def ATOMIC_LOAD_UMIN_I16 : PseudoInst< + (outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary, + [(set GPR:$dst, (atomic_load_min_16 GPR:$ptr, GPR:$val))]>; + def ATOMIC_LOAD_UMAX_I16 : PseudoInst< + (outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary, + [(set GPR:$dst, (atomic_load_max_16 GPR:$ptr, GPR:$val))]>; def ATOMIC_LOAD_ADD_I32 : PseudoInst< (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary, [(set GPR:$dst, (atomic_load_add_32 GPR:$ptr, GPR:$incr))]>; @@ -3302,6 +3318,18 @@ let usesCustomInserter = 1 in { def 
ATOMIC_LOAD_NAND_I32 : PseudoInst< (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary, [(set GPR:$dst, (atomic_load_nand_32 GPR:$ptr, GPR:$incr))]>; + def ATOMIC_LOAD_MIN_I32 : PseudoInst< + (outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary, + [(set GPR:$dst, (atomic_load_min_32 GPR:$ptr, GPR:$val))]>; + def ATOMIC_LOAD_MAX_I32 : PseudoInst< + (outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary, + [(set GPR:$dst, (atomic_load_max_32 GPR:$ptr, GPR:$val))]>; + def ATOMIC_LOAD_UMIN_I32 : PseudoInst< + (outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary, + [(set GPR:$dst, (atomic_load_min_32 GPR:$ptr, GPR:$val))]>; + def ATOMIC_LOAD_UMAX_I32 : PseudoInst< + (outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary, + [(set GPR:$dst, (atomic_load_max_32 GPR:$ptr, GPR:$val))]>; def ATOMIC_SWAP_I8 : PseudoInst< (outs GPR:$dst), (ins GPR:$ptr, GPR:$new), NoItinerary, @@ -3326,39 +3354,26 @@ let usesCustomInserter = 1 in { } let mayLoad = 1 in { -def LDREXB : AIldrex<0b10, (outs GPR:$Rt), (ins GPR:$Rn), NoItinerary, - "ldrexb", "\t$Rt, [$Rn]", - []>; -def LDREXH : AIldrex<0b11, (outs GPR:$Rt), (ins GPR:$Rn), NoItinerary, - "ldrexh", "\t$Rt, [$Rn]", - []>; -def LDREX : AIldrex<0b00, (outs GPR:$Rt), (ins GPR:$Rn), NoItinerary, - "ldrex", "\t$Rt, [$Rn]", - []>; -def LDREXD : AIldrex<0b01, (outs GPR:$Rt, GPR:$Rt2), (ins GPR:$Rn), - NoItinerary, - "ldrexd", "\t$Rt, $Rt2, [$Rn]", - []>; +def LDREXB : AIldrex<0b10, (outs GPR:$Rt), (ins addrmode7:$addr), NoItinerary, + "ldrexb", "\t$Rt, $addr", []>; +def LDREXH : AIldrex<0b11, (outs GPR:$Rt), (ins addrmode7:$addr), NoItinerary, + "ldrexh", "\t$Rt, $addr", []>; +def LDREX : AIldrex<0b00, (outs GPR:$Rt), (ins addrmode7:$addr), NoItinerary, + "ldrex", "\t$Rt, $addr", []>; +def LDREXD : AIldrex<0b01, (outs GPR:$Rt, GPR:$Rt2), (ins addrmode7:$addr), + NoItinerary, "ldrexd", "\t$Rt, $Rt2, $addr", []>; } let mayStore = 1, Constraints = "@earlyclobber $Rd" in { -def STREXB : AIstrex<0b10, (outs GPR:$Rd), (ins GPR:$src, GPR:$Rn), - NoItinerary, - "strexb", "\t$Rd, $src, [$Rn]", - []>; -def STREXH : AIstrex<0b11, (outs GPR:$Rd), (ins GPR:$Rt, GPR:$Rn), - NoItinerary, - "strexh", "\t$Rd, $Rt, [$Rn]", - []>; -def STREX : AIstrex<0b00, (outs GPR:$Rd), (ins GPR:$Rt, GPR:$Rn), - NoItinerary, - "strex", "\t$Rd, $Rt, [$Rn]", - []>; +def STREXB : AIstrex<0b10, (outs GPR:$Rd), (ins GPR:$Rt, addrmode7:$addr), + NoItinerary, "strexb", "\t$Rd, $Rt, $addr", []>; +def STREXH : AIstrex<0b11, (outs GPR:$Rd), (ins GPR:$Rt, addrmode7:$addr), + NoItinerary, "strexh", "\t$Rd, $Rt, $addr", []>; +def STREX : AIstrex<0b00, (outs GPR:$Rd), (ins GPR:$Rt, addrmode7:$addr), + NoItinerary, "strex", "\t$Rd, $Rt, $addr", []>; def STREXD : AIstrex<0b01, (outs GPR:$Rd), - (ins GPR:$Rt, GPR:$Rt2, GPR:$Rn), - NoItinerary, - "strexd", "\t$Rd, $Rt, $Rt2, [$Rn]", - []>; + (ins GPR:$Rt, GPR:$Rt2, addrmode7:$addr), + NoItinerary, "strexd", "\t$Rd, $Rt, $Rt2, $addr", []>; } // Clear-Exclusive is for disassembly only. @@ -3377,238 +3392,7 @@ def SWPB : AIswp<1, (outs GPR:$Rt), (ins GPR:$Rt2, GPR:$Rn), "swpb", } //===----------------------------------------------------------------------===// -// TLS Instructions -// - -// __aeabi_read_tp preserves the registers r1-r3. -// This is a pseudo inst so that we can get the encoding right, -// complete with fixup for the aeabi_read_tp function. 
-let isCall = 1, - Defs = [R0, R12, LR, CPSR], Uses = [SP] in { - def TPsoft : PseudoInst<(outs), (ins), IIC_Br, - [(set R0, ARMthread_pointer)]>; -} - -//===----------------------------------------------------------------------===// -// SJLJ Exception handling intrinsics -// eh_sjlj_setjmp() is an instruction sequence to store the return -// address and save #0 in R0 for the non-longjmp case. -// Since by its nature we may be coming from some other function to get -// here, and we're using the stack frame for the containing function to -// save/restore registers, we can't keep anything live in regs across -// the eh_sjlj_setjmp(), else it will almost certainly have been tromped upon -// when we get here from a longjmp(). We force everthing out of registers -// except for our own input by listing the relevant registers in Defs. By -// doing so, we also cause the prologue/epilogue code to actively preserve -// all of the callee-saved resgisters, which is exactly what we want. -// A constant value is passed in $val, and we use the location as a scratch. -// -// These are pseudo-instructions and are lowered to individual MC-insts, so -// no encoding information is necessary. -let Defs = - [ R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, LR, D0, - D1, D2, D3, D4, D5, D6, D7, D8, D9, D10, D11, D12, D13, D14, D15, - D16, D17, D18, D19, D20, D21, D22, D23, D24, D25, D26, D27, D28, D29, D30, - D31 ], hasSideEffects = 1, isBarrier = 1 in { - def Int_eh_sjlj_setjmp : PseudoInst<(outs), (ins GPR:$src, GPR:$val), - NoItinerary, - [(set R0, (ARMeh_sjlj_setjmp GPR:$src, GPR:$val))]>, - Requires<[IsARM, HasVFP2]>; -} - -let Defs = - [ R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, LR ], - hasSideEffects = 1, isBarrier = 1 in { - def Int_eh_sjlj_setjmp_nofp : PseudoInst<(outs), (ins GPR:$src, GPR:$val), - NoItinerary, - [(set R0, (ARMeh_sjlj_setjmp GPR:$src, GPR:$val))]>, - Requires<[IsARM, NoVFP]>; -} - -// FIXME: Non-Darwin version(s) -let isBarrier = 1, hasSideEffects = 1, isTerminator = 1, - Defs = [ R7, LR, SP ] in { -def Int_eh_sjlj_longjmp : PseudoInst<(outs), (ins GPR:$src, GPR:$scratch), - NoItinerary, - [(ARMeh_sjlj_longjmp GPR:$src, GPR:$scratch)]>, - Requires<[IsARM, IsDarwin]>; -} - -// eh.sjlj.dispatchsetup pseudo-instruction. -// This pseudo is used for ARM, Thumb1 and Thumb2. Any differences are -// handled when the pseudo is expanded (which happens before any passes -// that need the instruction size). -let isBarrier = 1, hasSideEffects = 1 in -def Int_eh_sjlj_dispatchsetup : - PseudoInst<(outs), (ins GPR:$src), NoItinerary, - [(ARMeh_sjlj_dispatchsetup GPR:$src)]>, - Requires<[IsDarwin]>; - -//===----------------------------------------------------------------------===// -// Non-Instruction Patterns -// - -// Large immediate handling. - -// 32-bit immediate using two piece so_imms or movw + movt. -// This is a single pseudo instruction, the benefit is that it can be remat'd -// as a single unit instead of having to handle reg inputs. -// FIXME: Remove this when we can do generalized remat. -let isReMaterializable = 1, isMoveImm = 1 in -def MOVi32imm : PseudoInst<(outs GPR:$dst), (ins i32imm:$src), IIC_iMOVix2, - [(set GPR:$dst, (arm_i32imm:$src))]>, - Requires<[IsARM]>; - -// Pseudo instruction that combines movw + movt + add pc (if PIC). -// It also makes it possible to rematerialize the instructions. -// FIXME: Remove this when we can do generalized remat and when machine licm -// can properly the instructions. 
-let isReMaterializable = 1 in { -def MOV_ga_pcrel : PseudoInst<(outs GPR:$dst), (ins i32imm:$addr), - IIC_iMOVix2addpc, - [(set GPR:$dst, (ARMWrapperPIC tglobaladdr:$addr))]>, - Requires<[IsARM, UseMovt]>; - -def MOV_ga_dyn : PseudoInst<(outs GPR:$dst), (ins i32imm:$addr), - IIC_iMOVix2, - [(set GPR:$dst, (ARMWrapperDYN tglobaladdr:$addr))]>, - Requires<[IsARM, UseMovt]>; - -let AddedComplexity = 10 in -def MOV_ga_pcrel_ldr : PseudoInst<(outs GPR:$dst), (ins i32imm:$addr), - IIC_iMOVix2ld, - [(set GPR:$dst, (load (ARMWrapperPIC tglobaladdr:$addr)))]>, - Requires<[IsARM, UseMovt]>; -} // isReMaterializable - -// ConstantPool, GlobalAddress, and JumpTable -def : ARMPat<(ARMWrapper tglobaladdr :$dst), (LEApcrel tglobaladdr :$dst)>, - Requires<[IsARM, DontUseMovt]>; -def : ARMPat<(ARMWrapper tconstpool :$dst), (LEApcrel tconstpool :$dst)>; -def : ARMPat<(ARMWrapper tglobaladdr :$dst), (MOVi32imm tglobaladdr :$dst)>, - Requires<[IsARM, UseMovt]>; -def : ARMPat<(ARMWrapperJT tjumptable:$dst, imm:$id), - (LEApcrelJT tjumptable:$dst, imm:$id)>; - -// TODO: add,sub,and, 3-instr forms? - -// Tail calls -def : ARMPat<(ARMtcret tcGPR:$dst), - (TCRETURNri tcGPR:$dst)>, Requires<[IsDarwin]>; - -def : ARMPat<(ARMtcret (i32 tglobaladdr:$dst)), - (TCRETURNdi texternalsym:$dst)>, Requires<[IsDarwin]>; - -def : ARMPat<(ARMtcret (i32 texternalsym:$dst)), - (TCRETURNdi texternalsym:$dst)>, Requires<[IsDarwin]>; - -def : ARMPat<(ARMtcret tcGPR:$dst), - (TCRETURNriND tcGPR:$dst)>, Requires<[IsNotDarwin]>; - -def : ARMPat<(ARMtcret (i32 tglobaladdr:$dst)), - (TCRETURNdiND texternalsym:$dst)>, Requires<[IsNotDarwin]>; - -def : ARMPat<(ARMtcret (i32 texternalsym:$dst)), - (TCRETURNdiND texternalsym:$dst)>, Requires<[IsNotDarwin]>; - -// Direct calls -def : ARMPat<(ARMcall texternalsym:$func), (BL texternalsym:$func)>, - Requires<[IsARM, IsNotDarwin]>; -def : ARMPat<(ARMcall texternalsym:$func), (BLr9 texternalsym:$func)>, - Requires<[IsARM, IsDarwin]>; - -// zextload i1 -> zextload i8 -def : ARMPat<(zextloadi1 addrmode_imm12:$addr), (LDRBi12 addrmode_imm12:$addr)>; -def : ARMPat<(zextloadi1 ldst_so_reg:$addr), (LDRBrs ldst_so_reg:$addr)>; - -// extload -> zextload -def : ARMPat<(extloadi1 addrmode_imm12:$addr), (LDRBi12 addrmode_imm12:$addr)>; -def : ARMPat<(extloadi1 ldst_so_reg:$addr), (LDRBrs ldst_so_reg:$addr)>; -def : ARMPat<(extloadi8 addrmode_imm12:$addr), (LDRBi12 addrmode_imm12:$addr)>; -def : ARMPat<(extloadi8 ldst_so_reg:$addr), (LDRBrs ldst_so_reg:$addr)>; - -def : ARMPat<(extloadi16 addrmode3:$addr), (LDRH addrmode3:$addr)>; - -def : ARMPat<(extloadi8 addrmodepc:$addr), (PICLDRB addrmodepc:$addr)>; -def : ARMPat<(extloadi16 addrmodepc:$addr), (PICLDRH addrmodepc:$addr)>; - -// smul* and smla* -def : ARMV5TEPat<(mul (sra (shl GPR:$a, (i32 16)), (i32 16)), - (sra (shl GPR:$b, (i32 16)), (i32 16))), - (SMULBB GPR:$a, GPR:$b)>; -def : ARMV5TEPat<(mul sext_16_node:$a, sext_16_node:$b), - (SMULBB GPR:$a, GPR:$b)>; -def : ARMV5TEPat<(mul (sra (shl GPR:$a, (i32 16)), (i32 16)), - (sra GPR:$b, (i32 16))), - (SMULBT GPR:$a, GPR:$b)>; -def : ARMV5TEPat<(mul sext_16_node:$a, (sra GPR:$b, (i32 16))), - (SMULBT GPR:$a, GPR:$b)>; -def : ARMV5TEPat<(mul (sra GPR:$a, (i32 16)), - (sra (shl GPR:$b, (i32 16)), (i32 16))), - (SMULTB GPR:$a, GPR:$b)>; -def : ARMV5TEPat<(mul (sra GPR:$a, (i32 16)), sext_16_node:$b), - (SMULTB GPR:$a, GPR:$b)>; -def : ARMV5TEPat<(sra (mul GPR:$a, (sra (shl GPR:$b, (i32 16)), (i32 16))), - (i32 16)), - (SMULWB GPR:$a, GPR:$b)>; -def : ARMV5TEPat<(sra (mul GPR:$a, sext_16_node:$b), (i32 16)), 
- (SMULWB GPR:$a, GPR:$b)>; - -def : ARMV5TEPat<(add GPR:$acc, - (mul (sra (shl GPR:$a, (i32 16)), (i32 16)), - (sra (shl GPR:$b, (i32 16)), (i32 16)))), - (SMLABB GPR:$a, GPR:$b, GPR:$acc)>; -def : ARMV5TEPat<(add GPR:$acc, - (mul sext_16_node:$a, sext_16_node:$b)), - (SMLABB GPR:$a, GPR:$b, GPR:$acc)>; -def : ARMV5TEPat<(add GPR:$acc, - (mul (sra (shl GPR:$a, (i32 16)), (i32 16)), - (sra GPR:$b, (i32 16)))), - (SMLABT GPR:$a, GPR:$b, GPR:$acc)>; -def : ARMV5TEPat<(add GPR:$acc, - (mul sext_16_node:$a, (sra GPR:$b, (i32 16)))), - (SMLABT GPR:$a, GPR:$b, GPR:$acc)>; -def : ARMV5TEPat<(add GPR:$acc, - (mul (sra GPR:$a, (i32 16)), - (sra (shl GPR:$b, (i32 16)), (i32 16)))), - (SMLATB GPR:$a, GPR:$b, GPR:$acc)>; -def : ARMV5TEPat<(add GPR:$acc, - (mul (sra GPR:$a, (i32 16)), sext_16_node:$b)), - (SMLATB GPR:$a, GPR:$b, GPR:$acc)>; -def : ARMV5TEPat<(add GPR:$acc, - (sra (mul GPR:$a, (sra (shl GPR:$b, (i32 16)), (i32 16))), - (i32 16))), - (SMLAWB GPR:$a, GPR:$b, GPR:$acc)>; -def : ARMV5TEPat<(add GPR:$acc, - (sra (mul GPR:$a, sext_16_node:$b), (i32 16))), - (SMLAWB GPR:$a, GPR:$b, GPR:$acc)>; - -//===----------------------------------------------------------------------===// -// Thumb Support -// - -include "ARMInstrThumb.td" - -//===----------------------------------------------------------------------===// -// Thumb2 Support -// - -include "ARMInstrThumb2.td" - -//===----------------------------------------------------------------------===// -// Floating Point Support -// - -include "ARMInstrVFP.td" - -//===----------------------------------------------------------------------===// -// Advanced SIMD (NEON) Support -// - -include "ARMInstrNEON.td" - -//===----------------------------------------------------------------------===// -// Coprocessor Instructions. For disassembly only. +// Coprocessor Instructions. 
// def CDP : ABI<0b1110, (outs), (ins p_imm:$cop, i32imm:$opc1, @@ -3652,17 +3436,18 @@ def CDP2 : ABXI<0b1110, (outs), (ins p_imm:$cop, i32imm:$opc1, let Inst{23-20} = opc1; } -class ACI<dag oops, dag iops, string opc, string asm> - : I<oops, iops, AddrModeNone, Size4Bytes, IndexModeNone, BrFrm, NoItinerary, - opc, asm, "", [/* For disassembly only; pattern left blank */]> { +class ACI<dag oops, dag iops, string opc, string asm, + IndexMode im = IndexModeNone> + : InoP<oops, iops, AddrModeNone, Size4Bytes, im, BrFrm, NoItinerary, + opc, asm, "", [/* For disassembly only; pattern left blank */]> { let Inst{27-25} = 0b110; } -multiclass LdStCop<bits<4> op31_28, bit load, string opc> { +multiclass LdStCop<bits<4> op31_28, bit load, dag ops, string opc, string cond>{ def _OFFSET : ACI<(outs), - (ins nohash_imm:$cop, nohash_imm:$CRd, addrmode2:$addr), - opc, "\tp$cop, cr$CRd, $addr"> { + !con((ins nohash_imm:$cop, nohash_imm:$CRd, addrmode2:$addr), ops), + !strconcat(opc, cond), "\tp$cop, cr$CRd, $addr"> { let Inst{31-28} = op31_28; let Inst{24} = 1; // P = 1 let Inst{21} = 0; // W = 0 @@ -3671,8 +3456,8 @@ multiclass LdStCop<bits<4> op31_28, bit load, string opc> { } def _PRE : ACI<(outs), - (ins nohash_imm:$cop, nohash_imm:$CRd, addrmode2:$addr), - opc, "\tp$cop, cr$CRd, $addr!"> { + !con((ins nohash_imm:$cop, nohash_imm:$CRd, addrmode2:$addr), ops), + !strconcat(opc, cond), "\tp$cop, cr$CRd, $addr!", IndexModePre> { let Inst{31-28} = op31_28; let Inst{24} = 1; // P = 1 let Inst{21} = 1; // W = 1 @@ -3681,8 +3466,8 @@ multiclass LdStCop<bits<4> op31_28, bit load, string opc> { } def _POST : ACI<(outs), - (ins nohash_imm:$cop, nohash_imm:$CRd, GPR:$base, am2offset:$offset), - opc, "\tp$cop, cr$CRd, [$base], $offset"> { + !con((ins nohash_imm:$cop, nohash_imm:$CRd, addrmode2:$addr), ops), + !strconcat(opc, cond), "\tp$cop, cr$CRd, $addr", IndexModePost> { let Inst{31-28} = op31_28; let Inst{24} = 0; // P = 0 let Inst{21} = 1; // W = 1 @@ -3691,8 +3476,9 @@ multiclass LdStCop<bits<4> op31_28, bit load, string opc> { } def _OPTION : ACI<(outs), - (ins nohash_imm:$cop, nohash_imm:$CRd, GPR:$base, i32imm:$option), - opc, "\tp$cop, cr$CRd, [$base], $option"> { + !con((ins nohash_imm:$cop,nohash_imm:$CRd,GPR:$base, nohash_imm:$option), + ops), + !strconcat(opc, cond), "\tp$cop, cr$CRd, [$base], \\{$option\\}"> { let Inst{31-28} = op31_28; let Inst{24} = 0; // P = 0 let Inst{23} = 1; // U = 1 @@ -3702,8 +3488,8 @@ multiclass LdStCop<bits<4> op31_28, bit load, string opc> { } def L_OFFSET : ACI<(outs), - (ins nohash_imm:$cop, nohash_imm:$CRd, addrmode2:$addr), - !strconcat(opc, "l"), "\tp$cop, cr$CRd, $addr"> { + !con((ins nohash_imm:$cop, nohash_imm:$CRd, addrmode2:$addr), ops), + !strconcat(!strconcat(opc, "l"), cond), "\tp$cop, cr$CRd, $addr"> { let Inst{31-28} = op31_28; let Inst{24} = 1; // P = 1 let Inst{21} = 0; // W = 0 @@ -3712,8 +3498,9 @@ multiclass LdStCop<bits<4> op31_28, bit load, string opc> { } def L_PRE : ACI<(outs), - (ins nohash_imm:$cop, nohash_imm:$CRd, addrmode2:$addr), - !strconcat(opc, "l"), "\tp$cop, cr$CRd, $addr!"> { + !con((ins nohash_imm:$cop, nohash_imm:$CRd, addrmode2:$addr), ops), + !strconcat(!strconcat(opc, "l"), cond), "\tp$cop, cr$CRd, $addr!", + IndexModePre> { let Inst{31-28} = op31_28; let Inst{24} = 1; // P = 1 let Inst{21} = 1; // W = 1 @@ -3722,8 +3509,9 @@ multiclass LdStCop<bits<4> op31_28, bit load, string opc> { } def L_POST : ACI<(outs), - (ins nohash_imm:$cop, nohash_imm:$CRd, GPR:$base, am2offset:$offset), - !strconcat(opc, "l"), "\tp$cop, cr$CRd, 
[$base], $offset"> { + !con((ins nohash_imm:$cop, nohash_imm:$CRd, addrmode2:$addr), ops), + !strconcat(!strconcat(opc, "l"), cond), "\tp$cop, cr$CRd, $addr", + IndexModePost> { let Inst{31-28} = op31_28; let Inst{24} = 0; // P = 0 let Inst{21} = 1; // W = 1 @@ -3732,8 +3520,10 @@ multiclass LdStCop<bits<4> op31_28, bit load, string opc> { } def L_OPTION : ACI<(outs), - (ins nohash_imm:$cop, nohash_imm:$CRd, GPR:$base, nohash_imm:$option), - !strconcat(opc, "l"), "\tp$cop, cr$CRd, [$base], $option"> { + !con((ins nohash_imm:$cop, nohash_imm:$CRd,GPR:$base,nohash_imm:$option), + ops), + !strconcat(!strconcat(opc, "l"), cond), + "\tp$cop, cr$CRd, [$base], \\{$option\\}"> { let Inst{31-28} = op31_28; let Inst{24} = 0; // P = 0 let Inst{23} = 1; // U = 1 @@ -3743,19 +3533,18 @@ multiclass LdStCop<bits<4> op31_28, bit load, string opc> { } } -defm LDC : LdStCop<{?,?,?,?}, 1, "ldc">; -defm LDC2 : LdStCop<0b1111, 1, "ldc2">; -defm STC : LdStCop<{?,?,?,?}, 0, "stc">; -defm STC2 : LdStCop<0b1111, 0, "stc2">; +defm LDC : LdStCop<{?,?,?,?}, 1, (ins pred:$p), "ldc", "${p}">; +defm LDC2 : LdStCop<0b1111, 1, (ins), "ldc2", "">; +defm STC : LdStCop<{?,?,?,?}, 0, (ins pred:$p), "stc", "${p}">; +defm STC2 : LdStCop<0b1111, 0, (ins), "stc2", "">; //===----------------------------------------------------------------------===// // Move between coprocessor and ARM core register -- for disassembly only // -class MovRCopro<string opc, bit direction> - : ABI<0b1110, (outs), (ins p_imm:$cop, i32imm:$opc1, - GPR:$Rt, c_imm:$CRn, c_imm:$CRm, i32imm:$opc2), - NoItinerary, opc, "\t$cop, $opc1, $Rt, $CRn, $CRm, $opc2", +class MovRCopro<string opc, bit direction, dag oops, dag iops> + : ABI<0b1110, oops, iops, NoItinerary, opc, + "\t$cop, $opc1, $Rt, $CRn, $CRm, $opc2", [/* For disassembly only; pattern left blank */]> { let Inst{20} = direction; let Inst{4} = 1; @@ -3775,13 +3564,17 @@ class MovRCopro<string opc, bit direction> let Inst{19-16} = CRn; } -def MCR : MovRCopro<"mcr", 0 /* from ARM core register to coprocessor */>; -def MRC : MovRCopro<"mrc", 1 /* from coprocessor to ARM core register */>; +def MCR : MovRCopro<"mcr", 0 /* from ARM core register to coprocessor */, + (outs), (ins p_imm:$cop, i32imm:$opc1, + GPR:$Rt, c_imm:$CRn, c_imm:$CRm, + i32imm:$opc2)>; +def MRC : MovRCopro<"mrc", 1 /* from coprocessor to ARM core register */, + (outs GPR:$Rt), (ins p_imm:$cop, i32imm:$opc1, + c_imm:$CRn, c_imm:$CRm, i32imm:$opc2)>; -class MovRCopro2<string opc, bit direction> - : ABXI<0b1110, (outs), (ins p_imm:$cop, i32imm:$opc1, - GPR:$Rt, c_imm:$CRn, c_imm:$CRm, i32imm:$opc2), - NoItinerary, !strconcat(opc, "\t$cop, $opc1, $Rt, $CRn, $CRm, $opc2"), +class MovRCopro2<string opc, bit direction, dag oops, dag iops> + : ABXI<0b1110, oops, iops, NoItinerary, + !strconcat(opc, "\t$cop, $opc1, $Rt, $CRn, $CRm, $opc2"), [/* For disassembly only; pattern left blank */]> { let Inst{31-28} = 0b1111; let Inst{20} = direction; @@ -3802,8 +3595,14 @@ class MovRCopro2<string opc, bit direction> let Inst{19-16} = CRn; } -def MCR2 : MovRCopro2<"mcr2", 0 /* from ARM core register to coprocessor */>; -def MRC2 : MovRCopro2<"mrc2", 1 /* from coprocessor to ARM core register */>; +def MCR2 : MovRCopro2<"mcr2", 0 /* from ARM core register to coprocessor */, + (outs), (ins p_imm:$cop, i32imm:$opc1, + GPR:$Rt, c_imm:$CRn, c_imm:$CRm, + i32imm:$opc2)>; +def MRC2 : MovRCopro2<"mrc2", 1 /* from coprocessor to ARM core register */, + (outs GPR:$Rt), (ins p_imm:$cop, i32imm:$opc1, + c_imm:$CRn, c_imm:$CRm, + i32imm:$opc2)>; class 
MovRRCopro<string opc, bit direction>
  : ABI<0b1100, (outs), (ins p_imm:$cop, i32imm:$opc1,
@@ -3909,3 +3708,241 @@ def MSRi : ABI<0b0011, (outs), (ins msr_mask:$mask, so_imm:$a), NoItinerary,
   let Inst{15-12} = 0b1111;
   let Inst{11-0} = a;
 }
+
+//===----------------------------------------------------------------------===//
+// TLS Instructions
+//
+
+// __aeabi_read_tp preserves the registers r1-r3.
+// This is a pseudo inst so that we can get the encoding right,
+// complete with fixup for the aeabi_read_tp function.
+let isCall = 1,
+  Defs = [R0, R12, LR, CPSR], Uses = [SP] in {
+  def TPsoft : PseudoInst<(outs), (ins), IIC_Br,
+               [(set R0, ARMthread_pointer)]>;
+}
+
+//===----------------------------------------------------------------------===//
+// SJLJ Exception handling intrinsics
+// eh_sjlj_setjmp() is an instruction sequence to store the return
+// address and save #0 in R0 for the non-longjmp case.
+// Since by its nature we may be coming from some other function to get
+// here, and we're using the stack frame for the containing function to
+// save/restore registers, we can't keep anything live in regs across
+// the eh_sjlj_setjmp(), else it will almost certainly have been tromped upon
+// when we get here from a longjmp(). We force everything out of registers
+// except for our own input by listing the relevant registers in Defs. By
+// doing so, we also cause the prologue/epilogue code to actively preserve
+// all of the callee-saved registers, which is exactly what we want.
+// A constant value is passed in $val, and we use the location as a scratch.
+//
+// These are pseudo-instructions and are lowered to individual MC-insts, so
+// no encoding information is necessary.
+let Defs =
+  [ R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, LR, D0,
+    D1, D2, D3, D4, D5, D6, D7, D8, D9, D10, D11, D12, D13, D14, D15,
+    D16, D17, D18, D19, D20, D21, D22, D23, D24, D25, D26, D27, D28, D29, D30,
+    D31 ], hasSideEffects = 1, isBarrier = 1 in {
+  def Int_eh_sjlj_setjmp : PseudoInst<(outs), (ins GPR:$src, GPR:$val),
+                               NoItinerary,
+                         [(set R0, (ARMeh_sjlj_setjmp GPR:$src, GPR:$val))]>,
+                           Requires<[IsARM, HasVFP2]>;
+}
+
+let Defs =
+  [ R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, LR ],
+  hasSideEffects = 1, isBarrier = 1 in {
+  def Int_eh_sjlj_setjmp_nofp : PseudoInst<(outs), (ins GPR:$src, GPR:$val),
+                               NoItinerary,
+                         [(set R0, (ARMeh_sjlj_setjmp GPR:$src, GPR:$val))]>,
+                           Requires<[IsARM, NoVFP]>;
+}
+
+// FIXME: Non-Darwin version(s)
+let isBarrier = 1, hasSideEffects = 1, isTerminator = 1,
+    Defs = [ R7, LR, SP ] in {
+def Int_eh_sjlj_longjmp : PseudoInst<(outs), (ins GPR:$src, GPR:$scratch),
+                             NoItinerary,
+                         [(ARMeh_sjlj_longjmp GPR:$src, GPR:$scratch)]>,
+                                Requires<[IsARM, IsDarwin]>;
+}
+
+// eh.sjlj.dispatchsetup pseudo-instruction.
+// This pseudo is used for ARM, Thumb1 and Thumb2. Any differences are
+// handled when the pseudo is expanded (which happens before any passes
+// that need the instruction size).
+let isBarrier = 1, hasSideEffects = 1 in
+def Int_eh_sjlj_dispatchsetup :
+  PseudoInst<(outs), (ins), NoItinerary,
+             [(ARMeh_sjlj_dispatchsetup)]>,
+               Requires<[IsDarwin]>;
+
+//===----------------------------------------------------------------------===//
+// Non-Instruction Patterns
+//
+
+// Large immediate handling.
+
+// 32-bit immediate using two piece so_imms or movw + movt.
+// This is a single pseudo instruction, the benefit is that it can be remat'd
+// as a single unit instead of having to handle reg inputs.
+// FIXME: Remove this when we can do generalized remat.
+let isReMaterializable = 1, isMoveImm = 1 in
+def MOVi32imm : PseudoInst<(outs GPR:$dst), (ins i32imm:$src), IIC_iMOVix2,
+                           [(set GPR:$dst, (arm_i32imm:$src))]>,
+                           Requires<[IsARM]>;
+
+// Pseudo instruction that combines movw + movt + add pc (if PIC).
+// It also makes it possible to rematerialize the instructions.
+// FIXME: Remove this when we can do generalized remat and when machine licm
+// can properly hoist the instructions.
+let isReMaterializable = 1 in {
+def MOV_ga_pcrel : PseudoInst<(outs GPR:$dst), (ins i32imm:$addr),
+                              IIC_iMOVix2addpc,
+                        [(set GPR:$dst, (ARMWrapperPIC tglobaladdr:$addr))]>,
+                        Requires<[IsARM, UseMovt]>;
+
+def MOV_ga_dyn : PseudoInst<(outs GPR:$dst), (ins i32imm:$addr),
+                            IIC_iMOVix2,
+                        [(set GPR:$dst, (ARMWrapperDYN tglobaladdr:$addr))]>,
+                        Requires<[IsARM, UseMovt]>;
+
+let AddedComplexity = 10 in
+def MOV_ga_pcrel_ldr : PseudoInst<(outs GPR:$dst), (ins i32imm:$addr),
+                              IIC_iMOVix2ld,
+                    [(set GPR:$dst, (load (ARMWrapperPIC tglobaladdr:$addr)))]>,
+                    Requires<[IsARM, UseMovt]>;
+} // isReMaterializable
+
+// ConstantPool, GlobalAddress, and JumpTable
+def : ARMPat<(ARMWrapper tglobaladdr :$dst), (LEApcrel tglobaladdr :$dst)>,
+            Requires<[IsARM, DontUseMovt]>;
+def : ARMPat<(ARMWrapper tconstpool :$dst), (LEApcrel tconstpool :$dst)>;
+def : ARMPat<(ARMWrapper tglobaladdr :$dst), (MOVi32imm tglobaladdr :$dst)>,
+            Requires<[IsARM, UseMovt]>;
+def : ARMPat<(ARMWrapperJT tjumptable:$dst, imm:$id),
+             (LEApcrelJT tjumptable:$dst, imm:$id)>;
+
+// TODO: add,sub,and, 3-instr forms?
+
+// Tail calls
+def : ARMPat<(ARMtcret tcGPR:$dst),
+             (TCRETURNri tcGPR:$dst)>, Requires<[IsDarwin]>;
+
+def : ARMPat<(ARMtcret (i32 tglobaladdr:$dst)),
+             (TCRETURNdi texternalsym:$dst)>, Requires<[IsDarwin]>;
+
+def : ARMPat<(ARMtcret (i32 texternalsym:$dst)),
+             (TCRETURNdi texternalsym:$dst)>, Requires<[IsDarwin]>;
+
+def : ARMPat<(ARMtcret tcGPR:$dst),
+             (TCRETURNriND tcGPR:$dst)>, Requires<[IsNotDarwin]>;
+
+def : ARMPat<(ARMtcret (i32 tglobaladdr:$dst)),
+             (TCRETURNdiND texternalsym:$dst)>, Requires<[IsNotDarwin]>;
+
+def : ARMPat<(ARMtcret (i32 texternalsym:$dst)),
+             (TCRETURNdiND texternalsym:$dst)>, Requires<[IsNotDarwin]>;
+
+// Direct calls
+def : ARMPat<(ARMcall texternalsym:$func), (BL texternalsym:$func)>,
+      Requires<[IsARM, IsNotDarwin]>;
+def : ARMPat<(ARMcall texternalsym:$func), (BLr9 texternalsym:$func)>,
+      Requires<[IsARM, IsDarwin]>;
+
+// zextload i1 -> zextload i8
+def : ARMPat<(zextloadi1 addrmode_imm12:$addr), (LDRBi12 addrmode_imm12:$addr)>;
+def : ARMPat<(zextloadi1 ldst_so_reg:$addr), (LDRBrs ldst_so_reg:$addr)>;
+
+// extload -> zextload
+def : ARMPat<(extloadi1 addrmode_imm12:$addr), (LDRBi12 addrmode_imm12:$addr)>;
+def : ARMPat<(extloadi1 ldst_so_reg:$addr), (LDRBrs ldst_so_reg:$addr)>;
+def : ARMPat<(extloadi8 addrmode_imm12:$addr), (LDRBi12 addrmode_imm12:$addr)>;
+def : ARMPat<(extloadi8 ldst_so_reg:$addr), (LDRBrs ldst_so_reg:$addr)>;
+
+def : ARMPat<(extloadi16 addrmode3:$addr), (LDRH addrmode3:$addr)>;
+
+def : ARMPat<(extloadi8 addrmodepc:$addr), (PICLDRB addrmodepc:$addr)>;
+def : ARMPat<(extloadi16 addrmodepc:$addr), (PICLDRH addrmodepc:$addr)>;
+
+// smul* and smla*
+def : ARMV5TEPat<(mul (sra (shl GPR:$a, (i32 16)), (i32 16)),
+                      (sra (shl GPR:$b, (i32 16)), (i32 16))),
+                 (SMULBB GPR:$a, GPR:$b)>;
+def : ARMV5TEPat<(mul sext_16_node:$a, sext_16_node:$b),
+                 (SMULBB GPR:$a, GPR:$b)>;
+def : ARMV5TEPat<(mul (sra (shl GPR:$a, (i32 16)), (i32 16)),
+                      (sra GPR:$b, (i32 16))),
+                 (SMULBT GPR:$a,
GPR:$b)>; +def : ARMV5TEPat<(mul sext_16_node:$a, (sra GPR:$b, (i32 16))), + (SMULBT GPR:$a, GPR:$b)>; +def : ARMV5TEPat<(mul (sra GPR:$a, (i32 16)), + (sra (shl GPR:$b, (i32 16)), (i32 16))), + (SMULTB GPR:$a, GPR:$b)>; +def : ARMV5TEPat<(mul (sra GPR:$a, (i32 16)), sext_16_node:$b), + (SMULTB GPR:$a, GPR:$b)>; +def : ARMV5TEPat<(sra (mul GPR:$a, (sra (shl GPR:$b, (i32 16)), (i32 16))), + (i32 16)), + (SMULWB GPR:$a, GPR:$b)>; +def : ARMV5TEPat<(sra (mul GPR:$a, sext_16_node:$b), (i32 16)), + (SMULWB GPR:$a, GPR:$b)>; + +def : ARMV5TEPat<(add GPR:$acc, + (mul (sra (shl GPR:$a, (i32 16)), (i32 16)), + (sra (shl GPR:$b, (i32 16)), (i32 16)))), + (SMLABB GPR:$a, GPR:$b, GPR:$acc)>; +def : ARMV5TEPat<(add GPR:$acc, + (mul sext_16_node:$a, sext_16_node:$b)), + (SMLABB GPR:$a, GPR:$b, GPR:$acc)>; +def : ARMV5TEPat<(add GPR:$acc, + (mul (sra (shl GPR:$a, (i32 16)), (i32 16)), + (sra GPR:$b, (i32 16)))), + (SMLABT GPR:$a, GPR:$b, GPR:$acc)>; +def : ARMV5TEPat<(add GPR:$acc, + (mul sext_16_node:$a, (sra GPR:$b, (i32 16)))), + (SMLABT GPR:$a, GPR:$b, GPR:$acc)>; +def : ARMV5TEPat<(add GPR:$acc, + (mul (sra GPR:$a, (i32 16)), + (sra (shl GPR:$b, (i32 16)), (i32 16)))), + (SMLATB GPR:$a, GPR:$b, GPR:$acc)>; +def : ARMV5TEPat<(add GPR:$acc, + (mul (sra GPR:$a, (i32 16)), sext_16_node:$b)), + (SMLATB GPR:$a, GPR:$b, GPR:$acc)>; +def : ARMV5TEPat<(add GPR:$acc, + (sra (mul GPR:$a, (sra (shl GPR:$b, (i32 16)), (i32 16))), + (i32 16))), + (SMLAWB GPR:$a, GPR:$b, GPR:$acc)>; +def : ARMV5TEPat<(add GPR:$acc, + (sra (mul GPR:$a, sext_16_node:$b), (i32 16))), + (SMLAWB GPR:$a, GPR:$b, GPR:$acc)>; + + +// Pre-v7 uses MCR for synchronization barriers. +def : ARMPat<(ARMMemBarrierMCR GPR:$zero), (MCR 15, 0, GPR:$zero, 7, 10, 5)>, + Requires<[IsARM, HasV6]>; + + +//===----------------------------------------------------------------------===// +// Thumb Support +// + +include "ARMInstrThumb.td" + +//===----------------------------------------------------------------------===// +// Thumb2 Support +// + +include "ARMInstrThumb2.td" + +//===----------------------------------------------------------------------===// +// Floating Point Support +// + +include "ARMInstrVFP.td" + +//===----------------------------------------------------------------------===// +// Advanced SIMD (NEON) Support +// + +include "ARMInstrNEON.td" + diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td index dc3d63e26ef5..e34d69a44d9f 100644 --- a/lib/Target/ARM/ARMInstrNEON.td +++ b/lib/Target/ARM/ARMInstrNEON.td @@ -80,6 +80,12 @@ def SDTARMVORRIMM : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0, 1>, def NEONvorrImm : SDNode<"ARMISD::VORRIMM", SDTARMVORRIMM>; def NEONvbicImm : SDNode<"ARMISD::VBICIMM", SDTARMVORRIMM>; +def NEONvbsl : SDNode<"ARMISD::VBSL", + SDTypeProfile<1, 3, [SDTCisVec<0>, + SDTCisSameAs<0, 1>, + SDTCisSameAs<0, 2>, + SDTCisSameAs<0, 3>]>>; + def NEONvdup : SDNode<"ARMISD::VDUP", SDTypeProfile<1, 1, [SDTCisVec<0>]>>; // VDUPLANE can produce a quad-register result from a double-register source, @@ -146,10 +152,6 @@ def VLDMQIA : PseudoVFPLdStM<(outs QPR:$dst), (ins GPR:$Rn), IIC_fpLoad_m, "", [(set QPR:$dst, (v2f64 (load GPR:$Rn)))]>; -def VLDMQDB - : PseudoVFPLdStM<(outs QPR:$dst), (ins GPR:$Rn), - IIC_fpLoad_m, "", - [(set QPR:$dst, (v2f64 (load GPR:$Rn)))]>; // Use VSTM to store a Q register as a D register pair. // This is a pseudo instruction that is expanded to VSTMD after reg alloc. 
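A brief sketch of what the Q-register load/store pseudos kept above expand to, assuming the usual NEON register aliasing (q0 overlaps d0/d1; the registers below are arbitrary examples, not taken from the patch):

// After register allocation, a v2f64 load/store of q0 based at r0 becomes
// the D-register-pair multiple forms, roughly:
//   vldmia r0, {d0, d1}    @ expansion of VLDMQIA
//   vstmia r0, {d0, d1}    @ expansion of VSTMQIA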
@@ -157,10 +159,6 @@ def VSTMQIA : PseudoVFPLdStM<(outs), (ins QPR:$src, GPR:$Rn), IIC_fpStore_m, "", [(store (v2f64 QPR:$src), GPR:$Rn)]>; -def VSTMQDB - : PseudoVFPLdStM<(outs), (ins QPR:$src, GPR:$Rn), - IIC_fpStore_m, "", - [(store (v2f64 QPR:$src), GPR:$Rn)]>; // Classes for VLD* pseudo-instructions with multi-register operands. // These are expanded to real instructions after register allocation. @@ -1801,7 +1799,7 @@ class N3VDX<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4, class N3VDSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin, string OpcodeStr, string Dt, ValueType Ty, SDNode ShOp> - : N3V<0, 1, op21_20, op11_8, 1, 0, + : N3VLane32<0, 1, op21_20, op11_8, 1, 0, (outs DPR:$Vd), (ins DPR:$Vn, DPR_VFP2:$Vm, nohash_imm:$lane), NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "", [(set (Ty DPR:$Vd), @@ -1811,7 +1809,7 @@ class N3VDSL<bits<2> op21_20, bits<4> op11_8, } class N3VDSL16<bits<2> op21_20, bits<4> op11_8, string OpcodeStr, string Dt, ValueType Ty, SDNode ShOp> - : N3V<0, 1, op21_20, op11_8, 1, 0, + : N3VLane16<0, 1, op21_20, op11_8, 1, 0, (outs DPR:$Vd), (ins DPR:$Vn, DPR_8:$Vm, nohash_imm:$lane), NVMulSLFrm, IIC_VMULi16D, OpcodeStr, Dt,"$Vd, $Vn, $Vm[$lane]","", [(set (Ty DPR:$Vd), @@ -1841,7 +1839,7 @@ class N3VQX<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4, class N3VQSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin, string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy, SDNode ShOp> - : N3V<1, 1, op21_20, op11_8, 1, 0, + : N3VLane32<1, 1, op21_20, op11_8, 1, 0, (outs QPR:$Vd), (ins QPR:$Vn, DPR_VFP2:$Vm, nohash_imm:$lane), NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "", [(set (ResTy QPR:$Vd), @@ -1852,7 +1850,7 @@ class N3VQSL<bits<2> op21_20, bits<4> op11_8, } class N3VQSL16<bits<2> op21_20, bits<4> op11_8, string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy, SDNode ShOp> - : N3V<1, 1, op21_20, op11_8, 1, 0, + : N3VLane16<1, 1, op21_20, op11_8, 1, 0, (outs QPR:$Vd), (ins QPR:$Vn, DPR_8:$Vm, nohash_imm:$lane), NVMulSLFrm, IIC_VMULi16Q, OpcodeStr, Dt,"$Vd, $Vn, $Vm[$lane]","", [(set (ResTy QPR:$Vd), @@ -1874,7 +1872,7 @@ class N3VDInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4, } class N3VDIntSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin, string OpcodeStr, string Dt, ValueType Ty, Intrinsic IntOp> - : N3V<0, 1, op21_20, op11_8, 1, 0, + : N3VLane32<0, 1, op21_20, op11_8, 1, 0, (outs DPR:$Vd), (ins DPR:$Vn, DPR_VFP2:$Vm, nohash_imm:$lane), NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "", [(set (Ty DPR:$Vd), @@ -1885,7 +1883,7 @@ class N3VDIntSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin, } class N3VDIntSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin, string OpcodeStr, string Dt, ValueType Ty, Intrinsic IntOp> - : N3V<0, 1, op21_20, op11_8, 1, 0, + : N3VLane16<0, 1, op21_20, op11_8, 1, 0, (outs DPR:$Vd), (ins DPR:$Vn, DPR_8:$Vm, nohash_imm:$lane), NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "", [(set (Ty DPR:$Vd), @@ -1915,7 +1913,7 @@ class N3VQInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4, class N3VQIntSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin, string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy, Intrinsic IntOp> - : N3V<1, 1, op21_20, op11_8, 1, 0, + : N3VLane32<1, 1, op21_20, op11_8, 1, 0, (outs QPR:$Vd), (ins QPR:$Vn, DPR_VFP2:$Vm, nohash_imm:$lane), NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "", [(set (ResTy QPR:$Vd), @@ -1927,7 +1925,7 @@ class 
N3VQIntSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin, class N3VQIntSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin, string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy, Intrinsic IntOp> - : N3V<1, 1, op21_20, op11_8, 1, 0, + : N3VLane16<1, 1, op21_20, op11_8, 1, 0, (outs QPR:$Vd), (ins QPR:$Vn, DPR_8:$Vm, nohash_imm:$lane), NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "", [(set (ResTy QPR:$Vd), @@ -1959,7 +1957,7 @@ class N3VDMulOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4, class N3VDMulOpSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin, string OpcodeStr, string Dt, ValueType Ty, SDPatternOperator MulOp, SDPatternOperator ShOp> - : N3V<0, 1, op21_20, op11_8, 1, 0, + : N3VLane32<0, 1, op21_20, op11_8, 1, 0, (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, nohash_imm:$lane), NVMulSLFrm, itin, @@ -1972,7 +1970,7 @@ class N3VDMulOpSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin, class N3VDMulOpSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin, string OpcodeStr, string Dt, ValueType Ty, SDNode MulOp, SDNode ShOp> - : N3V<0, 1, op21_20, op11_8, 1, 0, + : N3VLane16<0, 1, op21_20, op11_8, 1, 0, (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR_8:$Vm, nohash_imm:$lane), NVMulSLFrm, itin, @@ -1994,7 +1992,7 @@ class N3VQMulOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4, class N3VQMulOpSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin, string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy, SDPatternOperator MulOp, SDPatternOperator ShOp> - : N3V<1, 1, op21_20, op11_8, 1, 0, + : N3VLane32<1, 1, op21_20, op11_8, 1, 0, (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, DPR_VFP2:$Vm, nohash_imm:$lane), NVMulSLFrm, itin, @@ -2008,7 +2006,7 @@ class N3VQMulOpSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin, string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy, SDNode MulOp, SDNode ShOp> - : N3V<1, 1, op21_20, op11_8, 1, 0, + : N3VLane16<1, 1, op21_20, op11_8, 1, 0, (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, DPR_8:$Vm, nohash_imm:$lane), NVMulSLFrm, itin, @@ -2069,7 +2067,7 @@ class N3VLMulOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4, class N3VLMulOpSL<bit op24, bits<2> op21_20, bits<4> op11_8, InstrItinClass itin, string OpcodeStr, string Dt, ValueType TyQ, ValueType TyD, SDNode MulOp, SDNode OpNode> - : N3V<op24, 1, op21_20, op11_8, 1, 0, (outs QPR:$Vd), + : N3VLane32<op24, 1, op21_20, op11_8, 1, 0, (outs QPR:$Vd), (ins QPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, nohash_imm:$lane), NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "$src1 = $Vd", @@ -2081,7 +2079,7 @@ class N3VLMulOpSL<bit op24, bits<2> op21_20, bits<4> op11_8, class N3VLMulOpSL16<bit op24, bits<2> op21_20, bits<4> op11_8, InstrItinClass itin, string OpcodeStr, string Dt, ValueType TyQ, ValueType TyD, SDNode MulOp, SDNode OpNode> - : N3V<op24, 1, op21_20, op11_8, 1, 0, (outs QPR:$Vd), + : N3VLane16<op24, 1, op21_20, op11_8, 1, 0, (outs QPR:$Vd), (ins QPR:$src1, DPR:$Vn, DPR_8:$Vm, nohash_imm:$lane), NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "$src1 = $Vd", @@ -2116,7 +2114,7 @@ class N3VLInt3<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4, class N3VLInt3SL<bit op24, bits<2> op21_20, bits<4> op11_8, InstrItinClass itin, string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy, Intrinsic IntOp> - : N3V<op24, 1, op21_20, op11_8, 1, 0, + : N3VLane32<op24, 1, op21_20, op11_8, 1, 0, (outs QPR:$Vd), (ins QPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, nohash_imm:$lane), NVMulSLFrm, itin, @@ 
-2129,7 +2127,7 @@ class N3VLInt3SL<bit op24, bits<2> op21_20, bits<4> op11_8, InstrItinClass itin, class N3VLInt3SL16<bit op24, bits<2> op21_20, bits<4> op11_8, InstrItinClass itin, string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy, Intrinsic IntOp> - : N3V<op24, 1, op21_20, op11_8, 1, 0, + : N3VLane16<op24, 1, op21_20, op11_8, 1, 0, (outs QPR:$Vd), (ins QPR:$src1, DPR:$Vn, DPR_8:$Vm, nohash_imm:$lane), NVMulSLFrm, itin, @@ -2164,7 +2162,7 @@ class N3VL<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4, class N3VLSL<bit op24, bits<2> op21_20, bits<4> op11_8, InstrItinClass itin, string OpcodeStr, string Dt, ValueType TyQ, ValueType TyD, SDNode OpNode> - : N3V<op24, 1, op21_20, op11_8, 1, 0, + : N3VLane32<op24, 1, op21_20, op11_8, 1, 0, (outs QPR:$Vd), (ins DPR:$Vn, DPR_VFP2:$Vm, nohash_imm:$lane), NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "", [(set QPR:$Vd, @@ -2173,7 +2171,7 @@ class N3VLSL<bit op24, bits<2> op21_20, bits<4> op11_8, class N3VLSL16<bit op24, bits<2> op21_20, bits<4> op11_8, InstrItinClass itin, string OpcodeStr, string Dt, ValueType TyQ, ValueType TyD, SDNode OpNode> - : N3V<op24, 1, op21_20, op11_8, 1, 0, + : N3VLane16<op24, 1, op21_20, op11_8, 1, 0, (outs QPR:$Vd), (ins DPR:$Vn, DPR_8:$Vm, nohash_imm:$lane), NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "", [(set QPR:$Vd, @@ -2219,7 +2217,7 @@ class N3VLInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4, class N3VLIntSL<bit op24, bits<2> op21_20, bits<4> op11_8, InstrItinClass itin, string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy, Intrinsic IntOp> - : N3V<op24, 1, op21_20, op11_8, 1, 0, + : N3VLane32<op24, 1, op21_20, op11_8, 1, 0, (outs QPR:$Vd), (ins DPR:$Vn, DPR_VFP2:$Vm, nohash_imm:$lane), NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "", [(set (ResTy QPR:$Vd), @@ -2229,7 +2227,7 @@ class N3VLIntSL<bit op24, bits<2> op21_20, bits<4> op11_8, InstrItinClass itin, class N3VLIntSL16<bit op24, bits<2> op21_20, bits<4> op11_8, InstrItinClass itin, string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy, Intrinsic IntOp> - : N3V<op24, 1, op21_20, op11_8, 1, 0, + : N3VLane16<op24, 1, op21_20, op11_8, 1, 0, (outs QPR:$Vd), (ins DPR:$Vn, DPR_8:$Vm, nohash_imm:$lane), NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "", [(set (ResTy QPR:$Vd), @@ -2288,17 +2286,17 @@ class N2VQPLInt2<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18, // Shift by immediate, // both double- and quad-register. 
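The imm6 = 001xxx / 01xxxx / 1xxxxx annotations in the shift classes that follow, and the new shr_imm8/16/32/64 operand types, fall out of how NEON packs the element size and the shift amount into one field. The sketch below is a re-derivation from the ARM ARM, not the LLVM encoder; it returns the combined 7-bit L:imm6 value (the diff's comments show only the low 6 imm6 bits, with L carried separately):

#include <cassert>

// Left shifts by [0, N-1] encode as N + shift; right shifts by [1, N]
// encode as 2N - shift. Decoders recover the element size from the
// position of the leading 1: 0001xxx = 8-bit, 001xxxx = 16-bit,
// 01xxxxx = 32-bit, 1xxxxxx (L set) = 64-bit.
static unsigned encodeVShL(unsigned esize, unsigned shift) {
  assert(shift < esize);                // vshl immediate range [0, N-1]
  return esize + shift;
}
static unsigned encodeVShR(unsigned esize, unsigned shift) {
  assert(shift >= 1 && shift <= esize); // vshr range [1, N], hence shr_imm*
  return 2 * esize - shift;
}

int main() {
  assert(encodeVShL(8, 0) == 0x08);     // imm6 = 001000
  assert(encodeVShR(8, 8) == 0x08);     // vshr #8 reuses the vshl #0 slot
  assert(encodeVShR(32, 1) == 0x3f);    // imm6 = 111111
  return 0;
}

The asymmetric ranges are why one multiclass can no longer serve both directions: a left shift by 0..N-1 fits a plain i32imm, while a right shift runs 1..N and needs the dedicated shr_imm operands.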
class N2VDSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4, - Format f, InstrItinClass itin, string OpcodeStr, string Dt, - ValueType Ty, SDNode OpNode> + Format f, InstrItinClass itin, Operand ImmTy, + string OpcodeStr, string Dt, ValueType Ty, SDNode OpNode> : N2VImm<op24, op23, op11_8, op7, 0, op4, - (outs DPR:$Vd), (ins DPR:$Vm, i32imm:$SIMM), f, itin, + (outs DPR:$Vd), (ins DPR:$Vm, ImmTy:$SIMM), f, itin, OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "", [(set DPR:$Vd, (Ty (OpNode (Ty DPR:$Vm), (i32 imm:$SIMM))))]>; class N2VQSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4, - Format f, InstrItinClass itin, string OpcodeStr, string Dt, - ValueType Ty, SDNode OpNode> + Format f, InstrItinClass itin, Operand ImmTy, + string OpcodeStr, string Dt, ValueType Ty, SDNode OpNode> : N2VImm<op24, op23, op11_8, op7, 1, op4, - (outs QPR:$Vd), (ins QPR:$Vm, i32imm:$SIMM), f, itin, + (outs QPR:$Vd), (ins QPR:$Vm, ImmTy:$SIMM), f, itin, OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "", [(set QPR:$Vd, (Ty (OpNode (Ty QPR:$Vm), (i32 imm:$SIMM))))]>; @@ -2315,9 +2313,9 @@ class N2VLSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6, bit op4, // Narrow shift by immediate. class N2VNSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6, bit op4, InstrItinClass itin, string OpcodeStr, string Dt, - ValueType ResTy, ValueType OpTy, SDNode OpNode> + ValueType ResTy, ValueType OpTy, Operand ImmTy, SDNode OpNode> : N2VImm<op24, op23, op11_8, op7, op6, op4, - (outs DPR:$Vd), (ins QPR:$Vm, i32imm:$SIMM), N2RegVShRFrm, itin, + (outs DPR:$Vd), (ins QPR:$Vm, ImmTy:$SIMM), N2RegVShRFrm, itin, OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "", [(set DPR:$Vd, (ResTy (OpNode (OpTy QPR:$Vm), (i32 imm:$SIMM))))]>; @@ -2325,16 +2323,18 @@ class N2VNSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6, bit op4, // Shift right by immediate and accumulate, // both double- and quad-register. class N2VDShAdd<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4, - string OpcodeStr, string Dt, ValueType Ty, SDNode ShOp> + Operand ImmTy, string OpcodeStr, string Dt, + ValueType Ty, SDNode ShOp> : N2VImm<op24, op23, op11_8, op7, 0, op4, (outs DPR:$Vd), - (ins DPR:$src1, DPR:$Vm, i32imm:$SIMM), N2RegVShRFrm, IIC_VPALiD, + (ins DPR:$src1, DPR:$Vm, ImmTy:$SIMM), N2RegVShRFrm, IIC_VPALiD, OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "$src1 = $Vd", [(set DPR:$Vd, (Ty (add DPR:$src1, (Ty (ShOp DPR:$Vm, (i32 imm:$SIMM))))))]>; class N2VQShAdd<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4, - string OpcodeStr, string Dt, ValueType Ty, SDNode ShOp> + Operand ImmTy, string OpcodeStr, string Dt, + ValueType Ty, SDNode ShOp> : N2VImm<op24, op23, op11_8, op7, 1, op4, (outs QPR:$Vd), - (ins QPR:$src1, QPR:$Vm, i32imm:$SIMM), N2RegVShRFrm, IIC_VPALiD, + (ins QPR:$src1, QPR:$Vm, ImmTy:$SIMM), N2RegVShRFrm, IIC_VPALiD, OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "$src1 = $Vd", [(set QPR:$Vd, (Ty (add QPR:$src1, (Ty (ShOp QPR:$Vm, (i32 imm:$SIMM))))))]>; @@ -2342,15 +2342,17 @@ class N2VQShAdd<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4, // Shift by immediate and insert, // both double- and quad-register. 
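The shift-and-insert classes defined next (and used by VSLI and VSRI further down) differ from the plain shifts in that part of the destination survives. A per-element model for 32-bit lanes, as I read the ARM ARM (illustrative C++ only):

#include <cassert>
#include <cstdint>

// VSLI #n: shift src left and insert; the low n bits of dst are kept.
static uint32_t vsli32(uint32_t dst, uint32_t src, unsigned n) {
  assert(n < 32);                       // left-shift range [0, 31]
  return (src << n) | (dst & ((1u << n) - 1));
}

// VSRI #n: shift src right and insert; the high n bits of dst are kept.
static uint32_t vsri32(uint32_t dst, uint32_t src, unsigned n) {
  assert(n >= 1 && n <= 32);            // right-shift range [1, 32]
  if (n == 32) return dst;              // the whole destination survives
  return (src >> n) | (dst & ~(~0u >> n));
}

int main() {
  assert(vsli32(0xaaaaaaaau, 0x1u, 4) == 0x0000001au);
  assert(vsri32(0xaaaaaaaau, 0x80000000u, 4) == 0xa8000000u);
  return 0;
}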
class N2VDShIns<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4, - Format f, string OpcodeStr, string Dt, ValueType Ty,SDNode ShOp> + Operand ImmTy, Format f, string OpcodeStr, string Dt, + ValueType Ty,SDNode ShOp> : N2VImm<op24, op23, op11_8, op7, 0, op4, (outs DPR:$Vd), - (ins DPR:$src1, DPR:$Vm, i32imm:$SIMM), f, IIC_VSHLiD, + (ins DPR:$src1, DPR:$Vm, ImmTy:$SIMM), f, IIC_VSHLiD, OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "$src1 = $Vd", [(set DPR:$Vd, (Ty (ShOp DPR:$src1, DPR:$Vm, (i32 imm:$SIMM))))]>; class N2VQShIns<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4, - Format f, string OpcodeStr, string Dt, ValueType Ty,SDNode ShOp> + Operand ImmTy, Format f, string OpcodeStr, string Dt, + ValueType Ty,SDNode ShOp> : N2VImm<op24, op23, op11_8, op7, 1, op4, (outs QPR:$Vd), - (ins QPR:$src1, QPR:$Vm, i32imm:$SIMM), f, IIC_VSHLiQ, + (ins QPR:$src1, QPR:$Vm, ImmTy:$SIMM), f, IIC_VSHLiQ, OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "$src1 = $Vd", [(set QPR:$Vd, (Ty (ShOp QPR:$src1, QPR:$Vm, (i32 imm:$SIMM))))]>; @@ -3010,40 +3012,77 @@ multiclass N2VPLInt2_QHS<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16, // Neon 2-register vector shift by immediate, // with f of either N2RegVShLFrm or N2RegVShRFrm // element sizes of 8, 16, 32 and 64 bits: -multiclass N2VSh_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4, - InstrItinClass itin, string OpcodeStr, string Dt, - SDNode OpNode, Format f> { +multiclass N2VShL_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4, + InstrItinClass itin, string OpcodeStr, string Dt, + SDNode OpNode> { + // 64-bit vector types. + def v8i8 : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm, + OpcodeStr, !strconcat(Dt, "8"), v8i8, OpNode> { + let Inst{21-19} = 0b001; // imm6 = 001xxx + } + def v4i16 : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm, + OpcodeStr, !strconcat(Dt, "16"), v4i16, OpNode> { + let Inst{21-20} = 0b01; // imm6 = 01xxxx + } + def v2i32 : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm, + OpcodeStr, !strconcat(Dt, "32"), v2i32, OpNode> { + let Inst{21} = 0b1; // imm6 = 1xxxxx + } + def v1i64 : N2VDSh<op24, op23, op11_8, 1, op4, N2RegVShLFrm, itin, i32imm, + OpcodeStr, !strconcat(Dt, "64"), v1i64, OpNode>; + // imm6 = xxxxxx + + // 128-bit vector types. + def v16i8 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm, + OpcodeStr, !strconcat(Dt, "8"), v16i8, OpNode> { + let Inst{21-19} = 0b001; // imm6 = 001xxx + } + def v8i16 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm, + OpcodeStr, !strconcat(Dt, "16"), v8i16, OpNode> { + let Inst{21-20} = 0b01; // imm6 = 01xxxx + } + def v4i32 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShLFrm, itin, i32imm, + OpcodeStr, !strconcat(Dt, "32"), v4i32, OpNode> { + let Inst{21} = 0b1; // imm6 = 1xxxxx + } + def v2i64 : N2VQSh<op24, op23, op11_8, 1, op4, N2RegVShLFrm, itin, i32imm, + OpcodeStr, !strconcat(Dt, "64"), v2i64, OpNode>; + // imm6 = xxxxxx +} +multiclass N2VShR_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4, + InstrItinClass itin, string OpcodeStr, string Dt, + SDNode OpNode> { // 64-bit vector types. 
- def v8i8 : N2VDSh<op24, op23, op11_8, 0, op4, f, itin, + def v8i8 : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm8, OpcodeStr, !strconcat(Dt, "8"), v8i8, OpNode> { let Inst{21-19} = 0b001; // imm6 = 001xxx } - def v4i16 : N2VDSh<op24, op23, op11_8, 0, op4, f, itin, + def v4i16 : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm16, OpcodeStr, !strconcat(Dt, "16"), v4i16, OpNode> { let Inst{21-20} = 0b01; // imm6 = 01xxxx } - def v2i32 : N2VDSh<op24, op23, op11_8, 0, op4, f, itin, + def v2i32 : N2VDSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm32, OpcodeStr, !strconcat(Dt, "32"), v2i32, OpNode> { let Inst{21} = 0b1; // imm6 = 1xxxxx } - def v1i64 : N2VDSh<op24, op23, op11_8, 1, op4, f, itin, + def v1i64 : N2VDSh<op24, op23, op11_8, 1, op4, N2RegVShRFrm, itin, shr_imm64, OpcodeStr, !strconcat(Dt, "64"), v1i64, OpNode>; // imm6 = xxxxxx // 128-bit vector types. - def v16i8 : N2VQSh<op24, op23, op11_8, 0, op4, f, itin, + def v16i8 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm8, OpcodeStr, !strconcat(Dt, "8"), v16i8, OpNode> { let Inst{21-19} = 0b001; // imm6 = 001xxx } - def v8i16 : N2VQSh<op24, op23, op11_8, 0, op4, f, itin, + def v8i16 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm16, OpcodeStr, !strconcat(Dt, "16"), v8i16, OpNode> { let Inst{21-20} = 0b01; // imm6 = 01xxxx } - def v4i32 : N2VQSh<op24, op23, op11_8, 0, op4, f, itin, + def v4i32 : N2VQSh<op24, op23, op11_8, 0, op4, N2RegVShRFrm, itin, shr_imm32, OpcodeStr, !strconcat(Dt, "32"), v4i32, OpNode> { let Inst{21} = 0b1; // imm6 = 1xxxxx } - def v2i64 : N2VQSh<op24, op23, op11_8, 1, op4, f, itin, + def v2i64 : N2VQSh<op24, op23, op11_8, 1, op4, N2RegVShRFrm, itin, shr_imm64, OpcodeStr, !strconcat(Dt, "64"), v2i64, OpNode>; // imm6 = xxxxxx } @@ -3053,79 +3092,113 @@ multiclass N2VSh_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4, multiclass N2VShAdd_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4, string OpcodeStr, string Dt, SDNode ShOp> { // 64-bit vector types. - def v8i8 : N2VDShAdd<op24, op23, op11_8, 0, op4, + def v8i8 : N2VDShAdd<op24, op23, op11_8, 0, op4, shr_imm8, OpcodeStr, !strconcat(Dt, "8"), v8i8, ShOp> { let Inst{21-19} = 0b001; // imm6 = 001xxx } - def v4i16 : N2VDShAdd<op24, op23, op11_8, 0, op4, + def v4i16 : N2VDShAdd<op24, op23, op11_8, 0, op4, shr_imm16, OpcodeStr, !strconcat(Dt, "16"), v4i16, ShOp> { let Inst{21-20} = 0b01; // imm6 = 01xxxx } - def v2i32 : N2VDShAdd<op24, op23, op11_8, 0, op4, + def v2i32 : N2VDShAdd<op24, op23, op11_8, 0, op4, shr_imm32, OpcodeStr, !strconcat(Dt, "32"), v2i32, ShOp> { let Inst{21} = 0b1; // imm6 = 1xxxxx } - def v1i64 : N2VDShAdd<op24, op23, op11_8, 1, op4, + def v1i64 : N2VDShAdd<op24, op23, op11_8, 1, op4, shr_imm64, OpcodeStr, !strconcat(Dt, "64"), v1i64, ShOp>; // imm6 = xxxxxx // 128-bit vector types. 
- def v16i8 : N2VQShAdd<op24, op23, op11_8, 0, op4, + def v16i8 : N2VQShAdd<op24, op23, op11_8, 0, op4, shr_imm8, OpcodeStr, !strconcat(Dt, "8"), v16i8, ShOp> { let Inst{21-19} = 0b001; // imm6 = 001xxx } - def v8i16 : N2VQShAdd<op24, op23, op11_8, 0, op4, + def v8i16 : N2VQShAdd<op24, op23, op11_8, 0, op4, shr_imm16, OpcodeStr, !strconcat(Dt, "16"), v8i16, ShOp> { let Inst{21-20} = 0b01; // imm6 = 01xxxx } - def v4i32 : N2VQShAdd<op24, op23, op11_8, 0, op4, + def v4i32 : N2VQShAdd<op24, op23, op11_8, 0, op4, shr_imm32, OpcodeStr, !strconcat(Dt, "32"), v4i32, ShOp> { let Inst{21} = 0b1; // imm6 = 1xxxxx } - def v2i64 : N2VQShAdd<op24, op23, op11_8, 1, op4, + def v2i64 : N2VQShAdd<op24, op23, op11_8, 1, op4, shr_imm64, OpcodeStr, !strconcat(Dt, "64"), v2i64, ShOp>; // imm6 = xxxxxx } - // Neon Shift-Insert vector operations, // with f of either N2RegVShLFrm or N2RegVShRFrm // element sizes of 8, 16, 32 and 64 bits: -multiclass N2VShIns_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4, - string OpcodeStr, SDNode ShOp, - Format f> { +multiclass N2VShInsL_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4, + string OpcodeStr> { + // 64-bit vector types. + def v8i8 : N2VDShIns<op24, op23, op11_8, 0, op4, i32imm, + N2RegVShLFrm, OpcodeStr, "8", v8i8, NEONvsli> { + let Inst{21-19} = 0b001; // imm6 = 001xxx + } + def v4i16 : N2VDShIns<op24, op23, op11_8, 0, op4, i32imm, + N2RegVShLFrm, OpcodeStr, "16", v4i16, NEONvsli> { + let Inst{21-20} = 0b01; // imm6 = 01xxxx + } + def v2i32 : N2VDShIns<op24, op23, op11_8, 0, op4, i32imm, + N2RegVShLFrm, OpcodeStr, "32", v2i32, NEONvsli> { + let Inst{21} = 0b1; // imm6 = 1xxxxx + } + def v1i64 : N2VDShIns<op24, op23, op11_8, 1, op4, i32imm, + N2RegVShLFrm, OpcodeStr, "64", v1i64, NEONvsli>; + // imm6 = xxxxxx + + // 128-bit vector types. + def v16i8 : N2VQShIns<op24, op23, op11_8, 0, op4, i32imm, + N2RegVShLFrm, OpcodeStr, "8", v16i8, NEONvsli> { + let Inst{21-19} = 0b001; // imm6 = 001xxx + } + def v8i16 : N2VQShIns<op24, op23, op11_8, 0, op4, i32imm, + N2RegVShLFrm, OpcodeStr, "16", v8i16, NEONvsli> { + let Inst{21-20} = 0b01; // imm6 = 01xxxx + } + def v4i32 : N2VQShIns<op24, op23, op11_8, 0, op4, i32imm, + N2RegVShLFrm, OpcodeStr, "32", v4i32, NEONvsli> { + let Inst{21} = 0b1; // imm6 = 1xxxxx + } + def v2i64 : N2VQShIns<op24, op23, op11_8, 1, op4, i32imm, + N2RegVShLFrm, OpcodeStr, "64", v2i64, NEONvsli>; + // imm6 = xxxxxx +} +multiclass N2VShInsR_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4, + string OpcodeStr> { // 64-bit vector types. - def v8i8 : N2VDShIns<op24, op23, op11_8, 0, op4, - f, OpcodeStr, "8", v8i8, ShOp> { + def v8i8 : N2VDShIns<op24, op23, op11_8, 0, op4, shr_imm8, + N2RegVShRFrm, OpcodeStr, "8", v8i8, NEONvsri> { let Inst{21-19} = 0b001; // imm6 = 001xxx } - def v4i16 : N2VDShIns<op24, op23, op11_8, 0, op4, - f, OpcodeStr, "16", v4i16, ShOp> { + def v4i16 : N2VDShIns<op24, op23, op11_8, 0, op4, shr_imm16, + N2RegVShRFrm, OpcodeStr, "16", v4i16, NEONvsri> { let Inst{21-20} = 0b01; // imm6 = 01xxxx } - def v2i32 : N2VDShIns<op24, op23, op11_8, 0, op4, - f, OpcodeStr, "32", v2i32, ShOp> { + def v2i32 : N2VDShIns<op24, op23, op11_8, 0, op4, shr_imm32, + N2RegVShRFrm, OpcodeStr, "32", v2i32, NEONvsri> { let Inst{21} = 0b1; // imm6 = 1xxxxx } - def v1i64 : N2VDShIns<op24, op23, op11_8, 1, op4, - f, OpcodeStr, "64", v1i64, ShOp>; + def v1i64 : N2VDShIns<op24, op23, op11_8, 1, op4, shr_imm64, + N2RegVShRFrm, OpcodeStr, "64", v1i64, NEONvsri>; // imm6 = xxxxxx // 128-bit vector types. 
- def v16i8 : N2VQShIns<op24, op23, op11_8, 0, op4, - f, OpcodeStr, "8", v16i8, ShOp> { + def v16i8 : N2VQShIns<op24, op23, op11_8, 0, op4, shr_imm8, + N2RegVShRFrm, OpcodeStr, "8", v16i8, NEONvsri> { let Inst{21-19} = 0b001; // imm6 = 001xxx } - def v8i16 : N2VQShIns<op24, op23, op11_8, 0, op4, - f, OpcodeStr, "16", v8i16, ShOp> { + def v8i16 : N2VQShIns<op24, op23, op11_8, 0, op4, shr_imm16, + N2RegVShRFrm, OpcodeStr, "16", v8i16, NEONvsri> { let Inst{21-20} = 0b01; // imm6 = 01xxxx } - def v4i32 : N2VQShIns<op24, op23, op11_8, 0, op4, - f, OpcodeStr, "32", v4i32, ShOp> { + def v4i32 : N2VQShIns<op24, op23, op11_8, 0, op4, shr_imm32, + N2RegVShRFrm, OpcodeStr, "32", v4i32, NEONvsri> { let Inst{21} = 0b1; // imm6 = 1xxxxx } - def v2i64 : N2VQShIns<op24, op23, op11_8, 1, op4, - f, OpcodeStr, "64", v2i64, ShOp>; + def v2i64 : N2VQShIns<op24, op23, op11_8, 1, op4, shr_imm64, + N2RegVShRFrm, OpcodeStr, "64", v2i64, NEONvsri>; // imm6 = xxxxxx } @@ -3153,15 +3226,18 @@ multiclass N2VNSh_HSD<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6, bit op4, InstrItinClass itin, string OpcodeStr, string Dt, SDNode OpNode> { def v8i8 : N2VNSh<op24, op23, op11_8, op7, op6, op4, itin, - OpcodeStr, !strconcat(Dt, "16"), v8i8, v8i16, OpNode> { + OpcodeStr, !strconcat(Dt, "16"), + v8i8, v8i16, shr_imm8, OpNode> { let Inst{21-19} = 0b001; // imm6 = 001xxx } def v4i16 : N2VNSh<op24, op23, op11_8, op7, op6, op4, itin, - OpcodeStr, !strconcat(Dt, "32"), v4i16, v4i32, OpNode> { + OpcodeStr, !strconcat(Dt, "32"), + v4i16, v4i32, shr_imm16, OpNode> { let Inst{21-20} = 0b01; // imm6 = 01xxxx } def v2i32 : N2VNSh<op24, op23, op11_8, op7, op6, op4, itin, - OpcodeStr, !strconcat(Dt, "64"), v2i32, v2i64, OpNode> { + OpcodeStr, !strconcat(Dt, "64"), + v2i32, v2i64, shr_imm32, OpNode> { let Inst{21} = 0b1; // imm6 = 1xxxxx } } @@ -3697,16 +3773,21 @@ def VBSLd : N3VX<1, 0, 0b01, 0b0001, 0, 1, (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm), N3RegFrm, IIC_VCNTiD, "vbsl", "$Vd, $Vn, $Vm", "$src1 = $Vd", - [(set DPR:$Vd, - (v2i32 (or (and DPR:$Vn, DPR:$src1), - (and DPR:$Vm, (vnotd DPR:$src1)))))]>; + [(set DPR:$Vd, (v2i32 (NEONvbsl DPR:$src1, DPR:$Vn, DPR:$Vm)))]>; + +def : Pat<(v2i32 (or (and DPR:$Vn, DPR:$Vd), + (and DPR:$Vm, (vnotd DPR:$Vd)))), + (VBSLd DPR:$Vd, DPR:$Vn, DPR:$Vm)>; + def VBSLq : N3VX<1, 0, 0b01, 0b0001, 1, 1, (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm), N3RegFrm, IIC_VCNTiQ, "vbsl", "$Vd, $Vn, $Vm", "$src1 = $Vd", - [(set QPR:$Vd, - (v4i32 (or (and QPR:$Vn, QPR:$src1), - (and QPR:$Vm, (vnotq QPR:$src1)))))]>; + [(set QPR:$Vd, (v4i32 (NEONvbsl QPR:$src1, QPR:$Vn, QPR:$Vm)))]>; + +def : Pat<(v4i32 (or (and QPR:$Vn, QPR:$Vd), + (and QPR:$Vm, (vnotq QPR:$Vd)))), + (VBSLq QPR:$Vd, QPR:$Vn, QPR:$Vm)>; // VBIF : Vector Bitwise Insert if False // like VBSL but with: "vbif $dst, $src3, $src1", "$src2 = $dst", @@ -3917,14 +3998,13 @@ defm VSHLs : N3VInt_QHSDSh<0, 0, 0b0100, 0, N3RegVShFrm, defm VSHLu : N3VInt_QHSDSh<1, 0, 0b0100, 0, N3RegVShFrm, IIC_VSHLiD, IIC_VSHLiD, IIC_VSHLiQ, IIC_VSHLiQ, "vshl", "u", int_arm_neon_vshiftu>; + // VSHL : Vector Shift Left (Immediate) -defm VSHLi : N2VSh_QHSD<0, 1, 0b0101, 1, IIC_VSHLiD, "vshl", "i", NEONvshl, - N2RegVShLFrm>; +defm VSHLi : N2VShL_QHSD<0, 1, 0b0101, 1, IIC_VSHLiD, "vshl", "i", NEONvshl>; + // VSHR : Vector Shift Right (Immediate) -defm VSHRs : N2VSh_QHSD<0, 1, 0b0000, 1, IIC_VSHLiD, "vshr", "s", NEONvshrs, - N2RegVShRFrm>; -defm VSHRu : N2VSh_QHSD<1, 1, 0b0000, 1, IIC_VSHLiD, "vshr", "u", NEONvshru, - N2RegVShRFrm>; +defm VSHRs : N2VShR_QHSD<0, 1, 
0b0000, 1, IIC_VSHLiD, "vshr", "s",NEONvshrs>; +defm VSHRu : N2VShR_QHSD<1, 1, 0b0000, 1, IIC_VSHLiD, "vshr", "u",NEONvshru>; // VSHLL : Vector Shift Left Long defm VSHLLs : N2VLSh_QHS<0, 1, 0b1010, 0, 0, 1, "vshll", "s", NEONvshlls>; @@ -3957,10 +4037,8 @@ defm VRSHLu : N3VInt_QHSDSh<1, 0, 0b0101, 0, N3RegVShFrm, IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q, "vrshl", "u", int_arm_neon_vrshiftu>; // VRSHR : Vector Rounding Shift Right -defm VRSHRs : N2VSh_QHSD<0,1,0b0010,1, IIC_VSHLi4D, "vrshr", "s", NEONvrshrs, - N2RegVShRFrm>; -defm VRSHRu : N2VSh_QHSD<1,1,0b0010,1, IIC_VSHLi4D, "vrshr", "u", NEONvrshru, - N2RegVShRFrm>; +defm VRSHRs : N2VShR_QHSD<0,1,0b0010,1, IIC_VSHLi4D, "vrshr", "s",NEONvrshrs>; +defm VRSHRu : N2VShR_QHSD<1,1,0b0010,1, IIC_VSHLi4D, "vrshr", "u",NEONvrshru>; // VRSHRN : Vector Rounding Shift Right and Narrow defm VRSHRN : N2VNSh_HSD<0, 1, 0b1000, 0, 1, 1, IIC_VSHLi4D, "vrshrn", "i", @@ -3974,13 +4052,11 @@ defm VQSHLu : N3VInt_QHSDSh<1, 0, 0b0100, 1, N3RegVShFrm, IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q, "vqshl", "u", int_arm_neon_vqshiftu>; // VQSHL : Vector Saturating Shift Left (Immediate) -defm VQSHLsi : N2VSh_QHSD<0,1,0b0111,1, IIC_VSHLi4D, "vqshl", "s",NEONvqshls, - N2RegVShLFrm>; -defm VQSHLui : N2VSh_QHSD<1,1,0b0111,1, IIC_VSHLi4D, "vqshl", "u",NEONvqshlu, - N2RegVShLFrm>; +defm VQSHLsi : N2VShL_QHSD<0,1,0b0111,1, IIC_VSHLi4D, "vqshl", "s",NEONvqshls>; +defm VQSHLui : N2VShL_QHSD<1,1,0b0111,1, IIC_VSHLi4D, "vqshl", "u",NEONvqshlu>; + // VQSHLU : Vector Saturating Shift Left (Immediate, Unsigned) -defm VQSHLsu : N2VSh_QHSD<1,1,0b0110,1, IIC_VSHLi4D,"vqshlu","s",NEONvqshlsu, - N2RegVShLFrm>; +defm VQSHLsu : N2VShL_QHSD<1,1,0b0110,1, IIC_VSHLi4D,"vqshlu","s",NEONvqshlsu>; // VQSHRN : Vector Saturating Shift Right and Narrow defm VQSHRNs : N2VNSh_HSD<0, 1, 0b1001, 0, 0, 1, IIC_VSHLi4D, "vqshrn", "s", @@ -4018,9 +4094,10 @@ defm VRSRAs : N2VShAdd_QHSD<0, 1, 0b0011, 1, "vrsra", "s", NEONvrshrs>; defm VRSRAu : N2VShAdd_QHSD<1, 1, 0b0011, 1, "vrsra", "u", NEONvrshru>; // VSLI : Vector Shift Left and Insert -defm VSLI : N2VShIns_QHSD<1, 1, 0b0101, 1, "vsli", NEONvsli, N2RegVShLFrm>; +defm VSLI : N2VShInsL_QHSD<1, 1, 0b0101, 1, "vsli">; + // VSRI : Vector Shift Right and Insert -defm VSRI : N2VShIns_QHSD<1, 1, 0b0100, 1, "vsri", NEONvsri, N2RegVShRFrm>; +defm VSRI : N2VShInsR_QHSD<1, 1, 0b0100, 1, "vsri">; // Vector Absolute and Saturating Absolute. 
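For the rounding forms rewritten above (VRSHR, and VRSRA, which accumulates the same result), the rounding step adds half of the shift quantum before shifting. A standalone model of one signed 32-bit lane, per my reading of the ARM ARM:

#include <cassert>
#include <cstdint>

// VRSHR.S32 #n on one lane: bias by 1 << (n-1), then shift arithmetically.
// The sum is formed in a wider type so the bias cannot overflow the lane.
static int32_t vrshr_s32(int32_t x, unsigned n) {
  assert(n >= 1 && n <= 32);
  int64_t biased = (int64_t)x + (1LL << (n - 1));
  return (int32_t)(biased >> n);
}

int main() {
  assert(vrshr_s32(5, 1) == 3);    // (5 + 1) >> 1
  assert(vrshr_s32(-5, 1) == -2);  // (-5 + 1) >> 1
  assert(vrshr_s32(7, 3) == 1);    // (7 + 4) >> 3
  return 0;
}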
@@ -4362,14 +4439,8 @@ def VDUP8q : VDUPQ<0b11101110, 0b00, "8", v16i8>; def VDUP16q : VDUPQ<0b11101010, 0b01, "16", v8i16>; def VDUP32q : VDUPQ<0b11101010, 0b00, "32", v4i32>; -def VDUPfd : NVDup<0b11101000, 0b1011, 0b00, (outs DPR:$V), (ins GPR:$R), - IIC_VMOVIS, "vdup", "32", "$V, $R", - [(set DPR:$V, (v2f32 (NEONvdup - (f32 (bitconvert GPR:$R)))))]>; -def VDUPfq : NVDup<0b11101010, 0b1011, 0b00, (outs QPR:$V), (ins GPR:$R), - IIC_VMOVIS, "vdup", "32", "$V, $R", - [(set QPR:$V, (v4f32 (NEONvdup - (f32 (bitconvert GPR:$R)))))]>; +def : Pat<(v2f32 (NEONvdup (f32 (bitconvert GPR:$R)))), (VDUP32d GPR:$R)>; +def : Pat<(v4f32 (NEONvdup (f32 (bitconvert GPR:$R)))), (VDUP32q GPR:$R)>; // VDUP : Vector Duplicate Lane (from scalar to all elements) @@ -4397,9 +4468,6 @@ def VDUPLN16d : VDUPLND<{?,?,1,0}, "vdup", "16", v4i16> { def VDUPLN32d : VDUPLND<{?,1,0,0}, "vdup", "32", v2i32> { let Inst{19} = lane{0}; } -def VDUPLNfd : VDUPLND<{?,1,0,0}, "vdup", "32", v2f32> { - let Inst{19} = lane{0}; -} def VDUPLN8q : VDUPLNQ<{?,?,?,1}, "vdup", "8", v16i8, v8i8> { let Inst{19-17} = lane{2-0}; } @@ -4409,9 +4477,12 @@ def VDUPLN16q : VDUPLNQ<{?,?,1,0}, "vdup", "16", v8i16, v4i16> { def VDUPLN32q : VDUPLNQ<{?,1,0,0}, "vdup", "32", v4i32, v2i32> { let Inst{19} = lane{0}; } -def VDUPLNfq : VDUPLNQ<{?,1,0,0}, "vdup", "32", v4f32, v2f32> { - let Inst{19} = lane{0}; -} + +def : Pat<(v2f32 (NEONvduplane (v2f32 DPR:$Vm), imm:$lane)), + (VDUPLN32d DPR:$Vm, imm:$lane)>; + +def : Pat<(v4f32 (NEONvduplane (v2f32 DPR:$Vm), imm:$lane)), + (VDUPLN32q DPR:$Vm, imm:$lane)>; def : Pat<(v16i8 (NEONvduplane (v16i8 QPR:$src), imm:$lane)), (v16i8 (VDUPLN8q (v8i8 (EXTRACT_SUBREG QPR:$src, @@ -4426,7 +4497,7 @@ def : Pat<(v4i32 (NEONvduplane (v4i32 QPR:$src), imm:$lane)), (DSubReg_i32_reg imm:$lane))), (SubReg_i32_lane imm:$lane)))>; def : Pat<(v4f32 (NEONvduplane (v4f32 QPR:$src), imm:$lane)), - (v4f32 (VDUPLNfq (v2f32 (EXTRACT_SUBREG QPR:$src, + (v4f32 (VDUPLN32q (v2f32 (EXTRACT_SUBREG QPR:$src, (DSubReg_i32_reg imm:$lane))), (SubReg_i32_lane imm:$lane)))>; @@ -4517,12 +4588,12 @@ class VREV64Q<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty> def VREV64d8 : VREV64D<0b00, "vrev64", "8", v8i8>; def VREV64d16 : VREV64D<0b01, "vrev64", "16", v4i16>; def VREV64d32 : VREV64D<0b10, "vrev64", "32", v2i32>; -def VREV64df : VREV64D<0b10, "vrev64", "32", v2f32>; +def : Pat<(v2f32 (NEONvrev64 (v2f32 DPR:$Vm))), (VREV64d32 DPR:$Vm)>; def VREV64q8 : VREV64Q<0b00, "vrev64", "8", v16i8>; def VREV64q16 : VREV64Q<0b01, "vrev64", "16", v8i16>; def VREV64q32 : VREV64Q<0b10, "vrev64", "32", v4i32>; -def VREV64qf : VREV64Q<0b10, "vrev64", "32", v4f32>; +def : Pat<(v4f32 (NEONvrev64 (v4f32 QPR:$Vm))), (VREV64q32 QPR:$Vm)>; // VREV32 : Vector Reverse elements within 32-bit words @@ -4628,8 +4699,8 @@ def VEXTq32 : VEXTq<"vext", "32", v4i32> { let Inst{9-8} = 0b00; } def VEXTqf : VEXTq<"vext", "32", v4f32> { - let Inst{11} = index{0}; - let Inst{10-8} = 0b000; + let Inst{11-10} = index{1-0}; + let Inst{9-8} = 0b00; } // VTRN : Vector Transpose diff --git a/lib/Target/ARM/ARMInstrThumb.td b/lib/Target/ARM/ARMInstrThumb.td index 826ef46bcdb5..8c542fe60bba 100644 --- a/lib/Target/ARM/ARMInstrThumb.td +++ b/lib/Target/ARM/ARMInstrThumb.td @@ -27,22 +27,22 @@ def imm_comp_XFORM : SDNodeXForm<imm, [{ }]>; /// imm0_7 predicate - True if the 32-bit immediate is in the range [0,7]. 
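The hunk that follows (and the ARMInstrThumb2.td hunks further down) converts several immediate predicates from PatLeaf to ImmLeaf. The mechanical difference, sketched in plain C++ with stand-in names (SDNodeStub is hypothetical, standing in for llvm::SDNode): a PatLeaf body receives the node and unpacks the value itself, while an ImmLeaf body receives the already-extracted integer, a form that, as I understand it, selectors other than the DAG matcher can reuse directly.

#include <cstdint>

struct SDNodeStub {
  uint64_t value;
  uint64_t getZExtValue() const { return value; }
};

// PatLeaf-style body: sees the node, extracts the constant itself.
static bool imm0_7_patleaf(const SDNodeStub *N) {
  return (uint32_t)N->getZExtValue() < 8;
}

// ImmLeaf-style body: the immediate arrives already extracted.
static bool imm0_7_immleaf(int64_t Imm) {
  return Imm >= 0 && Imm < 8;
}

int main() {
  SDNodeStub n{5};
  return (imm0_7_patleaf(&n) == imm0_7_immleaf(5)) ? 0 : 1;
}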
-def imm0_7 : PatLeaf<(i32 imm), [{ - return (uint32_t)N->getZExtValue() < 8; +def imm0_7 : ImmLeaf<i32, [{ + return Imm >= 0 && Imm < 8; }]>; def imm0_7_neg : PatLeaf<(i32 imm), [{ return (uint32_t)-N->getZExtValue() < 8; }], imm_neg_XFORM>; -def imm0_255 : PatLeaf<(i32 imm), [{ - return (uint32_t)N->getZExtValue() < 256; +def imm0_255 : ImmLeaf<i32, [{ + return Imm >= 0 && Imm < 256; }]>; def imm0_255_comp : PatLeaf<(i32 imm), [{ return ~((uint32_t)N->getZExtValue()) < 256; }]>; -def imm8_255 : PatLeaf<(i32 imm), [{ - return (uint32_t)N->getZExtValue() >= 8 && (uint32_t)N->getZExtValue() < 256; +def imm8_255 : ImmLeaf<i32, [{ + return Imm >= 8 && Imm < 256; }]>; def imm8_255_neg : PatLeaf<(i32 imm), [{ unsigned Val = -N->getZExtValue(); @@ -369,6 +369,15 @@ let isReturn = 1, isTerminator = 1, isBarrier = 1 in { let Inst{2-0} = 0b000; } + def tBX_Rm : TI<(outs), (ins pred:$p, GPR:$Rm), IIC_Br, "bx${p}\t$Rm", + [/* for disassembly only */]>, + T1Special<{1,1,0,?}> { + // A6.2.3 & A8.6.25 + bits<4> Rm; + let Inst{6-3} = Rm; + let Inst{2-0} = 0b000; + } + // Alternative return instruction used by vararg functions. def tBX_RET_vararg : TI<(outs), (ins tGPR:$Rm), IIC_Br, "bx\t$Rm", @@ -712,6 +721,19 @@ def tLDRpci : T1pIs<(outs tGPR:$Rt), (ins t_addrmode_pc:$addr), IIC_iLoad_i, let Inst{7-0} = addr; } +// FIXME: Remove this entry when the above ldr.n workaround is fixed. +// For disassembly use only. +def tLDRpciDIS : T1pIs<(outs tGPR:$Rt), (ins t_addrmode_pc:$addr), IIC_iLoad_i, + "ldr", "\t$Rt, $addr", + [/* disassembly only */]>, + T1Encoding<{0,1,0,0,1,?}> { + // A6.2 & A8.6.59 + bits<3> Rt; + bits<8> addr; + let Inst{10-8} = Rt; + let Inst{7-0} = addr; +} + // A8.6.194 & A8.6.192 defm tSTR : thumb_st_rr_ri_enc<0b000, 0b0110, t_addrmode_rrs4, t_addrmode_is4, AddrModeT1_4, @@ -1175,10 +1197,18 @@ def tREVSH : // A8.6.136 "revsh", "\t$Rd, $Rm", [(set tGPR:$Rd, (sext_inreg - (or (srl (and tGPR:$Rm, 0xFF00), (i32 8)), + (or (srl tGPR:$Rm, (i32 8)), (shl tGPR:$Rm, (i32 8))), i16))]>, Requires<[IsThumb, IsThumb1Only, HasV6]>; +def : T1Pat<(sext_inreg (or (srl (and tGPR:$Rm, 0xFF00), (i32 8)), + (shl tGPR:$Rm, (i32 8))), i16), + (tREVSH tGPR:$Rm)>, + Requires<[IsThumb, IsThumb1Only, HasV6]>; + +def : T1Pat<(sra (bswap tGPR:$Rm), (i32 16)), (tREVSH tGPR:$Rm)>, + Requires<[IsThumb, IsThumb1Only, HasV6]>; + // Rotate right register def tROR : // A8.6.139 T1sItDPEncode<0b0111, (outs tGPR:$Rdn), (ins tGPR:$Rn, tGPR:$Rm), @@ -1322,10 +1352,8 @@ def tLEApcrelJT : tPseudoInst<(outs tGPR:$Rd), // Move between coprocessor and ARM core register -- for disassembly only // -class tMovRCopro<string opc, bit direction> - : T1Cop<(outs), (ins p_imm:$cop, i32imm:$opc1, - GPR:$Rt, c_imm:$CRn, c_imm:$CRm, i32imm:$opc2), - !strconcat(opc, "\t$cop, $opc1, $Rt, $CRn, $CRm, $opc2"), +class tMovRCopro<string opc, bit direction, dag oops, dag iops> + : T1Cop<oops, iops, !strconcat(opc, "\t$cop, $opc1, $Rt, $CRn, $CRm, $opc2"), [/* For disassembly only; pattern left blank */]> { let Inst{27-24} = 0b1110; let Inst{20} = direction; @@ -1346,8 +1374,12 @@ class tMovRCopro<string opc, bit direction> let Inst{19-16} = CRn; } -def tMCR : tMovRCopro<"mcr", 0 /* from ARM core register to coprocessor */>; -def tMRC : tMovRCopro<"mrc", 1 /* from coprocessor to ARM core register */>; +def tMCR : tMovRCopro<"mcr", 0 /* from ARM core register to coprocessor */, + (outs), (ins p_imm:$cop, i32imm:$opc1, GPR:$Rt, c_imm:$CRn, + c_imm:$CRm, i32imm:$opc2)>; +def tMRC : tMovRCopro<"mrc", 1 /* from coprocessor to ARM core register */, + (outs 
GPR:$Rt), (ins p_imm:$cop, i32imm:$opc1, c_imm:$CRn, + c_imm:$CRm, i32imm:$opc2)>; class tMovRRCopro<string opc, bit direction> : T1Cop<(outs), (ins p_imm:$cop, i32imm:$opc1, GPR:$Rt, GPR:$Rt2, c_imm:$CRm), @@ -1420,7 +1452,7 @@ def tTPsoft : TIx2<0b11110, 0b11, 1, (outs), (ins), IIC_Br, // from some other function to get here, and we're using the stack frame for the // containing function to save/restore registers, we can't keep anything live in // regs across the eh_sjlj_setjmp(), else it will almost certainly have been -// tromped upon when we get here from a longjmp(). We force everthing out of +// tromped upon when we get here from a longjmp(). We force everything out of // registers except for our own input by listing the relevant registers in // Defs. By doing so, we also cause the prologue/epilogue code to actively // preserve all of the callee-saved resgisters, which is exactly what we want. diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td index 0e01be59c7e8..600a12180fc5 100644 --- a/lib/Target/ARM/ARMInstrThumb2.td +++ b/lib/Target/ARM/ARMInstrThumb2.td @@ -44,7 +44,9 @@ def t2_so_imm_neg_XFORM : SDNodeXForm<imm, [{ // t2_so_imm - Match a 32-bit immediate operand, which is an // 8-bit immediate rotated by an arbitrary number of bits, or an 8-bit // immediate splatted into multiple bytes of the word. -def t2_so_imm : Operand<i32>, PatLeaf<(imm), [{ return Pred_t2_so_imm(N); }]> { +def t2_so_imm : Operand<i32>, ImmLeaf<i32, [{ + return ARM_AM::getT2SOImmVal(Imm) != -1; + }]> { let EncoderMethod = "getT2SOImmOpValue"; } @@ -61,49 +63,15 @@ def t2_so_imm_neg : Operand<i32>, return ARM_AM::getT2SOImmVal(-((uint32_t)N->getZExtValue())) != -1; }], t2_so_imm_neg_XFORM>; -// Break t2_so_imm's up into two pieces. This handles immediates with up to 16 -// bits set in them. This uses t2_so_imm2part to match and t2_so_imm2part_[12] -// to get the first/second pieces. -def t2_so_imm2part : Operand<i32>, - PatLeaf<(imm), [{ - return ARM_AM::isT2SOImmTwoPartVal((unsigned)N->getZExtValue()); - }]> { -} - -def t2_so_imm2part_1 : SDNodeXForm<imm, [{ - unsigned V = ARM_AM::getT2SOImmTwoPartFirst((unsigned)N->getZExtValue()); - return CurDAG->getTargetConstant(V, MVT::i32); -}]>; - -def t2_so_imm2part_2 : SDNodeXForm<imm, [{ - unsigned V = ARM_AM::getT2SOImmTwoPartSecond((unsigned)N->getZExtValue()); - return CurDAG->getTargetConstant(V, MVT::i32); -}]>; - -def t2_so_neg_imm2part : Operand<i32>, PatLeaf<(imm), [{ - return ARM_AM::isT2SOImmTwoPartVal(-(int)N->getZExtValue()); - }]> { -} - -def t2_so_neg_imm2part_1 : SDNodeXForm<imm, [{ - unsigned V = ARM_AM::getT2SOImmTwoPartFirst(-(int)N->getZExtValue()); - return CurDAG->getTargetConstant(V, MVT::i32); -}]>; - -def t2_so_neg_imm2part_2 : SDNodeXForm<imm, [{ - unsigned V = ARM_AM::getT2SOImmTwoPartSecond(-(int)N->getZExtValue()); - return CurDAG->getTargetConstant(V, MVT::i32); -}]>; - /// imm1_31 predicate - True if the 32-bit immediate is in the range [1,31]. -def imm1_31 : PatLeaf<(i32 imm), [{ - return (int32_t)N->getZExtValue() >= 1 && (int32_t)N->getZExtValue() < 32; +def imm1_31 : ImmLeaf<i32, [{ + return (int32_t)Imm >= 1 && (int32_t)Imm < 32; }]>; /// imm0_4095 predicate - True if the 32-bit immediate is in the range [0.4095]. 
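The t2_so_imm rewrite above now tests ARM_AM::getT2SOImmVal(Imm) directly. For readers without the architecture manual at hand, the sketch below re-derives the membership test that routine performs; it is a simplified standalone check, not the LLVM implementation (which also computes the 12-bit encoding). A Thumb2 modified immediate is one byte replicated in one of three splat patterns, or an 8-bit value with its top bit set, rotated right by 8 to 31 bits:

#include <cassert>
#include <cstdint>

static bool isT2SOImm(uint32_t v) {
  if (v <= 0xff) return true;                        // 0x000000XY
  uint32_t lo = v & 0xffff;
  if (v == ((lo << 16) | lo) &&                      // halves match, and...
      ((lo & 0xff) == 0 ||                           // 0xXY00XY00
       (lo >> 8) == 0 ||                             // 0x00XY00XY
       (lo & 0xff) == (lo >> 8)))                    // 0xXYXYXYXY
    return true;
  for (unsigned rot = 8; rot < 32; ++rot) {          // rotated 1bcdefgh form
    uint32_t b = (v << rot) | (v >> (32 - rot));     // undo the rotate-right
    if (b <= 0xff && (b & 0x80)) return true;
  }
  return false;
}

int main() {
  assert(isT2SOImm(0x00ab00ab) && isT2SOImm(0xab00ab00) && isT2SOImm(0xabababab));
  assert(isT2SOImm(0x0003fc00));                     // 0xff ror 22
  assert(!isT2SOImm(0x00001234));                    // needs more than 8 bits
  return 0;
}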
def imm0_4095 : Operand<i32>, - PatLeaf<(i32 imm), [{ - return (uint32_t)N->getZExtValue() < 4096; + ImmLeaf<i32, [{ + return Imm >= 0 && Imm < 4096; }]>; def imm0_4095_neg : PatLeaf<(i32 imm), [{ @@ -118,6 +86,11 @@ def imm0_255_not : PatLeaf<(i32 imm), [{ return (uint32_t)(~N->getZExtValue()) < 255; }], imm_comp_XFORM>; +def lo5AllOne : PatLeaf<(i32 imm), [{ + // Returns true if all low 5-bits are 1. + return (((uint32_t)N->getZExtValue()) & 0x1FUL) == 0x1FUL; +}]>; + // Define Thumb2 specific addressing modes. // t2addrmode_imm12 := reg + imm12 @@ -129,6 +102,12 @@ def t2addrmode_imm12 : Operand<i32>, let ParserMatchClass = MemMode5AsmOperand; } +// t2ldrlabel := imm12 +def t2ldrlabel : Operand<i32> { + let EncoderMethod = "getAddrModeImm12OpValue"; +} + + // ADR instruction labels. def t2adrlabel : Operand<i32> { let EncoderMethod = "getT2AdrLabelOpValue"; @@ -173,6 +152,15 @@ def t2addrmode_so_reg : Operand<i32>, let ParserMatchClass = MemMode5AsmOperand; } +// t2addrmode_reg := reg +// Used by load/store exclusive instructions. Useful to enable right assembly +// parsing and printing. Not used for any codegen matching. +// +def t2addrmode_reg : Operand<i32> { + let PrintMethod = "printAddrMode7Operand"; + let MIOperandInfo = (ops tGPR); + let ParserMatchClass = MemMode7AsmOperand; +} //===----------------------------------------------------------------------===// // Multiclass helpers... @@ -700,49 +688,27 @@ multiclass T2I_adde_sube_irs<bits<4> opcod, string opc, PatFrag opnode, let Inst{24-21} = opcod; } } +} // Carry setting variants -let isCodeGenOnly = 1, Defs = [CPSR] in { -multiclass T2I_adde_sube_s_irs<bits<4> opcod, string opc, PatFrag opnode, - bit Commutable = 0> { +// NOTE: CPSR def omitted because it will be handled by the custom inserter. +let usesCustomInserter = 1 in { +multiclass T2I_adde_sube_s_irs<PatFrag opnode, bit Commutable = 0> { // shifted imm - def ri : T2sTwoRegImm< - (outs rGPR:$Rd), (ins rGPR:$Rn, t2_so_imm:$imm), IIC_iALUi, - opc, "\t$Rd, $Rn, $imm", - [(set rGPR:$Rd, (opnode rGPR:$Rn, t2_so_imm:$imm))]>, - Requires<[IsThumb2]> { - let Inst{31-27} = 0b11110; - let Inst{25} = 0; - let Inst{24-21} = opcod; - let Inst{20} = 1; // The S bit. - let Inst{15} = 0; - } + def ri : t2PseudoInst<(outs rGPR:$Rd), (ins rGPR:$Rn, t2_so_imm:$imm), + Size4Bytes, IIC_iALUi, + [(set rGPR:$Rd, (opnode rGPR:$Rn, t2_so_imm:$imm))]>; // register - def rr : T2sThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iALUr, - opc, ".w\t$Rd, $Rn, $Rm", - [(set rGPR:$Rd, (opnode rGPR:$Rn, rGPR:$Rm))]>, - Requires<[IsThumb2]> { + def rr : t2PseudoInst<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), + Size4Bytes, IIC_iALUr, + [(set rGPR:$Rd, (opnode rGPR:$Rn, rGPR:$Rm))]> { let isCommutable = Commutable; - let Inst{31-27} = 0b11101; - let Inst{26-25} = 0b01; - let Inst{24-21} = opcod; - let Inst{20} = 1; // The S bit. - let Inst{14-12} = 0b000; // imm3 - let Inst{7-6} = 0b00; // imm2 - let Inst{5-4} = 0b00; // type } // shifted register - def rs : T2sTwoRegShiftedReg< - (outs rGPR:$Rd), (ins rGPR:$Rn, t2_so_reg:$ShiftedRm), - IIC_iALUsi, opc, ".w\t$Rd, $Rn, $ShiftedRm", - [(set rGPR:$Rd, (opnode rGPR:$Rn, t2_so_reg:$ShiftedRm))]>, - Requires<[IsThumb2]> { - let Inst{31-27} = 0b11101; - let Inst{26-25} = 0b01; - let Inst{24-21} = opcod; - let Inst{20} = 1; // The S bit. 
- } -} + def rs : t2PseudoInst< + (outs rGPR:$Rd), (ins rGPR:$Rn, t2_so_reg:$ShiftedRm), + Size4Bytes, IIC_iALUsi, + [(set rGPR:$Rd, (opnode rGPR:$Rn, t2_so_reg:$ShiftedRm))]>; } } @@ -864,6 +830,7 @@ multiclass T2I_ld<bit signed, bits<2> opcod, string opc, let Inst{15-12} = Rt; bits<17> addr; + let addr{12} = 1; // add = TRUE let Inst{19-16} = addr{16-13}; // Rn let Inst{23} = addr{12}; // U let Inst{11-0} = addr{11-0}; // imm @@ -911,7 +878,7 @@ multiclass T2I_ld<bit signed, bits<2> opcod, string opc, } // FIXME: Is the pci variant actually needed? - def pci : T2Ipc <(outs GPR:$Rt), (ins i32imm:$addr), iii, + def pci : T2Ipc <(outs GPR:$Rt), (ins t2ldrlabel:$addr), iii, opc, ".w\t$Rt, $addr", [(set GPR:$Rt, (opnode (ARMWrapper tconstpool:$addr)))]> { let isReMaterializable = 1; @@ -944,6 +911,7 @@ multiclass T2I_st<bits<2> opcod, string opc, let Inst{15-12} = Rt; bits<17> addr; + let addr{12} = 1; // add = TRUE let Inst{19-16} = addr{16-13}; // Rn let Inst{23} = addr{12}; // U let Inst{11-0} = addr{11-0}; // imm @@ -1398,7 +1366,7 @@ def t2LDRSH_POST : T2Iidxldst<1, 0b01, 1, 0, (outs GPR:$dst, GPR:$Rn), // for disassembly only. // Ref: A8.6.57 LDR (immediate, Thumb) Encoding T4 class T2IldT<bit signed, bits<2> type, string opc, InstrItinClass ii> - : T2Ii8<(outs GPR:$Rt), (ins t2addrmode_imm8:$addr), ii, opc, + : T2Ii8<(outs rGPR:$Rt), (ins t2addrmode_imm8:$addr), ii, opc, "\t$Rt, $addr", []> { let Inst{31-27} = 0b11111; let Inst{26-25} = 0b00; @@ -1440,42 +1408,48 @@ def t2STRDi8 : T2Ii8s4<1, 0, 0, (outs), def t2STR_PRE : T2Iidxldst<0, 0b10, 0, 1, (outs GPR:$base_wb), (ins GPR:$Rt, GPR:$Rn, t2am_imm8_offset:$addr), AddrModeT2_i8, IndexModePre, IIC_iStore_iu, - "str", "\t$Rt, [$Rn, $addr]!", "$Rn = $base_wb", + "str", "\t$Rt, [$Rn, $addr]!", + "$Rn = $base_wb,@earlyclobber $base_wb", [(set GPR:$base_wb, (pre_store GPR:$Rt, GPR:$Rn, t2am_imm8_offset:$addr))]>; def t2STR_POST : T2Iidxldst<0, 0b10, 0, 0, (outs GPR:$base_wb), (ins GPR:$Rt, GPR:$Rn, t2am_imm8_offset:$addr), AddrModeT2_i8, IndexModePost, IIC_iStore_iu, - "str", "\t$Rt, [$Rn], $addr", "$Rn = $base_wb", + "str", "\t$Rt, [$Rn], $addr", + "$Rn = $base_wb,@earlyclobber $base_wb", [(set GPR:$base_wb, (post_store GPR:$Rt, GPR:$Rn, t2am_imm8_offset:$addr))]>; def t2STRH_PRE : T2Iidxldst<0, 0b01, 0, 1, (outs GPR:$base_wb), (ins GPR:$Rt, GPR:$Rn, t2am_imm8_offset:$addr), AddrModeT2_i8, IndexModePre, IIC_iStore_iu, - "strh", "\t$Rt, [$Rn, $addr]!", "$Rn = $base_wb", + "strh", "\t$Rt, [$Rn, $addr]!", + "$Rn = $base_wb,@earlyclobber $base_wb", [(set GPR:$base_wb, (pre_truncsti16 GPR:$Rt, GPR:$Rn, t2am_imm8_offset:$addr))]>; def t2STRH_POST : T2Iidxldst<0, 0b01, 0, 0, (outs GPR:$base_wb), (ins GPR:$Rt, GPR:$Rn, t2am_imm8_offset:$addr), AddrModeT2_i8, IndexModePost, IIC_iStore_bh_iu, - "strh", "\t$Rt, [$Rn], $addr", "$Rn = $base_wb", + "strh", "\t$Rt, [$Rn], $addr", + "$Rn = $base_wb,@earlyclobber $base_wb", [(set GPR:$base_wb, (post_truncsti16 GPR:$Rt, GPR:$Rn, t2am_imm8_offset:$addr))]>; def t2STRB_PRE : T2Iidxldst<0, 0b00, 0, 1, (outs GPR:$base_wb), (ins GPR:$Rt, GPR:$Rn, t2am_imm8_offset:$addr), AddrModeT2_i8, IndexModePre, IIC_iStore_bh_iu, - "strb", "\t$Rt, [$Rn, $addr]!", "$Rn = $base_wb", + "strb", "\t$Rt, [$Rn, $addr]!", + "$Rn = $base_wb,@earlyclobber $base_wb", [(set GPR:$base_wb, (pre_truncsti8 GPR:$Rt, GPR:$Rn, t2am_imm8_offset:$addr))]>; def t2STRB_POST : T2Iidxldst<0, 0b00, 0, 0, (outs GPR:$base_wb), (ins GPR:$Rt, GPR:$Rn, t2am_imm8_offset:$addr), AddrModeT2_i8, IndexModePost, IIC_iStore_bh_iu, - "strb", "\t$Rt, [$Rn], $addr", 
"$Rn = $base_wb", + "strb", "\t$Rt, [$Rn], $addr", + "$Rn = $base_wb,@earlyclobber $base_wb", [(set GPR:$base_wb, (post_truncsti8 GPR:$Rt, GPR:$Rn, t2am_imm8_offset:$addr))]>; @@ -1483,7 +1457,7 @@ def t2STRB_POST : T2Iidxldst<0, 0b00, 0, 0, (outs GPR:$base_wb), // only. // Ref: A8.6.193 STR (immediate, Thumb) Encoding T4 class T2IstT<bits<2> type, string opc, InstrItinClass ii> - : T2Ii8<(outs GPR:$Rt), (ins t2addrmode_imm8:$addr), ii, opc, + : T2Ii8<(outs rGPR:$Rt), (ins t2addrmode_imm8:$addr), ii, opc, "\t$Rt, $addr", []> { let Inst{31-27} = 0b11111; let Inst{26-25} = 0b00; @@ -1508,20 +1482,20 @@ def t2STRHT : T2IstT<0b01, "strht", IIC_iStore_bh_i>; // ldrd / strd pre / post variants // For disassembly only. -def t2LDRD_PRE : T2Ii8s4<1, 1, 1, (outs GPR:$Rt, GPR:$Rt2), +def t2LDRD_PRE : T2Ii8s4<1, 1, 1, (outs rGPR:$Rt, rGPR:$Rt2), (ins GPR:$base, t2am_imm8s4_offset:$imm), IIC_iLoad_d_ru, "ldrd", "\t$Rt, $Rt2, [$base, $imm]!", []>; -def t2LDRD_POST : T2Ii8s4<0, 1, 1, (outs GPR:$Rt, GPR:$Rt2), +def t2LDRD_POST : T2Ii8s4<0, 1, 1, (outs rGPR:$Rt, rGPR:$Rt2), (ins GPR:$base, t2am_imm8s4_offset:$imm), IIC_iLoad_d_ru, "ldrd", "\t$Rt, $Rt2, [$base], $imm", []>; def t2STRD_PRE : T2Ii8s4<1, 1, 0, (outs), - (ins GPR:$Rt, GPR:$Rt2, GPR:$base, t2am_imm8s4_offset:$imm), + (ins rGPR:$Rt, rGPR:$Rt2, GPR:$base, t2am_imm8s4_offset:$imm), IIC_iStore_d_ru, "strd", "\t$Rt, $Rt2, [$base, $imm]!", []>; def t2STRD_POST : T2Ii8s4<0, 1, 0, (outs), - (ins GPR:$Rt, GPR:$Rt2, GPR:$base, t2am_imm8s4_offset:$imm), + (ins rGPR:$Rt, rGPR:$Rt2, GPR:$base, t2am_imm8s4_offset:$imm), IIC_iStore_d_ru, "strd", "\t$Rt, $Rt2, [$base], $imm", []>; // T2Ipl (Preload Data/Instruction) signals the memory system of possible future @@ -1541,6 +1515,7 @@ multiclass T2Ipl<bits<1> write, bits<1> instr, string opc> { let Inst{15-12} = 0b1111; bits<17> addr; + let addr{12} = 1; // add = TRUE let Inst{19-16} = addr{16-13}; // Rn let Inst{23} = addr{12}; // U let Inst{11-0} = addr{11-0}; // imm12 @@ -1813,10 +1788,8 @@ defm t2ADC : T2I_adde_sube_irs<0b1010, "adc", BinOpFrag<(adde_dead_carry node:$LHS, node:$RHS)>, 1>; defm t2SBC : T2I_adde_sube_irs<0b1011, "sbc", BinOpFrag<(sube_dead_carry node:$LHS, node:$RHS)>>; -defm t2ADCS : T2I_adde_sube_s_irs<0b1010, "adc", - BinOpFrag<(adde_live_carry node:$LHS, node:$RHS)>, 1>; -defm t2SBCS : T2I_adde_sube_s_irs<0b1011, "sbc", - BinOpFrag<(sube_live_carry node:$LHS, node:$RHS)>>; +defm t2ADCS : T2I_adde_sube_s_irs<BinOpFrag<(adde_live_carry node:$LHS, node:$RHS)>, 1>; +defm t2SBCS : T2I_adde_sube_s_irs<BinOpFrag<(sube_live_carry node:$LHS, node:$RHS)>>; // RSB defm t2RSB : T2I_rbin_irs <0b1110, "rsb", @@ -1847,9 +1820,14 @@ def : T2Pat<(addc rGPR:$src, t2_so_imm_neg:$imm), // Effectively, the inverse interpretation of the carry flag already accounts // for part of the negation. 
let AddedComplexity = 1 in -def : T2Pat<(adde rGPR:$src, imm0_255_not:$imm), +def : T2Pat<(adde_dead_carry rGPR:$src, imm0_255_not:$imm), + (t2SBCri rGPR:$src, imm0_255_not:$imm)>; +def : T2Pat<(adde_dead_carry rGPR:$src, t2_so_imm_not:$imm), + (t2SBCri rGPR:$src, t2_so_imm_not:$imm)>; +let AddedComplexity = 1 in +def : T2Pat<(adde_live_carry rGPR:$src, imm0_255_not:$imm), (t2SBCSri rGPR:$src, imm0_255_not:$imm)>; -def : T2Pat<(adde rGPR:$src, t2_so_imm_not:$imm), +def : T2Pat<(adde_live_carry rGPR:$src, t2_so_imm_not:$imm), (t2SBCSri rGPR:$src, t2_so_imm_not:$imm)>; // Select Bytes -- for disassembly only @@ -2052,6 +2030,10 @@ defm t2LSR : T2I_sh_ir<0b01, "lsr", BinOpFrag<(srl node:$LHS, node:$RHS)>>; defm t2ASR : T2I_sh_ir<0b10, "asr", BinOpFrag<(sra node:$LHS, node:$RHS)>>; defm t2ROR : T2I_sh_ir<0b11, "ror", BinOpFrag<(rotr node:$LHS, node:$RHS)>>; +// (rotr x, (and y, 0x...1f)) ==> (ROR x, y) +def : Pat<(rotr rGPR:$lhs, (and rGPR:$rhs, lo5AllOne)), + (t2RORrr rGPR:$lhs, rGPR:$rhs)>; + let Uses = [CPSR] in { def t2RRX : T2sTwoReg<(outs rGPR:$Rd), (ins rGPR:$Rm), IIC_iMOVsi, "rrx", "\t$Rd, $Rm", @@ -2140,10 +2122,12 @@ def t2BFC : T2BitFI<(outs rGPR:$Rd), (ins rGPR:$src, bf_inv_mask_imm:$imm), IIC_iUNAsi, "bfc", "\t$Rd, $imm", [(set rGPR:$Rd, (and rGPR:$src, bf_inv_mask_imm:$imm))]> { let Inst{31-27} = 0b11110; + let Inst{26} = 0; // should be 0. let Inst{25} = 1; let Inst{24-20} = 0b10110; let Inst{19-16} = 0b1111; // Rn let Inst{15} = 0; + let Inst{5} = 0; // should be 0. bits<10> imm; let msb{4-0} = imm{9-5}; @@ -2176,9 +2160,11 @@ let Constraints = "$src = $Rd" in { [(set rGPR:$Rd, (ARMbfi rGPR:$src, rGPR:$Rn, bf_inv_mask_imm:$imm))]> { let Inst{31-27} = 0b11110; + let Inst{26} = 0; // should be 0. let Inst{25} = 1; let Inst{24-20} = 0b10110; let Inst{15} = 0; + let Inst{5} = 0; // should be 0. bits<10> imm; let msb{4-0} = imm{9-5}; @@ -2193,9 +2179,11 @@ let Constraints = "$src = $Rd" in { IIC_iBITi, "bfi", "\t$Rd, $Rn, $lsbit, $width", []> { let Inst{31-27} = 0b11110; + let Inst{26} = 0; // should be 0. let Inst{25} = 1; let Inst{24-20} = 0b10110; let Inst{15} = 0; + let Inst{5} = 0; // should be 0. 
bits<5> lsbit; bits<5> width; @@ -2607,9 +2595,15 @@ def t2REVSH : T2I_misc<0b01, 0b11, (outs rGPR:$Rd), (ins rGPR:$Rm), IIC_iUNAr, "revsh", ".w\t$Rd, $Rm", [(set rGPR:$Rd, (sext_inreg - (or (srl (and rGPR:$Rm, 0xFF00), (i32 8)), + (or (srl rGPR:$Rm, (i32 8)), (shl rGPR:$Rm, (i32 8))), i16))]>; +def : T2Pat<(sext_inreg (or (srl (and rGPR:$Rm, 0xFF00), (i32 8)), + (shl rGPR:$Rm, (i32 8))), i16), + (t2REVSH rGPR:$Rm)>; + +def : T2Pat<(sra (bswap rGPR:$Rm), (i32 16)), (t2REVSH rGPR:$Rm)>; + def t2PKHBT : T2ThreeReg< (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, shift_imm:$sh), IIC_iBITsi, "pkhbt", "\t$Rd, $Rn, $Rm$sh", @@ -2843,9 +2837,9 @@ class T2I_ldrex<bits<2> opcod, dag oops, dag iops, AddrMode am, SizeFlagVal sz, let Inst{5-4} = opcod; let Inst{3-0} = 0b1111; - bits<4> Rn; + bits<4> addr; bits<4> Rt; - let Inst{19-16} = Rn; + let Inst{19-16} = addr; let Inst{15-12} = Rt; } class T2I_strex<bits<2> opcod, dag oops, dag iops, AddrMode am, SizeFlagVal sz, @@ -2859,37 +2853,37 @@ class T2I_strex<bits<2> opcod, dag oops, dag iops, AddrMode am, SizeFlagVal sz, let Inst{5-4} = opcod; bits<4> Rd; - bits<4> Rn; + bits<4> addr; bits<4> Rt; - let Inst{11-8} = Rd; - let Inst{19-16} = Rn; + let Inst{3-0} = Rd; + let Inst{19-16} = addr; let Inst{15-12} = Rt; } let mayLoad = 1 in { -def t2LDREXB : T2I_ldrex<0b00, (outs rGPR:$Rt), (ins rGPR:$Rn), AddrModeNone, - Size4Bytes, NoItinerary, "ldrexb", "\t$Rt, [$Rn]", +def t2LDREXB : T2I_ldrex<0b00, (outs rGPR:$Rt), (ins t2addrmode_reg:$addr), AddrModeNone, + Size4Bytes, NoItinerary, "ldrexb", "\t$Rt, $addr", "", []>; -def t2LDREXH : T2I_ldrex<0b01, (outs rGPR:$Rt), (ins rGPR:$Rn), AddrModeNone, - Size4Bytes, NoItinerary, "ldrexh", "\t$Rt, [$Rn]", +def t2LDREXH : T2I_ldrex<0b01, (outs rGPR:$Rt), (ins t2addrmode_reg:$addr), AddrModeNone, + Size4Bytes, NoItinerary, "ldrexh", "\t$Rt, $addr", "", []>; -def t2LDREX : Thumb2I<(outs rGPR:$Rt), (ins rGPR:$Rn), AddrModeNone, +def t2LDREX : Thumb2I<(outs rGPR:$Rt), (ins t2addrmode_reg:$addr), AddrModeNone, Size4Bytes, NoItinerary, - "ldrex", "\t$Rt, [$Rn]", "", + "ldrex", "\t$Rt, $addr", "", []> { let Inst{31-27} = 0b11101; let Inst{26-20} = 0b0000101; let Inst{11-8} = 0b1111; let Inst{7-0} = 0b00000000; // imm8 = 0 - bits<4> Rn; bits<4> Rt; - let Inst{19-16} = Rn; + bits<4> addr; + let Inst{19-16} = addr; let Inst{15-12} = Rt; } -def t2LDREXD : T2I_ldrex<0b11, (outs rGPR:$Rt, rGPR:$Rt2), (ins rGPR:$Rn), +def t2LDREXD : T2I_ldrex<0b11, (outs rGPR:$Rt, rGPR:$Rt2), (ins t2addrmode_reg:$addr), AddrModeNone, Size4Bytes, NoItinerary, - "ldrexd", "\t$Rt, $Rt2, [$Rn]", "", + "ldrexd", "\t$Rt, $Rt2, $addr", "", [], {?, ?, ?, ?}> { bits<4> Rt2; let Inst{11-8} = Rt2; @@ -2897,31 +2891,31 @@ def t2LDREXD : T2I_ldrex<0b11, (outs rGPR:$Rt, rGPR:$Rt2), (ins rGPR:$Rn), } let mayStore = 1, Constraints = "@earlyclobber $Rd" in { -def t2STREXB : T2I_strex<0b00, (outs rGPR:$Rd), (ins rGPR:$Rt, rGPR:$Rn), - AddrModeNone, Size4Bytes, NoItinerary, - "strexb", "\t$Rd, $Rt, [$Rn]", "", []>; -def t2STREXH : T2I_strex<0b01, (outs rGPR:$Rd), (ins rGPR:$Rt, rGPR:$Rn), - AddrModeNone, Size4Bytes, NoItinerary, - "strexh", "\t$Rd, $Rt, [$Rn]", "", []>; -def t2STREX : Thumb2I<(outs rGPR:$Rd), (ins rGPR:$Rt, rGPR:$Rn), - AddrModeNone, Size4Bytes, NoItinerary, - "strex", "\t$Rd, $Rt, [$Rn]", "", - []> { +def t2STREXB : T2I_strex<0b00, (outs rGPR:$Rd), (ins rGPR:$Rt, t2addrmode_reg:$addr), + AddrModeNone, Size4Bytes, NoItinerary, + "strexb", "\t$Rd, $Rt, $addr", "", []>; +def t2STREXH : T2I_strex<0b01, (outs rGPR:$Rd), (ins rGPR:$Rt, t2addrmode_reg:$addr), 
+ AddrModeNone, Size4Bytes, NoItinerary, + "strexh", "\t$Rd, $Rt, $addr", "", []>; +def t2STREX : Thumb2I<(outs rGPR:$Rd), (ins rGPR:$Rt, t2addrmode_reg:$addr), + AddrModeNone, Size4Bytes, NoItinerary, + "strex", "\t$Rd, $Rt, $addr", "", + []> { let Inst{31-27} = 0b11101; let Inst{26-20} = 0b0000100; let Inst{7-0} = 0b00000000; // imm8 = 0 bits<4> Rd; - bits<4> Rn; + bits<4> addr; bits<4> Rt; let Inst{11-8} = Rd; - let Inst{19-16} = Rn; + let Inst{19-16} = addr; let Inst{15-12} = Rt; } def t2STREXD : T2I_strex<0b11, (outs rGPR:$Rd), - (ins rGPR:$Rt, rGPR:$Rt2, rGPR:$Rn), + (ins rGPR:$Rt, rGPR:$Rt2, t2addrmode_reg:$addr), AddrModeNone, Size4Bytes, NoItinerary, - "strexd", "\t$Rd, $Rt, $Rt2, [$Rn]", "", [], + "strexd", "\t$Rd, $Rt, $Rt2, $addr", "", [], {?, ?, ?, ?}> { bits<4> Rt2; let Inst{11-8} = Rt2; @@ -2965,7 +2959,7 @@ let isCall = 1, // here, and we're using the stack frame for the containing function to // save/restore registers, we can't keep anything live in regs across // the eh_sjlj_setjmp(), else it will almost certainly have been tromped upon -// when we get here from a longjmp(). We force everthing out of registers +// when we get here from a longjmp(). We force everything out of registers // except for our own input by listing the relevant registers in Defs. By // doing so, we also cause the prologue/epilogue code to actively preserve // all of the callee-saved resgisters, which is exactly what we want. @@ -3238,19 +3232,20 @@ class T2RFE<bits<12> op31_20, dag oops, dag iops, InstrItinClass itin, bits<4> Rn; let Inst{19-16} = Rn; + let Inst{15-0} = 0xc000; } def t2RFEDBW : T2RFE<0b111010000011, - (outs), (ins rGPR:$Rn), NoItinerary, "rfedb", "\t$Rn!", + (outs), (ins GPR:$Rn), NoItinerary, "rfedb", "\t$Rn!", [/* For disassembly only; pattern left blank */]>; def t2RFEDB : T2RFE<0b111010000001, - (outs), (ins rGPR:$Rn), NoItinerary, "rfeab", "\t$Rn", + (outs), (ins GPR:$Rn), NoItinerary, "rfedb", "\t$Rn", [/* For disassembly only; pattern left blank */]>; def t2RFEIAW : T2RFE<0b111010011011, - (outs), (ins rGPR:$Rn), NoItinerary, "rfeia", "\t$Rn!", + (outs), (ins GPR:$Rn), NoItinerary, "rfeia", "\t$Rn!", [/* For disassembly only; pattern left blank */]>; def t2RFEIA : T2RFE<0b111010011001, - (outs), (ins rGPR:$Rn), NoItinerary, "rfeia", "\t$Rn", + (outs), (ins GPR:$Rn), NoItinerary, "rfeia", "\t$Rn", [/* For disassembly only; pattern left blank */]>; //===----------------------------------------------------------------------===// @@ -3352,10 +3347,8 @@ def t2MSR : T2SpecialReg<0b111100111000 /* op31-20 */, 0b10 /* op15-14 */, // Move between coprocessor and ARM core register -- for disassembly only // -class t2MovRCopro<string opc, bit direction> - : T2Cop<(outs), (ins p_imm:$cop, i32imm:$opc1, - GPR:$Rt, c_imm:$CRn, c_imm:$CRm, i32imm:$opc2), - !strconcat(opc, "\t$cop, $opc1, $Rt, $CRn, $CRm, $opc2"), +class t2MovRCopro<string opc, bit direction, dag oops, dag iops> + : T2Cop<oops, iops, !strconcat(opc, "\t$cop, $opc1, $Rt, $CRn, $CRm, $opc2"), [/* For disassembly only; pattern left blank */]> { let Inst{27-24} = 0b1110; let Inst{20} = direction; @@ -3376,8 +3369,12 @@ class t2MovRCopro<string opc, bit direction> let Inst{19-16} = CRn; } -def t2MCR2 : t2MovRCopro<"mcr2", 0 /* from ARM core register to coprocessor */>; -def t2MRC2 : t2MovRCopro<"mrc2", 1 /* from coprocessor to ARM core register */>; +def t2MCR2 : t2MovRCopro<"mcr2", 0 /* from ARM core register to coprocessor */, + (outs), (ins p_imm:$cop, i32imm:$opc1, GPR:$Rt, c_imm:$CRn, + c_imm:$CRm, i32imm:$opc2)>; +def 
t2MRC2 : t2MovRCopro<"mrc2", 1 /* from coprocessor to ARM core register */, + (outs GPR:$Rt), (ins p_imm:$cop, i32imm:$opc1, c_imm:$CRn, + c_imm:$CRm, i32imm:$opc2)>; class t2MovRRCopro<string opc, bit direction> : T2Cop<(outs), (ins p_imm:$cop, i32imm:$opc1, GPR:$Rt, GPR:$Rt2, c_imm:$CRm), diff --git a/lib/Target/ARM/ARMInstrVFP.td b/lib/Target/ARM/ARMInstrVFP.td index 29902833f2bb..376bd9607e4b 100644 --- a/lib/Target/ARM/ARMInstrVFP.td +++ b/lib/Target/ARM/ARMInstrVFP.td @@ -101,14 +101,6 @@ multiclass vfp_ldst_mult<string asm, bit L_bit, let Inst{21} = 1; // Writeback let Inst{20} = L_bit; } - def DDB : - AXDI4<(outs), (ins GPR:$Rn, pred:$p, dpr_reglist:$regs, variable_ops), - IndexModeNone, itin, - !strconcat(asm, "db${p}\t$Rn, $regs"), "", []> { - let Inst{24-23} = 0b10; // Decrement Before - let Inst{21} = 0; // No writeback - let Inst{20} = L_bit; - } def DDB_UPD : AXDI4<(outs GPR:$wb), (ins GPR:$Rn, pred:$p, dpr_reglist:$regs, variable_ops), IndexModeUpd, itin_upd, @@ -143,18 +135,6 @@ multiclass vfp_ldst_mult<string asm, bit L_bit, // VFP pipelines. let D = VFPNeonDomain; } - def SDB : - AXSI4<(outs), (ins GPR:$Rn, pred:$p, spr_reglist:$regs, variable_ops), - IndexModeNone, itin, - !strconcat(asm, "db${p}\t$Rn, $regs"), "", []> { - let Inst{24-23} = 0b10; // Decrement Before - let Inst{21} = 0; // No writeback - let Inst{20} = L_bit; - - // Some single precision VFP instructions may be executed on both NEON and - // VFP pipelines. - let D = VFPNeonDomain; - } def SDB_UPD : AXSI4<(outs GPR:$wb), (ins GPR:$Rn, pred:$p, spr_reglist:$regs, variable_ops), IndexModeUpd, itin_upd, @@ -467,6 +447,10 @@ def VMOVRS : AVConv2I<0b11100001, 0b1010, let Inst{6-5} = 0b00; let Inst{3-0} = 0b0000; + + // Some single precision VFP instructions may be executed on both NEON and VFP + // pipelines. + let D = VFPNeonDomain; } def VMOVSR : AVConv4I<0b11100000, 0b1010, @@ -484,6 +468,10 @@ def VMOVSR : AVConv4I<0b11100000, 0b1010, let Inst{6-5} = 0b00; let Inst{3-0} = 0b0000; + + // Some single precision VFP instructions may be executed on both NEON and VFP + // pipelines. + let D = VFPNeonDomain; } let neverHasSideEffects = 1 in { @@ -503,6 +491,10 @@ def VMOVRRD : AVConv3I<0b11000101, 0b1011, let Inst{19-16} = Rt2; let Inst{7-6} = 0b00; + + // Some single precision VFP instructions may be executed on both NEON and VFP + // pipelines. + let D = VFPNeonDomain; } def VMOVRRS : AVConv3I<0b11000101, 0b1010, @@ -510,6 +502,10 @@ def VMOVRRS : AVConv3I<0b11000101, 0b1010, IIC_fpMOVDI, "vmov", "\t$wb, $dst2, $src1, $src2", [/* For disassembly only; pattern left blank */]> { let Inst{7-6} = 0b00; + + // Some single precision VFP instructions may be executed on both NEON and VFP + // pipelines. + let D = VFPNeonDomain; } } // neverHasSideEffects @@ -532,6 +528,10 @@ def VMOVDRR : AVConv5I<0b11000100, 0b1011, let Inst{19-16} = Rt2; let Inst{7-6} = 0b00; + + // Some single precision VFP instructions may be executed on both NEON and VFP + // pipelines. + let D = VFPNeonDomain; } let neverHasSideEffects = 1 in @@ -540,6 +540,10 @@ def VMOVSRR : AVConv5I<0b11000100, 0b1010, IIC_fpMOVID, "vmov", "\t$dst1, $dst2, $src1, $src2", [/* For disassembly only; pattern left blank */]> { let Inst{7-6} = 0b00; + + // Some single precision VFP instructions may be executed on both NEON and VFP + // pipelines. 
+ let D = VFPNeonDomain; } // FMRDH: SPR -> GPR @@ -972,33 +976,15 @@ def : Pat<(fsub_mlx (fmul_su SPR:$a, SPR:$b), SPR:$dstin), // let neverHasSideEffects = 1 in { -def VMOVDcc : ADuI<0b11101, 0b11, 0b0000, 0b01, 0, - (outs DPR:$Dd), (ins DPR:$Dn, DPR:$Dm), - IIC_fpUNA64, "vmov", ".f64\t$Dd, $Dm", +def VMOVDcc : ARMPseudoInst<(outs DPR:$Dd), (ins DPR:$Dn, DPR:$Dm, pred:$p), + Size4Bytes, IIC_fpUNA64, [/*(set DPR:$Dd, (ARMcmov DPR:$Dn, DPR:$Dm, imm:$cc))*/]>, RegConstraint<"$Dn = $Dd">; -def VMOVScc : ASuI<0b11101, 0b11, 0b0000, 0b01, 0, - (outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm), - IIC_fpUNA32, "vmov", ".f32\t$Sd, $Sm", +def VMOVScc : ARMPseudoInst<(outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm, pred:$p), + Size4Bytes, IIC_fpUNA32, [/*(set SPR:$Sd, (ARMcmov SPR:$Sn, SPR:$Sm, imm:$cc))*/]>, RegConstraint<"$Sn = $Sd">; - -def VNEGDcc : ADuI<0b11101, 0b11, 0b0001, 0b01, 0, - (outs DPR:$Dd), (ins DPR:$Dn, DPR:$Dm), - IIC_fpUNA64, "vneg", ".f64\t$Dd, $Dm", - [/*(set DPR:$Dd, (ARMcneg DPR:$Dn, DPR:$Dm, imm:$cc))*/]>, - RegConstraint<"$Dn = $Dd">; - -def VNEGScc : ASuI<0b11101, 0b11, 0b0001, 0b01, 0, - (outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm), - IIC_fpUNA32, "vneg", ".f32\t$Sd, $Sm", - [/*(set SPR:$Sd, (ARMcneg SPR:$Sn, SPR:$Sm, imm:$cc))*/]>, - RegConstraint<"$Sn = $Sd"> { - // Some single precision VFP instructions may be executed on both NEON and - // VFP pipelines on A8. - let D = VFPNeonA8Domain; -} } // neverHasSideEffects //===----------------------------------------------------------------------===// diff --git a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp index d9dc5cdedb30..df89fadb311b 100644 --- a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp +++ b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp @@ -79,7 +79,7 @@ namespace { unsigned Position; MachineBasicBlock::iterator MBBI; bool Merged; - MemOpQueueEntry(int o, unsigned r, bool k, unsigned p, + MemOpQueueEntry(int o, unsigned r, bool k, unsigned p, MachineBasicBlock::iterator i) : Offset(o), Reg(r), isKill(k), Position(p), MBBI(i), Merged(false) {} }; @@ -174,7 +174,7 @@ static int getLoadStoreMultipleOpcode(int Opcode, ARM_AM::AMSubMode Mode) { switch (Mode) { default: llvm_unreachable("Unhandled submode!"); case ARM_AM::ia: return ARM::VLDMSIA; - case ARM_AM::db: return ARM::VLDMSDB; + case ARM_AM::db: return 0; // Only VLDMSDB_UPD exists. } break; case ARM::VSTRS: @@ -182,7 +182,7 @@ static int getLoadStoreMultipleOpcode(int Opcode, ARM_AM::AMSubMode Mode) { switch (Mode) { default: llvm_unreachable("Unhandled submode!"); case ARM_AM::ia: return ARM::VSTMSIA; - case ARM_AM::db: return ARM::VSTMSDB; + case ARM_AM::db: return 0; // Only VSTMSDB_UPD exists. } break; case ARM::VLDRD: @@ -190,7 +190,7 @@ static int getLoadStoreMultipleOpcode(int Opcode, ARM_AM::AMSubMode Mode) { switch (Mode) { default: llvm_unreachable("Unhandled submode!"); case ARM_AM::ia: return ARM::VLDMDIA; - case ARM_AM::db: return ARM::VLDMDDB; + case ARM_AM::db: return 0; // Only VLDMDDB_UPD exists. } break; case ARM::VSTRD: @@ -198,7 +198,7 @@ static int getLoadStoreMultipleOpcode(int Opcode, ARM_AM::AMSubMode Mode) { switch (Mode) { default: llvm_unreachable("Unhandled submode!"); case ARM_AM::ia: return ARM::VSTMDIA; - case ARM_AM::db: return ARM::VSTMDDB; + case ARM_AM::db: return 0; // Only VSTMDDB_UPD exists. 
} break; } @@ -246,13 +246,9 @@ AMSubMode getLoadStoreMultipleSubMode(int Opcode) { case ARM::t2LDMDB_UPD: case ARM::t2STMDB: case ARM::t2STMDB_UPD: - case ARM::VLDMSDB: case ARM::VLDMSDB_UPD: - case ARM::VSTMSDB: case ARM::VSTMSDB_UPD: - case ARM::VLDMDDB: case ARM::VLDMDDB_UPD: - case ARM::VSTMDDB: case ARM::VSTMDDB_UPD: return ARM_AM::db; @@ -312,6 +308,10 @@ ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB, // VLDM/VSTM do not support DB mode without also updating the base reg. Mode = ARM_AM::db; else if (Offset != 0) { + // Check if this is a supported opcode before we insert instructions to + // calculate a new base register. + if (!getLoadStoreMultipleOpcode(Opcode, Mode)) return false; + // If starting offset isn't zero, insert a MI to materialize a new base. // But only do so if it is cost effective, i.e. merging more than two // loads / stores. @@ -354,6 +354,7 @@ ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB, bool isDef = (isi32Load(Opcode) || Opcode == ARM::VLDRS || Opcode == ARM::VLDRD); Opcode = getLoadStoreMultipleOpcode(Opcode, Mode); + if (!Opcode) return false; MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(Opcode)) .addReg(Base, getKillRegState(BaseKill)) .addImm(Pred).addReg(PredReg); @@ -453,6 +454,25 @@ ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex, unsigned PRegNum = PMO.isUndef() ? UINT_MAX : getARMRegisterNumbering(PReg); unsigned Count = 1; + unsigned Limit = ~0U; + + // vldm / vstm limit are 32 for S variants, 16 for D variants. + + switch (Opcode) { + default: break; + case ARM::VSTRS: + Limit = 32; + break; + case ARM::VSTRD: + Limit = 16; + break; + case ARM::VLDRD: + Limit = 16; + break; + case ARM::VLDRS: + Limit = 32; + break; + } for (unsigned i = SIndex+1, e = MemOps.size(); i != e; ++i) { int NewOffset = MemOps[i].Offset; @@ -460,13 +480,13 @@ ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex, unsigned Reg = MO.getReg(); unsigned RegNum = MO.isUndef() ? UINT_MAX : getARMRegisterNumbering(Reg); - // Register numbers must be in ascending order. For VFP, the registers - // must also be consecutive and there is a limit of 16 double-word - // registers per instruction. + // Register numbers must be in ascending order. For VFP / NEON load and + // store multiples, the registers must also be consecutive and within the + // limit on the number of registers per instruction. 
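The hunks above lean on two small conventions worth spelling out: getLoadStoreMultipleOpcode now returns 0 as an "unsupported" sentinel whenever a DB submode is requested for VLDR/VSTR (only the writeback _UPD forms of those multiples exist), and MergeLDR_STR caps a merge run at 32 registers for the S variants and 16 for the D variants. A minimal, self-contained sketch of both guards, using stand-in enums rather than the real LLVM opcode values:

    #include <cstdio>

    // Stand-ins for the real ARM opcode enums; the values are illustrative.
    enum SubMode { ia, db };
    enum VFPOp { VLDRS, VLDRD, VSTRS, VSTRD };

    // 0 plays the same role as in the patch: "no such multiple opcode",
    // so every caller must guard before building an instruction with it.
    unsigned multipleOpcode(VFPOp Opc, SubMode Mode) {
      if (Mode == db) return 0; // only the _UPD (writeback) variants take DB
      return 1;                 // placeholder for the matching IA opcode
    }

    // vldm/vstm transfer at most 32 S registers or 16 D registers.
    unsigned regLimit(VFPOp Opc) {
      return (Opc == VLDRS || Opc == VSTRS) ? 32 : 16;
    }

    int main() {
      if (!multipleOpcode(VSTRD, db))
        std::puts("db without base update: bail out of the merge");
      std::printf("register limit for a VLDRD run: %u\n", regLimit(VLDRD));
      return 0;
    }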
if (Reg != ARM::SP && NewOffset == Offset + (int)Size && - ((isNotVFP && RegNum > PRegNum) - || ((Size < 8 || Count < 16) && RegNum == PRegNum+1))) { + ((isNotVFP && RegNum > PRegNum) || + ((Count < Limit) && RegNum == PRegNum+1))) { Offset += Size; PRegNum = RegNum; ++Count; @@ -567,14 +587,10 @@ static inline unsigned getLSMultipleTransferSize(MachineInstr *MI) { case ARM::t2STMIA: case ARM::t2STMDB: case ARM::VLDMSIA: - case ARM::VLDMSDB: case ARM::VSTMSIA: - case ARM::VSTMSDB: return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 4; case ARM::VLDMDIA: - case ARM::VLDMDDB: case ARM::VSTMDIA: - case ARM::VSTMDDB: return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 8; } } @@ -624,7 +640,6 @@ static unsigned getUpdatingLSMultipleOpcode(unsigned Opc, } break; case ARM::VLDMSIA: - case ARM::VLDMSDB: switch (Mode) { default: llvm_unreachable("Unhandled submode!"); case ARM_AM::ia: return ARM::VLDMSIA_UPD; @@ -632,7 +647,6 @@ static unsigned getUpdatingLSMultipleOpcode(unsigned Opc, } break; case ARM::VLDMDIA: - case ARM::VLDMDDB: switch (Mode) { default: llvm_unreachable("Unhandled submode!"); case ARM_AM::ia: return ARM::VLDMDIA_UPD; @@ -640,7 +654,6 @@ static unsigned getUpdatingLSMultipleOpcode(unsigned Opc, } break; case ARM::VSTMSIA: - case ARM::VSTMSDB: switch (Mode) { default: llvm_unreachable("Unhandled submode!"); case ARM_AM::ia: return ARM::VSTMSIA_UPD; @@ -648,7 +661,6 @@ static unsigned getUpdatingLSMultipleOpcode(unsigned Opc, } break; case ARM::VSTMDIA: - case ARM::VSTMDDB: switch (Mode) { default: llvm_unreachable("Unhandled submode!"); case ARM_AM::ia: return ARM::VSTMDIA_UPD; @@ -749,7 +761,7 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB, MIB.addOperand(MI->getOperand(OpNum)); // Transfer memoperands. - (*MIB).setMemRefs(MI->memoperands_begin(), MI->memoperands_end()); + MIB->setMemRefs(MI->memoperands_begin(), MI->memoperands_end()); MBB.erase(MBBI); return true; @@ -1275,14 +1287,14 @@ bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) { MergeLDR_STR(MBB, 0, CurrBase, CurrOpc, CurrSize, CurrPred, CurrPredReg, Scratch, MemOps, Merges); - // Try folding preceeding/trailing base inc/dec into the generated + // Try folding preceding/trailing base inc/dec into the generated // LDM/STM ops. for (unsigned i = 0, e = Merges.size(); i < e; ++i) if (MergeBaseUpdateLSMultiple(MBB, Merges[i], Advance, MBBI)) ++NumMerges; NumMerges += Merges.size(); - // Try folding preceeding/trailing base inc/dec into those load/store + // Try folding preceding/trailing base inc/dec into those load/store // that were not merged to form LDM/STM ops. for (unsigned i = 0; i != NumMemOps; ++i) if (!MemOps[i].Merged) @@ -1292,7 +1304,7 @@ bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) { // RS may be pointing to an instruction that's deleted. RS->skipTo(prior(MBBI)); } else if (NumMemOps == 1) { - // Try folding preceeding/trailing base inc/dec into the single + // Try folding preceding/trailing base inc/dec into the single // load/store. if (MergeBaseUpdateLoadStore(MBB, MemOps[0].MBBI, TII, Advance, MBBI)) { ++NumMerges; @@ -1322,7 +1334,7 @@ bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) { } /// MergeReturnIntoLDM - If this is a exit BB, try merging the return ops -/// ("bx lr" and "mov pc, lr") into the preceeding stack restore so it +/// ("bx lr" and "mov pc, lr") into the preceding stack restore so it /// directly restore the value of LR into pc. 
/// ldmfd sp!, {..., lr} /// bx lr @@ -1530,15 +1542,9 @@ ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, // Then make sure the immediate offset fits. int OffImm = getMemoryOpOffset(Op0); if (isT2) { - if (OffImm < 0) { - if (OffImm < -255) - // Can't fall back to t2LDRi8 / t2STRi8. - return false; - } else { - int Limit = (1 << 8) * Scale; - if (OffImm >= Limit || (OffImm & (Scale-1))) - return false; - } + int Limit = (1 << 8) * Scale; + if (OffImm >= Limit || (OffImm <= -Limit) || (OffImm & (Scale-1))) + return false; Offset = OffImm; } else { ARM_AM::AddrOpc AddSub = ARM_AM::add; diff --git a/lib/Target/ARM/ARMMCAsmInfo.cpp b/lib/Target/ARM/ARMMCAsmInfo.cpp index 53edfcad9308..a3f89e92f8ec 100644 --- a/lib/Target/ARM/ARMMCAsmInfo.cpp +++ b/lib/Target/ARM/ARMMCAsmInfo.cpp @@ -12,8 +12,16 @@ //===----------------------------------------------------------------------===// #include "ARMMCAsmInfo.h" +#include "llvm/Support/CommandLine.h" + using namespace llvm; +cl::opt<bool> +EnableARMEHABI("arm-enable-ehabi", cl::Hidden, + cl::desc("Generate ARM EHABI tables"), + cl::init(false)); + + static const char *const arm_asm_table[] = { "{r0}", "r0", "{r1}", "r1", @@ -65,4 +73,8 @@ ARMELFMCAsmInfo::ARMELFMCAsmInfo() { DwarfRequiresFrameSection = false; SupportsDebugInformation = true; + + // Exceptions handling + if (EnableARMEHABI) + ExceptionsType = ExceptionHandling::ARM; } diff --git a/lib/Target/ARM/ARMMCCodeEmitter.cpp b/lib/Target/ARM/ARMMCCodeEmitter.cpp index 6d7b48587d19..10607b17c532 100644 --- a/lib/Target/ARM/ARMMCCodeEmitter.cpp +++ b/lib/Target/ARM/ARMMCCodeEmitter.cpp @@ -278,6 +278,15 @@ public: unsigned getAddrMode6OffsetOpValue(const MCInst &MI, unsigned Op, SmallVectorImpl<MCFixup> &Fixups) const; + unsigned getShiftRight8Imm(const MCInst &MI, unsigned Op, + SmallVectorImpl<MCFixup> &Fixups) const; + unsigned getShiftRight16Imm(const MCInst &MI, unsigned Op, + SmallVectorImpl<MCFixup> &Fixups) const; + unsigned getShiftRight32Imm(const MCInst &MI, unsigned Op, + SmallVectorImpl<MCFixup> &Fixups) const; + unsigned getShiftRight64Imm(const MCInst &MI, unsigned Op, + SmallVectorImpl<MCFixup> &Fixups) const; + unsigned NEONThumb2DataIPostEncoder(const MCInst &MI, unsigned EncodedValue) const; unsigned NEONThumb2LoadStorePostEncoder(const MCInst &MI, @@ -1201,6 +1210,30 @@ getAddrMode6OffsetOpValue(const MCInst &MI, unsigned Op, return MO.getReg(); } +unsigned ARMMCCodeEmitter:: +getShiftRight8Imm(const MCInst &MI, unsigned Op, + SmallVectorImpl<MCFixup> &Fixups) const { + return 8 - MI.getOperand(Op).getImm(); +} + +unsigned ARMMCCodeEmitter:: +getShiftRight16Imm(const MCInst &MI, unsigned Op, + SmallVectorImpl<MCFixup> &Fixups) const { + return 16 - MI.getOperand(Op).getImm(); +} + +unsigned ARMMCCodeEmitter:: +getShiftRight32Imm(const MCInst &MI, unsigned Op, + SmallVectorImpl<MCFixup> &Fixups) const { + return 32 - MI.getOperand(Op).getImm(); +} + +unsigned ARMMCCodeEmitter:: +getShiftRight64Imm(const MCInst &MI, unsigned Op, + SmallVectorImpl<MCFixup> &Fixups) const { + return 64 - MI.getOperand(Op).getImm(); +} + void ARMMCCodeEmitter:: EncodeInstruction(const MCInst &MI, raw_ostream &OS, SmallVectorImpl<MCFixup> &Fixups) const { diff --git a/lib/Target/ARM/ARMMCExpr.h b/lib/Target/ARM/ARMMCExpr.h index d42f766ca91f..0a2e883deb1d 100644 --- a/lib/Target/ARM/ARMMCExpr.h +++ b/lib/Target/ARM/ARMMCExpr.h @@ -60,6 +60,9 @@ public: bool EvaluateAsRelocatableImpl(MCValue &Res, const MCAsmLayout *Layout) const; void AddValueSymbols(MCAssembler *) 
const; + const MCSection *FindAssociatedSection() const { + return getSubExpr()->FindAssociatedSection(); + } static bool classof(const MCExpr *E) { return E->getKind() == MCExpr::Target; diff --git a/lib/Target/ARM/ARMRegisterInfo.cpp b/lib/Target/ARM/ARMRegisterInfo.cpp index ad51bc13edf0..1cba1ba591ef 100644 --- a/lib/Target/ARM/ARMRegisterInfo.cpp +++ b/lib/Target/ARM/ARMRegisterInfo.cpp @@ -12,26 +12,8 @@ //===----------------------------------------------------------------------===// #include "ARM.h" -#include "ARMAddressingModes.h" #include "ARMBaseInstrInfo.h" -#include "ARMInstrInfo.h" -#include "ARMMachineFunctionInfo.h" #include "ARMRegisterInfo.h" -#include "ARMSubtarget.h" -#include "llvm/Constants.h" -#include "llvm/DerivedTypes.h" -#include "llvm/CodeGen/MachineConstantPool.h" -#include "llvm/CodeGen/MachineFrameInfo.h" -#include "llvm/CodeGen/MachineFunction.h" -#include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/CodeGen/MachineLocation.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/RegisterScavenging.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetOptions.h" -#include "llvm/ADT/BitVector.h" -#include "llvm/ADT/SmallVector.h" using namespace llvm; ARMRegisterInfo::ARMRegisterInfo(const ARMBaseInstrInfo &tii, diff --git a/lib/Target/ARM/ARMRegisterInfo.td b/lib/Target/ARM/ARMRegisterInfo.td index 22d15b572ddd..54bf82a99e73 100644 --- a/lib/Target/ARM/ARMRegisterInfo.td +++ b/lib/Target/ARM/ARMRegisterInfo.td @@ -70,6 +70,8 @@ def R4 : ARMReg< 4, "r4">, DwarfRegNum<[4]>; def R5 : ARMReg< 5, "r5">, DwarfRegNum<[5]>; def R6 : ARMReg< 6, "r6">, DwarfRegNum<[6]>; def R7 : ARMReg< 7, "r7">, DwarfRegNum<[7]>; +// These require 32-bit instructions. 
+let CostPerUse = 1 in { def R8 : ARMReg< 8, "r8">, DwarfRegNum<[8]>; def R9 : ARMReg< 9, "r9">, DwarfRegNum<[9]>; def R10 : ARMReg<10, "r10">, DwarfRegNum<[10]>; @@ -78,6 +80,7 @@ def R12 : ARMReg<12, "r12">, DwarfRegNum<[12]>; def SP : ARMReg<13, "sp">, DwarfRegNum<[13]>; def LR : ARMReg<14, "lr">, DwarfRegNum<[14]>; def PC : ARMReg<15, "pc">, DwarfRegNum<[15]>; +} // Float registers def S0 : ARMFReg< 0, "s0">; def S1 : ARMFReg< 1, "s1">; @@ -99,33 +102,41 @@ def S30 : ARMFReg<30, "s30">; def S31 : ARMFReg<31, "s31">; // Aliases of the F* registers used to hold 64-bit fp values (doubles) let SubRegIndices = [ssub_0, ssub_1] in { -def D0 : ARMReg< 0, "d0", [S0, S1]>; -def D1 : ARMReg< 1, "d1", [S2, S3]>; -def D2 : ARMReg< 2, "d2", [S4, S5]>; -def D3 : ARMReg< 3, "d3", [S6, S7]>; -def D4 : ARMReg< 4, "d4", [S8, S9]>; -def D5 : ARMReg< 5, "d5", [S10, S11]>; -def D6 : ARMReg< 6, "d6", [S12, S13]>; -def D7 : ARMReg< 7, "d7", [S14, S15]>; -def D8 : ARMReg< 8, "d8", [S16, S17]>; -def D9 : ARMReg< 9, "d9", [S18, S19]>; -def D10 : ARMReg<10, "d10", [S20, S21]>; -def D11 : ARMReg<11, "d11", [S22, S23]>; -def D12 : ARMReg<12, "d12", [S24, S25]>; -def D13 : ARMReg<13, "d13", [S26, S27]>; -def D14 : ARMReg<14, "d14", [S28, S29]>; -def D15 : ARMReg<15, "d15", [S30, S31]>; +def D0 : ARMReg< 0, "d0", [S0, S1]>, DwarfRegNum<[256]>; +def D1 : ARMReg< 1, "d1", [S2, S3]>, DwarfRegNum<[257]>; +def D2 : ARMReg< 2, "d2", [S4, S5]>, DwarfRegNum<[258]>; +def D3 : ARMReg< 3, "d3", [S6, S7]>, DwarfRegNum<[259]>; +def D4 : ARMReg< 4, "d4", [S8, S9]>, DwarfRegNum<[260]>; +def D5 : ARMReg< 5, "d5", [S10, S11]>, DwarfRegNum<[261]>; +def D6 : ARMReg< 6, "d6", [S12, S13]>, DwarfRegNum<[262]>; +def D7 : ARMReg< 7, "d7", [S14, S15]>, DwarfRegNum<[263]>; +def D8 : ARMReg< 8, "d8", [S16, S17]>, DwarfRegNum<[264]>; +def D9 : ARMReg< 9, "d9", [S18, S19]>, DwarfRegNum<[265]>; +def D10 : ARMReg<10, "d10", [S20, S21]>, DwarfRegNum<[266]>; +def D11 : ARMReg<11, "d11", [S22, S23]>, DwarfRegNum<[267]>; +def D12 : ARMReg<12, "d12", [S24, S25]>, DwarfRegNum<[268]>; +def D13 : ARMReg<13, "d13", [S26, S27]>, DwarfRegNum<[269]>; +def D14 : ARMReg<14, "d14", [S28, S29]>, DwarfRegNum<[270]>; +def D15 : ARMReg<15, "d15", [S30, S31]>, DwarfRegNum<[271]>; } // VFP3 defines 16 additional double registers -def D16 : ARMFReg<16, "d16">; def D17 : ARMFReg<17, "d17">; -def D18 : ARMFReg<18, "d18">; def D19 : ARMFReg<19, "d19">; -def D20 : ARMFReg<20, "d20">; def D21 : ARMFReg<21, "d21">; -def D22 : ARMFReg<22, "d22">; def D23 : ARMFReg<23, "d23">; -def D24 : ARMFReg<24, "d24">; def D25 : ARMFReg<25, "d25">; -def D26 : ARMFReg<26, "d26">; def D27 : ARMFReg<27, "d27">; -def D28 : ARMFReg<28, "d28">; def D29 : ARMFReg<29, "d29">; -def D30 : ARMFReg<30, "d30">; def D31 : ARMFReg<31, "d31">; +def D16 : ARMFReg<16, "d16">, DwarfRegNum<[272]>; +def D17 : ARMFReg<17, "d17">, DwarfRegNum<[273]>; +def D18 : ARMFReg<18, "d18">, DwarfRegNum<[274]>; +def D19 : ARMFReg<19, "d19">, DwarfRegNum<[275]>; +def D20 : ARMFReg<20, "d20">, DwarfRegNum<[276]>; +def D21 : ARMFReg<21, "d21">, DwarfRegNum<[277]>; +def D22 : ARMFReg<22, "d22">, DwarfRegNum<[278]>; +def D23 : ARMFReg<23, "d23">, DwarfRegNum<[279]>; +def D24 : ARMFReg<24, "d24">, DwarfRegNum<[280]>; +def D25 : ARMFReg<25, "d25">, DwarfRegNum<[281]>; +def D26 : ARMFReg<26, "d26">, DwarfRegNum<[282]>; +def D27 : ARMFReg<27, "d27">, DwarfRegNum<[283]>; +def D28 : ARMFReg<28, "d28">, DwarfRegNum<[284]>; +def D29 : ARMFReg<29, "d29">, DwarfRegNum<[285]>; +def D30 : ARMFReg<30, "d30">, DwarfRegNum<[286]>; +def D31 
: ARMFReg<31, "d31">, DwarfRegNum<[287]>; // Advanced SIMD (NEON) defines 16 quad-word aliases let SubRegIndices = [dsub_0, dsub_1], diff --git a/lib/Target/ARM/ARMScheduleA9.td b/lib/Target/ARM/ARMScheduleA9.td index 82c6735f1b14..49fedf63f8bc 100644 --- a/lib/Target/ARM/ARMScheduleA9.td +++ b/lib/Target/ARM/ARMScheduleA9.td @@ -656,19 +656,19 @@ def CortexA9Itineraries : ProcessorItineraries< [1, 1, 1]>, // // Single-precision to Integer Move + // + // On A9 move-from-VFP is free to issue with no stall if other VFP + // operations are in flight. I assume it still can't dual-issue though. InstrItinData<IIC_fpMOVSI, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, - InstrStage<1, [A9_MUX0], 0>, - InstrStage<1, [A9_DRegsVFP], 0, Required>, - InstrStage<2, [A9_DRegsN], 0, Reserved>, - InstrStage<1, [A9_NPipe]>], + InstrStage<1, [A9_MUX0], 0>], [2, 1]>, // // Double-precision to Integer Move + // + // On A9 move-from-VFP is free to issue with no stall if other VFP + // operations are in flight. I assume it still can't dual-issue though. InstrItinData<IIC_fpMOVDI, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, - InstrStage<1, [A9_MUX0], 0>, - InstrStage<1, [A9_DRegsVFP], 0, Required>, - InstrStage<2, [A9_DRegsN], 0, Reserved>, - InstrStage<1, [A9_NPipe]>], + InstrStage<1, [A9_MUX0], 0>], [2, 1, 1]>, // // Single-precision FP Load @@ -691,20 +691,22 @@ def CortexA9Itineraries : ProcessorItineraries< [2, 1]>, // // FP Load Multiple + // FIXME: assumes 2 doubles which requires 2 LS cycles. InstrItinData<IIC_fpLoad_m, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, InstrStage<1, [A9_MUX0], 0>, InstrStage<1, [A9_DRegsVFP], 0, Required>, InstrStage<2, [A9_DRegsN], 0, Reserved>, InstrStage<1, [A9_NPipe], 0>, - InstrStage<1, [A9_LSUnit]>], [1, 1, 1, 1]>, + InstrStage<2, [A9_LSUnit]>], [1, 1, 1, 1]>, // // FP Load Multiple + update + // FIXME: assumes 2 doubles which requires 2 LS cycles. InstrItinData<IIC_fpLoad_mu,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>, InstrStage<1, [A9_MUX0], 0>, InstrStage<1, [A9_DRegsVFP], 0, Required>, InstrStage<2, [A9_DRegsN], 0, Reserved>, InstrStage<1, [A9_NPipe], 0>, - InstrStage<1, [A9_LSUnit]>], [2, 1, 1, 1]>, + InstrStage<2, [A9_LSUnit]>], [2, 1, 1, 1]>, // // Single-precision FP Store InstrItinData<IIC_fpStore32,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>, @@ -725,205 +727,206 @@ def CortexA9Itineraries : ProcessorItineraries< [1, 1]>, // // FP Store Multiple + // FIXME: assumes 2 doubles which requires 2 LS cycles. InstrItinData<IIC_fpStore_m,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>, InstrStage<1, [A9_MUX0], 0>, InstrStage<1, [A9_DRegsVFP], 0, Required>, InstrStage<2, [A9_DRegsN], 0, Reserved>, InstrStage<1, [A9_NPipe], 0>, - InstrStage<1, [A9_LSUnit]>], [1, 1, 1, 1]>, + InstrStage<2, [A9_LSUnit]>], [1, 1, 1, 1]>, // // FP Store Multiple + update + // FIXME: assumes 2 doubles which requires 2 LS cycles. InstrItinData<IIC_fpStore_mu,[InstrStage<1, [A9_Issue0, A9_Issue1], 0>, InstrStage<1, [A9_MUX0], 0>, InstrStage<1, [A9_DRegsVFP], 0, Required>, InstrStage<2, [A9_DRegsN], 0, Reserved>, InstrStage<1, [A9_NPipe], 0>, - InstrStage<1, [A9_LSUnit]>], [2, 1, 1, 1]>, + InstrStage<2, [A9_LSUnit]>], [2, 1, 1, 1]>, // NEON // VLD1 - // FIXME: Conservatively assume insufficent alignment. 
InstrItinData<IIC_VLD1, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, InstrStage<1, [A9_MUX0], 0>, InstrStage<1, [A9_DRegsN], 0, Required>, - InstrStage<8, [A9_DRegsVFP], 0, Reserved>, - InstrStage<2, [A9_NPipe], 0>, - InstrStage<2, [A9_LSUnit]>], - [2, 1]>, + InstrStage<7, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_NPipe], 0>, + InstrStage<1, [A9_LSUnit]>], + [1, 1]>, // VLD1x2 InstrItinData<IIC_VLD1x2, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, InstrStage<1, [A9_MUX0], 0>, InstrStage<1, [A9_DRegsN], 0, Required>, - InstrStage<8, [A9_DRegsVFP], 0, Reserved>, - InstrStage<2, [A9_NPipe], 0>, - InstrStage<2, [A9_LSUnit]>], - [2, 2, 1]>, + InstrStage<7, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_NPipe], 0>, + InstrStage<1, [A9_LSUnit]>], + [1, 1, 1]>, // VLD1x3 InstrItinData<IIC_VLD1x3, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, InstrStage<1, [A9_MUX0], 0>, InstrStage<1, [A9_DRegsN], 0, Required>, - InstrStage<9, [A9_DRegsVFP], 0, Reserved>, - InstrStage<3, [A9_NPipe], 0>, - InstrStage<3, [A9_LSUnit]>], - [2, 2, 3, 1]>, + InstrStage<8, [A9_DRegsVFP], 0, Reserved>, + InstrStage<2, [A9_NPipe], 0>, + InstrStage<2, [A9_LSUnit]>], + [1, 1, 2, 1]>, // VLD1x4 InstrItinData<IIC_VLD1x4, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, InstrStage<1, [A9_MUX0], 0>, InstrStage<1, [A9_DRegsN], 0, Required>, - InstrStage<9, [A9_DRegsVFP], 0, Reserved>, - InstrStage<3, [A9_NPipe], 0>, - InstrStage<3, [A9_LSUnit]>], - [2, 2, 3, 3, 1]>, + InstrStage<8, [A9_DRegsVFP], 0, Reserved>, + InstrStage<2, [A9_NPipe], 0>, + InstrStage<2, [A9_LSUnit]>], + [1, 1, 2, 2, 1]>, // VLD1u InstrItinData<IIC_VLD1u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, InstrStage<1, [A9_MUX0], 0>, InstrStage<1, [A9_DRegsN], 0, Required>, - InstrStage<8, [A9_DRegsVFP], 0, Reserved>, - InstrStage<2, [A9_NPipe], 0>, - InstrStage<2, [A9_LSUnit]>], - [2, 2, 1]>, + InstrStage<7, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_NPipe], 0>, + InstrStage<1, [A9_LSUnit]>], + [1, 2, 1]>, // VLD1x2u InstrItinData<IIC_VLD1x2u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, InstrStage<1, [A9_MUX0], 0>, InstrStage<1, [A9_DRegsN], 0, Required>, - InstrStage<8, [A9_DRegsVFP], 0, Reserved>, - InstrStage<2, [A9_NPipe], 0>, - InstrStage<2, [A9_LSUnit]>], - [2, 2, 2, 1]>, + InstrStage<7, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_NPipe], 0>, + InstrStage<1, [A9_LSUnit]>], + [1, 1, 2, 1]>, // VLD1x3u InstrItinData<IIC_VLD1x3u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, InstrStage<1, [A9_MUX0], 0>, InstrStage<1, [A9_DRegsN], 0, Required>, - InstrStage<9, [A9_DRegsVFP], 0, Reserved>, - InstrStage<3, [A9_NPipe], 0>, - InstrStage<3, [A9_LSUnit]>], - [2, 2, 3, 2, 1]>, + InstrStage<8, [A9_DRegsVFP], 0, Reserved>, + InstrStage<2, [A9_NPipe], 0>, + InstrStage<2, [A9_LSUnit]>], + [1, 1, 2, 2, 1]>, // VLD1x4u InstrItinData<IIC_VLD1x4u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, InstrStage<1, [A9_MUX0], 0>, InstrStage<1, [A9_DRegsN], 0, Required>, - InstrStage<9, [A9_DRegsVFP], 0, Reserved>, - InstrStage<3, [A9_NPipe], 0>, - InstrStage<3, [A9_LSUnit]>], - [2, 2, 3, 3, 2, 1]>, + InstrStage<8, [A9_DRegsVFP], 0, Reserved>, + InstrStage<2, [A9_NPipe], 0>, + InstrStage<2, [A9_LSUnit]>], + [1, 1, 2, 2, 2, 1]>, // // VLD1ln InstrItinData<IIC_VLD1ln, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, InstrStage<1, [A9_MUX0], 0>, InstrStage<1, [A9_DRegsN], 0, Required>, - InstrStage<9, [A9_DRegsVFP], 0, Reserved>, - InstrStage<3, [A9_NPipe], 0>, - InstrStage<3, [A9_LSUnit]>], - [4, 1, 1, 1]>, + InstrStage<8, [A9_DRegsVFP], 0, Reserved>, + InstrStage<2, [A9_NPipe], 0>, + InstrStage<2, 
[A9_LSUnit]>], + [3, 1, 1, 1]>, // // VLD1lnu InstrItinData<IIC_VLD1lnu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, InstrStage<1, [A9_MUX0], 0>, InstrStage<1, [A9_DRegsN], 0, Required>, - InstrStage<9, [A9_DRegsVFP], 0, Reserved>, - InstrStage<3, [A9_NPipe], 0>, - InstrStage<3, [A9_LSUnit]>], - [4, 2, 1, 1, 1, 1]>, + InstrStage<8, [A9_DRegsVFP], 0, Reserved>, + InstrStage<2, [A9_NPipe], 0>, + InstrStage<2, [A9_LSUnit]>], + [3, 2, 1, 1, 1, 1]>, // // VLD1dup InstrItinData<IIC_VLD1dup, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, InstrStage<1, [A9_MUX0], 0>, InstrStage<1, [A9_DRegsN], 0, Required>, - InstrStage<8, [A9_DRegsVFP], 0, Reserved>, - InstrStage<2, [A9_NPipe], 0>, - InstrStage<2, [A9_LSUnit]>], - [3, 1]>, + InstrStage<7, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_NPipe], 0>, + InstrStage<1, [A9_LSUnit]>], + [2, 1]>, // // VLD1dupu InstrItinData<IIC_VLD1dupu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, InstrStage<1, [A9_MUX0], 0>, InstrStage<1, [A9_DRegsN], 0, Required>, - InstrStage<8, [A9_DRegsVFP], 0, Reserved>, - InstrStage<2, [A9_NPipe], 0>, - InstrStage<2, [A9_LSUnit]>], - [3, 2, 1, 1]>, + InstrStage<7, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_NPipe], 0>, + InstrStage<1, [A9_LSUnit]>], + [2, 2, 1, 1]>, // // VLD2 InstrItinData<IIC_VLD2, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, InstrStage<1, [A9_MUX0], 0>, InstrStage<1, [A9_DRegsN], 0, Required>, // Extra latency cycles since wbck is 7 cycles - InstrStage<8, [A9_DRegsVFP], 0, Reserved>, - InstrStage<2, [A9_NPipe], 0>, - InstrStage<2, [A9_LSUnit]>], - [3, 3, 1]>, + InstrStage<7, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_NPipe], 0>, + InstrStage<1, [A9_LSUnit]>], + [2, 2, 1]>, // // VLD2x2 InstrItinData<IIC_VLD2x2, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, InstrStage<1, [A9_MUX0], 0>, InstrStage<1, [A9_DRegsN], 0, Required>, - InstrStage<9, [A9_DRegsVFP], 0, Reserved>, - InstrStage<3, [A9_NPipe], 0>, - InstrStage<3, [A9_LSUnit]>], - [3, 4, 3, 4, 1]>, + InstrStage<8, [A9_DRegsVFP], 0, Reserved>, + InstrStage<2, [A9_NPipe], 0>, + InstrStage<2, [A9_LSUnit]>], + [2, 3, 2, 3, 1]>, // // VLD2ln InstrItinData<IIC_VLD2ln, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, InstrStage<1, [A9_MUX0], 0>, InstrStage<1, [A9_DRegsN], 0, Required>, - InstrStage<9, [A9_DRegsVFP], 0, Reserved>, - InstrStage<3, [A9_NPipe], 0>, - InstrStage<3, [A9_LSUnit]>], - [4, 4, 1, 1, 1, 1]>, + InstrStage<8, [A9_DRegsVFP], 0, Reserved>, + InstrStage<2, [A9_NPipe], 0>, + InstrStage<2, [A9_LSUnit]>], + [3, 3, 1, 1, 1, 1]>, // // VLD2u InstrItinData<IIC_VLD2u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, InstrStage<1, [A9_MUX0], 0>, InstrStage<1, [A9_DRegsN], 0, Required>, // Extra latency cycles since wbck is 7 cycles - InstrStage<8, [A9_DRegsVFP], 0, Reserved>, - InstrStage<2, [A9_NPipe], 0>, - InstrStage<2, [A9_LSUnit]>], - [3, 3, 2, 1, 1, 1]>, + InstrStage<7, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_NPipe], 0>, + InstrStage<1, [A9_LSUnit]>], + [2, 2, 2, 1, 1, 1]>, // // VLD2x2u InstrItinData<IIC_VLD2x2u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, InstrStage<1, [A9_MUX0], 0>, InstrStage<1, [A9_DRegsN], 0, Required>, - InstrStage<9, [A9_DRegsVFP], 0, Reserved>, - InstrStage<3, [A9_NPipe], 0>, - InstrStage<3, [A9_LSUnit]>], - [3, 4, 3, 4, 2, 1]>, + InstrStage<8, [A9_DRegsVFP], 0, Reserved>, + InstrStage<2, [A9_NPipe], 0>, + InstrStage<2, [A9_LSUnit]>], + [2, 3, 2, 3, 2, 1]>, // // VLD2lnu InstrItinData<IIC_VLD2lnu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, InstrStage<1, [A9_MUX0], 0>, InstrStage<1, [A9_DRegsN], 0, Required>, - InstrStage<9, 
[A9_DRegsVFP], 0, Reserved>, - InstrStage<3, [A9_NPipe], 0>, - InstrStage<3, [A9_LSUnit]>], - [4, 4, 2, 1, 1, 1, 1, 1]>, + InstrStage<8, [A9_DRegsVFP], 0, Reserved>, + InstrStage<2, [A9_NPipe], 0>, + InstrStage<2, [A9_LSUnit]>], + [3, 3, 2, 1, 1, 1, 1, 1]>, // // VLD2dup InstrItinData<IIC_VLD2dup, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, InstrStage<1, [A9_MUX0], 0>, InstrStage<1, [A9_DRegsN], 0, Required>, - InstrStage<8, [A9_DRegsVFP], 0, Reserved>, - InstrStage<2, [A9_NPipe], 0>, - InstrStage<2, [A9_LSUnit]>], - [3, 3, 1]>, + InstrStage<7, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_NPipe], 0>, + InstrStage<1, [A9_LSUnit]>], + [2, 2, 1]>, // // VLD2dupu InstrItinData<IIC_VLD2dupu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, InstrStage<1, [A9_MUX0], 0>, InstrStage<1, [A9_DRegsN], 0, Required>, - InstrStage<8, [A9_DRegsVFP], 0, Reserved>, - InstrStage<2, [A9_NPipe], 0>, - InstrStage<2, [A9_LSUnit]>], - [3, 3, 2, 1, 1]>, + InstrStage<7, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_NPipe], 0>, + InstrStage<1, [A9_LSUnit]>], + [2, 2, 2, 1, 1]>, // // VLD3 InstrItinData<IIC_VLD3, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, InstrStage<1, [A9_MUX0], 0>, InstrStage<1, [A9_DRegsN], 0, Required>, - InstrStage<10,[A9_DRegsVFP], 0, Reserved>, - InstrStage<4, [A9_NPipe], 0>, - InstrStage<4, [A9_LSUnit]>], - [4, 4, 5, 1]>, + InstrStage<9,[A9_DRegsVFP], 0, Reserved>, + InstrStage<3, [A9_NPipe], 0>, + InstrStage<3, [A9_LSUnit]>], + [3, 3, 4, 1]>, // // VLD3ln InstrItinData<IIC_VLD3ln, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, @@ -938,10 +941,10 @@ def CortexA9Itineraries : ProcessorItineraries< InstrItinData<IIC_VLD3u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, InstrStage<1, [A9_MUX0], 0>, InstrStage<1, [A9_DRegsN], 0, Required>, - InstrStage<10,[A9_DRegsVFP], 0, Reserved>, - InstrStage<4, [A9_NPipe], 0>, - InstrStage<4, [A9_LSUnit]>], - [4, 4, 5, 2, 1]>, + InstrStage<9,[A9_DRegsVFP], 0, Reserved>, + InstrStage<3, [A9_NPipe], 0>, + InstrStage<3, [A9_LSUnit]>], + [3, 3, 4, 2, 1]>, // // VLD3lnu InstrItinData<IIC_VLD3lnu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, @@ -974,108 +977,108 @@ def CortexA9Itineraries : ProcessorItineraries< InstrItinData<IIC_VLD4, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, InstrStage<1, [A9_MUX0], 0>, InstrStage<1, [A9_DRegsN], 0, Required>, - InstrStage<10,[A9_DRegsVFP], 0, Reserved>, - InstrStage<4, [A9_NPipe], 0>, - InstrStage<4, [A9_LSUnit]>], - [4, 4, 5, 5, 1]>, + InstrStage<9,[A9_DRegsVFP], 0, Reserved>, + InstrStage<3, [A9_NPipe], 0>, + InstrStage<3, [A9_LSUnit]>], + [3, 3, 4, 4, 1]>, // // VLD4ln InstrItinData<IIC_VLD4ln, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, InstrStage<1, [A9_MUX0], 0>, InstrStage<1, [A9_DRegsN], 0, Required>, - InstrStage<11,[A9_DRegsVFP], 0, Reserved>, - InstrStage<5, [A9_NPipe], 0>, - InstrStage<5, [A9_LSUnit]>], - [5, 5, 6, 6, 1, 1, 1, 1, 2, 2]>, + InstrStage<10,[A9_DRegsVFP], 0, Reserved>, + InstrStage<4, [A9_NPipe], 0>, + InstrStage<4, [A9_LSUnit]>], + [4, 4, 5, 5, 1, 1, 1, 1, 2, 2]>, // // VLD4u InstrItinData<IIC_VLD4u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, InstrStage<1, [A9_MUX0], 0>, InstrStage<1, [A9_DRegsN], 0, Required>, - InstrStage<10,[A9_DRegsVFP], 0, Reserved>, - InstrStage<4, [A9_NPipe], 0>, - InstrStage<4, [A9_LSUnit]>], - [4, 4, 5, 5, 2, 1]>, + InstrStage<9,[A9_DRegsVFP], 0, Reserved>, + InstrStage<3, [A9_NPipe], 0>, + InstrStage<3, [A9_LSUnit]>], + [3, 3, 4, 4, 2, 1]>, // // VLD4lnu InstrItinData<IIC_VLD4lnu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, InstrStage<1, [A9_MUX0], 0>, InstrStage<1, [A9_DRegsN], 0, 
Required>, - InstrStage<11,[A9_DRegsVFP], 0, Reserved>, - InstrStage<5, [A9_NPipe], 0>, - InstrStage<5, [A9_LSUnit]>], - [5, 5, 6, 6, 2, 1, 1, 1, 1, 1, 2, 2]>, + InstrStage<10,[A9_DRegsVFP], 0, Reserved>, + InstrStage<4, [A9_NPipe], 0>, + InstrStage<4, [A9_LSUnit]>], + [4, 4, 5, 5, 2, 1, 1, 1, 1, 1, 2, 2]>, // // VLD4dup InstrItinData<IIC_VLD4dup, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, InstrStage<1, [A9_MUX0], 0>, InstrStage<1, [A9_DRegsN], 0, Required>, - InstrStage<9, [A9_DRegsVFP], 0, Reserved>, - InstrStage<3, [A9_NPipe], 0>, - InstrStage<3, [A9_LSUnit]>], - [3, 3, 4, 4, 1]>, + InstrStage<8, [A9_DRegsVFP], 0, Reserved>, + InstrStage<2, [A9_NPipe], 0>, + InstrStage<2, [A9_LSUnit]>], + [2, 2, 3, 3, 1]>, // // VLD4dupu InstrItinData<IIC_VLD4dupu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, InstrStage<1, [A9_MUX0], 0>, InstrStage<1, [A9_DRegsN], 0, Required>, - InstrStage<9, [A9_DRegsVFP], 0, Reserved>, - InstrStage<3, [A9_NPipe], 0>, - InstrStage<3, [A9_LSUnit]>], - [3, 3, 4, 4, 2, 1, 1]>, + InstrStage<8, [A9_DRegsVFP], 0, Reserved>, + InstrStage<2, [A9_NPipe], 0>, + InstrStage<2, [A9_LSUnit]>], + [2, 2, 3, 3, 2, 1, 1]>, // // VST1 InstrItinData<IIC_VST1, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, InstrStage<1, [A9_MUX0], 0>, InstrStage<1, [A9_DRegsN], 0, Required>, - InstrStage<2, [A9_DRegsVFP], 0, Reserved>, - InstrStage<2, [A9_NPipe], 0>, - InstrStage<2, [A9_LSUnit]>], + InstrStage<1, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_NPipe], 0>, + InstrStage<1, [A9_LSUnit]>], [1, 1, 1]>, // // VST1x2 InstrItinData<IIC_VST1x2, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, InstrStage<1, [A9_MUX0], 0>, InstrStage<1, [A9_DRegsN], 0, Required>, - InstrStage<2, [A9_DRegsVFP], 0, Reserved>, - InstrStage<2, [A9_NPipe], 0>, - InstrStage<2, [A9_LSUnit]>], + InstrStage<1, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_NPipe], 0>, + InstrStage<1, [A9_LSUnit]>], [1, 1, 1, 1]>, // // VST1x3 InstrItinData<IIC_VST1x3, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, InstrStage<1, [A9_MUX0], 0>, InstrStage<1, [A9_DRegsN], 0, Required>, - InstrStage<3, [A9_DRegsVFP], 0, Reserved>, - InstrStage<3, [A9_NPipe], 0>, - InstrStage<3, [A9_LSUnit]>], + InstrStage<2, [A9_DRegsVFP], 0, Reserved>, + InstrStage<2, [A9_NPipe], 0>, + InstrStage<2, [A9_LSUnit]>], [1, 1, 1, 1, 2]>, // // VST1x4 InstrItinData<IIC_VST1x4, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, InstrStage<1, [A9_MUX0], 0>, InstrStage<1, [A9_DRegsN], 0, Required>, - InstrStage<3, [A9_DRegsVFP], 0, Reserved>, - InstrStage<3, [A9_NPipe], 0>, - InstrStage<3, [A9_LSUnit]>], + InstrStage<2, [A9_DRegsVFP], 0, Reserved>, + InstrStage<2, [A9_NPipe], 0>, + InstrStage<2, [A9_LSUnit]>], [1, 1, 1, 1, 2, 2]>, // // VST1u InstrItinData<IIC_VST1u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, InstrStage<1, [A9_MUX0], 0>, InstrStage<1, [A9_DRegsN], 0, Required>, - InstrStage<2, [A9_DRegsVFP], 0, Reserved>, - InstrStage<2, [A9_NPipe], 0>, - InstrStage<2, [A9_LSUnit]>], + InstrStage<1, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_NPipe], 0>, + InstrStage<1, [A9_LSUnit]>], [2, 1, 1, 1, 1]>, // // VST1x2u InstrItinData<IIC_VST1x2u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, InstrStage<1, [A9_MUX0], 0>, InstrStage<1, [A9_DRegsN], 0, Required>, - InstrStage<2, [A9_DRegsVFP], 0, Reserved>, - InstrStage<2, [A9_NPipe], 0>, - InstrStage<2, [A9_LSUnit]>], + InstrStage<1, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_NPipe], 0>, + InstrStage<1, [A9_LSUnit]>], [2, 1, 1, 1, 1, 1]>, // // VST1x3u @@ -1083,44 +1086,44 @@ def CortexA9Itineraries : ProcessorItineraries< InstrStage<1, 
[A9_MUX0], 0>, InstrStage<1, [A9_DRegsN], 0, Required>, InstrStage<2, [A9_DRegsVFP], 0, Reserved>, - InstrStage<3, [A9_NPipe], 0>, - InstrStage<3, [A9_LSUnit]>], + InstrStage<2, [A9_NPipe], 0>, + InstrStage<2, [A9_LSUnit]>], [2, 1, 1, 1, 1, 1, 2]>, // // VST1x4u InstrItinData<IIC_VST1x4u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, InstrStage<1, [A9_MUX0], 0>, InstrStage<1, [A9_DRegsN], 0, Required>, - InstrStage<3, [A9_DRegsVFP], 0, Reserved>, - InstrStage<3, [A9_NPipe], 0>, - InstrStage<3, [A9_LSUnit]>], + InstrStage<2, [A9_DRegsVFP], 0, Reserved>, + InstrStage<2, [A9_NPipe], 0>, + InstrStage<2, [A9_LSUnit]>], [2, 1, 1, 1, 1, 1, 2, 2]>, // // VST1ln InstrItinData<IIC_VST1ln, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, InstrStage<1, [A9_MUX0], 0>, InstrStage<1, [A9_DRegsN], 0, Required>, - InstrStage<2, [A9_DRegsVFP], 0, Reserved>, - InstrStage<2, [A9_NPipe], 0>, - InstrStage<2, [A9_LSUnit]>], + InstrStage<1, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_NPipe], 0>, + InstrStage<1, [A9_LSUnit]>], [1, 1, 1]>, // // VST1lnu InstrItinData<IIC_VST1lnu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, InstrStage<1, [A9_MUX0], 0>, InstrStage<1, [A9_DRegsN], 0, Required>, - InstrStage<3, [A9_DRegsVFP], 0, Reserved>, - InstrStage<3, [A9_NPipe], 0>, - InstrStage<3, [A9_LSUnit]>], + InstrStage<1, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_NPipe], 0>, + InstrStage<1, [A9_LSUnit]>], [2, 1, 1, 1, 1]>, // // VST2 InstrItinData<IIC_VST2, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, InstrStage<1, [A9_MUX0], 0>, InstrStage<1, [A9_DRegsN], 0, Required>, - InstrStage<2, [A9_DRegsVFP], 0, Reserved>, - InstrStage<2, [A9_NPipe], 0>, - InstrStage<2, [A9_LSUnit]>], + InstrStage<1, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_NPipe], 0>, + InstrStage<1, [A9_LSUnit]>], [1, 1, 1, 1]>, // // VST2x2 @@ -1136,9 +1139,9 @@ def CortexA9Itineraries : ProcessorItineraries< InstrItinData<IIC_VST2u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, InstrStage<1, [A9_MUX0], 0>, InstrStage<1, [A9_DRegsN], 0, Required>, - InstrStage<2, [A9_DRegsVFP], 0, Reserved>, - InstrStage<2, [A9_NPipe], 0>, - InstrStage<2, [A9_LSUnit]>], + InstrStage<1, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_NPipe], 0>, + InstrStage<1, [A9_LSUnit]>], [2, 1, 1, 1, 1, 1]>, // // VST2x2u @@ -1154,36 +1157,36 @@ def CortexA9Itineraries : ProcessorItineraries< InstrItinData<IIC_VST2ln, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, InstrStage<1, [A9_MUX0], 0>, InstrStage<1, [A9_DRegsN], 0, Required>, - InstrStage<2, [A9_DRegsVFP], 0, Reserved>, - InstrStage<2, [A9_NPipe], 0>, - InstrStage<2, [A9_LSUnit]>], + InstrStage<1, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_NPipe], 0>, + InstrStage<1, [A9_LSUnit]>], [1, 1, 1, 1]>, // // VST2lnu InstrItinData<IIC_VST2lnu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, InstrStage<1, [A9_MUX0], 0>, InstrStage<1, [A9_DRegsN], 0, Required>, - InstrStage<3, [A9_DRegsVFP], 0, Reserved>, - InstrStage<3, [A9_NPipe], 0>, - InstrStage<3, [A9_LSUnit]>], + InstrStage<1, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_NPipe], 0>, + InstrStage<1, [A9_LSUnit]>], [2, 1, 1, 1, 1, 1]>, // // VST3 InstrItinData<IIC_VST3, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, InstrStage<1, [A9_MUX0], 0>, InstrStage<1, [A9_DRegsN], 0, Required>, - InstrStage<3, [A9_DRegsVFP], 0, Reserved>, - InstrStage<3, [A9_NPipe], 0>, - InstrStage<3, [A9_LSUnit]>], + InstrStage<2, [A9_DRegsVFP], 0, Reserved>, + InstrStage<2, [A9_NPipe], 0>, + InstrStage<2, [A9_LSUnit]>], [1, 1, 1, 1, 2]>, // // VST3u InstrItinData<IIC_VST3u, [InstrStage<1, [A9_Issue0, 
A9_Issue1], 0>, InstrStage<1, [A9_MUX0], 0>, InstrStage<1, [A9_DRegsN], 0, Required>, - InstrStage<3, [A9_DRegsVFP], 0, Reserved>, - InstrStage<3, [A9_NPipe], 0>, - InstrStage<3, [A9_LSUnit]>], + InstrStage<2, [A9_DRegsVFP], 0, Reserved>, + InstrStage<2, [A9_NPipe], 0>, + InstrStage<2, [A9_LSUnit]>], [2, 1, 1, 1, 1, 1, 2]>, // // VST3ln @@ -1208,36 +1211,36 @@ def CortexA9Itineraries : ProcessorItineraries< InstrItinData<IIC_VST4, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, InstrStage<1, [A9_MUX0], 0>, InstrStage<1, [A9_DRegsN], 0, Required>, - InstrStage<3, [A9_DRegsVFP], 0, Reserved>, - InstrStage<3, [A9_NPipe], 0>, - InstrStage<3, [A9_LSUnit]>], + InstrStage<2, [A9_DRegsVFP], 0, Reserved>, + InstrStage<2, [A9_NPipe], 0>, + InstrStage<2, [A9_LSUnit]>], [1, 1, 1, 1, 2, 2]>, // // VST4u InstrItinData<IIC_VST4u, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, InstrStage<1, [A9_MUX0], 0>, InstrStage<1, [A9_DRegsN], 0, Required>, - InstrStage<3, [A9_DRegsVFP], 0, Reserved>, - InstrStage<3, [A9_NPipe], 0>, - InstrStage<3, [A9_LSUnit]>], + InstrStage<2, [A9_DRegsVFP], 0, Reserved>, + InstrStage<2, [A9_NPipe], 0>, + InstrStage<2, [A9_LSUnit]>], [2, 1, 1, 1, 1, 1, 2, 2]>, // // VST4ln InstrItinData<IIC_VST4ln, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, InstrStage<1, [A9_MUX0], 0>, InstrStage<1, [A9_DRegsN], 0, Required>, - InstrStage<3, [A9_DRegsVFP], 0, Reserved>, - InstrStage<3, [A9_NPipe], 0>, - InstrStage<3, [A9_LSUnit]>], + InstrStage<2, [A9_DRegsVFP], 0, Reserved>, + InstrStage<2, [A9_NPipe], 0>, + InstrStage<2, [A9_LSUnit]>], [1, 1, 1, 1, 2, 2]>, // // VST4lnu InstrItinData<IIC_VST4lnu, [InstrStage<1, [A9_Issue0, A9_Issue1], 0>, InstrStage<1, [A9_MUX0], 0>, InstrStage<1, [A9_DRegsN], 0, Required>, - InstrStage<3, [A9_DRegsVFP], 0, Reserved>, - InstrStage<3, [A9_NPipe], 0>, - InstrStage<3, [A9_LSUnit]>], + InstrStage<2, [A9_DRegsVFP], 0, Reserved>, + InstrStage<2, [A9_NPipe], 0>, + InstrStage<2, [A9_LSUnit]>], [2, 1, 1, 1, 1, 1, 2, 2]>, // diff --git a/lib/Target/ARM/ARMSelectionDAGInfo.cpp b/lib/Target/ARM/ARMSelectionDAGInfo.cpp index 2b9202bff01c..aa1e398c0e42 100644 --- a/lib/Target/ARM/ARMSelectionDAGInfo.cpp +++ b/lib/Target/ARM/ARMSelectionDAGInfo.cpp @@ -35,7 +35,7 @@ ARMSelectionDAGInfo::EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl, // This requires 4-byte alignment. if ((Align & 3) != 0) return SDValue(); - // This requires the copy size to be a constant, preferrably + // This requires the copy size to be a constant, preferably // within a subtarget-specific limit. 
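For context on the alignment check above and the constant-size check that follows: EmitTargetCodeForMemcpy signals "no custom lowering" by returning an empty SDValue, so both tests are early-outs back to the generic memcpy expansion. A compilable sketch of the same guard shape, with std::optional standing in for SDValue (an assumption for illustration, not the real API):

    #include <cstdint>
    #include <cstdio>
    #include <optional>

    // Sketch only: an empty optional plays the role of the empty SDValue,
    // meaning "fall back to the target-independent lowering".
    std::optional<unsigned> lowerInlineMemcpy(std::optional<uint64_t> Size,
                                              unsigned Align) {
      if ((Align & 3) != 0)   // requires 4-byte alignment
        return std::nullopt;
      if (!Size)              // requires a compile-time-constant copy size
        return std::nullopt;
      return 42;              // placeholder for the emitted load/store chain
    }

    int main() {
      std::printf("align-2 copy custom-lowered? %d\n",
                  lowerInlineMemcpy(uint64_t{16}, 2).has_value());
      return 0;
    }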
ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size); if (!ConstantSize) diff --git a/lib/Target/ARM/ARMSubtarget.cpp b/lib/Target/ARM/ARMSubtarget.cpp index 1465984899c6..c6f266b07531 100644 --- a/lib/Target/ARM/ARMSubtarget.cpp +++ b/lib/Target/ARM/ARMSubtarget.cpp @@ -38,6 +38,7 @@ ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &FS, , ARMFPUType(None) , UseNEONForSinglePrecisionFP(false) , SlowFPVMLx(false) + , HasVMLxForwarding(false) , SlowFPBrcc(false) , IsThumb(isT) , ThumbMode(Thumb1) @@ -51,6 +52,7 @@ ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &FS, , HasT2ExtractPack(false) , HasDataBarrier(false) , Pref32BitThumb(false) + , AvoidCPSRPartialUpdate(false) , HasMPExtension(false) , FPOnlySP(false) , AllowsUnalignedMem(false) diff --git a/lib/Target/ARM/ARMSubtarget.h b/lib/Target/ARM/ARMSubtarget.h index 76c1c3fb41b1..0271c873f191 100644 --- a/lib/Target/ARM/ARMSubtarget.h +++ b/lib/Target/ARM/ARMSubtarget.h @@ -61,6 +61,10 @@ protected: /// whether the FP VML[AS] instructions are slow (if so, don't use them). bool SlowFPVMLx; + /// HasVMLxForwarding - If true, NEON has special multiplier accumulator + /// forwarding to allow mul + mla being issued back to back. + bool HasVMLxForwarding; + /// SlowFPBrcc - True if floating point compare + branch is slow. bool SlowFPBrcc; @@ -106,6 +110,11 @@ protected: /// over 16-bit ones. bool Pref32BitThumb; + /// AvoidCPSRPartialUpdate - If true, codegen would avoid using instructions + /// that partially update CPSR and add false dependency on the previous + /// CPSR setting instruction. + bool AvoidCPSRPartialUpdate; + /// HasMPExtension - True if the subtarget supports Multiprocessing /// extension (ARMv7 only). bool HasMPExtension; @@ -182,15 +191,19 @@ protected: bool hasT2ExtractPack() const { return HasT2ExtractPack; } bool hasDataBarrier() const { return HasDataBarrier; } bool useFPVMLx() const { return !SlowFPVMLx; } + bool hasVMLxForwarding() const { return HasVMLxForwarding; } bool isFPBrccSlow() const { return SlowFPBrcc; } bool isFPOnlySP() const { return FPOnlySP; } bool prefers32BitThumb() const { return Pref32BitThumb; } + bool avoidCPSRPartialUpdate() const { return AvoidCPSRPartialUpdate; } bool hasMPExtension() const { return HasMPExtension; } bool hasFP16() const { return HasFP16; } bool hasD16() const { return HasD16; } - bool isTargetDarwin() const { return TargetTriple.getOS() == Triple::Darwin; } + const Triple &getTargetTriple() const { return TargetTriple; } + + bool isTargetDarwin() const { return TargetTriple.isOSDarwin(); } bool isTargetELF() const { return !isTargetDarwin(); } bool isAPCS_ABI() const { return TargetABI == ARM_ABI_APCS; } diff --git a/lib/Target/ARM/ARMTargetMachine.cpp b/lib/Target/ARM/ARMTargetMachine.cpp index 0ee773b165fb..29aa4f7ad2ce 100644 --- a/lib/Target/ARM/ARMTargetMachine.cpp +++ b/lib/Target/ARM/ARMTargetMachine.cpp @@ -22,16 +22,13 @@ #include "llvm/Target/TargetRegistry.h" using namespace llvm; -static cl::opt<bool>ExpandMLx("expand-fp-mlx", cl::init(false), cl::Hidden); - static MCAsmInfo *createMCAsmInfo(const Target &T, StringRef TT) { Triple TheTriple(TT); - switch (TheTriple.getOS()) { - case Triple::Darwin: + + if (TheTriple.isOSDarwin()) return new ARMMCAsmInfoDarwin(); - default: - return new ARMELFMCAsmInfo(); - } + + return new ARMELFMCAsmInfo(); } // This is duplicated code. Refactor this. 
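The createMCAsmInfo hunk just above, and the createMCStreamer hunk that follows, replace exhaustive switches over Triple OS enumerators with the isOSDarwin()/isOSWindows() predicate helpers, keeping the Darwin/Windows/ELF split in one legible place. A small stand-alone sketch of the same dispatch; MiniTriple is a hypothetical stand-in, not llvm::Triple:

    #include <cstdio>
    #include <string>

    struct MiniTriple {
      std::string OS;
      // Grouped predicates in the spirit of Triple::isOSDarwin(), which the
      // patched code uses instead of matching individual OS enumerators.
      bool isOSDarwin() const { return OS == "darwin" || OS == "ios"; }
      bool isOSWindows() const {
        return OS == "win32" || OS == "mingw32" || OS == "cygwin";
      }
    };

    const char *pickAsmInfo(const MiniTriple &T) {
      if (T.isOSDarwin())
        return "ARMMCAsmInfoDarwin";
      return "ARMELFMCAsmInfo"; // everything else uses the ELF variant
    }

    int main() {
      std::puts(pickAsmInfo(MiniTriple{"darwin"}));
      std::puts(pickAsmInfo(MiniTriple{"linux"}));
      return 0;
    }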
@@ -41,17 +38,17 @@ static MCStreamer *createMCStreamer(const Target &T, const std::string &TT, MCCodeEmitter *Emitter, bool RelaxAll, bool NoExecStack) { - switch (Triple(TT).getOS()) { - case Triple::Darwin: + Triple TheTriple(TT); + + if (TheTriple.isOSDarwin()) return createMachOStreamer(Ctx, TAB, OS, Emitter, RelaxAll); - case Triple::MinGW32: - case Triple::Cygwin: - case Triple::Win32: + + if (TheTriple.isOSWindows()) { llvm_unreachable("ARM does not support Windows COFF format"); return NULL; - default: - return createELFStreamer(Ctx, TAB, OS, Emitter, RelaxAll, NoExecStack); } + + return createELFStreamer(Ctx, TAB, OS, Emitter, RelaxAll, NoExecStack); } extern "C" void LLVMInitializeARMTarget() { @@ -86,8 +83,7 @@ ARMBaseTargetMachine::ARMBaseTargetMachine(const Target &T, : LLVMTargetMachine(T, TT), Subtarget(TT, FS, isThumb), JITInfo(), - InstrItins(Subtarget.getInstrItineraryData()) -{ + InstrItins(Subtarget.getInstrItineraryData()) { DefRelocModel = getRelocationModel(); } @@ -149,8 +145,7 @@ bool ARMBaseTargetMachine::addPreRegAlloc(PassManagerBase &PM, // FIXME: temporarily disabling load / store optimization pass for Thumb1. if (OptLevel != CodeGenOpt::None && !Subtarget.isThumb1Only()) PM.add(createARMLoadStoreOptimizationPass(true)); - if (ExpandMLx && - OptLevel != CodeGenOpt::None && Subtarget.hasVFP2()) + if (OptLevel != CodeGenOpt::None && Subtarget.isCortexA9()) PM.add(createMLxExpansionPass()); return true; diff --git a/lib/Target/ARM/ARMTargetObjectFile.cpp b/lib/Target/ARM/ARMTargetObjectFile.cpp index 7535da54a95f..19defa1b5196 100644 --- a/lib/Target/ARM/ARMTargetObjectFile.cpp +++ b/lib/Target/ARM/ARMTargetObjectFile.cpp @@ -36,8 +36,9 @@ void ARMElfTargetObjectFile::Initialize(MCContext &Ctx, ELF::SHF_WRITE | ELF::SHF_ALLOC, SectionKind::getDataRel()); + LSDASection = NULL; } - + AttributesSection = getContext().getELFSection(".ARM.attributes", ELF::SHT_ARM_ATTRIBUTES, diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp index 129af206e1d9..29ecc182d31f 100644 --- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp +++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp @@ -29,15 +29,6 @@ #include "llvm/ADT/Twine.h" using namespace llvm; -/// Shift types used for register controlled shifts in ARM memory addressing. 
-enum ShiftType { - Lsl, - Lsr, - Asr, - Ror, - Rrx -}; - namespace { class ARMOperand; @@ -55,8 +46,10 @@ class ARMAsmParser : public TargetAsmParser { int TryParseRegister(); virtual bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc); bool TryParseRegisterWithWriteBack(SmallVectorImpl<MCParsedAsmOperand*> &); + bool TryParseShiftRegister(SmallVectorImpl<MCParsedAsmOperand*> &); bool ParseRegisterList(SmallVectorImpl<MCParsedAsmOperand*> &); - bool ParseMemory(SmallVectorImpl<MCParsedAsmOperand*> &); + bool ParseMemory(SmallVectorImpl<MCParsedAsmOperand*> &, + ARMII::AddrMode AddrMode); bool ParseOperand(SmallVectorImpl<MCParsedAsmOperand*> &, StringRef Mnemonic); bool ParsePrefix(ARMMCExpr::VariantKind &RefKind); const MCExpr *ApplyPrefixToExpr(const MCExpr *E, @@ -65,13 +58,14 @@ class ARMAsmParser : public TargetAsmParser { bool ParseMemoryOffsetReg(bool &Negative, bool &OffsetRegShifted, - enum ShiftType &ShiftType, + enum ARM_AM::ShiftOpc &ShiftType, const MCExpr *&ShiftAmount, const MCExpr *&Offset, bool &OffsetIsReg, int &OffsetRegNum, SMLoc &E); - bool ParseShift(enum ShiftType &St, const MCExpr *&ShiftAmount, SMLoc &E); + bool ParseShift(enum ARM_AM::ShiftOpc &St, + const MCExpr *&ShiftAmount, SMLoc &E); bool ParseDirectiveWord(unsigned Size, SMLoc L); bool ParseDirectiveThumb(SMLoc L); bool ParseDirectiveThumbFunc(SMLoc L); @@ -102,10 +96,25 @@ class ARMAsmParser : public TargetAsmParser { SmallVectorImpl<MCParsedAsmOperand*>&); OperandMatchResultTy tryParseMSRMaskOperand( SmallVectorImpl<MCParsedAsmOperand*>&); + OperandMatchResultTy tryParseMemMode2Operand( + SmallVectorImpl<MCParsedAsmOperand*>&); + OperandMatchResultTy tryParseMemMode3Operand( + SmallVectorImpl<MCParsedAsmOperand*>&); + + // Asm Match Converter Methods + bool CvtLdWriteBackRegAddrMode2(MCInst &Inst, unsigned Opcode, + const SmallVectorImpl<MCParsedAsmOperand*> &); + bool CvtStWriteBackRegAddrMode2(MCInst &Inst, unsigned Opcode, + const SmallVectorImpl<MCParsedAsmOperand*> &); + bool CvtLdWriteBackRegAddrMode3(MCInst &Inst, unsigned Opcode, + const SmallVectorImpl<MCParsedAsmOperand*> &); + bool CvtStWriteBackRegAddrMode3(MCInst &Inst, unsigned Opcode, + const SmallVectorImpl<MCParsedAsmOperand*> &); public: ARMAsmParser(const Target &T, MCAsmParser &_Parser, TargetMachine &_TM) : TargetAsmParser(T), Parser(_Parser), TM(_TM) { + MCAsmParserExtension::Initialize(_Parser); // Initialize the set of available features. setAvailableFeatures(ComputeAvailableFeatures( &TM.getSubtarget<ARMSubtarget>())); @@ -136,6 +145,7 @@ class ARMOperand : public MCParsedAsmOperand { RegisterList, DPRRegisterList, SPRRegisterList, + Shifter, Token } Kind; @@ -178,13 +188,14 @@ class ARMOperand : public MCParsedAsmOperand { /// Combined record for all forms of ARM address expressions. struct { + ARMII::AddrMode AddrMode; unsigned BaseRegNum; union { unsigned RegNum; ///< Offset register num, when OffsetIsReg. const MCExpr *Value; ///< Offset value, when !OffsetIsReg. 
} Offset; const MCExpr *ShiftAmount; // used when OffsetRegShifted is true - enum ShiftType ShiftType; // used when OffsetRegShifted is true + enum ARM_AM::ShiftOpc ShiftType; // used when OffsetRegShifted is true unsigned OffsetRegShifted : 1; // only used when OffsetIsReg is true unsigned Preindexed : 1; unsigned Postindexed : 1; @@ -192,6 +203,11 @@ class ARMOperand : public MCParsedAsmOperand { unsigned Negative : 1; // only used when OffsetIsReg is true unsigned Writeback : 1; } Mem; + + struct { + ARM_AM::ShiftOpc ShiftTy; + unsigned RegNum; + } Shift; }; ARMOperand(KindTy K) : MCParsedAsmOperand(), Kind(K) {} @@ -234,6 +250,10 @@ public: break; case ProcIFlags: IFlags = o.IFlags; + break; + case Shifter: + Shift = o.Shift; + break; } } @@ -290,7 +310,9 @@ public: /// @name Memory Operand Accessors /// @{ - + ARMII::AddrMode getMemAddrMode() const { + return Mem.AddrMode; + } unsigned getMemBaseRegNum() const { return Mem.BaseRegNum; } @@ -310,7 +332,7 @@ public: assert(Mem.OffsetIsReg && Mem.OffsetRegShifted && "Invalid access!"); return Mem.ShiftAmount; } - enum ShiftType getMemShiftType() const { + enum ARM_AM::ShiftOpc getMemShiftType() const { assert(Mem.OffsetIsReg && Mem.OffsetRegShifted && "Invalid access!"); return Mem.ShiftType; } @@ -334,6 +356,52 @@ public: bool isToken() const { return Kind == Token; } bool isMemBarrierOpt() const { return Kind == MemBarrierOpt; } bool isMemory() const { return Kind == Memory; } + bool isShifter() const { return Kind == Shifter; } + bool isMemMode2() const { + if (getMemAddrMode() != ARMII::AddrMode2) + return false; + + if (getMemOffsetIsReg()) + return true; + + if (getMemNegative() && + !(getMemPostindexed() || getMemPreindexed())) + return false; + + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getMemOffset()); + if (!CE) return false; + int64_t Value = CE->getValue(); + + // The offset must be in the range 0-4095 (imm12). + if (Value > 4095 || Value < -4095) + return false; + + return true; + } + bool isMemMode3() const { + if (getMemAddrMode() != ARMII::AddrMode3) + return false; + + if (getMemOffsetIsReg()) { + if (getMemOffsetRegShifted()) + return false; // No shift with offset reg allowed + return true; + } + + if (getMemNegative() && + !(getMemPostindexed() || getMemPreindexed())) + return false; + + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getMemOffset()); + if (!CE) return false; + int64_t Value = CE->getValue(); + + // The offset must be in the range 0-255 (imm8). 
+ if (Value > 255 || Value < -255) + return false; + + return true; + } bool isMemMode5() const { if (!isMemory() || getMemOffsetIsReg() || getMemWriteback() || getMemNegative()) @@ -346,6 +414,23 @@ public: int64_t Value = CE->getValue(); return ((Value & 0x3) == 0 && Value <= 1020 && Value >= -1020); } + bool isMemMode7() const { + if (!isMemory() || + getMemPreindexed() || + getMemPostindexed() || + getMemOffsetIsReg() || + getMemNegative() || + getMemWriteback()) + return false; + + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getMemOffset()); + if (!CE) return false; + + if (CE->getValue()) + return false; + + return true; + } bool isMemModeRegThumb() const { if (!isMemory() || !getMemOffsetIsReg() || getMemWriteback()) return false; @@ -402,6 +487,12 @@ public: Inst.addOperand(MCOperand::CreateReg(getReg())); } + void addShifterOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + Inst.addOperand(MCOperand::CreateImm( + ARM_AM::getSORegOpc(Shift.ShiftTy, 0))); + } + void addRegListOperands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); const SmallVectorImpl<unsigned> &RegList = getRegList(); @@ -428,6 +519,88 @@ public: Inst.addOperand(MCOperand::CreateImm(unsigned(getMemBarrierOpt()))); } + void addMemMode7Operands(MCInst &Inst, unsigned N) const { + assert(N == 1 && isMemMode7() && "Invalid number of operands!"); + Inst.addOperand(MCOperand::CreateReg(getMemBaseRegNum())); + + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getMemOffset()); + (void)CE; + assert((CE || CE->getValue() == 0) && + "No offset operand support in mode 7"); + } + + void addMemMode2Operands(MCInst &Inst, unsigned N) const { + assert(isMemMode2() && "Invalid mode or number of operands!"); + Inst.addOperand(MCOperand::CreateReg(getMemBaseRegNum())); + unsigned IdxMode = (getMemPreindexed() | getMemPostindexed() << 1); + + if (getMemOffsetIsReg()) { + Inst.addOperand(MCOperand::CreateReg(getMemOffsetRegNum())); + + ARM_AM::AddrOpc AMOpc = getMemNegative() ? ARM_AM::sub : ARM_AM::add; + ARM_AM::ShiftOpc ShOpc = ARM_AM::no_shift; + int64_t ShiftAmount = 0; + + if (getMemOffsetRegShifted()) { + ShOpc = getMemShiftType(); + const MCConstantExpr *CE = + dyn_cast<MCConstantExpr>(getMemShiftAmount()); + ShiftAmount = CE->getValue(); + } + + Inst.addOperand(MCOperand::CreateImm(ARM_AM::getAM2Opc(AMOpc, ShiftAmount, + ShOpc, IdxMode))); + return; + } + + // Create a operand placeholder to always yield the same number of operands. + Inst.addOperand(MCOperand::CreateReg(0)); + + // FIXME: #-0 is encoded differently than #0. Does the parser preserve + // the difference? + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getMemOffset()); + assert(CE && "Non-constant mode 2 offset operand!"); + int64_t Offset = CE->getValue(); + + if (Offset >= 0) + Inst.addOperand(MCOperand::CreateImm(ARM_AM::getAM2Opc(ARM_AM::add, + Offset, ARM_AM::no_shift, IdxMode))); + else + Inst.addOperand(MCOperand::CreateImm(ARM_AM::getAM2Opc(ARM_AM::sub, + -Offset, ARM_AM::no_shift, IdxMode))); + } + + void addMemMode3Operands(MCInst &Inst, unsigned N) const { + assert(isMemMode3() && "Invalid mode or number of operands!"); + Inst.addOperand(MCOperand::CreateReg(getMemBaseRegNum())); + unsigned IdxMode = (getMemPreindexed() | getMemPostindexed() << 1); + + if (getMemOffsetIsReg()) { + Inst.addOperand(MCOperand::CreateReg(getMemOffsetRegNum())); + + ARM_AM::AddrOpc AMOpc = getMemNegative() ? 
+      Inst.addOperand(MCOperand::CreateImm(ARM_AM::getAM3Opc(AMOpc, 0,
+                                           IdxMode)));
+      return;
+    }
+
+    // Create an operand placeholder to always yield the same number of
+    // operands.
+    Inst.addOperand(MCOperand::CreateReg(0));
+
+    // FIXME: #-0 is encoded differently than #0. Does the parser preserve
+    // the difference?
+    const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getMemOffset());
+    assert(CE && "Non-constant mode 3 offset operand!");
+    int64_t Offset = CE->getValue();
+
+    if (Offset >= 0)
+      Inst.addOperand(MCOperand::CreateImm(ARM_AM::getAM3Opc(ARM_AM::add,
+                                           Offset, IdxMode)));
+    else
+      Inst.addOperand(MCOperand::CreateImm(ARM_AM::getAM3Opc(ARM_AM::sub,
+                                           -Offset, IdxMode)));
+  }
+
   void addMemMode5Operands(MCInst &Inst, unsigned N) const {
     assert(N == 2 && isMemMode5() && "Invalid number of operands!");

@@ -525,6 +698,15 @@ public:
     return Op;
   }

+  static ARMOperand *CreateShifter(ARM_AM::ShiftOpc ShTy,
+                                   SMLoc S, SMLoc E) {
+    ARMOperand *Op = new ARMOperand(Shifter);
+    Op->Shift.ShiftTy = ShTy;
+    Op->StartLoc = S;
+    Op->EndLoc = E;
+    return Op;
+  }
+
   static ARMOperand *
   CreateRegList(const SmallVectorImpl<std::pair<unsigned, SMLoc> > &Regs,
                 SMLoc StartLoc, SMLoc EndLoc) {
@@ -553,9 +735,10 @@ public:
     return Op;
   }

-  static ARMOperand *CreateMem(unsigned BaseRegNum, bool OffsetIsReg,
-                               const MCExpr *Offset, int OffsetRegNum,
-                               bool OffsetRegShifted, enum ShiftType ShiftType,
+  static ARMOperand *CreateMem(ARMII::AddrMode AddrMode, unsigned BaseRegNum,
+                               bool OffsetIsReg, const MCExpr *Offset,
+                               int OffsetRegNum, bool OffsetRegShifted,
+                               enum ARM_AM::ShiftOpc ShiftType,
                                const MCExpr *ShiftAmount, bool Preindexed,
                                bool Postindexed, bool Negative, bool Writeback,
                                SMLoc S, SMLoc E) {
@@ -571,6 +754,7 @@ public:
           "Cannot have expression offset and register offset!");

     ARMOperand *Op = new ARMOperand(Memory);
+    Op->Mem.AddrMode = AddrMode;
     Op->Mem.BaseRegNum = BaseRegNum;
     Op->Mem.OffsetIsReg = OffsetIsReg;
     if (OffsetIsReg)
@@ -642,7 +826,8 @@ void ARMOperand::dump(raw_ostream &OS) const {
     break;
   case Memory:
     OS << "<memory "
-       << "base:" << getMemBaseRegNum();
+       << "am:" << ARMII::AddrModeToString(getMemAddrMode())
+       << " base:" << getMemBaseRegNum();
     if (getMemOffsetIsReg()) {
       OS << " offset:<register " << getMemOffsetRegNum();
       if (getMemOffsetRegShifted()) {
@@ -676,6 +861,9 @@ void ARMOperand::dump(raw_ostream &OS) const {
   case Register:
     OS << "<register " << getReg() << ">";
     break;
+  case Shifter:
+    OS << "<shifter " << getShiftOpcStr(Shift.ShiftTy) << ">";
+    break;
   case RegisterList:
   case DPRRegisterList:
   case SPRRegisterList: {
@@ -738,6 +926,42 @@ int ARMAsmParser::TryParseRegister() {
   return RegNum;
 }

+/// Try to parse a shifter operand: a shift opcode (lsl, lsr, asr, ror or rrx)
+/// followed by the register to shift. The current token must be an Identifier
+/// when called. On a match the register and shifter operands are added and
+/// false is returned; otherwise true is returned.
+bool ARMAsmParser::TryParseShiftRegister(
+    SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+  SMLoc S = Parser.getTok().getLoc();
+  const AsmToken &Tok = Parser.getTok();
+  assert(Tok.is(AsmToken::Identifier) && "Token is not an Identifier");
+
+  std::string upperCase = Tok.getString().str();
+  std::string lowerCase = LowercaseString(upperCase);
+  ARM_AM::ShiftOpc ShiftTy = StringSwitch<ARM_AM::ShiftOpc>(lowerCase)
+      .Case("lsl", ARM_AM::lsl)
+      .Case("lsr", ARM_AM::lsr)
+      .Case("asr", ARM_AM::asr)
+      .Case("ror", ARM_AM::ror)
+      .Case("rrx", ARM_AM::rrx)
+      .Default(ARM_AM::no_shift);
+
+  if (ShiftTy == ARM_AM::no_shift)
+    return true;
+
+  Parser.Lex(); // Eat the shift-type token.
+  int RegNum = TryParseRegister();
+  if (RegNum == -1)
+    return Error(Parser.getTok().getLoc(), "register expected");
+
+  Operands.push_back(ARMOperand::CreateReg(RegNum, S, Parser.getTok().getLoc()));
+  Operands.push_back(ARMOperand::CreateShifter(ShiftTy,
+                                               S, Parser.getTok().getLoc()));
+
+  return false;
+}
+
+
 /// Try to parse a register name. The token must be an Identifier when called.
 /// If it's a register, an AsmOperand is created. Another AsmOperand is created
 /// if there is a "writeback". 'true' if it's not a register.
@@ -1046,13 +1270,96 @@ tryParseMSRMaskOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
   return MatchOperand_Success;
 }

+/// tryParseMemMode2Operand - Try to parse memory addressing mode 2 operand.
+ARMAsmParser::OperandMatchResultTy ARMAsmParser::
+tryParseMemMode2Operand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+  assert(Parser.getTok().is(AsmToken::LBrac) && "Token is not a \"[\"");
+
+  if (ParseMemory(Operands, ARMII::AddrMode2))
+    return MatchOperand_NoMatch;
+
+  return MatchOperand_Success;
+}
+
+/// tryParseMemMode3Operand - Try to parse memory addressing mode 3 operand.
+ARMAsmParser::OperandMatchResultTy ARMAsmParser::
+tryParseMemMode3Operand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+  assert(Parser.getTok().is(AsmToken::LBrac) && "Token is not a \"[\"");
+
+  if (ParseMemory(Operands, ARMII::AddrMode3))
+    return MatchOperand_NoMatch;
+
+  return MatchOperand_Success;
+}
+
+/// CvtLdWriteBackRegAddrMode2 - Convert parsed operands to MCInst.
+/// Needed here because the Asm Gen Matcher can't properly handle tied operands
+/// when they refer to multiple MIOperands inside a single one.
+bool ARMAsmParser::
+CvtLdWriteBackRegAddrMode2(MCInst &Inst, unsigned Opcode,
+                           const SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+  ((ARMOperand*)Operands[2])->addRegOperands(Inst, 1);
+
+  // Create a writeback register dummy placeholder.
+  Inst.addOperand(MCOperand::CreateImm(0));
+
+  ((ARMOperand*)Operands[3])->addMemMode2Operands(Inst, 3);
+  ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2);
+  return true;
+}
+
+/// CvtStWriteBackRegAddrMode2 - Convert parsed operands to MCInst.
+/// Needed here because the Asm Gen Matcher can't properly handle tied operands
+/// when they refer to multiple MIOperands inside a single one.
+bool ARMAsmParser::
+CvtStWriteBackRegAddrMode2(MCInst &Inst, unsigned Opcode,
+                           const SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+  // Create a writeback register dummy placeholder.
+  Inst.addOperand(MCOperand::CreateImm(0));
+  ((ARMOperand*)Operands[2])->addRegOperands(Inst, 1);
+  ((ARMOperand*)Operands[3])->addMemMode2Operands(Inst, 3);
+  ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2);
+  return true;
+}
+
+/// CvtLdWriteBackRegAddrMode3 - Convert parsed operands to MCInst.
+/// Needed here because the Asm Gen Matcher can't properly handle tied operands
+/// when they refer to multiple MIOperands inside a single one.
+bool ARMAsmParser::
+CvtLdWriteBackRegAddrMode3(MCInst &Inst, unsigned Opcode,
+                           const SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+  ((ARMOperand*)Operands[2])->addRegOperands(Inst, 1);
+
+  // Create a writeback register dummy placeholder.
+  Inst.addOperand(MCOperand::CreateImm(0));
+
+  ((ARMOperand*)Operands[3])->addMemMode3Operands(Inst, 3);
+  ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2);
+  return true;
+}
+
+/// CvtStWriteBackRegAddrMode3 - Convert parsed operands to MCInst.
+/// Needed here because the Asm Gen Matcher can't properly handle tied operands
+/// when they refer to multiple MIOperands inside a single one.
+bool ARMAsmParser::
+CvtStWriteBackRegAddrMode3(MCInst &Inst, unsigned Opcode,
+                           const SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+  // Create a writeback register dummy placeholder.
+  Inst.addOperand(MCOperand::CreateImm(0));
+  ((ARMOperand*)Operands[2])->addRegOperands(Inst, 1);
+  ((ARMOperand*)Operands[3])->addMemMode3Operands(Inst, 3);
+  ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2);
+  return true;
+}
+
 /// Parse an ARM memory expression, return false if successful else return true
 /// or an error. The first token must be a '[' when called.
 ///
 /// TODO Only preindexing and postindexing addressing are started, unindexed
 /// with option, etc are still to do.
 bool ARMAsmParser::
-ParseMemory(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ParseMemory(SmallVectorImpl<MCParsedAsmOperand*> &Operands,
+            ARMII::AddrMode AddrMode = ARMII::AddrModeNone) {
   SMLoc S, E;
   assert(Parser.getTok().is(AsmToken::LBrac) &&
          "Token is not a Left Bracket");
@@ -1083,7 +1390,7 @@ ParseMemory(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
   ARMOperand *WBOp = 0;
   int OffsetRegNum = -1;
   bool OffsetRegShifted = false;
-  enum ShiftType ShiftType = Lsl;
+  enum ARM_AM::ShiftOpc ShiftType = ARM_AM::lsl;
   const MCExpr *ShiftAmount = 0;
   const MCExpr *Offset = 0;

@@ -1106,10 +1413,17 @@ ParseMemory(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {

     const AsmToken &ExclaimTok = Parser.getTok();
     if (ExclaimTok.is(AsmToken::Exclaim)) {
+      // No addrmode3 instruction uses "!"
+      if (AddrMode == ARMII::AddrMode3)
+        return true;
+
       WBOp = ARMOperand::CreateToken(ExclaimTok.getString(),
                                      ExclaimTok.getLoc());
       Writeback = true;
       Parser.Lex(); // Eat exclaim token
+    } else { // In addressing mode 2, pre-indexed mode always ends with "!"
+      if (AddrMode == ARMII::AddrMode2)
+        Preindexed = false;
     }
   } else {
     // The "[Rn" we have so far was not followed by a comma.
@@ -1143,13 +1457,17 @@ ParseMemory(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
   if (!OffsetIsReg) {
     if (!Offset)
       Offset = MCConstantExpr::Create(0, getContext());
+  } else {
+    if (AddrMode == ARMII::AddrMode3 && OffsetRegShifted) {
+      Error(E, "shift amount not supported");
+      return true;
+    }
   }

-  Operands.push_back(ARMOperand::CreateMem(BaseRegNum, OffsetIsReg, Offset,
-                                           OffsetRegNum, OffsetRegShifted,
-                                           ShiftType, ShiftAmount, Preindexed,
-                                           Postindexed, Negative, Writeback,
-                                           S, E));
+  Operands.push_back(ARMOperand::CreateMem(AddrMode, BaseRegNum, OffsetIsReg,
+                                           Offset, OffsetRegNum, OffsetRegShifted,
+                                           ShiftType, ShiftAmount, Preindexed,
+                                           Postindexed, Negative, Writeback, S, E));
   if (WBOp)
     Operands.push_back(WBOp);

@@ -1165,7 +1483,7 @@ ParseMemory(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
 /// we return false on success or an error otherwise.
 bool ARMAsmParser::ParseMemoryOffsetReg(bool &Negative,
                                         bool &OffsetRegShifted,
-                                        enum ShiftType &ShiftType,
+                                        enum ARM_AM::ShiftOpc &ShiftType,
                                         const MCExpr *&ShiftAmount,
                                         const MCExpr *&Offset,
                                         bool &OffsetIsReg,
@@ -1226,28 +1544,28 @@ bool ARMAsmParser::ParseMemoryOffsetReg(bool &Negative,
 ///   ( lsl | lsr | asr | ror ) , # shift_amount
 ///   rrx
 /// and returns true if it parses a shift otherwise it returns false.
-bool ARMAsmParser::ParseShift(ShiftType &St, const MCExpr *&ShiftAmount,
-                              SMLoc &E) {
+bool ARMAsmParser::ParseShift(ARM_AM::ShiftOpc &St,
+                              const MCExpr *&ShiftAmount, SMLoc &E) {
   const AsmToken &Tok = Parser.getTok();
   if (Tok.isNot(AsmToken::Identifier))
     return true;
   StringRef ShiftName = Tok.getString();
   if (ShiftName == "lsl" || ShiftName == "LSL")
-    St = Lsl;
+    St = ARM_AM::lsl;
   else if (ShiftName == "lsr" || ShiftName == "LSR")
-    St = Lsr;
+    St = ARM_AM::lsr;
   else if (ShiftName == "asr" || ShiftName == "ASR")
-    St = Asr;
+    St = ARM_AM::asr;
   else if (ShiftName == "ror" || ShiftName == "ROR")
-    St = Ror;
+    St = ARM_AM::ror;
   else if (ShiftName == "rrx" || ShiftName == "RRX")
-    St = Rrx;
+    St = ARM_AM::rrx;
   else
     return true;
   Parser.Lex(); // Eat shift type token.

   // Rrx stands alone.
-  if (St == Rrx)
+  if (St == ARM_AM::rrx)
     return false;

   // Otherwise, there must be a '#' and a shift amount.
@@ -1286,6 +1604,9 @@ bool ARMAsmParser::ParseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands,
   case AsmToken::Identifier:
     if (!TryParseRegisterWithWriteBack(Operands))
       return false;
+    if (!TryParseShiftRegister(Operands))
+      return false;
+
     // Fall though for the Identifier case that is not a register or a
     // special name.
diff --git a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
index 78d73d3a272b..bdce2c4cf896 100644
--- a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
+++ b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
@@ -18,6 +18,7 @@
 #include "ARMDisassembler.h"
 #include "ARMDisassemblerCore.h"

+#include "llvm/ADT/OwningPtr.h"
 #include "llvm/MC/EDInstInfo.h"
 #include "llvm/MC/MCInst.h"
 #include "llvm/Target/TargetRegistry.h"
@@ -94,6 +95,9 @@ static unsigned decodeARMInstruction(uint32_t &insn) {
   // As a result, the decoder fails to deocode USAT properly.
   if (slice(insn, 27, 21) == 0x37 && slice(insn, 5, 4) == 1)
     return ARM::USAT;
+  // As a result, the decoder fails to decode UQADD16 properly.
+  if (slice(insn, 27, 20) == 0x66 && slice(insn, 7, 4) == 1)
+    return ARM::UQADD16;

   // Ditto for ADDSrs, which is a super-instruction for A8.6.7 & A8.6.8.
   // As a result, the decoder fails to decode UMULL properly.
@@ -280,6 +284,24 @@ static unsigned T2Morph2LoadLiteral(unsigned Opcode) {
   }
 }

+// Helper function for special case handling of PLD (literal) and friends.
+// See A8.6.117 T1 & T2 and friends for why we morphed the opcode
+// before returning it.
+static unsigned T2Morph2PLDLiteral(unsigned Opcode) {
+  switch (Opcode) {
+  default:
+    return Opcode; // Return unmorphed opcode.
+
+  case ARM::t2PLDi8:   case ARM::t2PLDs:
+  case ARM::t2PLDWi12: case ARM::t2PLDWi8:
+  case ARM::t2PLDWs:
+    return ARM::t2PLDi12;
+
+  case ARM::t2PLIi8:   case ARM::t2PLIs:
+    return ARM::t2PLIi12;
+  }
+}
+
 /// decodeThumbSideEffect is a decorator function which can potentially twiddle
 /// the instruction or morph the returned opcode under Thumb2.
/// @@ -330,12 +352,27 @@ static unsigned decodeThumbSideEffect(bool IsThumb2, unsigned &insn) { } // --------- Transform End Marker --------- + unsigned unmorphed = decodeThumbInstruction(insn); + // See, for example, A6.3.7 Load word: Table A6-18 Load word. // See A8.6.57 T3, T4 & A8.6.60 T2 and friends for why we morphed the opcode // before returning it to our caller. if (op1 == 3 && slice(op2, 6, 5) == 0 && slice(op2, 0, 0) == 1 - && slice(insn, 19, 16) == 15) - return T2Morph2LoadLiteral(decodeThumbInstruction(insn)); + && slice(insn, 19, 16) == 15) { + unsigned morphed = T2Morph2LoadLiteral(unmorphed); + if (morphed != unmorphed) + return morphed; + } + + // See, for example, A8.6.117 PLD,PLDW (immediate) T1 & T2, and friends for + // why we morphed the opcode before returning it to our caller. + if (slice(insn, 31, 25) == 0x7C && slice(insn, 15, 12) == 0xF + && slice(insn, 22, 22) == 0 && slice(insn, 20, 20) == 1 + && slice(insn, 19, 16) == 15) { + unsigned morphed = T2Morph2PLDLiteral(unmorphed); + if (morphed != unmorphed) + return morphed; + } // One last check for NEON/VFP instructions. if ((op1 == 1 || op1 == 3) && slice(op2, 6, 6) == 1) @@ -375,21 +412,23 @@ bool ARMDisassembler::getInstruction(MCInst &MI, Size = 4; DEBUG({ - errs() << "Opcode=" << Opcode << " Name=" << ARMUtils::OpcodeName(Opcode) + errs() << "\nOpcode=" << Opcode << " Name=" <<ARMUtils::OpcodeName(Opcode) << " Format=" << stringForARMFormat(Format) << '(' << (int)Format << ")\n"; showBitVector(errs(), insn); }); - ARMBasicMCBuilder *Builder = CreateMCBuilder(Opcode, Format); + OwningPtr<ARMBasicMCBuilder> Builder(CreateMCBuilder(Opcode, Format)); if (!Builder) return false; + Builder->setupBuilderForSymbolicDisassembly(getLLVMOpInfoCallback(), + getDisInfoBlock(), getMCContext(), + Address); + if (!Builder->Build(MI, insn)) return false; - delete Builder; - return true; } @@ -398,7 +437,7 @@ bool ThumbDisassembler::getInstruction(MCInst &MI, const MemoryObject &Region, uint64_t Address, raw_ostream &os) const { - // The Thumb instruction stream is a sequence of halhwords. + // The Thumb instruction stream is a sequence of halfwords. // This represents the first halfword as well as the machine instruction // passed to decodeThumbInstruction(). For 16-bit Thumb instruction, the top @@ -463,17 +502,19 @@ bool ThumbDisassembler::getInstruction(MCInst &MI, showBitVector(errs(), insn); }); - ARMBasicMCBuilder *Builder = CreateMCBuilder(Opcode, Format); + OwningPtr<ARMBasicMCBuilder> Builder(CreateMCBuilder(Opcode, Format)); if (!Builder) return false; Builder->SetSession(const_cast<Session *>(&SO)); + Builder->setupBuilderForSymbolicDisassembly(getLLVMOpInfoCallback(), + getDisInfoBlock(), getMCContext(), + Address); + if (!Builder->Build(MI, insn)) return false; - delete Builder; - return true; } diff --git a/lib/Target/ARM/Disassembler/ARMDisassemblerCore.cpp b/lib/Target/ARM/Disassembler/ARMDisassemblerCore.cpp index bac68dd9ead0..642829cdab09 100644 --- a/lib/Target/ARM/Disassembler/ARMDisassemblerCore.cpp +++ b/lib/Target/ARM/Disassembler/ARMDisassemblerCore.cpp @@ -17,6 +17,7 @@ #include "ARMDisassemblerCore.h" #include "ARMAddressingModes.h" +#include "ARMMCExpr.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" @@ -82,10 +83,28 @@ const char *ARMUtils::OpcodeName(unsigned Opcode) { // FIXME: Auto-gened? static unsigned getRegisterEnum(BO B, unsigned RegClassID, unsigned RawRegister) { - // For this purpose, we can treat rGPR as if it were GPR. 
-  if (RegClassID == ARM::rGPRRegClassID) RegClassID = ARM::GPRRegClassID;
+  if (RegClassID == ARM::rGPRRegClassID) {
+    // Check for the register numbers 13 and 15, which are not permitted for
+    // many Thumb register specifiers.
+    if (RawRegister == 13 || RawRegister == 15) {
+      B->SetErr(-1);
+      return 0;
+    }
+    // For this purpose, we can treat rGPR as if it were GPR.
+    RegClassID = ARM::GPRRegClassID;
+  }

   // See also decodeNEONRd(), decodeNEONRn(), decodeNEONRm().
+  // A7.3 register encoding
+  //     Qd -> bit[12] == 0
+  //     Qn -> bit[16] == 0
+  //     Qm -> bit[0] == 0
+  //
+  // If one of these bits is 1, the instruction is UNDEFINED.
+  if (RegClassID == ARM::QPRRegClassID && slice(RawRegister, 0, 0) == 1) {
+    B->SetErr(-1);
+    return 0;
+  }
   unsigned RegNum =
     RegClassID == ARM::QPRRegClassID ? RawRegister >> 1 : RawRegister;
@@ -497,14 +516,66 @@ static bool DisassemblePseudo(MCInst &MI, unsigned Opcode, uint32_t insn,
   return false;
 }

+// A8.6.94 MLA
+//  if d == 15 || n == 15 || m == 15 || a == 15 then UNPREDICTABLE;
+//
+// A8.6.105 MUL
+//  if d == 15 || n == 15 || m == 15 then UNPREDICTABLE;
+//
+// A8.6.246 UMULL
+//  if dLo == 15 || dHi == 15 || n == 15 || m == 15 then UNPREDICTABLE;
+//  if dHi == dLo then UNPREDICTABLE;
+static bool BadRegsMulFrm(unsigned Opcode, uint32_t insn) {
+  unsigned R19_16 = slice(insn, 19, 16);
+  unsigned R15_12 = slice(insn, 15, 12);
+  unsigned R11_8 = slice(insn, 11, 8);
+  unsigned R3_0 = slice(insn, 3, 0);
+  switch (Opcode) {
+  default:
+    // Did we miss an opcode?
+    DEBUG(errs() << "BadRegsMulFrm: unexpected opcode!");
+    return false;
+  case ARM::MLA: case ARM::MLS: case ARM::SMLABB: case ARM::SMLABT:
+  case ARM::SMLATB: case ARM::SMLATT: case ARM::SMLAWB: case ARM::SMLAWT:
+  case ARM::SMMLA: case ARM::SMMLAR: case ARM::SMMLS: case ARM::SMMLSR:
+  case ARM::USADA8:
+    if (R19_16 == 15 || R15_12 == 15 || R11_8 == 15 || R3_0 == 15)
+      return true;
+    return false;
+  case ARM::MUL: case ARM::SMMUL: case ARM::SMMULR:
+  case ARM::SMULBB: case ARM::SMULBT: case ARM::SMULTB: case ARM::SMULTT:
+  case ARM::SMULWB: case ARM::SMULWT: case ARM::SMUAD: case ARM::SMUADX:
+  // A8.6.167 SMLAD & A8.6.172 SMLSD
+  case ARM::SMLAD: case ARM::SMLADX: case ARM::SMLSD: case ARM::SMLSDX:
+  case ARM::USAD8:
+    if (R19_16 == 15 || R11_8 == 15 || R3_0 == 15)
+      return true;
+    return false;
+  case ARM::SMLAL: case ARM::SMULL: case ARM::UMAAL: case ARM::UMLAL:
+  case ARM::UMULL:
+  case ARM::SMLALBB: case ARM::SMLALBT: case ARM::SMLALTB: case ARM::SMLALTT:
+  case ARM::SMLALD: case ARM::SMLALDX: case ARM::SMLSLD: case ARM::SMLSLDX:
+    if (R19_16 == 15 || R15_12 == 15 || R11_8 == 15 || R3_0 == 15)
+      return true;
+    if (R19_16 == R15_12)
+      return true;
+    return false;
+  }
+}
+
 // Multiply Instructions.
-// MLA, MLS, SMLABB, SMLABT, SMLATB, SMLATT, SMLAWB, SMLAWT, SMMLA, SMMLS:
+// MLA, MLS, SMLABB, SMLABT, SMLATB, SMLATT, SMLAWB, SMLAWT, SMMLA, SMMLAR,
+// SMMLS, SMMLSR, SMLAD, SMLADX, SMLSD, SMLSDX, and USADA8 (for convenience):
 //     Rd{19-16} Rn{3-0} Rm{11-8} Ra{15-12}
+// But note that register checking for {SMLAD, SMLADX, SMLSD, SMLSDX} is
+// only for {d, n, m}.
 //
-// MUL, SMMUL, SMULBB, SMULBT, SMULTB, SMULTT, SMULWB, SMULWT:
+// MUL, SMMUL, SMMULR, SMULBB, SMULBT, SMULTB, SMULTT, SMULWB, SMULWT, SMUAD,
+// SMUADX, and USAD8 (for convenience):
 //     Rd{19-16} Rn{3-0} Rm{11-8}
 //
-// SMLAL, SMULL, UMAAL, UMLAL, UMULL, SMLALBB, SMLALBT, SMLALTB, SMLALTT:
+// SMLAL, SMULL, UMAAL, UMLAL, UMULL, SMLALBB, SMLALBT, SMLALTB, SMLALTT,
+// SMLALD, SMLALDX, SMLSLD, SMLSLDX:
 //     RdLo{15-12} RdHi{19-16} Rn{3-0} Rm{11-8}
 //
 // The mapping of the multiply registers to the "regular" ARM registers, where
@@ -531,6 +602,10 @@ static bool DisassembleMulFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
          && OpInfo[2].RegClass == ARM::GPRRegClassID
          && "Expect three register operands");

+  // Sanity check for the register encodings.
+  if (BadRegsMulFrm(Opcode, insn))
+    return false;
+
   // Instructions with two destination registers have RdLo{15-12} first.
   if (NumDefs == 2) {
     assert(NumOps >= 4 && OpInfo[3].RegClass == ARM::GPRRegClassID &&
@@ -618,18 +693,38 @@ static inline unsigned GetCopOpc(uint32_t insn) {
 static bool DisassembleCoprocessor(MCInst &MI, unsigned Opcode, uint32_t insn,
     unsigned short NumOps, unsigned &NumOpsAdded, BO B) {

-  assert(NumOps >= 5 && "Num of operands >= 5 for coprocessor instr");
+  assert(NumOps >= 4 && "Num of operands >= 4 for coprocessor instr");

   unsigned &OpIdx = NumOpsAdded;
+  // A8.6.92
+  // if coproc == '101x' then SEE "Advanced SIMD and VFP"
+  // But since the special instructions have more explicit encoding bits
+  // specified, if coproc == 10 or 11, we should reject it as invalid.
+  unsigned coproc = GetCoprocessor(insn);
+  if ((Opcode == ARM::MCR || Opcode == ARM::MCRR ||
+       Opcode == ARM::MRC || Opcode == ARM::MRRC) &&
+      (coproc == 10 || coproc == 11)) {
+    DEBUG(errs() << "Encoding error: coproc == 10 or 11 for MCR[R]/MR[R]C\n");
+    return false;
+  }
+
   bool OneCopOpc = (Opcode == ARM::MCRR || Opcode == ARM::MCRR2 ||
                     Opcode == ARM::MRRC || Opcode == ARM::MRRC2);
+
   // CDP/CDP2 has no GPR operand; the opc1 operand is also wider (Inst{23-20}).
   bool NoGPR = (Opcode == ARM::CDP || Opcode == ARM::CDP2);
   bool LdStCop = LdStCopOpcode(Opcode);
+  bool RtOut = (Opcode == ARM::MRC || Opcode == ARM::MRC2);

   OpIdx = 0;

-  MI.addOperand(MCOperand::CreateImm(GetCoprocessor(insn)));
+  if (RtOut) {
+    MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+                                                       decodeRd(insn))));
+    ++OpIdx;
+  }
+  MI.addOperand(MCOperand::CreateImm(coproc));
+  ++OpIdx;

   if (LdStCop) {
     // Unindex if P:W = 0b00 --> _OPTION variant
@@ -639,26 +734,34 @@ static bool DisassembleCoprocessor(MCInst &MI, unsigned Opcode, uint32_t insn,

     MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
                                                        decodeRn(insn))));
+    OpIdx += 2;

     if (PW) {
       MI.addOperand(MCOperand::CreateReg(0));
       ARM_AM::AddrOpc AddrOpcode = getUBit(insn) ? ARM_AM::add : ARM_AM::sub;
+      const TargetInstrDesc &TID = ARMInsts[Opcode];
+      unsigned IndexMode =
+        (TID.TSFlags & ARMII::IndexModeMask) >> ARMII::IndexModeShift;
       unsigned Offset = ARM_AM::getAM2Opc(AddrOpcode, slice(insn, 7, 0) << 2,
-                                          ARM_AM::no_shift);
+                                          ARM_AM::no_shift, IndexMode);
       MI.addOperand(MCOperand::CreateImm(Offset));
-      OpIdx = 5;
+      OpIdx += 2;
     } else {
       MI.addOperand(MCOperand::CreateImm(slice(insn, 7, 0)));
-      OpIdx = 4;
+      ++OpIdx;
     }
   } else {
     MI.addOperand(MCOperand::CreateImm(OneCopOpc ? GetCopOpc(insn)
                                                  : GetCopOpc1(insn, NoGPR)));
+    ++OpIdx;

-    MI.addOperand(NoGPR ?
MCOperand::CreateImm(decodeRd(insn)) - : MCOperand::CreateReg( - getRegisterEnum(B, ARM::GPRRegClassID, - decodeRd(insn)))); + if (!RtOut) { + MI.addOperand(NoGPR ? MCOperand::CreateImm(decodeRd(insn)) + : MCOperand::CreateReg( + getRegisterEnum(B, ARM::GPRRegClassID, + decodeRd(insn)))); + ++OpIdx; + } MI.addOperand(OneCopOpc ? MCOperand::CreateReg( getRegisterEnum(B, ARM::GPRRegClassID, @@ -667,7 +770,7 @@ static bool DisassembleCoprocessor(MCInst &MI, unsigned Opcode, uint32_t insn, MI.addOperand(MCOperand::CreateImm(decodeRm(insn))); - OpIdx = 5; + OpIdx += 2; if (!OneCopOpc) { MI.addOperand(MCOperand::CreateImm(GetCopOpc2(insn))); @@ -679,8 +782,8 @@ static bool DisassembleCoprocessor(MCInst &MI, unsigned Opcode, uint32_t insn, } // Branch Instructions. -// BLr9: SignExtend(Imm24:'00', 32) -// Bcc, BLr9_pred: SignExtend(Imm24:'00', 32) Pred0 Pred1 +// BL: SignExtend(Imm24:'00', 32) +// Bcc, BL_pred: SignExtend(Imm24:'00', 32) Pred0 Pred1 // SMC: ZeroExtend(imm4, 32) // SVC: ZeroExtend(Imm24, 32) // @@ -735,6 +838,11 @@ static bool DisassembleBrFrm(MCInst &MI, unsigned Opcode, uint32_t insn, // MSRi take a mask, followed by one so_imm operand. The mask contains the // R Bit in bit 4, and the special register fields in bits 3-0. if (Opcode == ARM::MSRi) { + // A5.2.11 MSR (immediate), and hints & B6.1.6 MSR (immediate) + // The hints instructions have more specific encodings, so if mask == 0, + // we should reject this as an invalid instruction. + if (slice(insn, 19, 16) == 0) + return false; MI.addOperand(MCOperand::CreateImm(slice(insn, 22, 22) << 4 /* R Bit */ | slice(insn, 19, 16) /* Special Reg */ )); // SOImm is 4-bit rotate amount in bits 11-8 with 8-bit imm in bits 7-0. @@ -760,11 +868,11 @@ static bool DisassembleBrFrm(MCInst &MI, unsigned Opcode, uint32_t insn, return true; } - assert((Opcode == ARM::Bcc || Opcode == ARM::BLr9 || Opcode == ARM::BLr9_pred + assert((Opcode == ARM::Bcc || Opcode == ARM::BL || Opcode == ARM::BL_pred || Opcode == ARM::SMC || Opcode == ARM::SVC) && "Unexpected Opcode"); - assert(NumOps >= 1 && OpInfo[0].RegClass < 0 && "Reg operand expected"); + assert(NumOps >= 1 && OpInfo[0].RegClass < 0 && "Imm operand expected"); int Imm32 = 0; if (Opcode == ARM::SMC) { @@ -778,12 +886,6 @@ static bool DisassembleBrFrm(MCInst &MI, unsigned Opcode, uint32_t insn, unsigned Imm26 = slice(insn, 23, 0) << 2; //Imm32 = signextend<signed int, 26>(Imm26); Imm32 = SignExtend32<26>(Imm26); - - // When executing an ARM instruction, PC reads as the address of the current - // instruction plus 8. The assembler subtracts 8 from the difference - // between the branch instruction and the target address, disassembler has - // to add 8 to compensate. - Imm32 += 8; } MI.addOperand(MCOperand::CreateImm(Imm32)); @@ -793,7 +895,7 @@ static bool DisassembleBrFrm(MCInst &MI, unsigned Opcode, uint32_t insn, } // Misc. Branch Instructions. -// BLXr9, BXr9 +// BLX, BLXi, BX // BX, BX_RET static bool DisassembleBrMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) { @@ -809,8 +911,9 @@ static bool DisassembleBrMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn, if (Opcode == ARM::BX_RET || Opcode == ARM::MOVPCLR) return true; - // BLXr9 and BX take one GPR reg. - if (Opcode == ARM::BLXr9 || Opcode == ARM::BX) { + // BLX and BX take one GPR reg. 
+ if (Opcode == ARM::BLX || Opcode == ARM::BLX_pred || + Opcode == ARM::BX) { assert(NumOps >= 1 && OpInfo[OpIdx].RegClass == ARM::GPRRegClassID && "Reg operand expected"); MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, @@ -819,6 +922,17 @@ static bool DisassembleBrMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn, return true; } + // BLXi takes imm32 (the PC offset). + if (Opcode == ARM::BLXi) { + assert(NumOps >= 1 && OpInfo[0].RegClass < 0 && "Imm operand expected"); + // SignExtend(imm24:H:'0', 32) where imm24 = Inst{23-0} and H = Inst{24}. + unsigned Imm26 = slice(insn, 23, 0) << 2 | slice(insn, 24, 24) << 1; + int Imm32 = SignExtend32<26>(Imm26); + MI.addOperand(MCOperand::CreateImm(Imm32)); + OpIdx = 1; + return true; + } + return false; } @@ -837,6 +951,24 @@ static inline bool getBFCInvMask(uint32_t insn, uint32_t &mask) { return true; } +// Standard data-processing instructions allow PC as a register specifier, +// but we should reject other DPFrm instructions with PC as registers. +static bool BadRegsDPFrm(unsigned Opcode, uint32_t insn) { + switch (Opcode) { + default: + // Did we miss an opcode? + if (decodeRd(insn) == 15 || decodeRn(insn) == 15 || decodeRm(insn) == 15) { + DEBUG(errs() << "DPFrm with bad reg specifier(s)\n"); + return true; + } + case ARM::ADCrr: case ARM::ADDSrr: case ARM::ADDrr: case ARM::ANDrr: + case ARM::BICrr: case ARM::CMNzrr: case ARM::CMPrr: case ARM::EORrr: + case ARM::ORRrr: case ARM::RSBrr: case ARM::RSCrr: case ARM::SBCrr: + case ARM::SUBSrr: case ARM::SUBrr: case ARM::TEQrr: case ARM::TSTrr: + return false; + } +} + // A major complication is the fact that some of the saturating add/subtract // operations have Rd Rm Rn, instead of the "normal" Rd Rn Rm. // They are QADD, QDADD, QDSUB, and QSUB. @@ -864,6 +996,10 @@ static bool DisassembleDPFrm(MCInst &MI, unsigned Opcode, uint32_t insn, // Special-case handling of BFC/BFI/SBFX/UBFX. if (Opcode == ARM::BFC || Opcode == ARM::BFI) { + // A8.6.17 BFC & A8.6.18 BFI + // Sanity check Rd. + if (decodeRd(insn) == 15) + return false; MI.addOperand(MCOperand::CreateReg(0)); if (Opcode == ARM::BFI) { MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, @@ -879,6 +1015,9 @@ static bool DisassembleDPFrm(MCInst &MI, unsigned Opcode, uint32_t insn, return true; } if (Opcode == ARM::SBFX || Opcode == ARM::UBFX) { + // Sanity check Rd and Rm. + if (decodeRd(insn) == 15 || decodeRm(insn) == 15) + return false; MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRm(insn)))); MI.addOperand(MCOperand::CreateImm(slice(insn, 11, 7))); @@ -915,15 +1054,21 @@ static bool DisassembleDPFrm(MCInst &MI, unsigned Opcode, uint32_t insn, // Assert disabled because saturating operations, e.g., A8.6.127 QASX, are // routed here as well. // assert(getIBit(insn) == 0 && "I_Bit != '0' reg/reg form"); + if (BadRegsDPFrm(Opcode, insn)) + return false; MI.addOperand(MCOperand::CreateReg( getRegisterEnum(B, ARM::GPRRegClassID, RmRn? decodeRn(insn) : decodeRm(insn)))); ++OpIdx; } else if (Opcode == ARM::MOVi16 || Opcode == ARM::MOVTi16) { + // These two instructions don't allow d as 15. + if (decodeRd(insn) == 15) + return false; // We have an imm16 = imm4:imm12 (imm4=Inst{19:16}, imm12 = Inst{11:0}). 
assert(getIBit(insn) == 1 && "I_Bit != '1' reg/imm form"); unsigned Imm16 = slice(insn, 19, 16) << 12 | slice(insn, 11, 0); - MI.addOperand(MCOperand::CreateImm(Imm16)); + if (!B->tryAddingSymbolicOperand(Imm16, 4, MI)) + MI.addOperand(MCOperand::CreateImm(Imm16)); ++OpIdx; } else { // We have a reg/imm form. @@ -992,6 +1137,21 @@ static bool DisassembleDPSoRegFrm(MCInst &MI, unsigned Opcode, uint32_t insn, MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRm(insn)))); if (Rs) { + // If Inst{7} != 0, we should reject this insn as an invalid encoding. + if (slice(insn, 7, 7)) + return false; + + // A8.6.3 ADC (register-shifted register) + // if d == 15 || n == 15 || m == 15 || s == 15 then UNPREDICTABLE; + // + // This also accounts for shift instructions (register) where, fortunately, + // Inst{19-16} = 0b0000. + // A8.6.89 LSL (register) + // if d == 15 || n == 15 || m == 15 then UNPREDICTABLE; + if (decodeRd(insn) == 15 || decodeRn(insn) == 15 || + decodeRm(insn) == 15 || decodeRs(insn) == 15) + return false; + // Register-controlled shifts: [Rm, Rs, shift]. MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRs(insn)))); @@ -1015,6 +1175,71 @@ static bool DisassembleDPSoRegFrm(MCInst &MI, unsigned Opcode, uint32_t insn, return true; } +static bool BadRegsLdStFrm(unsigned Opcode, uint32_t insn, bool Store, bool WBack, + bool Imm) { + const StringRef Name = ARMInsts[Opcode].Name; + unsigned Rt = decodeRd(insn); + unsigned Rn = decodeRn(insn); + unsigned Rm = decodeRm(insn); + unsigned P = getPBit(insn); + unsigned W = getWBit(insn); + + if (Store) { + // Only STR (immediate, register) allows PC as the source. + if (Name.startswith("STRB") && Rt == 15) { + DEBUG(errs() << "if t == 15 then UNPREDICTABLE\n"); + return true; + } + if (WBack && (Rn == 15 || Rn == Rt)) { + DEBUG(errs() << "if wback && (n == 15 || n == t) then UNPREDICTABLE\n"); + return true; + } + if (!Imm && Rm == 15) { + DEBUG(errs() << "if m == 15 then UNPREDICTABLE\n"); + return true; + } + } else { + // Only LDR (immediate, register) allows PC as the destination. + if (Name.startswith("LDRB") && Rt == 15) { + DEBUG(errs() << "if t == 15 then UNPREDICTABLE\n"); + return true; + } + if (Imm) { + // Immediate + if (Rn == 15) { + // The literal form must be in offset mode; it's an encoding error + // otherwise. + if (!(P == 1 && W == 0)) { + DEBUG(errs() << "Ld literal form with !(P == 1 && W == 0)\n"); + return true; + } + // LDRB (literal) does not allow PC as the destination. + if (Opcode != ARM::LDRi12 && Rt == 15) { + DEBUG(errs() << "if t == 15 then UNPREDICTABLE\n"); + return true; + } + } else { + // Write back while Rn == Rt does not make sense. 
+ if (WBack && (Rn == Rt)) { + DEBUG(errs() << "if wback && n == t then UNPREDICTABLE\n"); + return true; + } + } + } else { + // Register + if (Rm == 15) { + DEBUG(errs() << "if m == 15 then UNPREDICTABLE\n"); + return true; + } + if (WBack && (Rn == 15 || Rn == Rt)) { + DEBUG(errs() << "if wback && (n == 15 || n == t) then UNPREDICTABLE\n"); + return true; + } + } + } + return false; +} + static bool DisassembleLdStFrm(MCInst &MI, unsigned Opcode, uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, bool isStore, BO B) { @@ -1077,19 +1302,41 @@ static bool DisassembleLdStFrm(MCInst &MI, unsigned Opcode, uint32_t insn, if (OpIdx + 1 >= NumOps) return false; - assert((OpInfo[OpIdx].RegClass == ARM::GPRRegClassID) && - (OpInfo[OpIdx+1].RegClass < 0) && - "Expect 1 reg operand followed by 1 imm operand"); + if (BadRegsLdStFrm(Opcode, insn, isStore, isPrePost, getIBit(insn)==0)) + return false; ARM_AM::AddrOpc AddrOpcode = getUBit(insn) ? ARM_AM::add : ARM_AM::sub; + unsigned IndexMode = + (TID.TSFlags & ARMII::IndexModeMask) >> ARMII::IndexModeShift; if (getIBit(insn) == 0) { - MI.addOperand(MCOperand::CreateReg(0)); + // For pre- and post-indexed case, add a reg0 operand (Addressing Mode #2). + // Otherwise, skip the reg operand since for addrmode_imm12, Rn has already + // been populated. + if (isPrePost) { + MI.addOperand(MCOperand::CreateReg(0)); + OpIdx += 1; + } - // Disassemble the 12-bit immediate offset. unsigned Imm12 = slice(insn, 11, 0); - unsigned Offset = ARM_AM::getAM2Opc(AddrOpcode, Imm12, ARM_AM::no_shift); - MI.addOperand(MCOperand::CreateImm(Offset)); + if (Opcode == ARM::LDRBi12 || Opcode == ARM::LDRi12 || + Opcode == ARM::STRBi12 || Opcode == ARM::STRi12) { + // Disassemble the 12-bit immediate offset, which is the second operand in + // $addrmode_imm12 => (ops GPR:$base, i32imm:$offsimm). + int Offset = AddrOpcode == ARM_AM::add ? 1 * Imm12 : -1 * Imm12; + MI.addOperand(MCOperand::CreateImm(Offset)); + } else { + // Disassemble the 12-bit immediate offset, which is the second operand in + // $am2offset => (ops GPR, i32imm). + unsigned Offset = ARM_AM::getAM2Opc(AddrOpcode, Imm12, ARM_AM::no_shift, + IndexMode); + MI.addOperand(MCOperand::CreateImm(Offset)); + } + OpIdx += 1; } else { + // If Inst{25} = 1 and Inst{4} != 0, we should reject this as invalid. + if (slice(insn,4,4) == 1) + return false; + // Disassemble the offset reg (Rm), shift type, and immediate shift length. MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRm(insn)))); @@ -1101,9 +1348,9 @@ static bool DisassembleLdStFrm(MCInst &MI, unsigned Opcode, uint32_t insn, // A8.4.1. Possible rrx or shift amount of 32... getImmShiftSE(ShOp, ShImm); MI.addOperand(MCOperand::CreateImm( - ARM_AM::getAM2Opc(AddrOpcode, ShImm, ShOp))); + ARM_AM::getAM2Opc(AddrOpcode, ShImm, ShOp, IndexMode))); + OpIdx += 2; } - OpIdx += 2; return true; } @@ -1125,7 +1372,7 @@ static bool HasDualReg(unsigned Opcode) { case ARM::LDRD: case ARM::LDRD_PRE: case ARM::LDRD_POST: case ARM::STRD: case ARM::STRD_PRE: case ARM::STRD_POST: return true; - } + } } static bool DisassembleLdStMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn, @@ -1153,8 +1400,6 @@ static bool DisassembleLdStMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn, ++OpIdx; } - bool DualReg = HasDualReg(Opcode); - // Disassemble the dst/src operand. 
if (OpIdx >= NumOps) return false; @@ -1165,8 +1410,8 @@ static bool DisassembleLdStMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn, decodeRd(insn)))); ++OpIdx; - // Fill in LDRD and STRD's second operand. - if (DualReg) { + // Fill in LDRD and STRD's second operand Rt operand. + if (HasDualReg(Opcode)) { MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRd(insn) + 1))); ++OpIdx; @@ -1188,7 +1433,7 @@ static bool DisassembleLdStMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn, assert(OpInfo[OpIdx].RegClass == ARM::GPRRegClassID && "Reg operand expected"); assert((!isPrePost || (TID.getOperandConstraint(OpIdx, TOI::TIED_TO) != -1)) - && "Index mode or tied_to operand expected"); + && "Offset mode or tied_to operand expected"); MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRn(insn)))); ++OpIdx; @@ -1204,19 +1449,22 @@ static bool DisassembleLdStMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn, "Expect 1 reg operand followed by 1 imm operand"); ARM_AM::AddrOpc AddrOpcode = getUBit(insn) ? ARM_AM::add : ARM_AM::sub; + unsigned IndexMode = + (TID.TSFlags & ARMII::IndexModeMask) >> ARMII::IndexModeShift; if (getAM3IBit(insn) == 1) { MI.addOperand(MCOperand::CreateReg(0)); // Disassemble the 8-bit immediate offset. unsigned Imm4H = (insn >> ARMII::ImmHiShift) & 0xF; unsigned Imm4L = insn & 0xF; - unsigned Offset = ARM_AM::getAM3Opc(AddrOpcode, (Imm4H << 4) | Imm4L); + unsigned Offset = ARM_AM::getAM3Opc(AddrOpcode, (Imm4H << 4) | Imm4L, + IndexMode); MI.addOperand(MCOperand::CreateImm(Offset)); } else { // Disassemble the offset reg (Rm). MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRm(insn)))); - unsigned Offset = ARM_AM::getAM3Opc(AddrOpcode, 0); + unsigned Offset = ARM_AM::getAM3Opc(AddrOpcode, 0, IndexMode); MI.addOperand(MCOperand::CreateImm(Offset)); } OpIdx += 2; @@ -1236,13 +1484,13 @@ static bool DisassembleStMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn, } // The algorithm for disassembly of LdStMulFrm is different from others because -// it explicitly populates the two predicate operands after operand 0 (the base) -// and operand 1 (the AM4 mode imm). After operand 3, we need to populate the -// reglist with each affected register encoded as an MCOperand. +// it explicitly populates the two predicate operands after the base register. +// After that, we need to populate the reglist with each affected register +// encoded as an MCOperand. static bool DisassembleLdStMulFrm(MCInst &MI, unsigned Opcode, uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) { - assert(NumOps >= 5 && "LdStMulFrm expects NumOps >= 5"); + assert(NumOps >= 4 && "LdStMulFrm expects NumOps >= 4"); NumOpsAdded = 0; unsigned Base = getRegisterEnum(B, ARM::GPRRegClassID, decodeRn(insn)); @@ -1260,8 +1508,10 @@ static bool DisassembleLdStMulFrm(MCInst &MI, unsigned Opcode, uint32_t insn, MI.addOperand(MCOperand::CreateReg(Base)); // Handling the two predicate operands before the reglist. - int64_t CondVal = insn >> ARMII::CondShift; - MI.addOperand(MCOperand::CreateImm(CondVal == 0xF ? 
0xE : CondVal)); + int64_t CondVal = getCondField(insn); + if (CondVal == 0xF) + return false; + MI.addOperand(MCOperand::CreateImm(CondVal)); MI.addOperand(MCOperand::CreateReg(ARM::CPSR)); NumOpsAdded += 3; @@ -1352,6 +1602,12 @@ static bool DisassembleArithMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn, bool ThreeReg = NumOps > 2 && OpInfo[2].RegClass == ARM::GPRRegClassID; + // Sanity check the registers, which should not be 15. + if (decodeRd(insn) == 15 || decodeRm(insn) == 15) + return false; + if (ThreeReg && decodeRn(insn) == 15) + return false; + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRd(insn)))); ++OpIdx; @@ -1376,7 +1632,7 @@ static bool DisassembleArithMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn, ARM_AM::ShiftOpc Opc = ARM_AM::no_shift; if (Opcode == ARM::PKHBT) Opc = ARM_AM::lsl; - else if (Opcode == ARM::PKHBT) + else if (Opcode == ARM::PKHTB) Opc = ARM_AM::asr; getImmShiftSE(Opc, ShiftAmt); MI.addOperand(MCOperand::CreateImm(ARM_AM::getSORegOpc(Opc, ShiftAmt))); @@ -1391,6 +1647,11 @@ static bool DisassembleArithMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn, static bool DisassembleSatFrm(MCInst &MI, unsigned Opcode, uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) { + // A8.6.183 SSAT + // if d == 15 || n == 15 then UNPREDICTABLE; + if (decodeRd(insn) == 15 || decodeRm(insn) == 15) + return false; + const TargetInstrDesc &TID = ARMInsts[Opcode]; NumOpsAdded = TID.getNumOperands() - 2; // ignore predicate operands @@ -1429,6 +1690,11 @@ static bool DisassembleSatFrm(MCInst &MI, unsigned Opcode, uint32_t insn, static bool DisassembleExtFrm(MCInst &MI, unsigned Opcode, uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) { + // A8.6.220 SXTAB + // if d == 15 || m == 15 then UNPREDICTABLE; + if (decodeRd(insn) == 15 || decodeRm(insn) == 15) + return false; + const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; unsigned &OpIdx = NumOpsAdded; @@ -1611,7 +1877,7 @@ static bool DisassembleVFPBinaryFrm(MCInst &MI, unsigned Opcode, uint32_t insn, // A8.6.295 vcvt (floating-point <-> integer) // Int to FP: VSITOD, VSITOS, VUITOD, VUITOS // FP to Int: VTOSI[Z|R]D, VTOSI[Z|R]S, VTOUI[Z|R]D, VTOUI[Z|R]S -// +// // A8.6.297 vcvt (floating-point and fixed-point) // Dd|Sd Dd|Sd(TIED_TO) #fbits(= 16|32 - UInt(imm4:i)) static bool DisassembleVFPConv1Frm(MCInst &MI, unsigned Opcode, uint32_t insn, @@ -1800,15 +2066,14 @@ static bool DisassembleVFPLdStFrm(MCInst &MI, unsigned Opcode, uint32_t insn, } // VFP Load/Store Multiple Instructions. -// This is similar to the algorithm for LDM/STM in that operand 0 (the base) and -// operand 1 (the AM4 mode imm) is followed by two predicate operands. It is -// followed by a reglist of either DPR(s) or SPR(s). +// We have an optional write back reg, the base, and two predicate operands. +// It is then followed by a reglist of either DPR(s) or SPR(s). // // VLDMD[_UPD], VLDMS[_UPD], VSTMD[_UPD], VSTMS[_UPD] static bool DisassembleVFPLdStMulFrm(MCInst &MI, unsigned Opcode, uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) { - assert(NumOps >= 5 && "VFPLdStMulFrm expects NumOps >= 5"); + assert(NumOps >= 4 && "VFPLdStMulFrm expects NumOps >= 4"); unsigned &OpIdx = NumOpsAdded; @@ -1827,25 +2092,18 @@ static bool DisassembleVFPLdStMulFrm(MCInst &MI, unsigned Opcode, uint32_t insn, MI.addOperand(MCOperand::CreateReg(Base)); - // Next comes the AM4 Opcode. 
- ARM_AM::AMSubMode SubMode = getAMSubModeForBits(getPUBits(insn)); - // Must be either "ia" or "db" submode. - if (SubMode != ARM_AM::ia && SubMode != ARM_AM::db) { - DEBUG(errs() << "Illegal addressing mode 4 sub-mode!\n"); - return false; - } - MI.addOperand(MCOperand::CreateImm(ARM_AM::getAM4ModeImm(SubMode))); - // Handling the two predicate operands before the reglist. - int64_t CondVal = insn >> ARMII::CondShift; - MI.addOperand(MCOperand::CreateImm(CondVal == 0xF ? 0xE : CondVal)); + int64_t CondVal = getCondField(insn); + if (CondVal == 0xF) + return false; + MI.addOperand(MCOperand::CreateImm(CondVal)); MI.addOperand(MCOperand::CreateReg(ARM::CPSR)); - OpIdx += 4; + OpIdx += 3; - bool isSPVFP = (Opcode == ARM::VLDMSIA || Opcode == ARM::VLDMSDB || + bool isSPVFP = (Opcode == ARM::VLDMSIA || Opcode == ARM::VLDMSIA_UPD || Opcode == ARM::VLDMSDB_UPD || - Opcode == ARM::VSTMSIA || Opcode == ARM::VSTMSDB || + Opcode == ARM::VSTMSIA || Opcode == ARM::VSTMSIA_UPD || Opcode == ARM::VSTMSDB_UPD); unsigned RegClassID = isSPVFP ? ARM::SPRRegClassID : ARM::DPRRegClassID; @@ -1855,6 +2113,11 @@ static bool DisassembleVFPLdStMulFrm(MCInst &MI, unsigned Opcode, uint32_t insn, // Fill the variadic part of reglist. unsigned char Imm8 = insn & 0xFF; unsigned Regs = isSPVFP ? Imm8 : Imm8/2; + + // Apply some sanity checks before proceeding. + if (Regs == 0 || (RegD + Regs) > 32 || (!isSPVFP && Regs > 16)) + return false; + for (unsigned i = 0; i < Regs; ++i) { MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, RegClassID, RegD + i))); @@ -2136,7 +2399,7 @@ static unsigned decodeN3VImm(uint32_t insn) { // Correctly set VLD*/VST*'s TIED_TO GPR, as the asm printer needs it. static bool DisassembleNLdSt0(MCInst &MI, unsigned Opcode, uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, bool Store, bool DblSpaced, - BO B) { + unsigned alignment, BO B) { const TargetInstrDesc &TID = ARMInsts[Opcode]; const TargetOperandInfo *OpInfo = TID.OpInfo; @@ -2180,9 +2443,10 @@ static bool DisassembleNLdSt0(MCInst &MI, unsigned Opcode, uint32_t insn, assert((OpIdx+1) < NumOps && OpInfo[OpIdx].RegClass == ARM::GPRRegClassID && OpInfo[OpIdx + 1].RegClass < 0 && "Addrmode #6 Operands expected"); + // addrmode6 := (ops GPR:$addr, i32imm) MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, Rn))); - MI.addOperand(MCOperand::CreateImm(0)); // Alignment ignored? + MI.addOperand(MCOperand::CreateImm(alignment)); // Alignment OpIdx += 2; if (WB) { @@ -2230,9 +2494,10 @@ static bool DisassembleNLdSt0(MCInst &MI, unsigned Opcode, uint32_t insn, assert((OpIdx+1) < NumOps && OpInfo[OpIdx].RegClass == ARM::GPRRegClassID && OpInfo[OpIdx + 1].RegClass < 0 && "Addrmode #6 Operands expected"); + // addrmode6 := (ops GPR:$addr, i32imm) MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, Rn))); - MI.addOperand(MCOperand::CreateImm(0)); // Alignment ignored? + MI.addOperand(MCOperand::CreateImm(alignment)); // Alignment OpIdx += 2; if (WB) { @@ -2263,6 +2528,92 @@ static bool DisassembleNLdSt0(MCInst &MI, unsigned Opcode, uint32_t insn, return true; } +// A8.6.308, A8.6.311, A8.6.314, A8.6.317. 
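// [Editor's sketch, not part of the patch.] The function that follows encodes
// the index_align validity and alignment rules of A8.6.308/311/314/317. As an
// illustration, here is the elem == 1 (VLD1/VST1, single element to one lane,
// A8.6.308) case distilled into a standalone helper. bits() and
// vld1LaneAlignment() are hypothetical names standing in for the file's
// slice() helper and are not defined anywhere in the patch.
static inline unsigned bits(unsigned x, unsigned hi, unsigned lo) {
  return (x >> lo) & ((1u << (hi - lo + 1)) - 1);
}
// Returns true if the index_align encoding is defined; on return, Alignment
// holds the required alignment in bits (0 = standard alignment).
static bool vld1LaneAlignment(unsigned size, unsigned index_align,
                              unsigned &Alignment) {
  Alignment = 0;
  if (size == 0)                          // 8-bit element: no alignment choice
    return bits(index_align, 0, 0) == 0;
  if (size == 1) {                        // 16-bit element: '01' selects @16
    unsigned b = bits(index_align, 1, 0);
    if (b == 1) Alignment = 16;
    return b == 0 || b == 1;
  }
  if (size == 2) {                        // 32-bit element: '011' selects @32
    unsigned b = bits(index_align, 2, 0);
    if (b == 3) Alignment = 32;
    return b == 0 || b == 3;
  }
  return true; // other sizes are not rejected here, mirroring the code below
}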
+static bool Align4OneLaneInst(unsigned elem, unsigned size,
+                              unsigned index_align, unsigned &alignment) {
+  unsigned bits = 0;
+  switch (elem) {
+  default:
+    return false;
+  case 1:
+    // A8.6.308
+    if (size == 0)
+      return slice(index_align, 0, 0) == 0;
+    else if (size == 1) {
+      bits = slice(index_align, 1, 0);
+      if (bits != 0 && bits != 1)
+        return false;
+      if (bits == 1)
+        alignment = 16;
+      return true;
+    } else if (size == 2) {
+      bits = slice(index_align, 2, 0);
+      if (bits != 0 && bits != 3)
+        return false;
+      if (bits == 3)
+        alignment = 32;
+      return true;
+    }
+    return true;
+  case 2:
+    // A8.6.311
+    if (size == 0) {
+      if (slice(index_align, 0, 0) == 1)
+        alignment = 16;
+      return true;
+    } else if (size == 1) {
+      if (slice(index_align, 0, 0) == 1)
+        alignment = 32;
+      return true;
+    } else if (size == 2) {
+      if (slice(index_align, 1, 1) != 0)
+        return false;
+      if (slice(index_align, 0, 0) == 1)
+        alignment = 64;
+      return true;
+    }
+    return true;
+  case 3:
+    // A8.6.314
+    if (size == 0) {
+      if (slice(index_align, 0, 0) != 0)
+        return false;
+      return true;
+    } else if (size == 1) {
+      if (slice(index_align, 0, 0) != 0)
+        return false;
+      return true;
+    } else if (size == 2) {
+      if (slice(index_align, 1, 0) != 0)
+        return false;
+      return true;
+    }
+    return true;
+  case 4:
+    // A8.6.317
+    if (size == 0) {
+      if (slice(index_align, 0, 0) == 1)
+        alignment = 32;
+      return true;
+    } else if (size == 1) {
+      if (slice(index_align, 0, 0) == 1)
+        alignment = 64;
+      return true;
+    } else if (size == 2) {
+      bits = slice(index_align, 1, 0);
+      if (bits == 3)
+        return false;
+      if (bits == 1)
+        alignment = 64;
+      else if (bits == 2)
+        alignment = 128;
+      return true;
+    }
+    return true;
+  }
+}
+
 // A7.7
 // If L (Inst{21}) == 0, store instructions.
 // Find out about double-spaced-ness of the Opcode and pass it on to
@@ -2272,11 +2623,33 @@ static bool DisassembleNLdSt(MCInst &MI, unsigned Opcode, uint32_t insn,

   const StringRef Name = ARMInsts[Opcode].Name;
   bool DblSpaced = false;
+  // 0 represents standard alignment, i.e., unaligned data access.
+  unsigned alignment = 0;
+
+  unsigned elem = 0; // legal values: {1, 2, 3, 4}
+  if (Name.startswith("VST1") || Name.startswith("VLD1"))
+    elem = 1;
+
+  if (Name.startswith("VST2") || Name.startswith("VLD2"))
+    elem = 2;
+
+  if (Name.startswith("VST3") || Name.startswith("VLD3"))
+    elem = 3;
+
+  if (Name.startswith("VST4") || Name.startswith("VLD4"))
+    elem = 4;

   if (Name.find("LN") != std::string::npos) {
     // To one lane instructions.
     // See, for example, 8.6.317 VLD4 (single 4-element structure to one lane).

+    // Utility function takes number of elements, size, and index_align.
+    if (!Align4OneLaneInst(elem,
+                           slice(insn, 11, 10),
+                           slice(insn, 7, 4),
+                           alignment))
+      return false;
+
     // <size> == 16 && Inst{5} == 1 --> DblSpaced = true
     if (Name.endswith("16") || Name.endswith("16_UPD"))
       DblSpaced = slice(insn, 5, 5) == 1;

@@ -2284,30 +2657,102 @@ static bool DisassembleNLdSt(MCInst &MI, unsigned Opcode, uint32_t insn,
     // <size> == 32 && Inst{6} == 1 --> DblSpaced = true
     if (Name.endswith("32") || Name.endswith("32_UPD"))
       DblSpaced = slice(insn, 6, 6) == 1;
-
+  } else if (Name.find("DUP") != std::string::npos) {
+    // Single element (or structure) to all lanes.
+    // Inst{9-8} encodes the number of element(s) in the structure, with:
+    //   0b00 (VLD1DUP) (for this, the a bit makes sense only for data sizes
+    //        16 and 32)
+ // 0b01 (VLD2DUP) + // 0b10 (VLD3DUP) (for this, a bit must be encoded as 0) + // 0b11 (VLD4DUP) + // + // Inst{7-6} encodes the data size, with: + // 0b00 => 8, 0b01 => 16, 0b10 => 32 + // + // Inst{4} (the a bit) encodes the align action (0: standard alignment) + unsigned elem = slice(insn, 9, 8) + 1; + unsigned a = slice(insn, 4, 4); + if (elem != 3) { + // 0b11 is not a valid encoding for Inst{7-6}. + if (slice(insn, 7, 6) == 3) + return false; + unsigned data_size = 8 << slice(insn, 7, 6); + // For VLD1DUP, a bit makes sense only for data size of 16 and 32. + if (a && data_size == 8) + return false; + + // Now we can calculate the alignment! + if (a) + alignment = elem * data_size; + } else { + if (a) { + // A8.6.315 VLD3 (single 3-element structure to all lanes) + // The a bit must be encoded as 0. + return false; + } + } } else { // Multiple n-element structures with type encoded as Inst{11-8}. // See, for example, A8.6.316 VLD4 (multiple 4-element structures). - // n == 2 && type == 0b1001 -> DblSpaced = true - if (Name.startswith("VST2") || Name.startswith("VLD2")) - DblSpaced = slice(insn, 11, 8) == 9; - - // n == 3 && type == 0b0101 -> DblSpaced = true - if (Name.startswith("VST3") || Name.startswith("VLD3")) - DblSpaced = slice(insn, 11, 8) == 5; - - // n == 4 && type == 0b0001 -> DblSpaced = true - if (Name.startswith("VST4") || Name.startswith("VLD4")) - DblSpaced = slice(insn, 11, 8) == 1; - + // Inst{5-4} encodes alignment. + unsigned align = slice(insn, 5, 4); + switch (align) { + default: + break; + case 1: + alignment = 64; break; + case 2: + alignment = 128; break; + case 3: + alignment = 256; break; + } + + unsigned type = slice(insn, 11, 8); + // Reject UNDEFINED instructions based on type and align. + // Plus set DblSpaced flag where appropriate. 
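// [Editor's sketch, not part of the patch.] The switch that follows both
// rejects UNDEFINED type/align combinations and decides register spacing.
// The spacing rule alone, per A8.6.310/313/316, reduces to the helper below;
// isDoubleSpaced() is a hypothetical name not defined in the patch.
static bool isDoubleSpaced(unsigned elem, unsigned type /* Inst{11-8} */) {
  switch (elem) {
  case 2:  return type == 9; // VLD2/VST2, type 0b1001
  case 3:  return type == 5; // VLD3/VST3, type 0b0101
  case 4:  return type == 1; // VLD4/VST4, type 0b0001
  default: return false;     // VLD1/VST1 registers are never double-spaced
  }
}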
+ switch (elem) { + default: + break; + case 1: + // n == 1 + // A8.6.307 & A8.6.391 + if ((type == 7 && slice(align, 1, 1) == 1) || + (type == 10 && align == 3) || + (type == 6 && slice(align, 1, 1) == 1)) + return false; + break; + case 2: + // n == 2 && type == 0b1001 -> DblSpaced = true + // A8.6.310 & A8.6.393 + if ((type == 8 || type == 9) && align == 3) + return false; + DblSpaced = (type == 9); + break; + case 3: + // n == 3 && type == 0b0101 -> DblSpaced = true + // A8.6.313 & A8.6.395 + if (slice(insn, 7, 6) == 3 || slice(align, 1, 1) == 1) + return false; + DblSpaced = (type == 5); + break; + case 4: + // n == 4 && type == 0b0001 -> DblSpaced = true + // A8.6.316 & A8.6.397 + if (slice(insn, 7, 6) == 3) + return false; + DblSpaced = (type == 1); + break; + } } return DisassembleNLdSt0(MI, Opcode, insn, NumOps, NumOpsAdded, - slice(insn, 21, 21) == 0, DblSpaced, B); + slice(insn, 21, 21) == 0, DblSpaced, alignment/8, B); } // VMOV (immediate) // Qd/Dd imm +// VBIC (immediate) +// VORR (immediate) +// Qd/Dd imm src(=Qd/Dd) static bool DisassembleN1RegModImmFrm(MCInst &MI, unsigned Opcode, uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) { @@ -2334,12 +2779,20 @@ static bool DisassembleN1RegModImmFrm(MCInst &MI, unsigned Opcode, case ARM::VMOVv8i16: case ARM::VMVNv4i16: case ARM::VMVNv8i16: + case ARM::VBICiv4i16: + case ARM::VBICiv8i16: + case ARM::VORRiv4i16: + case ARM::VORRiv8i16: esize = ESize16; break; case ARM::VMOVv2i32: case ARM::VMOVv4i32: case ARM::VMVNv2i32: case ARM::VMVNv4i32: + case ARM::VBICiv2i32: + case ARM::VBICiv4i32: + case ARM::VORRiv2i32: + case ARM::VORRiv4i32: esize = ESize32; break; case ARM::VMOVv1i64: @@ -2347,7 +2800,7 @@ static bool DisassembleN1RegModImmFrm(MCInst &MI, unsigned Opcode, esize = ESize64; break; default: - assert(0 && "Unreachable code!"); + assert(0 && "Unexpected opcode!"); return false; } @@ -2356,6 +2809,16 @@ static bool DisassembleN1RegModImmFrm(MCInst &MI, unsigned Opcode, MI.addOperand(MCOperand::CreateImm(decodeN1VImm(insn, esize))); NumOpsAdded = 2; + + // VBIC/VORRiv*i* variants have an extra $src = $Vd to be filled in. + if (NumOps >= 3 && + (OpInfo[2].RegClass == ARM::DPRRegClassID || + OpInfo[2].RegClass == ARM::QPRRegClassID)) { + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, OpInfo[0].RegClass, + decodeNEONRd(insn)))); + NumOpsAdded += 1; + } + return true; } @@ -2376,7 +2839,7 @@ enum N2VFlag { // // Vector Move Long: // Qd Dm -// +// // Vector Move Narrow: // Dd Qm // @@ -2518,7 +2981,7 @@ static bool DisassembleNVectorShift(MCInst &MI, unsigned Opcode, uint32_t insn, assert(OpInfo[OpIdx].RegClass < 0 && "Imm operand expected"); // Add the imm operand. - + // VSHLL has maximum shift count as the imm, inferred from its size. unsigned Imm; switch (Opcode) { @@ -2631,7 +3094,7 @@ static bool DisassembleNVdVnVmOptImm(MCInst &MI, unsigned Opcode, uint32_t insn, // N3RegFrm. if (Opcode == ARM::VMOVDneon || Opcode == ARM::VMOVQ) return true; - + // Dm = Inst{5:3-0} => NEON Rm // or // Dm is restricted to D0-D7 if size is 16, D0-D15 otherwise @@ -2770,7 +3233,7 @@ static bool DisassembleNGetLnFrm(MCInst &MI, unsigned Opcode, uint32_t insn, ElemSize esize = Opcode == ARM::VGETLNi32 ? ESize32 : ((Opcode == ARM::VGETLNs16 || Opcode == ARM::VGETLNu16) ? 
ESize16 - : ESize32); + : ESize8); // Rt = Inst{15-12} => ARM Rd MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, @@ -2852,17 +3315,6 @@ static bool DisassembleNDupFrm(MCInst &MI, unsigned Opcode, uint32_t insn, return true; } -// A8.6.41 DMB -// A8.6.42 DSB -// A8.6.49 ISB -static inline bool MemBarrierInstr(uint32_t insn) { - unsigned op7_4 = slice(insn, 7, 4); - if (slice(insn, 31, 8) == 0xf57ff0 && (op7_4 >= 4 && op7_4 <= 6)) - return true; - - return false; -} - static inline bool PreLoadOpcode(unsigned Opcode) { switch(Opcode) { case ARM::PLDi12: case ARM::PLDrs: @@ -2878,8 +3330,8 @@ static bool DisassemblePreLoadFrm(MCInst &MI, unsigned Opcode, uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) { // Preload Data/Instruction requires either 2 or 3 operands. - // PLDi, PLDWi, PLIi: addrmode_imm12 - // PLDr[a|m], PLDWr[a|m], PLIr[a|m]: ldst_so_reg + // PLDi12, PLDWi12, PLIi12: addrmode_imm12 + // PLDrs, PLDWrs, PLIrs: ldst_so_reg MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRn(insn)))); @@ -2888,10 +3340,19 @@ static bool DisassemblePreLoadFrm(MCInst &MI, unsigned Opcode, uint32_t insn, || Opcode == ARM::PLIi12) { unsigned Imm12 = slice(insn, 11, 0); bool Negative = getUBit(insn) == 0; + + // A8.6.118 PLD (literal) PLDWi12 with Rn=PC is transformed to PLDi12. + if (Opcode == ARM::PLDWi12 && slice(insn, 19, 16) == 0xF) { + DEBUG(errs() << "Rn == '1111': PLDWi12 morphed to PLDi12\n"); + MI.setOpcode(ARM::PLDi12); + } + // -0 is represented specially. All other values are as normal. + int Offset = Negative ? -1 * Imm12 : Imm12; if (Imm12 == 0 && Negative) - Imm12 = INT32_MIN; - MI.addOperand(MCOperand::CreateImm(Imm12)); + Offset = INT32_MIN; + + MI.addOperand(MCOperand::CreateImm(Offset)); NumOpsAdded = 2; } else { MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, @@ -2917,14 +3378,20 @@ static bool DisassemblePreLoadFrm(MCInst &MI, unsigned Opcode, uint32_t insn, static bool DisassembleMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) { - if (MemBarrierInstr(insn)) { - // DMBsy, DSBsy, and ISBsy instructions have zero operand and are taken care - // of within the generic ARMBasicMCBuilder::BuildIt() method. - // + if (Opcode == ARM::DMB || Opcode == ARM::DSB) { // Inst{3-0} encodes the memory barrier option for the variants. - MI.addOperand(MCOperand::CreateImm(slice(insn, 3, 0))); - NumOpsAdded = 1; - return true; + unsigned opt = slice(insn, 3, 0); + switch (opt) { + case ARM_MB::SY: case ARM_MB::ST: + case ARM_MB::ISH: case ARM_MB::ISHST: + case ARM_MB::NSH: case ARM_MB::NSHST: + case ARM_MB::OSH: case ARM_MB::OSHST: + MI.addOperand(MCOperand::CreateImm(opt)); + NumOpsAdded = 1; + return true; + default: + return false; + } } switch (Opcode) { @@ -2936,6 +3403,11 @@ static bool DisassembleMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn, case ARM::WFI: case ARM::SEV: return true; + case ARM::SWP: + case ARM::SWPB: + // SWP, SWPB: Rd Rm Rn + // Delegate to DisassembleLdStExFrm().... + return DisassembleLdStExFrm(MI, Opcode, insn, NumOps, NumOpsAdded, B); default: break; } @@ -2950,20 +3422,32 @@ static bool DisassembleMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn, // opcodes which match the same real instruction. This is needed since there's // no current handling of optional arguments. Fix here when a better handling // of optional arguments is implemented. 
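// [Editor's sketch, not part of the patch.] The CPS3p/CPS2p cases below both
// screen the imod field (Inst{19-18}) the same way; distilled, the accepted
// values are only the interrupt enable/disable encodings of B6.1.1 CPS.
// isDecodableCPSImod() is a hypothetical name not defined in the patch.
static bool isDecodableCPSImod(unsigned imod) {
  // imod 0b01 is an invalid encoding, and imod 0b00 is rejected here because
  // the AsmPrinter cannot represent it; only 0b10 (interrupt enable) and
  // 0b11 (interrupt disable) are accepted.
  return imod == 2 || imod == 3;
}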
- if (Opcode == ARM::CPS3p) { + if (Opcode == ARM::CPS3p) { // M = 1 + // Let's reject these impossible imod values by returning false: + // 1. (imod=0b00) - AsmPrinter cannot handle it, and (imod=0b00,M=1, + // iflags!=0) is an invalid combination in any case. + // 2. (imod=0b01) + if (slice(insn, 19, 18) == 0 || slice(insn, 19, 18) == 1) + return false; MI.addOperand(MCOperand::CreateImm(slice(insn, 19, 18))); // imod MI.addOperand(MCOperand::CreateImm(slice(insn, 8, 6))); // iflags MI.addOperand(MCOperand::CreateImm(slice(insn, 4, 0))); // mode NumOpsAdded = 3; return true; } - if (Opcode == ARM::CPS2p) { + if (Opcode == ARM::CPS2p) { // mode = 0, M = 0 + // Let's reject these impossible imod values by returning false: + // 1. (imod=0b00,M=0) + // 2. (imod=0b01) + if (slice(insn, 19, 18) == 0 || slice(insn, 19, 18) == 1) + return false; MI.addOperand(MCOperand::CreateImm(slice(insn, 19, 18))); // imod MI.addOperand(MCOperand::CreateImm(slice(insn, 8, 6))); // iflags NumOpsAdded = 2; return true; } - if (Opcode == ARM::CPS1p) { + if (Opcode == ARM::CPS1p) { // imod = 0, iflags = 0, M = 1 MI.addOperand(MCOperand::CreateImm(slice(insn, 4, 0))); // mode NumOpsAdded = 1; return true; } @@ -3142,7 +3626,7 @@ bool ARMBasicMCBuilder::DoPredicateOperands(MCInst& MI, unsigned Opcode, return false; } - + /// TryPredicateAndSBitModifier - TryPredicateAndSBitModifier tries to process /// the possible Predicate and SBitModifier, to build the remaining MCOperand /// constituents. @@ -3154,6 +3638,7 @@ bool ARMBasicMCBuilder::TryPredicateAndSBitModifier(MCInst& MI, unsigned Opcode, const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; const std::string &Name = ARMInsts[Opcode].Name; unsigned Idx = MI.getNumOperands(); + uint64_t TSFlags = ARMInsts[Opcode].TSFlags; // First, we check whether this instr specifies the PredicateOperand through // a pair of TargetOperandInfos with isPredicate() property. @@ -3173,14 +3658,23 @@ bool ARMBasicMCBuilder::TryPredicateAndSBitModifier(MCInst& MI, unsigned Opcode, // like ARM. // // A8.6.16 B - if (Name == "t2Bcc") - MI.addOperand(MCOperand::CreateImm(CondCode(slice(insn, 25, 22)))); - else if (Name == "tBcc") - MI.addOperand(MCOperand::CreateImm(CondCode(slice(insn, 11, 8)))); - else + // Check for undefined encodings. + unsigned cond; + if (Name == "t2Bcc") { + if ((cond = slice(insn, 25, 22)) >= 14) + return false; + MI.addOperand(MCOperand::CreateImm(CondCode(cond))); + } else if (Name == "tBcc") { + if ((cond = slice(insn, 11, 8)) == 14) + return false; + MI.addOperand(MCOperand::CreateImm(CondCode(cond))); + } else MI.addOperand(MCOperand::CreateImm(ARMCC::AL)); } else { // ARM instructions get their condition field from Inst{31-28}. + // We should reject Inst{31-28} = 0b1111 as invalid encoding. + if (!isNEONDomain(TSFlags) && getCondField(insn) == 0xF) + return false; MI.addOperand(MCOperand::CreateImm(CondCode(getCondField(insn)))); } } @@ -3243,3 +3737,84 @@ ARMBasicMCBuilder *llvm::CreateMCBuilder(unsigned Opcode, ARMFormat Format) { return new ARMBasicMCBuilder(Opcode, Format, ARMInsts[Opcode].getNumOperands()); } + +/// tryAddingSymbolicOperand - tryAddingSymbolicOperand tries to add a symbolic +/// operand in place of the immediate Value in the MCInst. The immediate +/// Value has had any PC adjustment made by the caller.
If the getOpInfo() +/// function was set as part of the setupBuilderForSymbolicDisassembly() call +/// then that function is called to get any symbolic information at the +/// builder's Address for this instruction. If that returns non-zero then the +/// symbolic information it returns is used to create an MCExpr and that is +/// added as an operand to the MCInst. This function returns true if it adds +/// an operand to the MCInst and false otherwise. +bool ARMBasicMCBuilder::tryAddingSymbolicOperand(uint64_t Value, + uint64_t InstSize, + MCInst &MI) { + if (!GetOpInfo) + return false; + + struct LLVMOpInfo1 SymbolicOp; + SymbolicOp.Value = Value; + if (!GetOpInfo(DisInfo, Address, 0 /* Offset */, InstSize, 1, &SymbolicOp)) + return false; + + const MCExpr *Add = NULL; + if (SymbolicOp.AddSymbol.Present) { + if (SymbolicOp.AddSymbol.Name) { + StringRef Name(SymbolicOp.AddSymbol.Name); + MCSymbol *Sym = Ctx->GetOrCreateSymbol(Name); + Add = MCSymbolRefExpr::Create(Sym, *Ctx); + } else { + Add = MCConstantExpr::Create(SymbolicOp.AddSymbol.Value, *Ctx); + } + } + + const MCExpr *Sub = NULL; + if (SymbolicOp.SubtractSymbol.Present) { + if (SymbolicOp.SubtractSymbol.Name) { + StringRef Name(SymbolicOp.SubtractSymbol.Name); + MCSymbol *Sym = Ctx->GetOrCreateSymbol(Name); + Sub = MCSymbolRefExpr::Create(Sym, *Ctx); + } else { + Sub = MCConstantExpr::Create(SymbolicOp.SubtractSymbol.Value, *Ctx); + } + } + + const MCExpr *Off = NULL; + if (SymbolicOp.Value != 0) + Off = MCConstantExpr::Create(SymbolicOp.Value, *Ctx); + + const MCExpr *Expr; + if (Sub) { + const MCExpr *LHS; + if (Add) + LHS = MCBinaryExpr::CreateSub(Add, Sub, *Ctx); + else + LHS = MCUnaryExpr::CreateMinus(Sub, *Ctx); + if (Off != 0) + Expr = MCBinaryExpr::CreateAdd(LHS, Off, *Ctx); + else + Expr = LHS; + } else if (Add) { + if (Off != 0) + Expr = MCBinaryExpr::CreateAdd(Add, Off, *Ctx); + else + Expr = Add; + } else { + if (Off != 0) + Expr = Off; + else + Expr = MCConstantExpr::Create(0, *Ctx); + } + + if (SymbolicOp.VariantKind == LLVMDisassembler_VariantKind_ARM_HI16) + MI.addOperand(MCOperand::CreateExpr(ARMMCExpr::CreateUpper16(Expr, *Ctx))); + else if (SymbolicOp.VariantKind == LLVMDisassembler_VariantKind_ARM_LO16) + MI.addOperand(MCOperand::CreateExpr(ARMMCExpr::CreateLower16(Expr, *Ctx))); + else if (SymbolicOp.VariantKind == LLVMDisassembler_VariantKind_None) + MI.addOperand(MCOperand::CreateExpr(Expr)); + else + assert(0 && "bad SymbolicOp.VariantKind"); + + return true; +} diff --git a/lib/Target/ARM/Disassembler/ARMDisassemblerCore.h b/lib/Target/ARM/Disassembler/ARMDisassemblerCore.h index 9c30d332d1f2..a7ba14141c0a 100644 --- a/lib/Target/ARM/Disassembler/ARMDisassemblerCore.h +++ b/lib/Target/ARM/Disassembler/ARMDisassemblerCore.h @@ -22,12 +22,17 @@ #define ARMDISASSEMBLERCORE_H #include "llvm/MC/MCInst.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCContext.h" #include "llvm/Target/TargetInstrInfo.h" +#include "llvm-c/Disassembler.h" #include "ARMBaseInstrInfo.h" #include "ARMRegisterInfo.h" #include "ARMDisassembler.h" namespace llvm { +class MCContext; class ARMUtils { public: @@ -134,6 +139,31 @@ static inline void setSlice(unsigned &Bits, unsigned From, unsigned To, Bits |= (Val & Mask) << To; } +// Return an integer result equal to the number of bits of x that are ones.
+static inline uint32_t +BitCount (uint64_t x) +{ + // c accumulates the total bits set in x + uint32_t c; + for (c = 0; x; ++c) + { + x &= x - 1; // clear the least significant bit set + } + return c; +} + +static inline bool +BitIsSet (const uint64_t value, const uint64_t bit) +{ + return (value & (1ull << bit)) != 0; +} + +static inline bool +BitIsClear (const uint64_t value, const uint64_t bit) +{ + return (value & (1ull << bit)) == 0; +} + /// Various utilities for checking the target specific flags. /// A unary data processing instruction doesn't have an Rn operand. @@ -141,6 +171,12 @@ static inline bool isUnaryDP(uint64_t TSFlags) { return (TSFlags & ARMII::UnaryDP); } +/// A NEON Domain instruction has cond field (Inst{31-28}) as 0b1111. +static inline bool isNEONDomain(uint64_t TSFlags) { + return (TSFlags & ARMII::DomainNEON) || + (TSFlags & ARMII::DomainNEONA8); +} + /// This four-bit field describes the addressing mode used. /// See also ARMBaseInstrInfo.h. static inline unsigned getAddrMode(uint64_t TSFlags) { @@ -196,7 +232,7 @@ private: public: ARMBasicMCBuilder(ARMBasicMCBuilder &B) : Opcode(B.Opcode), Format(B.Format), NumOps(B.NumOps), Disasm(B.Disasm), - SP(B.SP) { + SP(B.SP), GetOpInfo(0), DisInfo(0), Ctx(0) { Err = 0; } @@ -255,6 +291,44 @@ private: assert(SP); return slice(SP->ITState, 7, 4); } + +private: + // + // Hooks for symbolic disassembly via the public 'C' interface. + // + // The function to get the symbolic information for operands. + LLVMOpInfoCallback GetOpInfo; + // The pointer to the block of symbolic information for the above callback. + void *DisInfo; + // The assembly context for creating symbols and MCExprs in place of + // immediate operands when there is symbolic information. + MCContext *Ctx; + // The address of the instruction being disassembled. + uint64_t Address; + +public: + void setupBuilderForSymbolicDisassembly(LLVMOpInfoCallback getOpInfo, + void *disInfo, MCContext *ctx, + uint64_t address) { + GetOpInfo = getOpInfo; + DisInfo = disInfo; + Ctx = ctx; + Address = address; + } + + uint64_t getBuilderAddress() const { return Address; } + + /// tryAddingSymbolicOperand - tryAddingSymbolicOperand tries to add a symbolic + /// operand in place of the immediate Value in the MCInst. The immediate + /// Value has had any PC adjustment made by the caller. If the getOpInfo() + /// function was set as part of the setupBuilderForSymbolicDisassembly() call + /// then that function is called to get any symbolic information at the + /// builder's Address for this instruction. If that returns non-zero then the + /// symbolic information it returns is used to create an MCExpr and that is + /// added as an operand to the MCInst. This function returns true if it adds + /// an operand to the MCInst and false otherwise. + bool tryAddingSymbolicOperand(uint64_t Value, uint64_t InstSize, MCInst &MI); + }; } // namespace llvm diff --git a/lib/Target/ARM/Disassembler/ThumbDisassemblerCore.h b/lib/Target/ARM/Disassembler/ThumbDisassemblerCore.h index 23372e022414..8d39982f5640 100644 --- a/lib/Target/ARM/Disassembler/ThumbDisassemblerCore.h +++ b/lib/Target/ARM/Disassembler/ThumbDisassemblerCore.h @@ -108,6 +108,8 @@ static inline bool IsGPR(unsigned RegClass) { // Utilities for 32-bit Thumb instructions. +static inline bool BadReg(uint32_t n) { return n == 13 || n == 15; } + // Extract imm4: Inst{19-16}.
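The bit utilities introduced above mirror the ARM ARM pseudocode helpers; BitCount is the classic Kernighan loop, clearing one set bit per iteration, so it runs in time proportional to the number of set bits. A quick self-contained check of the behavior (a hypothetical test driver, not part of the tree):

  #include <cassert>
  #include <cstdint>

  static uint32_t BitCount(uint64_t x) {
    uint32_t c;
    for (c = 0; x; ++c)
      x &= x - 1; // clear the least significant set bit
    return c;
  }

  static bool BitIsSet(uint64_t value, uint64_t bit) {
    return (value & (1ull << bit)) != 0;
  }

  int main() {
    assert(BitCount(0) == 0);
    assert(BitCount(0xFFull) == 8);   // e.g. a full 8-bit Thumb register list
    assert(BitCount(0x8000000000000001ull) == 2);
    assert(BitIsSet(0x10, 4) && !BitIsSet(0x10, 3));
    return 0;
  }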
static inline unsigned getImm4(uint32_t insn) { return slice(insn, 19, 16); } @@ -398,9 +400,17 @@ static bool DisassembleThumb1General(MCInst &MI, unsigned Opcode, uint32_t insn, assert(OpInfo[OpIdx].RegClass < 0 && !OpInfo[OpIdx].isPredicate() && !OpInfo[OpIdx].isOptionalDef() && "Pure imm operand expected"); - MI.addOperand(MCOperand::CreateImm(UseRt ? getT1Imm8(insn) - : (Imm3 ? getT1Imm3(insn) - : getT1Imm5(insn)))); + unsigned Imm = 0; + if (UseRt) + Imm = getT1Imm8(insn); + else if (Imm3) + Imm = getT1Imm3(insn); + else { + Imm = getT1Imm5(insn); + ARM_AM::ShiftOpc ShOp = getShiftOpcForBits(slice(insn, 12, 11)); + getImmShiftSE(ShOp, Imm); + } + MI.addOperand(MCOperand::CreateImm(Imm)); } ++OpIdx; @@ -469,6 +479,7 @@ static bool DisassembleThumb1DP(MCInst &MI, unsigned Opcode, uint32_t insn, // tBX_RET: 0 operand // tBX_RET_vararg: Rm // tBLXr_r9: Rm +// tBRIND: Rm static bool DisassembleThumb1Special(MCInst &MI, unsigned Opcode, uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) { @@ -476,11 +487,17 @@ static bool DisassembleThumb1Special(MCInst &MI, unsigned Opcode, uint32_t insn, if (NumOps == 0) return true; - // BX/BLX has 1 reg operand: Rm. - if (NumOps == 1) { + // BX/BLX/tBRIND (indirect branch, i.e., mov pc, Rm) has 1 reg operand: Rm. + if (Opcode==ARM::tBLXr_r9 || Opcode==ARM::tBX_Rm || Opcode==ARM::tBRIND) { + if (Opcode != ARM::tBRIND) { + // Handling the two predicate operands before the reg operand. + if (!B->DoPredicateOperands(MI, Opcode, insn, NumOps)) + return false; + NumOpsAdded += 2; + } MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, getT1Rm(insn)))); - NumOpsAdded = 1; + NumOpsAdded += 1; return true; } @@ -598,7 +615,7 @@ static bool DisassembleThumb2Ldpci(MCInst &MI, unsigned Opcode, // A6.2.4 Load/store single data item // -// Load/Store Register (reg|imm): tRd tRn imm5 tRm +// Load/Store Register (reg|imm): tRd tRn imm5|tRm // Load Register Signed Byte|Halfword: tRd tRn tRm static bool DisassembleThumb1LdSt(unsigned opA, MCInst &MI, unsigned Opcode, uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) { @@ -607,11 +624,6 @@ static bool DisassembleThumb1LdSt(unsigned opA, MCInst &MI, unsigned Opcode, const TargetOperandInfo *OpInfo = TID.OpInfo; unsigned &OpIdx = NumOpsAdded; - // Table A6-5 16-bit Thumb Load/store instructions - // opA = 0b0101 for STR/LDR (register) and friends. - // Otherwise, we have STR/LDR (immediate) and friends. - bool Imm5 = (opA != 5); - assert(NumOps >= 2 && OpInfo[0].RegClass == ARM::tGPRRegClassID && OpInfo[1].RegClass == ARM::tGPRRegClassID @@ -624,28 +636,28 @@ static bool DisassembleThumb1LdSt(unsigned opA, MCInst &MI, unsigned Opcode, getT1tRn(insn)))); OpIdx = 2; - // We have either { imm5, tRm } or { tRm } remaining. - // Process the imm5 first. Note that STR/LDR (register) should skip the imm5 - // offset operand for t_addrmode_s[1|2|4]. + // We have either { imm5 } or { tRm } remaining. + // Note that STR/LDR (register) should skip the imm5 offset operand for + // t_addrmode_s[1|2|4]. assert(OpIdx < NumOps && "More operands expected"); if (OpInfo[OpIdx].RegClass < 0 && !OpInfo[OpIdx].isPredicate() && !OpInfo[OpIdx].isOptionalDef()) { - - MI.addOperand(MCOperand::CreateImm(Imm5 ? getT1Imm5(insn) : 0)); + // Table A6-5 16-bit Thumb Load/store instructions + // opA = 0b0101 for STR/LDR (register) and friends. + // Otherwise, we have STR/LDR (immediate) and friends.
+ assert(opA != 5 && "Immediate operand expected for this opcode"); + MI.addOperand(MCOperand::CreateImm(getT1Imm5(insn))); + ++OpIdx; + } else { + // The next reg operand is tRm, the offset. + assert(OpIdx < NumOps && OpInfo[OpIdx].RegClass == ARM::tGPRRegClassID + && "Thumb reg operand expected"); + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::tGPRRegClassID, + getT1tRm(insn)))); ++OpIdx; } - - // The next reg operand is tRm, the offset. - assert(OpIdx < NumOps && OpInfo[OpIdx].RegClass == ARM::tGPRRegClassID - && "Thumb reg operand expected"); - MI.addOperand(MCOperand::CreateReg( - Imm5 ? 0 - : getRegisterEnum(B, ARM::tGPRRegClassID, - getT1tRm(insn)))); - ++OpIdx; - return true; } @@ -895,6 +907,10 @@ static bool DisassembleThumb1LdStMul(bool Ld, MCInst &MI, unsigned Opcode, } unsigned RegListBits = slice(insn, 7, 0); + if (BitCount(RegListBits) < 1) { + DEBUG(errs() << "if BitCount(registers) < 1 then UNPREDICTABLE\n"); + return false; + } // Fill the variadic part of reglist. for (unsigned i = 0; i < 8; ++i) @@ -945,6 +961,11 @@ static bool DisassembleThumb1CondBr(MCInst &MI, unsigned Opcode, uint32_t insn, : (int)Imm8)); // Predicate operands by ARMBasicMCBuilder::TryPredicateAndSBitModifier(). + // But note that for tBcc, if cond = '1110' then UNDEFINED. + if (Opcode == ARM::tBcc && slice(insn, 11, 8) == 14) { + DEBUG(errs() << "if cond = '1110' then UNDEFINED\n"); + return false; + } NumOpsAdded = 1; return true; @@ -965,11 +986,7 @@ static bool DisassembleThumb1Br(MCInst &MI, unsigned Opcode, uint32_t insn, unsigned Imm11 = getT1Imm11(insn); - // When executing a Thumb instruction, PC reads as the address of the current - // instruction plus 4. The assembler subtracts 4 from the difference between - // the branch instruction and the target address, disassembler has to add 4 to - // to compensate. 
- MI.addOperand(MCOperand::CreateImm(SignExtend32<12>(Imm11 << 1) + 4)); + MI.addOperand(MCOperand::CreateImm(SignExtend32<12>(Imm11 << 1))); NumOpsAdded = 1; @@ -1129,8 +1146,12 @@ static bool DisassembleThumb2SRS(MCInst &MI, unsigned Opcode, uint32_t insn, // t2RFE[IA|DB]W/t2RFE[IA|DB]: Rn static bool DisassembleThumb2RFE(MCInst &MI, unsigned Opcode, uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) { - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, - decodeRn(insn)))); + unsigned Rn = decodeRn(insn); + if (Rn == 15) { + DEBUG(errs() << "if n == 15 then UNPREDICTABLE\n"); + return false; + } + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B,ARM::GPRRegClassID,Rn))); NumOpsAdded = 1; return true; } @@ -1149,7 +1170,7 @@ static bool DisassembleThumb2LdStMul(MCInst &MI, unsigned Opcode, uint32_t insn, Opcode == ARM::t2STMIA || Opcode == ARM::t2STMIA_UPD || Opcode == ARM::t2STMDB || Opcode == ARM::t2STMDB_UPD) && "Unexpected opcode"); - assert(NumOps >= 5 && "Thumb2 LdStMul expects NumOps >= 5"); + assert(NumOps >= 4 && "Thumb2 LdStMul expects NumOps >= 4"); NumOpsAdded = 0; @@ -1203,45 +1224,79 @@ static bool DisassembleThumb2LdStEx(MCInst &MI, unsigned Opcode, uint32_t insn, OpIdx = 0; assert(NumOps >= 2 - && OpInfo[0].RegClass == ARM::GPRRegClassID - && OpInfo[1].RegClass == ARM::GPRRegClassID + && OpInfo[0].RegClass > 0 + && OpInfo[1].RegClass > 0 && "Expect >=2 operands and first two as reg operands"); bool isStore = (ARM::t2STREX <= Opcode && Opcode <= ARM::t2STREXH); bool isSW = (Opcode == ARM::t2LDREX || Opcode == ARM::t2STREX); bool isDW = (Opcode == ARM::t2LDREXD || Opcode == ARM::t2STREXD); + unsigned Rt = decodeRd(insn); + unsigned Rt2 = decodeRs(insn); // But note that this is Rd for t2STREX. + unsigned Rd = decodeRm(insn); + unsigned Rn = decodeRn(insn); + + // Some sanity checking first. + if (isStore) { + // if d == n || d == t then UNPREDICTABLE + // if d == n || d == t || d == t2 then UNPREDICTABLE + if (isDW) { + if (Rd == Rn || Rd == Rt || Rd == Rt2) { + DEBUG(errs() << "if d == n || d == t || d == t2 then UNPREDICTABLE\n"); + return false; + } + } else { + if (isSW) { + if (Rt2 == Rn || Rt2 == Rt) { + DEBUG(errs() << "if d == n || d == t then UNPREDICTABLE\n"); + return false; + } + } else { + if (Rd == Rn || Rd == Rt) { + DEBUG(errs() << "if d == n || d == t then UNPREDICTABLE\n"); + return false; + } + } + } + } else { + // Load + // A8.6.71 LDREXD + // if t == t2 then UNPREDICTABLE + if (isDW && Rt == Rt2) { + DEBUG(errs() << "if t == t2 then UNPREDICTABLE\n"); + return false; + } + } + // Add the destination operand for store. if (isStore) { MI.addOperand(MCOperand::CreateReg( - getRegisterEnum(B, ARM::GPRRegClassID, - isSW ? decodeRs(insn) : decodeRm(insn)))); + getRegisterEnum(B, OpInfo[OpIdx].RegClass, + isSW ? Rt2 : Rd))); ++OpIdx; } // Source operand for store and destination operand for load. - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, - decodeRd(insn)))); + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, OpInfo[OpIdx].RegClass, + Rt))); ++OpIdx; // Thumb2 doubleword complication: with an extra source/destination operand. if (isDW) { - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, - decodeRs(insn)))); + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B,OpInfo[OpIdx].RegClass, + Rt2))); ++OpIdx; } // Finally add the pointer operand. 
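The exclusive load/store checks above encode the UNPREDICTABLE clauses from the architecture manual: for store-exclusives the status register (d) must not overlap the base (n) or the data registers (t, t2), and LDREXD additionally requires t != t2. A compact restatement of the store-side predicate, a sketch of the same logic rather than the patch's code:

  #include <cstdio>

  // True when a Thumb2 store-exclusive's registers make it UNPREDICTABLE.
  // d = status register, t/t2 = data registers, n = base register.
  static bool StoreExclusiveUnpredictable(unsigned d, unsigned t, unsigned t2,
                                          unsigned n, bool DoubleWord) {
    if (d == n || d == t)
      return true;                // if d == n || d == t then UNPREDICTABLE
    return DoubleWord && d == t2; // t2STREXD also checks d == t2
  }

  int main() {
    printf("%d\n", StoreExclusiveUnpredictable(0, 1, 2, 3, false)); // 0: OK
    printf("%d\n", StoreExclusiveUnpredictable(3, 1, 2, 3, false)); // 1: d == n
    printf("%d\n", StoreExclusiveUnpredictable(2, 1, 2, 3, true));  // 1: d == t2
    return 0;
  }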
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, - decodeRn(insn)))); + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, OpInfo[OpIdx].RegClass, + Rn))); ++OpIdx; return true; } -// LLVM, as of Jan-05-2010, does not output <Rt2>, i.e., Rs, in the asm. -// Whereas the ARM Arch. Manual does not require that t2 = t+1 like in ARM ISA. -// // t2LDRDi8: Rd Rs Rn imm8s4 (offset mode) // t2LDRDpci: Rd Rs imm8s4 (Not decoded, prefer the generic t2LDRDi8 version) // t2STRDi8: Rd Rs Rn imm8s4 (offset mode) @@ -1255,18 +1310,50 @@ static bool DisassembleThumb2LdStDual(MCInst &MI, unsigned Opcode, if (!OpInfo) return false; assert(NumOps >= 4 - && OpInfo[0].RegClass == ARM::GPRRegClassID - && OpInfo[1].RegClass == ARM::GPRRegClassID - && OpInfo[2].RegClass == ARM::GPRRegClassID + && OpInfo[0].RegClass > 0 + && OpInfo[0].RegClass == OpInfo[1].RegClass + && OpInfo[2].RegClass > 0 && OpInfo[3].RegClass < 0 && "Expect >= 4 operands and first 3 as reg operands"); + // Thumb allows for specifying Rt and Rt2, unlike ARM (which has Rt2==Rt+1). + unsigned Rt = decodeRd(insn); + unsigned Rt2 = decodeRs(insn); + unsigned Rn = decodeRn(insn); + + // Some sanity checking first. + + // A8.6.67 LDRD (literal) has its W bit as (0). + if (Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2LDRD_PRE || Opcode == ARM::t2LDRD_POST) { + if (Rn == 15 && slice(insn, 21, 21) != 0) + return false; + } else { + // For Dual Store, PC cannot be used as the base register. + if (Rn == 15) { + DEBUG(errs() << "if n == 15 then UNPREDICTABLE\n"); + return false; + } + } + if (Rt == Rt2) { + DEBUG(errs() << "if t == t2 then UNPREDICTABLE\n"); + return false; + } + if (Opcode != ARM::t2LDRDi8 && Opcode != ARM::t2STRDi8) { + if (Rn == Rt || Rn == Rt2) { + DEBUG(errs() << "if wback && (n == t || n == t2) then UNPREDICTABLE\n"); + return false; + } + } + // Add the <Rt> <Rt2> operands. - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, + unsigned RegClassPair = OpInfo[0].RegClass; + unsigned RegClassBase = OpInfo[2].RegClass; + + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, RegClassPair, decodeRd(insn)))); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, RegClassPair, decodeRs(insn)))); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, RegClassBase, decodeRn(insn)))); // Finally add (+/-)imm8*4, depending on the U bit. @@ -1394,9 +1481,12 @@ static bool DisassembleThumb2DPSoReg(MCInst &MI, unsigned Opcode, uint32_t insn, if (OpInfo[OpIdx].RegClass < 0 && !OpInfo[OpIdx].isPredicate() && !OpInfo[OpIdx].isOptionalDef()) { - if (Thumb2ShiftOpcode(Opcode)) - MI.addOperand(MCOperand::CreateImm(getShiftAmtBits(insn))); - else { + if (Thumb2ShiftOpcode(Opcode)) { + unsigned Imm = getShiftAmtBits(insn); + ARM_AM::ShiftOpc ShOp = getShiftOpcForBits(slice(insn, 5, 4)); + getImmShiftSE(ShOp, Imm); + MI.addOperand(MCOperand::CreateImm(Imm)); + } else { // Build the constant shift specifier operand.
unsigned bits2 = getShiftTypeBits(insn); unsigned imm5 = getShiftAmtBits(insn); @@ -1421,7 +1511,8 @@ static bool DisassembleThumb2DPSoReg(MCInst &MI, unsigned Opcode, uint32_t insn, static bool DisassembleThumb2DPModImm(MCInst &MI, unsigned Opcode, uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) { - const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; + const TargetInstrDesc &TID = ARMInsts[Opcode]; + const TargetOperandInfo *OpInfo = TID.OpInfo; unsigned &OpIdx = NumOpsAdded; OpIdx = 0; @@ -1448,8 +1539,15 @@ static bool DisassembleThumb2DPModImm(MCInst &MI, unsigned Opcode, DEBUG(errs()<<"Thumb2 encoding error: d==15 for DPModImm 2-reg instr.\n"); return false; } - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, RnRegClassID, - decodeRn(insn)))); + int Idx; + if ((Idx = TID.getOperandConstraint(OpIdx, TOI::TIED_TO)) != -1) { + // The reg operand is tied to the first reg operand. + MI.addOperand(MI.getOperand(Idx)); + } else { + // Add second reg operand. + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, RnRegClassID, + decodeRn(insn)))); + } ++OpIdx; } @@ -1518,7 +1616,7 @@ static bool DisassembleThumb2Sat(MCInst &MI, unsigned Opcode, uint32_t insn, // o t2ADDri12, t2SUBri12: Rs Rn imm12 // o t2LEApcrel (ADR): Rs imm12 // o t2BFC (BFC): Rs Ro(TIED_TO) bf_inv_mask_imm -// o t2BFI (BFI) (Currently not defined in LLVM as of Jan-07-2010) +// o t2BFI (BFI): Rs Ro(TIED_TO) Rn bf_inv_mask_imm // o t2MOVi16: Rs imm16 // o t2MOVTi16: Rs imm16 // o t2SBFX (SBFX): Rs Rn lsb width @@ -1579,9 +1677,10 @@ static bool DisassembleThumb2DPBinImm(MCInst &MI, unsigned Opcode, if (Opcode == ARM::t2ADDri12 || Opcode == ARM::t2SUBri12 || Opcode == ARM::t2LEApcrel) MI.addOperand(MCOperand::CreateImm(getIImm3Imm8(insn))); - else if (Opcode == ARM::t2MOVi16 || Opcode == ARM::t2MOVTi16) - MI.addOperand(MCOperand::CreateImm(getImm16(insn))); - else if (Opcode == ARM::t2BFC || Opcode == ARM::t2BFI) { + else if (Opcode == ARM::t2MOVi16 || Opcode == ARM::t2MOVTi16) { + if (!B->tryAddingSymbolicOperand(getImm16(insn), 4, MI)) + MI.addOperand(MCOperand::CreateImm(getImm16(insn))); + } else if (Opcode == ARM::t2BFC || Opcode == ARM::t2BFI) { uint32_t mask = 0; if (getBitfieldInvMask(insn, mask)) MI.addOperand(MCOperand::CreateImm(mask)); @@ -1625,8 +1724,7 @@ static inline bool t2MiscCtrlInstr(uint32_t insn) { // A8.6.26 // t2BXJ -> Rn // -// Miscellaneous control: t2DMBsy (and its t2DMB variants), -// t2DSBsy (and its t2DSB varianst), t2ISBsy, t2CLREX +// Miscellaneous control: // -> no operand (except pred-imm pred-ccr for CLREX, memory barrier variants) // // Hint: t2NOP, t2YIELD, t2WFE, t2WFI, t2SEV @@ -1643,6 +1741,22 @@ static bool DisassembleThumb2BrMiscCtrl(MCInst &MI, unsigned Opcode, if (NumOps == 0) return true; + if (Opcode == ARM::t2DMB || Opcode == ARM::t2DSB) { + // Inst{3-0} encodes the memory barrier option for the variants. + unsigned opt = slice(insn, 3, 0); + switch (opt) { + case ARM_MB::SY: case ARM_MB::ST: + case ARM_MB::ISH: case ARM_MB::ISHST: + case ARM_MB::NSH: case ARM_MB::NSHST: + case ARM_MB::OSH: case ARM_MB::OSHST: + MI.addOperand(MCOperand::CreateImm(opt)); + NumOpsAdded = 1; + return true; + default: + return false; + } + } + if (t2MiscCtrlInstr(insn)) return true; @@ -1719,6 +1833,17 @@ static bool DisassembleThumb2BrMiscCtrl(MCInst &MI, unsigned Opcode, return true; } + // Some instructions have predicate operands first before the immediate. 
+ if (Opcode == ARM::tBLXi_r9 || Opcode == ARM::tBLr9) { + // Handling the two predicate operands before the imm operand. + if (B->DoPredicateOperands(MI, Opcode, insn, NumOps)) + NumOpsAdded += 2; + else { + DEBUG(errs() << "Expected predicate operands not found.\n"); + return false; + } + } + // Add the imm operand. int Offset = 0; @@ -1739,13 +1864,12 @@ static bool DisassembleThumb2BrMiscCtrl(MCInst &MI, unsigned Opcode, Offset = decodeImm32_BLX(insn); break; } - // When executing a Thumb instruction, PC reads as the address of the current - // instruction plus 4. The assembler subtracts 4 from the difference between - // the branch instruction and the target address, disassembler has to add 4 to - // to compensate. - MI.addOperand(MCOperand::CreateImm(Offset + 4)); - NumOpsAdded = 1; + if (!B->tryAddingSymbolicOperand(Offset + B->getBuilderAddress() + 4, 4, MI)) + MI.addOperand(MCOperand::CreateImm(Offset)); + + // This is an increment as some predicate operands may have been added first. + NumOpsAdded += 1; return true; } @@ -1787,7 +1911,7 @@ static bool DisassembleThumb2PreLoad(MCInst &MI, unsigned Opcode, uint32_t insn, decodeRn(insn)))); ++OpIdx; - if (OpInfo[OpIdx].RegClass == ARM::GPRRegClassID) { + if (OpInfo[OpIdx].RegClass == ARM::rGPRRegClassID) { MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRm(insn)))); } else { @@ -1795,17 +1919,17 @@ static bool DisassembleThumb2PreLoad(MCInst &MI, unsigned Opcode, uint32_t insn, && !OpInfo[OpIdx].isOptionalDef() && "Pure imm operand expected"); int Offset = 0; - if (slice(insn, 19, 16) == 0xFF) { - bool Negative = slice(insn, 23, 23) == 0; - unsigned Imm12 = getImm12(insn); - Offset = Negative ? -1 - Imm12 : 1 * Imm12; - } else if (Opcode == ARM::t2PLDi8 || Opcode == ARM::t2PLDWi8 || - Opcode == ARM::t2PLIi8) { + if (Opcode == ARM::t2PLDi8 || Opcode == ARM::t2PLDWi8 || + Opcode == ARM::t2PLIi8) { // A8.6.117 Encoding T2: add = FALSE unsigned Imm8 = getImm8(insn); - Offset = -1 - Imm8; - } else // The i12 forms. See, for example, A8.6.117 Encoding T1. + Offset = -1 * Imm8; + } else { + // The i12 forms. See, for example, A8.6.117 Encoding T1. + // Note that currently t2PLDi12 also handles the previously named t2PLDpci + // opcode; that's why we use decodeImm12(insn) which returns +/- imm12. Offset = decodeImm12(insn); + } MI.addOperand(MCOperand::CreateImm(Offset)); } ++OpIdx; @@ -1820,6 +1944,87 @@ static bool DisassembleThumb2PreLoad(MCInst &MI, unsigned Opcode, uint32_t insn, return true; } +static bool BadRegsThumb2LdSt(unsigned Opcode, uint32_t insn, bool Load, + unsigned R0, unsigned R1, unsigned R2, bool UseRm, bool WB) { + + // Inst{22-21} encodes the data item transferred for load/store. + // For single word, it is encoded as 0b10.
+ bool Word = (slice(insn, 22, 21) == 2); + bool Half = (slice(insn, 22, 21) == 1); + bool Byte = (slice(insn, 22, 21) == 0); + + if (UseRm && BadReg(R2)) { + DEBUG(errs() << "if BadReg(m) then UNPREDICTABLE\n"); + return true; + } + + if (Load) { + if (!Word && R0 == 13) { + DEBUG(errs() << "if t == 13 then UNPREDICTABLE\n"); + return true; + } + if (Byte) { + if (WB && R0 == 15 && slice(insn, 10, 8) == 3) { + // A8.6.78 LDRSB (immediate) Encoding T2 (errata markup 8.0) + DEBUG(errs() << "if t == 15 && PUW == '011' then UNPREDICTABLE\n"); + return true; + } + } + // A6.3.8 Load halfword, memory hints + if (Half) { + if (WB) { + if (R0 == R1) { + // A8.6.82 LDRSH (immediate) Encoding T2 + DEBUG(errs() << "if WB && n == t then UNPREDICTABLE\n"); + return true; + } + if (R0 == 15 && slice(insn, 10, 8) == 3) { + // A8.6.82 LDRSH (immediate) Encoding T2 (errata markup 8.0) + DEBUG(errs() << "if t == 15 && PUW == '011' then UNPREDICTABLE\n"); + return true; + } + } else { + if (Opcode == ARM::t2LDRHi8 || Opcode == ARM::t2LDRSHi8) { + if (R0 == 15 && slice(insn, 10, 8) == 4) { + // A8.6.82 LDRSH (immediate) Encoding T2 + DEBUG(errs() << "if Rt == '1111' and PUW == '100' then SEE" + << " \"Unallocated memory hints\"\n"); + return true; + } + } else { + if (R0 == 15) { + // A8.6.82 LDRSH (immediate) Encoding T1 + DEBUG(errs() << "if Rt == '1111' then SEE" + << " \"Unallocated memory hints\"\n"); + return true; + } + } + } + } + } else { + if (WB && R0 == R1) { + DEBUG(errs() << "if wback && n == t then UNPREDICTABLE\n"); + return true; + } + if ((WB && R0 == 15) || (!WB && R1 == 15)) { + DEBUG(errs() << "if Rn == '1111' then UNDEFINED\n"); + return true; + } + if (Word) { + if ((WB && R1 == 15) || (!WB && R0 == 15)) { + DEBUG(errs() << "if t == 15 then UNPREDICTABLE\n"); + return true; + } + } else { + if ((WB && BadReg(R1)) || (!WB && BadReg(R0))) { + DEBUG(errs() << "if BadReg(t) then UNPREDICTABLE\n"); + return true; + } + } + } + return false; +} + // A6.3.10 Store single data item // A6.3.9 Load byte, memory hints // A6.3.8 Load halfword, memory hints @@ -1865,16 +2070,16 @@ static bool DisassembleThumb2LdSt(bool Load, MCInst &MI, unsigned Opcode, OpIdx = 0; assert(NumOps >= 3 && - OpInfo[0].RegClass == ARM::GPRRegClassID && - OpInfo[1].RegClass == ARM::GPRRegClassID && + OpInfo[0].RegClass > 0 && + OpInfo[1].RegClass > 0 && "Expect >= 3 operands and first two as reg operands"); - bool ThreeReg = (OpInfo[2].RegClass == ARM::GPRRegClassID); + bool ThreeReg = (OpInfo[2].RegClass > 0); bool TIED_TO = ThreeReg && TID.getOperandConstraint(2, TOI::TIED_TO) != -1; bool Imm12 = !ThreeReg && slice(insn, 23, 23) == 1; // ARMInstrThumb2.td // Build the register operands, followed by the immediate. - unsigned R0, R1, R2 = 0; + unsigned R0 = 0, R1 = 0, R2 = 0; unsigned Rd = decodeRd(insn); int Imm = 0; @@ -1905,19 +2110,24 @@ static bool DisassembleThumb2LdSt(bool Load, MCInst &MI, unsigned Opcode, Imm = decodeImm8(insn); } - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, OpInfo[OpIdx].RegClass, R0))); ++OpIdx; - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, OpInfo[OpIdx].RegClass, R1))); ++OpIdx; if (ThreeReg) { - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, + // This could be an offset register or a TIED_TO register. 
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B,OpInfo[OpIdx].RegClass, R2))); ++OpIdx; } + if (BadRegsThumb2LdSt(Opcode, insn, Load, R0, R1, R2, ThreeReg & !TIED_TO, + TIED_TO)) + return false; + assert(OpInfo[OpIdx].RegClass < 0 && !OpInfo[OpIdx].isPredicate() && !OpInfo[OpIdx].isOptionalDef() && "Pure imm operand expected"); @@ -1947,25 +2157,25 @@ static bool DisassembleThumb2DPReg(MCInst &MI, unsigned Opcode, uint32_t insn, OpIdx = 0; assert(NumOps >= 2 && - OpInfo[0].RegClass == ARM::rGPRRegClassID && - OpInfo[1].RegClass == ARM::rGPRRegClassID && + OpInfo[0].RegClass > 0 && + OpInfo[1].RegClass > 0 && "Expect >= 2 operands and first two as reg operands"); // Build the register operands, followed by the optional rotation amount. - bool ThreeReg = NumOps > 2 && OpInfo[2].RegClass == ARM::rGPRRegClassID; + bool ThreeReg = NumOps > 2 && OpInfo[2].RegClass > 0; - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::rGPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, OpInfo[OpIdx].RegClass, decodeRs(insn)))); ++OpIdx; if (ThreeReg) { - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::rGPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B,OpInfo[OpIdx].RegClass, decodeRn(insn)))); ++OpIdx; } - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::rGPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, OpInfo[OpIdx].RegClass, decodeRm(insn)))); ++OpIdx; diff --git a/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp b/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp index 1499da00ae1c..fc2aa7526b7f 100644 --- a/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp +++ b/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp @@ -29,6 +29,9 @@ StringRef ARMInstPrinter::getOpcodeName(unsigned Opcode) const { return getInstructionName(Opcode); } +StringRef ARMInstPrinter::getRegName(unsigned RegNo) const { + return getRegisterName(RegNo); +} void ARMInstPrinter::printInst(const MCInst *MI, raw_ostream &O) { unsigned Opcode = MI->getOpcode(); @@ -133,9 +136,10 @@ static void printSOImm(raw_ostream &O, int64_t V, raw_ostream *CommentStream, unsigned Rot = ARM_AM::getSOImmValRot(V); // Print low-level immediate formation info, per - // A5.1.3: "Data-processing operands - Immediate". + // A5.2.3: Data-processing (immediate), and + // A5.2.4: Modified immediate constants in ARM instructions if (Rot) { - O << "#" << Imm << ", " << Rot; + O << "#" << Imm << ", #" << Rot; // Pretty printed version. if (CommentStream) *CommentStream << (int)ARM_AM::rotr32(Imm, Rot) << "\n"; @@ -178,18 +182,16 @@ void ARMInstPrinter::printSORegOperand(const MCInst *MI, unsigned OpNum, } } +//===--------------------------------------------------------------------===// +// Addressing Mode #2 +//===--------------------------------------------------------------------===// -void ARMInstPrinter::printAddrMode2Operand(const MCInst *MI, unsigned Op, - raw_ostream &O) { +void ARMInstPrinter::printAM2PreOrOffsetIndexOp(const MCInst *MI, unsigned Op, + raw_ostream &O) { const MCOperand &MO1 = MI->getOperand(Op); const MCOperand &MO2 = MI->getOperand(Op+1); const MCOperand &MO3 = MI->getOperand(Op+2); - if (!MO1.isReg()) { // FIXME: This is for CP entries, but isn't right. 
- printOperand(MI, Op, O); - return; - } - O << "[" << getRegisterName(MO1.getReg()); if (!MO2.getReg()) { @@ -212,6 +214,50 @@ void ARMInstPrinter::printAddrMode2Operand(const MCInst *MI, unsigned Op, O << "]"; } +void ARMInstPrinter::printAM2PostIndexOp(const MCInst *MI, unsigned Op, + raw_ostream &O) { + const MCOperand &MO1 = MI->getOperand(Op); + const MCOperand &MO2 = MI->getOperand(Op+1); + const MCOperand &MO3 = MI->getOperand(Op+2); + + O << "[" << getRegisterName(MO1.getReg()) << "], "; + + if (!MO2.getReg()) { + unsigned ImmOffs = ARM_AM::getAM2Offset(MO3.getImm()); + O << '#' + << ARM_AM::getAddrOpcStr(ARM_AM::getAM2Op(MO3.getImm())) + << ImmOffs; + return; + } + + O << ARM_AM::getAddrOpcStr(ARM_AM::getAM2Op(MO3.getImm())) + << getRegisterName(MO2.getReg()); + + if (unsigned ShImm = ARM_AM::getAM2Offset(MO3.getImm())) + O << ", " + << ARM_AM::getShiftOpcStr(ARM_AM::getAM2ShiftOpc(MO3.getImm())) + << " #" << ShImm; +} + +void ARMInstPrinter::printAddrMode2Operand(const MCInst *MI, unsigned Op, + raw_ostream &O) { + const MCOperand &MO1 = MI->getOperand(Op); + + if (!MO1.isReg()) { // FIXME: This is for CP entries, but isn't right. + printOperand(MI, Op, O); + return; + } + + const MCOperand &MO3 = MI->getOperand(Op+2); + unsigned IdxMode = ARM_AM::getAM2IdxMode(MO3.getImm()); + + if (IdxMode == ARMII::IndexModePost) { + printAM2PostIndexOp(MI, Op, O); + return; + } + printAM2PreOrOffsetIndexOp(MI, Op, O); +} + void ARMInstPrinter::printAddrMode2OffsetOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O) { @@ -235,11 +281,35 @@ void ARMInstPrinter::printAddrMode2OffsetOperand(const MCInst *MI, << " #" << ShImm; } -void ARMInstPrinter::printAddrMode3Operand(const MCInst *MI, unsigned OpNum, - raw_ostream &O) { - const MCOperand &MO1 = MI->getOperand(OpNum); - const MCOperand &MO2 = MI->getOperand(OpNum+1); - const MCOperand &MO3 = MI->getOperand(OpNum+2); +//===--------------------------------------------------------------------===// +// Addressing Mode #3 +//===--------------------------------------------------------------------===// + +void ARMInstPrinter::printAM3PostIndexOp(const MCInst *MI, unsigned Op, + raw_ostream &O) { + const MCOperand &MO1 = MI->getOperand(Op); + const MCOperand &MO2 = MI->getOperand(Op+1); + const MCOperand &MO3 = MI->getOperand(Op+2); + + O << "[" << getRegisterName(MO1.getReg()) << "], "; + + if (MO2.getReg()) { + O << (char)ARM_AM::getAM3Op(MO3.getImm()) + << getRegisterName(MO2.getReg()); + return; + } + + unsigned ImmOffs = ARM_AM::getAM3Offset(MO3.getImm()); + O << '#' + << ARM_AM::getAddrOpcStr(ARM_AM::getAM3Op(MO3.getImm())) + << ImmOffs; +} + +void ARMInstPrinter::printAM3PreOrOffsetIndexOp(const MCInst *MI, unsigned Op, + raw_ostream &O) { + const MCOperand &MO1 = MI->getOperand(Op); + const MCOperand &MO2 = MI->getOperand(Op+1); + const MCOperand &MO3 = MI->getOperand(Op+2); O << '[' << getRegisterName(MO1.getReg()); @@ -256,6 +326,18 @@ void ARMInstPrinter::printAddrMode3Operand(const MCInst *MI, unsigned OpNum, O << ']'; } +void ARMInstPrinter::printAddrMode3Operand(const MCInst *MI, unsigned Op, + raw_ostream &O) { + const MCOperand &MO3 = MI->getOperand(Op+2); + unsigned IdxMode = ARM_AM::getAM3IdxMode(MO3.getImm()); + + if (IdxMode == ARMII::IndexModePost) { + printAM3PostIndexOp(MI, Op, O); + return; + } + printAM3PreOrOffsetIndexOp(MI, Op, O); +} + void ARMInstPrinter::printAddrMode3OffsetOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O) { @@ -314,6 +396,12 @@ void ARMInstPrinter::printAddrMode6Operand(const MCInst *MI, 
unsigned OpNum, O << "]"; } +void ARMInstPrinter::printAddrMode7Operand(const MCInst *MI, unsigned OpNum, + raw_ostream &O) { + const MCOperand &MO1 = MI->getOperand(OpNum); + O << "[" << getRegisterName(MO1.getReg()) << "]"; +} + void ARMInstPrinter::printAddrMode6OffsetOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O) { @@ -414,16 +502,6 @@ void ARMInstPrinter::printMSRMaskOperand(const MCInst *MI, unsigned OpNum, } } -void ARMInstPrinter::printNegZeroOperand(const MCInst *MI, unsigned OpNum, - raw_ostream &O) { - const MCOperand &Op = MI->getOperand(OpNum); - O << '#'; - if (Op.getImm() < 0) - O << '-' << (-Op.getImm() - 1); - else - O << Op.getImm(); -} - void ARMInstPrinter::printPredicateOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O) { ARMCC::CondCodes CC = (ARMCC::CondCodes)MI->getOperand(OpNum).getImm(); diff --git a/lib/Target/ARM/InstPrinter/ARMInstPrinter.h b/lib/Target/ARM/InstPrinter/ARMInstPrinter.h index 679d3135ea6d..b3ac03ab2200 100644 --- a/lib/Target/ARM/InstPrinter/ARMInstPrinter.h +++ b/lib/Target/ARM/InstPrinter/ARMInstPrinter.h @@ -17,14 +17,18 @@ #include "llvm/MC/MCInstPrinter.h" namespace llvm { - class MCOperand; + +class MCOperand; +class TargetMachine; class ARMInstPrinter : public MCInstPrinter { public: - ARMInstPrinter(const MCAsmInfo &MAI) : MCInstPrinter(MAI) {} + ARMInstPrinter(TargetMachine &TM, const MCAsmInfo &MAI) + : MCInstPrinter(MAI) {} virtual void printInst(const MCInst *MI, raw_ostream &O); virtual StringRef getOpcodeName(unsigned Opcode) const; + virtual StringRef getRegName(unsigned RegNo) const; static const char *getInstructionName(unsigned Opcode); @@ -38,15 +42,25 @@ public: void printSOImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); void printSORegOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); + void printAddrMode2Operand(const MCInst *MI, unsigned OpNum, raw_ostream &O); + void printAM2PostIndexOp(const MCInst *MI, unsigned OpNum, raw_ostream &O); + void printAM2PreOrOffsetIndexOp(const MCInst *MI, unsigned OpNum, + raw_ostream &O); void printAddrMode2OffsetOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); + void printAddrMode3Operand(const MCInst *MI, unsigned OpNum, raw_ostream &O); + void printAM3PostIndexOp(const MCInst *MI, unsigned OpNum, raw_ostream &O); + void printAM3PreOrOffsetIndexOp(const MCInst *MI, unsigned OpNum, + raw_ostream &O); void printAddrMode3OffsetOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); + void printLdStmModeOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); void printAddrMode5Operand(const MCInst *MI, unsigned OpNum, raw_ostream &O); void printAddrMode6Operand(const MCInst *MI, unsigned OpNum, raw_ostream &O); + void printAddrMode7Operand(const MCInst *MI, unsigned OpNum, raw_ostream &O); void printAddrMode6OffsetOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); @@ -87,9 +101,7 @@ public: void printSetendOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); void printCPSIMod(const MCInst *MI, unsigned OpNum, raw_ostream &O); void printCPSIFlag(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printCPSOptionOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); void printMSRMaskOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printNegZeroOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); void printPredicateOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); void printMandatoryPredicateOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); diff --git 
a/lib/Target/ARM/MLxExpansionPass.cpp b/lib/Target/ARM/MLxExpansionPass.cpp index 9a27e2f47064..f6d024232eae 100644 --- a/lib/Target/ARM/MLxExpansionPass.cpp +++ b/lib/Target/ARM/MLxExpansionPass.cpp @@ -15,11 +15,13 @@ #define DEBUG_TYPE "mlx-expansion" #include "ARM.h" #include "ARMBaseInstrInfo.h" +#include "ARMSubtarget.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/Statistic.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" @@ -49,15 +51,17 @@ namespace { const TargetRegisterInfo *TRI; MachineRegisterInfo *MRI; + bool isA9; unsigned MIIdx; MachineInstr* LastMIs[4]; + SmallPtrSet<MachineInstr*, 4> IgnoreStall; void clearStack(); void pushStack(MachineInstr *MI); MachineInstr *getAccDefMI(MachineInstr *MI) const; unsigned getDefReg(MachineInstr *MI) const; bool hasRAWHazard(unsigned Reg, MachineInstr *MI) const; - bool FindMLxHazard(MachineInstr *MI) const; + bool FindMLxHazard(MachineInstr *MI); void ExpandFPMLxInstruction(MachineBasicBlock &MBB, MachineInstr *MI, unsigned MulOpc, unsigned AddSubOpc, bool NegAcc, bool HasLane); @@ -146,7 +150,7 @@ bool MLxExpansion::hasRAWHazard(unsigned Reg, MachineInstr *MI) const { } -bool MLxExpansion::FindMLxHazard(MachineInstr *MI) const { +bool MLxExpansion::FindMLxHazard(MachineInstr *MI) { if (NumExpand >= ExpandLimit) return false; @@ -154,7 +158,7 @@ bool MLxExpansion::FindMLxHazard(MachineInstr *MI) const { return true; MachineInstr *DefMI = getAccDefMI(MI); - if (TII->isFpMLxInstruction(DefMI->getOpcode())) + if (TII->isFpMLxInstruction(DefMI->getOpcode())) { // r0 = vmla // r3 = vmla r0, r1, r2 // takes 16 - 17 cycles @@ -163,24 +167,33 @@ bool MLxExpansion::FindMLxHazard(MachineInstr *MI) const { // r4 = vmul r1, r2 // r3 = vadd r0, r4 // takes about 14 - 15 cycles even with vmul stalling for 4 cycles. + IgnoreStall.insert(DefMI); return true; + } + + if (IgnoreStall.count(MI)) + return false; // If a VMLA.F is followed by an VADD.F or VMUL.F with no RAW hazard, the // VADD.F or VMUL.F will stall 4 cycles before issue. The 4 cycle stall // preserves the in-order retirement of the instructions. // Look at the next few instructions, if *most* of them can cause hazards, // then the scheduler can't *fix* this, we'd better break up the VMLA. + unsigned Limit1 = isA9 ? 1 : 4; + unsigned Limit2 = isA9 ? 1 : 4; for (unsigned i = 1; i <= 4; ++i) { int Idx = ((int)MIIdx - i + 4) % 4; MachineInstr *NextMI = LastMIs[Idx]; if (!NextMI) continue; - if (TII->canCauseFpMLxStall(NextMI->getOpcode())) - return true; + if (TII->canCauseFpMLxStall(NextMI->getOpcode())) { + if (i <= Limit1) + return true; + } // Look for VMLx RAW hazard. 
- if (hasRAWHazard(getDefReg(MI), NextMI)) + if (i <= Limit2 && hasRAWHazard(getDefReg(MI), NextMI)) return true; } @@ -248,6 +261,7 @@ bool MLxExpansion::ExpandFPMLxInstructions(MachineBasicBlock &MBB) { bool Changed = false; clearStack(); + IgnoreStall.clear(); unsigned Skip = 0; MachineBasicBlock::reverse_iterator MII = MBB.rbegin(), E = MBB.rend(); @@ -299,6 +313,8 @@ bool MLxExpansion::runOnMachineFunction(MachineFunction &Fn) { TII = static_cast<const ARMBaseInstrInfo*>(Fn.getTarget().getInstrInfo()); TRI = Fn.getTarget().getRegisterInfo(); MRI = &Fn.getRegInfo(); + const ARMSubtarget *STI = &Fn.getTarget().getSubtarget<ARMSubtarget>(); + isA9 = STI->isCortexA9(); bool Modified = false; for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E; diff --git a/lib/Target/ARM/README.txt b/lib/Target/ARM/README.txt index 9fc3fb92cb2c..8ba9a27e95c8 100644 --- a/lib/Target/ARM/README.txt +++ b/lib/Target/ARM/README.txt @@ -657,3 +657,27 @@ Note that both "tst" and "moveq" are redundant. //===---------------------------------------------------------------------===// +When loading immediate constants with movt/movw, if there are multiple +constants needed with the same low 16 bits, and those values are not live at +the same time, it would be possible to use a single movw instruction, followed +by multiple movt instructions to rewrite the high bits to different values. +For example: + + volatile store i32 -1, i32* inttoptr (i32 1342210076 to i32*), align 4, + !tbaa +!0 + volatile store i32 -1, i32* inttoptr (i32 1342341148 to i32*), align 4, + !tbaa +!0 + +is compiled and optimized to: + + movw r0, #32796 + mov.w r1, #-1 + movt r0, #20480 + str r1, [r0] + movw r0, #32796 @ <= this MOVW is not needed, value is there already + movt r0, #20482 + str r1, [r0] + +//===---------------------------------------------------------------------===// diff --git a/lib/Target/ARM/Thumb1FrameLowering.cpp b/lib/Target/ARM/Thumb1FrameLowering.cpp index 233e16538771..dee3d278203f 100644 --- a/lib/Target/ARM/Thumb1FrameLowering.cpp +++ b/lib/Target/ARM/Thumb1FrameLowering.cpp @@ -34,13 +34,14 @@ bool Thumb1FrameLowering::hasReservedCallFrame(const MachineFunction &MF) const return !MF.getFrameInfo()->hasVarSizedObjects(); } -static void emitSPUpdate(MachineBasicBlock &MBB, - MachineBasicBlock::iterator &MBBI, - const TargetInstrInfo &TII, DebugLoc dl, - const Thumb1RegisterInfo &MRI, - int NumBytes) { - emitThumbRegPlusImmediate(MBB, MBBI, ARM::SP, ARM::SP, NumBytes, TII, - MRI, dl); +static void +emitSPUpdate(MachineBasicBlock &MBB, + MachineBasicBlock::iterator &MBBI, + const TargetInstrInfo &TII, DebugLoc dl, + const Thumb1RegisterInfo &MRI, + int NumBytes, unsigned MIFlags = MachineInstr::NoFlags) { + emitThumbRegPlusImmediate(MBB, MBBI, dl, ARM::SP, ARM::SP, NumBytes, TII, + MRI, MIFlags); } void Thumb1FrameLowering::emitPrologue(MachineFunction &MF) const { @@ -70,11 +71,13 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF) const { int FramePtrSpillFI = 0; if (VARegSaveSize) - emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, -VARegSaveSize); + emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, -VARegSaveSize, + MachineInstr::FrameSetup); if (!AFI->hasStackFrame()) { if (NumBytes != 0) - emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, -NumBytes); + emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, -NumBytes, + MachineInstr::FrameSetup); return; } @@ -131,7 +134,8 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF) const { // Adjust FP so it points to the stack slot that contains the previous FP.
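To make the README example above concrete: movw writes the low 16 bits of a register and movt rewrites the high 16, so two addresses sharing a low half can share one movw. The immediates in the example fall out of a simple split (a worked example using the constants from the note; the split itself is standard arithmetic):

  #include <cstdint>
  #include <cstdio>

  int main() {
    uint32_t Addr1 = 1342210076u; // first store address from the note
    uint32_t Addr2 = 1342341148u; // second store address from the note
    printf("movw #%u, movt #%u\n", Addr1 & 0xFFFFu, Addr1 >> 16); // 32796, 20480
    printf("movw #%u, movt #%u\n", Addr2 & 0xFFFFu, Addr2 >> 16); // 32796, 20482
    // Identical low halves (32796), so the second movw is redundant,
    // which is exactly the missed optimization the note describes.
    return 0;
  }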
if (hasFP(MF)) { BuildMI(MBB, MBBI, dl, TII.get(ARM::tADDrSPi), FramePtr) - .addFrameIndex(FramePtrSpillFI).addImm(0); + .addFrameIndex(FramePtrSpillFI).addImm(0) + .setMIFlags(MachineInstr::FrameSetup); if (NumBytes > 7) // If offset is > 7 then sp cannot be adjusted in a single instruction, // try restoring from fp instead. @@ -140,7 +144,8 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF) const { if (NumBytes) // Insert it after all the callee-save spills. - emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, -NumBytes); + emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, -NumBytes, + MachineInstr::FrameSetup); if (STI.isTargetELF() && hasFP(MF)) MFI->setOffsetAdjustment(MFI->getOffsetAdjustment() - @@ -156,7 +161,7 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF) const { // to reference locals. if (RegInfo->hasBasePointer(MF)) BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVgpr2gpr), BasePtr).addReg(ARM::SP); - + // If the frame has variable sized objects then the epilogue must restore // the sp from fp. We can assume there's an FP here since hasFP already // checks for hasVarSizedObjects. @@ -232,8 +237,8 @@ void Thumb1FrameLowering::emitEpilogue(MachineFunction &MF, if (NumBytes) { assert(MF.getRegInfo().isPhysRegUsed(ARM::R4) && "No scratch register to restore SP from FP!"); - emitThumbRegPlusImmediate(MBB, MBBI, ARM::R4, FramePtr, -NumBytes, - TII, *RegInfo, dl); + emitThumbRegPlusImmediate(MBB, MBBI, dl, ARM::R4, FramePtr, -NumBytes, + TII, *RegInfo); BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVtgpr2gpr), ARM::SP) .addReg(ARM::R4); } else @@ -307,6 +312,7 @@ spillCalleeSavedRegisters(MachineBasicBlock &MBB, MIB.addReg(Reg, getKillRegState(isKill)); } + MIB.setMIFlags(MachineInstr::FrameSetup); return true; } diff --git a/lib/Target/ARM/Thumb1FrameLowering.h b/lib/Target/ARM/Thumb1FrameLowering.h index c592e125de17..bcfc5165fad0 100644 --- a/lib/Target/ARM/Thumb1FrameLowering.h +++ b/lib/Target/ARM/Thumb1FrameLowering.h @@ -12,7 +12,7 @@ //===----------------------------------------------------------------------===// #ifndef __THUMB_FRAMEINFO_H_ -#define __THUMM_FRAMEINFO_H_ +#define __THUMB_FRAMEINFO_H_ #include "ARM.h" #include "ARMFrameLowering.h" diff --git a/lib/Target/ARM/Thumb1RegisterInfo.cpp b/lib/Target/ARM/Thumb1RegisterInfo.cpp index f62a13e3e288..33cefb6e79bb 100644 --- a/lib/Target/ARM/Thumb1RegisterInfo.cpp +++ b/lib/Target/ARM/Thumb1RegisterInfo.cpp @@ -31,8 +31,6 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/Target/TargetFrameLowering.h" #include "llvm/Target/TargetMachine.h" -#include "llvm/ADT/BitVector.h" -#include "llvm/ADT/SmallVector.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" @@ -48,15 +46,29 @@ Thumb1RegisterInfo::Thumb1RegisterInfo(const ARMBaseInstrInfo &tii, : ARMBaseRegisterInfo(tii, sti) { } +const TargetRegisterClass* +Thumb1RegisterInfo::getLargestLegalSuperClass(const TargetRegisterClass *RC) + const { + if (RC == ARM::tGPRRegisterClass || RC->hasSuperClass(ARM::tGPRRegisterClass)) + return ARM::tGPRRegisterClass; + return ARMBaseRegisterInfo::getLargestLegalSuperClass(RC); +} + +const TargetRegisterClass * +Thumb1RegisterInfo::getPointerRegClass(unsigned Kind) const { + return ARM::tGPRRegisterClass; +} + /// emitLoadConstPool - Emits a load from constpool to materialize the /// specified immediate. 
-void Thumb1RegisterInfo::emitLoadConstPool(MachineBasicBlock &MBB, - MachineBasicBlock::iterator &MBBI, - DebugLoc dl, - unsigned DestReg, unsigned SubIdx, - int Val, - ARMCC::CondCodes Pred, - unsigned PredReg) const { +void +Thumb1RegisterInfo::emitLoadConstPool(MachineBasicBlock &MBB, + MachineBasicBlock::iterator &MBBI, + DebugLoc dl, + unsigned DestReg, unsigned SubIdx, + int Val, + ARMCC::CondCodes Pred, unsigned PredReg, + unsigned MIFlags) const { MachineFunction &MF = *MBB.getParent(); MachineConstantPool *ConstantPool = MF.getConstantPool(); const Constant *C = ConstantInt::get( @@ -64,8 +76,9 @@ void Thumb1RegisterInfo::emitLoadConstPool(MachineBasicBlock &MBB, unsigned Idx = ConstantPool->getConstantPoolIndex(C, 4); BuildMI(MBB, MBBI, dl, TII.get(ARM::tLDRpci)) - .addReg(DestReg, getDefRegState(true), SubIdx) - .addConstantPoolIndex(Idx).addImm(Pred).addReg(PredReg); + .addReg(DestReg, getDefRegState(true), SubIdx) + .addConstantPoolIndex(Idx).addImm(Pred).addReg(PredReg) + .setMIFlags(MIFlags); } @@ -76,11 +89,12 @@ void Thumb1RegisterInfo::emitLoadConstPool(MachineBasicBlock &MBB, static void emitThumbRegPlusImmInReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, + DebugLoc dl, unsigned DestReg, unsigned BaseReg, int NumBytes, bool CanChangeCC, const TargetInstrInfo &TII, const ARMBaseRegisterInfo& MRI, - DebugLoc dl) { + unsigned MIFlags = MachineInstr::NoFlags) { MachineFunction &MF = *MBB.getParent(); bool isHigh = !isARMLowRegister(DestReg) || (BaseReg != 0 && !isARMLowRegister(BaseReg)); @@ -101,14 +115,15 @@ void emitThumbRegPlusImmInReg(MachineBasicBlock &MBB, if (NumBytes <= 255 && NumBytes >= 0) AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVi8), LdReg)) - .addImm(NumBytes); + .addImm(NumBytes).setMIFlags(MIFlags); else if (NumBytes < 0 && NumBytes >= -255) { AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVi8), LdReg)) - .addImm(NumBytes); + .addImm(NumBytes).setMIFlags(MIFlags); AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TII.get(ARM::tRSB), LdReg)) - .addReg(LdReg, RegState::Kill); + .addReg(LdReg, RegState::Kill).setMIFlags(MIFlags); } else - MRI.emitLoadConstPool(MBB, MBBI, dl, LdReg, 0, NumBytes); + MRI.emitLoadConstPool(MBB, MBBI, dl, LdReg, 0, NumBytes, + ARMCC::AL, 0, MIFlags); // Emit add / sub. int Opc = (isSub) ? ARM::tSUBrr : (isHigh ? ARM::tADDhirr : ARM::tADDrr); @@ -151,10 +166,11 @@ static unsigned calcNumMI(int Opc, int ExtraOpc, unsigned Bytes, /// a destreg = basereg + immediate in Thumb code. void llvm::emitThumbRegPlusImmediate(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, + DebugLoc dl, unsigned DestReg, unsigned BaseReg, int NumBytes, const TargetInstrInfo &TII, const ARMBaseRegisterInfo& MRI, - DebugLoc dl) { + unsigned MIFlags) { bool isSub = NumBytes < 0; unsigned Bytes = (unsigned)NumBytes; if (isSub) Bytes = -NumBytes; @@ -211,8 +227,9 @@ void llvm::emitThumbRegPlusImmediate(MachineBasicBlock &MBB, if (NumMIs > Threshold) { // This will expand into too many instructions. Load the immediate from a // constpool entry. - emitThumbRegPlusImmInReg(MBB, MBBI, DestReg, BaseReg, NumBytes, true, TII, - MRI, dl); + emitThumbRegPlusImmInReg(MBB, MBBI, dl, + DestReg, BaseReg, NumBytes, true, + TII, MRI, MIFlags); return; } @@ -224,11 +241,12 @@ void llvm::emitThumbRegPlusImmediate(MachineBasicBlock &MBB, Bytes -= ThisVal; const TargetInstrDesc &TID = TII.get(isSub ? 
ARM::tSUBi3 : ARM::tADDi3); const MachineInstrBuilder MIB = - AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TID, DestReg)); + AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TID, DestReg).setMIFlags(MIFlags)); AddDefaultPred(MIB.addReg(BaseReg, RegState::Kill).addImm(ThisVal)); } else { BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), DestReg) - .addReg(BaseReg, RegState::Kill); + .addReg(BaseReg, RegState::Kill) + .setMIFlags(MIFlags); } BaseReg = DestReg; } @@ -243,9 +261,10 @@ void llvm::emitThumbRegPlusImmediate(MachineBasicBlock &MBB, MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII.get(Opc), DestReg); if (NeedCC) MIB = AddDefaultT1CC(MIB); - MIB .addReg(DestReg).addImm(ThisVal); + MIB.addReg(DestReg).addImm(ThisVal); if (NeedPred) MIB = AddDefaultPred(MIB); + MIB.setMIFlags(MIFlags); } else { bool isKill = BaseReg != ARM::SP; @@ -255,8 +274,9 @@ void llvm::emitThumbRegPlusImmediate(MachineBasicBlock &MBB, MIB.addReg(BaseReg, getKillRegState(isKill)).addImm(ThisVal); if (NeedPred) MIB = AddDefaultPred(MIB); - BaseReg = DestReg; + MIB.setMIFlags(MIFlags); + BaseReg = DestReg; if (Opc == ARM::tADDrSPi) { // r4 = add sp, imm // r4 = add r4, imm @@ -274,7 +294,8 @@ void llvm::emitThumbRegPlusImmediate(MachineBasicBlock &MBB, const TargetInstrDesc &TID = TII.get(ExtraOpc); AddDefaultPred(AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TID, DestReg)) .addReg(DestReg, RegState::Kill) - .addImm(((unsigned)NumBytes) & 3)); + .addImm(((unsigned)NumBytes) & 3) + .setMIFlags(MIFlags)); } } @@ -283,8 +304,8 @@ static void emitSPUpdate(MachineBasicBlock &MBB, const TargetInstrInfo &TII, DebugLoc dl, const Thumb1RegisterInfo &MRI, int NumBytes) { - emitThumbRegPlusImmediate(MBB, MBBI, ARM::SP, ARM::SP, NumBytes, TII, - MRI, dl); + emitThumbRegPlusImmediate(MBB, MBBI, dl, ARM::SP, ARM::SP, NumBytes, TII, + MRI); } void Thumb1RegisterInfo:: @@ -337,7 +358,7 @@ static void emitThumbConstant(MachineBasicBlock &MBB, DestReg)) .addImm(ThisVal)); if (Imm > 0) - emitThumbRegPlusImmediate(MBB, MBBI, DestReg, DestReg, Imm, TII, MRI, dl); + emitThumbRegPlusImmediate(MBB, MBBI, dl, DestReg, DestReg, Imm, TII, MRI); if (isSub) { const TargetInstrDesc &TID = TII.get(ARM::tRSB); AddDefaultPred(AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TID, DestReg)) @@ -430,8 +451,8 @@ rewriteFrameIndex(MachineBasicBlock::iterator II, unsigned FrameRegIdx, // MI would expand into a large number of instructions. Don't try to // simplify the immediate. 
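Since emitThumbRegPlusImmediate has to legalize offsets that do not fit a single Thumb1 immediate, a large SP-relative offset becomes a short add chain: one add off SP (imm8 scaled by 4, so up to 1020) followed by 8-bit adds off the destination register. A worked example of one plausible decomposition (illustrative arithmetic only; the real code picks opcodes via calcNumMI and may instead fall back to a constant-pool load past a threshold):

  #include <cstdio>

  int main() {
    // r4 = sp + 1536 in Thumb1, sketched as:
    //   add r4, sp, #1020   ; tADDrSPi, imm8 = 255, scaled by 4
    //   add r4, #255        ; tADDi8
    //   add r4, #255        ; tADDi8
    //   add r4, #6          ; remainder
    printf("%d\n", 1020 + 255 + 255 + 6); // 1536
    return 0;
  }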
       if (NumMIs > 2) {
-        emitThumbRegPlusImmediate(MBB, II, DestReg, FrameReg, Offset, TII,
-                                  *this, dl);
+        emitThumbRegPlusImmediate(MBB, II, dl, DestReg, FrameReg, Offset, TII,
+                                  *this);
         MBB.erase(II);
         return true;
       }
@@ -450,8 +471,8 @@ rewriteFrameIndex(MachineBasicBlock::iterator II, unsigned FrameRegIdx,
       }
       Offset = (Offset - Mask * Scale);
       MachineBasicBlock::iterator NII = llvm::next(II);
-      emitThumbRegPlusImmediate(MBB, NII, DestReg, DestReg, Offset, TII,
-                                *this, dl);
+      emitThumbRegPlusImmediate(MBB, NII, dl, DestReg, DestReg, Offset, TII,
+                                *this);
     } else {
       // Translate r0 = add sp, -imm to
       // r0 = -imm (this is then translated into a series of instructions)
@@ -645,15 +666,15 @@ Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
     bool UseRR = false;
     if (Opcode == ARM::tRestore) {
       if (FrameReg == ARM::SP)
-        emitThumbRegPlusImmInReg(MBB, II, TmpReg, FrameReg,
-                                 Offset, false, TII, *this, dl);
+        emitThumbRegPlusImmInReg(MBB, II, dl, TmpReg, FrameReg,
+                                 Offset, false, TII, *this);
       else {
         emitLoadConstPool(MBB, II, dl, TmpReg, 0, Offset);
         UseRR = true;
       }
     } else {
-      emitThumbRegPlusImmediate(MBB, II, TmpReg, FrameReg, Offset, TII,
-                                *this, dl);
+      emitThumbRegPlusImmediate(MBB, II, dl, TmpReg, FrameReg, Offset, TII,
+                                *this);
     }
 
     MI.setDesc(TII.get(UseRR ? ARM::tLDRr : ARM::tLDRi));
@@ -668,15 +689,15 @@ Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
 
     if (Opcode == ARM::tSpill) {
       if (FrameReg == ARM::SP)
-        emitThumbRegPlusImmInReg(MBB, II, VReg, FrameReg,
-                                 Offset, false, TII, *this, dl);
+        emitThumbRegPlusImmInReg(MBB, II, dl, VReg, FrameReg,
+                                 Offset, false, TII, *this);
       else {
         emitLoadConstPool(MBB, II, dl, VReg, 0, Offset);
         UseRR = true;
       }
     } else
-      emitThumbRegPlusImmediate(MBB, II, VReg, FrameReg, Offset, TII,
-                                *this, dl);
+      emitThumbRegPlusImmediate(MBB, II, dl, VReg, FrameReg, Offset, TII,
+                                *this);
     MI.setDesc(TII.get(UseRR ? ARM::tSTRr : ARM::tSTRi));
     MI.getOperand(i).ChangeToRegister(VReg, false, false, true);
     if (UseRR)
diff --git a/lib/Target/ARM/Thumb1RegisterInfo.h b/lib/Target/ARM/Thumb1RegisterInfo.h
index 8a87cc55c829..9060e59e5980 100644
--- a/lib/Target/ARM/Thumb1RegisterInfo.h
+++ b/lib/Target/ARM/Thumb1RegisterInfo.h
@@ -28,6 +28,11 @@ struct Thumb1RegisterInfo : public ARMBaseRegisterInfo {
 public:
   Thumb1RegisterInfo(const ARMBaseInstrInfo &tii, const ARMSubtarget &STI);
 
+  const TargetRegisterClass*
+    getLargestLegalSuperClass(const TargetRegisterClass *RC) const;
+
+  const TargetRegisterClass *getPointerRegClass(unsigned Kind = 0) const;
+
   /// emitLoadConstPool - Emits a load from constpool to materialize the
   /// specified immediate.
   void emitLoadConstPool(MachineBasicBlock &MBB,
@@ -35,7 +40,8 @@ public:
                          DebugLoc dl,
                          unsigned DestReg, unsigned SubIdx, int Val,
                          ARMCC::CondCodes Pred = ARMCC::AL,
-                         unsigned PredReg = 0) const;
+                         unsigned PredReg = 0,
+                         unsigned MIFlags = MachineInstr::NoFlags) const;
 
   /// Code Generation virtual methods...
   void eliminateCallFramePseudoInstr(MachineFunction &MF,
diff --git a/lib/Target/ARM/Thumb2InstrInfo.cpp b/lib/Target/ARM/Thumb2InstrInfo.cpp
index 9b1073be3c8e..d169dbb7f197 100644
--- a/lib/Target/ARM/Thumb2InstrInfo.cpp
+++ b/lib/Target/ARM/Thumb2InstrInfo.cpp
@@ -184,7 +184,7 @@ void llvm::emitT2RegPlusImmediate(MachineBasicBlock &MBB,
                                MachineBasicBlock::iterator &MBBI, DebugLoc dl,
                                unsigned DestReg, unsigned BaseReg, int NumBytes,
                                ARMCC::CondCodes Pred, unsigned PredReg,
-                               const ARMBaseInstrInfo &TII) {
+                               const ARMBaseInstrInfo &TII, unsigned MIFlags) {
   bool isSub = NumBytes < 0;
   if (isSub) NumBytes = -NumBytes;
 
@@ -198,14 +198,14 @@ void llvm::emitT2RegPlusImmediate(MachineBasicBlock &MBB,
       // Use a movw to materialize the 16-bit constant.
       BuildMI(MBB, MBBI, dl, TII.get(ARM::t2MOVi16), DestReg)
         .addImm(NumBytes)
-        .addImm((unsigned)Pred).addReg(PredReg);
+        .addImm((unsigned)Pred).addReg(PredReg).setMIFlags(MIFlags);
       Fits = true;
     } else if ((NumBytes & 0xffff) == 0) {
       // Use a movt to materialize the 32-bit constant.
       BuildMI(MBB, MBBI, dl, TII.get(ARM::t2MOVTi16), DestReg)
         .addReg(DestReg)
         .addImm(NumBytes >> 16)
-        .addImm((unsigned)Pred).addReg(PredReg);
+        .addImm((unsigned)Pred).addReg(PredReg).setMIFlags(MIFlags);
       Fits = true;
     }
 
@@ -214,12 +214,14 @@ void llvm::emitT2RegPlusImmediate(MachineBasicBlock &MBB,
         BuildMI(MBB, MBBI, dl, TII.get(ARM::t2SUBrr), DestReg)
           .addReg(BaseReg, RegState::Kill)
           .addReg(DestReg, RegState::Kill)
-          .addImm((unsigned)Pred).addReg(PredReg).addReg(0);
+          .addImm((unsigned)Pred).addReg(PredReg).addReg(0)
+          .setMIFlags(MIFlags);
       } else {
         BuildMI(MBB, MBBI, dl, TII.get(ARM::t2ADDrr), DestReg)
           .addReg(DestReg, RegState::Kill)
           .addReg(BaseReg, RegState::Kill)
-          .addImm((unsigned)Pred).addReg(PredReg).addReg(0);
+          .addImm((unsigned)Pred).addReg(PredReg).addReg(0)
+          .setMIFlags(MIFlags);
       }
       return;
     }
@@ -230,7 +232,8 @@ void llvm::emitT2RegPlusImmediate(MachineBasicBlock &MBB,
     unsigned Opc = 0;
     if (DestReg == ARM::SP && BaseReg != ARM::SP) {
       // mov sp, rn. Note t2MOVr cannot be used.
-      BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVgpr2gpr),DestReg).addReg(BaseReg);
+      BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVgpr2gpr),DestReg)
+        .addReg(BaseReg).setMIFlags(MIFlags);
       BaseReg = ARM::SP;
       continue;
     }
@@ -243,7 +246,7 @@ void llvm::emitT2RegPlusImmediate(MachineBasicBlock &MBB,
       Opc = isSub ? ARM::tSUBspi : ARM::tADDspi;
       // FIXME: Fix Thumb1 immediate encoding.
       BuildMI(MBB, MBBI, dl, TII.get(Opc), DestReg)
-        .addReg(BaseReg).addImm(ThisVal/4);
+        .addReg(BaseReg).addImm(ThisVal/4).setMIFlags(MIFlags);
       NumBytes = 0;
       continue;
     }
@@ -283,7 +286,7 @@ void llvm::emitT2RegPlusImmediate(MachineBasicBlock &MBB,
     MachineInstrBuilder MIB =
       AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(Opc), DestReg)
                      .addReg(BaseReg, RegState::Kill)
-                     .addImm(ThisVal));
+                     .addImm(ThisVal)).setMIFlags(MIFlags);
     if (HasCCOut)
       AddDefaultCC(MIB);
 
diff --git a/lib/Target/ARM/Thumb2RegisterInfo.cpp b/lib/Target/ARM/Thumb2RegisterInfo.cpp
index 099b8f724140..355c3bf0352c 100644
--- a/lib/Target/ARM/Thumb2RegisterInfo.cpp
+++ b/lib/Target/ARM/Thumb2RegisterInfo.cpp
@@ -13,26 +13,15 @@
 //===----------------------------------------------------------------------===//
 
 #include "ARM.h"
-#include "ARMAddressingModes.h"
-#include "ARMBaseInstrInfo.h"
-#include "ARMMachineFunctionInfo.h"
 #include "ARMSubtarget.h"
 #include "Thumb2InstrInfo.h"
 #include "Thumb2RegisterInfo.h"
 #include "llvm/Constants.h"
 #include "llvm/DerivedTypes.h"
 #include "llvm/Function.h"
-#include "llvm/LLVMContext.h"
 #include "llvm/CodeGen/MachineConstantPool.h"
-#include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineLocation.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/ADT/BitVector.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/Support/ErrorHandling.h"
 using namespace llvm;
 
 Thumb2RegisterInfo::Thumb2RegisterInfo(const ARMBaseInstrInfo &tii,
@@ -42,13 +31,14 @@ Thumb2RegisterInfo::Thumb2RegisterInfo(const ARMBaseInstrInfo &tii,
 
 /// emitLoadConstPool - Emits a load from constpool to materialize the
 /// specified immediate.
-void Thumb2RegisterInfo::emitLoadConstPool(MachineBasicBlock &MBB,
-                                           MachineBasicBlock::iterator &MBBI,
-                                           DebugLoc dl,
-                                           unsigned DestReg, unsigned SubIdx,
-                                           int Val,
-                                           ARMCC::CondCodes Pred,
-                                           unsigned PredReg) const {
+void
+Thumb2RegisterInfo::emitLoadConstPool(MachineBasicBlock &MBB,
+                                      MachineBasicBlock::iterator &MBBI,
+                                      DebugLoc dl,
+                                      unsigned DestReg, unsigned SubIdx,
+                                      int Val,
+                                      ARMCC::CondCodes Pred, unsigned PredReg,
+                                      unsigned MIFlags) const {
   MachineFunction &MF = *MBB.getParent();
   MachineConstantPool *ConstantPool = MF.getConstantPool();
   const Constant *C = ConstantInt::get(
@@ -57,5 +47,6 @@ void Thumb2RegisterInfo::emitLoadConstPool(MachineBasicBlock &MBB,
 
   BuildMI(MBB, MBBI, dl, TII.get(ARM::t2LDRpci))
     .addReg(DestReg, getDefRegState(true), SubIdx)
-    .addConstantPoolIndex(Idx).addImm((int64_t)ARMCC::AL).addReg(0);
+    .addConstantPoolIndex(Idx).addImm((int64_t)ARMCC::AL).addReg(0)
+    .setMIFlags(MIFlags);
 }
diff --git a/lib/Target/ARM/Thumb2RegisterInfo.h b/lib/Target/ARM/Thumb2RegisterInfo.h
index b3cf2e5b0935..824378aeab4e 100644
--- a/lib/Target/ARM/Thumb2RegisterInfo.h
+++ b/lib/Target/ARM/Thumb2RegisterInfo.h
@@ -35,7 +35,8 @@ public:
                          DebugLoc dl,
                          unsigned DestReg, unsigned SubIdx, int Val,
                          ARMCC::CondCodes Pred = ARMCC::AL,
-                         unsigned PredReg = 0) const;
+                         unsigned PredReg = 0,
+                         unsigned MIFlags = MachineInstr::NoFlags) const;
 };
 }
diff --git a/lib/Target/ARM/Thumb2SizeReduction.cpp b/lib/Target/ARM/Thumb2SizeReduction.cpp
index cc8f61cd72a4..ce2e9663fb74 100644
--- a/lib/Target/ARM/Thumb2SizeReduction.cpp
+++ b/lib/Target/ARM/Thumb2SizeReduction.cpp
@@ -12,6 +12,7 @@
 #include "ARMAddressingModes.h"
 #include "ARMBaseRegisterInfo.h"
 #include "ARMBaseInstrInfo.h"
+#include "ARMSubtarget.h"
 #include "Thumb2InstrInfo.h"
 #include "llvm/CodeGen/MachineInstr.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
@@ -49,82 +50,86 @@ namespace {
     // 1 - No cc field.
     // 2 - Always set CPSR.
     unsigned PredCC2  : 2;
+    unsigned PartFlag : 1; // 16-bit instruction does partial flag update
     unsigned Special  : 1; // Needs to be dealt with specially
   };
 
   static const ReduceEntry ReduceTable[] = {
-    // Wide,        Narrow1,      Narrow2,     imm1,imm2,  lo1, lo2, P/C, S
-    { ARM::t2ADCrr, 0,            ARM::tADC,     0,   0,    0,   1,  0,0, 0 },
-    { ARM::t2ADDri, ARM::tADDi3,  ARM::tADDi8,   3,   8,    1,   1,  0,0, 0 },
-    { ARM::t2ADDrr, ARM::tADDrr,  ARM::tADDhirr, 0,   0,    1,   0,  0,1, 0 },
+    // Wide,        Narrow1,      Narrow2,     imm1,imm2,  lo1, lo2, P/C, PF, S
+    { ARM::t2ADCrr, 0,            ARM::tADC,     0,   0,    0,   1,  0,0, 0,0 },
+    { ARM::t2ADDri, ARM::tADDi3,  ARM::tADDi8,   3,   8,    1,   1,  0,0, 0,0 },
+    { ARM::t2ADDrr, ARM::tADDrr,  ARM::tADDhirr, 0,   0,    1,   0,  0,1, 0,0 },
     // Note: immediate scale is 4.
-    { ARM::t2ADDrSPi,ARM::tADDrSPi,0,            8,   0,    1,   0,  1,0, 1 },
-    { ARM::t2ADDSri,ARM::tADDi3,  ARM::tADDi8,   3,   8,    1,   1,  2,2, 1 },
-    { ARM::t2ADDSrr,ARM::tADDrr,  0,             0,   0,    1,   0,  2,0, 1 },
-    { ARM::t2ANDrr, 0,            ARM::tAND,     0,   0,    0,   1,  0,0, 0 },
-    { ARM::t2ASRri, ARM::tASRri,  0,             5,   0,    1,   0,  0,0, 0 },
-    { ARM::t2ASRrr, 0,            ARM::tASRrr,   0,   0,    0,   1,  0,0, 0 },
-    { ARM::t2BICrr, 0,            ARM::tBIC,     0,   0,    0,   1,  0,0, 0 },
+    { ARM::t2ADDrSPi,ARM::tADDrSPi,0,            8,   0,    1,   0,  1,0, 0,1 },
+    { ARM::t2ADDSri,ARM::tADDi3,  ARM::tADDi8,   3,   8,    1,   1,  2,2, 0,1 },
+    { ARM::t2ADDSrr,ARM::tADDrr,  0,             0,   0,    1,   0,  2,0, 0,1 },
+    { ARM::t2ANDrr, 0,            ARM::tAND,     0,   0,    0,   1,  0,0, 1,0 },
+    { ARM::t2ASRri, ARM::tASRri,  0,             5,   0,    1,   0,  0,0, 1,0 },
+    { ARM::t2ASRrr, 0,            ARM::tASRrr,   0,   0,    0,   1,  0,0, 1,0 },
+    { ARM::t2BICrr, 0,            ARM::tBIC,     0,   0,    0,   1,  0,0, 1,0 },
     //FIXME: Disable CMN, as CCodes are backwards from compare expectations
-    //{ ARM::t2CMNrr, ARM::tCMN,  0,             0,   0,    1,   0,  2,0, 0 },
-    { ARM::t2CMPri, ARM::tCMPi8,  0,             8,   0,    1,   0,  2,0, 0 },
-    { ARM::t2CMPrr, ARM::tCMPhir, 0,             0,   0,    0,   0,  2,0, 1 },
-    { ARM::t2EORrr, 0,            ARM::tEOR,     0,   0,    0,   1,  0,0, 0 },
+    //{ ARM::t2CMNrr, ARM::tCMN,  0,             0,   0,    1,   0,  2,0, 0,0 },
+    { ARM::t2CMPri, ARM::tCMPi8,  0,             8,   0,    1,   0,  2,0, 0,0 },
+    { ARM::t2CMPrr, ARM::tCMPhir, 0,             0,   0,    0,   0,  2,0, 0,1 },
+    { ARM::t2EORrr, 0,            ARM::tEOR,     0,   0,    0,   1,  0,0, 1,0 },
     // FIXME: adr.n immediate offset must be multiple of 4.
-    //{ ARM::t2LEApcrelJT,ARM::tLEApcrelJT, 0,   0,   0,    1,   0,  1,0, 0 },
-    { ARM::t2LSLri, ARM::tLSLri,  0,             5,   0,    1,   0,  0,0, 0 },
-    { ARM::t2LSLrr, 0,            ARM::tLSLrr,   0,   0,    0,   1,  0,0, 0 },
-    { ARM::t2LSRri, ARM::tLSRri,  0,             5,   0,    1,   0,  0,0, 0 },
-    { ARM::t2LSRrr, 0,            ARM::tLSRrr,   0,   0,    0,   1,  0,0, 0 },
-    { ARM::t2MOVi,  ARM::tMOVi8,  0,             8,   0,    1,   0,  0,0, 0 },
-    { ARM::t2MOVi16,ARM::tMOVi8,  0,             8,   0,    1,   0,  0,0, 1 },
+    //{ ARM::t2LEApcrelJT,ARM::tLEApcrelJT, 0,   0,   0,    1,   0,  1,0, 0,0 },
+    { ARM::t2LSLri, ARM::tLSLri,  0,             5,   0,    1,   0,  0,0, 1,0 },
+    { ARM::t2LSLrr, 0,            ARM::tLSLrr,   0,   0,    0,   1,  0,0, 1,0 },
+    { ARM::t2LSRri, ARM::tLSRri,  0,             5,   0,    1,   0,  0,0, 1,0 },
+    { ARM::t2LSRrr, 0,            ARM::tLSRrr,   0,   0,    0,   1,  0,0, 1,0 },
+    // FIXME: tMOVi8 and tMVN also partially update CPSR but they are less
+    // likely to cause issues in the loop. As a size / performance workaround,
+    // they are not marked as such.
+    { ARM::t2MOVi,  ARM::tMOVi8,  0,             8,   0,    1,   0,  0,0, 0,0 },
+    { ARM::t2MOVi16,ARM::tMOVi8,  0,             8,   0,    1,   0,  0,0, 0,1 },
     // FIXME: Do we need the 16-bit 'S' variant?
-    { ARM::t2MOVr,ARM::tMOVgpr2gpr,0,            0,   0,    0,   0,  1,0, 0 },
-    { ARM::t2MOVCCr,0,            ARM::tMOVCCr,  0,   0,    0,   0,  0,1, 0 },
-    { ARM::t2MOVCCi,0,            ARM::tMOVCCi,  0,   8,    0,   1,  0,1, 0 },
-    { ARM::t2MUL,   0,            ARM::tMUL,     0,   0,    0,   1,  0,0, 0 },
-    { ARM::t2MVNr,  ARM::tMVN,    0,             0,   0,    1,   0,  0,0, 0 },
-    { ARM::t2ORRrr, 0,            ARM::tORR,     0,   0,    0,   1,  0,0, 0 },
-    { ARM::t2REV,   ARM::tREV,    0,             0,   0,    1,   0,  1,0, 0 },
-    { ARM::t2REV16, ARM::tREV16,  0,             0,   0,    1,   0,  1,0, 0 },
-    { ARM::t2REVSH, ARM::tREVSH,  0,             0,   0,    1,   0,  1,0, 0 },
-    { ARM::t2RORrr, 0,            ARM::tROR,     0,   0,    0,   1,  0,0, 0 },
-    { ARM::t2RSBri, ARM::tRSB,    0,             0,   0,    1,   0,  0,0, 1 },
-    { ARM::t2RSBSri,ARM::tRSB,    0,             0,   0,    1,   0,  2,0, 1 },
-    { ARM::t2SBCrr, 0,            ARM::tSBC,     0,   0,    0,   1,  0,0, 0 },
-    { ARM::t2SUBri, ARM::tSUBi3,  ARM::tSUBi8,   3,   8,    1,   1,  0,0, 0 },
-    { ARM::t2SUBrr, ARM::tSUBrr,  0,             0,   0,    1,   0,  0,0, 0 },
-    { ARM::t2SUBSri,ARM::tSUBi3,  ARM::tSUBi8,   3,   8,    1,   1,  2,2, 0 },
-    { ARM::t2SUBSrr,ARM::tSUBrr,  0,             0,   0,    1,   0,  2,0, 0 },
-    { ARM::t2SXTBr, ARM::tSXTB,   0,             0,   0,    1,   0,  1,0, 0 },
-    { ARM::t2SXTHr, ARM::tSXTH,   0,             0,   0,    1,   0,  1,0, 0 },
-    { ARM::t2TSTrr, ARM::tTST,    0,             0,   0,    1,   0,  2,0, 0 },
-    { ARM::t2UXTBr, ARM::tUXTB,   0,             0,   0,    1,   0,  1,0, 0 },
-    { ARM::t2UXTHr, ARM::tUXTH,   0,             0,   0,    1,   0,  1,0, 0 },
+    { ARM::t2MOVr,ARM::tMOVgpr2gpr,0,            0,   0,    0,   0,  1,0, 0,0 },
+    { ARM::t2MOVCCr,0,            ARM::tMOVCCr,  0,   0,    0,   0,  0,1, 0,0 },
+    { ARM::t2MOVCCi,0,            ARM::tMOVCCi,  0,   8,    0,   1,  0,1, 0,0 },
+    { ARM::t2MUL,   0,            ARM::tMUL,     0,   0,    0,   1,  0,0, 1,0 },
+    { ARM::t2MVNr,  ARM::tMVN,    0,             0,   0,    1,   0,  0,0, 0,0 },
+    { ARM::t2ORRrr, 0,            ARM::tORR,     0,   0,    0,   1,  0,0, 1,0 },
+    { ARM::t2REV,   ARM::tREV,    0,             0,   0,    1,   0,  1,0, 0,0 },
+    { ARM::t2REV16, ARM::tREV16,  0,             0,   0,    1,   0,  1,0, 0,0 },
+    { ARM::t2REVSH, ARM::tREVSH,  0,             0,   0,    1,   0,  1,0, 0,0 },
+    { ARM::t2RORrr, 0,            ARM::tROR,     0,   0,    0,   1,  0,0, 1,0 },
+    { ARM::t2RSBri, ARM::tRSB,    0,             0,   0,    1,   0,  0,0, 0,1 },
+    { ARM::t2RSBSri,ARM::tRSB,    0,             0,   0,    1,   0,  2,0, 0,1 },
+    { ARM::t2SBCrr, 0,            ARM::tSBC,     0,   0,    0,   1,  0,0, 0,0 },
+    { ARM::t2SUBri, ARM::tSUBi3,  ARM::tSUBi8,   3,   8,    1,   1,  0,0, 0,0 },
+    { ARM::t2SUBrr, ARM::tSUBrr,  0,             0,   0,    1,   0,  0,0, 0,0 },
+    { ARM::t2SUBSri,ARM::tSUBi3,  ARM::tSUBi8,   3,   8,    1,   1,  2,2, 0,0 },
+    { ARM::t2SUBSrr,ARM::tSUBrr,  0,             0,   0,    1,   0,  2,0, 0,0 },
+    { ARM::t2SXTBr, ARM::tSXTB,   0,             0,   0,    1,   0,  1,0, 0,0 },
+    { ARM::t2SXTHr, ARM::tSXTH,   0,             0,   0,    1,   0,  1,0, 0,0 },
+    { ARM::t2TSTrr, ARM::tTST,    0,             0,   0,    1,   0,  2,0, 0,0 },
+    { ARM::t2UXTBr, ARM::tUXTB,   0,             0,   0,    1,   0,  1,0, 0,0 },
+    { ARM::t2UXTHr, ARM::tUXTH,   0,             0,   0,    1,   0,  1,0, 0,0 },
     // FIXME: Clean this up after splitting each Thumb load / store opcode
     // into multiple ones.
-    { ARM::t2LDRi12,ARM::tLDRi,   ARM::tLDRspi,  5,   8,    1,   0,  0,0, 1 },
-    { ARM::t2LDRs,  ARM::tLDRr,   0,             0,   0,    1,   0,  0,0, 1 },
-    { ARM::t2LDRBi12,ARM::tLDRBi, 0,             5,   0,    1,   0,  0,0, 1 },
-    { ARM::t2LDRBs, ARM::tLDRBr,  0,             0,   0,    1,   0,  0,0, 1 },
-    { ARM::t2LDRHi12,ARM::tLDRHi, 0,             5,   0,    1,   0,  0,0, 1 },
-    { ARM::t2LDRHs, ARM::tLDRHr,  0,             0,   0,    1,   0,  0,0, 1 },
-    { ARM::t2LDRSBs,ARM::tLDRSB,  0,             0,   0,    1,   0,  0,0, 1 },
-    { ARM::t2LDRSHs,ARM::tLDRSH,  0,             0,   0,    1,   0,  0,0, 1 },
-    { ARM::t2STRi12,ARM::tSTRi,   ARM::tSTRspi,  5,   8,    1,   0,  0,0, 1 },
-    { ARM::t2STRs,  ARM::tSTRr,   0,             0,   0,    1,   0,  0,0, 1 },
-    { ARM::t2STRBi12,ARM::tSTRBi, 0,             5,   0,    1,   0,  0,0, 1 },
-    { ARM::t2STRBs, ARM::tSTRBr,  0,             0,   0,    1,   0,  0,0, 1 },
-    { ARM::t2STRHi12,ARM::tSTRHi, 0,             5,   0,    1,   0,  0,0, 1 },
-    { ARM::t2STRHs, ARM::tSTRHr,  0,             0,   0,    1,   0,  0,0, 1 },
-
-    { ARM::t2LDMIA, ARM::tLDMIA,  0,             0,   0,    1,   1,  1,1, 1 },
-    { ARM::t2LDMIA_RET,0,         ARM::tPOP_RET, 0,   0,    1,   1,  1,1, 1 },
-    { ARM::t2LDMIA_UPD,ARM::tLDMIA_UPD,ARM::tPOP,0,   0,    1,   1,  1,1, 1 },
+    { ARM::t2LDRi12,ARM::tLDRi,   ARM::tLDRspi,  5,   8,    1,   0,  0,0, 0,1 },
+    { ARM::t2LDRs,  ARM::tLDRr,   0,             0,   0,    1,   0,  0,0, 0,1 },
+    { ARM::t2LDRBi12,ARM::tLDRBi, 0,             5,   0,    1,   0,  0,0, 0,1 },
+    { ARM::t2LDRBs, ARM::tLDRBr,  0,             0,   0,    1,   0,  0,0, 0,1 },
+    { ARM::t2LDRHi12,ARM::tLDRHi, 0,             5,   0,    1,   0,  0,0, 0,1 },
+    { ARM::t2LDRHs, ARM::tLDRHr,  0,             0,   0,    1,   0,  0,0, 0,1 },
+    { ARM::t2LDRSBs,ARM::tLDRSB,  0,             0,   0,    1,   0,  0,0, 0,1 },
+    { ARM::t2LDRSHs,ARM::tLDRSH,  0,             0,   0,    1,   0,  0,0, 0,1 },
+    { ARM::t2STRi12,ARM::tSTRi,   ARM::tSTRspi,  5,   8,    1,   0,  0,0, 0,1 },
+    { ARM::t2STRs,  ARM::tSTRr,   0,             0,   0,    1,   0,  0,0, 0,1 },
+    { ARM::t2STRBi12,ARM::tSTRBi, 0,             5,   0,    1,   0,  0,0, 0,1 },
+    { ARM::t2STRBs, ARM::tSTRBr,  0,             0,   0,    1,   0,  0,0, 0,1 },
+    { ARM::t2STRHi12,ARM::tSTRHi, 0,             5,   0,    1,   0,  0,0, 0,1 },
+    { ARM::t2STRHs, ARM::tSTRHr,  0,             0,   0,    1,   0,  0,0, 0,1 },
+
+    { ARM::t2LDMIA, ARM::tLDMIA,  0,             0,   0,    1,   1,  1,1, 0,1 },
+    { ARM::t2LDMIA_RET,0,         ARM::tPOP_RET, 0,   0,    1,   1,  1,1, 0,1 },
+    { ARM::t2LDMIA_UPD,ARM::tLDMIA_UPD,ARM::tPOP,0,   0,    1,   1,  1,1, 0,1 },
     // ARM::t2STM (with no basereg writeback) has no Thumb1 equivalent
-    { ARM::t2STMIA_UPD,ARM::tSTMIA_UPD, 0,       0,   0,    1,   1,  1,1, 1 },
-    { ARM::t2STMDB_UPD, 0,        ARM::tPUSH,    0,   0,    1,   1,  1,1, 1 },
+    { ARM::t2STMIA_UPD,ARM::tSTMIA_UPD, 0,       0,   0,    1,   1,  1,1, 0,1 },
+    { ARM::t2STMDB_UPD, 0,        ARM::tPUSH,    0,   0,    1,   1,  1,1, 0,1 },
   };
 
   class Thumb2SizeReduce : public MachineFunctionPass {
@@ -133,6 +138,7 @@ namespace {
     Thumb2SizeReduce();
 
     const Thumb2InstrInfo *TII;
+    const ARMSubtarget *STI;
 
     virtual bool runOnMachineFunction(MachineFunction &MF);
 
@@ -144,6 +150,8 @@ namespace {
     /// ReduceOpcodeMap - Maps wide opcode to index of entry in ReduceTable.
    DenseMap<unsigned, unsigned> ReduceOpcodeMap;
 
+    bool canAddPseudoFlagDep(MachineInstr *Def, MachineInstr *Use);
+
     bool VerifyPredAndCC(MachineInstr *MI, const ReduceEntry &Entry,
                          bool is2Addr, ARMCC::CondCodes Pred,
                          bool LiveCPSR, bool &HasCC, bool &CCDead);
@@ -152,19 +160,20 @@ namespace {
                           const ReduceEntry &Entry);
 
     bool ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI,
-                       const ReduceEntry &Entry, bool LiveCPSR);
+                       const ReduceEntry &Entry, bool LiveCPSR,
+                       MachineInstr *CPSRDef);
 
     /// ReduceTo2Addr - Reduce a 32-bit instruction to a 16-bit two-address
     /// instruction.
     bool ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI,
                        const ReduceEntry &Entry,
-                       bool LiveCPSR);
+                       bool LiveCPSR, MachineInstr *CPSRDef);
 
     /// ReduceToNarrow - Reduce a 32-bit instruction to a 16-bit
     /// non-two-address instruction.
     bool ReduceToNarrow(MachineBasicBlock &MBB, MachineInstr *MI,
                         const ReduceEntry &Entry,
-                        bool LiveCPSR);
+                        bool LiveCPSR, MachineInstr *CPSRDef);
 
     /// ReduceMBB - Reduce width of instructions in the specified basic block.
     bool ReduceMBB(MachineBasicBlock &MBB);
@@ -187,6 +196,52 @@ static bool HasImplicitCPSRDef(const TargetInstrDesc &TID) {
   return false;
 }
 
+/// canAddPseudoFlagDep - For A9 (and other out-of-order) implementations,
+/// the 's' 16-bit instructions partially update CPSR. Abort the
+/// transformation to avoid adding a false dependency on the last CPSR-setting
+/// instruction, which hurts the out-of-order execution engine's ability
+/// to do register renaming magic.
+/// This function checks if there is a read-after-write dependency between the
+/// last instruction that defines the CPSR and the current instruction. If
+/// there is, then there is no harm done since the instruction cannot be
+/// retired before the CPSR-setting instruction anyway.
+/// Note: we are not doing full dependency analysis here for the sake of
+/// compile time. We're not looking for cases like:
+/// r0 = muls ...
+/// r1 = add.w r0, ...
+/// ...
+///    = mul.w r1
+/// In this case it would have been OK to narrow the mul.w to muls since there
+/// is an indirect RAW dependency between the muls and the mul.w.
+bool
+Thumb2SizeReduce::canAddPseudoFlagDep(MachineInstr *Def, MachineInstr *Use) {
+  if (!Def || !STI->avoidCPSRPartialUpdate())
+    return false;
+
+  SmallSet<unsigned, 2> Defs;
+  for (unsigned i = 0, e = Def->getNumOperands(); i != e; ++i) {
+    const MachineOperand &MO = Def->getOperand(i);
+    if (!MO.isReg() || MO.isUndef() || MO.isUse())
+      continue;
+    unsigned Reg = MO.getReg();
+    if (Reg == 0 || Reg == ARM::CPSR)
+      continue;
+    Defs.insert(Reg);
+  }
+
+  for (unsigned i = 0, e = Use->getNumOperands(); i != e; ++i) {
+    const MachineOperand &MO = Use->getOperand(i);
+    if (!MO.isReg() || MO.isUndef() || MO.isDef())
+      continue;
+    unsigned Reg = MO.getReg();
+    if (Defs.count(Reg))
+      return false;
+  }
+
+  // No read-after-write dependency. The narrowing would add a false
+  // dependency.
+  return true;
+}
+
 bool
 Thumb2SizeReduce::VerifyPredAndCC(MachineInstr *MI, const ReduceEntry &Entry,
                                   bool is2Addr, ARMCC::CondCodes Pred,
@@ -410,7 +465,10 @@ Thumb2SizeReduce::ReduceLoadStore(MachineBasicBlock &MBB, MachineInstr *MI,
     MIB.addOperand(MI->getOperand(OpNum));
 
   // Transfer memoperands.
-  (*MIB).setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
+  MIB->setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
+
+  // Transfer MI flags.
+  MIB.setMIFlags(MI->getFlags());
 
   DEBUG(errs() << "Converted 32-bit: " << *MI << " to 16-bit: " << *MIB);
 
@@ -422,7 +480,7 @@ Thumb2SizeReduce::ReduceLoadStore(MachineBasicBlock &MBB, MachineInstr *MI,
 bool
 Thumb2SizeReduce::ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI,
                                 const ReduceEntry &Entry,
-                                bool LiveCPSR) {
+                                bool LiveCPSR, MachineInstr *CPSRDef) {
   if (Entry.LowRegs1 && !VerifyLowRegs(MI))
     return false;
 
@@ -440,12 +498,12 @@ Thumb2SizeReduce::ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI,
     switch (Opc) {
     default: break;
     case ARM::t2ADDSri: {
-      if (ReduceTo2Addr(MBB, MI, Entry, LiveCPSR))
+      if (ReduceTo2Addr(MBB, MI, Entry, LiveCPSR, CPSRDef))
         return true;
       // fallthrough
     }
     case ARM::t2ADDSrr:
-      return ReduceToNarrow(MBB, MI, Entry, LiveCPSR);
+      return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, CPSRDef);
     }
   }
   break;
@@ -453,13 +511,13 @@ Thumb2SizeReduce::ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI,
   case ARM::t2RSBri:
   case ARM::t2RSBSri:
     if (MI->getOperand(2).getImm() == 0)
-      return ReduceToNarrow(MBB, MI, Entry, LiveCPSR);
+      return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, CPSRDef);
     break;
   case ARM::t2MOVi16:
     // Can convert only 'pure' immediate operands, not immediates obtained as
     // globals' addresses.
     if (MI->getOperand(1).isImm())
-      return ReduceToNarrow(MBB, MI, Entry, LiveCPSR);
+      return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, CPSRDef);
     break;
   case ARM::t2CMPrr: {
     // Try to reduce to the lo-reg only version first. Why there are two
@@ -468,17 +526,17 @@ Thumb2SizeReduce::ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI,
     // are prioritized, but the table assumes a unique entry for each
     // source insn opcode. So for now, we hack a local entry record to use.
     static const ReduceEntry NarrowEntry =
-      { ARM::t2CMPrr,ARM::tCMPr, 0, 0, 0, 1, 1,2, 0, 1 };
-    if (ReduceToNarrow(MBB, MI, NarrowEntry, LiveCPSR))
+      { ARM::t2CMPrr,ARM::tCMPr, 0, 0, 0, 1, 1,2, 0, 0,1 };
+    if (ReduceToNarrow(MBB, MI, NarrowEntry, LiveCPSR, CPSRDef))
       return true;
-    return ReduceToNarrow(MBB, MI, Entry, LiveCPSR);
+    return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, CPSRDef);
   }
   case ARM::t2ADDrSPi: {
     static const ReduceEntry NarrowEntry =
-      { ARM::t2ADDrSPi,ARM::tADDspi, 0, 7, 0, 1, 0, 1, 0, 1 };
+      { ARM::t2ADDrSPi,ARM::tADDspi, 0, 7, 0, 1, 0, 1, 0, 0,1 };
     if (MI->getOperand(0).getReg() == ARM::SP)
-      return ReduceToNarrow(MBB, MI, NarrowEntry, LiveCPSR);
+      return ReduceToNarrow(MBB, MI, NarrowEntry, LiveCPSR, CPSRDef);
-    return ReduceToNarrow(MBB, MI, Entry, LiveCPSR);
+    return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, CPSRDef);
   }
   }
   return false;
@@ -487,7 +545,7 @@ Thumb2SizeReduce::ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI,
 bool
 Thumb2SizeReduce::ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI,
                                 const ReduceEntry &Entry,
-                                bool LiveCPSR) {
+                                bool LiveCPSR, MachineInstr *CPSRDef) {
 
   if (ReduceLimit2Addr != -1 && ((int)Num2Addrs >= ReduceLimit2Addr))
     return false;
@@ -542,6 +600,12 @@ Thumb2SizeReduce::ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI,
   if (!VerifyPredAndCC(MI, Entry, true, Pred, LiveCPSR, HasCC, CCDead))
     return false;
 
+  // Avoid adding a false dependency on partial flag update by some 16-bit
+  // instructions which have the 's' bit set.
+  if (Entry.PartFlag && NewTID.hasOptionalDef() && HasCC &&
+      canAddPseudoFlagDep(CPSRDef, MI))
+    return false;
+
   // Add the 16-bit instruction.
   DebugLoc dl = MI->getDebugLoc();
   MachineInstrBuilder MIB = BuildMI(MBB, *MI, dl, NewTID);
@@ -563,6 +627,9 @@ Thumb2SizeReduce::ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI,
     MIB.addOperand(MI->getOperand(i));
   }
 
+  // Transfer MI flags.
+  MIB.setMIFlags(MI->getFlags());
+
   DEBUG(errs() << "Converted 32-bit: " << *MI << " to 16-bit: " << *MIB);
 
   MBB.erase(MI);
@@ -573,7 +640,7 @@ Thumb2SizeReduce::ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI,
 bool
 Thumb2SizeReduce::ReduceToNarrow(MachineBasicBlock &MBB, MachineInstr *MI,
                                  const ReduceEntry &Entry,
-                                 bool LiveCPSR) {
+                                 bool LiveCPSR, MachineInstr *CPSRDef) {
   if (ReduceLimit != -1 && ((int)NumNarrows >= ReduceLimit))
     return false;
 
@@ -626,6 +693,12 @@ Thumb2SizeReduce::ReduceToNarrow(MachineBasicBlock &MBB, MachineInstr *MI,
   if (!VerifyPredAndCC(MI, Entry, false, Pred, LiveCPSR, HasCC, CCDead))
     return false;
 
+  // Avoid adding a false dependency on partial flag update by some 16-bit
+  // instructions which have the 's' bit set.
+  if (Entry.PartFlag && NewTID.hasOptionalDef() && HasCC &&
+      canAddPseudoFlagDep(CPSRDef, MI))
+    return false;
+
   // Add the 16-bit instruction.
   DebugLoc dl = MI->getDebugLoc();
   MachineInstrBuilder MIB = BuildMI(MBB, *MI, dl, NewTID);
@@ -663,6 +736,9 @@ Thumb2SizeReduce::ReduceToNarrow(MachineBasicBlock &MBB, MachineInstr *MI,
   if (!TID.isPredicable() && NewTID.isPredicable())
     AddDefaultPred(MIB);
 
+  // Transfer MI flags.
+  MIB.setMIFlags(MI->getFlags());
+
   DEBUG(errs() << "Converted 32-bit: " << *MI << " to 16-bit: " << *MIB);
 
   MBB.erase(MI);
@@ -670,7 +746,7 @@ Thumb2SizeReduce::ReduceToNarrow(MachineBasicBlock &MBB, MachineInstr *MI,
   return true;
 }
 
-static bool UpdateCPSRDef(MachineInstr &MI, bool LiveCPSR) {
+static bool UpdateCPSRDef(MachineInstr &MI, bool LiveCPSR, bool &DefCPSR) {
   bool HasDef = false;
   for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
     const MachineOperand &MO = MI.getOperand(i);
@@ -678,6 +754,8 @@ static bool UpdateCPSRDef(MachineInstr &MI, bool LiveCPSR) {
       continue;
     if (MO.getReg() != ARM::CPSR)
       continue;
+
+    DefCPSR = true;
     if (!MO.isDead())
       HasDef = true;
   }
@@ -707,6 +785,7 @@ bool Thumb2SizeReduce::ReduceMBB(MachineBasicBlock &MBB) {
 
   // Yes, CPSR could be livein.
   bool LiveCPSR = MBB.isLiveIn(ARM::CPSR);
+  MachineInstr *CPSRDef = 0;
 
   MachineBasicBlock::iterator MII = MBB.begin(), E = MBB.end();
   MachineBasicBlock::iterator NextMII;
@@ -722,7 +801,7 @@ bool Thumb2SizeReduce::ReduceMBB(MachineBasicBlock &MBB) {
       const ReduceEntry &Entry = ReduceTable[OPI->second];
       // Ignore "special" cases for now.
       if (Entry.Special) {
-        if (ReduceSpecial(MBB, MI, Entry, LiveCPSR)) {
+        if (ReduceSpecial(MBB, MI, Entry, LiveCPSR, CPSRDef)) {
           Modified = true;
           MachineBasicBlock::iterator I = prior(NextMII);
           MI = &*I;
         }
@@ -731,7 +810,8 @@ bool Thumb2SizeReduce::ReduceMBB(MachineBasicBlock &MBB) {
       }
 
       // Try to transform to a 16-bit two-address instruction.
-      if (Entry.NarrowOpc2 && ReduceTo2Addr(MBB, MI, Entry, LiveCPSR)) {
+      if (Entry.NarrowOpc2 &&
+          ReduceTo2Addr(MBB, MI, Entry, LiveCPSR, CPSRDef)) {
         Modified = true;
         MachineBasicBlock::iterator I = prior(NextMII);
         MI = &*I;
       }
@@ -739,7 +819,8 @@ bool Thumb2SizeReduce::ReduceMBB(MachineBasicBlock &MBB) {
       }
 
      // Try to transform to a 16-bit non-two-address instruction.
-      if (Entry.NarrowOpc1 && ReduceToNarrow(MBB, MI, Entry, LiveCPSR)) {
+      if (Entry.NarrowOpc1 &&
+          ReduceToNarrow(MBB, MI, Entry, LiveCPSR, CPSRDef)) {
         Modified = true;
         MachineBasicBlock::iterator I = prior(NextMII);
         MI = &*I;
@@ -747,7 +828,14 @@ bool Thumb2SizeReduce::ReduceMBB(MachineBasicBlock &MBB) {
     }
 
   ProcessNext:
-    LiveCPSR = UpdateCPSRDef(*MI, LiveCPSR);
+    bool DefCPSR = false;
+    LiveCPSR = UpdateCPSRDef(*MI, LiveCPSR, DefCPSR);
+    if (MI->getDesc().isCall())
+      // Calls don't really set CPSR.
+      CPSRDef = 0;
+    else if (DefCPSR)
+      // This is the last CPSR defining instruction.
+      CPSRDef = MI;
   }
 
   return Modified;
@@ -756,6 +844,7 @@ bool Thumb2SizeReduce::ReduceMBB(MachineBasicBlock &MBB) {
 bool Thumb2SizeReduce::runOnMachineFunction(MachineFunction &MF) {
   const TargetMachine &TM = MF.getTarget();
   TII = static_cast<const Thumb2InstrInfo*>(TM.getInstrInfo());
+  STI = &TM.getSubtarget<ARMSubtarget>();
 
   bool Modified = false;
   for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I)
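The narrowing guard in this commit is easier to follow outside diff form. Below is a minimal standalone C++ model of the check that canAddPseudoFlagDep() performs; it is illustrative only, not LLVM API. The type Inst and the function wouldAddFalseFlagDep are invented for this sketch, and the subtarget gate (avoidCPSRPartialUpdate()) is omitted.

// Model: narrowing a 32-bit Thumb2 instruction to a flag-setting 16-bit
// encoding is refused only when the candidate does NOT already read a
// register written by the last CPSR-defining instruction. With no such
// read-after-write dependency, the new partial CPSR write would falsely
// serialize two independent instructions on an out-of-order core.
#include <cstdio>
#include <set>
#include <vector>

struct Inst {
  std::vector<unsigned> defs; // registers written (CPSR excluded)
  std::vector<unsigned> uses; // registers read
};

// Returns true when narrowing Use would add a false CPSR dependency.
static bool wouldAddFalseFlagDep(const Inst *Def, const Inst &Use) {
  if (!Def)                        // no prior CPSR writer in the block
    return false;
  std::set<unsigned> Defs(Def->defs.begin(), Def->defs.end());
  for (unsigned R : Use.uses)
    if (Defs.count(R))             // RAW dep already orders the two insts
      return false;
  return true;                     // independent -> narrowing is harmful
}

int main() {
  enum { R0 = 0, R1, R2, R3 };
  Inst muls  = {{R0}, {R1, R2}};   // last CPSR-setting instruction
  Inst addw1 = {{R3}, {R0, R1}};   // reads r0 -> already ordered after muls
  Inst addw2 = {{R3}, {R1, R2}};   // independent of muls
  std::printf("add.w r3, r0, r1: false dep added? %d\n",
              wouldAddFalseFlagDep(&muls, addw1)); // prints 0
  std::printf("add.w r3, r1, r2: false dep added? %d\n",
              wouldAddFalseFlagDep(&muls, addw2)); // prints 1
  return 0;
}

The intuition: when the candidate already reads a result of the last CPSR writer, hardware must order the pair anyway, so the extra CPSR write from the narrowed 's' encoding costs nothing; otherwise the partial flag update would create a false dependency that defeats register renaming on cores like Cortex-A9, which is exactly why the pass keeps the 32-bit encoding in that case.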