diff options
Diffstat (limited to 'lib/Target/ARM')
28 files changed, 1722 insertions, 464 deletions
diff --git a/lib/Target/ARM/ARMAddressingModes.h b/lib/Target/ARM/ARMAddressingModes.h index 183915335192..c603708652f6 100644 --- a/lib/Target/ARM/ARMAddressingModes.h +++ b/lib/Target/ARM/ARMAddressingModes.h @@ -15,7 +15,6 @@ #define LLVM_TARGET_ARM_ARMADDRESSINGMODES_H #include "llvm/CodeGen/SelectionDAGNodes.h" -#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" #include <cassert> @@ -38,7 +37,7 @@ namespace ARM_AM { static inline const char *getShiftOpcStr(ShiftOpc Op) { switch (Op) { - default: llvm_unreachable("Unknown shift opc!"); + default: assert(0 && "Unknown shift opc!"); case ARM_AM::asr: return "asr"; case ARM_AM::lsl: return "lsl"; case ARM_AM::lsr: return "lsr"; @@ -71,7 +70,7 @@ namespace ARM_AM { static inline const char *getAMSubModeStr(AMSubMode Mode) { switch (Mode) { - default: llvm_unreachable("Unknown addressing sub-mode!"); + default: assert(0 && "Unknown addressing sub-mode!"); case ARM_AM::ia: return "ia"; case ARM_AM::ib: return "ib"; case ARM_AM::da: return "da"; @@ -81,7 +80,7 @@ namespace ARM_AM { static inline const char *getAMSubModeAltStr(AMSubMode Mode, bool isLD) { switch (Mode) { - default: llvm_unreachable("Unknown addressing sub-mode!"); + default: assert(0 && "Unknown addressing sub-mode!"); case ARM_AM::ia: return isLD ? "fd" : "ea"; case ARM_AM::ib: return isLD ? "ed" : "fa"; case ARM_AM::da: return isLD ? "fa" : "ed"; @@ -342,6 +341,66 @@ namespace ARM_AM { return -1; } + static inline unsigned getT2SOImmValRotate(unsigned V) { + if ((V & ~255U) == 0) return 0; + // Use CTZ to compute the rotate amount. + unsigned RotAmt = CountTrailingZeros_32(V); + return (32 - RotAmt) & 31; + } + + static inline bool isT2SOImmTwoPartVal (unsigned Imm) { + unsigned V = Imm; + // Passing values can be any combination of splat values and shifter + // values. If this can be handled with a single shifter or splat, bail + // out. Those should be handled directly, not with a two-part val. + if (getT2SOImmValSplatVal(V) != -1) + return false; + V = rotr32 (~255U, getT2SOImmValRotate(V)) & V; + if (V == 0) + return false; + + // If this can be handled as an immediate, accept. + if (getT2SOImmVal(V) != -1) return true; + + // Likewise, try masking out a splat value first. + V = Imm; + if (getT2SOImmValSplatVal(V & 0xff00ff00U) != -1) + V &= ~0xff00ff00U; + else if (getT2SOImmValSplatVal(V & 0x00ff00ffU) != -1) + V &= ~0x00ff00ffU; + // If what's left can be handled as an immediate, accept. + if (getT2SOImmVal(V) != -1) return true; + + // Otherwise, do not accept. + return false; + } + + static inline unsigned getT2SOImmTwoPartFirst(unsigned Imm) { + assert (isT2SOImmTwoPartVal(Imm) && + "Immedate cannot be encoded as two part immediate!"); + // Try a shifter operand as one part + unsigned V = rotr32 (~255, getT2SOImmValRotate(Imm)) & Imm; + // If the rest is encodable as an immediate, then return it. + if (getT2SOImmVal(V) != -1) return V; + + // Try masking out a splat value first. + if (getT2SOImmValSplatVal(Imm & 0xff00ff00U) != -1) + return Imm & 0xff00ff00U; + + // The other splat is all that's left as an option. + assert (getT2SOImmValSplatVal(Imm & 0x00ff00ffU) != -1); + return Imm & 0x00ff00ffU; + } + + static inline unsigned getT2SOImmTwoPartSecond(unsigned Imm) { + // Mask out the first hunk + Imm ^= getT2SOImmTwoPartFirst(Imm); + // Return what's left + assert (getT2SOImmVal(Imm) != -1 && + "Unable to encode second part of T2 two part SO immediate"); + return Imm; + } + //===--------------------------------------------------------------------===// // Addressing Mode #2 diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/lib/Target/ARM/ARMBaseRegisterInfo.cpp index 42ef183e5261..00e75310b6fb 100644 --- a/lib/Target/ARM/ARMBaseRegisterInfo.cpp +++ b/lib/Target/ARM/ARMBaseRegisterInfo.cpp @@ -36,8 +36,18 @@ #include "llvm/Target/TargetOptions.h" #include "llvm/ADT/BitVector.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/Support/CommandLine.h" using namespace llvm; +static cl::opt<bool> +ScavengeFrameIndexVals("arm-virtual-frame-index-vals", cl::Hidden, + cl::init(false), + cl::desc("Resolve frame index values via scavenging in PEI")); + +static cl::opt<bool> +ReuseFrameIndexVals("arm-reuse-frame-index-vals", cl::Hidden, cl::init(false), + cl::desc("Reuse repeated frame index values")); + unsigned ARMBaseRegisterInfo::getRegisterNumbering(unsigned RegEnum, bool *isSPVFP) { if (isSPVFP) @@ -740,8 +750,7 @@ unsigned ARMBaseRegisterInfo::getRegisterPairEven(unsigned Reg, case ARM::R1: return ARM::R0; case ARM::R3: - // FIXME! - return STI.isThumb1Only() ? 0 : ARM::R2; + return ARM::R2; case ARM::R5: return ARM::R4; case ARM::R7: @@ -830,8 +839,7 @@ unsigned ARMBaseRegisterInfo::getRegisterPairOdd(unsigned Reg, case ARM::R0: return ARM::R1; case ARM::R2: - // FIXME! - return STI.isThumb1Only() ? 0 : ARM::R3; + return ARM::R3; case ARM::R4: return ARM::R5; case ARM::R6: @@ -937,6 +945,11 @@ requiresRegisterScavenging(const MachineFunction &MF) const { return true; } +bool ARMBaseRegisterInfo:: +requiresFrameIndexScavenging(const MachineFunction &MF) const { + return ScavengeFrameIndexVals; +} + // hasReservedCallFrame - Under normal circumstances, when a frame pointer is // not required, we reserve argument space for call sites in the function // immediately on entry to the current function. This eliminates the need for @@ -1077,14 +1090,7 @@ ARMBaseRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, (MI.getDesc().TSFlags & ARMII::AddrModeMask) == ARMII::AddrMode4) && "This code isn't needed if offset already handled!"); - // Insert a set of r12 with the full address: r12 = sp + offset - // If the offset we have is too large to fit into the instruction, we need - // to form it with a series of ADDri's. Do this by taking 8-bit chunks - // out of 'Offset'. - unsigned ScratchReg = findScratchRegister(RS, ARM::GPRRegisterClass, AFI); - if (ScratchReg == 0) - // No register is "free". Scavenge a register. - ScratchReg = RS->scavengeRegister(ARM::GPRRegisterClass, II, SPAdj); + unsigned ScratchReg = 0; int PIdx = MI.findFirstPredOperandIdx(); ARMCC::CondCodes Pred = (PIdx == -1) ? ARMCC::AL : (ARMCC::CondCodes)MI.getOperand(PIdx).getImm(); @@ -1093,6 +1099,19 @@ ARMBaseRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, // Must be addrmode4. MI.getOperand(i).ChangeToRegister(FrameReg, false, false, false); else { + if (!ScavengeFrameIndexVals) { + // Insert a set of r12 with the full address: r12 = sp + offset + // If the offset we have is too large to fit into the instruction, we need + // to form it with a series of ADDri's. Do this by taking 8-bit chunks + // out of 'Offset'. + ScratchReg = findScratchRegister(RS, ARM::GPRRegisterClass, AFI); + if (ScratchReg == 0) + // No register is "free". Scavenge a register. + ScratchReg = RS->scavengeRegister(ARM::GPRRegisterClass, II, SPAdj); + } else { + ScratchReg = MF.getRegInfo().createVirtualRegister(ARM::GPRRegisterClass); + *Value = Offset; + } if (!AFI->isThumbFunction()) emitARMRegPlusImmediate(MBB, II, MI.getDebugLoc(), ScratchReg, FrameReg, Offset, Pred, PredReg, TII); @@ -1102,8 +1121,10 @@ ARMBaseRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, Offset, Pred, PredReg, TII); } MI.getOperand(i).ChangeToRegister(ScratchReg, false, false, true); + if (!ReuseFrameIndexVals || !ScavengeFrameIndexVals) + ScratchReg = 0; } - return 0; + return ScratchReg; } /// Move iterator pass the next bunch of callee save load / store ops for diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.h b/lib/Target/ARM/ARMBaseRegisterInfo.h index da703fbc8c19..f7d38e540def 100644 --- a/lib/Target/ARM/ARMBaseRegisterInfo.h +++ b/lib/Target/ARM/ARMBaseRegisterInfo.h @@ -122,6 +122,8 @@ public: virtual bool requiresRegisterScavenging(const MachineFunction &MF) const; + virtual bool requiresFrameIndexScavenging(const MachineFunction &MF) const; + virtual bool hasReservedCallFrame(MachineFunction &MF) const; virtual void eliminateCallFramePseudoInstr(MachineFunction &MF, diff --git a/lib/Target/ARM/ARMConstantIslandPass.cpp b/lib/Target/ARM/ARMConstantIslandPass.cpp index b0bd04bade40..c995ff2d9906 100644 --- a/lib/Target/ARM/ARMConstantIslandPass.cpp +++ b/lib/Target/ARM/ARMConstantIslandPass.cpp @@ -28,9 +28,11 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/Statistic.h" +#include <algorithm> using namespace llvm; STATISTIC(NumCPEs, "Number of constpool entries"); @@ -70,6 +72,10 @@ namespace { /// to a return, unreachable, or unconditional branch). std::vector<MachineBasicBlock*> WaterList; + /// NewWaterList - The subset of WaterList that was created since the + /// previous iteration by inserting unconditional branches. + SmallSet<MachineBasicBlock*, 4> NewWaterList; + typedef std::vector<MachineBasicBlock*>::iterator water_iterator; /// CPUser - One user of a constant pool, keeping the machine instruction @@ -175,9 +181,7 @@ namespace { void AdjustBBOffsetsAfter(MachineBasicBlock *BB, int delta); bool DecrementOldEntry(unsigned CPI, MachineInstr* CPEMI); int LookForExistingCPEntry(CPUser& U, unsigned UserOffset); - bool LookForWater(CPUser&U, unsigned UserOffset, - MachineBasicBlock *&NewMBB); - MachineBasicBlock *AcceptWater(water_iterator IP); + bool LookForWater(CPUser&U, unsigned UserOffset, water_iterator &WaterIter); void CreateNewWater(unsigned CPUserIndex, unsigned UserOffset, MachineBasicBlock *&NewMBB); bool HandleConstantPoolUser(MachineFunction &MF, unsigned CPUserIndex); @@ -297,6 +301,10 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &MF) { if (CPChange && ++NoCPIters > 30) llvm_unreachable("Constant Island pass failed to converge!"); DEBUG(dumpBBs()); + + // Clear NewWaterList now. If we split a block for branches, it should + // appear as "new water" for the next iteration of constant pool placement. + NewWaterList.clear(); bool BRChange = false; for (unsigned i = 0, e = ImmBranches.size(); i != e; ++i) @@ -629,7 +637,7 @@ void ARMConstantIslands::UpdateForInsertedWaterBlock(MachineBasicBlock *NewBB) { /// Split the basic block containing MI into two blocks, which are joined by -/// an unconditional branch. Update datastructures and renumber blocks to +/// an unconditional branch. Update data structures and renumber blocks to /// account for this change and returns the newly created block. MachineBasicBlock *ARMConstantIslands::SplitBlockBeforeInstr(MachineInstr *MI) { MachineBasicBlock *OrigBB = MI->getParent(); @@ -691,6 +699,7 @@ MachineBasicBlock *ARMConstantIslands::SplitBlockBeforeInstr(MachineInstr *MI) { WaterList.insert(next(IP), NewBB); else WaterList.insert(IP, OrigBB); + NewWaterList.insert(OrigBB); // Figure out how large the first NewMBB is. (It cannot // contain a constpool_entry or tablejump.) @@ -941,30 +950,16 @@ static inline unsigned getUnconditionalBrDisp(int Opc) { return ((1<<23)-1)*4; } -/// AcceptWater - Small amount of common code factored out of the following. -/// -MachineBasicBlock *ARMConstantIslands::AcceptWater(water_iterator IP) { - DEBUG(errs() << "found water in range\n"); - MachineBasicBlock *WaterBB = *IP; - // Remove the original WaterList entry; we want subsequent - // insertions in this vicinity to go after the one we're - // about to insert. This considerably reduces the number - // of times we have to move the same CPE more than once. - WaterList.erase(IP); - // CPE goes before following block (NewMBB). - return next(MachineFunction::iterator(WaterBB)); -} - -/// LookForWater - look for an existing entry in the WaterList in which +/// LookForWater - Look for an existing entry in the WaterList in which /// we can place the CPE referenced from U so it's within range of U's MI. -/// Returns true if found, false if not. If it returns true, NewMBB +/// Returns true if found, false if not. If it returns true, WaterIter /// is set to the WaterList entry. For Thumb, prefer water that will not /// introduce padding to water that will. To ensure that this pass /// terminates, the CPE location for a particular CPUser is only allowed to /// move to a lower address, so search backward from the end of the list and /// prefer the first water that is in range. bool ARMConstantIslands::LookForWater(CPUser &U, unsigned UserOffset, - MachineBasicBlock *&NewMBB) { + water_iterator &WaterIter) { if (WaterList.empty()) return false; @@ -973,9 +968,17 @@ bool ARMConstantIslands::LookForWater(CPUser &U, unsigned UserOffset, for (water_iterator IP = prior(WaterList.end()), B = WaterList.begin();; --IP) { MachineBasicBlock* WaterBB = *IP; - // Check if water is in range and at a lower address than the current one. - if (WaterBB->getNumber() < U.HighWaterMark->getNumber() && - WaterIsInRange(UserOffset, WaterBB, U)) { + // Check if water is in range and is either at a lower address than the + // current "high water mark" or a new water block that was created since + // the previous iteration by inserting an unconditional branch. In the + // latter case, we want to allow resetting the high water mark back to + // this new water since we haven't seen it before. Inserting branches + // should be relatively uncommon and when it does happen, we want to be + // sure to take advantage of it for all the CPEs near that block, so that + // we don't insert more branches than necessary. + if (WaterIsInRange(UserOffset, WaterBB, U) && + (WaterBB->getNumber() < U.HighWaterMark->getNumber() || + NewWaterList.count(WaterBB))) { unsigned WBBId = WaterBB->getNumber(); if (isThumb && (BBOffsets[WBBId] + BBSizes[WBBId])%4 != 0) { @@ -986,7 +989,7 @@ bool ARMConstantIslands::LookForWater(CPUser &U, unsigned UserOffset, IPThatWouldPad = IP; } } else { - NewMBB = AcceptWater(IP); + WaterIter = IP; return true; } } @@ -994,7 +997,7 @@ bool ARMConstantIslands::LookForWater(CPUser &U, unsigned UserOffset, break; } if (FoundWaterThatWouldPad) { - NewMBB = AcceptWater(IPThatWouldPad); + WaterIter = IPThatWouldPad; return true; } return false; @@ -1107,7 +1110,6 @@ bool ARMConstantIslands::HandleConstantPoolUser(MachineFunction &MF, MachineInstr *CPEMI = U.CPEMI; unsigned CPI = CPEMI->getOperand(1).getIndex(); unsigned Size = CPEMI->getOperand(2).getImm(); - MachineBasicBlock *NewMBB; // Compute this only once, it's expensive. The 4 or 8 is the value the // hardware keeps in the PC. unsigned UserOffset = GetOffsetOf(UserMI) + (isThumb ? 4 : 8); @@ -1123,14 +1125,50 @@ bool ARMConstantIslands::HandleConstantPoolUser(MachineFunction &MF, unsigned ID = AFI->createConstPoolEntryUId(); // Look for water where we can place this CPE. - if (!LookForWater(U, UserOffset, NewMBB)) { + MachineBasicBlock *NewIsland = MF.CreateMachineBasicBlock(); + MachineBasicBlock *NewMBB; + water_iterator IP; + if (LookForWater(U, UserOffset, IP)) { + DEBUG(errs() << "found water in range\n"); + MachineBasicBlock *WaterBB = *IP; + + // If the original WaterList entry was "new water" on this iteration, + // propagate that to the new island. This is just keeping NewWaterList + // updated to match the WaterList, which will be updated below. + if (NewWaterList.count(WaterBB)) { + NewWaterList.erase(WaterBB); + NewWaterList.insert(NewIsland); + } + // The new CPE goes before the following block (NewMBB). + NewMBB = next(MachineFunction::iterator(WaterBB)); + + } else { // No water found. DEBUG(errs() << "No water found\n"); CreateNewWater(CPUserIndex, UserOffset, NewMBB); + + // SplitBlockBeforeInstr adds to WaterList, which is important when it is + // called while handling branches so that the water will be seen on the + // next iteration for constant pools, but in this context, we don't want + // it. Check for this so it will be removed from the WaterList. + // Also remove any entry from NewWaterList. + MachineBasicBlock *WaterBB = prior(MachineFunction::iterator(NewMBB)); + IP = std::find(WaterList.begin(), WaterList.end(), WaterBB); + if (IP != WaterList.end()) + NewWaterList.erase(WaterBB); + + // We are adding new water. Update NewWaterList. + NewWaterList.insert(NewIsland); } + // Remove the original WaterList entry; we want subsequent insertions in + // this vicinity to go after the one we're about to insert. This + // considerably reduces the number of times we have to move the same CPE + // more than once and is also important to ensure the algorithm terminates. + if (IP != WaterList.end()) + WaterList.erase(IP); + // Okay, we know we can put an island before NewMBB now, do it! - MachineBasicBlock *NewIsland = MF.CreateMachineBasicBlock(); MF.insert(NewMBB, NewIsland); // Update internal data structures to account for the newly inserted MBB. diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp index c39de0a12b8f..5c1835b46a22 100644 --- a/lib/Target/ARM/ARMISelDAGToDAG.cpp +++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp @@ -1287,7 +1287,7 @@ SDNode *ARMDAGToDAGISel::SelectV6T2BitfieldExtractOp(SDValue Op, assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!"); unsigned Width = 32 - Srl_imm; int LSB = Srl_imm - Shl_imm; - if ((LSB + Width) > 32) + if (LSB < 0) return NULL; SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); SDValue Ops[] = { Op.getOperand(0).getOperand(0), @@ -1427,6 +1427,43 @@ SDNode *ARMDAGToDAGISel::Select(SDValue Op) { } } break; + case ISD::AND: { + // (and (or x, c2), c1) and top 16-bits of c1 and c2 match, lower 16-bits + // of c1 are 0xffff, and lower 16-bit of c2 are 0. That is, the top 16-bits + // are entirely contributed by c2 and lower 16-bits are entirely contributed + // by x. That's equal to (or (and x, 0xffff), (and c1, 0xffff0000)). + // Select it to: "movt x, ((c1 & 0xffff) >> 16) + EVT VT = Op.getValueType(); + if (VT != MVT::i32) + break; + unsigned Opc = (Subtarget->isThumb() && Subtarget->hasThumb2()) + ? ARM::t2MOVTi16 + : (Subtarget->hasV6T2Ops() ? ARM::MOVTi16 : 0); + if (!Opc) + break; + SDValue N0 = Op.getOperand(0), N1 = Op.getOperand(1); + ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); + if (!N1C) + break; + if (N0.getOpcode() == ISD::OR && N0.getNode()->hasOneUse()) { + SDValue N2 = N0.getOperand(1); + ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2); + if (!N2C) + break; + unsigned N1CVal = N1C->getZExtValue(); + unsigned N2CVal = N2C->getZExtValue(); + if ((N1CVal & 0xffff0000U) == (N2CVal & 0xffff0000U) && + (N1CVal & 0xffffU) == 0xffffU && + (N2CVal & 0xffffU) == 0x0U) { + SDValue Imm16 = CurDAG->getTargetConstant((N2CVal & 0xFFFF0000U) >> 16, + MVT::i32); + SDValue Ops[] = { N0.getOperand(0), Imm16, + getAL(CurDAG), CurDAG->getRegister(0, MVT::i32) }; + return CurDAG->getMachineNode(Opc, dl, VT, Ops, 4); + } + } + break; + } case ARMISD::FMRRD: return CurDAG->getMachineNode(ARM::FMRRD, dl, MVT::i32, MVT::i32, Op.getOperand(0), getAL(CurDAG), diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index 426cecb28eb7..6a264fdfc44f 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -25,9 +25,10 @@ #include "llvm/CallingConv.h" #include "llvm/Constants.h" #include "llvm/Function.h" +#include "llvm/GlobalValue.h" #include "llvm/Instruction.h" #include "llvm/Intrinsics.h" -#include "llvm/GlobalValue.h" +#include "llvm/Type.h" #include "llvm/CodeGen/CallingConvLower.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFrameInfo.h" @@ -2360,8 +2361,11 @@ static bool isVREVMask(const SmallVectorImpl<int> &M, EVT VT, assert((BlockSize==16 || BlockSize==32 || BlockSize==64) && "Only possible block sizes for VREV are: 16, 32, 64"); - unsigned NumElts = VT.getVectorNumElements(); unsigned EltSz = VT.getVectorElementType().getSizeInBits(); + if (EltSz == 64) + return false; + + unsigned NumElts = VT.getVectorNumElements(); unsigned BlockElts = M[0] + 1; if (BlockSize <= EltSz || BlockSize != BlockElts * EltSz) @@ -2378,6 +2382,10 @@ static bool isVREVMask(const SmallVectorImpl<int> &M, EVT VT, static bool isVTRNMask(const SmallVectorImpl<int> &M, EVT VT, unsigned &WhichResult) { + unsigned EltSz = VT.getVectorElementType().getSizeInBits(); + if (EltSz == 64) + return false; + unsigned NumElts = VT.getVectorNumElements(); WhichResult = (M[0] == 0 ? 0 : 1); for (unsigned i = 0; i < NumElts; i += 2) { @@ -2390,6 +2398,10 @@ static bool isVTRNMask(const SmallVectorImpl<int> &M, EVT VT, static bool isVUZPMask(const SmallVectorImpl<int> &M, EVT VT, unsigned &WhichResult) { + unsigned EltSz = VT.getVectorElementType().getSizeInBits(); + if (EltSz == 64) + return false; + unsigned NumElts = VT.getVectorNumElements(); WhichResult = (M[0] == 0 ? 0 : 1); for (unsigned i = 0; i != NumElts; ++i) { @@ -2398,7 +2410,7 @@ static bool isVUZPMask(const SmallVectorImpl<int> &M, EVT VT, } // VUZP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32. - if (VT.is64BitVector() && VT.getVectorElementType().getSizeInBits() == 32) + if (VT.is64BitVector() && EltSz == 32) return false; return true; @@ -2406,6 +2418,10 @@ static bool isVUZPMask(const SmallVectorImpl<int> &M, EVT VT, static bool isVZIPMask(const SmallVectorImpl<int> &M, EVT VT, unsigned &WhichResult) { + unsigned EltSz = VT.getVectorElementType().getSizeInBits(); + if (EltSz == 64) + return false; + unsigned NumElts = VT.getVectorNumElements(); WhichResult = (M[0] == 0 ? 0 : 1); unsigned Idx = WhichResult * NumElts / 2; @@ -2417,7 +2433,7 @@ static bool isVZIPMask(const SmallVectorImpl<int> &M, EVT VT, } // VZIP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32. - if (VT.is64BitVector() && VT.getVectorElementType().getSizeInBits() == 32) + if (VT.is64BitVector() && EltSz == 32) return false; return true; @@ -2695,18 +2711,10 @@ static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) { DebugLoc dl = Op.getDebugLoc(); SDValue Vec = Op.getOperand(0); SDValue Lane = Op.getOperand(1); - - // FIXME: This is invalid for 8 and 16-bit elements - the information about - // sign / zero extension is lost! - Op = DAG.getNode(ARMISD::VGETLANEu, dl, MVT::i32, Vec, Lane); - Op = DAG.getNode(ISD::AssertZext, dl, MVT::i32, Op, DAG.getValueType(VT)); - - if (VT.bitsLT(MVT::i32)) - Op = DAG.getNode(ISD::TRUNCATE, dl, VT, Op); - else if (VT.bitsGT(MVT::i32)) - Op = DAG.getNode(ISD::ANY_EXTEND, dl, VT, Op); - - return Op; + assert(VT == MVT::i32 && + Vec.getValueType().getVectorElementType().getSizeInBits() < 32 && + "unexpected type for custom-lowering vector extract"); + return DAG.getNode(ARMISD::VGETLANEu, dl, MVT::i32, Vec, Lane); } static SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) { @@ -3029,7 +3037,6 @@ static SDValue PerformSUBCombine(SDNode *N, return SDValue(); } - /// PerformFMRRDCombine - Target-specific dag combine xforms for ARMISD::FMRRD. static SDValue PerformFMRRDCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) { diff --git a/lib/Target/ARM/ARMInstrFormats.td b/lib/Target/ARM/ARMInstrFormats.td index 3d19f2345d30..8225fd741bb8 100644 --- a/lib/Target/ARM/ARMInstrFormats.td +++ b/lib/Target/ARM/ARMInstrFormats.td @@ -1220,6 +1220,10 @@ class NLdSt<bit op23, bits<2> op21_20, bits<4> op11_8, bits<4> op7_4, string asm, string cstr, list<dag> pattern> : NeonI<oops, iops, AddrMode6, IndexModeNone, itin, asm, cstr, pattern> { let Inst{31-24} = 0b11110100; + let Inst{23} = op23; + let Inst{21-20} = op21_20; + let Inst{11-8} = op11_8; + let Inst{7-4} = op7_4; } class NDataI<dag oops, dag iops, InstrItinClass itin, @@ -1258,15 +1262,26 @@ class N2V<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18, bits<2> op17_16, let Inst{4} = op4; } +// NEON Vector Duplicate (scalar). +// Inst{19-16} is specified by subclasses. +class N2VDup<bits<2> op24_23, bits<2> op21_20, bits<5> op11_7, bit op6, bit op4, + dag oops, dag iops, InstrItinClass itin, + string asm, string cstr, list<dag> pattern> + : NDataI<oops, iops, itin, asm, cstr, pattern> { + let Inst{24-23} = op24_23; + let Inst{21-20} = op21_20; + let Inst{11-7} = op11_7; + let Inst{6} = op6; + let Inst{4} = op4; +} + // NEON 2 vector register with immediate. -class N2VImm<bit op24, bit op23, bits<6> op21_16, bits<4> op11_8, bit op7, - bit op6, bit op4, +class N2VImm<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6, bit op4, dag oops, dag iops, InstrItinClass itin, string asm, string cstr, list<dag> pattern> : NDataI<oops, iops, itin, asm, cstr, pattern> { let Inst{24} = op24; let Inst{23} = op23; - let Inst{21-16} = op21_16; let Inst{11-8} = op11_8; let Inst{7} = op7; let Inst{6} = op6; @@ -1286,6 +1301,20 @@ class N3V<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op6, bit op4, let Inst{4} = op4; } +// NEON 3 vector register with immediate. This is only used for VEXT where +// op11_8 represents the starting byte index of the extracted result in the +// concatenation of the operands and is left unspecified. +class N3VImm<bit op24, bit op23, bits<2> op21_20, bit op6, bit op4, + dag oops, dag iops, InstrItinClass itin, + string asm, string cstr, list<dag> pattern> + : NDataI<oops, iops, itin, asm, cstr, pattern> { + let Inst{24} = op24; + let Inst{23} = op23; + let Inst{21-20} = op21_20; + let Inst{6} = op6; + let Inst{4} = op4; +} + // NEON VMOVs between scalar and core registers. class NVLaneOp<bits<8> opcod1, bits<4> opcod2, bits<2> opcod3, dag oops, dag iops, Format f, InstrItinClass itin, diff --git a/lib/Target/ARM/ARMInstrInfo.cpp b/lib/Target/ARM/ARMInstrInfo.cpp index 4c92891c82bd..dd4123bfa0a3 100644 --- a/lib/Target/ARM/ARMInstrInfo.cpp +++ b/lib/Target/ARM/ARMInstrInfo.cpp @@ -22,7 +22,6 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineJumpTableInfo.h" #include "llvm/MC/MCAsmInfo.h" -#include "llvm/Support/CommandLine.h" using namespace llvm; ARMInstrInfo::ARMInstrInfo(const ARMSubtarget &STI) diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td index 6ec78bc72ee7..384b98cf540c 100644 --- a/lib/Target/ARM/ARMInstrInfo.td +++ b/lib/Target/ARM/ARMInstrInfo.td @@ -980,6 +980,9 @@ def MOVTi16 : AI1<0b1010, (outs GPR:$dst), (ins GPR:$src, i32imm:$imm), let Inst{25} = 1; } +def : ARMPat<(or GPR:$src, 0xffff0000), (MOVTi16 GPR:$src, 0xffff)>, + Requires<[IsARM, HasV6T2]>; + let Uses = [CPSR] in def MOVrx : AsI1<0b1101, (outs GPR:$dst), (ins GPR:$src), Pseudo, IIC_iMOVsi, "mov", " $dst, $src, rrx", @@ -1580,10 +1583,17 @@ def : ARMPat<(or GPR:$LHS, so_imm2part:$RHS), def : ARMPat<(xor GPR:$LHS, so_imm2part:$RHS), (EORri (EORri GPR:$LHS, (so_imm2part_1 imm:$RHS)), (so_imm2part_2 imm:$RHS))>; +def : ARMPat<(add GPR:$LHS, so_imm2part:$RHS), + (ADDri (ADDri GPR:$LHS, (so_imm2part_1 imm:$RHS)), + (so_imm2part_2 imm:$RHS))>; +def : ARMPat<(sub GPR:$LHS, so_imm2part:$RHS), + (SUBri (SUBri GPR:$LHS, (so_imm2part_1 imm:$RHS)), + (so_imm2part_2 imm:$RHS))>; // 32-bit immediate using movw + movt. -// This is a single pseudo instruction to make it re-materializable. Remove -// when we can do generalized remat. +// This is a single pseudo instruction, the benefit is that it can be remat'd +// as a single unit instead of having to handle reg inputs. +// FIXME: Remove this when we can do generalized remat. let isReMaterializable = 1 in def MOVi32imm : AI1x2<(outs GPR:$dst), (ins i32imm:$src), Pseudo, IIC_iMOVi, "movw", " $dst, ${src:lo16}\n\tmovt${p} $dst, ${src:hi16}", diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td index b34aff7dcb3e..822950c52836 100644 --- a/lib/Target/ARM/ARMInstrNEON.td +++ b/lib/Target/ARM/ARMInstrNEON.td @@ -456,17 +456,17 @@ class VST2LN<bits<4> op11_8, string OpcodeStr> !strconcat(OpcodeStr, "\t\\{$src1[$lane],$src2[$lane]\\}, $addr"), "", []>; -def VST2LNd8 : VST2LN<0b0000, "vst2.8">; -def VST2LNd16 : VST2LN<0b0100, "vst2.16">; -def VST2LNd32 : VST2LN<0b1000, "vst2.32">; +def VST2LNd8 : VST2LN<0b0001, "vst2.8">; +def VST2LNd16 : VST2LN<0b0101, "vst2.16">; +def VST2LNd32 : VST2LN<0b1001, "vst2.32">; // vst2 to double-spaced even registers. -def VST2LNq16a: VST2LN<0b0100, "vst2.16">; -def VST2LNq32a: VST2LN<0b1000, "vst2.32">; +def VST2LNq16a: VST2LN<0b0101, "vst2.16">; +def VST2LNq32a: VST2LN<0b1001, "vst2.32">; // vst2 to double-spaced odd registers. -def VST2LNq16b: VST2LN<0b0100, "vst2.16">; -def VST2LNq32b: VST2LN<0b1000, "vst2.32">; +def VST2LNq16b: VST2LN<0b0101, "vst2.16">; +def VST2LNq32b: VST2LN<0b1001, "vst2.32">; // VST3LN : Vector Store (single 3-element structure from one lane) class VST3LN<bits<4> op11_8, string OpcodeStr> @@ -623,12 +623,12 @@ class N2VNInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18, (ins QPR:$src), itin, !strconcat(OpcodeStr, "\t$dst, $src"), "", [(set DPR:$dst, (TyD (IntOp (TyQ QPR:$src))))]>; -// Long 2-register intrinsics. (This is currently only used for VMOVL and is -// derived from N2VImm instead of N2V because of the way the size is encoded.) -class N2VLInt<bit op24, bit op23, bits<6> op21_16, bits<4> op11_8, bit op7, - bit op6, bit op4, InstrItinClass itin, string OpcodeStr, +// Long 2-register intrinsics (currently only used for VMOVL). +class N2VLInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18, + bits<2> op17_16, bits<5> op11_7, bit op6, bit op4, + InstrItinClass itin, string OpcodeStr, ValueType TyQ, ValueType TyD, Intrinsic IntOp> - : N2VImm<op24, op23, op21_16, op11_8, op7, op6, op4, (outs QPR:$dst), + : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, op6, op4, (outs QPR:$dst), (ins DPR:$src), itin, !strconcat(OpcodeStr, "\t$dst, $src"), "", [(set QPR:$dst, (TyQ (IntOp (TyD DPR:$src))))]>; @@ -1016,36 +1016,33 @@ class N2VQPLInt2<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18, // Shift by immediate, // both double- and quad-register. -class N2VDSh<bit op24, bit op23, bits<6> op21_16, bits<4> op11_8, bit op7, - bit op4, InstrItinClass itin, string OpcodeStr, - ValueType Ty, SDNode OpNode> - : N2VImm<op24, op23, op21_16, op11_8, op7, 0, op4, +class N2VDSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4, + InstrItinClass itin, string OpcodeStr, ValueType Ty, SDNode OpNode> + : N2VImm<op24, op23, op11_8, op7, 0, op4, (outs DPR:$dst), (ins DPR:$src, i32imm:$SIMM), itin, !strconcat(OpcodeStr, "\t$dst, $src, $SIMM"), "", [(set DPR:$dst, (Ty (OpNode (Ty DPR:$src), (i32 imm:$SIMM))))]>; -class N2VQSh<bit op24, bit op23, bits<6> op21_16, bits<4> op11_8, bit op7, - bit op4, InstrItinClass itin, string OpcodeStr, - ValueType Ty, SDNode OpNode> - : N2VImm<op24, op23, op21_16, op11_8, op7, 1, op4, +class N2VQSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4, + InstrItinClass itin, string OpcodeStr, ValueType Ty, SDNode OpNode> + : N2VImm<op24, op23, op11_8, op7, 1, op4, (outs QPR:$dst), (ins QPR:$src, i32imm:$SIMM), itin, !strconcat(OpcodeStr, "\t$dst, $src, $SIMM"), "", [(set QPR:$dst, (Ty (OpNode (Ty QPR:$src), (i32 imm:$SIMM))))]>; // Long shift by immediate. -class N2VLSh<bit op24, bit op23, bits<6> op21_16, bits<4> op11_8, bit op7, - bit op6, bit op4, string OpcodeStr, ValueType ResTy, - ValueType OpTy, SDNode OpNode> - : N2VImm<op24, op23, op21_16, op11_8, op7, op6, op4, +class N2VLSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6, bit op4, + string OpcodeStr, ValueType ResTy, ValueType OpTy, SDNode OpNode> + : N2VImm<op24, op23, op11_8, op7, op6, op4, (outs QPR:$dst), (ins DPR:$src, i32imm:$SIMM), IIC_VSHLiD, !strconcat(OpcodeStr, "\t$dst, $src, $SIMM"), "", [(set QPR:$dst, (ResTy (OpNode (OpTy DPR:$src), (i32 imm:$SIMM))))]>; // Narrow shift by immediate. -class N2VNSh<bit op24, bit op23, bits<6> op21_16, bits<4> op11_8, bit op7, - bit op6, bit op4, InstrItinClass itin, string OpcodeStr, +class N2VNSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6, bit op4, + InstrItinClass itin, string OpcodeStr, ValueType ResTy, ValueType OpTy, SDNode OpNode> - : N2VImm<op24, op23, op21_16, op11_8, op7, op6, op4, + : N2VImm<op24, op23, op11_8, op7, op6, op4, (outs DPR:$dst), (ins QPR:$src, i32imm:$SIMM), itin, !strconcat(OpcodeStr, "\t$dst, $src, $SIMM"), "", [(set DPR:$dst, (ResTy (OpNode (OpTy QPR:$src), @@ -1053,53 +1050,49 @@ class N2VNSh<bit op24, bit op23, bits<6> op21_16, bits<4> op11_8, bit op7, // Shift right by immediate and accumulate, // both double- and quad-register. -class N2VDShAdd<bit op24, bit op23, bits<6> op21_16, bits<4> op11_8, bit op7, - bit op4, string OpcodeStr, ValueType Ty, SDNode ShOp> - : N2VImm<op24, op23, op21_16, op11_8, op7, 0, op4, - (outs DPR:$dst), (ins DPR:$src1, DPR:$src2, i32imm:$SIMM), - IIC_VPALiD, +class N2VDShAdd<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4, + string OpcodeStr, ValueType Ty, SDNode ShOp> + : N2VImm<op24, op23, op11_8, op7, 0, op4, (outs DPR:$dst), + (ins DPR:$src1, DPR:$src2, i32imm:$SIMM), IIC_VPALiD, !strconcat(OpcodeStr, "\t$dst, $src2, $SIMM"), "$src1 = $dst", [(set DPR:$dst, (Ty (add DPR:$src1, (Ty (ShOp DPR:$src2, (i32 imm:$SIMM))))))]>; -class N2VQShAdd<bit op24, bit op23, bits<6> op21_16, bits<4> op11_8, bit op7, - bit op4, string OpcodeStr, ValueType Ty, SDNode ShOp> - : N2VImm<op24, op23, op21_16, op11_8, op7, 1, op4, - (outs QPR:$dst), (ins QPR:$src1, QPR:$src2, i32imm:$SIMM), - IIC_VPALiD, +class N2VQShAdd<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4, + string OpcodeStr, ValueType Ty, SDNode ShOp> + : N2VImm<op24, op23, op11_8, op7, 1, op4, (outs QPR:$dst), + (ins QPR:$src1, QPR:$src2, i32imm:$SIMM), IIC_VPALiD, !strconcat(OpcodeStr, "\t$dst, $src2, $SIMM"), "$src1 = $dst", [(set QPR:$dst, (Ty (add QPR:$src1, (Ty (ShOp QPR:$src2, (i32 imm:$SIMM))))))]>; // Shift by immediate and insert, // both double- and quad-register. -class N2VDShIns<bit op24, bit op23, bits<6> op21_16, bits<4> op11_8, bit op7, - bit op4, string OpcodeStr, ValueType Ty, SDNode ShOp> - : N2VImm<op24, op23, op21_16, op11_8, op7, 0, op4, - (outs DPR:$dst), (ins DPR:$src1, DPR:$src2, i32imm:$SIMM), - IIC_VSHLiD, +class N2VDShIns<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4, + string OpcodeStr, ValueType Ty, SDNode ShOp> + : N2VImm<op24, op23, op11_8, op7, 0, op4, (outs DPR:$dst), + (ins DPR:$src1, DPR:$src2, i32imm:$SIMM), IIC_VSHLiD, !strconcat(OpcodeStr, "\t$dst, $src2, $SIMM"), "$src1 = $dst", [(set DPR:$dst, (Ty (ShOp DPR:$src1, DPR:$src2, (i32 imm:$SIMM))))]>; -class N2VQShIns<bit op24, bit op23, bits<6> op21_16, bits<4> op11_8, bit op7, - bit op4, string OpcodeStr, ValueType Ty, SDNode ShOp> - : N2VImm<op24, op23, op21_16, op11_8, op7, 1, op4, - (outs QPR:$dst), (ins QPR:$src1, QPR:$src2, i32imm:$SIMM), - IIC_VSHLiQ, +class N2VQShIns<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4, + string OpcodeStr, ValueType Ty, SDNode ShOp> + : N2VImm<op24, op23, op11_8, op7, 1, op4, (outs QPR:$dst), + (ins QPR:$src1, QPR:$src2, i32imm:$SIMM), IIC_VSHLiQ, !strconcat(OpcodeStr, "\t$dst, $src2, $SIMM"), "$src1 = $dst", [(set QPR:$dst, (Ty (ShOp QPR:$src1, QPR:$src2, (i32 imm:$SIMM))))]>; // Convert, with fractional bits immediate, // both double- and quad-register. -class N2VCvtD<bit op24, bit op23, bits<6> op21_16, bits<4> op11_8, bit op7, - bit op4, string OpcodeStr, ValueType ResTy, ValueType OpTy, +class N2VCvtD<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4, + string OpcodeStr, ValueType ResTy, ValueType OpTy, Intrinsic IntOp> - : N2VImm<op24, op23, op21_16, op11_8, op7, 0, op4, + : N2VImm<op24, op23, op11_8, op7, 0, op4, (outs DPR:$dst), (ins DPR:$src, i32imm:$SIMM), IIC_VUNAD, !strconcat(OpcodeStr, "\t$dst, $src, $SIMM"), "", [(set DPR:$dst, (ResTy (IntOp (OpTy DPR:$src), (i32 imm:$SIMM))))]>; -class N2VCvtQ<bit op24, bit op23, bits<6> op21_16, bits<4> op11_8, bit op7, - bit op4, string OpcodeStr, ValueType ResTy, ValueType OpTy, +class N2VCvtQ<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4, + string OpcodeStr, ValueType ResTy, ValueType OpTy, Intrinsic IntOp> - : N2VImm<op24, op23, op21_16, op11_8, op7, 1, op4, + : N2VImm<op24, op23, op11_8, op7, 1, op4, (outs QPR:$dst), (ins QPR:$src, i32imm:$SIMM), IIC_VUNAQ, !strconcat(OpcodeStr, "\t$dst, $src, $SIMM"), "", [(set QPR:$dst, (ResTy (IntOp (OpTy QPR:$src), (i32 imm:$SIMM))))]>; @@ -1175,14 +1168,14 @@ multiclass N2VNInt_HSD<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16, // Neon Lengthening 2-register vector intrinsic (currently specific to VMOVL). // source operand element sizes of 16, 32 and 64 bits: -multiclass N2VLInt_QHS<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6, - bit op4, string OpcodeStr, Intrinsic IntOp> { - def v8i16 : N2VLInt<op24, op23, 0b001000, op11_8, op7, op6, op4, - IIC_VQUNAiD, !strconcat(OpcodeStr, "8"), v8i16, v8i8, IntOp>; - def v4i32 : N2VLInt<op24, op23, 0b010000, op11_8, op7, op6, op4, - IIC_VQUNAiD, !strconcat(OpcodeStr, "16"), v4i32, v4i16, IntOp>; - def v2i64 : N2VLInt<op24, op23, 0b100000, op11_8, op7, op6, op4, - IIC_VQUNAiD, !strconcat(OpcodeStr, "32"), v2i64, v2i32, IntOp>; +multiclass N2VLInt_QHS<bits<2> op24_23, bits<5> op11_7, bit op6, bit op4, + string OpcodeStr, Intrinsic IntOp> { + def v8i16 : N2VLInt<op24_23, 0b00, 0b10, 0b00, op11_7, op6, op4, IIC_VQUNAiD, + !strconcat(OpcodeStr, "8"), v8i16, v8i8, IntOp>; + def v4i32 : N2VLInt<op24_23, 0b01, 0b00, 0b00, op11_7, op6, op4, IIC_VQUNAiD, + !strconcat(OpcodeStr, "16"), v4i32, v4i16, IntOp>; + def v2i64 : N2VLInt<op24_23, 0b10, 0b00, 0b00, op11_7, op6, op4, IIC_VQUNAiD, + !strconcat(OpcodeStr, "32"), v2i64, v2i32, IntOp>; } @@ -1381,7 +1374,7 @@ multiclass N3VLInt3SL_HS<bit op24, bits<4> op11_8, multiclass N3VLInt3_QHS<bit op24, bit op23, bits<4> op11_8, bit op4, string OpcodeStr, Intrinsic IntOp> : N3VLInt3_HS<op24, op23, op11_8, op4, OpcodeStr, IntOp> { - def v8i16 : N3VLInt3<op24, op23, 0b01, op11_8, op4, IIC_VMACi16D, + def v8i16 : N3VLInt3<op24, op23, 0b00, op11_8, op4, IIC_VMACi16D, !strconcat(OpcodeStr, "8"), v8i16, v8i8, IntOp>; } @@ -1461,24 +1454,38 @@ multiclass N2VPLInt2_QHS<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16, multiclass N2VSh_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4, InstrItinClass itin, string OpcodeStr, SDNode OpNode> { // 64-bit vector types. - def v8i8 : N2VDSh<op24, op23, 0b001000, op11_8, 0, op4, itin, - !strconcat(OpcodeStr, "8"), v8i8, OpNode>; - def v4i16 : N2VDSh<op24, op23, 0b010000, op11_8, 0, op4, itin, - !strconcat(OpcodeStr, "16"), v4i16, OpNode>; - def v2i32 : N2VDSh<op24, op23, 0b100000, op11_8, 0, op4, itin, - !strconcat(OpcodeStr, "32"), v2i32, OpNode>; - def v1i64 : N2VDSh<op24, op23, 0b000000, op11_8, 1, op4, itin, + def v8i8 : N2VDSh<op24, op23, op11_8, 0, op4, itin, + !strconcat(OpcodeStr, "8"), v8i8, OpNode> { + let Inst{21-19} = 0b001; // imm6 = 001xxx + } + def v4i16 : N2VDSh<op24, op23, op11_8, 0, op4, itin, + !strconcat(OpcodeStr, "16"), v4i16, OpNode> { + let Inst{21-20} = 0b01; // imm6 = 01xxxx + } + def v2i32 : N2VDSh<op24, op23, op11_8, 0, op4, itin, + !strconcat(OpcodeStr, "32"), v2i32, OpNode> { + let Inst{21} = 0b1; // imm6 = 1xxxxx + } + def v1i64 : N2VDSh<op24, op23, op11_8, 1, op4, itin, !strconcat(OpcodeStr, "64"), v1i64, OpNode>; + // imm6 = xxxxxx // 128-bit vector types. - def v16i8 : N2VQSh<op24, op23, 0b001000, op11_8, 0, op4, itin, - !strconcat(OpcodeStr, "8"), v16i8, OpNode>; - def v8i16 : N2VQSh<op24, op23, 0b010000, op11_8, 0, op4, itin, - !strconcat(OpcodeStr, "16"), v8i16, OpNode>; - def v4i32 : N2VQSh<op24, op23, 0b100000, op11_8, 0, op4, itin, - !strconcat(OpcodeStr, "32"), v4i32, OpNode>; - def v2i64 : N2VQSh<op24, op23, 0b000000, op11_8, 1, op4, itin, + def v16i8 : N2VQSh<op24, op23, op11_8, 0, op4, itin, + !strconcat(OpcodeStr, "8"), v16i8, OpNode> { + let Inst{21-19} = 0b001; // imm6 = 001xxx + } + def v8i16 : N2VQSh<op24, op23, op11_8, 0, op4, itin, + !strconcat(OpcodeStr, "16"), v8i16, OpNode> { + let Inst{21-20} = 0b01; // imm6 = 01xxxx + } + def v4i32 : N2VQSh<op24, op23, op11_8, 0, op4, itin, + !strconcat(OpcodeStr, "32"), v4i32, OpNode> { + let Inst{21} = 0b1; // imm6 = 1xxxxx + } + def v2i64 : N2VQSh<op24, op23, op11_8, 1, op4, itin, !strconcat(OpcodeStr, "64"), v2i64, OpNode>; + // imm6 = xxxxxx } @@ -1487,24 +1494,38 @@ multiclass N2VSh_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4, multiclass N2VShAdd_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4, string OpcodeStr, SDNode ShOp> { // 64-bit vector types. - def v8i8 : N2VDShAdd<op24, op23, 0b001000, op11_8, 0, op4, - !strconcat(OpcodeStr, "8"), v8i8, ShOp>; - def v4i16 : N2VDShAdd<op24, op23, 0b010000, op11_8, 0, op4, - !strconcat(OpcodeStr, "16"), v4i16, ShOp>; - def v2i32 : N2VDShAdd<op24, op23, 0b100000, op11_8, 0, op4, - !strconcat(OpcodeStr, "32"), v2i32, ShOp>; - def v1i64 : N2VDShAdd<op24, op23, 0b000000, op11_8, 1, op4, + def v8i8 : N2VDShAdd<op24, op23, op11_8, 0, op4, + !strconcat(OpcodeStr, "8"), v8i8, ShOp> { + let Inst{21-19} = 0b001; // imm6 = 001xxx + } + def v4i16 : N2VDShAdd<op24, op23, op11_8, 0, op4, + !strconcat(OpcodeStr, "16"), v4i16, ShOp> { + let Inst{21-20} = 0b01; // imm6 = 01xxxx + } + def v2i32 : N2VDShAdd<op24, op23, op11_8, 0, op4, + !strconcat(OpcodeStr, "32"), v2i32, ShOp> { + let Inst{21} = 0b1; // imm6 = 1xxxxx + } + def v1i64 : N2VDShAdd<op24, op23, op11_8, 1, op4, !strconcat(OpcodeStr, "64"), v1i64, ShOp>; + // imm6 = xxxxxx // 128-bit vector types. - def v16i8 : N2VQShAdd<op24, op23, 0b001000, op11_8, 0, op4, - !strconcat(OpcodeStr, "8"), v16i8, ShOp>; - def v8i16 : N2VQShAdd<op24, op23, 0b010000, op11_8, 0, op4, - !strconcat(OpcodeStr, "16"), v8i16, ShOp>; - def v4i32 : N2VQShAdd<op24, op23, 0b100000, op11_8, 0, op4, - !strconcat(OpcodeStr, "32"), v4i32, ShOp>; - def v2i64 : N2VQShAdd<op24, op23, 0b000000, op11_8, 1, op4, + def v16i8 : N2VQShAdd<op24, op23, op11_8, 0, op4, + !strconcat(OpcodeStr, "8"), v16i8, ShOp> { + let Inst{21-19} = 0b001; // imm6 = 001xxx + } + def v8i16 : N2VQShAdd<op24, op23, op11_8, 0, op4, + !strconcat(OpcodeStr, "16"), v8i16, ShOp> { + let Inst{21-20} = 0b01; // imm6 = 01xxxx + } + def v4i32 : N2VQShAdd<op24, op23, op11_8, 0, op4, + !strconcat(OpcodeStr, "32"), v4i32, ShOp> { + let Inst{21} = 0b1; // imm6 = 1xxxxx + } + def v2i64 : N2VQShAdd<op24, op23, op11_8, 1, op4, !strconcat(OpcodeStr, "64"), v2i64, ShOp>; + // imm6 = xxxxxx } @@ -1513,24 +1534,75 @@ multiclass N2VShAdd_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4, multiclass N2VShIns_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4, string OpcodeStr, SDNode ShOp> { // 64-bit vector types. - def v8i8 : N2VDShIns<op24, op23, 0b001000, op11_8, 0, op4, - !strconcat(OpcodeStr, "8"), v8i8, ShOp>; - def v4i16 : N2VDShIns<op24, op23, 0b010000, op11_8, 0, op4, - !strconcat(OpcodeStr, "16"), v4i16, ShOp>; - def v2i32 : N2VDShIns<op24, op23, 0b100000, op11_8, 0, op4, - !strconcat(OpcodeStr, "32"), v2i32, ShOp>; - def v1i64 : N2VDShIns<op24, op23, 0b000000, op11_8, 1, op4, + def v8i8 : N2VDShIns<op24, op23, op11_8, 0, op4, + !strconcat(OpcodeStr, "8"), v8i8, ShOp> { + let Inst{21-19} = 0b001; // imm6 = 001xxx + } + def v4i16 : N2VDShIns<op24, op23, op11_8, 0, op4, + !strconcat(OpcodeStr, "16"), v4i16, ShOp> { + let Inst{21-20} = 0b01; // imm6 = 01xxxx + } + def v2i32 : N2VDShIns<op24, op23, op11_8, 0, op4, + !strconcat(OpcodeStr, "32"), v2i32, ShOp> { + let Inst{21} = 0b1; // imm6 = 1xxxxx + } + def v1i64 : N2VDShIns<op24, op23, op11_8, 1, op4, !strconcat(OpcodeStr, "64"), v1i64, ShOp>; + // imm6 = xxxxxx // 128-bit vector types. - def v16i8 : N2VQShIns<op24, op23, 0b001000, op11_8, 0, op4, - !strconcat(OpcodeStr, "8"), v16i8, ShOp>; - def v8i16 : N2VQShIns<op24, op23, 0b010000, op11_8, 0, op4, - !strconcat(OpcodeStr, "16"), v8i16, ShOp>; - def v4i32 : N2VQShIns<op24, op23, 0b100000, op11_8, 0, op4, - !strconcat(OpcodeStr, "32"), v4i32, ShOp>; - def v2i64 : N2VQShIns<op24, op23, 0b000000, op11_8, 1, op4, + def v16i8 : N2VQShIns<op24, op23, op11_8, 0, op4, + !strconcat(OpcodeStr, "8"), v16i8, ShOp> { + let Inst{21-19} = 0b001; // imm6 = 001xxx + } + def v8i16 : N2VQShIns<op24, op23, op11_8, 0, op4, + !strconcat(OpcodeStr, "16"), v8i16, ShOp> { + let Inst{21-20} = 0b01; // imm6 = 01xxxx + } + def v4i32 : N2VQShIns<op24, op23, op11_8, 0, op4, + !strconcat(OpcodeStr, "32"), v4i32, ShOp> { + let Inst{21} = 0b1; // imm6 = 1xxxxx + } + def v2i64 : N2VQShIns<op24, op23, op11_8, 1, op4, !strconcat(OpcodeStr, "64"), v2i64, ShOp>; + // imm6 = xxxxxx +} + +// Neon Shift Long operations, +// element sizes of 8, 16, 32 bits: +multiclass N2VLSh_QHS<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6, + bit op4, string OpcodeStr, SDNode OpNode> { + def v8i16 : N2VLSh<op24, op23, op11_8, op7, op6, op4, + !strconcat(OpcodeStr, "8"), v8i16, v8i8, OpNode> { + let Inst{21-19} = 0b001; // imm6 = 001xxx + } + def v4i32 : N2VLSh<op24, op23, op11_8, op7, op6, op4, + !strconcat(OpcodeStr, "16"), v4i32, v4i16, OpNode> { + let Inst{21-20} = 0b01; // imm6 = 01xxxx + } + def v2i64 : N2VLSh<op24, op23, op11_8, op7, op6, op4, + !strconcat(OpcodeStr, "32"), v2i64, v2i32, OpNode> { + let Inst{21} = 0b1; // imm6 = 1xxxxx + } +} + +// Neon Shift Narrow operations, +// element sizes of 16, 32, 64 bits: +multiclass N2VNSh_HSD<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6, + bit op4, InstrItinClass itin, string OpcodeStr, + SDNode OpNode> { + def v8i8 : N2VNSh<op24, op23, op11_8, op7, op6, op4, itin, + !strconcat(OpcodeStr, "16"), v8i8, v8i16, OpNode> { + let Inst{21-19} = 0b001; // imm6 = 001xxx + } + def v4i16 : N2VNSh<op24, op23, op11_8, op7, op6, op4, itin, + !strconcat(OpcodeStr, "32"), v4i16, v4i32, OpNode> { + let Inst{21-20} = 0b01; // imm6 = 01xxxx + } + def v2i32 : N2VNSh<op24, op23, op11_8, op7, op6, op4, itin, + !strconcat(OpcodeStr, "64"), v2i32, v2i64, OpNode> { + let Inst{21} = 0b1; // imm6 = 1xxxxx + } } //===----------------------------------------------------------------------===// @@ -1903,8 +1975,8 @@ defm VABDLs : N3VLInt_QHS<0,1,0b0111,0, IIC_VBINi4Q, "vabdl.s", int_arm_neon_v defm VABDLu : N3VLInt_QHS<1,1,0b0111,0, IIC_VBINi4Q, "vabdl.u", int_arm_neon_vabdlu, 0>; // VABA : Vector Absolute Difference and Accumulate -defm VABAs : N3VInt3_QHS<0,1,0b0101,0, "vaba.s", int_arm_neon_vabas>; -defm VABAu : N3VInt3_QHS<1,1,0b0101,0, "vaba.u", int_arm_neon_vabau>; +defm VABAs : N3VInt3_QHS<0,0,0b0111,1, "vaba.s", int_arm_neon_vabas>; +defm VABAu : N3VInt3_QHS<1,0,0b0111,1, "vaba.u", int_arm_neon_vabau>; // VABAL : Vector Absolute Difference and Accumulate Long (Q += | D - D |) defm VABALs : N3VLInt3_QHS<0,1,0b0101,0, "vabal.s", int_arm_neon_vabals>; @@ -2044,34 +2116,25 @@ defm VSHRs : N2VSh_QHSD<0, 1, 0b0000, 1, IIC_VSHLiD, "vshr.s", NEONvshrs>; defm VSHRu : N2VSh_QHSD<1, 1, 0b0000, 1, IIC_VSHLiD, "vshr.u", NEONvshru>; // VSHLL : Vector Shift Left Long -def VSHLLs8 : N2VLSh<0, 1, 0b001000, 0b1010, 0, 0, 1, "vshll.s8", - v8i16, v8i8, NEONvshlls>; -def VSHLLs16 : N2VLSh<0, 1, 0b010000, 0b1010, 0, 0, 1, "vshll.s16", - v4i32, v4i16, NEONvshlls>; -def VSHLLs32 : N2VLSh<0, 1, 0b100000, 0b1010, 0, 0, 1, "vshll.s32", - v2i64, v2i32, NEONvshlls>; -def VSHLLu8 : N2VLSh<1, 1, 0b001000, 0b1010, 0, 0, 1, "vshll.u8", - v8i16, v8i8, NEONvshllu>; -def VSHLLu16 : N2VLSh<1, 1, 0b010000, 0b1010, 0, 0, 1, "vshll.u16", - v4i32, v4i16, NEONvshllu>; -def VSHLLu32 : N2VLSh<1, 1, 0b100000, 0b1010, 0, 0, 1, "vshll.u32", - v2i64, v2i32, NEONvshllu>; +defm VSHLLs : N2VLSh_QHS<0, 1, 0b1010, 0, 0, 1, "vshll.s", NEONvshlls>; +defm VSHLLu : N2VLSh_QHS<1, 1, 0b1010, 0, 0, 1, "vshll.u", NEONvshllu>; // VSHLL : Vector Shift Left Long (with maximum shift count) -def VSHLLi8 : N2VLSh<1, 1, 0b110010, 0b0011, 0, 0, 0, "vshll.i8", - v8i16, v8i8, NEONvshlli>; -def VSHLLi16 : N2VLSh<1, 1, 0b110110, 0b0011, 0, 0, 0, "vshll.i16", - v4i32, v4i16, NEONvshlli>; -def VSHLLi32 : N2VLSh<1, 1, 0b111010, 0b0011, 0, 0, 0, "vshll.i32", - v2i64, v2i32, NEONvshlli>; +class N2VLShMax<bit op24, bit op23, bits<6> op21_16, bits<4> op11_8, bit op7, + bit op6, bit op4, string OpcodeStr, ValueType ResTy, + ValueType OpTy, SDNode OpNode> + : N2VLSh<op24, op23, op11_8, op7, op6, op4, OpcodeStr, ResTy, OpTy, OpNode> { + let Inst{21-16} = op21_16; +} +def VSHLLi8 : N2VLShMax<1, 1, 0b110010, 0b0011, 0, 0, 0, "vshll.i8", + v8i16, v8i8, NEONvshlli>; +def VSHLLi16 : N2VLShMax<1, 1, 0b110110, 0b0011, 0, 0, 0, "vshll.i16", + v4i32, v4i16, NEONvshlli>; +def VSHLLi32 : N2VLShMax<1, 1, 0b111010, 0b0011, 0, 0, 0, "vshll.i32", + v2i64, v2i32, NEONvshlli>; // VSHRN : Vector Shift Right and Narrow -def VSHRN16 : N2VNSh<0, 1, 0b001000, 0b1000, 0, 0, 1, - IIC_VSHLiD, "vshrn.i16", v8i8, v8i16, NEONvshrn>; -def VSHRN32 : N2VNSh<0, 1, 0b010000, 0b1000, 0, 0, 1, - IIC_VSHLiD, "vshrn.i32", v4i16, v4i32, NEONvshrn>; -def VSHRN64 : N2VNSh<0, 1, 0b100000, 0b1000, 0, 0, 1, - IIC_VSHLiD, "vshrn.i64", v2i32, v2i64, NEONvshrn>; +defm VSHRN : N2VNSh_HSD<0,1,0b1000,0,0,1, IIC_VSHLiD, "vshrn.i", NEONvshrn>; // VRSHL : Vector Rounding Shift defm VRSHLs : N3VInt_QHSD<0,0,0b0101,0, IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, @@ -2083,12 +2146,8 @@ defm VRSHRs : N2VSh_QHSD<0, 1, 0b0010, 1, IIC_VSHLi4D, "vrshr.s", NEONvrshrs>; defm VRSHRu : N2VSh_QHSD<1, 1, 0b0010, 1, IIC_VSHLi4D, "vrshr.u", NEONvrshru>; // VRSHRN : Vector Rounding Shift Right and Narrow -def VRSHRN16 : N2VNSh<0, 1, 0b001000, 0b1000, 0, 1, 1, - IIC_VSHLi4D, "vrshrn.i16", v8i8, v8i16, NEONvrshrn>; -def VRSHRN32 : N2VNSh<0, 1, 0b010000, 0b1000, 0, 1, 1, - IIC_VSHLi4D, "vrshrn.i32", v4i16, v4i32, NEONvrshrn>; -def VRSHRN64 : N2VNSh<0, 1, 0b100000, 0b1000, 0, 1, 1, - IIC_VSHLi4D, "vrshrn.i64", v2i32, v2i64, NEONvrshrn>; +defm VRSHRN : N2VNSh_HSD<0, 1, 0b1000, 0, 1, 1, IIC_VSHLi4D, "vrshrn.i", + NEONvrshrn>; // VQSHL : Vector Saturating Shift defm VQSHLs : N3VInt_QHSD<0,0,0b0100,1, IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, @@ -2102,26 +2161,14 @@ defm VQSHLui : N2VSh_QHSD<1, 1, 0b0111, 1, IIC_VSHLi4D, "vqshl.u", NEONvqshlu>; defm VQSHLsu : N2VSh_QHSD<1, 1, 0b0110, 1, IIC_VSHLi4D, "vqshlu.s", NEONvqshlsu>; // VQSHRN : Vector Saturating Shift Right and Narrow -def VQSHRNs16 : N2VNSh<0, 1, 0b001000, 0b1001, 0, 0, 1, - IIC_VSHLi4D, "vqshrn.s16", v8i8, v8i16, NEONvqshrns>; -def VQSHRNs32 : N2VNSh<0, 1, 0b010000, 0b1001, 0, 0, 1, - IIC_VSHLi4D, "vqshrn.s32", v4i16, v4i32, NEONvqshrns>; -def VQSHRNs64 : N2VNSh<0, 1, 0b100000, 0b1001, 0, 0, 1, - IIC_VSHLi4D, "vqshrn.s64", v2i32, v2i64, NEONvqshrns>; -def VQSHRNu16 : N2VNSh<1, 1, 0b001000, 0b1001, 0, 0, 1, - IIC_VSHLi4D, "vqshrn.u16", v8i8, v8i16, NEONvqshrnu>; -def VQSHRNu32 : N2VNSh<1, 1, 0b010000, 0b1001, 0, 0, 1, - IIC_VSHLi4D, "vqshrn.u32", v4i16, v4i32, NEONvqshrnu>; -def VQSHRNu64 : N2VNSh<1, 1, 0b100000, 0b1001, 0, 0, 1, - IIC_VSHLi4D, "vqshrn.u64", v2i32, v2i64, NEONvqshrnu>; +defm VQSHRNs : N2VNSh_HSD<0, 1, 0b1001, 0, 0, 1, IIC_VSHLi4D, "vqshrn.s", + NEONvqshrns>; +defm VQSHRNu : N2VNSh_HSD<1, 1, 0b1001, 0, 0, 1, IIC_VSHLi4D, "vqshrn.u", + NEONvqshrnu>; // VQSHRUN : Vector Saturating Shift Right and Narrow (Unsigned) -def VQSHRUN16 : N2VNSh<1, 1, 0b001000, 0b1000, 0, 0, 1, - IIC_VSHLi4D, "vqshrun.s16", v8i8, v8i16, NEONvqshrnsu>; -def VQSHRUN32 : N2VNSh<1, 1, 0b010000, 0b1000, 0, 0, 1, - IIC_VSHLi4D, "vqshrun.s32", v4i16, v4i32, NEONvqshrnsu>; -def VQSHRUN64 : N2VNSh<1, 1, 0b100000, 0b1000, 0, 0, 1, - IIC_VSHLi4D, "vqshrun.s64", v2i32, v2i64, NEONvqshrnsu>; +defm VQSHRUN : N2VNSh_HSD<1, 1, 0b1000, 0, 0, 1, IIC_VSHLi4D, "vqshrun.s", + NEONvqshrnsu>; // VQRSHL : Vector Saturating Rounding Shift defm VQRSHLs : N3VInt_QHSD<0, 0, 0b0101, 1, IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, @@ -2130,26 +2177,14 @@ defm VQRSHLu : N3VInt_QHSD<1, 0, 0b0101, 1, IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi IIC_VSHLi4Q, "vqrshl.u", int_arm_neon_vqrshiftu, 0>; // VQRSHRN : Vector Saturating Rounding Shift Right and Narrow -def VQRSHRNs16: N2VNSh<0, 1, 0b001000, 0b1001, 0, 1, 1, - IIC_VSHLi4D, "vqrshrn.s16", v8i8, v8i16, NEONvqrshrns>; -def VQRSHRNs32: N2VNSh<0, 1, 0b010000, 0b1001, 0, 1, 1, - IIC_VSHLi4D, "vqrshrn.s32", v4i16, v4i32, NEONvqrshrns>; -def VQRSHRNs64: N2VNSh<0, 1, 0b100000, 0b1001, 0, 1, 1, - IIC_VSHLi4D, "vqrshrn.s64", v2i32, v2i64, NEONvqrshrns>; -def VQRSHRNu16: N2VNSh<1, 1, 0b001000, 0b1001, 0, 1, 1, - IIC_VSHLi4D, "vqrshrn.u16", v8i8, v8i16, NEONvqrshrnu>; -def VQRSHRNu32: N2VNSh<1, 1, 0b010000, 0b1001, 0, 1, 1, - IIC_VSHLi4D, "vqrshrn.u32", v4i16, v4i32, NEONvqrshrnu>; -def VQRSHRNu64: N2VNSh<1, 1, 0b100000, 0b1001, 0, 1, 1, - IIC_VSHLi4D, "vqrshrn.u64", v2i32, v2i64, NEONvqrshrnu>; +defm VQRSHRNs : N2VNSh_HSD<0, 1, 0b1001, 0, 1, 1, IIC_VSHLi4D, "vqrshrn.s", + NEONvqrshrns>; +defm VQRSHRNu : N2VNSh_HSD<1, 1, 0b1001, 0, 1, 1, IIC_VSHLi4D, "vqrshrn.u", + NEONvqrshrnu>; // VQRSHRUN : Vector Saturating Rounding Shift Right and Narrow (Unsigned) -def VQRSHRUN16: N2VNSh<1, 1, 0b001000, 0b1000, 0, 1, 1, - IIC_VSHLi4D, "vqrshrun.s16", v8i8, v8i16, NEONvqrshrnsu>; -def VQRSHRUN32: N2VNSh<1, 1, 0b010000, 0b1000, 0, 1, 1, - IIC_VSHLi4D, "vqrshrun.s32", v4i16, v4i32, NEONvqrshrnsu>; -def VQRSHRUN64: N2VNSh<1, 1, 0b100000, 0b1000, 0, 1, 1, - IIC_VSHLi4D, "vqrshrun.s64", v2i32, v2i64, NEONvqrshrnsu>; +defm VQRSHRUN : N2VNSh_HSD<1, 1, 0b1000, 0, 1, 1, IIC_VSHLi4D, "vqrshrun.s", + NEONvqrshrnsu>; // VSRA : Vector Shift Right and Accumulate defm VSRAs : N2VShAdd_QHSD<0, 1, 0b0001, 1, "vsra.s", NEONvshrs>; @@ -2491,27 +2526,28 @@ def VDUPfq : NVDup<0b11101010, 0b1011, 0b00, (outs QPR:$dst), (ins GPR:$src), // VDUP : Vector Duplicate Lane (from scalar to all elements) -class VDUPLND<bits<2> op19_18, bits<2> op17_16, string OpcodeStr, ValueType Ty> - : N2V<0b11, 0b11, op19_18, op17_16, 0b11000, 0, 0, +class VDUPLND<string OpcodeStr, ValueType Ty> + : N2VDup<0b11, 0b11, 0b11000, 0, 0, (outs DPR:$dst), (ins DPR:$src, nohash_imm:$lane), IIC_VMOVD, !strconcat(OpcodeStr, "\t$dst, $src[$lane]"), "", [(set DPR:$dst, (Ty (NEONvduplane (Ty DPR:$src), imm:$lane)))]>; -class VDUPLNQ<bits<2> op19_18, bits<2> op17_16, string OpcodeStr, - ValueType ResTy, ValueType OpTy> - : N2V<0b11, 0b11, op19_18, op17_16, 0b11000, 1, 0, +class VDUPLNQ<string OpcodeStr, ValueType ResTy, ValueType OpTy> + : N2VDup<0b11, 0b11, 0b11000, 1, 0, (outs QPR:$dst), (ins DPR:$src, nohash_imm:$lane), IIC_VMOVD, !strconcat(OpcodeStr, "\t$dst, $src[$lane]"), "", [(set QPR:$dst, (ResTy (NEONvduplane (OpTy DPR:$src), imm:$lane)))]>; -def VDUPLN8d : VDUPLND<0b00, 0b01, "vdup.8", v8i8>; -def VDUPLN16d : VDUPLND<0b00, 0b10, "vdup.16", v4i16>; -def VDUPLN32d : VDUPLND<0b01, 0b00, "vdup.32", v2i32>; -def VDUPLNfd : VDUPLND<0b01, 0b00, "vdup.32", v2f32>; -def VDUPLN8q : VDUPLNQ<0b00, 0b01, "vdup.8", v16i8, v8i8>; -def VDUPLN16q : VDUPLNQ<0b00, 0b10, "vdup.16", v8i16, v4i16>; -def VDUPLN32q : VDUPLNQ<0b01, 0b00, "vdup.32", v4i32, v2i32>; -def VDUPLNfq : VDUPLNQ<0b01, 0b00, "vdup.32", v4f32, v2f32>; +// Inst{19-16} is partially specified depending on the element size. + +def VDUPLN8d : VDUPLND<"vdup.8", v8i8> { let Inst{16} = 1; } +def VDUPLN16d : VDUPLND<"vdup.16", v4i16> { let Inst{17-16} = 0b10; } +def VDUPLN32d : VDUPLND<"vdup.32", v2i32> { let Inst{18-16} = 0b100; } +def VDUPLNfd : VDUPLND<"vdup.32", v2f32> { let Inst{18-16} = 0b100; } +def VDUPLN8q : VDUPLNQ<"vdup.8", v16i8, v8i8> { let Inst{16} = 1; } +def VDUPLN16q : VDUPLNQ<"vdup.16", v8i16, v4i16> { let Inst{17-16} = 0b10; } +def VDUPLN32q : VDUPLNQ<"vdup.32", v4i32, v2i32> { let Inst{18-16} = 0b100; } +def VDUPLNfq : VDUPLNQ<"vdup.32", v4f32, v2f32> { let Inst{18-16} = 0b100; } def : Pat<(v16i8 (NEONvduplane (v16i8 QPR:$src), imm:$lane)), (v16i8 (VDUPLN8q (v8i8 (EXTRACT_SUBREG QPR:$src, @@ -2530,15 +2566,19 @@ def : Pat<(v4f32 (NEONvduplane (v4f32 QPR:$src), imm:$lane)), (DSubReg_i32_reg imm:$lane))), (SubReg_i32_lane imm:$lane)))>; -def VDUPfdf : N2V<0b11, 0b11, 0b01, 0b00, 0b11000, 0, 0, - (outs DPR:$dst), (ins SPR:$src), - IIC_VMOVD, "vdup.32\t$dst, ${src:lane}", "", - [(set DPR:$dst, (v2f32 (NEONvdup (f32 SPR:$src))))]>; +def VDUPfdf : N2VDup<0b11, 0b11, 0b11000, 0, 0, + (outs DPR:$dst), (ins SPR:$src), + IIC_VMOVD, "vdup.32\t$dst, ${src:lane}", "", + [(set DPR:$dst, (v2f32 (NEONvdup (f32 SPR:$src))))]> { + let Inst{18-16} = 0b100; +} -def VDUPfqf : N2V<0b11, 0b11, 0b01, 0b00, 0b11000, 1, 0, - (outs QPR:$dst), (ins SPR:$src), - IIC_VMOVD, "vdup.32\t$dst, ${src:lane}", "", - [(set QPR:$dst, (v4f32 (NEONvdup (f32 SPR:$src))))]>; +def VDUPfqf : N2VDup<0b11, 0b11, 0b11000, 1, 0, + (outs QPR:$dst), (ins SPR:$src), + IIC_VMOVD, "vdup.32\t$dst, ${src:lane}", "", + [(set QPR:$dst, (v4f32 (NEONvdup (f32 SPR:$src))))]> { + let Inst{18-16} = 0b100; +} def : Pat<(v2i64 (NEONvduplane (v2i64 QPR:$src), imm:$lane)), (INSERT_SUBREG QPR:$src, @@ -2560,8 +2600,8 @@ defm VQMOVNu : N2VNInt_HSD<0b11,0b11,0b10,0b00101,1,0, IIC_VQUNAiD, "vqmovn.u", defm VQMOVNsu : N2VNInt_HSD<0b11,0b11,0b10,0b00100,1,0, IIC_VQUNAiD, "vqmovun.s", int_arm_neon_vqmovnsu>; // VMOVL : Vector Lengthening Move -defm VMOVLs : N2VLInt_QHS<0,1,0b1010,0,0,1, "vmovl.s", int_arm_neon_vmovls>; -defm VMOVLu : N2VLInt_QHS<1,1,0b1010,0,0,1, "vmovl.u", int_arm_neon_vmovlu>; +defm VMOVLs : N2VLInt_QHS<0b01,0b10100,0,1, "vmovl.s", int_arm_neon_vmovls>; +defm VMOVLu : N2VLInt_QHS<0b11,0b10100,0,1, "vmovl.u", int_arm_neon_vmovlu>; // Vector Conversions. @@ -2585,24 +2625,22 @@ def VCVTu2fq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01101, 0, "vcvt.f32.u32", v4f32, v4i32, uint_to_fp>; // VCVT : Vector Convert Between Floating-Point and Fixed-Point. -// Note: Some of the opcode bits in the following VCVT instructions need to -// be encoded based on the immed values. -def VCVTf2xsd : N2VCvtD<0, 1, 0b000000, 0b1111, 0, 1, "vcvt.s32.f32", +def VCVTf2xsd : N2VCvtD<0, 1, 0b1111, 0, 1, "vcvt.s32.f32", v2i32, v2f32, int_arm_neon_vcvtfp2fxs>; -def VCVTf2xud : N2VCvtD<1, 1, 0b000000, 0b1111, 0, 1, "vcvt.u32.f32", +def VCVTf2xud : N2VCvtD<1, 1, 0b1111, 0, 1, "vcvt.u32.f32", v2i32, v2f32, int_arm_neon_vcvtfp2fxu>; -def VCVTxs2fd : N2VCvtD<0, 1, 0b000000, 0b1110, 0, 1, "vcvt.f32.s32", +def VCVTxs2fd : N2VCvtD<0, 1, 0b1110, 0, 1, "vcvt.f32.s32", v2f32, v2i32, int_arm_neon_vcvtfxs2fp>; -def VCVTxu2fd : N2VCvtD<1, 1, 0b000000, 0b1110, 0, 1, "vcvt.f32.u32", +def VCVTxu2fd : N2VCvtD<1, 1, 0b1110, 0, 1, "vcvt.f32.u32", v2f32, v2i32, int_arm_neon_vcvtfxu2fp>; -def VCVTf2xsq : N2VCvtQ<0, 1, 0b000000, 0b1111, 0, 1, "vcvt.s32.f32", +def VCVTf2xsq : N2VCvtQ<0, 1, 0b1111, 0, 1, "vcvt.s32.f32", v4i32, v4f32, int_arm_neon_vcvtfp2fxs>; -def VCVTf2xuq : N2VCvtQ<1, 1, 0b000000, 0b1111, 0, 1, "vcvt.u32.f32", +def VCVTf2xuq : N2VCvtQ<1, 1, 0b1111, 0, 1, "vcvt.u32.f32", v4i32, v4f32, int_arm_neon_vcvtfp2fxu>; -def VCVTxs2fq : N2VCvtQ<0, 1, 0b000000, 0b1110, 0, 1, "vcvt.f32.s32", +def VCVTxs2fq : N2VCvtQ<0, 1, 0b1110, 0, 1, "vcvt.f32.s32", v4f32, v4i32, int_arm_neon_vcvtfxs2fp>; -def VCVTxu2fq : N2VCvtQ<1, 1, 0b000000, 0b1110, 0, 1, "vcvt.f32.u32", +def VCVTxu2fq : N2VCvtQ<1, 1, 0b1110, 0, 1, "vcvt.f32.u32", v4f32, v4i32, int_arm_neon_vcvtfxu2fp>; // Vector Reverse. @@ -2670,18 +2708,18 @@ def VREV16q8 : VREV16Q<0b00, "vrev16.8", v16i8>; // VEXT : Vector Extract class VEXTd<string OpcodeStr, ValueType Ty> - : N3V<0,1,0b11,0b0000,0,0, (outs DPR:$dst), - (ins DPR:$lhs, DPR:$rhs, i32imm:$index), IIC_VEXTD, - !strconcat(OpcodeStr, "\t$dst, $lhs, $rhs, $index"), "", - [(set DPR:$dst, (Ty (NEONvext (Ty DPR:$lhs), - (Ty DPR:$rhs), imm:$index)))]>; + : N3VImm<0,1,0b11,0,0, (outs DPR:$dst), + (ins DPR:$lhs, DPR:$rhs, i32imm:$index), IIC_VEXTD, + !strconcat(OpcodeStr, "\t$dst, $lhs, $rhs, $index"), "", + [(set DPR:$dst, (Ty (NEONvext (Ty DPR:$lhs), + (Ty DPR:$rhs), imm:$index)))]>; class VEXTq<string OpcodeStr, ValueType Ty> - : N3V<0,1,0b11,0b0000,1,0, (outs QPR:$dst), - (ins QPR:$lhs, QPR:$rhs, i32imm:$index), IIC_VEXTQ, - !strconcat(OpcodeStr, "\t$dst, $lhs, $rhs, $index"), "", - [(set QPR:$dst, (Ty (NEONvext (Ty QPR:$lhs), - (Ty QPR:$rhs), imm:$index)))]>; + : N3VImm<0,1,0b11,1,0, (outs QPR:$dst), + (ins QPR:$lhs, QPR:$rhs, i32imm:$index), IIC_VEXTQ, + !strconcat(OpcodeStr, "\t$dst, $lhs, $rhs, $index"), "", + [(set QPR:$dst, (Ty (NEONvext (Ty QPR:$lhs), + (Ty QPR:$rhs), imm:$index)))]>; def VEXTd8 : VEXTd<"vext.8", v8i8>; def VEXTd16 : VEXTd<"vext.16", v4i16>; diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td index 0750dcc7fdc4..2b6fa98ed3c3 100644 --- a/lib/Target/ARM/ARMInstrThumb2.td +++ b/lib/Target/ARM/ARMInstrThumb2.td @@ -69,6 +69,25 @@ def t2_so_imm_neg : Operand<i32>, return ARM_AM::getT2SOImmVal(-((int)N->getZExtValue())) != -1; }], t2_so_imm_neg_XFORM>; +// Break t2_so_imm's up into two pieces. This handles immediates with up to 16 +// bits set in them. This uses t2_so_imm2part to match and t2_so_imm2part_[12] +// to get the first/second pieces. +def t2_so_imm2part : Operand<i32>, + PatLeaf<(imm), [{ + return ARM_AM::isT2SOImmTwoPartVal((unsigned)N->getZExtValue()); + }]> { +} + +def t2_so_imm2part_1 : SDNodeXForm<imm, [{ + unsigned V = ARM_AM::getT2SOImmTwoPartFirst((unsigned)N->getZExtValue()); + return CurDAG->getTargetConstant(V, MVT::i32); +}]>; + +def t2_so_imm2part_2 : SDNodeXForm<imm, [{ + unsigned V = ARM_AM::getT2SOImmTwoPartSecond((unsigned)N->getZExtValue()); + return CurDAG->getTargetConstant(V, MVT::i32); +}]>; + /// imm1_31 predicate - True if the 32-bit immediate is in the range [1,31]. def imm1_31 : PatLeaf<(i32 imm), [{ return (int32_t)N->getZExtValue() >= 1 && (int32_t)N->getZExtValue() < 32; @@ -666,6 +685,8 @@ def t2MOVTi16 : T2I<(outs GPR:$dst), (ins GPR:$src, i32imm:$imm), IIC_iMOVi, [(set GPR:$dst, (or (and GPR:$src, 0xffff), lo16AllZero:$imm))]>; +def : T2Pat<(or GPR:$src, 0xffff0000), (t2MOVTi16 GPR:$src, 0xffff)>; + //===----------------------------------------------------------------------===// // Extend Instructions. // @@ -1129,6 +1150,20 @@ def t2IT : Thumb2XI<(outs), (ins it_pred:$cc, it_mask:$mask), // Non-Instruction Patterns // +// Two piece so_imms. +def : T2Pat<(or GPR:$LHS, t2_so_imm2part:$RHS), + (t2ORRri (t2ORRri GPR:$LHS, (t2_so_imm2part_1 imm:$RHS)), + (t2_so_imm2part_2 imm:$RHS))>; +def : T2Pat<(xor GPR:$LHS, t2_so_imm2part:$RHS), + (t2EORri (t2EORri GPR:$LHS, (t2_so_imm2part_1 imm:$RHS)), + (t2_so_imm2part_2 imm:$RHS))>; +def : T2Pat<(add GPR:$LHS, t2_so_imm2part:$RHS), + (t2ADDri (t2ADDri GPR:$LHS, (t2_so_imm2part_1 imm:$RHS)), + (t2_so_imm2part_2 imm:$RHS))>; +def : T2Pat<(sub GPR:$LHS, t2_so_imm2part:$RHS), + (t2SUBri (t2SUBri GPR:$LHS, (t2_so_imm2part_1 imm:$RHS)), + (t2_so_imm2part_2 imm:$RHS))>; + // ConstantPool, GlobalAddress, and JumpTable def : T2Pat<(ARMWrapper tglobaladdr :$dst), (t2LEApcrel tglobaladdr :$dst)>; def : T2Pat<(ARMWrapper tconstpool :$dst), (t2LEApcrel tconstpool :$dst)>; diff --git a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp index d2ec9ee6cdf9..c9b9e84c2a3b 100644 --- a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp +++ b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp @@ -974,6 +974,9 @@ bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) { if (Advance) { ++Position; ++MBBI; + if (MBBI == E) + // Reach the end of the block, try merging the memory instructions. + TryMerge = true; } else TryMerge = true; diff --git a/lib/Target/ARM/ARMRegisterInfo.td b/lib/Target/ARM/ARMRegisterInfo.td index 20a7355b7653..e0be78432973 100644 --- a/lib/Target/ARM/ARMRegisterInfo.td +++ b/lib/Target/ARM/ARMRegisterInfo.td @@ -222,12 +222,9 @@ def tGPR : RegisterClass<"ARM", [i32], 32, [R0, R1, R2, R3, R4, R5, R6, R7]> { iterator allocation_order_begin(const MachineFunction &MF) const; iterator allocation_order_end(const MachineFunction &MF) const; }]; - // FIXME: We are reserving r3 in Thumb mode in case the PEI needs to use it - // to generate large stack offset. Make it available once we have register - // scavenging. let MethodBodies = [{ static const unsigned THUMB_tGPR_AO[] = { - ARM::R0, ARM::R1, ARM::R2, + ARM::R0, ARM::R1, ARM::R2, ARM::R3, ARM::R4, ARM::R5, ARM::R6, ARM::R7 }; // FP is R7, only low registers available. diff --git a/lib/Target/ARM/ARMSubtarget.cpp b/lib/Target/ARM/ARMSubtarget.cpp index cf1ee3f02953..5af95c33b930 100644 --- a/lib/Target/ARM/ARMSubtarget.cpp +++ b/lib/Target/ARM/ARMSubtarget.cpp @@ -27,11 +27,11 @@ UseNEONFP("arm-use-neon-fp", cl::init(false), cl::Hidden); ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &FS, - bool isThumb) + bool isT) : ARMArchVersion(V4T) , ARMFPUType(None) , UseNEONForSinglePrecisionFP(UseNEONFP) - , IsThumb(isThumb) + , IsThumb(isT) , ThumbMode(Thumb1) , PostRAScheduler(false) , IsR9Reserved(ReserveR9) @@ -98,9 +98,13 @@ ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &FS, if (isTargetDarwin()) IsR9Reserved = ReserveR9 | (ARMArchVersion < V6); + if (!isThumb() || hasThumb2()) + PostRAScheduler = true; + // Set CPU specific features. if (CPUString == "cortex-a8") { - PostRAScheduler = true; + // On Cortex-a8, it's faster to perform some single-precision FP + // operations with NEON instructions. if (UseNEONFP.getPosition() == 0) UseNEONForSinglePrecisionFP = true; } diff --git a/lib/Target/ARM/ARMSubtarget.h b/lib/Target/ARM/ARMSubtarget.h index 7098fd4f36ba..74781593a0d9 100644 --- a/lib/Target/ARM/ARMSubtarget.h +++ b/lib/Target/ARM/ARMSubtarget.h @@ -126,9 +126,13 @@ protected: const std::string & getCPUString() const { return CPUString; } - /// enablePostRAScheduler - From TargetSubtarget, return true to - /// enable post-RA scheduler. - bool enablePostRAScheduler() const { return PostRAScheduler; } + /// enablePostRAScheduler - True at 'More' optimization except + /// for Thumb1. + bool enablePostRAScheduler(CodeGenOpt::Level OptLevel, + TargetSubtarget::AntiDepBreakMode& mode) const { + mode = TargetSubtarget::ANTIDEP_NONE; + return PostRAScheduler && OptLevel >= CodeGenOpt::Default; + } /// getInstrItins - Return the instruction itineraies based on subtarget /// selection. diff --git a/lib/Target/ARM/ARMTargetMachine.cpp b/lib/Target/ARM/ARMTargetMachine.cpp index 32ddc20a5604..c1da6ce88b9a 100644 --- a/lib/Target/ARM/ARMTargetMachine.cpp +++ b/lib/Target/ARM/ARMTargetMachine.cpp @@ -16,7 +16,6 @@ #include "ARM.h" #include "llvm/PassManager.h" #include "llvm/CodeGen/Passes.h" -#include "llvm/Support/CommandLine.h" #include "llvm/Support/FormattedStream.h" #include "llvm/Target/TargetOptions.h" #include "llvm/Target/TargetRegistry.h" diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp index 7438ea9c79f3..403f96c69e58 100644 --- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp +++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp @@ -56,6 +56,14 @@ private: bool ParseDirectiveWord(unsigned Size, SMLoc L); + bool ParseDirectiveThumb(SMLoc L); + + bool ParseDirectiveThumbFunc(SMLoc L); + + bool ParseDirectiveCode(SMLoc L); + + bool ParseDirectiveSyntax(SMLoc L); + // TODO - For now hacked versions of the next two are in here in this file to // allow some parser testing until the table gen versions are implemented. @@ -230,8 +238,8 @@ bool ARMAsmParser::ParseRegister(ARMOperand &Op) { return false; } -// Try to parse a register list. The first token must be a '{' when called -// for now. +// Parse a register list, return false if successful else return true or an +// error. The first token must be a '{' when called. bool ARMAsmParser::ParseRegisterList(ARMOperand &Op) { assert(getLexer().getTok().is(AsmToken::LCurly) && "Token is not an Left Curly Brace"); @@ -277,7 +285,8 @@ bool ARMAsmParser::ParseRegisterList(ARMOperand &Op) { return false; } -// Try to parse an arm memory expression. It must start with a '[' token. +// Parse an arm memory expression, return false if successful else return true +// or an error. The first token must be a '[' when called. // TODO Only preindexing and postindexing addressing are started, unindexed // with option, etc are still to do. bool ARMAsmParser::ParseMemory(ARMOperand &Op) { @@ -374,50 +383,55 @@ bool ARMAsmParser::ParseMemory(ARMOperand &Op) { Writeback = true; getLexer().Lex(); // Eat right bracket token. - const AsmToken &CommaTok = getLexer().getTok(); - if (CommaTok.isNot(AsmToken::Comma)) - return Error(CommaTok.getLoc(), "',' expected"); - getLexer().Lex(); // Eat comma token. - - const AsmToken &NextTok = getLexer().getTok(); - if (NextTok.is(AsmToken::Plus)) - getLexer().Lex(); // Eat plus token. - else if (NextTok.is(AsmToken::Minus)) { - Negative = true; - getLexer().Lex(); // Eat minus token - } - - // See if there is a register following the "[Rn]," we have so far. - const AsmToken &OffsetRegTok = getLexer().getTok(); - int OffsetRegNum = MatchRegisterName(OffsetRegTok.getString()); + int OffsetRegNum = 0; bool OffsetRegShifted = false; enum ShiftType ShiftType; const MCExpr *ShiftAmount; const MCExpr *Offset; - if (OffsetRegNum != -1) { - OffsetIsReg = true; - getLexer().Lex(); // Eat identifier token for the offset register. - // Look for a comma then a shift - const AsmToken &Tok = getLexer().getTok(); - if (Tok.is(AsmToken::Comma)) { - getLexer().Lex(); // Eat comma token. - const AsmToken &Tok = getLexer().getTok(); - if (ParseShift(&ShiftType, ShiftAmount)) - return Error(Tok.getLoc(), "shift expected"); - OffsetRegShifted = true; + const AsmToken &NextTok = getLexer().getTok(); + if (NextTok.isNot(AsmToken::EndOfStatement)) { + if (NextTok.isNot(AsmToken::Comma)) + return Error(NextTok.getLoc(), "',' expected"); + getLexer().Lex(); // Eat comma token. + + const AsmToken &PlusMinusTok = getLexer().getTok(); + if (PlusMinusTok.is(AsmToken::Plus)) + getLexer().Lex(); // Eat plus token. + else if (PlusMinusTok.is(AsmToken::Minus)) { + Negative = true; + getLexer().Lex(); // Eat minus token } - } - else { // "[Rn]," we have so far was not followed by "Rm" - // Look for #offset following the "[Rn]," - const AsmToken &HashTok = getLexer().getTok(); - if (HashTok.isNot(AsmToken::Hash)) - return Error(HashTok.getLoc(), "'#' expected"); - getLexer().Lex(); // Eat hash token. - if (getParser().ParseExpression(Offset)) - return true; + // See if there is a register following the "[Rn]," we have so far. + const AsmToken &OffsetRegTok = getLexer().getTok(); + OffsetRegNum = MatchRegisterName(OffsetRegTok.getString()); + if (OffsetRegNum != -1) { + OffsetIsReg = true; + getLexer().Lex(); // Eat identifier token for the offset register. + // Look for a comma then a shift + const AsmToken &Tok = getLexer().getTok(); + if (Tok.is(AsmToken::Comma)) { + getLexer().Lex(); // Eat comma token. + + const AsmToken &Tok = getLexer().getTok(); + if (ParseShift(&ShiftType, ShiftAmount)) + return Error(Tok.getLoc(), "shift expected"); + OffsetRegShifted = true; + } + } + else { // "[Rn]," we have so far was not followed by "Rm" + // Look for #offset following the "[Rn]," + const AsmToken &HashTok = getLexer().getTok(); + if (HashTok.isNot(AsmToken::Hash)) + return Error(HashTok.getLoc(), "'#' expected"); + getLexer().Lex(); // Eat hash token. + + if (getParser().ParseExpression(Offset)) + return true; + } } + Op = ARMOperand::CreateMem(BaseRegNum, OffsetIsReg, Offset, OffsetRegNum, OffsetRegShifted, ShiftType, ShiftAmount, Preindexed, Postindexed, Negative, Writeback); @@ -465,7 +479,7 @@ bool ARMAsmParser::ParseShift(ShiftType *St, const MCExpr *&ShiftAmount) { return false; } -// A hack to allow some testing +// A hack to allow some testing, to be replaced by a real table gen version. int ARMAsmParser::MatchRegisterName(const StringRef &Name) { if (Name == "r0" || Name == "R0") return 0; @@ -504,7 +518,7 @@ int ARMAsmParser::MatchRegisterName(const StringRef &Name) { return -1; } -// A hack to allow some testing +// A hack to allow some testing, to be replaced by a real table gen version. bool ARMAsmParser::MatchInstruction(SmallVectorImpl<ARMOperand> &Operands, MCInst &Inst) { struct ARMOperand Op0 = Operands[0]; @@ -516,40 +530,58 @@ bool ARMAsmParser::MatchInstruction(SmallVectorImpl<ARMOperand> &Operands, Mnemonic == "ldmfd" || Mnemonic == "ldr" || Mnemonic == "mov" || - Mnemonic == "sub") + Mnemonic == "sub" || + Mnemonic == "bl" || + Mnemonic == "push" || + Mnemonic == "blx" || + Mnemonic == "pop") { + // Hard-coded to a valid instruction, till we have a real matcher. + Inst = MCInst(); + Inst.setOpcode(ARM::MOVr); + Inst.addOperand(MCOperand::CreateReg(2)); + Inst.addOperand(MCOperand::CreateReg(2)); + Inst.addOperand(MCOperand::CreateImm(0)); + Inst.addOperand(MCOperand::CreateImm(0)); + Inst.addOperand(MCOperand::CreateReg(0)); return false; + } return true; } -// TODO - this is a work in progress +// Parse a arm instruction operand. For now this parses the operand regardless +// of the mnemonic. bool ARMAsmParser::ParseOperand(ARMOperand &Op) { switch (getLexer().getKind()) { case AsmToken::Identifier: if (!ParseRegister(Op)) return false; - // TODO parse other operands that start with an identifier like labels - return Error(getLexer().getTok().getLoc(), "labels not yet supported"); + // This was not a register so parse other operands that start with an + // identifier (like labels) as expressions and create them as immediates. + const MCExpr *IdVal; + if (getParser().ParseExpression(IdVal)) + return true; + Op = ARMOperand::CreateImm(IdVal); + return false; case AsmToken::LBrac: - if (!ParseMemory(Op)) - return false; + return ParseMemory(Op); case AsmToken::LCurly: - if (!ParseRegisterList(Op)) - return false; + return ParseRegisterList(Op); case AsmToken::Hash: // #42 -> immediate. // TODO: ":lower16:" and ":upper16:" modifiers after # before immediate getLexer().Lex(); - const MCExpr *Val; - if (getParser().ParseExpression(Val)) + const MCExpr *ImmVal; + if (getParser().ParseExpression(ImmVal)) return true; - Op = ARMOperand::CreateImm(Val); + Op = ARMOperand::CreateImm(ImmVal); return false; default: return Error(getLexer().getTok().getLoc(), "unexpected token in operand"); } } +// Parse an arm instruction mnemonic followed by its operands. bool ARMAsmParser::ParseInstruction(const StringRef &Name, MCInst &Inst) { SmallVector<ARMOperand, 7> Operands; @@ -579,10 +611,19 @@ bool ARMAsmParser::ParseInstruction(const StringRef &Name, MCInst &Inst) { return true; } +/// ParseDirective parses the arm specific directives bool ARMAsmParser::ParseDirective(AsmToken DirectiveID) { StringRef IDVal = DirectiveID.getIdentifier(); if (IDVal == ".word") return ParseDirectiveWord(4, DirectiveID.getLoc()); + else if (IDVal == ".thumb") + return ParseDirectiveThumb(DirectiveID.getLoc()); + else if (IDVal == ".thumb_func") + return ParseDirectiveThumbFunc(DirectiveID.getLoc()); + else if (IDVal == ".code") + return ParseDirectiveCode(DirectiveID.getLoc()); + else if (IDVal == ".syntax") + return ParseDirectiveSyntax(DirectiveID.getLoc()); return true; } @@ -611,6 +652,93 @@ bool ARMAsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) { return false; } +/// ParseDirectiveThumb +/// ::= .thumb +bool ARMAsmParser::ParseDirectiveThumb(SMLoc L) { + if (getLexer().isNot(AsmToken::EndOfStatement)) + return Error(L, "unexpected token in directive"); + getLexer().Lex(); + + // TODO: set thumb mode + // TODO: tell the MC streamer the mode + // getParser().getStreamer().Emit???(); + return false; +} + +/// ParseDirectiveThumbFunc +/// ::= .thumbfunc symbol_name +bool ARMAsmParser::ParseDirectiveThumbFunc(SMLoc L) { + const AsmToken &Tok = getLexer().getTok(); + if (Tok.isNot(AsmToken::Identifier) && Tok.isNot(AsmToken::String)) + return Error(L, "unexpected token in .syntax directive"); + StringRef SymbolName = getLexer().getTok().getIdentifier(); + getLexer().Lex(); // Consume the identifier token. + + if (getLexer().isNot(AsmToken::EndOfStatement)) + return Error(L, "unexpected token in directive"); + getLexer().Lex(); + + // TODO: mark symbol as a thumb symbol + // getParser().getStreamer().Emit???(); + return false; +} + +/// ParseDirectiveSyntax +/// ::= .syntax unified | divided +bool ARMAsmParser::ParseDirectiveSyntax(SMLoc L) { + const AsmToken &Tok = getLexer().getTok(); + if (Tok.isNot(AsmToken::Identifier)) + return Error(L, "unexpected token in .syntax directive"); + const StringRef &Mode = Tok.getString(); + bool unified_syntax; + if (Mode == "unified" || Mode == "UNIFIED") { + getLexer().Lex(); + unified_syntax = true; + } + else if (Mode == "divided" || Mode == "DIVIDED") { + getLexer().Lex(); + unified_syntax = false; + } + else + return Error(L, "unrecognized syntax mode in .syntax directive"); + + if (getLexer().isNot(AsmToken::EndOfStatement)) + return Error(getLexer().getTok().getLoc(), "unexpected token in directive"); + getLexer().Lex(); + + // TODO tell the MC streamer the mode + // getParser().getStreamer().Emit???(); + return false; +} + +/// ParseDirectiveCode +/// ::= .code 16 | 32 +bool ARMAsmParser::ParseDirectiveCode(SMLoc L) { + const AsmToken &Tok = getLexer().getTok(); + if (Tok.isNot(AsmToken::Integer)) + return Error(L, "unexpected token in .code directive"); + int64_t Val = getLexer().getTok().getIntVal(); + bool thumb_mode; + if (Val == 16) { + getLexer().Lex(); + thumb_mode = true; + } + else if (Val == 32) { + getLexer().Lex(); + thumb_mode = false; + } + else + return Error(L, "invalid operand to .code directive"); + + if (getLexer().isNot(AsmToken::EndOfStatement)) + return Error(getLexer().getTok().getLoc(), "unexpected token in directive"); + getLexer().Lex(); + + // TODO tell the MC streamer the mode + // getParser().getStreamer().Emit???(); + return false; +} + // Force static initialization. extern "C" void LLVMInitializeARMAsmParser() { RegisterAsmParser<ARMAsmParser> X(TheARMTarget); diff --git a/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp b/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp index 546731b00d3c..8719e4c33903 100644 --- a/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp +++ b/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp @@ -1,3 +1,5 @@ +//===-- ARMAsmPrinter.cpp - Print machine code to an ARM .s file ----------===// +// // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source @@ -13,21 +15,25 @@ #define DEBUG_TYPE "asm-printer" #include "ARM.h" #include "ARMBuildAttrs.h" -#include "ARMTargetMachine.h" #include "ARMAddressingModes.h" #include "ARMConstantPoolValue.h" +#include "ARMInstPrinter.h" #include "ARMMachineFunctionInfo.h" +#include "ARMMCInstLower.h" +#include "ARMTargetMachine.h" #include "llvm/Constants.h" #include "llvm/Module.h" #include "llvm/Assembly/Writer.h" #include "llvm/CodeGen/AsmPrinter.h" #include "llvm/CodeGen/DwarfWriter.h" -#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/MachineModuleInfoImpls.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineJumpTableInfo.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCInst.h" #include "llvm/MC/MCSectionMachO.h" #include "llvm/MC/MCStreamer.h" -#include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCSymbol.h" #include "llvm/Target/TargetData.h" #include "llvm/Target/TargetLoweringObjectFile.h" @@ -38,18 +44,22 @@ #include "llvm/ADT/SmallString.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/StringSet.h" -#include "llvm/Support/Compiler.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/FormattedStream.h" #include "llvm/Support/Mangler.h" #include "llvm/Support/MathExtras.h" -#include "llvm/Support/FormattedStream.h" #include <cctype> using namespace llvm; STATISTIC(EmittedInsts, "Number of machine instrs printed"); +static cl::opt<bool> +EnableMCInst("enable-arm-mcinst-printer", cl::Hidden, + cl::desc("enable experimental asmprinter gunk in the arm backend")); + namespace { - class VISIBILITY_HIDDEN ARMAsmPrinter : public AsmPrinter { + class ARMAsmPrinter : public AsmPrinter { /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can /// make the right decision when printing asm code for different targets. @@ -63,34 +73,23 @@ namespace { /// MachineFunction. const MachineConstantPool *MCP; - /// We name each basic block in a Function with a unique number, so - /// that we can consistently refer to them later. This is cleared - /// at the beginning of each call to runOnMachineFunction(). - /// - typedef std::map<const Value *, unsigned> ValueMapTy; - ValueMapTy NumberForBB; - - /// GVNonLazyPtrs - Keeps the set of GlobalValues that require - /// non-lazy-pointers for indirect access. - StringMap<std::string> GVNonLazyPtrs; - - /// HiddenGVNonLazyPtrs - Keeps the set of GlobalValues with hidden - /// visibility that require non-lazy-pointers for indirect access. - StringMap<std::string> HiddenGVNonLazyPtrs; - - /// True if asm printer is printing a series of CONSTPOOL_ENTRY. - bool InCPMode; public: explicit ARMAsmPrinter(formatted_raw_ostream &O, TargetMachine &TM, const MCAsmInfo *T, bool V) - : AsmPrinter(O, TM, T, V), AFI(NULL), MCP(NULL), - InCPMode(false) { + : AsmPrinter(O, TM, T, V), AFI(NULL), MCP(NULL) { Subtarget = &TM.getSubtarget<ARMSubtarget>(); } virtual const char *getPassName() const { return "ARM Assembly Printer"; } + + void printMCInst(const MCInst *MI) { + ARMInstPrinter(O, *MAI, VerboseAsm).printInstruction(MI); + } + + void printInstructionThroughMCStreamer(const MachineInstr *MI); + void printOperand(const MachineInstr *MI, int OpNum, const char *Modifier = 0); @@ -149,8 +148,8 @@ namespace { void printMachineInstruction(const MachineInstr *MI); bool runOnMachineFunction(MachineFunction &F); - bool doFinalization(Module &M); void EmitStartOfAsmFile(Module &M); + void EmitEndOfAsmFile(Module &M); /// EmitMachineConstantPoolValue - Print a machine constantpool value to /// the .s file. @@ -173,12 +172,19 @@ namespace { Name = Mang->getMangledName(GV); else { // FIXME: Remove this when Darwin transition to @GOT like syntax. - std::string SymName = Mang->getMangledName(GV); Name = Mang->getMangledName(GV, "$non_lazy_ptr", true); - if (GV->hasHiddenVisibility()) - HiddenGVNonLazyPtrs[SymName] = Name; - else - GVNonLazyPtrs[SymName] = Name; + MCSymbol *Sym = OutContext.GetOrCreateSymbol(StringRef(Name)); + + MachineModuleInfoMachO &MMIMachO = + MMI->getObjFileInfo<MachineModuleInfoMachO>(); + const MCSymbol *&StubSym = + GV->hasHiddenVisibility() ? MMIMachO.getHiddenGVStubEntry(Sym) : + MMIMachO.getGVStubEntry(Sym); + if (StubSym == 0) { + SmallString<128> NameStr; + Mang->getNameWithPrefix(NameStr, GV, false); + StubSym = OutContext.GetOrCreateSymbol(NameStr.str()); + } } } else Name = Mang->makeNameProper(ACPV->getSymbol()); @@ -260,7 +266,6 @@ bool ARMAsmPrinter::runOnMachineFunction(MachineFunction &MF) { if (Subtarget->isTargetDarwin()) O << "\t" << CurrentFnName; O << "\n"; - InCPMode = false; } else { EmitAlignment(FnAlign, F); } @@ -283,14 +288,13 @@ bool ARMAsmPrinter::runOnMachineFunction(MachineFunction &MF) { for (MachineFunction::const_iterator I = MF.begin(), E = MF.end(); I != E; ++I) { // Print a label for the basic block. - if (I != MF.begin()) { + if (I != MF.begin()) EmitBasicBlockStart(I); - } + + // Print the assembly for the instruction. for (MachineBasicBlock::const_iterator II = I->begin(), E = I->end(); - II != E; ++II) { - // Print the assembly for the instruction. + II != E; ++II) printMachineInstruction(II); - } } if (MAI->hasDotTypeDotSizeDirective()) @@ -306,25 +310,25 @@ void ARMAsmPrinter::printOperand(const MachineInstr *MI, int OpNum, const char *Modifier) { const MachineOperand &MO = MI->getOperand(OpNum); switch (MO.getType()) { + default: + assert(0 && "<unknown operand type>"); case MachineOperand::MO_Register: { unsigned Reg = MO.getReg(); - if (TargetRegisterInfo::isPhysicalRegister(Reg)) { - if (Modifier && strcmp(Modifier, "dregpair") == 0) { - unsigned DRegLo = TRI->getSubReg(Reg, 5); // arm_dsubreg_0 - unsigned DRegHi = TRI->getSubReg(Reg, 6); // arm_dsubreg_1 - O << '{' - << getRegisterName(DRegLo) << ',' << getRegisterName(DRegHi) - << '}'; - } else if (Modifier && strcmp(Modifier, "lane") == 0) { - unsigned RegNum = ARMRegisterInfo::getRegisterNumbering(Reg); - unsigned DReg = TRI->getMatchingSuperReg(Reg, RegNum & 1 ? 2 : 1, - &ARM::DPR_VFP2RegClass); - O << getRegisterName(DReg) << '[' << (RegNum & 1) << ']'; - } else { - O << getRegisterName(Reg); - } - } else - llvm_unreachable("not implemented"); + assert(TargetRegisterInfo::isPhysicalRegister(Reg)); + if (Modifier && strcmp(Modifier, "dregpair") == 0) { + unsigned DRegLo = TRI->getSubReg(Reg, 5); // arm_dsubreg_0 + unsigned DRegHi = TRI->getSubReg(Reg, 6); // arm_dsubreg_1 + O << '{' + << getRegisterName(DRegLo) << ',' << getRegisterName(DRegHi) + << '}'; + } else if (Modifier && strcmp(Modifier, "lane") == 0) { + unsigned RegNum = ARMRegisterInfo::getRegisterNumbering(Reg); + unsigned DReg = TRI->getMatchingSuperReg(Reg, RegNum & 1 ? 2 : 1, + &ARM::DPR_VFP2RegClass); + O << getRegisterName(DReg) << '[' << (RegNum & 1) << ']'; + } else { + O << getRegisterName(Reg); + } break; } case MachineOperand::MO_Immediate: { @@ -372,8 +376,6 @@ void ARMAsmPrinter::printOperand(const MachineInstr *MI, int OpNum, O << MAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber() << '_' << MO.getIndex(); break; - default: - O << "<unknown operand type>"; abort (); break; } } @@ -1027,22 +1029,19 @@ bool ARMAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, void ARMAsmPrinter::printMachineInstruction(const MachineInstr *MI) { ++EmittedInsts; - int Opc = MI->getOpcode(); - switch (Opc) { - case ARM::CONSTPOOL_ENTRY: - if (!InCPMode && AFI->isThumbFunction()) { - EmitAlignment(2); - InCPMode = true; - } - break; - default: { - if (InCPMode && AFI->isThumbFunction()) - InCPMode = false; - }} - // Call the autogenerated instruction printer routines. processDebugLoc(MI, true); - printInstruction(MI); + + if (EnableMCInst) { + printInstructionThroughMCStreamer(MI); + } else { + int Opc = MI->getOpcode(); + if (Opc == ARM::CONSTPOOL_ENTRY) + EmitAlignment(2); + + printInstruction(MI); + } + if (VerboseAsm && !MI->getDebugLoc().isUnknown()) EmitComments(*MI); O << '\n'; @@ -1256,34 +1255,40 @@ void ARMAsmPrinter::PrintGlobalVariable(const GlobalVariable* GVar) { } -bool ARMAsmPrinter::doFinalization(Module &M) { +void ARMAsmPrinter::EmitEndOfAsmFile(Module &M) { if (Subtarget->isTargetDarwin()) { // All darwin targets use mach-o. TargetLoweringObjectFileMachO &TLOFMacho = static_cast<TargetLoweringObjectFileMachO &>(getObjFileLowering()); + MachineModuleInfoMachO &MMIMacho = + MMI->getObjFileInfo<MachineModuleInfoMachO>(); O << '\n'; // Output non-lazy-pointers for external and common global variables. - if (!GVNonLazyPtrs.empty()) { + MachineModuleInfoMachO::SymbolListTy Stubs = MMIMacho.GetGVStubList(); + + if (!Stubs.empty()) { // Switch with ".non_lazy_symbol_pointer" directive. OutStreamer.SwitchSection(TLOFMacho.getNonLazySymbolPointerSection()); EmitAlignment(2); - for (StringMap<std::string>::iterator I = GVNonLazyPtrs.begin(), - E = GVNonLazyPtrs.end(); I != E; ++I) { - O << I->second << ":\n"; - O << "\t.indirect_symbol " << I->getKeyData() << "\n"; - O << "\t.long\t0\n"; + for (unsigned i = 0, e = Stubs.size(); i != e; ++i) { + Stubs[i].first->print(O, MAI); + O << ":\n\t.indirect_symbol "; + Stubs[i].second->print(O, MAI); + O << "\n\t.long\t0\n"; } } - if (!HiddenGVNonLazyPtrs.empty()) { + Stubs = MMIMacho.GetHiddenGVStubList(); + if (!Stubs.empty()) { OutStreamer.SwitchSection(getObjFileLowering().getDataSection()); EmitAlignment(2); - for (StringMap<std::string>::iterator I = HiddenGVNonLazyPtrs.begin(), - E = HiddenGVNonLazyPtrs.end(); I != E; ++I) { - O << I->second << ":\n"; - O << "\t.long " << I->getKeyData() << "\n"; + for (unsigned i = 0, e = Stubs.size(); i != e; ++i) { + Stubs[i].first->print(O, MAI); + O << ":\n\t.long "; + Stubs[i].second->print(O, MAI); + O << "\n"; } } @@ -1292,14 +1297,179 @@ bool ARMAsmPrinter::doFinalization(Module &M) { // implementation of multiple entry points). If this doesn't occur, the // linker can safely perform dead code stripping. Since LLVM never // generates code that does this, it is always safe to set. - O << "\t.subsections_via_symbols\n"; + OutStreamer.EmitAssemblerFlag(MCStreamer::SubsectionsViaSymbols); + } +} + +//===----------------------------------------------------------------------===// + +void ARMAsmPrinter::printInstructionThroughMCStreamer(const MachineInstr *MI) { + ARMMCInstLower MCInstLowering(OutContext, *Mang, *this); + switch (MI->getOpcode()) { + case ARM::t2MOVi32imm: + assert(0 && "Should be lowered by thumb2it pass"); + default: break; + case TargetInstrInfo::DBG_LABEL: + case TargetInstrInfo::EH_LABEL: + case TargetInstrInfo::GC_LABEL: + printLabel(MI); + return; + case TargetInstrInfo::KILL: + return; + case TargetInstrInfo::INLINEASM: + O << '\t'; + printInlineAsm(MI); + return; + case TargetInstrInfo::IMPLICIT_DEF: + printImplicitDef(MI); + return; + case ARM::PICADD: { // FIXME: Remove asm string from td file. + // This is a pseudo op for a label + instruction sequence, which looks like: + // LPC0: + // add r0, pc, r0 + // This adds the address of LPC0 to r0. + + // Emit the label. + // FIXME: MOVE TO SHARED PLACE. + unsigned Id = (unsigned)MI->getOperand(2).getImm(); + const char *Prefix = MAI->getPrivateGlobalPrefix(); + MCSymbol *Label =OutContext.GetOrCreateSymbol(Twine(Prefix)+"PC"+Twine(Id)); + OutStreamer.EmitLabel(Label); + + + // Form and emit tha dd. + MCInst AddInst; + AddInst.setOpcode(ARM::ADDrr); + AddInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg())); + AddInst.addOperand(MCOperand::CreateReg(ARM::PC)); + AddInst.addOperand(MCOperand::CreateReg(MI->getOperand(1).getReg())); + printMCInst(&AddInst); + return; + } + case ARM::CONSTPOOL_ENTRY: { // FIXME: Remove asm string from td file. + /// CONSTPOOL_ENTRY - This instruction represents a floating constant pool + /// in the function. The first operand is the ID# for this instruction, the + /// second is the index into the MachineConstantPool that this is, the third + /// is the size in bytes of this constant pool entry. + unsigned LabelId = (unsigned)MI->getOperand(0).getImm(); + unsigned CPIdx = (unsigned)MI->getOperand(1).getIndex(); + + EmitAlignment(2); + + const char *Prefix = MAI->getPrivateGlobalPrefix(); + MCSymbol *Label = OutContext.GetOrCreateSymbol(Twine(Prefix)+"CPI"+ + Twine(getFunctionNumber())+ + "_"+ Twine(LabelId)); + OutStreamer.EmitLabel(Label); + + const MachineConstantPoolEntry &MCPE = MCP->getConstants()[CPIdx]; + if (MCPE.isMachineConstantPoolEntry()) + EmitMachineConstantPoolValue(MCPE.Val.MachineCPVal); + else + EmitGlobalConstant(MCPE.Val.ConstVal); + + return; } + case ARM::MOVi2pieces: { // FIXME: Remove asmstring from td file. + // This is a hack that lowers as a two instruction sequence. + unsigned DstReg = MI->getOperand(0).getReg(); + unsigned ImmVal = (unsigned)MI->getOperand(1).getImm(); + + unsigned SOImmValV1 = ARM_AM::getSOImmTwoPartFirst(ImmVal); + unsigned SOImmValV2 = ARM_AM::getSOImmTwoPartSecond(ImmVal); + + { + MCInst TmpInst; + TmpInst.setOpcode(ARM::MOVi); + TmpInst.addOperand(MCOperand::CreateReg(DstReg)); + TmpInst.addOperand(MCOperand::CreateImm(SOImmValV1)); + + // Predicate. + TmpInst.addOperand(MCOperand::CreateImm(MI->getOperand(2).getImm())); + TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(3).getReg())); + + TmpInst.addOperand(MCOperand::CreateReg(0)); // cc_out + printMCInst(&TmpInst); + O << '\n'; + } - return AsmPrinter::doFinalization(M); + { + MCInst TmpInst; + TmpInst.setOpcode(ARM::ORRri); + TmpInst.addOperand(MCOperand::CreateReg(DstReg)); // dstreg + TmpInst.addOperand(MCOperand::CreateReg(DstReg)); // inreg + TmpInst.addOperand(MCOperand::CreateImm(SOImmValV2)); // so_imm + // Predicate. + TmpInst.addOperand(MCOperand::CreateImm(MI->getOperand(2).getImm())); + TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(3).getReg())); + + TmpInst.addOperand(MCOperand::CreateReg(0)); // cc_out + printMCInst(&TmpInst); + } + return; + } + case ARM::MOVi32imm: { // FIXME: Remove asmstring from td file. + // This is a hack that lowers as a two instruction sequence. + unsigned DstReg = MI->getOperand(0).getReg(); + unsigned ImmVal = (unsigned)MI->getOperand(1).getImm(); + + { + MCInst TmpInst; + TmpInst.setOpcode(ARM::MOVi16); + TmpInst.addOperand(MCOperand::CreateReg(DstReg)); // dstreg + TmpInst.addOperand(MCOperand::CreateImm(ImmVal & 65535)); // lower16(imm) + + // Predicate. + TmpInst.addOperand(MCOperand::CreateImm(MI->getOperand(2).getImm())); + TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(3).getReg())); + + printMCInst(&TmpInst); + O << '\n'; + } + + { + MCInst TmpInst; + TmpInst.setOpcode(ARM::MOVTi16); + TmpInst.addOperand(MCOperand::CreateReg(DstReg)); // dstreg + TmpInst.addOperand(MCOperand::CreateReg(DstReg)); // srcreg + TmpInst.addOperand(MCOperand::CreateImm(ImmVal >> 16)); // upper16(imm) + + // Predicate. + TmpInst.addOperand(MCOperand::CreateImm(MI->getOperand(2).getImm())); + TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(3).getReg())); + + printMCInst(&TmpInst); + } + + return; + } + } + + MCInst TmpInst; + MCInstLowering.Lower(MI, TmpInst); + + printMCInst(&TmpInst); +} + +//===----------------------------------------------------------------------===// +// Target Registry Stuff +//===----------------------------------------------------------------------===// + +static MCInstPrinter *createARMMCInstPrinter(const Target &T, + unsigned SyntaxVariant, + const MCAsmInfo &MAI, + raw_ostream &O) { + if (SyntaxVariant == 0) + return new ARMInstPrinter(O, MAI, false); + return 0; } // Force static initialization. extern "C" void LLVMInitializeARMAsmPrinter() { RegisterAsmPrinter<ARMAsmPrinter> X(TheARMTarget); RegisterAsmPrinter<ARMAsmPrinter> Y(TheThumbTarget); + + TargetRegistry::RegisterMCInstPrinter(TheARMTarget, createARMMCInstPrinter); + TargetRegistry::RegisterMCInstPrinter(TheThumbTarget, createARMMCInstPrinter); } + diff --git a/lib/Target/ARM/AsmPrinter/ARMInstPrinter.cpp b/lib/Target/ARM/AsmPrinter/ARMInstPrinter.cpp new file mode 100644 index 000000000000..f422798e315c --- /dev/null +++ b/lib/Target/ARM/AsmPrinter/ARMInstPrinter.cpp @@ -0,0 +1,358 @@ +//===-- ARMInstPrinter.cpp - Convert ARM MCInst to assembly syntax --------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This class prints an ARM MCInst to a .s file. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "asm-printer" +#include "ARM.h" // FIXME: FACTOR ENUMS BETTER. +#include "ARMInstPrinter.h" +#include "ARMAddressingModes.h" +#include "llvm/MC/MCInst.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +// Include the auto-generated portion of the assembly writer. +#define MachineInstr MCInst +#define ARMAsmPrinter ARMInstPrinter // FIXME: REMOVE. +#define NO_ASM_WRITER_BOILERPLATE +#include "ARMGenAsmWriter.inc" +#undef MachineInstr +#undef ARMAsmPrinter + +void ARMInstPrinter::printInst(const MCInst *MI) { printInstruction(MI); } + +void ARMInstPrinter::printOperand(const MCInst *MI, unsigned OpNo, + const char *Modifier) { + const MCOperand &Op = MI->getOperand(OpNo); + if (Op.isReg()) { + unsigned Reg = Op.getReg(); + if (Modifier && strcmp(Modifier, "dregpair") == 0) { + // FIXME: Breaks e.g. ARM/vmul.ll. + assert(0); + /* + unsigned DRegLo = TRI->getSubReg(Reg, 5); // arm_dsubreg_0 + unsigned DRegHi = TRI->getSubReg(Reg, 6); // arm_dsubreg_1 + O << '{' + << getRegisterName(DRegLo) << ',' << getRegisterName(DRegHi) + << '}';*/ + } else if (Modifier && strcmp(Modifier, "lane") == 0) { + assert(0); + /* + unsigned RegNum = ARMRegisterInfo::getRegisterNumbering(Reg); + unsigned DReg = TRI->getMatchingSuperReg(Reg, RegNum & 1 ? 2 : 1, + &ARM::DPR_VFP2RegClass); + O << getRegisterName(DReg) << '[' << (RegNum & 1) << ']'; + */ + } else { + O << getRegisterName(Reg); + } + } else if (Op.isImm()) { + assert((Modifier == 0 || Modifier[0] == 0) && "No modifiers supported"); + O << '#' << Op.getImm(); + } else { + assert((Modifier == 0 || Modifier[0] == 0) && "No modifiers supported"); + assert(Op.isExpr() && "unknown operand kind in printOperand"); + Op.getExpr()->print(O, &MAI); + } +} + +static void printSOImm(raw_ostream &O, int64_t V, bool VerboseAsm, + const MCAsmInfo *MAI) { + // Break it up into two parts that make up a shifter immediate. + V = ARM_AM::getSOImmVal(V); + assert(V != -1 && "Not a valid so_imm value!"); + + unsigned Imm = ARM_AM::getSOImmValImm(V); + unsigned Rot = ARM_AM::getSOImmValRot(V); + + // Print low-level immediate formation info, per + // A5.1.3: "Data-processing operands - Immediate". + if (Rot) { + O << "#" << Imm << ", " << Rot; + // Pretty printed version. + if (VerboseAsm) + O << ' ' << MAI->getCommentString() + << ' ' << (int)ARM_AM::rotr32(Imm, Rot); + } else { + O << "#" << Imm; + } +} + + +/// printSOImmOperand - SOImm is 4-bit rotate amount in bits 8-11 with 8-bit +/// immediate in bits 0-7. +void ARMInstPrinter::printSOImmOperand(const MCInst *MI, unsigned OpNum) { + const MCOperand &MO = MI->getOperand(OpNum); + assert(MO.isImm() && "Not a valid so_imm value!"); + printSOImm(O, MO.getImm(), VerboseAsm, &MAI); +} + +/// printSOImm2PartOperand - SOImm is broken into two pieces using a 'mov' +/// followed by an 'orr' to materialize. +void ARMInstPrinter::printSOImm2PartOperand(const MCInst *MI, unsigned OpNum) { + // FIXME: REMOVE this method. + abort(); +} + +// so_reg is a 4-operand unit corresponding to register forms of the A5.1 +// "Addressing Mode 1 - Data-processing operands" forms. This includes: +// REG 0 0 - e.g. R5 +// REG REG 0,SH_OPC - e.g. R5, ROR R3 +// REG 0 IMM,SH_OPC - e.g. R5, LSL #3 +void ARMInstPrinter::printSORegOperand(const MCInst *MI, unsigned OpNum) { + const MCOperand &MO1 = MI->getOperand(OpNum); + const MCOperand &MO2 = MI->getOperand(OpNum+1); + const MCOperand &MO3 = MI->getOperand(OpNum+2); + + O << getRegisterName(MO1.getReg()); + + // Print the shift opc. + O << ", " + << ARM_AM::getShiftOpcStr(ARM_AM::getSORegShOp(MO3.getImm())) + << ' '; + + if (MO2.getReg()) { + O << getRegisterName(MO2.getReg()); + assert(ARM_AM::getSORegOffset(MO3.getImm()) == 0); + } else { + O << "#" << ARM_AM::getSORegOffset(MO3.getImm()); + } +} + + +void ARMInstPrinter::printAddrMode2Operand(const MCInst *MI, unsigned Op) { + const MCOperand &MO1 = MI->getOperand(Op); + const MCOperand &MO2 = MI->getOperand(Op+1); + const MCOperand &MO3 = MI->getOperand(Op+2); + + if (!MO1.isReg()) { // FIXME: This is for CP entries, but isn't right. + printOperand(MI, Op); + return; + } + + O << "[" << getRegisterName(MO1.getReg()); + + if (!MO2.getReg()) { + if (ARM_AM::getAM2Offset(MO3.getImm())) // Don't print +0. + O << ", #" + << (char)ARM_AM::getAM2Op(MO3.getImm()) + << ARM_AM::getAM2Offset(MO3.getImm()); + O << "]"; + return; + } + + O << ", " + << (char)ARM_AM::getAM2Op(MO3.getImm()) + << getRegisterName(MO2.getReg()); + + if (unsigned ShImm = ARM_AM::getAM2Offset(MO3.getImm())) + O << ", " + << ARM_AM::getShiftOpcStr(ARM_AM::getAM2ShiftOpc(MO3.getImm())) + << " #" << ShImm; + O << "]"; +} + +void ARMInstPrinter::printAddrMode2OffsetOperand(const MCInst *MI, + unsigned OpNum) { + const MCOperand &MO1 = MI->getOperand(OpNum); + const MCOperand &MO2 = MI->getOperand(OpNum+1); + + if (!MO1.getReg()) { + unsigned ImmOffs = ARM_AM::getAM2Offset(MO2.getImm()); + assert(ImmOffs && "Malformed indexed load / store!"); + O << '#' << (char)ARM_AM::getAM2Op(MO2.getImm()) << ImmOffs; + return; + } + + O << (char)ARM_AM::getAM2Op(MO2.getImm()) << getRegisterName(MO1.getReg()); + + if (unsigned ShImm = ARM_AM::getAM2Offset(MO2.getImm())) + O << ", " + << ARM_AM::getShiftOpcStr(ARM_AM::getAM2ShiftOpc(MO2.getImm())) + << " #" << ShImm; +} + +void ARMInstPrinter::printAddrMode3Operand(const MCInst *MI, unsigned OpNum) { + const MCOperand &MO1 = MI->getOperand(OpNum); + const MCOperand &MO2 = MI->getOperand(OpNum+1); + const MCOperand &MO3 = MI->getOperand(OpNum+2); + + O << '[' << getRegisterName(MO1.getReg()); + + if (MO2.getReg()) { + O << ", " << (char)ARM_AM::getAM3Op(MO3.getImm()) + << getRegisterName(MO2.getReg()) << ']'; + return; + } + + if (unsigned ImmOffs = ARM_AM::getAM3Offset(MO3.getImm())) + O << ", #" + << (char)ARM_AM::getAM3Op(MO3.getImm()) + << ImmOffs; + O << ']'; +} + +void ARMInstPrinter::printAddrMode3OffsetOperand(const MCInst *MI, + unsigned OpNum) { + const MCOperand &MO1 = MI->getOperand(OpNum); + const MCOperand &MO2 = MI->getOperand(OpNum+1); + + if (MO1.getReg()) { + O << (char)ARM_AM::getAM3Op(MO2.getImm()) + << getRegisterName(MO1.getReg()); + return; + } + + unsigned ImmOffs = ARM_AM::getAM3Offset(MO2.getImm()); + assert(ImmOffs && "Malformed indexed load / store!"); + O << "#" + << (char)ARM_AM::getAM3Op(MO2.getImm()) + << ImmOffs; +} + + +void ARMInstPrinter::printAddrMode4Operand(const MCInst *MI, unsigned OpNum, + const char *Modifier) { + const MCOperand &MO1 = MI->getOperand(OpNum); + const MCOperand &MO2 = MI->getOperand(OpNum+1); + ARM_AM::AMSubMode Mode = ARM_AM::getAM4SubMode(MO2.getImm()); + if (Modifier && strcmp(Modifier, "submode") == 0) { + if (MO1.getReg() == ARM::SP) { + // FIXME + bool isLDM = (MI->getOpcode() == ARM::LDM || + MI->getOpcode() == ARM::LDM_RET || + MI->getOpcode() == ARM::t2LDM || + MI->getOpcode() == ARM::t2LDM_RET); + O << ARM_AM::getAMSubModeAltStr(Mode, isLDM); + } else + O << ARM_AM::getAMSubModeStr(Mode); + } else if (Modifier && strcmp(Modifier, "wide") == 0) { + ARM_AM::AMSubMode Mode = ARM_AM::getAM4SubMode(MO2.getImm()); + if (Mode == ARM_AM::ia) + O << ".w"; + } else { + printOperand(MI, OpNum); + if (ARM_AM::getAM4WBFlag(MO2.getImm())) + O << "!"; + } +} + +void ARMInstPrinter::printAddrMode5Operand(const MCInst *MI, unsigned OpNum, + const char *Modifier) { + const MCOperand &MO1 = MI->getOperand(OpNum); + const MCOperand &MO2 = MI->getOperand(OpNum+1); + + if (!MO1.isReg()) { // FIXME: This is for CP entries, but isn't right. + printOperand(MI, OpNum); + return; + } + + if (Modifier && strcmp(Modifier, "submode") == 0) { + ARM_AM::AMSubMode Mode = ARM_AM::getAM5SubMode(MO2.getImm()); + if (MO1.getReg() == ARM::SP) { + bool isFLDM = (MI->getOpcode() == ARM::FLDMD || + MI->getOpcode() == ARM::FLDMS); + O << ARM_AM::getAMSubModeAltStr(Mode, isFLDM); + } else + O << ARM_AM::getAMSubModeStr(Mode); + return; + } else if (Modifier && strcmp(Modifier, "base") == 0) { + // Used for FSTM{D|S} and LSTM{D|S} operations. + O << getRegisterName(MO1.getReg()); + if (ARM_AM::getAM5WBFlag(MO2.getImm())) + O << "!"; + return; + } + + O << "[" << getRegisterName(MO1.getReg()); + + if (unsigned ImmOffs = ARM_AM::getAM5Offset(MO2.getImm())) { + O << ", #" + << (char)ARM_AM::getAM5Op(MO2.getImm()) + << ImmOffs*4; + } + O << "]"; +} + +void ARMInstPrinter::printAddrMode6Operand(const MCInst *MI, unsigned OpNum) { + const MCOperand &MO1 = MI->getOperand(OpNum); + const MCOperand &MO2 = MI->getOperand(OpNum+1); + const MCOperand &MO3 = MI->getOperand(OpNum+2); + + // FIXME: No support yet for specifying alignment. + O << '[' << getRegisterName(MO1.getReg()) << ']'; + + if (ARM_AM::getAM6WBFlag(MO3.getImm())) { + if (MO2.getReg() == 0) + O << '!'; + else + O << ", " << getRegisterName(MO2.getReg()); + } +} + +void ARMInstPrinter::printAddrModePCOperand(const MCInst *MI, unsigned OpNum, + const char *Modifier) { + assert(0 && "FIXME: Implement printAddrModePCOperand"); +} + +void ARMInstPrinter::printBitfieldInvMaskImmOperand (const MCInst *MI, + unsigned OpNum) { + const MCOperand &MO = MI->getOperand(OpNum); + uint32_t v = ~MO.getImm(); + int32_t lsb = CountTrailingZeros_32(v); + int32_t width = (32 - CountLeadingZeros_32 (v)) - lsb; + assert(MO.isImm() && "Not a valid bf_inv_mask_imm value!"); + O << '#' << lsb << ", #" << width; +} + +void ARMInstPrinter::printRegisterList(const MCInst *MI, unsigned OpNum) { + O << "{"; + // Always skip the first operand, it's the optional (and implicit writeback). + for (unsigned i = OpNum+1, e = MI->getNumOperands(); i != e; ++i) { + if (i != OpNum+1) O << ", "; + O << getRegisterName(MI->getOperand(i).getReg()); + } + O << "}"; +} + +void ARMInstPrinter::printPredicateOperand(const MCInst *MI, unsigned OpNum) { + ARMCC::CondCodes CC = (ARMCC::CondCodes)MI->getOperand(OpNum).getImm(); + if (CC != ARMCC::AL) + O << ARMCondCodeToString(CC); +} + +void ARMInstPrinter::printSBitModifierOperand(const MCInst *MI, unsigned OpNum){ + if (MI->getOperand(OpNum).getReg()) { + assert(MI->getOperand(OpNum).getReg() == ARM::CPSR && + "Expect ARM CPSR register!"); + O << 's'; + } +} + + + +void ARMInstPrinter::printCPInstOperand(const MCInst *MI, unsigned OpNum, + const char *Modifier) { + // FIXME: remove this. + abort(); +} + +void ARMInstPrinter::printNoHashImmediate(const MCInst *MI, unsigned OpNum) { + O << MI->getOperand(OpNum).getImm(); +} + + +void ARMInstPrinter::printPCLabel(const MCInst *MI, unsigned OpNum) { + // FIXME: remove this. + abort(); +} diff --git a/lib/Target/ARM/AsmPrinter/ARMInstPrinter.h b/lib/Target/ARM/AsmPrinter/ARMInstPrinter.h new file mode 100644 index 000000000000..492513768b27 --- /dev/null +++ b/lib/Target/ARM/AsmPrinter/ARMInstPrinter.h @@ -0,0 +1,89 @@ +//===-- ARMInstPrinter.h - Convert ARM MCInst to assembly syntax ----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This class prints an ARM MCInst to a .s file. +// +//===----------------------------------------------------------------------===// + +#ifndef ARMINSTPRINTER_H +#define ARMINSTPRINTER_H + +#include "llvm/MC/MCInstPrinter.h" + +namespace llvm { + class MCOperand; + +class ARMInstPrinter : public MCInstPrinter { + bool VerboseAsm; +public: + ARMInstPrinter(raw_ostream &O, const MCAsmInfo &MAI, bool verboseAsm) + : MCInstPrinter(O, MAI), VerboseAsm(verboseAsm) {} + + virtual void printInst(const MCInst *MI); + + // Autogenerated by tblgen. + void printInstruction(const MCInst *MI); + static const char *getRegisterName(unsigned RegNo); + + + void printOperand(const MCInst *MI, unsigned OpNo, + const char *Modifier = 0); + + void printSOImmOperand(const MCInst *MI, unsigned OpNum); + void printSOImm2PartOperand(const MCInst *MI, unsigned OpNum); + + void printSORegOperand(const MCInst *MI, unsigned OpNum); + void printAddrMode2Operand(const MCInst *MI, unsigned OpNum); + void printAddrMode2OffsetOperand(const MCInst *MI, unsigned OpNum); + void printAddrMode3Operand(const MCInst *MI, unsigned OpNum); + void printAddrMode3OffsetOperand(const MCInst *MI, unsigned OpNum); + void printAddrMode4Operand(const MCInst *MI, unsigned OpNum, + const char *Modifier = 0); + void printAddrMode5Operand(const MCInst *MI, unsigned OpNum, + const char *Modifier = 0); + void printAddrMode6Operand(const MCInst *MI, unsigned OpNum); + void printAddrModePCOperand(const MCInst *MI, unsigned OpNum, + const char *Modifier = 0); + + void printBitfieldInvMaskImmOperand(const MCInst *MI, unsigned OpNum); + + void printThumbITMask(const MCInst *MI, unsigned OpNum) {} + void printThumbAddrModeRROperand(const MCInst *MI, unsigned OpNum) {} + void printThumbAddrModeRI5Operand(const MCInst *MI, unsigned OpNum, + unsigned Scale) {} + void printThumbAddrModeS1Operand(const MCInst *MI, unsigned OpNum) {} + void printThumbAddrModeS2Operand(const MCInst *MI, unsigned OpNum) {} + void printThumbAddrModeS4Operand(const MCInst *MI, unsigned OpNum) {} + void printThumbAddrModeSPOperand(const MCInst *MI, unsigned OpNum) {} + + void printT2SOOperand(const MCInst *MI, unsigned OpNum) {} + void printT2AddrModeImm12Operand(const MCInst *MI, unsigned OpNum) {} + void printT2AddrModeImm8Operand(const MCInst *MI, unsigned OpNum) {} + void printT2AddrModeImm8s4Operand(const MCInst *MI, unsigned OpNum) {} + void printT2AddrModeImm8OffsetOperand(const MCInst *MI, unsigned OpNum) {} + void printT2AddrModeSoRegOperand(const MCInst *MI, unsigned OpNum) {} + + void printPredicateOperand(const MCInst *MI, unsigned OpNum); + void printSBitModifierOperand(const MCInst *MI, unsigned OpNum); + void printRegisterList(const MCInst *MI, unsigned OpNum); + void printCPInstOperand(const MCInst *MI, unsigned OpNum, + const char *Modifier); + void printJTBlockOperand(const MCInst *MI, unsigned OpNum) {} + void printJT2BlockOperand(const MCInst *MI, unsigned OpNum) {} + void printTBAddrMode(const MCInst *MI, unsigned OpNum) {} + void printNoHashImmediate(const MCInst *MI, unsigned OpNum); + + void printPCLabel(const MCInst *MI, unsigned OpNum); + // FIXME: Implement. + void PrintSpecial(const MCInst *MI, const char *Kind) {} +}; + +} + +#endif diff --git a/lib/Target/ARM/AsmPrinter/ARMMCInstLower.cpp b/lib/Target/ARM/AsmPrinter/ARMMCInstLower.cpp new file mode 100644 index 000000000000..757164e682af --- /dev/null +++ b/lib/Target/ARM/AsmPrinter/ARMMCInstLower.cpp @@ -0,0 +1,166 @@ +//===-- ARMMCInstLower.cpp - Convert ARM MachineInstr to an MCInst --------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains code to lower ARM MachineInstrs to their corresponding +// MCInst records. +// +//===----------------------------------------------------------------------===// + +#include "ARMMCInstLower.h" +//#include "llvm/CodeGen/MachineModuleInfoImpls.h" +#include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCInst.h" +//#include "llvm/MC/MCStreamer.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Support/Mangler.h" +#include "llvm/ADT/SmallString.h" +using namespace llvm; + + +#if 0 +const ARMSubtarget &ARMMCInstLower::getSubtarget() const { + return AsmPrinter.getSubtarget(); +} + +MachineModuleInfoMachO &ARMMCInstLower::getMachOMMI() const { + assert(getSubtarget().isTargetDarwin() &&"Can only get MachO info on darwin"); + return AsmPrinter.MMI->getObjFileInfo<MachineModuleInfoMachO>(); +} +#endif + +MCSymbol *ARMMCInstLower:: +GetGlobalAddressSymbol(const MachineOperand &MO) const { + const GlobalValue *GV = MO.getGlobal(); + + SmallString<128> Name; + Mang.getNameWithPrefix(Name, GV, false); + + // FIXME: HANDLE PLT references how?? + switch (MO.getTargetFlags()) { + default: assert(0 && "Unknown target flag on GV operand"); + case 0: break; + } + + return Ctx.GetOrCreateSymbol(Name.str()); +} + +MCSymbol *ARMMCInstLower:: +GetExternalSymbolSymbol(const MachineOperand &MO) const { + SmallString<128> Name; + Name += Printer.MAI->getGlobalPrefix(); + Name += MO.getSymbolName(); + + // FIXME: HANDLE PLT references how?? + switch (MO.getTargetFlags()) { + default: assert(0 && "Unknown target flag on GV operand"); + case 0: break; + } + + return Ctx.GetOrCreateSymbol(Name.str()); +} + + + +MCSymbol *ARMMCInstLower:: +GetJumpTableSymbol(const MachineOperand &MO) const { + SmallString<256> Name; + raw_svector_ostream(Name) << Printer.MAI->getPrivateGlobalPrefix() << "JTI" + << Printer.getFunctionNumber() << '_' << MO.getIndex(); + +#if 0 + switch (MO.getTargetFlags()) { + default: llvm_unreachable("Unknown target flag on GV operand"); + } +#endif + + // Create a symbol for the name. + return Ctx.GetOrCreateSymbol(Name.str()); +} + +MCSymbol *ARMMCInstLower:: +GetConstantPoolIndexSymbol(const MachineOperand &MO) const { + SmallString<256> Name; + raw_svector_ostream(Name) << Printer.MAI->getPrivateGlobalPrefix() << "CPI" + << Printer.getFunctionNumber() << '_' << MO.getIndex(); + +#if 0 + switch (MO.getTargetFlags()) { + default: llvm_unreachable("Unknown target flag on GV operand"); + } +#endif + + // Create a symbol for the name. + return Ctx.GetOrCreateSymbol(Name.str()); +} + +MCOperand ARMMCInstLower:: +LowerSymbolOperand(const MachineOperand &MO, MCSymbol *Sym) const { + // FIXME: We would like an efficient form for this, so we don't have to do a + // lot of extra uniquing. + const MCExpr *Expr = MCSymbolRefExpr::Create(Sym, Ctx); + +#if 0 + switch (MO.getTargetFlags()) { + default: llvm_unreachable("Unknown target flag on GV operand"); + } +#endif + + if (!MO.isJTI() && MO.getOffset()) + Expr = MCBinaryExpr::CreateAdd(Expr, + MCConstantExpr::Create(MO.getOffset(), Ctx), + Ctx); + return MCOperand::CreateExpr(Expr); +} + + +void ARMMCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const { + OutMI.setOpcode(MI->getOpcode()); + + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + const MachineOperand &MO = MI->getOperand(i); + + MCOperand MCOp; + switch (MO.getType()) { + default: + MI->dump(); + assert(0 && "unknown operand type"); + case MachineOperand::MO_Register: + // Ignore all implicit register operands. + if (MO.isImplicit()) continue; + MCOp = MCOperand::CreateReg(MO.getReg()); + break; + case MachineOperand::MO_Immediate: + MCOp = MCOperand::CreateImm(MO.getImm()); + break; + case MachineOperand::MO_MachineBasicBlock: + MCOp = MCOperand::CreateExpr(MCSymbolRefExpr::Create( + Printer.GetMBBSymbol(MO.getMBB()->getNumber()), Ctx)); + break; + case MachineOperand::MO_GlobalAddress: + MCOp = LowerSymbolOperand(MO, GetGlobalAddressSymbol(MO)); + break; + case MachineOperand::MO_ExternalSymbol: + MCOp = LowerSymbolOperand(MO, GetExternalSymbolSymbol(MO)); + break; + case MachineOperand::MO_JumpTableIndex: + MCOp = LowerSymbolOperand(MO, GetJumpTableSymbol(MO)); + break; + case MachineOperand::MO_ConstantPoolIndex: + MCOp = LowerSymbolOperand(MO, GetConstantPoolIndexSymbol(MO)); + break; + } + + OutMI.addOperand(MCOp); + } + +} diff --git a/lib/Target/ARM/AsmPrinter/ARMMCInstLower.h b/lib/Target/ARM/AsmPrinter/ARMMCInstLower.h new file mode 100644 index 000000000000..383d30d5de56 --- /dev/null +++ b/lib/Target/ARM/AsmPrinter/ARMMCInstLower.h @@ -0,0 +1,56 @@ +//===-- ARMMCInstLower.h - Lower MachineInstr to MCInst -------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef ARM_MCINSTLOWER_H +#define ARM_MCINSTLOWER_H + +#include "llvm/Support/Compiler.h" + +namespace llvm { + class AsmPrinter; + class MCAsmInfo; + class MCContext; + class MCInst; + class MCOperand; + class MCSymbol; + class MachineInstr; + class MachineModuleInfoMachO; + class MachineOperand; + class Mangler; + //class ARMSubtarget; + +/// ARMMCInstLower - This class is used to lower an MachineInstr into an MCInst. +class VISIBILITY_HIDDEN ARMMCInstLower { + MCContext &Ctx; + Mangler &Mang; + AsmPrinter &Printer; + + //const ARMSubtarget &getSubtarget() const; +public: + ARMMCInstLower(MCContext &ctx, Mangler &mang, AsmPrinter &printer) + : Ctx(ctx), Mang(mang), Printer(printer) {} + + void Lower(const MachineInstr *MI, MCInst &OutMI) const; + + //MCSymbol *GetPICBaseSymbol() const; + MCSymbol *GetGlobalAddressSymbol(const MachineOperand &MO) const; + MCSymbol *GetExternalSymbolSymbol(const MachineOperand &MO) const; + MCSymbol *GetJumpTableSymbol(const MachineOperand &MO) const; + MCSymbol *GetConstantPoolIndexSymbol(const MachineOperand &MO) const; + MCOperand LowerSymbolOperand(const MachineOperand &MO, MCSymbol *Sym) const; + +/* +private: + MachineModuleInfoMachO &getMachOMMI() const; + */ +}; + +} + +#endif diff --git a/lib/Target/ARM/AsmPrinter/CMakeLists.txt b/lib/Target/ARM/AsmPrinter/CMakeLists.txt index a67fc8471a63..4e299f86ecb6 100644 --- a/lib/Target/ARM/AsmPrinter/CMakeLists.txt +++ b/lib/Target/ARM/AsmPrinter/CMakeLists.txt @@ -2,5 +2,7 @@ include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/ add_llvm_library(LLVMARMAsmPrinter ARMAsmPrinter.cpp + ARMInstPrinter.cpp + ARMMCInstLower.cpp ) -add_dependencies(LLVMARMAsmPrinter ARMCodeGenTable_gen)
\ No newline at end of file +add_dependencies(LLVMARMAsmPrinter ARMCodeGenTable_gen) diff --git a/lib/Target/ARM/README-Thumb.txt b/lib/Target/ARM/README-Thumb.txt index a961a576f40d..e7770b2292e1 100644 --- a/lib/Target/ARM/README-Thumb.txt +++ b/lib/Target/ARM/README-Thumb.txt @@ -196,14 +196,6 @@ This is especially bad when dynamic alloca is used. The all fixed size stack objects are referenced off the frame pointer with negative offsets. See oggenc for an example. -//===---------------------------------------------------------------------===// - -We are reserving R3 as a scratch register under thumb mode. So if it is live in -to the function, we save / restore R3 to / from R12. Until register scavenging -is done, we should save R3 to a high callee saved reg at emitPrologue time -(when hasFP is true or stack size is large) and restore R3 from that register -instead. This allows us to at least get rid of the save to r12 everytime it is -used. //===---------------------------------------------------------------------===// diff --git a/lib/Target/ARM/Thumb1RegisterInfo.cpp b/lib/Target/ARM/Thumb1RegisterInfo.cpp index 3c896da4c0ca..6207177b9969 100644 --- a/lib/Target/ARM/Thumb1RegisterInfo.cpp +++ b/lib/Target/ARM/Thumb1RegisterInfo.cpp @@ -32,7 +32,6 @@ #include "llvm/Target/TargetMachine.h" #include "llvm/ADT/BitVector.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; @@ -394,31 +393,48 @@ rewriteFrameIndex(MachineInstr &MI, unsigned FrameRegIdx, return 0; } -/// saveScavengerRegister - Save the register so it can be used by the +/// saveScavengerRegister - Spill the register so it can be used by the /// register scavenger. Return true. -bool Thumb1RegisterInfo::saveScavengerRegister(MachineBasicBlock &MBB, - MachineBasicBlock::iterator I, - const TargetRegisterClass *RC, - unsigned Reg) const { +bool +Thumb1RegisterInfo::saveScavengerRegister(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, + MachineBasicBlock::iterator &UseMI, + const TargetRegisterClass *RC, + unsigned Reg) const { // Thumb1 can't use the emergency spill slot on the stack because // ldr/str immediate offsets must be positive, and if we're referencing // off the frame pointer (if, for example, there are alloca() calls in // the function, the offset will be negative. Use R12 instead since that's // a call clobbered register that we know won't be used in Thumb1 mode. + DebugLoc DL = DebugLoc::getUnknownLoc(); + BuildMI(MBB, I, DL, TII.get(ARM::tMOVtgpr2gpr)). + addReg(ARM::R12, RegState::Define).addReg(Reg, RegState::Kill); + + // The UseMI is where we would like to restore the register. If there's + // interference with R12 before then, however, we'll need to restore it + // before that instead and adjust the UseMI. + bool done = false; + for (MachineBasicBlock::iterator II = I; !done && II != UseMI ; ++II) { + // If this instruction affects R12, adjust our restore point. + for (unsigned i = 0, e = II->getNumOperands(); i != e; ++i) { + const MachineOperand &MO = II->getOperand(i); + if (!MO.isReg() || MO.isUndef() || !MO.getReg() || + TargetRegisterInfo::isVirtualRegister(MO.getReg())) + continue; + if (MO.getReg() == ARM::R12) { + UseMI = II; + done = true; + break; + } + } + } + // Restore the register from R12 + BuildMI(MBB, UseMI, DL, TII.get(ARM::tMOVgpr2tgpr)). + addReg(Reg, RegState::Define).addReg(ARM::R12, RegState::Kill); - TII.copyRegToReg(MBB, I, ARM::R12, Reg, ARM::GPRRegisterClass, RC); return true; } -/// restoreScavengerRegister - restore a registers saved by -// saveScavengerRegister(). -void Thumb1RegisterInfo::restoreScavengerRegister(MachineBasicBlock &MBB, - MachineBasicBlock::iterator I, - const TargetRegisterClass *RC, - unsigned Reg) const { - TII.copyRegToReg(MBB, I, Reg, ARM::R12, RC, ARM::GPRRegisterClass); -} - unsigned Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj, int *Value, @@ -828,7 +844,6 @@ void Thumb1RegisterInfo::emitEpilogue(MachineFunction &MF, if (VARegSaveSize) { // Epilogue for vararg functions: pop LR to R3 and branch off it. - // FIXME: Verify this is still ok when R3 is no longer being reserved. AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tPOP))) .addReg(0) // No write back. .addReg(ARM::R3, RegState::Define); diff --git a/lib/Target/ARM/Thumb1RegisterInfo.h b/lib/Target/ARM/Thumb1RegisterInfo.h index bb7a6199d10d..570a5bc8c2ec 100644 --- a/lib/Target/ARM/Thumb1RegisterInfo.h +++ b/lib/Target/ARM/Thumb1RegisterInfo.h @@ -57,12 +57,9 @@ public: bool saveScavengerRegister(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, + MachineBasicBlock::iterator &UseMI, const TargetRegisterClass *RC, unsigned Reg) const; - void restoreScavengerRegister(MachineBasicBlock &MBB, - MachineBasicBlock::iterator I, - const TargetRegisterClass *RC, - unsigned Reg) const; unsigned eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj, int *Value = NULL, RegScavenger *RS = NULL) const; diff --git a/lib/Target/ARM/Thumb2ITBlockPass.cpp b/lib/Target/ARM/Thumb2ITBlockPass.cpp index 98b5cbdfb98f..427c0bb22b26 100644 --- a/lib/Target/ARM/Thumb2ITBlockPass.cpp +++ b/lib/Target/ARM/Thumb2ITBlockPass.cpp @@ -107,8 +107,12 @@ bool Thumb2ITBlockPass::InsertITBlocks(MachineBasicBlock &MBB) { // Finalize IT mask. ARMCC::CondCodes OCC = ARMCC::getOppositeCondition(CC); unsigned Mask = 0, Pos = 3; - while (MBBI != E && Pos) { + // Branches, including tricky ones like LDM_RET, need to end an IT + // block so check the instruction we just put in the block. + while (MBBI != E && Pos && + (!MI->getDesc().isBranch() && !MI->getDesc().isReturn())) { MachineInstr *NMI = &*MBBI; + MI = NMI; DebugLoc ndl = NMI->getDebugLoc(); unsigned NPredReg = 0; ARMCC::CondCodes NCC = getPredicate(NMI, NPredReg); diff --git a/lib/Target/ARM/Thumb2RegisterInfo.cpp b/lib/Target/ARM/Thumb2RegisterInfo.cpp index 6c4c15dfe354..f217e0e28f75 100644 --- a/lib/Target/ARM/Thumb2RegisterInfo.cpp +++ b/lib/Target/ARM/Thumb2RegisterInfo.cpp @@ -32,7 +32,6 @@ #include "llvm/Target/TargetMachine.h" #include "llvm/ADT/BitVector.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" using namespace llvm; |