diff options
Diffstat (limited to 'llvm/lib/Target/LoongArch')
18 files changed, 1306 insertions, 17 deletions
diff --git a/llvm/lib/Target/LoongArch/LoongArchAsmPrinter.cpp b/llvm/lib/Target/LoongArch/LoongArchAsmPrinter.cpp index dd61bb2df077..1467d1757ff0 100644 --- a/llvm/lib/Target/LoongArch/LoongArchAsmPrinter.cpp +++ b/llvm/lib/Target/LoongArch/LoongArchAsmPrinter.cpp @@ -27,6 +27,9 @@ using namespace llvm; #include "LoongArchGenMCPseudoLowering.inc" void LoongArchAsmPrinter::emitInstruction(const MachineInstr *MI) { + LoongArch_MC::verifyInstructionPredicates( + MI->getOpcode(), getSubtargetInfo().getFeatureBits()); + // Do any auto-generated pseudo lowerings. if (emitPseudoExpansionLowering(*OutStreamer, MI)) return; diff --git a/llvm/lib/Target/LoongArch/LoongArchAsmPrinter.h b/llvm/lib/Target/LoongArch/LoongArchAsmPrinter.h index 7e5aa49f227c..b51c19188051 100644 --- a/llvm/lib/Target/LoongArch/LoongArchAsmPrinter.h +++ b/llvm/lib/Target/LoongArch/LoongArchAsmPrinter.h @@ -39,6 +39,10 @@ public: // tblgen'erated function. bool emitPseudoExpansionLowering(MCStreamer &OutStreamer, const MachineInstr *MI); + // Wrapper needed for tblgenned pseudo lowering. + bool lowerOperand(const MachineOperand &MO, MCOperand &MCOp) const { + return lowerLoongArchMachineOperandToMCOperand(MO, MCOp, *this); + } }; } // end namespace llvm diff --git a/llvm/lib/Target/LoongArch/LoongArchFloat32InstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchFloat32InstrInfo.td index 5b117d40e0a9..20448492a558 100644 --- a/llvm/lib/Target/LoongArch/LoongArchFloat32InstrInfo.td +++ b/llvm/lib/Target/LoongArch/LoongArchFloat32InstrInfo.td @@ -11,6 +11,22 @@ //===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// +// LoongArch specific DAG Nodes. 
+//===----------------------------------------------------------------------===// + +def SDT_LoongArchMOVGR2FR_W_LA64 + : SDTypeProfile<1, 1, [SDTCisVT<0, f32>, SDTCisVT<1, i64>]>; +def SDT_LoongArchMOVFR2GR_S_LA64 + : SDTypeProfile<1, 1, [SDTCisVT<0, i64>, SDTCisVT<1, f32>]>; +def SDT_LoongArchFTINT : SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisFP<1>]>; + +def loongarch_movgr2fr_w_la64 + : SDNode<"LoongArchISD::MOVGR2FR_W_LA64", SDT_LoongArchMOVGR2FR_W_LA64>; +def loongarch_movfr2gr_s_la64 + : SDNode<"LoongArchISD::MOVFR2GR_S_LA64", SDT_LoongArchMOVFR2GR_S_LA64>; +def loongarch_ftint : SDNode<"LoongArchISD::FTINT", SDT_LoongArchFTINT>; + +//===----------------------------------------------------------------------===// // Instructions //===----------------------------------------------------------------------===// @@ -149,6 +165,7 @@ def : PatFPSetcc<SETULT, FCMP_CULT_S, FPR32>; def : PatFPSetcc<SETULE, FCMP_CULE_S, FPR32>; def : PatFPSetcc<SETUNE, FCMP_CUNE_S, FPR32>; def : PatFPSetcc<SETUO, FCMP_CUN_S, FPR32>; +def : PatFPSetcc<SETLT, FCMP_CLT_S, FPR32>; // TODO: Match signaling comparison strict_fsetccs with FCMP_S*_S instructions. 
@@ -174,4 +191,39 @@ def : PatFPSelectcc<SETULE, FCMP_CULE_S, FSEL_S, FPR32>; def : PatFPSelectcc<SETUNE, FCMP_CUNE_S, FSEL_S, FPR32>; def : PatFPSelectcc<SETUO, FCMP_CUN_S, FSEL_S, FPR32>; +/// Loads + +defm : LdPat<load, FLD_S, f32>; + +/// Stores + +defm : StPat<store, FST_S, FPR32, f32>; + +/// Floating point constants + +def : Pat<(f32 fpimm0), (MOVGR2FR_W R0)>; +def : Pat<(f32 fpimm0neg), (FNEG_S (MOVGR2FR_W R0))>; +def : Pat<(f32 fpimm1), (FFINT_S_W (MOVGR2FR_W (ADDI_W R0, 1)))>; + +// FP Conversion +def : Pat<(loongarch_ftint FPR32:$src), (FTINTRZ_W_S FPR32:$src)>; } // Predicates = [HasBasicF] + +let Predicates = [HasBasicF, IsLA64] in { +// GPR -> FPR +def : Pat<(loongarch_movgr2fr_w_la64 GPR:$src), (MOVGR2FR_W GPR:$src)>; +// FPR -> GPR +def : Pat<(loongarch_movfr2gr_s_la64 FPR32:$src), + (MOVFR2GR_S FPR32:$src)>; +// int -> f32 +def : Pat<(f32 (sint_to_fp GPR:$src)), (FFINT_S_W (MOVGR2FR_W GPR:$src))>; +} // Predicates = [HasBasicF, IsLA64] + +let Predicates = [HasBasicF, IsLA32] in { +// GPR -> FPR +def : Pat<(bitconvert (i32 GPR:$src)), (MOVGR2FR_W GPR:$src)>; +// FPR -> GPR +def : Pat<(i32 (bitconvert FPR32:$src)), (MOVFR2GR_S FPR32:$src)>; +// int -> f32 +def : Pat<(f32 (sint_to_fp (i32 GPR:$src))), (FFINT_S_W (MOVGR2FR_W GPR:$src))>; +} // Predicates = [HasBasicF, IsLA32] diff --git a/llvm/lib/Target/LoongArch/LoongArchFloat64InstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchFloat64InstrInfo.td index 07fa61f4c361..bb50cec9f4c0 100644 --- a/llvm/lib/Target/LoongArch/LoongArchFloat64InstrInfo.td +++ b/llvm/lib/Target/LoongArch/LoongArchFloat64InstrInfo.td @@ -131,6 +131,11 @@ def MOVGR2FR_D : FP_MOV<0b0000000100010100101010, "movgr2fr.d", FPR64, GPR>; def MOVFR2GR_D : FP_MOV<0b0000000100010100101110, "movfr2gr.d", GPR, FPR64>; } // Predicates = [HasBasicD, IsLA64] +// Instructions only available on LA32 +let Predicates = [HasBasicD, IsLA32], isCodeGenOnly = 1 in { +def MOVGR2FR_W_64 : FP_MOV<0b0000000100010100101001, "movgr2fr.w", FPR64, GPR>; +} // 
Predicates = [HasBasicD, IsLA32], isCodeGenOnly = 1 + //===----------------------------------------------------------------------===// // Pseudo-instructions and codegen patterns //===----------------------------------------------------------------------===// @@ -164,6 +169,7 @@ def : PatFPSetcc<SETULT, FCMP_CULT_D, FPR64>; def : PatFPSetcc<SETULE, FCMP_CULE_D, FPR64>; def : PatFPSetcc<SETUNE, FCMP_CUNE_D, FPR64>; def : PatFPSetcc<SETUO, FCMP_CUN_D, FPR64>; +def : PatFPSetcc<SETLT, FCMP_CLT_D, FPR64>; // TODO: Match signaling comparison strict_fsetccs with FCMP_S*_D instructions. @@ -185,4 +191,52 @@ def : PatFPSelectcc<SETULE, FCMP_CULE_D, FSEL_D, FPR64>; def : PatFPSelectcc<SETUNE, FCMP_CUNE_D, FSEL_D, FPR64>; def : PatFPSelectcc<SETUO, FCMP_CUN_D, FSEL_D, FPR64>; +/// Loads + +defm : LdPat<load, FLD_D, f64>; + +/// Stores + +defm : StPat<store, FST_D, FPR64, f64>; + +/// FP conversion operations + +def : Pat<(loongarch_ftint FPR64:$src), (FTINTRZ_W_D FPR64:$src)>; +def : Pat<(f64 (loongarch_ftint FPR64:$src)), (FTINTRZ_L_D FPR64:$src)>; +def : Pat<(loongarch_ftint FPR32:$src), (FTINTRZ_L_S FPR32:$src)>; + +// f64 -> f32 +def : Pat<(f32 (fpround FPR64:$src)), (FCVT_S_D FPR64:$src)>; +// f32 -> f64 +def : Pat<(f64 (fpextend FPR32:$src)), (FCVT_D_S FPR32:$src)>; } // Predicates = [HasBasicD] + +/// Floating point constants + +let Predicates = [HasBasicD, IsLA64] in { +def : Pat<(f64 fpimm0), (MOVGR2FR_D R0)>; +def : Pat<(f64 fpimm0neg), (FNEG_D (MOVGR2FR_D R0))>; +def : Pat<(f64 fpimm1), (FFINT_D_L (MOVGR2FR_D (ADDI_D R0, 1)))>; + +// Convert int to FP +def : Pat<(f64 (sint_to_fp (i64 (sexti32 (i64 GPR:$src))))), + (FFINT_D_W (MOVGR2FR_W GPR:$src))>; +def : Pat<(f64 (sint_to_fp GPR:$src)), (FFINT_D_L (MOVGR2FR_D GPR:$src))>; + +def : Pat<(f64 (uint_to_fp (i64 (zexti32 (i64 GPR:$src))))), + (FFINT_D_W (MOVGR2FR_W GPR:$src))>; + +def : Pat<(bitconvert GPR:$src), (MOVGR2FR_D GPR:$src)>; + +// Convert FP to int +def : Pat<(bitconvert FPR64:$src), (MOVFR2GR_D 
FPR64:$src)>; +} // Predicates = [HasBasicD, IsLA64] + +let Predicates = [HasBasicD, IsLA32] in { +def : Pat<(f64 fpimm0), (MOVGR2FRH_W (MOVGR2FR_W_64 R0), R0)>; +def : Pat<(f64 fpimm0neg), (FNEG_D (MOVGR2FRH_W (MOVGR2FR_W_64 R0), R0))>; +def : Pat<(f64 fpimm1), (FCVT_D_S (FFINT_S_W (MOVGR2FR_W (ADDI_W R0, 1))))>; + +// Convert int to FP +def : Pat<(f64 (sint_to_fp (i32 GPR:$src))), (FFINT_D_W (MOVGR2FR_W GPR:$src))>; +} // Predicates = [HasBasicD, IsLA32] diff --git a/llvm/lib/Target/LoongArch/LoongArchFrameLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchFrameLowering.cpp index 7182d55ca3cf..0d9ec9e2eaaa 100644 --- a/llvm/lib/Target/LoongArch/LoongArchFrameLowering.cpp +++ b/llvm/lib/Target/LoongArch/LoongArchFrameLowering.cpp @@ -11,7 +11,9 @@ //===----------------------------------------------------------------------===// #include "LoongArchFrameLowering.h" +#include "LoongArchMachineFunctionInfo.h" #include "LoongArchSubtarget.h" +#include "MCTargetDesc/LoongArchBaseInfo.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" @@ -44,12 +46,178 @@ bool LoongArchFrameLowering::hasBP(const MachineFunction &MF) const { return MFI.hasVarSizedObjects() && TRI->hasStackRealignment(MF); } +void LoongArchFrameLowering::adjustReg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + const DebugLoc &DL, Register DestReg, + Register SrcReg, int64_t Val, + MachineInstr::MIFlag Flag) const { + const LoongArchInstrInfo *TII = STI.getInstrInfo(); + bool IsLA64 = STI.is64Bit(); + + if (DestReg == SrcReg && Val == 0) + return; + + if (isInt<12>(Val)) { + // addi.w/d $DstReg, $SrcReg, Val + BuildMI(MBB, MBBI, DL, + TII->get(IsLA64 ? LoongArch::ADDI_D : LoongArch::ADDI_W), DestReg) + .addReg(SrcReg) + .addImm(Val) + .setMIFlag(Flag); + return; + } + + report_fatal_error("adjustReg cannot yet handle adjustments >12 bits"); +} + +// Determine the size of the frame and maximum call frame size. 
+void LoongArchFrameLowering::determineFrameLayout(MachineFunction &MF) const { + MachineFrameInfo &MFI = MF.getFrameInfo(); + + // Get the number of bytes to allocate from the FrameInfo. + uint64_t FrameSize = MFI.getStackSize(); + + // Make sure the frame is aligned. + FrameSize = alignTo(FrameSize, getStackAlign()); + + // Update frame info. + MFI.setStackSize(FrameSize); +} + void LoongArchFrameLowering::emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const { - // TODO: Implement this when we have function calls + MachineFrameInfo &MFI = MF.getFrameInfo(); + const LoongArchRegisterInfo *RI = STI.getRegisterInfo(); + const LoongArchInstrInfo *TII = STI.getInstrInfo(); + MachineBasicBlock::iterator MBBI = MBB.begin(); + + Register SPReg = LoongArch::R3; + Register FPReg = LoongArch::R22; + + // Debug location must be unknown since the first debug location is used + // to determine the end of the prologue. + DebugLoc DL; + + // Determine the correct frame layout + determineFrameLayout(MF); + + // First, compute final stack size. + uint64_t StackSize = MFI.getStackSize(); + + // Early exit if there is no need to allocate space in the stack. + if (StackSize == 0 && !MFI.adjustsStack()) + return; + + // Adjust stack. + adjustReg(MBB, MBBI, DL, SPReg, SPReg, -StackSize, MachineInstr::FrameSetup); + // Emit ".cfi_def_cfa_offset StackSize". + unsigned CFIIndex = + MF.addFrameInst(MCCFIInstruction::cfiDefCfaOffset(nullptr, StackSize)); + BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION)) + .addCFIIndex(CFIIndex) + .setMIFlag(MachineInstr::FrameSetup); + + const auto &CSI = MFI.getCalleeSavedInfo(); + + // The frame pointer is callee-saved, and code has been generated for us to + // save it to the stack. We need to skip over the storing of callee-saved + // registers as the frame pointer must be modified after it has been saved + // to the stack, not before. 
+ std::advance(MBBI, CSI.size()); + + // Iterate over list of callee-saved registers and emit .cfi_offset + // directives. + for (const auto &Entry : CSI) { + int64_t Offset = MFI.getObjectOffset(Entry.getFrameIdx()); + unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset( + nullptr, RI->getDwarfRegNum(Entry.getReg(), true), Offset)); + BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION)) + .addCFIIndex(CFIIndex) + .setMIFlag(MachineInstr::FrameSetup); + } + + // Generate new FP. + if (hasFP(MF)) { + adjustReg(MBB, MBBI, DL, FPReg, SPReg, StackSize, MachineInstr::FrameSetup); + + // Emit ".cfi_def_cfa $fp, 0" + unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::cfiDefCfa( + nullptr, RI->getDwarfRegNum(FPReg, true), 0)); + BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION)) + .addCFIIndex(CFIIndex) + .setMIFlag(MachineInstr::FrameSetup); + } } void LoongArchFrameLowering::emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const { - // TODO: Implement this when we have function calls + const LoongArchRegisterInfo *RI = STI.getRegisterInfo(); + MachineFrameInfo &MFI = MF.getFrameInfo(); + Register SPReg = LoongArch::R3; + + MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator(); + DebugLoc DL = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc(); + + const auto &CSI = MFI.getCalleeSavedInfo(); + // Skip to before the restores of callee-saved registers. + auto LastFrameDestroy = MBBI; + if (!CSI.empty()) + LastFrameDestroy = std::prev(MBBI, CSI.size()); + + // Get the number of bytes from FrameInfo. + uint64_t StackSize = MFI.getStackSize(); + + // Restore the stack pointer. 
+ if (RI->hasStackRealignment(MF) || MFI.hasVarSizedObjects()) { + assert(hasFP(MF) && "frame pointer should not have been eliminated"); + adjustReg(MBB, LastFrameDestroy, DL, SPReg, LoongArch::R22, -StackSize, + MachineInstr::FrameDestroy); + } + + // Deallocate stack + adjustReg(MBB, MBBI, DL, SPReg, SPReg, StackSize, MachineInstr::FrameDestroy); +} + +void LoongArchFrameLowering::determineCalleeSaves(MachineFunction &MF, + BitVector &SavedRegs, + RegScavenger *RS) const { + TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS); + // Unconditionally spill RA and FP only if the function uses a frame + // pointer. + if (hasFP(MF)) { + SavedRegs.set(LoongArch::R1); + SavedRegs.set(LoongArch::R22); + } + // Mark BP as used if function has dedicated base pointer. + if (hasBP(MF)) + SavedRegs.set(LoongArchABI::getBPReg()); +} + +StackOffset LoongArchFrameLowering::getFrameIndexReference( + const MachineFunction &MF, int FI, Register &FrameReg) const { + const MachineFrameInfo &MFI = MF.getFrameInfo(); + const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo(); + + // Callee-saved registers should be referenced relative to the stack + // pointer (positive offset), otherwise use the frame pointer (negative + // offset). 
+ const auto &CSI = MFI.getCalleeSavedInfo(); + int MinCSFI = 0; + int MaxCSFI = -1; + StackOffset Offset = + StackOffset::getFixed(MFI.getObjectOffset(FI) - getOffsetOfLocalArea() + + MFI.getOffsetAdjustment()); + + if (CSI.size()) { + MinCSFI = CSI[0].getFrameIdx(); + MaxCSFI = CSI[CSI.size() - 1].getFrameIdx(); + } + + FrameReg = RI->getFrameRegister(MF); + if ((FI >= MinCSFI && FI <= MaxCSFI) || !hasFP(MF)) { + FrameReg = LoongArch::R3; + Offset += StackOffset::getFixed(MFI.getStackSize()); + } + + return Offset; } diff --git a/llvm/lib/Target/LoongArch/LoongArchFrameLowering.h b/llvm/lib/Target/LoongArch/LoongArchFrameLowering.h index 25c53efc10f1..014b666de711 100644 --- a/llvm/lib/Target/LoongArch/LoongArchFrameLowering.h +++ b/llvm/lib/Target/LoongArch/LoongArchFrameLowering.h @@ -31,8 +31,26 @@ public: void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const override; void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override; + void determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs, + RegScavenger *RS) const override; + + MachineBasicBlock::iterator + eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI) const override { + return MBB.erase(MI); + } + + StackOffset getFrameIndexReference(const MachineFunction &MF, int FI, + Register &FrameReg) const override; + bool hasFP(const MachineFunction &MF) const override; bool hasBP(const MachineFunction &MF) const; + +private: + void determineFrameLayout(MachineFunction &MF) const; + void adjustReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, + const DebugLoc &DL, Register DestReg, Register SrcReg, + int64_t Val, MachineInstr::MIFlag Flag) const; }; } // namespace llvm #endif // LLVM_LIB_TARGET_LOONGARCH_LOONGARCHFRAMELOWERING_H diff --git a/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.cpp b/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.cpp index cc9ea0255d98..bb40ff817574 100644 --- 
a/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.cpp +++ b/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.cpp @@ -33,13 +33,14 @@ void LoongArchDAGToDAGISel::Select(SDNode *Node) { unsigned Opcode = Node->getOpcode(); MVT GRLenVT = Subtarget->getGRLenVT(); SDLoc DL(Node); + MVT VT = Node->getSimpleValueType(0); switch (Opcode) { default: break; case ISD::Constant: { int64_t Imm = cast<ConstantSDNode>(Node)->getSExtValue(); - if (Imm == 0 && Node->getSimpleValueType(0) == GRLenVT) { + if (Imm == 0 && VT == GRLenVT) { SDValue New = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, LoongArch::R0, GRLenVT); ReplaceNode(Node, New.getNode()); @@ -60,6 +61,15 @@ void LoongArchDAGToDAGISel::Select(SDNode *Node) { ReplaceNode(Node, Result); return; } + case ISD::FrameIndex: { + SDValue Imm = CurDAG->getTargetConstant(0, DL, GRLenVT); + int FI = cast<FrameIndexSDNode>(Node)->getIndex(); + SDValue TFI = CurDAG->getTargetFrameIndex(FI, VT); + unsigned ADDIOp = + Subtarget->is64Bit() ? LoongArch::ADDI_D : LoongArch::ADDI_W; + ReplaceNode(Node, CurDAG->getMachineNode(ADDIOp, DL, VT, TFI, Imm)); + return; + } // TODO: Add selection nodes needed later. } @@ -67,6 +77,17 @@ void LoongArchDAGToDAGISel::Select(SDNode *Node) { SelectCode(Node); } +bool LoongArchDAGToDAGISel::SelectBaseAddr(SDValue Addr, SDValue &Base) { + // If this is FrameIndex, select it directly. Otherwise just let it get + // selected to a register independently. 
+ if (auto *FIN = dyn_cast<FrameIndexSDNode>(Addr)) + Base = + CurDAG->getTargetFrameIndex(FIN->getIndex(), Subtarget->getGRLenVT()); + else + Base = Addr; + return true; +} + bool LoongArchDAGToDAGISel::selectShiftMask(SDValue N, unsigned ShiftWidth, SDValue &ShAmt) { // Shift instructions on LoongArch only read the lower 5 or 6 bits of the @@ -125,6 +146,39 @@ bool LoongArchDAGToDAGISel::selectShiftMask(SDValue N, unsigned ShiftWidth, return true; } +bool LoongArchDAGToDAGISel::selectSExti32(SDValue N, SDValue &Val) { + if (N.getOpcode() == ISD::SIGN_EXTEND_INREG && + cast<VTSDNode>(N.getOperand(1))->getVT() == MVT::i32) { + Val = N.getOperand(0); + return true; + } + MVT VT = N.getSimpleValueType(); + if (CurDAG->ComputeNumSignBits(N) > (VT.getSizeInBits() - 32)) { + Val = N; + return true; + } + + return false; +} + +bool LoongArchDAGToDAGISel::selectZExti32(SDValue N, SDValue &Val) { + if (N.getOpcode() == ISD::AND) { + auto *C = dyn_cast<ConstantSDNode>(N.getOperand(1)); + if (C && C->getZExtValue() == UINT64_C(0xFFFFFFFF)) { + Val = N.getOperand(0); + return true; + } + } + MVT VT = N.getSimpleValueType(); + APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(), 32); + if (CurDAG->MaskedValueIsZero(N, Mask)) { + Val = N; + return true; + } + + return false; +} + // This pass converts a legalized DAG into a LoongArch-specific DAG, ready // for instruction scheduling. 
FunctionPass *llvm::createLoongArchISelDag(LoongArchTargetMachine &TM) { diff --git a/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.h b/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.h index f477129d933c..7ad329a64424 100644 --- a/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.h +++ b/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.h @@ -38,6 +38,8 @@ public: void Select(SDNode *Node) override; + bool SelectBaseAddr(SDValue Addr, SDValue &Base); + bool selectShiftMask(SDValue N, unsigned ShiftWidth, SDValue &ShAmt); bool selectShiftMaskGRLen(SDValue N, SDValue &ShAmt) { return selectShiftMask(N, Subtarget->getGRLen(), ShAmt); @@ -46,6 +48,9 @@ public: return selectShiftMask(N, 32, ShAmt); } + bool selectSExti32(SDValue N, SDValue &Val); + bool selectZExti32(SDValue N, SDValue &Val); + // Include the pieces autogenerated from the target description. #include "LoongArchGenDAGISel.inc" }; diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp index d5a469216859..4acf90bd9788 100644 --- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp +++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp @@ -17,14 +17,21 @@ #include "LoongArchRegisterInfo.h" #include "LoongArchSubtarget.h" #include "LoongArchTargetMachine.h" +#include "MCTargetDesc/LoongArchMCTargetDesc.h" #include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/ISDOpcodes.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/KnownBits.h" using namespace llvm; #define DEBUG_TYPE "loongarch-isel-lowering" +static cl::opt<bool> ZeroDivCheck( + "loongarch-check-zero-division", cl::Hidden, + cl::desc("Trap on integer division by zero."), + cl::init(false)); + LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM, const LoongArchSubtarget &STI) : TargetLowering(TM), Subtarget(STI) { @@ -37,15 +44,25 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM, if (Subtarget.hasBasicD()) 
addRegisterClass(MVT::f64, &LoongArch::FPR64RegClass); + setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, GRLenVT, + MVT::i1, Promote); + // TODO: add necessary setOperationAction calls later. setOperationAction(ISD::SHL_PARTS, GRLenVT, Custom); setOperationAction(ISD::SRA_PARTS, GRLenVT, Custom); setOperationAction(ISD::SRL_PARTS, GRLenVT, Custom); + setOperationAction(ISD::FP_TO_SINT, GRLenVT, Custom); + + setOperationAction({ISD::GlobalAddress, ISD::ConstantPool}, GRLenVT, Custom); if (Subtarget.is64Bit()) { setOperationAction(ISD::SHL, MVT::i32, Custom); setOperationAction(ISD::SRA, MVT::i32, Custom); setOperationAction(ISD::SRL, MVT::i32, Custom); + setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom); + setOperationAction(ISD::BITCAST, MVT::i32, Custom); + if (Subtarget.hasBasicF() && !Subtarget.hasBasicD()) + setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom); } static const ISD::CondCode FPCCToExpand[] = {ISD::SETOGT, ISD::SETOGE, @@ -58,10 +75,19 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM, if (Subtarget.hasBasicD()) { setCondCodeAction(FPCCToExpand, MVT::f64, Expand); setOperationAction(ISD::SELECT_CC, MVT::f64, Expand); + setTruncStoreAction(MVT::f64, MVT::f32, Expand); + setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand); } + setOperationAction(ISD::BR_CC, GRLenVT, Expand); setOperationAction(ISD::SELECT_CC, GRLenVT, Expand); setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand); + setOperationAction({ISD::SMUL_LOHI, ISD::UMUL_LOHI}, GRLenVT, Expand); + if (!Subtarget.is64Bit()) + setLibcallName(RTLIB::MUL_I128, nullptr); + + setOperationAction(ISD::FP_TO_UINT, GRLenVT, Custom); + setOperationAction(ISD::UINT_TO_FP, GRLenVT, Custom); // Compute derived properties from the register classes. 
computeRegisterProperties(STI.getRegisterInfo()); @@ -70,11 +96,14 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM, setBooleanContents(ZeroOrOneBooleanContent); + setMaxAtomicSizeInBitsSupported(Subtarget.getGRLen()); + // Function alignments. const Align FunctionAlignment(4); setMinFunctionAlignment(FunctionAlignment); setTargetDAGCombine(ISD::AND); + setTargetDAGCombine(ISD::OR); setTargetDAGCombine(ISD::SRL); } @@ -83,6 +112,8 @@ SDValue LoongArchTargetLowering::LowerOperation(SDValue Op, switch (Op.getOpcode()) { default: report_fatal_error("unimplemented operand"); + case ISD::GlobalAddress: + return lowerGlobalAddress(Op, DAG); case ISD::SHL_PARTS: return lowerShiftLeftParts(Op, DAG); case ISD::SRA_PARTS: @@ -96,7 +127,105 @@ SDValue LoongArchTargetLowering::LowerOperation(SDValue Op, assert(Op.getOperand(1).getValueType() == MVT::i32 && Subtarget.is64Bit() && "Unexpected custom legalisation"); return SDValue(); + case ISD::ConstantPool: + return lowerConstantPool(Op, DAG); + case ISD::FP_TO_SINT: + return lowerFP_TO_SINT(Op, DAG); + case ISD::BITCAST: + return lowerBITCAST(Op, DAG); + case ISD::FP_TO_UINT: + return SDValue(); + case ISD::UINT_TO_FP: + return lowerUINT_TO_FP(Op, DAG); + } +} + +SDValue LoongArchTargetLowering::lowerUINT_TO_FP(SDValue Op, + SelectionDAG &DAG) const { + + SDLoc DL(Op); + auto &TLI = DAG.getTargetLoweringInfo(); + SDValue Tmp1, Tmp2; + SDValue Op1 = Op.getOperand(0); + if (Op1->getOpcode() == ISD::AssertZext || + Op1->getOpcode() == ISD::AssertSext) + return Op; + SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Op.getOperand(0)); + SDValue Res = DAG.getNode(ISD::UINT_TO_FP, DL, MVT::f64, Trunc); + SDNode *N = Res.getNode(); + TLI.expandUINT_TO_FP(N, Tmp1, Tmp2, DAG); + return Tmp1; +} + +SDValue LoongArchTargetLowering::lowerBITCAST(SDValue Op, + SelectionDAG &DAG) const { + + SDLoc DL(Op); + SDValue Op0 = Op.getOperand(0); + + if (Op.getValueType() == MVT::f32 && Op0.getValueType() == 
MVT::i32 && + Subtarget.is64Bit() && Subtarget.hasBasicF()) { + SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0); + return DAG.getNode(LoongArchISD::MOVGR2FR_W_LA64, DL, MVT::f32, NewOp0); } + return Op; +} + +SDValue LoongArchTargetLowering::lowerFP_TO_SINT(SDValue Op, + SelectionDAG &DAG) const { + + SDLoc DL(Op); + + if (Op.getValueSizeInBits() > 32 && Subtarget.hasBasicF() && + !Subtarget.hasBasicD()) { + SDValue Dst = + DAG.getNode(LoongArchISD::FTINT, DL, MVT::f32, Op.getOperand(0)); + return DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Dst); + } + + EVT FPTy = EVT::getFloatingPointVT(Op.getValueSizeInBits()); + SDValue Trunc = DAG.getNode(LoongArchISD::FTINT, DL, FPTy, Op.getOperand(0)); + return DAG.getNode(ISD::BITCAST, DL, Op.getValueType(), Trunc); +} + +SDValue LoongArchTargetLowering::lowerConstantPool(SDValue Op, + SelectionDAG &DAG) const { + SDLoc DL(Op); + EVT Ty = Op.getValueType(); + ConstantPoolSDNode *N = cast<ConstantPoolSDNode>(Op); + + // FIXME: Only support PC-relative addressing to access the symbol. + // Target flags will be added later. + if (!isPositionIndependent()) { + SDValue ConstantN = DAG.getTargetConstantPool( + N->getConstVal(), Ty, N->getAlign(), N->getOffset()); + SDValue AddrHi(DAG.getMachineNode(LoongArch::PCALAU12I, DL, Ty, ConstantN), + 0); + SDValue Addr(DAG.getMachineNode(Subtarget.is64Bit() ? LoongArch::ADDI_D + : LoongArch::ADDI_W, + DL, Ty, AddrHi, ConstantN), + 0); + return Addr; + } + report_fatal_error("Unable to lower ConstantPool"); +} + +SDValue LoongArchTargetLowering::lowerGlobalAddress(SDValue Op, + SelectionDAG &DAG) const { + SDLoc DL(Op); + EVT Ty = getPointerTy(DAG.getDataLayout()); + const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal(); + unsigned ADDIOp = Subtarget.is64Bit() ? LoongArch::ADDI_D : LoongArch::ADDI_W; + + // FIXME: Only support PC-relative addressing to access the symbol. + // TODO: Add target flags. 
+ if (!isPositionIndependent()) { + SDValue GA = DAG.getTargetGlobalAddress(GV, DL, Ty); + SDValue AddrHi(DAG.getMachineNode(LoongArch::PCALAU12I, DL, Ty, GA), 0); + SDValue Addr(DAG.getMachineNode(ADDIOp, DL, Ty, AddrHi, GA), 0); + return Addr; + } + report_fatal_error("Unable to lowerGlobalAddress"); } SDValue LoongArchTargetLowering::lowerShiftLeftParts(SDValue Op, @@ -238,6 +367,36 @@ void LoongArchTargetLowering::ReplaceNodeResults( break; } break; + case ISD::FP_TO_SINT: { + assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && + "Unexpected custom legalisation"); + SDValue Src = N->getOperand(0); + EVT VT = EVT::getFloatingPointVT(N->getValueSizeInBits(0)); + SDValue Dst = DAG.getNode(LoongArchISD::FTINT, DL, VT, Src); + Results.push_back(DAG.getNode(ISD::BITCAST, DL, N->getValueType(0), Dst)); + break; + } + case ISD::BITCAST: { + EVT VT = N->getValueType(0); + SDValue Src = N->getOperand(0); + EVT SrcVT = Src.getValueType(); + if (VT == MVT::i32 && SrcVT == MVT::f32 && Subtarget.is64Bit() && + Subtarget.hasBasicF()) { + SDValue Dst = + DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Src); + Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Dst)); + } + break; + } + case ISD::FP_TO_UINT: { + assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && + "Unexpected custom legalisation"); + auto &TLI = DAG.getTargetLoweringInfo(); + SDValue Tmp1, Tmp2; + TLI.expandFP_TO_UINT(N, Tmp1, Tmp2, DAG); + Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Tmp1)); + break; + } } } @@ -345,6 +504,224 @@ static SDValue performSRLCombine(SDNode *N, SelectionDAG &DAG, return SDValue(); } +static SDValue performORCombine(SDNode *N, SelectionDAG &DAG, + TargetLowering::DAGCombinerInfo &DCI, + const LoongArchSubtarget &Subtarget) { + MVT GRLenVT = Subtarget.getGRLenVT(); + EVT ValTy = N->getValueType(0); + SDValue N0 = N->getOperand(0), N1 = N->getOperand(1); + ConstantSDNode *CN0, *CN1; + SDLoc DL(N); + unsigned ValBits = 
ValTy.getSizeInBits(); + unsigned MaskIdx0, MaskLen0, MaskIdx1, MaskLen1; + unsigned Shamt; + bool SwapAndRetried = false; + + if (DCI.isBeforeLegalizeOps()) + return SDValue(); + + if (ValBits != 32 && ValBits != 64) + return SDValue(); + +Retry: + // 1st pattern to match BSTRINS: + // R = or (and X, mask0), (and (shl Y, lsb), mask1) + // where mask1 = (2**size - 1) << lsb, mask0 = ~mask1 + // => + // R = BSTRINS X, Y, msb, lsb (where msb = lsb + size - 1) + if (N0.getOpcode() == ISD::AND && + (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) && + isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) && + N1.getOpcode() == ISD::AND && N1.getOperand(0).getOpcode() == ISD::SHL && + (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(1))) && + isShiftedMask_64(CN1->getZExtValue(), MaskIdx1, MaskLen1) && + MaskIdx0 == MaskIdx1 && MaskLen0 == MaskLen1 && + (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) && + (Shamt = CN1->getZExtValue()) == MaskIdx0 && + (MaskIdx0 + MaskLen0 <= ValBits)) { + LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 1\n"); + return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0), + N1.getOperand(0).getOperand(0), + DAG.getConstant((MaskIdx0 + MaskLen0 - 1), DL, GRLenVT), + DAG.getConstant(MaskIdx0, DL, GRLenVT)); + } + + // 2nd pattern to match BSTRINS: + // R = or (and X, mask0), (shl (and Y, mask1), lsb) + // where mask1 = (2**size - 1), mask0 = ~(mask1 << lsb) + // => + // R = BSTRINS X, Y, msb, lsb (where msb = lsb + size - 1) + if (N0.getOpcode() == ISD::AND && + (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) && + isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) && + N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::AND && + (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(1))) && + (Shamt = CN1->getZExtValue()) == MaskIdx0 && + (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) && + isShiftedMask_64(CN1->getZExtValue(), MaskIdx1, MaskLen1) && + MaskLen0 == 
MaskLen1 && MaskIdx1 == 0 && + (MaskIdx0 + MaskLen0 <= ValBits)) { + LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 2\n"); + return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0), + N1.getOperand(0).getOperand(0), + DAG.getConstant((MaskIdx0 + MaskLen0 - 1), DL, GRLenVT), + DAG.getConstant(MaskIdx0, DL, GRLenVT)); + } + + // 3rd pattern to match BSTRINS: + // R = or (and X, mask0), (and Y, mask1) + // where ~mask0 = (2**size - 1) << lsb, mask0 & mask1 = 0 + // => + // R = BSTRINS X, (shr (and Y, mask1), lsb), msb, lsb + // where msb = lsb + size - 1 + if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::AND && + (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) && + isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) && + (MaskIdx0 + MaskLen0 <= 64) && + (CN1 = dyn_cast<ConstantSDNode>(N1->getOperand(1))) && + (CN1->getSExtValue() & CN0->getSExtValue()) == 0) { + LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 3\n"); + return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0), + DAG.getNode(ISD::SRL, DL, N1->getValueType(0), N1, + DAG.getConstant(MaskIdx0, DL, GRLenVT)), + DAG.getConstant(ValBits == 32 + ? 
(MaskIdx0 + (MaskLen0 & 31) - 1) + : (MaskIdx0 + MaskLen0 - 1), + DL, GRLenVT), + DAG.getConstant(MaskIdx0, DL, GRLenVT)); + } + + // 4th pattern to match BSTRINS: + // R = or (and X, mask), (shl Y, shamt) + // where mask = (2**shamt - 1) + // => + // R = BSTRINS X, Y, ValBits - 1, shamt + // where ValBits = 32 or 64 + if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::SHL && + (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) && + isShiftedMask_64(CN0->getZExtValue(), MaskIdx0, MaskLen0) && + MaskIdx0 == 0 && (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(1))) && + (Shamt = CN1->getZExtValue()) == MaskLen0 && + (MaskIdx0 + MaskLen0 <= ValBits)) { + LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 4\n"); + return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0), + N1.getOperand(0), + DAG.getConstant((ValBits - 1), DL, GRLenVT), + DAG.getConstant(Shamt, DL, GRLenVT)); + } + + // 5th pattern to match BSTRINS: + // R = or (and X, mask), const + // where ~mask = (2**size - 1) << lsb, mask & const = 0 + // => + // R = BSTRINS X, (const >> lsb), msb, lsb + // where msb = lsb + size - 1 + if (N0.getOpcode() == ISD::AND && + (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) && + isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) && + (CN1 = dyn_cast<ConstantSDNode>(N1)) && + (CN1->getSExtValue() & CN0->getSExtValue()) == 0) { + LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 5\n"); + return DAG.getNode( + LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0), + DAG.getConstant(CN1->getSExtValue() >> MaskIdx0, DL, ValTy), + DAG.getConstant((MaskIdx0 + MaskLen0 - 1), DL, GRLenVT), + DAG.getConstant(MaskIdx0, DL, GRLenVT)); + } + + // 6th pattern. + // a = b | ((c & mask) << shamt), where all positions in b to be overwritten + // by the incoming bits are known to be zero. + // => + // a = BSTRINS b, c, shamt + MaskLen - 1, shamt + // + // Note that the 1st pattern is a special situation of the 6th, i.e. 
the 6th + // pattern is more common than the 1st. So we put the 1st before the 6th in + // order to match as many nodes as possible. + ConstantSDNode *CNMask, *CNShamt; + unsigned MaskIdx, MaskLen; + if (N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::AND && + (CNMask = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) && + isShiftedMask_64(CNMask->getZExtValue(), MaskIdx, MaskLen) && + MaskIdx == 0 && (CNShamt = dyn_cast<ConstantSDNode>(N1.getOperand(1))) && + CNShamt->getZExtValue() + MaskLen <= ValBits) { + Shamt = CNShamt->getZExtValue(); + APInt ShMask(ValBits, CNMask->getZExtValue() << Shamt); + if (ShMask.isSubsetOf(DAG.computeKnownBits(N0).Zero)) { + LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 6\n"); + return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0, + N1.getOperand(0).getOperand(0), + DAG.getConstant(Shamt + MaskLen - 1, DL, GRLenVT), + DAG.getConstant(Shamt, DL, GRLenVT)); + } + } + + // 7th pattern. + // a = b | ((c << shamt) & shifted_mask), where all positions in b to be + // overwritten by the incoming bits are known to be zero. + // => + // a = BSTRINS b, c, MaskIdx + MaskLen - 1, MaskIdx + // + // Similarly, the 7th pattern is more common than the 2nd. So we put the 2nd + // before the 7th in order to match as many nodes as possible. 
+ if (N1.getOpcode() == ISD::AND && + (CNMask = dyn_cast<ConstantSDNode>(N1.getOperand(1))) && + isShiftedMask_64(CNMask->getZExtValue(), MaskIdx, MaskLen) && + N1.getOperand(0).getOpcode() == ISD::SHL && + (CNShamt = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) && + CNShamt->getZExtValue() == MaskIdx) { + APInt ShMask(ValBits, CNMask->getZExtValue()); + if (ShMask.isSubsetOf(DAG.computeKnownBits(N0).Zero)) { + LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 7\n"); + return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0, + N1.getOperand(0).getOperand(0), + DAG.getConstant(MaskIdx + MaskLen - 1, DL, GRLenVT), + DAG.getConstant(MaskIdx, DL, GRLenVT)); + } + } + + // (or a, b) and (or b, a) are equivalent, so swap the operands and retry. + if (!SwapAndRetried) { + std::swap(N0, N1); + SwapAndRetried = true; + goto Retry; + } + + SwapAndRetried = false; +Retry2: + // 8th pattern. + // a = b | (c & shifted_mask), where all positions in b to be overwritten by + // the incoming bits are known to be zero. + // => + // a = BSTRINS b, c >> MaskIdx, MaskIdx + MaskLen - 1, MaskIdx + // + // Similarly, the 8th pattern is more common than the 4th and 5th patterns. So + // we put it here in order to match as many nodes as possible or generate less + // instructions. + if (N1.getOpcode() == ISD::AND && + (CNMask = dyn_cast<ConstantSDNode>(N1.getOperand(1))) && + isShiftedMask_64(CNMask->getZExtValue(), MaskIdx, MaskLen)) { + APInt ShMask(ValBits, CNMask->getZExtValue()); + if (ShMask.isSubsetOf(DAG.computeKnownBits(N0).Zero)) { + LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 8\n"); + return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0, + DAG.getNode(ISD::SRL, DL, N1->getValueType(0), + N1->getOperand(0), + DAG.getConstant(MaskIdx, DL, GRLenVT)), + DAG.getConstant(MaskIdx + MaskLen - 1, DL, GRLenVT), + DAG.getConstant(MaskIdx, DL, GRLenVT)); + } + } + // Swap N0/N1 and retry. 
+ if (!SwapAndRetried) { + std::swap(N0, N1); + SwapAndRetried = true; + goto Retry2; + } + + return SDValue(); +} + SDValue LoongArchTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const { SelectionDAG &DAG = DCI.DAG; @@ -353,12 +730,62 @@ SDValue LoongArchTargetLowering::PerformDAGCombine(SDNode *N, break; case ISD::AND: return performANDCombine(N, DAG, DCI, Subtarget); + case ISD::OR: + return performORCombine(N, DAG, DCI, Subtarget); case ISD::SRL: return performSRLCombine(N, DAG, DCI, Subtarget); } return SDValue(); } +static MachineBasicBlock *insertDivByZeroTrap(MachineInstr &MI, + MachineBasicBlock &MBB, + const TargetInstrInfo &TII) { + if (!ZeroDivCheck) + return &MBB; + + // Build instructions: + // div(or mod) $dst, $dividend, $divisor + // bnez $divisor, 8 + // break 7 + // fallthrough + MachineOperand &Divisor = MI.getOperand(2); + auto FallThrough = std::next(MI.getIterator()); + + BuildMI(MBB, FallThrough, MI.getDebugLoc(), TII.get(LoongArch::BNEZ)) + .addReg(Divisor.getReg(), getKillRegState(Divisor.isKill())) + .addImm(8); + + // See linux header file arch/loongarch/include/uapi/asm/break.h for the + // definition of BRK_DIVZERO. + BuildMI(MBB, FallThrough, MI.getDebugLoc(), TII.get(LoongArch::BREAK)) + .addImm(7/*BRK_DIVZERO*/); + + // Clear Divisor's kill flag. 
+ Divisor.setIsKill(false); + + return &MBB; +} + +MachineBasicBlock *LoongArchTargetLowering::EmitInstrWithCustomInserter( + MachineInstr &MI, MachineBasicBlock *BB) const { + + switch (MI.getOpcode()) { + default: + llvm_unreachable("Unexpected instr type to insert"); + case LoongArch::DIV_W: + case LoongArch::DIV_WU: + case LoongArch::MOD_W: + case LoongArch::MOD_WU: + case LoongArch::DIV_D: + case LoongArch::DIV_DU: + case LoongArch::MOD_D: + case LoongArch::MOD_DU: + return insertDivByZeroTrap(MI, *BB, *Subtarget.getInstrInfo()); + break; + } +} + const char *LoongArchTargetLowering::getTargetNodeName(unsigned Opcode) const { switch ((LoongArchISD::NodeType)Opcode) { case LoongArchISD::FIRST_NUMBER: @@ -369,11 +796,16 @@ const char *LoongArchTargetLowering::getTargetNodeName(unsigned Opcode) const { return "LoongArchISD::" #node; // TODO: Add more target-dependent nodes later. + NODE_NAME_CASE(CALL) NODE_NAME_CASE(RET) NODE_NAME_CASE(SLL_W) NODE_NAME_CASE(SRA_W) NODE_NAME_CASE(SRL_W) + NODE_NAME_CASE(BSTRINS) NODE_NAME_CASE(BSTRPICK) + NODE_NAME_CASE(MOVGR2FR_W_LA64) + NODE_NAME_CASE(MOVFR2GR_S_LA64) + NODE_NAME_CASE(FTINT) } #undef NODE_NAME_CASE return nullptr; @@ -483,6 +915,132 @@ SDValue LoongArchTargetLowering::LowerFormalArguments( return Chain; } +// Lower a call to a callseq_start + CALL + callseq_end chain, and add input +// and output parameter nodes. 
+SDValue +LoongArchTargetLowering::LowerCall(CallLoweringInfo &CLI, + SmallVectorImpl<SDValue> &InVals) const { + SelectionDAG &DAG = CLI.DAG; + SDLoc &DL = CLI.DL; + SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs; + SmallVectorImpl<SDValue> &OutVals = CLI.OutVals; + SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins; + SDValue Chain = CLI.Chain; + SDValue Callee = CLI.Callee; + CallingConv::ID CallConv = CLI.CallConv; + bool IsVarArg = CLI.IsVarArg; + EVT PtrVT = getPointerTy(DAG.getDataLayout()); + CLI.IsTailCall = false; + + if (IsVarArg) + report_fatal_error("LowerCall with varargs not implemented"); + + MachineFunction &MF = DAG.getMachineFunction(); + + // Analyze the operands of the call, assigning locations to each operand. + SmallVector<CCValAssign> ArgLocs; + CCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext()); + + analyzeOutputArgs(ArgCCInfo, Outs, CC_LoongArch); + + // Get a count of how many bytes are to be pushed on the stack. + unsigned NumBytes = ArgCCInfo.getNextStackOffset(); + + for (auto &Arg : Outs) { + if (!Arg.Flags.isByVal()) + continue; + report_fatal_error("Passing arguments byval not implemented"); + } + + Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, CLI.DL); + + // Copy argument values to their designated locations. + SmallVector<std::pair<Register, SDValue>> RegsToPass; + for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { + CCValAssign &VA = ArgLocs[i]; + SDValue ArgValue = OutVals[i]; + + // Promote the value if needed. + // For now, only handle fully promoted arguments. + if (VA.getLocInfo() != CCValAssign::Full) + report_fatal_error("Unknown loc info"); + + if (VA.isRegLoc()) { + // Queue up the argument copies and emit them at the end. + RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue)); + } else { + report_fatal_error("Passing arguments via the stack not implemented"); + } + } + + SDValue Glue; + + // Build a sequence of copy-to-reg nodes, chained and glued together. 
+ for (auto &Reg : RegsToPass) { + Chain = DAG.getCopyToReg(Chain, DL, Reg.first, Reg.second, Glue); + Glue = Chain.getValue(1); + } + + // If the callee is a GlobalAddress/ExternalSymbol node, turn it into a + // TargetGlobalAddress/TargetExternalSymbol node so that legalize won't + // split it and then direct call can be matched by PseudoCALL. + // FIXME: Add target flags for relocation. + if (GlobalAddressSDNode *S = dyn_cast<GlobalAddressSDNode>(Callee)) + Callee = DAG.getTargetGlobalAddress(S->getGlobal(), DL, PtrVT); + else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) + Callee = DAG.getTargetExternalSymbol(S->getSymbol(), PtrVT); + + // The first call operand is the chain and the second is the target address. + SmallVector<SDValue> Ops; + Ops.push_back(Chain); + Ops.push_back(Callee); + + // Add argument registers to the end of the list so that they are + // known live into the call. + for (auto &Reg : RegsToPass) + Ops.push_back(DAG.getRegister(Reg.first, Reg.second.getValueType())); + + // Add a register mask operand representing the call-preserved registers. + const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo(); + const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv); + assert(Mask && "Missing call preserved mask for calling convention"); + Ops.push_back(DAG.getRegisterMask(Mask)); + + // Glue the call to the argument copies, if any. + if (Glue.getNode()) + Ops.push_back(Glue); + + // Emit the call. + SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); + + Chain = DAG.getNode(LoongArchISD::CALL, DL, NodeTys, Ops); + DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge); + Glue = Chain.getValue(1); + + // Mark the end of the call, which is glued to the call itself. + Chain = DAG.getCALLSEQ_END(Chain, DAG.getConstant(NumBytes, DL, PtrVT, true), + DAG.getConstant(0, DL, PtrVT, true), Glue, DL); + Glue = Chain.getValue(1); + + // Assign locations to each value returned by this call. 
+ SmallVector<CCValAssign> RVLocs; + CCState RetCCInfo(CallConv, IsVarArg, MF, RVLocs, *DAG.getContext()); + analyzeInputArgs(RetCCInfo, Ins, CC_LoongArch); + + // Copy all of the result registers out of their specified physreg. + for (auto &VA : RVLocs) { + // Copy the value out. + SDValue RetValue = + DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), VA.getLocVT(), Glue); + Chain = RetValue.getValue(1); + Glue = RetValue.getValue(2); + + InVals.push_back(Chain.getValue(0)); + } + + return Chain; +} + bool LoongArchTargetLowering::CanLowerReturn( CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg, const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const { @@ -529,3 +1087,14 @@ SDValue LoongArchTargetLowering::LowerReturn( return DAG.getNode(LoongArchISD::RET, DL, MVT::Other, RetOps); } + +bool LoongArchTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT, + bool ForCodeSize) const { + assert((VT == MVT::f32 || VT == MVT::f64) && "Unexpected VT"); + + if (VT == MVT::f32 && !Subtarget.hasBasicF()) + return false; + if (VT == MVT::f64 && !Subtarget.hasBasicD()) + return false; + return (Imm.isZero() || Imm.isExactlyValue(+1.0)); +} diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h index c852577a3744..279550482675 100644 --- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h +++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h @@ -27,6 +27,7 @@ enum NodeType : unsigned { FIRST_NUMBER = ISD::BUILTIN_OP_END, // TODO: add more LoongArchISDs + CALL, RET, // 32-bit shifts, directly matching the semantics of the named LoongArch // instructions. 
@@ -34,6 +35,13 @@ enum NodeType : unsigned { SRA_W, SRL_W, + // FPR<->GPR transfer operations + MOVGR2FR_W_LA64, + MOVFR2GR_S_LA64, + + FTINT, + + BSTRINS, BSTRPICK, }; @@ -72,6 +80,8 @@ public: const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<SDValue> &OutVals, const SDLoc &DL, SelectionDAG &DAG) const override; + SDValue LowerCall(TargetLowering::CallLoweringInfo &CLI, + SmallVectorImpl<SDValue> &InVals) const override; private: /// Target-specific function used to lower LoongArch calling conventions. @@ -86,8 +96,24 @@ private: const SmallVectorImpl<ISD::OutputArg> &Outs, LoongArchCCAssignFn Fn) const; + SDValue lowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const; SDValue lowerShiftLeftParts(SDValue Op, SelectionDAG &DAG) const; SDValue lowerShiftRightParts(SDValue Op, SelectionDAG &DAG, bool IsSRA) const; + + MachineBasicBlock * + EmitInstrWithCustomInserter(MachineInstr &MI, + MachineBasicBlock *BB) const override; + SDValue lowerConstantPool(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerBITCAST(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) const; + + bool isFPImmLegal(const APFloat &Imm, EVT VT, + bool ForCodeSize) const override; + + bool shouldInsertFencesForAtomic(const Instruction *I) const override { + return isa<LoadInst>(I) || isa<StoreInst>(I); + } }; } // end namespace llvm diff --git a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp index 146ef53befd5..bcbd4b28f3c7 100644 --- a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp +++ b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp @@ -12,6 +12,7 @@ #include "LoongArchInstrInfo.h" #include "LoongArch.h" +#include "LoongArchMachineFunctionInfo.h" using namespace llvm; @@ -19,8 +20,8 @@ using namespace llvm; #include "LoongArchGenInstrInfo.inc" LoongArchInstrInfo::LoongArchInstrInfo(LoongArchSubtarget &STI) - // 
FIXME: add CFSetup and CFDestroy Inst when we implement function call. - : LoongArchGenInstrInfo() {} + : LoongArchGenInstrInfo(LoongArch::ADJCALLSTACKDOWN, + LoongArch::ADJCALLSTACKUP) {} void LoongArchInstrInfo::copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, @@ -47,3 +48,68 @@ void LoongArchInstrInfo::copyPhysReg(MachineBasicBlock &MBB, BuildMI(MBB, MBBI, DL, get(Opc), DstReg) .addReg(SrcReg, getKillRegState(KillSrc)); } + +void LoongArchInstrInfo::storeRegToStackSlot( + MachineBasicBlock &MBB, MachineBasicBlock::iterator I, Register SrcReg, + bool IsKill, int FI, const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const { + DebugLoc DL; + if (I != MBB.end()) + DL = I->getDebugLoc(); + MachineFunction *MF = MBB.getParent(); + MachineFrameInfo &MFI = MF->getFrameInfo(); + + unsigned Opcode; + if (LoongArch::GPRRegClass.hasSubClassEq(RC)) + Opcode = TRI->getRegSizeInBits(LoongArch::GPRRegClass) == 32 + ? LoongArch::ST_W + : LoongArch::ST_D; + else if (LoongArch::FPR32RegClass.hasSubClassEq(RC)) + Opcode = LoongArch::FST_S; + else if (LoongArch::FPR64RegClass.hasSubClassEq(RC)) + Opcode = LoongArch::FST_D; + else + llvm_unreachable("Can't store this register to stack slot"); + + MachineMemOperand *MMO = MF->getMachineMemOperand( + MachinePointerInfo::getFixedStack(*MF, FI), MachineMemOperand::MOStore, + MFI.getObjectSize(FI), MFI.getObjectAlign(FI)); + + BuildMI(MBB, I, DL, get(Opcode)) + .addReg(SrcReg, getKillRegState(IsKill)) + .addFrameIndex(FI) + .addImm(0) + .addMemOperand(MMO); +} + +void LoongArchInstrInfo::loadRegFromStackSlot( + MachineBasicBlock &MBB, MachineBasicBlock::iterator I, Register DstReg, + int FI, const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const { + DebugLoc DL; + if (I != MBB.end()) + DL = I->getDebugLoc(); + MachineFunction *MF = MBB.getParent(); + MachineFrameInfo &MFI = MF->getFrameInfo(); + + unsigned Opcode; + if (LoongArch::GPRRegClass.hasSubClassEq(RC)) + Opcode = 
TRI->getRegSizeInBits(LoongArch::GPRRegClass) == 32 + ? LoongArch::LD_W + : LoongArch::LD_D; + else if (LoongArch::FPR32RegClass.hasSubClassEq(RC)) + Opcode = LoongArch::FLD_S; + else if (LoongArch::FPR64RegClass.hasSubClassEq(RC)) + Opcode = LoongArch::FLD_D; + else + llvm_unreachable("Can't load this register from stack slot"); + + MachineMemOperand *MMO = MF->getMachineMemOperand( + MachinePointerInfo::getFixedStack(*MF, FI), MachineMemOperand::MOLoad, + MFI.getObjectSize(FI), MFI.getObjectAlign(FI)); + + BuildMI(MBB, I, DL, get(Opcode), DstReg) + .addFrameIndex(FI) + .addImm(0) + .addMemOperand(MMO); +} diff --git a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.h b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.h index f31943b85a51..0a8c86a5e0c2 100644 --- a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.h +++ b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.h @@ -30,6 +30,16 @@ public: void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, MCRegister DstReg, MCRegister SrcReg, bool KillSrc) const override; + + void storeRegToStackSlot(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, Register SrcReg, + bool IsKill, int FrameIndex, + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const override; + void loadRegFromStackSlot(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, Register DstReg, + int FrameIndex, const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const override; }; } // end namespace llvm diff --git a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td index 6b8ee9e43f94..d07d086bd7da 100644 --- a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td +++ b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td @@ -14,22 +14,45 @@ // LoongArch specific DAG Nodes. //===----------------------------------------------------------------------===// +// Target-independent type requirements, but with target-specific formats. 
+def SDT_CallSeqStart : SDCallSeqStart<[SDTCisVT<0, i32>, + SDTCisVT<1, i32>]>; +def SDT_CallSeqEnd : SDCallSeqEnd<[SDTCisVT<0, i32>, + SDTCisVT<1, i32>]>; + // Target-dependent type requirements. +def SDT_LoongArchCall : SDTypeProfile<0, -1, [SDTCisVT<0, GRLenVT>]>; def SDT_LoongArchIntBinOpW : SDTypeProfile<1, 2, [ SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisVT<0, i64> ]>; +def SDT_LoongArchBStrIns: SDTypeProfile<1, 4, [ + SDTCisInt<0>, SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisInt<3>, + SDTCisSameAs<3, 4> +]>; + def SDT_LoongArchBStrPick: SDTypeProfile<1, 3, [ SDTCisInt<0>, SDTCisSameAs<0, 1>, SDTCisInt<2>, SDTCisSameAs<2, 3> ]>; // TODO: Add LoongArch specific DAG Nodes +// Target-independent nodes, but with target-specific formats. +def callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_CallSeqStart, + [SDNPHasChain, SDNPOutGlue]>; +def callseq_end : SDNode<"ISD::CALLSEQ_END", SDT_CallSeqEnd, + [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>; + // Target-dependent nodes. +def loongarch_call : SDNode<"LoongArchISD::CALL", SDT_LoongArchCall, + [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, + SDNPVariadic]>; def loongarch_ret : SDNode<"LoongArchISD::RET", SDTNone, [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>; def loongarch_sll_w : SDNode<"LoongArchISD::SLL_W", SDT_LoongArchIntBinOpW>; def loongarch_sra_w : SDNode<"LoongArchISD::SRA_W", SDT_LoongArchIntBinOpW>; def loongarch_srl_w : SDNode<"LoongArchISD::SRL_W", SDT_LoongArchIntBinOpW>; +def loongarch_bstrins + : SDNode<"LoongArchISD::BSTRINS", SDT_LoongArchBStrIns>; def loongarch_bstrpick : SDNode<"LoongArchISD::BSTRPICK", SDT_LoongArchBStrPick>; @@ -106,7 +129,14 @@ def simm16 : Operand<GRLenVT> { let DecoderMethod = "decodeSImmOperand<16>"; } -def simm16_lsl2 : Operand<GRLenVT> { +def simm16_lsl2 : Operand<GRLenVT>, + ImmLeaf<GRLenVT, [{return isInt<16>(Imm>>2);}]> { + let ParserMatchClass = SImmAsmOperand<16, "lsl2">; + let EncoderMethod = "getImmOpValueAsr2"; + let DecoderMethod = "decodeSImmOperand<16, 2>"; +} + 
+def simm16_lsl2_br : Operand<OtherVT> { let ParserMatchClass = SImmAsmOperand<16, "lsl2">; let EncoderMethod = "getImmOpValueAsr2"; let DecoderMethod = "decodeSImmOperand<16, 2>"; @@ -117,13 +147,13 @@ def simm20 : Operand<GRLenVT> { let DecoderMethod = "decodeSImmOperand<20>"; } -def simm21_lsl2 : Operand<GRLenVT> { +def simm21_lsl2 : Operand<OtherVT> { let ParserMatchClass = SImmAsmOperand<21, "lsl2">; let EncoderMethod = "getImmOpValueAsr2"; let DecoderMethod = "decodeSImmOperand<21, 2>"; } -def simm26_lsl2 : Operand<GRLenVT> { +def simm26_lsl2 : Operand<OtherVT> { let ParserMatchClass = SImmAsmOperand<26, "lsl2">; let EncoderMethod = "getImmOpValueAsr2"; let DecoderMethod = "decodeSImmOperand<26, 2>"; @@ -141,6 +171,24 @@ def NegImm : SDNodeXForm<imm, [{ N->getValueType(0)); }]>; +// FP immediate patterns. +def fpimm0 : PatLeaf<(fpimm), [{return N->isExactlyValue(+0.0);}]>; +def fpimm0neg : PatLeaf<(fpimm), [{return N->isExactlyValue(-0.0);}]>; +def fpimm1 : PatLeaf<(fpimm), [{return N->isExactlyValue(+1.0);}]>; + +def CallSymbol: AsmOperandClass { + let Name = "CallSymbol"; + let RenderMethod = "addImmOperands"; + let PredicateMethod = "isImm"; +} + +// A bare symbol used in call only. 
+def call_symbol : Operand<iPTR> { + let ParserMatchClass = CallSymbol; +} + +def BaseAddr : ComplexPattern<iPTR, 1, "SelectBaseAddr">; + //===----------------------------------------------------------------------===// // Instruction Formats //===----------------------------------------------------------------------===// @@ -185,7 +233,7 @@ class RDTIME_2R<bits<22> op, string opstr> : Fmt2R<op, (outs GPR:$rd, GPR:$rj), (ins), opstr, "$rd, $rj">; class BrCC_2RI16<bits<6> op, string opstr> - : Fmt2RI16<op, (outs), (ins GPR:$rj, GPR:$rd, simm16_lsl2:$imm16), opstr, + : Fmt2RI16<op, (outs), (ins GPR:$rj, GPR:$rd, simm16_lsl2_br:$imm16), opstr, "$rj, $rd, $imm16"> { let isBranch = 1; let isTerminator = 1; @@ -274,10 +322,12 @@ def XORI : ALU_2RI12<0b0000001111, "xori", uimm12>; def MUL_W : ALU_3R<0b00000000000111000, "mul.w">; def MULH_W : ALU_3R<0b00000000000111001, "mulh.w">; def MULH_WU : ALU_3R<0b00000000000111010, "mulh.wu">; +let usesCustomInserter = true in { def DIV_W : ALU_3R<0b00000000001000000, "div.w">; def MOD_W : ALU_3R<0b00000000001000001, "mod.w">; def DIV_WU : ALU_3R<0b00000000001000010, "div.wu">; def MOD_WU : ALU_3R<0b00000000001000011, "mod.wu">; +} // usesCustomInserter = true // Bit-shift Instructions def SLL_W : ALU_3R<0b00000000000101110, "sll.w">; @@ -379,10 +429,12 @@ def MULH_D : ALU_3R<0b00000000000111100, "mulh.d">; def MULH_DU : ALU_3R<0b00000000000111101, "mulh.du">; def MULW_D_W : ALU_3R<0b00000000000111110, "mulw.d.w">; def MULW_D_WU : ALU_3R<0b00000000000111111, "mulw.d.wu">; +let usesCustomInserter = true in { def DIV_D : ALU_3R<0b00000000001000100, "div.d">; def MOD_D : ALU_3R<0b00000000001000101, "mod.d">; def DIV_DU : ALU_3R<0b00000000001000110, "div.du">; def MOD_DU : ALU_3R<0b00000000001000111, "mod.du">; +} // usesCustomInserter = true // Bit-shift Instructions for 64-bits def SLL_D : ALU_3R<0b00000000000110001, "sll.d">; @@ -545,6 +597,9 @@ def shiftMaskGRLen : ComplexPattern<GRLenVT, 1, "selectShiftMaskGRLen", [], [], 0>; def 
shiftMask32 : ComplexPattern<i64, 1, "selectShiftMask32", [], [], 0>; +def sexti32 : ComplexPattern<i64, 1, "selectSExti32">; +def zexti32 : ComplexPattern<i64, 1, "selectZExti32">; + class shiftop<SDPatternOperator operator> : PatFrag<(ops node:$val, node:$count), (operator node:$val, (GRLenVT (shiftMaskGRLen node:$count)))>; @@ -556,6 +611,13 @@ let Predicates = [IsLA32] in { def : PatGprGpr<add, ADD_W>; def : PatGprImm<add, ADDI_W, simm12>; def : PatGprGpr<sub, SUB_W>; +def : PatGprGpr<sdiv, DIV_W>; +def : PatGprGpr<udiv, DIV_WU>; +def : PatGprGpr<srem, MOD_W>; +def : PatGprGpr<urem, MOD_WU>; +def : PatGprGpr<mul, MUL_W>; +def : PatGprGpr<mulhs, MULH_W>; +def : PatGprGpr<mulhu, MULH_WU>; } // Predicates = [IsLA32] let Predicates = [IsLA64] in { @@ -565,6 +627,24 @@ def : PatGprImm<add, ADDI_D, simm12>; def : PatGprImm_32<add, ADDI_W, simm12>; def : PatGprGpr<sub, SUB_D>; def : PatGprGpr_32<sub, SUB_W>; +def : PatGprGpr<sdiv, DIV_D>; +def : PatGprGpr<udiv, DIV_DU>; +def : PatGprGpr<srem, MOD_D>; +def : PatGprGpr<urem, MOD_DU>; +// TODO: Select "_W[U]" instructions for i32xi32 if only lower 32 bits of the +// product are used. +def : PatGprGpr<mul, MUL_D>; +def : PatGprGpr<mulhs, MULH_D>; +def : PatGprGpr<mulhu, MULH_DU>; +// Select MULW_D_W for calculating the full 64 bits product of i32xi32 signed +// multiplication. +def : Pat<(i64 (mul (sext_inreg GPR:$rj, i32), (sext_inreg GPR:$rk, i32))), + (MULW_D_W GPR:$rj, GPR:$rk)>; +// Select MULW_D_WU for calculating the full 64 bits product of i32xi32 +// unsigned multiplication. 
+def : Pat<(i64 (mul (loongarch_bstrpick GPR:$rj, (i64 31), (i64 0)), + (loongarch_bstrpick GPR:$rk, (i64 31), (i64 0)))), + (MULW_D_WU GPR:$rj, GPR:$rk)>; } // Predicates = [IsLA64] def : PatGprGpr<and, AND>; @@ -649,19 +729,143 @@ def : Pat<(select GPR:$cond, GPR:$t, GPR:$f), /// Branches and jumps +class BccPat<PatFrag CondOp, LAInst Inst> + : Pat<(brcond (GRLenVT (CondOp GPR:$rj, GPR:$rd)), bb:$imm16), + (Inst GPR:$rj, GPR:$rd, bb:$imm16)>; + +def : BccPat<seteq, BEQ>; +def : BccPat<setne, BNE>; +def : BccPat<setlt, BLT>; +def : BccPat<setge, BGE>; +def : BccPat<setult, BLTU>; +def : BccPat<setuge, BGEU>; + +class BccSwapPat<PatFrag CondOp, LAInst InstBcc> + : Pat<(brcond (GRLenVT (CondOp GPR:$rd, GPR:$rj)), bb:$imm16), + (InstBcc GPR:$rj, GPR:$rd, bb:$imm16)>; + +// Condition codes that don't have matching LoongArch branch instructions, but +// are trivially supported by swapping the two input operands. +def : BccSwapPat<setgt, BLT>; +def : BccSwapPat<setle, BGE>; +def : BccSwapPat<setugt, BLTU>; +def : BccSwapPat<setule, BGEU>; + +// An extra pattern is needed for a brcond without a setcc (i.e. where the +// condition was calculated elsewhere). 
+def : Pat<(brcond GPR:$rj, bb:$imm21), (BNEZ GPR:$rj, bb:$imm21)>; + +let isBarrier = 1, isBranch = 1, isTerminator = 1 in +def PseudoBR : Pseudo<(outs), (ins simm26_lsl2:$imm26), [(br bb:$imm26)]>, + PseudoInstExpansion<(B simm26_lsl2:$imm26)>; + +let isBarrier = 1, isBranch = 1, isIndirectBranch = 1, isTerminator = 1 in +def PseudoBRIND : Pseudo<(outs), (ins GPR:$rj, simm16_lsl2:$imm16), []>, + PseudoInstExpansion<(JIRL R0, GPR:$rj, simm16_lsl2:$imm16)>; + +def : Pat<(brind GPR:$rj), (PseudoBRIND GPR:$rj, 0)>; +def : Pat<(brind (add GPR:$rj, simm16_lsl2:$imm16)), + (PseudoBRIND GPR:$rj, simm16_lsl2:$imm16)>; + +let isCall = 1, Defs = [R1] in +def PseudoCALL : Pseudo<(outs), (ins call_symbol:$func), []> { + let AsmString = "bl\t$func"; +} + +def : Pat<(loongarch_call tglobaladdr:$func), (PseudoCALL tglobaladdr:$func)>; +def : Pat<(loongarch_call texternalsym:$func), (PseudoCALL texternalsym:$func)>; + +let isCall = 1, Defs = [R1] in +def PseudoCALLIndirect : Pseudo<(outs), (ins GPR:$rj), + [(loongarch_call GPR:$rj)]>, + PseudoInstExpansion<(JIRL R1, GPR:$rj, 0)>; + let isBarrier = 1, isReturn = 1, isTerminator = 1 in def PseudoRET : Pseudo<(outs), (ins), [(loongarch_ret)]>, PseudoInstExpansion<(JIRL R0, R1, 0)>; -/// BSTRPICK +/// BSTRINS and BSTRPICK -let Predicates = [IsLA32] in +let Predicates = [IsLA32] in { +def : Pat<(loongarch_bstrins GPR:$rd, GPR:$rj, uimm5:$msbd, uimm5:$lsbd), + (BSTRINS_W GPR:$rd, GPR:$rj, uimm5:$msbd, uimm5:$lsbd)>; def : Pat<(loongarch_bstrpick GPR:$rj, uimm5:$msbd, uimm5:$lsbd), (BSTRPICK_W GPR:$rj, uimm5:$msbd, uimm5:$lsbd)>; +} // Predicates = [IsLA32] -let Predicates = [IsLA64] in +let Predicates = [IsLA64] in { +def : Pat<(loongarch_bstrins GPR:$rd, GPR:$rj, uimm6:$msbd, uimm6:$lsbd), + (BSTRINS_D GPR:$rd, GPR:$rj, uimm6:$msbd, uimm6:$lsbd)>; def : Pat<(loongarch_bstrpick GPR:$rj, uimm6:$msbd, uimm6:$lsbd), (BSTRPICK_D GPR:$rj, uimm6:$msbd, uimm6:$lsbd)>; +} // Predicates = [IsLA64] + +/// Loads + +multiclass LdPat<PatFrag 
LoadOp, LAInst Inst, ValueType vt = GRLenVT> { + def : Pat<(vt (LoadOp BaseAddr:$rj)), (Inst BaseAddr:$rj, 0)>; + def : Pat<(vt (LoadOp (add BaseAddr:$rj, simm12:$imm12))), + (Inst BaseAddr:$rj, simm12:$imm12)>; +} + +defm : LdPat<sextloadi8, LD_B>; +defm : LdPat<extloadi8, LD_B>; +defm : LdPat<sextloadi16, LD_H>; +defm : LdPat<extloadi16, LD_H>; +defm : LdPat<load, LD_W>, Requires<[IsLA32]>; +defm : LdPat<zextloadi8, LD_BU>; +defm : LdPat<zextloadi16, LD_HU>; +let Predicates = [IsLA64] in { +defm : LdPat<sextloadi32, LD_W, i64>; +defm : LdPat<extloadi32, LD_W, i64>; +defm : LdPat<zextloadi32, LD_WU, i64>; +defm : LdPat<load, LD_D, i64>; +} // Predicates = [IsLA64] + +/// Stores + +multiclass StPat<PatFrag StoreOp, LAInst Inst, RegisterClass StTy, + ValueType vt> { + def : Pat<(StoreOp (vt StTy:$rd), BaseAddr:$rj), + (Inst StTy:$rd, BaseAddr:$rj, 0)>; + def : Pat<(StoreOp (vt StTy:$rd), (add BaseAddr:$rj, simm12:$imm12)), + (Inst StTy:$rd, BaseAddr:$rj, simm12:$imm12)>; +} + +defm : StPat<truncstorei8, ST_B, GPR, GRLenVT>; +defm : StPat<truncstorei16, ST_H, GPR, GRLenVT>; +defm : StPat<store, ST_W, GPR, i32>, Requires<[IsLA32]>; +let Predicates = [IsLA64] in { +defm : StPat<truncstorei32, ST_W, GPR, i64>; +defm : StPat<store, ST_D, GPR, i64>; +} // Predicates = [IsLA64] + +/// Atomic loads and stores + +def : Pat<(atomic_fence timm, timm), (DBAR 0)>; + +defm : LdPat<atomic_load_8, LD_B>; +defm : LdPat<atomic_load_16, LD_H>; +defm : LdPat<atomic_load_32, LD_W>; + +defm : StPat<atomic_store_8, ST_B, GPR, GRLenVT>; +defm : StPat<atomic_store_16, ST_H, GPR, GRLenVT>; +defm : StPat<atomic_store_32, ST_W, GPR, i32>, Requires<[IsLA32]>; +let Predicates = [IsLA64] in { +defm : LdPat<atomic_load_64, LD_D>; +defm : StPat<atomic_store_32, ST_W, GPR, i64>; +defm : StPat<atomic_store_64, ST_D, GPR, i64>; +} // Predicates = [IsLA64] + +/// Other pseudo-instructions + +// Pessimistically assume the stack pointer will be clobbered +let Defs = [R3], Uses = [R3] in { +def 
ADJCALLSTACKDOWN : Pseudo<(outs), (ins i32imm:$amt1, i32imm:$amt2), + [(callseq_start timm:$amt1, timm:$amt2)]>; +def ADJCALLSTACKUP : Pseudo<(outs), (ins i32imm:$amt1, i32imm:$amt2), + [(callseq_end timm:$amt1, timm:$amt2)]>; +} // Defs = [R3], Uses = [R3] //===----------------------------------------------------------------------===// // Assembler Pseudo Instructions diff --git a/llvm/lib/Target/LoongArch/LoongArchMCInstLower.cpp b/llvm/lib/Target/LoongArch/LoongArchMCInstLower.cpp index 7416c93b4d05..488c66f47863 100644 --- a/llvm/lib/Target/LoongArch/LoongArchMCInstLower.cpp +++ b/llvm/lib/Target/LoongArch/LoongArchMCInstLower.cpp @@ -22,6 +22,22 @@ using namespace llvm; +static MCOperand lowerSymbolOperand(const MachineOperand &MO, MCSymbol *Sym, + const AsmPrinter &AP) { + MCContext &Ctx = AP.OutContext; + + // TODO: Processing target flags. + + const MCExpr *ME = + MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_None, Ctx); + + if (!MO.isJTI() && !MO.isMBB() && MO.getOffset()) + ME = MCBinaryExpr::createAdd( + ME, MCConstantExpr::create(MO.getOffset(), Ctx), Ctx); + + return MCOperand::createExpr(ME); +} + bool llvm::lowerLoongArchMachineOperandToMCOperand(const MachineOperand &MO, MCOperand &MCOp, const AsmPrinter &AP) { @@ -41,12 +57,21 @@ bool llvm::lowerLoongArchMachineOperandToMCOperand(const MachineOperand &MO, case MachineOperand::MO_Immediate: MCOp = MCOperand::createImm(MO.getImm()); break; - // TODO: lower special operands - case MachineOperand::MO_MachineBasicBlock: + case MachineOperand::MO_ConstantPoolIndex: + MCOp = lowerSymbolOperand(MO, AP.GetCPISymbol(MO.getIndex()), AP); + break; case MachineOperand::MO_GlobalAddress: - case MachineOperand::MO_BlockAddress: + MCOp = lowerSymbolOperand(MO, AP.getSymbolPreferLocal(*MO.getGlobal()), AP); + break; + case MachineOperand::MO_MachineBasicBlock: + MCOp = lowerSymbolOperand(MO, MO.getMBB()->getSymbol(), AP); + break; case MachineOperand::MO_ExternalSymbol: - case 
MachineOperand::MO_ConstantPoolIndex: + MCOp = lowerSymbolOperand( + MO, AP.GetExternalSymbolSymbol(MO.getSymbolName()), AP); + break; + // TODO: lower special operands + case MachineOperand::MO_BlockAddress: case MachineOperand::MO_JumpTableIndex: break; } diff --git a/llvm/lib/Target/LoongArch/LoongArchRegisterInfo.cpp b/llvm/lib/Target/LoongArch/LoongArchRegisterInfo.cpp index b9bae8e56304..05902ebb7ba6 100644 --- a/llvm/lib/Target/LoongArch/LoongArchRegisterInfo.cpp +++ b/llvm/lib/Target/LoongArch/LoongArchRegisterInfo.cpp @@ -110,6 +110,28 @@ void LoongArchRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj, unsigned FIOperandNum, RegScavenger *RS) const { + // TODO: this implementation is a temporary placeholder which does just + // enough to allow other aspects of code generation to be tested. + assert(SPAdj == 0 && "Unexpected non-zero SPAdj value"); - // TODO: Implement this when we have function calls + + MachineInstr &MI = *II; + MachineFunction &MF = *MI.getParent()->getParent(); + const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering(); + DebugLoc DL = MI.getDebugLoc(); + + int FrameIndex = MI.getOperand(FIOperandNum).getIndex(); + Register FrameReg; + StackOffset Offset = + TFI->getFrameIndexReference(MF, FrameIndex, FrameReg) + + StackOffset::getFixed(MI.getOperand(FIOperandNum + 1).getImm()); + + // Offsets must be encodable with a 12-bit immediate field. 
+ if (!isInt<12>(Offset.getFixed())) { + report_fatal_error("Frame offsets outside of the signed 12-bit range is " + "not supported currently"); + } + + MI.getOperand(FIOperandNum).ChangeToRegister(FrameReg, false); + MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Offset.getFixed()); } diff --git a/llvm/lib/Target/LoongArch/LoongArchTargetMachine.cpp b/llvm/lib/Target/LoongArch/LoongArchTargetMachine.cpp index 3a1a46a9e624..468c4f43cb90 100644 --- a/llvm/lib/Target/LoongArch/LoongArchTargetMachine.cpp +++ b/llvm/lib/Target/LoongArch/LoongArchTargetMachine.cpp @@ -102,6 +102,7 @@ public: return getTM<LoongArchTargetMachine>(); } + void addIRPasses() override; bool addInstSelector() override; }; } // namespace @@ -111,6 +112,12 @@ LoongArchTargetMachine::createPassConfig(PassManagerBase &PM) { return new LoongArchPassConfig(*this, PM); } +void LoongArchPassConfig::addIRPasses() { + addPass(createAtomicExpandPass()); + + TargetPassConfig::addIRPasses(); +} + bool LoongArchPassConfig::addInstSelector() { addPass(createLoongArchISelDag(getLoongArchTargetMachine())); diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCTargetDesc.cpp b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCTargetDesc.cpp index c733c194e6a2..e50761ab1e27 100644 --- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCTargetDesc.cpp +++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCTargetDesc.cpp @@ -25,6 +25,7 @@ #include "llvm/Support/Compiler.h" #define GET_INSTRINFO_MC_DESC +#define ENABLE_INSTR_PREDICATE_VERIFIER #include "LoongArchGenInstrInfo.inc" #define GET_REGINFO_MC_DESC diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCTargetDesc.h b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCTargetDesc.h index e576b9a49cd6..a606ccdbc47c 100644 --- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCTargetDesc.h +++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCTargetDesc.h @@ -46,6 +46,7 @@ createLoongArchELFObjectWriter(uint8_t OSABI, bool Is64Bit); // 
Defines symbolic names for LoongArch instructions. #define GET_INSTRINFO_ENUM +#define GET_INSTRINFO_MC_HELPER_DECLS #include "LoongArchGenInstrInfo.inc" #define GET_SUBTARGETINFO_ENUM |