Diffstat (limited to 'llvm/lib/Target/RISCV/RISCVISelLowering.h')
-rw-r--r--   llvm/lib/Target/RISCV/RISCVISelLowering.h | 264
1 file changed, 113 insertions(+), 151 deletions(-)
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h
index 0b0ad9229f0b..77605a3076a8 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.h
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h
@@ -15,6 +15,7 @@
 #define LLVM_LIB_TARGET_RISCV_RISCVISELLOWERING_H
 
 #include "RISCV.h"
+#include "RISCVCallingConv.h"
 #include "llvm/CodeGen/CallingConvLower.h"
 #include "llvm/CodeGen/SelectionDAG.h"
 #include "llvm/CodeGen/TargetLowering.h"
@@ -24,7 +25,6 @@ namespace llvm {
 class InstructionCost;
 class RISCVSubtarget;
 struct RISCVRegisterInfo;
-class RVVArgDispatcher;
 
 namespace RISCVISD {
 // clang-format off
@@ -34,6 +34,7 @@ enum NodeType : unsigned {
   SRET_GLUE,
   MRET_GLUE,
   CALL,
+  TAIL,
   /// Select with condition operator - This selects between a true value and
   /// a false value (ops #3 and #4) based on the boolean result of comparing
   /// the lhs and rhs (ops #0 and #1) of a conditional expression with the
@@ -42,9 +43,30 @@ enum NodeType : unsigned {
   /// integer or floating point.
   SELECT_CC,
   BR_CC,
+
+  /// Turn a pair of `i<xlen>`s into an even-odd register pair (`untyped`).
+  /// - Output: `untyped` even-odd register pair
+  /// - Input 0: `i<xlen>` low-order bits, for even register.
+  /// - Input 1: `i<xlen>` high-order bits, for odd register.
+  BuildGPRPair,
+
+  /// Turn an even-odd register pair (`untyped`) into a pair of `i<xlen>`s.
+  /// - Output 0: `i<xlen>` low-order bits, from even register.
+  /// - Output 1: `i<xlen>` high-order bits, from odd register.
+  /// - Input: `untyped` even-odd register pair
+  SplitGPRPair,
+
+  /// Turns a pair of `i32`s into an `f64`. Needed for rv32d/ilp32.
+  /// - Output: `f64`.
+  /// - Input 0: low-order bits (31-0) (as `i32`), for even register.
+  /// - Input 1: high-order bits (63-32) (as `i32`), for odd register.
   BuildPairF64,
+
+  /// Turns a `f64` into a pair of `i32`s. Needed for rv32d/ilp32.
+  /// - Output 0: low-order bits (31-0) (as `i32`), from even register.
+  /// - Output 1: high-order bits (63-32) (as `i32`), from odd register.
+  /// - Input 0: `f64`.
   SplitF64,
-  TAIL,
 
   // Add the Lo 12 bits from an address. Selected to ADDI.
   ADD_LO,
@@ -117,9 +139,6 @@ enum NodeType : unsigned {
   FCVT_W_RV64,
   FCVT_WU_RV64,
 
-  FP_ROUND_BF16,
-  FP_EXTEND_BF16,
-
   // Rounds an FP value to its corresponding integer in the same FP format.
   // First operand is the value to round, the second operand is the largest
   // integer that can be represented exactly in the FP format. This will be
@@ -128,10 +147,14 @@ enum NodeType : unsigned {
   FROUND,
 
   FCLASS,
+  FSGNJX,
 
   // Floating point fmax and fmin matching the RISC-V instruction semantics.
   FMAX, FMIN,
 
+  // Zfa fli instruction for constant materialization.
+  FLI,
+
   // A read of the 64-bit counter CSR on a 32-bit target (returns (Lo, Hi)).
   // It takes a chain operand and another two target constant operands (the
   // CSR numbers of the low and high parts of the counter).
@@ -181,6 +204,12 @@ enum NodeType : unsigned {
   // Truncates a RVV integer vector by one power-of-two. Carries both an extra
   // mask and VL operand.
   TRUNCATE_VECTOR_VL,
+  // Truncates a RVV integer vector by one power-of-two. If the value doesn't
+  // fit in the destination type, the result is saturated. These correspond to
+  // vnclip and vnclipu with a shift of 0. Carries both an extra mask and VL
+  // operand.
+  TRUNCATE_VECTOR_VL_SSAT,
+  TRUNCATE_VECTOR_VL_USAT,
   // Matches the semantics of vslideup/vslidedown. The first operand is the
   // pass-thru operand, the second is the source vector, the third is the XLenVT
   // index (either constant or non-constant), the fourth is the mask, the fifth
@@ -231,7 +260,7 @@ enum NodeType : unsigned {
   VECREDUCE_FMIN_VL,
   VECREDUCE_FMAX_VL,
 
-  // Vector binary ops with a merge as a third operand, a mask as a fourth
+  // Vector binary ops with a passthru as a third operand, a mask as a fourth
   // operand, and VL as a fifth operand.
   ADD_VL,
   AND_VL,
@@ -273,10 +302,6 @@ enum NodeType : unsigned {
   // Rounding averaging adds of unsigned integers.
   AVGCEILU_VL,
 
-  // Operands are (source, shift, merge, mask, roundmode, vl)
-  VNCLIPU_VL,
-  VNCLIP_VL,
-
   MULHS_VL,
   MULHU_VL,
   FADD_VL,
@@ -291,11 +316,9 @@ enum NodeType : unsigned {
   FABS_VL,
   FSQRT_VL,
   FCLASS_VL,
-  FCOPYSIGN_VL, // Has a merge operand
+  FCOPYSIGN_VL, // Has a passthru operand
   VFCVT_RTZ_X_F_VL,
   VFCVT_RTZ_XU_F_VL,
-  VFCVT_X_F_VL,
-  VFCVT_XU_F_VL,
   VFROUND_NOEXCEPT_VL,
   VFCVT_RM_X_F_VL, // Has a rounding mode operand.
   VFCVT_RM_XU_F_VL, // Has a rounding mode operand.
@@ -319,7 +342,7 @@ enum NodeType : unsigned {
   VFWMSUB_VL,
   VFWNMSUB_VL,
 
-  // Widening instructions with a merge value a third operand, a mask as a
+  // Widening instructions with a passthru value a third operand, a mask as a
   // fourth operand, and VL as a fifth operand.
   VWMUL_VL,
   VWMULU_VL,
@@ -346,10 +369,6 @@ enum NodeType : unsigned {
   VWMACCU_VL,
   VWMACCSU_VL,
 
-  // Narrowing logical shift right.
-  // Operands are (source, shift, passthru, mask, vl)
-  VNSRL_VL,
-
   // Vector compare producing a mask. Fourth operand is input mask. Fifth
   // operand is VL.
   SETCC_VL,
@@ -408,35 +427,12 @@ enum NodeType : unsigned {
   CZERO_EQZ, // vt.maskc for XVentanaCondOps.
   CZERO_NEZ, // vt.maskcn for XVentanaCondOps.
 
-  /// Software guarded BRIND node. Operand 0 is the chain operand and
-  /// operand 1 is the target address.
+  // Software guarded BRIND node. Operand 0 is the chain operand and
+  // operand 1 is the target address.
   SW_GUARDED_BRIND,
-
-  // FP to 32 bit int conversions for RV64. These are used to keep track of the
-  // result being sign extended to 64 bit. These saturate out of range inputs.
-  STRICT_FCVT_W_RV64 = ISD::FIRST_TARGET_STRICTFP_OPCODE,
-  STRICT_FCVT_WU_RV64,
-  STRICT_FADD_VL,
-  STRICT_FSUB_VL,
-  STRICT_FMUL_VL,
-  STRICT_FDIV_VL,
-  STRICT_FSQRT_VL,
-  STRICT_VFMADD_VL,
-  STRICT_VFNMADD_VL,
-  STRICT_VFMSUB_VL,
-  STRICT_VFNMSUB_VL,
-  STRICT_FP_ROUND_VL,
-  STRICT_FP_EXTEND_VL,
-  STRICT_VFNCVT_ROD_VL,
-  STRICT_SINT_TO_FP_VL,
-  STRICT_UINT_TO_FP_VL,
-  STRICT_VFCVT_RM_X_F_VL,
-  STRICT_VFCVT_RTZ_X_F_VL,
-  STRICT_VFCVT_RTZ_XU_F_VL,
-  STRICT_FSETCC_VL,
-  STRICT_FSETCCS_VL,
-  STRICT_VFROUND_NOEXCEPT_VL,
-  LAST_RISCV_STRICTFP_OPCODE = STRICT_VFROUND_NOEXCEPT_VL,
+  // Software guarded calls for large code model
+  SW_GUARDED_CALL,
+  SW_GUARDED_TAIL,
 
   SF_VC_XV_SE,
   SF_VC_IV_SE,
@@ -465,15 +461,48 @@ enum NodeType : unsigned {
   SF_VC_V_VVW_SE,
   SF_VC_V_FVW_SE,
 
-  // WARNING: Do not add anything in the end unless you want the node to
-  // have memop! In fact, starting from FIRST_TARGET_MEMORY_OPCODE all
-  // opcodes will be thought as target memory ops!
+  // To avoid stack clash, allocation is performed by block and each block is
+  // probed.
+  PROBED_ALLOCA,
+
+  // RISC-V vector tuple type version of INSERT_SUBVECTOR/EXTRACT_SUBVECTOR.
+  TUPLE_INSERT,
+  TUPLE_EXTRACT,
 
-  TH_LWD = ISD::FIRST_TARGET_MEMORY_OPCODE,
+  // FP to 32 bit int conversions for RV64. These are used to keep track of the
+  // result being sign extended to 64 bit. These saturate out of range inputs.
+  FIRST_STRICTFP_OPCODE,
+  STRICT_FCVT_W_RV64 = FIRST_STRICTFP_OPCODE,
+  STRICT_FCVT_WU_RV64,
+  STRICT_FADD_VL,
+  STRICT_FSUB_VL,
+  STRICT_FMUL_VL,
+  STRICT_FDIV_VL,
+  STRICT_FSQRT_VL,
+  STRICT_VFMADD_VL,
+  STRICT_VFNMADD_VL,
+  STRICT_VFMSUB_VL,
+  STRICT_VFNMSUB_VL,
+  STRICT_FP_ROUND_VL,
+  STRICT_FP_EXTEND_VL,
+  STRICT_VFNCVT_ROD_VL,
+  STRICT_SINT_TO_FP_VL,
+  STRICT_UINT_TO_FP_VL,
+  STRICT_VFCVT_RM_X_F_VL,
+  STRICT_VFCVT_RTZ_X_F_VL,
+  STRICT_VFCVT_RTZ_XU_F_VL,
+  STRICT_FSETCC_VL,
+  STRICT_FSETCCS_VL,
+  STRICT_VFROUND_NOEXCEPT_VL,
+  LAST_STRICTFP_OPCODE = STRICT_VFROUND_NOEXCEPT_VL,
+
+  FIRST_MEMORY_OPCODE,
+  TH_LWD = FIRST_MEMORY_OPCODE,
   TH_LWUD,
   TH_LDD,
   TH_SWD,
   TH_SDD,
+  LAST_MEMORY_OPCODE = TH_SDD,
 };
 // clang-format on
 } // namespace RISCVISD
@@ -510,17 +539,9 @@ public:
       SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y,
       unsigned OldShiftOpcode, unsigned NewShiftOpcode,
       SelectionDAG &DAG) const override;
-  /// Return true if the (vector) instruction I will be lowered to an instruction
-  /// with a scalar splat operand for the given Operand number.
-  bool canSplatOperand(Instruction *I, int Operand) const;
-  /// Return true if a vector instruction will lower to a target instruction
-  /// able to splat the given operand.
-  bool canSplatOperand(unsigned Opcode, int Operand) const;
-  bool shouldSinkOperands(Instruction *I,
-                          SmallVectorImpl<Use *> &Ops) const override;
   bool shouldScalarizeBinop(SDValue VecOp) const override;
   bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override;
-  std::pair<int, bool> getLegalZfaFPImm(const APFloat &Imm, EVT VT) const;
+  int getLegalZfaFPImm(const APFloat &Imm, EVT VT) const;
   bool isFPImmLegal(const APFloat &Imm, EVT VT,
                     bool ForCodeSize) const override;
   bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
@@ -537,6 +558,11 @@ public:
   MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC,
                                     EVT VT) const override;
 
+  /// Return the number of registers for a given MVT, for inline assembly
+  unsigned
+  getNumRegisters(LLVMContext &Context, EVT VT,
+                  std::optional<MVT> RegisterVT = std::nullopt) const override;
+
   /// Return the number of registers for a given MVT, ensuring vectors are
   /// treated as a series of gpr sized integers.
   unsigned getNumRegistersForCallingConv(LLVMContext &Context,
@@ -677,9 +703,11 @@ public:
   bool preferZeroCompareBranch() const override { return true; }
 
-  bool shouldInsertFencesForAtomic(const Instruction *I) const override {
-    return isa<LoadInst>(I) || isa<StoreInst>(I);
-  }
+  // Note that one specific case requires fence insertion for an
+  // AtomicCmpXchgInst but is handled via the RISCVZacasABIFix pass rather
+  // than this hook due to limitations in the interface here.
+  bool shouldInsertFencesForAtomic(const Instruction *I) const override;
+
   Instruction *emitLeadingFence(IRBuilderBase &Builder, Instruction *Inst,
                                 AtomicOrdering Ord) const override;
   Instruction *emitTrailingFence(IRBuilderBase &Builder, Instruction *Inst,
@@ -720,7 +748,7 @@ public:
   getExceptionSelectorRegister(const Constant *PersonalityFn) const override;
 
   bool shouldExtendTypeInLibCall(EVT Type) const override;
-  bool shouldSignExtendTypeInLibCall(EVT Type, bool IsSigned) const override;
+  bool shouldSignExtendTypeInLibCall(Type *Ty, bool IsSigned) const override;
 
   /// Returns the register with the specified architectural or ABI name. This
   /// method is necessary to lower the llvm.read_register.* and
@@ -738,7 +766,7 @@ public:
   bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF,
                       bool IsVarArg,
                       const SmallVectorImpl<ISD::OutputArg> &Outs,
-                      LLVMContext &Context) const override;
+                      LLVMContext &Context, const Type *RetTy) const override;
   SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
                       const SmallVectorImpl<ISD::OutputArg> &Outs,
                       const SmallVectorImpl<SDValue> &OutVals, const SDLoc &DL,
@@ -876,11 +904,11 @@ public:
   bool lowerInterleavedStore(StoreInst *SI, ShuffleVectorInst *SVI,
                              unsigned Factor) const override;
 
-  bool lowerDeinterleaveIntrinsicToLoad(IntrinsicInst *II,
-                                        LoadInst *LI) const override;
+  bool lowerDeinterleaveIntrinsicToLoad(
+      LoadInst *LI, ArrayRef<Value *> DeinterleaveValues) const override;
 
-  bool lowerInterleaveIntrinsicToStore(IntrinsicInst *II,
-                                       StoreInst *SI) const override;
+  bool lowerInterleaveIntrinsicToStore(
+      StoreInst *SI, ArrayRef<Value *> InterleaveValues) const override;
 
   bool supportKCFIBundles() const override { return true; }
 
@@ -891,16 +919,13 @@ public:
                          MachineBasicBlock::instr_iterator &MBBI,
                          const TargetInstrInfo *TII) const override;
 
-  /// RISCVCCAssignFn - This target-specific function extends the default
-  /// CCValAssign with additional information used to lower RISC-V calling
-  /// conventions.
-  typedef bool RISCVCCAssignFn(const DataLayout &DL, RISCVABI::ABI,
-                               unsigned ValNo, MVT ValVT, MVT LocVT,
-                               CCValAssign::LocInfo LocInfo,
-                               ISD::ArgFlagsTy ArgFlags, CCState &State,
-                               bool IsFixed, bool IsRet, Type *OrigTy,
-                               const RISCVTargetLowering &TLI,
-                               RVVArgDispatcher &RVVDispatcher);
+  /// True if stack clash protection is enabled for this functions.
+  bool hasInlineStackProbe(const MachineFunction &MF) const override;
+
+  unsigned getStackProbeSize(const MachineFunction &MF, Align StackAlign) const;
+
+  MachineBasicBlock *emitDynamicProbedAlloc(MachineInstr &MI,
+                                            MachineBasicBlock *MBB) const;
 
 private:
   void analyzeInputArgs(MachineFunction &MF, CCState &CCInfo,
@@ -919,6 +944,7 @@ private:
   SDValue getDynamicTLSAddr(GlobalAddressSDNode *N, SelectionDAG &DAG) const;
   SDValue getTLSDescAddr(GlobalAddressSDNode *N, SelectionDAG &DAG) const;
 
+  SDValue lowerConstantFP(SDValue Op, SelectionDAG &DAG) const;
   SDValue lowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
   SDValue lowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
   SDValue lowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
@@ -958,6 +984,7 @@ private:
   SDValue lowerABS(SDValue Op, SelectionDAG &DAG) const;
   SDValue lowerMaskedLoad(SDValue Op, SelectionDAG &DAG) const;
   SDValue lowerMaskedStore(SDValue Op, SelectionDAG &DAG) const;
+  SDValue lowerVectorCompress(SDValue Op, SelectionDAG &DAG) const;
   SDValue lowerFixedLengthVectorFCOPYSIGNToRVV(SDValue Op,
                                                SelectionDAG &DAG) const;
   SDValue lowerMaskedGather(SDValue Op, SelectionDAG &DAG) const;
@@ -973,6 +1000,7 @@ private:
   SDValue lowerLogicVPOp(SDValue Op, SelectionDAG &DAG) const;
   SDValue lowerVPExtMaskOp(SDValue Op, SelectionDAG &DAG) const;
   SDValue lowerVPSetCCMaskOp(SDValue Op, SelectionDAG &DAG) const;
+  SDValue lowerVPMergeMask(SDValue Op, SelectionDAG &DAG) const;
   SDValue lowerVPSplatExperimental(SDValue Op, SelectionDAG &DAG) const;
   SDValue lowerVPSpliceExperimental(SDValue Op, SelectionDAG &DAG) const;
   SDValue lowerVPReverseExperimental(SDValue Op, SelectionDAG &DAG) const;
@@ -992,9 +1020,14 @@ private:
   SDValue lowerVectorStrictFSetcc(SDValue Op, SelectionDAG &DAG) const;
 
+  SDValue lowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
+
   SDValue expandUnalignedRVVLoad(SDValue Op, SelectionDAG &DAG) const;
   SDValue expandUnalignedRVVStore(SDValue Op, SelectionDAG &DAG) const;
 
+  SDValue lowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
+  SDValue lowerADJUST_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
+
   bool isEligibleForTailCallOptimization(
       CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF,
       const SmallVector<CCValAssign, 16> &ArgLocs) const;
@@ -1042,82 +1075,11 @@ private:
   SDValue emitFlushICache(SelectionDAG &DAG, SDValue InChain, SDValue Start,
                           SDValue End, SDValue Flags, SDLoc DL) const;
-};
-
-/// As per the spec, the rules for passing vector arguments are as follows:
-///
-/// 1. For the first vector mask argument, use v0 to pass it.
-/// 2. For vector data arguments or rest vector mask arguments, starting from
-/// the v8 register, if a vector register group between v8-v23 that has not been
-/// allocated can be found and the first register number is a multiple of LMUL,
-/// then allocate this vector register group to the argument and mark these
-/// registers as allocated. Otherwise, pass it by reference and are replaced in
-/// the argument list with the address.
-/// 3. For tuple vector data arguments, starting from the v8 register, if
-/// NFIELDS consecutive vector register groups between v8-v23 that have not been
-/// allocated can be found and the first register number is a multiple of LMUL,
-/// then allocate these vector register groups to the argument and mark these
-/// registers as allocated. Otherwise, pass it by reference and are replaced in
-/// the argument list with the address.
-class RVVArgDispatcher {
-public:
-  static constexpr unsigned NumArgVRs = 16;
-
-  struct RVVArgInfo {
-    unsigned NF;
-    MVT VT;
-    bool FirstVMask = false;
-  };
-
-  template <typename Arg>
-  RVVArgDispatcher(const MachineFunction *MF, const RISCVTargetLowering *TLI,
-                   ArrayRef<Arg> ArgList)
-      : MF(MF), TLI(TLI) {
-    constructArgInfos(ArgList);
-    compute();
-  }
-
-  RVVArgDispatcher() = default;
-
-  MCPhysReg getNextPhysReg();
-
-private:
-  SmallVector<RVVArgInfo, 4> RVVArgInfos;
-  SmallVector<MCPhysReg, 4> AllocatedPhysRegs;
-
-  const MachineFunction *MF = nullptr;
-  const RISCVTargetLowering *TLI = nullptr;
-
-  unsigned CurIdx = 0;
-
-  template <typename Arg> void constructArgInfos(ArrayRef<Arg> Ret);
-  void compute();
-  void allocatePhysReg(unsigned NF = 1, unsigned LMul = 1,
-                       unsigned StartReg = 0);
+
+  std::pair<const TargetRegisterClass *, uint8_t>
+  findRepresentativeClass(const TargetRegisterInfo *TRI,
+                          MVT VT) const override;
 };
 
-namespace RISCV {
-
-bool CC_RISCV(const DataLayout &DL, RISCVABI::ABI ABI, unsigned ValNo,
-              MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo,
-              ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsFixed,
-              bool IsRet, Type *OrigTy, const RISCVTargetLowering &TLI,
-              RVVArgDispatcher &RVVDispatcher);
-
-bool CC_RISCV_FastCC(const DataLayout &DL, RISCVABI::ABI ABI, unsigned ValNo,
-                     MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo,
-                     ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsFixed,
-                     bool IsRet, Type *OrigTy, const RISCVTargetLowering &TLI,
-                     RVVArgDispatcher &RVVDispatcher);
-
-bool CC_RISCV_GHC(unsigned ValNo, MVT ValVT, MVT LocVT,
-                  CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
-                  CCState &State);
-
-ArrayRef<MCPhysReg> getArgGPRs(const RISCVABI::ABI ABI);
-
-} // end namespace RISCV
-
 namespace RISCVVIntrinsicsTable {
 
 struct RISCVVIntrinsicInfo {
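
Note on the opcode renumbering above: instead of anchoring the strict-FP and memory opcodes directly on ISD::FIRST_TARGET_STRICTFP_OPCODE and ISD::FIRST_TARGET_MEMORY_OPCODE, the enum now defines its own FIRST_STRICTFP_OPCODE/LAST_STRICTFP_OPCODE and FIRST_MEMORY_OPCODE/LAST_MEMORY_OPCODE markers and closes each range explicitly with LAST_MEMORY_OPCODE = TH_SDD. A minimal sketch of how such range markers are typically consumed follows; the helper functions are illustrative only and are not part of this patch:

    #include "RISCVISelLowering.h"

    // Hypothetical classification helpers built on the new range markers;
    // each simply tests whether an opcode falls inside a marked range.
    static bool isRISCVStrictFPOpcode(unsigned Opc) {
      return Opc >= RISCVISD::FIRST_STRICTFP_OPCODE &&
             Opc <= RISCVISD::LAST_STRICTFP_OPCODE;
    }

    static bool isRISCVMemoryOpcode(unsigned Opc) {
      return Opc >= RISCVISD::FIRST_MEMORY_OPCODE &&
             Opc <= RISCVISD::LAST_MEMORY_OPCODE;
    }

Bounding both ends of each range means later additions to the enum cannot silently fall into the strict-FP or memory category, which is what the deleted WARNING comment used to guard against.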
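
The expanded comments on BuildPairF64/SplitF64 (and the new BuildGPRPair/SplitGPRPair nodes) describe nodes that move a wide value through an even-odd register pair. As a sketch of how such nodes are typically created in SelectionDAG lowering code, assuming DAG, DL, Lo, and Hi are already in scope:

    // Combine two i32 halves into a single f64 (the rv32d/ilp32 case).
    SDValue Pair = DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, Lo, Hi);

    // Split the f64 back into its halves. SplitF64 produces two results,
    // so it is created with a VT list; getValue(N) selects each result.
    SDValue Split = DAG.getNode(RISCVISD::SplitF64, DL,
                                DAG.getVTList(MVT::i32, MVT::i32), Pair);
    SDValue NewLo = Split.getValue(0); // bits 31-0, even register
    SDValue NewHi = Split.getValue(1); // bits 63-32, odd register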
