Diffstat (limited to 'contrib/llvm-project/llvm/lib/Target/VE/VEISelLowering.cpp')
-rw-r--r-- | contrib/llvm-project/llvm/lib/Target/VE/VEISelLowering.cpp | 863
1 file changed, 851 insertions, 12 deletions
diff --git a/contrib/llvm-project/llvm/lib/Target/VE/VEISelLowering.cpp b/contrib/llvm-project/llvm/lib/Target/VE/VEISelLowering.cpp index aa6c3c08bd75..ab720545dd83 100644 --- a/contrib/llvm-project/llvm/lib/Target/VE/VEISelLowering.cpp +++ b/contrib/llvm-project/llvm/lib/Target/VE/VEISelLowering.cpp @@ -12,6 +12,8 @@ //===----------------------------------------------------------------------===// #include "VEISelLowering.h" +#include "MCTargetDesc/VEMCExpr.h" +#include "VEMachineFunctionInfo.h" #include "VERegisterInfo.h" #include "VETargetMachine.h" #include "llvm/ADT/StringSwitch.h" @@ -36,14 +38,37 @@ using namespace llvm; // Calling Convention Implementation //===----------------------------------------------------------------------===// +static bool allocateFloat(unsigned ValNo, MVT ValVT, MVT LocVT, + CCValAssign::LocInfo LocInfo, + ISD::ArgFlagsTy ArgFlags, CCState &State) { + switch (LocVT.SimpleTy) { + case MVT::f32: { + // Allocate stack like below + // 0 4 + // +------+------+ + // | empty| float| + // +------+------+ + // Use align=8 for dummy area to align the beginning of these 2 area. + State.AllocateStack(4, Align(8)); // for empty area + // Use align=4 for value to place it at just after the dummy area. + unsigned Offset = State.AllocateStack(4, Align(4)); // for float value area + State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo)); + return true; + } + default: + return false; + } +} + #include "VEGenCallingConv.inc" bool VETargetLowering::CanLowerReturn( CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg, const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const { - assert(!IsVarArg && "TODO implement var args"); - assert(Outs.empty() && "TODO implement return values"); - return true; // TODO support more than 'ret void' + CCAssignFn *RetCC = RetCC_VE; + SmallVector<CCValAssign, 16> RVLocs; + CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context); + return CCInfo.CheckReturn(Outs, RetCC); } SDValue @@ -52,12 +77,57 @@ VETargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<SDValue> &OutVals, const SDLoc &DL, SelectionDAG &DAG) const { - assert(!IsVarArg && "TODO implement var args"); - assert(Outs.empty() && "TODO implement return values"); - assert(OutVals.empty() && "TODO implement return values"); + // CCValAssign - represent the assignment of the return value to locations. + SmallVector<CCValAssign, 16> RVLocs; + + // CCState - Info about the registers and stack slot. + CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs, + *DAG.getContext()); + // Analyze return values. + CCInfo.AnalyzeReturn(Outs, RetCC_VE); + + SDValue Flag; SmallVector<SDValue, 4> RetOps(1, Chain); + + // Copy the result values into the output registers. + for (unsigned i = 0; i != RVLocs.size(); ++i) { + CCValAssign &VA = RVLocs[i]; + assert(VA.isRegLoc() && "Can only return in registers!"); + SDValue OutVal = OutVals[i]; + + // Integer return values must be sign or zero extended by the callee. 
+ switch (VA.getLocInfo()) { + case CCValAssign::Full: + break; + case CCValAssign::SExt: + OutVal = DAG.getNode(ISD::SIGN_EXTEND, DL, VA.getLocVT(), OutVal); + break; + case CCValAssign::ZExt: + OutVal = DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), OutVal); + break; + case CCValAssign::AExt: + OutVal = DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), OutVal); + break; + default: + llvm_unreachable("Unknown loc info!"); + } + + assert(!VA.needsCustom() && "Unexpected custom lowering"); + + Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), OutVal, Flag); + + // Guarantee that all emitted copies are stuck together with flags. + Flag = Chain.getValue(1); + RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT())); + } + RetOps[0] = Chain; // Update chain. + + // Add the flag if we have it. + if (Flag.getNode()) + RetOps.push_back(Flag); + return DAG.getNode(VEISD::RET_FLAG, DL, MVT::Other, RetOps); } @@ -65,8 +135,89 @@ SDValue VETargetLowering::LowerFormalArguments( SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL, SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const { - assert(!IsVarArg && "TODO implement var args"); - assert(Ins.empty() && "TODO implement input arguments"); + MachineFunction &MF = DAG.getMachineFunction(); + + // Get the base offset of the incoming arguments stack space. + unsigned ArgsBaseOffset = 176; + // Get the size of the preserved arguments area + unsigned ArgsPreserved = 64; + + // Analyze arguments according to CC_VE. + SmallVector<CCValAssign, 16> ArgLocs; + CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), ArgLocs, + *DAG.getContext()); + // Allocate the preserved area first. + CCInfo.AllocateStack(ArgsPreserved, Align(8)); + // We already allocated the preserved area, so the stack offset computed + // by CC_VE would be correct now. + CCInfo.AnalyzeFormalArguments(Ins, CC_VE); + + for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { + CCValAssign &VA = ArgLocs[i]; + if (VA.isRegLoc()) { + // This argument is passed in a register. + // All integer register arguments are promoted by the caller to i64. + + // Create a virtual register for the promoted live-in value. + unsigned VReg = + MF.addLiveIn(VA.getLocReg(), getRegClassFor(VA.getLocVT())); + SDValue Arg = DAG.getCopyFromReg(Chain, DL, VReg, VA.getLocVT()); + + // Get the high bits for i32 struct elements. + if (VA.getValVT() == MVT::i32 && VA.needsCustom()) + Arg = DAG.getNode(ISD::SRL, DL, VA.getLocVT(), Arg, + DAG.getConstant(32, DL, MVT::i32)); + + // The caller promoted the argument, so insert an Assert?ext SDNode so we + // won't promote the value again in this function. + switch (VA.getLocInfo()) { + case CCValAssign::SExt: + Arg = DAG.getNode(ISD::AssertSext, DL, VA.getLocVT(), Arg, + DAG.getValueType(VA.getValVT())); + break; + case CCValAssign::ZExt: + Arg = DAG.getNode(ISD::AssertZext, DL, VA.getLocVT(), Arg, + DAG.getValueType(VA.getValVT())); + break; + default: + break; + } + + // Truncate the register down to the argument type. + if (VA.isExtInLoc()) + Arg = DAG.getNode(ISD::TRUNCATE, DL, VA.getValVT(), Arg); + + InVals.push_back(Arg); + continue; + } + + // The registers are exhausted. This argument was passed on the stack. + assert(VA.isMemLoc()); + // The CC_VE_Full/Half functions compute stack offsets relative to the + // beginning of the arguments area at %fp+176. 
+ unsigned Offset = VA.getLocMemOffset() + ArgsBaseOffset; + unsigned ValSize = VA.getValVT().getSizeInBits() / 8; + int FI = MF.getFrameInfo().CreateFixedObject(ValSize, Offset, true); + InVals.push_back( + DAG.getLoad(VA.getValVT(), DL, Chain, + DAG.getFrameIndex(FI, getPointerTy(MF.getDataLayout())), + MachinePointerInfo::getFixedStack(MF, FI))); + } + + if (!IsVarArg) + return Chain; + + // This function takes variable arguments, some of which may have been passed + // in registers %s0-%s8. + // + // The va_start intrinsic needs to know the offset to the first variable + // argument. + // TODO: need to calculate offset correctly once we support f128. + unsigned ArgOffset = ArgLocs.size() * 8; + VEMachineFunctionInfo *FuncInfo = MF.getInfo<VEMachineFunctionInfo>(); + // Skip the 176 bytes of register save area. + FuncInfo->setVarArgsFrameOffset(ArgOffset + ArgsBaseOffset); + return Chain; } @@ -78,7 +229,7 @@ Register VETargetLowering::getRegisterByName(const char *RegName, LLT VT, .Case("sp", VE::SX11) // Stack pointer .Case("fp", VE::SX9) // Frame pointer .Case("sl", VE::SX8) // Stack limit - .Case("lr", VE::SX10) // Link regsiter + .Case("lr", VE::SX10) // Link register .Case("tp", VE::SX14) // Thread pointer .Case("outer", VE::SX12) // Outer regiser .Case("info", VE::SX17) // Info area register @@ -96,6 +247,314 @@ Register VETargetLowering::getRegisterByName(const char *RegName, LLT VT, // TargetLowering Implementation //===----------------------------------------------------------------------===// +SDValue VETargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, + SmallVectorImpl<SDValue> &InVals) const { + SelectionDAG &DAG = CLI.DAG; + SDLoc DL = CLI.DL; + SDValue Chain = CLI.Chain; + auto PtrVT = getPointerTy(DAG.getDataLayout()); + + // VE target does not yet support tail call optimization. + CLI.IsTailCall = false; + + // Get the base offset of the outgoing arguments stack space. + unsigned ArgsBaseOffset = 176; + // Get the size of the preserved arguments area + unsigned ArgsPreserved = 8 * 8u; + + // Analyze operands of the call, assigning locations to each operand. + SmallVector<CCValAssign, 16> ArgLocs; + CCState CCInfo(CLI.CallConv, CLI.IsVarArg, DAG.getMachineFunction(), ArgLocs, + *DAG.getContext()); + // Allocate the preserved area first. + CCInfo.AllocateStack(ArgsPreserved, Align(8)); + // We already allocated the preserved area, so the stack offset computed + // by CC_VE would be correct now. + CCInfo.AnalyzeCallOperands(CLI.Outs, CC_VE); + + // VE requires to use both register and stack for varargs or no-prototyped + // functions. + bool UseBoth = CLI.IsVarArg; + + // Analyze operands again if it is required to store BOTH. + SmallVector<CCValAssign, 16> ArgLocs2; + CCState CCInfo2(CLI.CallConv, CLI.IsVarArg, DAG.getMachineFunction(), + ArgLocs2, *DAG.getContext()); + if (UseBoth) + CCInfo2.AnalyzeCallOperands(CLI.Outs, CC_VE2); + + // Get the size of the outgoing arguments stack space requirement. + unsigned ArgsSize = CCInfo.getNextStackOffset(); + + // Keep stack frames 16-byte aligned. + ArgsSize = alignTo(ArgsSize, 16); + + // Adjust the stack pointer to make room for the arguments. + // FIXME: Use hasReservedCallFrame to avoid %sp adjustments around all calls + // with more than 6 arguments. + Chain = DAG.getCALLSEQ_START(Chain, ArgsSize, 0, DL); + + // Collect the set of registers to pass to the function and their values. + // This will be emitted as a sequence of CopyToReg nodes glued to the call + // instruction. 
+ SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass; + + // Collect chains from all the memory opeations that copy arguments to the + // stack. They must follow the stack pointer adjustment above and precede the + // call instruction itself. + SmallVector<SDValue, 8> MemOpChains; + + // VE needs to get address of callee function in a register + // So, prepare to copy it to SX12 here. + + // If the callee is a GlobalAddress node (quite common, every direct call is) + // turn it into a TargetGlobalAddress node so that legalize doesn't hack it. + // Likewise ExternalSymbol -> TargetExternalSymbol. + SDValue Callee = CLI.Callee; + + bool IsPICCall = isPositionIndependent(); + + // PC-relative references to external symbols should go through $stub. + // If so, we need to prepare GlobalBaseReg first. + const TargetMachine &TM = DAG.getTarget(); + const Module *Mod = DAG.getMachineFunction().getFunction().getParent(); + const GlobalValue *GV = nullptr; + auto *CalleeG = dyn_cast<GlobalAddressSDNode>(Callee); + if (CalleeG) + GV = CalleeG->getGlobal(); + bool Local = TM.shouldAssumeDSOLocal(*Mod, GV); + bool UsePlt = !Local; + MachineFunction &MF = DAG.getMachineFunction(); + + // Turn GlobalAddress/ExternalSymbol node into a value node + // containing the address of them here. + if (CalleeG) { + if (IsPICCall) { + if (UsePlt) + Subtarget->getInstrInfo()->getGlobalBaseReg(&MF); + Callee = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, 0); + Callee = DAG.getNode(VEISD::GETFUNPLT, DL, PtrVT, Callee); + } else { + Callee = + makeHiLoPair(Callee, VEMCExpr::VK_VE_HI32, VEMCExpr::VK_VE_LO32, DAG); + } + } else if (ExternalSymbolSDNode *E = dyn_cast<ExternalSymbolSDNode>(Callee)) { + if (IsPICCall) { + if (UsePlt) + Subtarget->getInstrInfo()->getGlobalBaseReg(&MF); + Callee = DAG.getTargetExternalSymbol(E->getSymbol(), PtrVT, 0); + Callee = DAG.getNode(VEISD::GETFUNPLT, DL, PtrVT, Callee); + } else { + Callee = + makeHiLoPair(Callee, VEMCExpr::VK_VE_HI32, VEMCExpr::VK_VE_LO32, DAG); + } + } + + RegsToPass.push_back(std::make_pair(VE::SX12, Callee)); + + for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { + CCValAssign &VA = ArgLocs[i]; + SDValue Arg = CLI.OutVals[i]; + + // Promote the value if needed. + switch (VA.getLocInfo()) { + default: + llvm_unreachable("Unknown location info!"); + case CCValAssign::Full: + break; + case CCValAssign::SExt: + Arg = DAG.getNode(ISD::SIGN_EXTEND, DL, VA.getLocVT(), Arg); + break; + case CCValAssign::ZExt: + Arg = DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), Arg); + break; + case CCValAssign::AExt: + Arg = DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), Arg); + break; + } + + if (VA.isRegLoc()) { + RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg)); + if (!UseBoth) + continue; + VA = ArgLocs2[i]; + } + + assert(VA.isMemLoc()); + + // Create a store off the stack pointer for this argument. + SDValue StackPtr = DAG.getRegister(VE::SX11, PtrVT); + // The argument area starts at %fp+176 in the callee frame, + // %sp+176 in ours. + SDValue PtrOff = + DAG.getIntPtrConstant(VA.getLocMemOffset() + ArgsBaseOffset, DL); + PtrOff = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, PtrOff); + MemOpChains.push_back( + DAG.getStore(Chain, DL, Arg, PtrOff, MachinePointerInfo())); + } + + // Emit all stores, make sure they occur before the call. 
+ if (!MemOpChains.empty()) + Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains); + + // Build a sequence of CopyToReg nodes glued together with token chain and + // glue operands which copy the outgoing args into registers. The InGlue is + // necessary since all emitted instructions must be stuck together in order + // to pass the live physical registers. + SDValue InGlue; + for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { + Chain = DAG.getCopyToReg(Chain, DL, RegsToPass[i].first, + RegsToPass[i].second, InGlue); + InGlue = Chain.getValue(1); + } + + // Build the operands for the call instruction itself. + SmallVector<SDValue, 8> Ops; + Ops.push_back(Chain); + for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) + Ops.push_back(DAG.getRegister(RegsToPass[i].first, + RegsToPass[i].second.getValueType())); + + // Add a register mask operand representing the call-preserved registers. + const VERegisterInfo *TRI = Subtarget->getRegisterInfo(); + const uint32_t *Mask = + TRI->getCallPreservedMask(DAG.getMachineFunction(), CLI.CallConv); + assert(Mask && "Missing call preserved mask for calling convention"); + Ops.push_back(DAG.getRegisterMask(Mask)); + + // Make sure the CopyToReg nodes are glued to the call instruction which + // consumes the registers. + if (InGlue.getNode()) + Ops.push_back(InGlue); + + // Now the call itself. + SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); + Chain = DAG.getNode(VEISD::CALL, DL, NodeTys, Ops); + InGlue = Chain.getValue(1); + + // Revert the stack pointer immediately after the call. + Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(ArgsSize, DL, true), + DAG.getIntPtrConstant(0, DL, true), InGlue, DL); + InGlue = Chain.getValue(1); + + // Now extract the return values. This is more or less the same as + // LowerFormalArguments. + + // Assign locations to each value returned by this call. + SmallVector<CCValAssign, 16> RVLocs; + CCState RVInfo(CLI.CallConv, CLI.IsVarArg, DAG.getMachineFunction(), RVLocs, + *DAG.getContext()); + + // Set inreg flag manually for codegen generated library calls that + // return float. + if (CLI.Ins.size() == 1 && CLI.Ins[0].VT == MVT::f32 && !CLI.CB) + CLI.Ins[0].Flags.setInReg(); + + RVInfo.AnalyzeCallResult(CLI.Ins, RetCC_VE); + + // Copy all of the result registers out of their specified physreg. + for (unsigned i = 0; i != RVLocs.size(); ++i) { + CCValAssign &VA = RVLocs[i]; + unsigned Reg = VA.getLocReg(); + + // When returning 'inreg {i32, i32 }', two consecutive i32 arguments can + // reside in the same register in the high and low bits. Reuse the + // CopyFromReg previous node to avoid duplicate copies. + SDValue RV; + if (RegisterSDNode *SrcReg = dyn_cast<RegisterSDNode>(Chain.getOperand(1))) + if (SrcReg->getReg() == Reg && Chain->getOpcode() == ISD::CopyFromReg) + RV = Chain.getValue(0); + + // But usually we'll create a new CopyFromReg for a different register. + if (!RV.getNode()) { + RV = DAG.getCopyFromReg(Chain, DL, Reg, RVLocs[i].getLocVT(), InGlue); + Chain = RV.getValue(1); + InGlue = Chain.getValue(2); + } + + // Get the high bits for i32 struct elements. + if (VA.getValVT() == MVT::i32 && VA.needsCustom()) + RV = DAG.getNode(ISD::SRL, DL, VA.getLocVT(), RV, + DAG.getConstant(32, DL, MVT::i32)); + + // The callee promoted the return value, so insert an Assert?ext SDNode so + // we won't promote the value again in this function. 
+ switch (VA.getLocInfo()) { + case CCValAssign::SExt: + RV = DAG.getNode(ISD::AssertSext, DL, VA.getLocVT(), RV, + DAG.getValueType(VA.getValVT())); + break; + case CCValAssign::ZExt: + RV = DAG.getNode(ISD::AssertZext, DL, VA.getLocVT(), RV, + DAG.getValueType(VA.getValVT())); + break; + default: + break; + } + + // Truncate the register down to the return value type. + if (VA.isExtInLoc()) + RV = DAG.getNode(ISD::TRUNCATE, DL, VA.getValVT(), RV); + + InVals.push_back(RV); + } + + return Chain; +} + +/// isFPImmLegal - Returns true if the target can instruction select the +/// specified FP immediate natively. If false, the legalizer will +/// materialize the FP immediate as a load from a constant pool. +bool VETargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT, + bool ForCodeSize) const { + return VT == MVT::f32 || VT == MVT::f64; +} + +/// Determine if the target supports unaligned memory accesses. +/// +/// This function returns true if the target allows unaligned memory accesses +/// of the specified type in the given address space. If true, it also returns +/// whether the unaligned memory access is "fast" in the last argument by +/// reference. This is used, for example, in situations where an array +/// copy/move/set is converted to a sequence of store operations. Its use +/// helps to ensure that such replacements don't generate code that causes an +/// alignment error (trap) on the target machine. +bool VETargetLowering::allowsMisalignedMemoryAccesses(EVT VT, + unsigned AddrSpace, + unsigned Align, + MachineMemOperand::Flags, + bool *Fast) const { + if (Fast) { + // It's fast anytime on VE + *Fast = true; + } + return true; +} + +bool VETargetLowering::hasAndNot(SDValue Y) const { + EVT VT = Y.getValueType(); + + // VE doesn't have vector and not instruction. + if (VT.isVector()) + return false; + + // VE allows different immediate values for X and Y where ~X & Y. + // Only simm7 works for X, and only mimm works for Y on VE. However, this + // function is used to check whether an immediate value is OK for and-not + // instruction as both X and Y. Generating additional instruction to + // retrieve an immediate value is no good since the purpose of this + // function is to convert a series of 3 instructions to another series of + // 3 instructions with better parallelism. Therefore, we return false + // for all immediate values now. + // FIXME: Change hasAndNot function to have two operands to make it work + // correctly with Aurora VE. + if (isa<ConstantSDNode>(Y)) + return false; + + // It's ok for generic registers. + return true; +} + VETargetLowering::VETargetLowering(const TargetMachine &TM, const VESubtarget &STI) : TargetLowering(TM), Subtarget(&STI) { @@ -108,7 +567,87 @@ VETargetLowering::VETargetLowering(const TargetMachine &TM, setBooleanVectorContents(ZeroOrOneBooleanContent); // Set up the register classes. + addRegisterClass(MVT::i32, &VE::I32RegClass); addRegisterClass(MVT::i64, &VE::I64RegClass); + addRegisterClass(MVT::f32, &VE::F32RegClass); + addRegisterClass(MVT::f64, &VE::I64RegClass); + + /// Load & Store { + for (MVT FPVT : MVT::fp_valuetypes()) { + for (MVT OtherFPVT : MVT::fp_valuetypes()) { + // Turn FP extload into load/fpextend + setLoadExtAction(ISD::EXTLOAD, FPVT, OtherFPVT, Expand); + + // Turn FP truncstore into trunc + store. 
+ setTruncStoreAction(FPVT, OtherFPVT, Expand); + } + } + + // VE doesn't have i1 sign extending load + for (MVT VT : MVT::integer_valuetypes()) { + setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote); + setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote); + setLoadExtAction(ISD::EXTLOAD, VT, MVT::i1, Promote); + setTruncStoreAction(VT, MVT::i1, Expand); + } + /// } Load & Store + + // Custom legalize address nodes into LO/HI parts. + MVT PtrVT = MVT::getIntegerVT(TM.getPointerSizeInBits(0)); + setOperationAction(ISD::BlockAddress, PtrVT, Custom); + setOperationAction(ISD::GlobalAddress, PtrVT, Custom); + setOperationAction(ISD::GlobalTLSAddress, PtrVT, Custom); + + /// VAARG handling { + setOperationAction(ISD::VASTART, MVT::Other, Custom); + // VAARG needs to be lowered to access with 8 bytes alignment. + setOperationAction(ISD::VAARG, MVT::Other, Custom); + // Use the default implementation. + setOperationAction(ISD::VACOPY, MVT::Other, Expand); + setOperationAction(ISD::VAEND, MVT::Other, Expand); + /// } VAARG handling + + /// Stack { + setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom); + setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Custom); + /// } Stack + + /// Int Ops { + for (MVT IntVT : {MVT::i32, MVT::i64}) { + // VE has no REM or DIVREM operations. + setOperationAction(ISD::UREM, IntVT, Expand); + setOperationAction(ISD::SREM, IntVT, Expand); + setOperationAction(ISD::SDIVREM, IntVT, Expand); + setOperationAction(ISD::UDIVREM, IntVT, Expand); + + setOperationAction(ISD::CTTZ, IntVT, Expand); + setOperationAction(ISD::ROTL, IntVT, Expand); + setOperationAction(ISD::ROTR, IntVT, Expand); + + // Use isel patterns for i32 and i64 + setOperationAction(ISD::BSWAP, IntVT, Legal); + setOperationAction(ISD::CTLZ, IntVT, Legal); + setOperationAction(ISD::CTPOP, IntVT, Legal); + + // Use isel patterns for i64, Promote i32 + LegalizeAction Act = (IntVT == MVT::i32) ? Promote : Legal; + setOperationAction(ISD::BITREVERSE, IntVT, Act); + } + /// } Int Ops + + /// Conversion { + // VE doesn't have instructions for fp<->uint, so expand them by llvm + setOperationAction(ISD::FP_TO_UINT, MVT::i32, Promote); // use i64 + setOperationAction(ISD::UINT_TO_FP, MVT::i32, Promote); // use i64 + setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand); + setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand); + + // fp16 not supported + for (MVT FPVT : MVT::fp_valuetypes()) { + setOperationAction(ISD::FP16_TO_FP, FPVT, Expand); + setOperationAction(ISD::FP_TO_FP16, FPVT, Expand); + } + /// } Conversion setStackPointerRegisterToSaveRestore(VE::SX11); @@ -122,16 +661,316 @@ VETargetLowering::VETargetLowering(const TargetMachine &TM, } const char *VETargetLowering::getTargetNodeName(unsigned Opcode) const { +#define TARGET_NODE_CASE(NAME) \ + case VEISD::NAME: \ + return "VEISD::" #NAME; switch ((VEISD::NodeType)Opcode) { case VEISD::FIRST_NUMBER: break; - case VEISD::RET_FLAG: - return "VEISD::RET_FLAG"; + TARGET_NODE_CASE(Lo) + TARGET_NODE_CASE(Hi) + TARGET_NODE_CASE(GETFUNPLT) + TARGET_NODE_CASE(GETSTACKTOP) + TARGET_NODE_CASE(GETTLSADDR) + TARGET_NODE_CASE(CALL) + TARGET_NODE_CASE(RET_FLAG) + TARGET_NODE_CASE(GLOBAL_BASE_REG) } +#undef TARGET_NODE_CASE return nullptr; } EVT VETargetLowering::getSetCCResultType(const DataLayout &, LLVMContext &, EVT VT) const { - return MVT::i64; + return MVT::i32; +} + +// Convert to a target node and set target flags. 
+SDValue VETargetLowering::withTargetFlags(SDValue Op, unsigned TF, + SelectionDAG &DAG) const { + if (const GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Op)) + return DAG.getTargetGlobalAddress(GA->getGlobal(), SDLoc(GA), + GA->getValueType(0), GA->getOffset(), TF); + + if (const BlockAddressSDNode *BA = dyn_cast<BlockAddressSDNode>(Op)) + return DAG.getTargetBlockAddress(BA->getBlockAddress(), Op.getValueType(), + 0, TF); + + if (const ExternalSymbolSDNode *ES = dyn_cast<ExternalSymbolSDNode>(Op)) + return DAG.getTargetExternalSymbol(ES->getSymbol(), ES->getValueType(0), + TF); + + llvm_unreachable("Unhandled address SDNode"); +} + +// Split Op into high and low parts according to HiTF and LoTF. +// Return an ADD node combining the parts. +SDValue VETargetLowering::makeHiLoPair(SDValue Op, unsigned HiTF, unsigned LoTF, + SelectionDAG &DAG) const { + SDLoc DL(Op); + EVT VT = Op.getValueType(); + SDValue Hi = DAG.getNode(VEISD::Hi, DL, VT, withTargetFlags(Op, HiTF, DAG)); + SDValue Lo = DAG.getNode(VEISD::Lo, DL, VT, withTargetFlags(Op, LoTF, DAG)); + return DAG.getNode(ISD::ADD, DL, VT, Hi, Lo); +} + +// Build SDNodes for producing an address from a GlobalAddress, ConstantPool, +// or ExternalSymbol SDNode. +SDValue VETargetLowering::makeAddress(SDValue Op, SelectionDAG &DAG) const { + SDLoc DL(Op); + EVT PtrVT = Op.getValueType(); + + // Handle PIC mode first. VE needs a got load for every variable! + if (isPositionIndependent()) { + // GLOBAL_BASE_REG codegen'ed with call. Inform MFI that this + // function has calls. + MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo(); + MFI.setHasCalls(true); + auto GlobalN = dyn_cast<GlobalAddressSDNode>(Op); + + if (isa<ConstantPoolSDNode>(Op) || + (GlobalN && GlobalN->getGlobal()->hasLocalLinkage())) { + // Create following instructions for local linkage PIC code. + // lea %s35, %gotoff_lo(.LCPI0_0) + // and %s35, %s35, (32)0 + // lea.sl %s35, %gotoff_hi(.LCPI0_0)(%s35) + // adds.l %s35, %s15, %s35 ; %s15 is GOT + // FIXME: use lea.sl %s35, %gotoff_hi(.LCPI0_0)(%s35, %s15) + SDValue HiLo = makeHiLoPair(Op, VEMCExpr::VK_VE_GOTOFF_HI32, + VEMCExpr::VK_VE_GOTOFF_LO32, DAG); + SDValue GlobalBase = DAG.getNode(VEISD::GLOBAL_BASE_REG, DL, PtrVT); + return DAG.getNode(ISD::ADD, DL, PtrVT, GlobalBase, HiLo); + } + // Create following instructions for not local linkage PIC code. + // lea %s35, %got_lo(.LCPI0_0) + // and %s35, %s35, (32)0 + // lea.sl %s35, %got_hi(.LCPI0_0)(%s35) + // adds.l %s35, %s15, %s35 ; %s15 is GOT + // ld %s35, (,%s35) + // FIXME: use lea.sl %s35, %gotoff_hi(.LCPI0_0)(%s35, %s15) + SDValue HiLo = makeHiLoPair(Op, VEMCExpr::VK_VE_GOT_HI32, + VEMCExpr::VK_VE_GOT_LO32, DAG); + SDValue GlobalBase = DAG.getNode(VEISD::GLOBAL_BASE_REG, DL, PtrVT); + SDValue AbsAddr = DAG.getNode(ISD::ADD, DL, PtrVT, GlobalBase, HiLo); + return DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), AbsAddr, + MachinePointerInfo::getGOT(DAG.getMachineFunction())); + } + + // This is one of the absolute code models. + switch (getTargetMachine().getCodeModel()) { + default: + llvm_unreachable("Unsupported absolute code model"); + case CodeModel::Small: + case CodeModel::Medium: + case CodeModel::Large: + // abs64. 
+ return makeHiLoPair(Op, VEMCExpr::VK_VE_HI32, VEMCExpr::VK_VE_LO32, DAG); + } +} + +/// Custom Lower { + +SDValue VETargetLowering::LowerGlobalAddress(SDValue Op, + SelectionDAG &DAG) const { + return makeAddress(Op, DAG); +} + +SDValue VETargetLowering::LowerBlockAddress(SDValue Op, + SelectionDAG &DAG) const { + return makeAddress(Op, DAG); +} + +SDValue +VETargetLowering::LowerToTLSGeneralDynamicModel(SDValue Op, + SelectionDAG &DAG) const { + SDLoc dl(Op); + + // Generate the following code: + // t1: ch,glue = callseq_start t0, 0, 0 + // t2: i64,ch,glue = VEISD::GETTLSADDR t1, label, t1:1 + // t3: ch,glue = callseq_end t2, 0, 0, t2:2 + // t4: i64,ch,glue = CopyFromReg t3, Register:i64 $sx0, t3:1 + SDValue Label = withTargetFlags(Op, 0, DAG); + EVT PtrVT = Op.getValueType(); + + // Lowering the machine isd will make sure everything is in the right + // location. + SDValue Chain = DAG.getEntryNode(); + SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); + const uint32_t *Mask = Subtarget->getRegisterInfo()->getCallPreservedMask( + DAG.getMachineFunction(), CallingConv::C); + Chain = DAG.getCALLSEQ_START(Chain, 64, 0, dl); + SDValue Args[] = {Chain, Label, DAG.getRegisterMask(Mask), Chain.getValue(1)}; + Chain = DAG.getNode(VEISD::GETTLSADDR, dl, NodeTys, Args); + Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(64, dl, true), + DAG.getIntPtrConstant(0, dl, true), + Chain.getValue(1), dl); + Chain = DAG.getCopyFromReg(Chain, dl, VE::SX0, PtrVT, Chain.getValue(1)); + + // GETTLSADDR will be codegen'ed as call. Inform MFI that function has calls. + MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo(); + MFI.setHasCalls(true); + + // Also generate code to prepare a GOT register if it is PIC. + if (isPositionIndependent()) { + MachineFunction &MF = DAG.getMachineFunction(); + Subtarget->getInstrInfo()->getGlobalBaseReg(&MF); + } + + return Chain; +} + +SDValue VETargetLowering::LowerGlobalTLSAddress(SDValue Op, + SelectionDAG &DAG) const { + // The current implementation of nld (2.26) doesn't allow local exec model + // code described in VE-tls_v1.1.pdf (*1) as its input. Instead, we always + // generate the general dynamic model code sequence. + // + // *1: https://www.nec.com/en/global/prod/hpc/aurora/document/VE-tls_v1.1.pdf + return LowerToTLSGeneralDynamicModel(Op, DAG); +} + +SDValue VETargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const { + MachineFunction &MF = DAG.getMachineFunction(); + VEMachineFunctionInfo *FuncInfo = MF.getInfo<VEMachineFunctionInfo>(); + auto PtrVT = getPointerTy(DAG.getDataLayout()); + + // Need frame address to find the address of VarArgsFrameIndex. + MF.getFrameInfo().setFrameAddressIsTaken(true); + + // vastart just stores the address of the VarArgsFrameIndex slot into the + // memory location argument. 
+ SDLoc DL(Op); + SDValue Offset = + DAG.getNode(ISD::ADD, DL, PtrVT, DAG.getRegister(VE::SX9, PtrVT), + DAG.getIntPtrConstant(FuncInfo->getVarArgsFrameOffset(), DL)); + const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue(); + return DAG.getStore(Op.getOperand(0), DL, Offset, Op.getOperand(1), + MachinePointerInfo(SV)); +} + +SDValue VETargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const { + SDNode *Node = Op.getNode(); + EVT VT = Node->getValueType(0); + SDValue InChain = Node->getOperand(0); + SDValue VAListPtr = Node->getOperand(1); + EVT PtrVT = VAListPtr.getValueType(); + const Value *SV = cast<SrcValueSDNode>(Node->getOperand(2))->getValue(); + SDLoc DL(Node); + SDValue VAList = + DAG.getLoad(PtrVT, DL, InChain, VAListPtr, MachinePointerInfo(SV)); + SDValue Chain = VAList.getValue(1); + SDValue NextPtr; + + if (VT == MVT::f32) { + // float --> need special handling like below. + // 0 4 + // +------+------+ + // | empty| float| + // +------+------+ + // Increment the pointer, VAList, by 8 to the next vaarg. + NextPtr = + DAG.getNode(ISD::ADD, DL, PtrVT, VAList, DAG.getIntPtrConstant(8, DL)); + // Then, adjust VAList. + unsigned InternalOffset = 4; + VAList = DAG.getNode(ISD::ADD, DL, PtrVT, VAList, + DAG.getConstant(InternalOffset, DL, PtrVT)); + } else { + // Increment the pointer, VAList, by 8 to the next vaarg. + NextPtr = + DAG.getNode(ISD::ADD, DL, PtrVT, VAList, DAG.getIntPtrConstant(8, DL)); + } + + // Store the incremented VAList to the legalized pointer. + InChain = DAG.getStore(Chain, DL, NextPtr, VAListPtr, MachinePointerInfo(SV)); + + // Load the actual argument out of the pointer VAList. + // We can't count on greater alignment than the word size. + return DAG.getLoad(VT, DL, InChain, VAList, MachinePointerInfo(), + std::min(PtrVT.getSizeInBits(), VT.getSizeInBits()) / 8); +} + +SDValue VETargetLowering::lowerDYNAMIC_STACKALLOC(SDValue Op, + SelectionDAG &DAG) const { + // Generate following code. + // (void)__llvm_grow_stack(size); + // ret = GETSTACKTOP; // pseudo instruction + SDLoc DL(Op); + + // Get the inputs. + SDNode *Node = Op.getNode(); + SDValue Chain = Op.getOperand(0); + SDValue Size = Op.getOperand(1); + MaybeAlign Alignment(Op.getConstantOperandVal(2)); + EVT VT = Node->getValueType(0); + + // Chain the dynamic stack allocation so that it doesn't modify the stack + // pointer when other instructions are using the stack. 
+ Chain = DAG.getCALLSEQ_START(Chain, 0, 0, DL); + + const TargetFrameLowering &TFI = *Subtarget->getFrameLowering(); + Align StackAlign = TFI.getStackAlign(); + bool NeedsAlign = Alignment.valueOrOne() > StackAlign; + + // Prepare arguments + TargetLowering::ArgListTy Args; + TargetLowering::ArgListEntry Entry; + Entry.Node = Size; + Entry.Ty = Entry.Node.getValueType().getTypeForEVT(*DAG.getContext()); + Args.push_back(Entry); + if (NeedsAlign) { + Entry.Node = DAG.getConstant(~(Alignment->value() - 1ULL), DL, VT); + Entry.Ty = Entry.Node.getValueType().getTypeForEVT(*DAG.getContext()); + Args.push_back(Entry); + } + Type *RetTy = Type::getVoidTy(*DAG.getContext()); + + EVT PtrVT = Op.getValueType(); + SDValue Callee; + if (NeedsAlign) { + Callee = DAG.getTargetExternalSymbol("__ve_grow_stack_align", PtrVT, 0); + } else { + Callee = DAG.getTargetExternalSymbol("__ve_grow_stack", PtrVT, 0); + } + + TargetLowering::CallLoweringInfo CLI(DAG); + CLI.setDebugLoc(DL) + .setChain(Chain) + .setCallee(CallingConv::PreserveAll, RetTy, Callee, std::move(Args)) + .setDiscardResult(true); + std::pair<SDValue, SDValue> pair = LowerCallTo(CLI); + Chain = pair.second; + SDValue Result = DAG.getNode(VEISD::GETSTACKTOP, DL, VT, Chain); + if (NeedsAlign) { + Result = DAG.getNode(ISD::ADD, DL, VT, Result, + DAG.getConstant((Alignment->value() - 1ULL), DL, VT)); + Result = DAG.getNode(ISD::AND, DL, VT, Result, + DAG.getConstant(~(Alignment->value() - 1ULL), DL, VT)); + } + // Chain = Result.getValue(1); + Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(0, DL, true), + DAG.getIntPtrConstant(0, DL, true), SDValue(), DL); + + SDValue Ops[2] = {Result, Chain}; + return DAG.getMergeValues(Ops, DL); +} + +SDValue VETargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { + switch (Op.getOpcode()) { + default: + llvm_unreachable("Should not custom lower this!"); + case ISD::BlockAddress: + return LowerBlockAddress(Op, DAG); + case ISD::DYNAMIC_STACKALLOC: + return lowerDYNAMIC_STACKALLOC(Op, DAG); + case ISD::GlobalAddress: + return LowerGlobalAddress(Op, DAG); + case ISD::GlobalTLSAddress: + return LowerGlobalTLSAddress(Op, DAG); + case ISD::VASTART: + return LowerVASTART(Op, DAG); + case ISD::VAARG: + return LowerVAARG(Op, DAG); + } } +/// } Custom Lower |
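For reference, the empty/float slot layout that the new allocateFloat hook describes (an 8-byte, 8-aligned stack slot whose first four bytes are padding and whose last four hold the f32 value) can be illustrated outside of LLVM's CCState machinery. The following is a minimal standalone sketch of the same offset arithmetic; the helper name and running-offset variable are illustrative stand-ins, not code from the patch.

// Illustrative sketch of the f32 argument slot layout set up by allocateFloat:
// 4 bytes of padding aligned to 8, followed by the 4-byte float value.
#include <cassert>
#include <cstddef>
#include <cstdio>

static size_t NextStackOffset = 0; // mimics CCState's running stack offset

// Align the current offset to Align, reserve Size bytes, return the offset
// (mirrors what CCState::AllocateStack does).
static size_t allocateStack(size_t Size, size_t Align) {
  NextStackOffset = (NextStackOffset + Align - 1) & ~(Align - 1);
  size_t Offset = NextStackOffset;
  NextStackOffset += Size;
  return Offset;
}

int main() {
  allocateStack(4, 8);                   // empty area, forces 8-byte alignment
  size_t FloatOff = allocateStack(4, 4); // float value lands right after it
  assert(FloatOff % 8 == 4 && "float occupies the upper half of the 8-byte slot");
  std::printf("f32 argument stored at slot offset %zu\n", FloatOff);
  return 0;
}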
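Likewise, the absolute-addressing path in makeAddress/makeHiLoPair splits a symbol's 64-bit address into VK_VE_HI32/VK_VE_LO32 halves that VEISD::Hi and VEISD::Lo materialize and a plain ADD then recombines (the "lea %lo; and ...,(32)0; lea.sl %hi(...)(reg)" sequence quoted in the patch comments). Below is a conceptual sketch of that recombination in terms of the arithmetic only, not the SelectionDAG nodes; it is an illustration, not code from the patch.

// Conceptual view of makeHiLoPair for the absolute code models: %hi carries
// bits 63..32 of the address, %lo carries bits 31..0 (zero-extended by the
// "and (32)0"), and the final ADD rebuilds the full 64-bit address.
#include <cassert>
#include <cstdint>

static uint64_t rebuildAddress(uint32_t Hi32, uint32_t Lo32) {
  return ((uint64_t)Hi32 << 32) + Lo32;
}

int main() {
  const uint64_t Addr = 0x0000600012345678ULL;
  assert(rebuildAddress((uint32_t)(Addr >> 32), (uint32_t)Addr) == Addr);
  return 0;
}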