Diffstat (limited to 'contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp')
-rw-r--r-- | contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp | 306
1 file changed, 201 insertions, 105 deletions
diff --git a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
index 4064a983099c..f06afdbcea9e 100644
--- a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
@@ -205,7 +205,7 @@ WebAssemblyTargetLowering::WebAssemblyTargetLowering(
     for (auto T : {MVT::i8, MVT::i16, MVT::i32})
       setOperationAction(ISD::SIGN_EXTEND_INREG, T, Action);
   }
-  for (auto T : MVT::integer_vector_valuetypes())
+  for (auto T : MVT::integer_fixedlen_vector_valuetypes())
     setOperationAction(ISD::SIGN_EXTEND_INREG, T, Expand);
 
   // Dynamic stack allocation: use the default expansion.
@@ -228,7 +228,7 @@ WebAssemblyTargetLowering::WebAssemblyTargetLowering(
   // - Floating-point extending loads.
   // - Floating-point truncating stores.
   // - i1 extending loads.
-  // - extending/truncating SIMD loads/stores
+  // - truncating SIMD stores and most extending loads
   setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
   setTruncStoreAction(MVT::f64, MVT::f32, Expand);
   for (auto T : MVT::integer_valuetypes())
@@ -237,7 +237,7 @@ WebAssemblyTargetLowering::WebAssemblyTargetLowering(
   if (Subtarget->hasSIMD128()) {
     for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64, MVT::v4f32,
                    MVT::v2f64}) {
-      for (auto MemT : MVT::vector_valuetypes()) {
+      for (auto MemT : MVT::fixedlen_vector_valuetypes()) {
         if (MVT(T) != MemT) {
           setTruncStoreAction(T, MemT, Expand);
           for (auto Ext : {ISD::EXTLOAD, ISD::ZEXTLOAD, ISD::SEXTLOAD})
@@ -245,6 +245,14 @@ WebAssemblyTargetLowering::WebAssemblyTargetLowering(
         }
       }
     }
+    // But some vector extending loads are legal
+    if (Subtarget->hasUnimplementedSIMD128()) {
+      for (auto Ext : {ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}) {
+        setLoadExtAction(Ext, MVT::v8i16, MVT::v8i8, Legal);
+        setLoadExtAction(Ext, MVT::v4i32, MVT::v4i16, Legal);
+        setLoadExtAction(Ext, MVT::v2i64, MVT::v2i32, Legal);
+      }
+    }
   }
 
   // Don't do anything clever with build_pairs
@@ -259,16 +267,6 @@ WebAssemblyTargetLowering::WebAssemblyTargetLowering(
 
   setMaxAtomicSizeInBitsSupported(64);
 
-  if (Subtarget->hasBulkMemory()) {
-    // Use memory.copy and friends over multiple loads and stores
-    MaxStoresPerMemcpy = 1;
-    MaxStoresPerMemcpyOptSize = 1;
-    MaxStoresPerMemmove = 1;
-    MaxStoresPerMemmoveOptSize = 1;
-    MaxStoresPerMemset = 1;
-    MaxStoresPerMemsetOptSize = 1;
-  }
-
   // Override the __gnu_f2h_ieee/__gnu_h2f_ieee names so that the f32 name is
   // consistent with the f64 and f128 names.
   setLibcallName(RTLIB::FPEXT_F16_F32, "__extendhfsf2");
@@ -337,8 +335,8 @@ static MachineBasicBlock *LowerFPToInt(MachineInstr &MI, DebugLoc DL,
                                        bool Float64, unsigned LoweredOpcode) {
   MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
 
-  unsigned OutReg = MI.getOperand(0).getReg();
-  unsigned InReg = MI.getOperand(1).getReg();
+  Register OutReg = MI.getOperand(0).getReg();
+  Register InReg = MI.getOperand(1).getReg();
 
   unsigned Abs = Float64 ? WebAssembly::ABS_F64 : WebAssembly::ABS_F32;
   unsigned FConst = Float64 ? WebAssembly::CONST_F64 : WebAssembly::CONST_F32;
@@ -396,9 +394,9 @@ static MachineBasicBlock *LowerFPToInt(MachineInstr &MI, DebugLoc DL,
   // For unsigned numbers, we have to do a separate comparison with zero.
   if (IsUnsigned) {
     Tmp1 = MRI.createVirtualRegister(MRI.getRegClass(InReg));
-    unsigned SecondCmpReg =
+    Register SecondCmpReg =
         MRI.createVirtualRegister(&WebAssembly::I32RegClass);
-    unsigned AndReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass);
+    Register AndReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass);
     BuildMI(BB, DL, TII.get(FConst), Tmp1)
         .addFPImm(cast<ConstantFP>(ConstantFP::get(Ty, 0.0)));
     BuildMI(BB, DL, TII.get(GE), SecondCmpReg).addReg(Tmp0).addReg(Tmp1);
@@ -550,6 +548,16 @@ bool WebAssemblyTargetLowering::isIntDivCheap(EVT VT,
   return true;
 }
 
+bool WebAssemblyTargetLowering::isVectorLoadExtDesirable(SDValue ExtVal) const {
+  if (!Subtarget->hasUnimplementedSIMD128())
+    return false;
+  MVT ExtT = ExtVal.getSimpleValueType();
+  MVT MemT = cast<LoadSDNode>(ExtVal->getOperand(0))->getSimpleValueType(0);
+  return (ExtT == MVT::v8i16 && MemT == MVT::v8i8) ||
+         (ExtT == MVT::v4i32 && MemT == MVT::v4i16) ||
+         (ExtT == MVT::v2i64 && MemT == MVT::v2i32);
+}
+
 EVT WebAssemblyTargetLowering::getSetCCResultType(const DataLayout &DL,
                                                   LLVMContext &C,
                                                   EVT VT) const {
@@ -569,7 +577,7 @@ bool WebAssemblyTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
     Info.memVT = MVT::i32;
     Info.ptrVal = I.getArgOperand(0);
     Info.offset = 0;
-    Info.align = 4;
+    Info.align = Align(4);
     // atomic.notify instruction does not really load the memory specified with
     // this argument, but MachineMemOperand should either be load or store, so
     // we set this to a load.
@@ -583,7 +591,7 @@ bool WebAssemblyTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
     Info.memVT = MVT::i32;
     Info.ptrVal = I.getArgOperand(0);
    Info.offset = 0;
-    Info.align = 4;
+    Info.align = Align(4);
     Info.flags = MachineMemOperand::MOVolatile | MachineMemOperand::MOLoad;
     return true;
   case Intrinsic::wasm_atomic_wait_i64:
@@ -591,7 +599,7 @@ bool WebAssemblyTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
     Info.memVT = MVT::i64;
     Info.ptrVal = I.getArgOperand(0);
     Info.offset = 0;
-    Info.align = 8;
+    Info.align = Align(8);
     Info.flags = MachineMemOperand::MOVolatile | MachineMemOperand::MOLoad;
     return true;
   default:
@@ -623,7 +631,8 @@ static bool callingConvSupported(CallingConv::ID CallConv) {
          CallConv == CallingConv::Cold ||
          CallConv == CallingConv::PreserveMost ||
          CallConv == CallingConv::PreserveAll ||
-         CallConv == CallingConv::CXX_FAST_TLS;
+         CallConv == CallingConv::CXX_FAST_TLS ||
+         CallConv == CallingConv::WASM_EmscriptenInvoke;
 }
 
 SDValue
@@ -644,13 +653,36 @@ WebAssemblyTargetLowering::LowerCall(CallLoweringInfo &CLI,
   if (CLI.IsPatchPoint)
     fail(DL, DAG, "WebAssembly doesn't support patch point yet");
 
-  // Fail if tail calls are required but not enabled
-  if (!Subtarget->hasTailCall()) {
-    if ((CallConv == CallingConv::Fast && CLI.IsTailCall &&
-         MF.getTarget().Options.GuaranteedTailCallOpt) ||
-        (CLI.CS && CLI.CS.isMustTailCall()))
-      fail(DL, DAG, "WebAssembly 'tail-call' feature not enabled");
-    CLI.IsTailCall = false;
+  if (CLI.IsTailCall) {
+    bool MustTail = CLI.CS && CLI.CS.isMustTailCall();
+    if (Subtarget->hasTailCall() && !CLI.IsVarArg) {
+      // Do not tail call unless caller and callee return types match
+      const Function &F = MF.getFunction();
+      const TargetMachine &TM = getTargetMachine();
+      Type *RetTy = F.getReturnType();
+      SmallVector<MVT, 4> CallerRetTys;
+      SmallVector<MVT, 4> CalleeRetTys;
+      computeLegalValueVTs(F, TM, RetTy, CallerRetTys);
+      computeLegalValueVTs(F, TM, CLI.RetTy, CalleeRetTys);
+      bool TypesMatch = CallerRetTys.size() == CalleeRetTys.size() &&
+                        std::equal(CallerRetTys.begin(), CallerRetTys.end(),
+                                   CalleeRetTys.begin());
+      if (!TypesMatch) {
+        // musttail in this case would be an LLVM IR validation failure
+        assert(!MustTail);
+        CLI.IsTailCall = false;
+      }
+    } else {
+      CLI.IsTailCall = false;
+      if (MustTail) {
+        if (CLI.IsVarArg) {
+          // The return would pop the argument buffer
+          fail(DL, DAG, "WebAssembly does not support varargs tail calls");
+        } else {
+          fail(DL, DAG, "WebAssembly 'tail-call' feature not enabled");
+        }
+      }
+    }
   }
 
   SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
@@ -659,6 +691,16 @@ WebAssemblyTargetLowering::LowerCall(CallLoweringInfo &CLI,
 
   SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
   SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
+
+  // The generic code may have added an sret argument. If we're lowering an
+  // invoke function, the ABI requires that the function pointer be the first
+  // argument, so we may have to swap the arguments.
+  if (CallConv == CallingConv::WASM_EmscriptenInvoke && Outs.size() >= 2 &&
+      Outs[0].Flags.isSRet()) {
+    std::swap(Outs[0], Outs[1]);
+    std::swap(OutVals[0], OutVals[1]);
+  }
+
   unsigned NumFixedArgs = 0;
   for (unsigned I = 0; I < Outs.size(); ++I) {
     const ISD::OutputArg &Out = Outs[I];
@@ -810,8 +852,8 @@ bool WebAssemblyTargetLowering::CanLowerReturn(
     CallingConv::ID /*CallConv*/, MachineFunction & /*MF*/, bool /*IsVarArg*/,
     const SmallVectorImpl<ISD::OutputArg> &Outs,
     LLVMContext & /*Context*/) const {
-  // WebAssembly can't currently handle returning tuples.
-  return Outs.size() <= 1;
+  // WebAssembly can only handle returning tuples with multivalue enabled
+  return Subtarget->hasMultivalue() || Outs.size() <= 1;
 }
 
 SDValue WebAssemblyTargetLowering::LowerReturn(
@@ -819,7 +861,8 @@ SDValue WebAssemblyTargetLowering::LowerReturn(
     const SmallVectorImpl<ISD::OutputArg> &Outs,
     const SmallVectorImpl<SDValue> &OutVals, const SDLoc &DL,
     SelectionDAG &DAG) const {
-  assert(Outs.size() <= 1 && "WebAssembly can only return up to one value");
+  assert((Subtarget->hasMultivalue() || Outs.size() <= 1) &&
+         "MVP WebAssembly can only return up to one value");
   if (!callingConvSupported(CallConv))
     fail(DL, DAG, "WebAssembly doesn't support non-C calling conventions");
 
@@ -881,7 +924,7 @@ SDValue WebAssemblyTargetLowering::LowerFormalArguments(
   // the buffer is passed as an argument.
   if (IsVarArg) {
     MVT PtrVT = getPointerTy(MF.getDataLayout());
-    unsigned VarargVreg =
+    Register VarargVreg =
         MF.getRegInfo().createVirtualRegister(getRegClassFor(PtrVT));
     MFI->setVarargBufferVreg(VarargVreg);
     Chain = DAG.getCopyToReg(
@@ -1022,8 +1065,9 @@ SDValue WebAssemblyTargetLowering::LowerRETURNADDR(SDValue Op,
     return SDValue();
 
   unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+  MakeLibCallOptions CallOptions;
   return makeLibCall(DAG, RTLIB::RETURN_ADDRESS, Op.getValueType(),
-                     {DAG.getConstant(Depth, DL, MVT::i32)}, false, DL)
+                     {DAG.getConstant(Depth, DL, MVT::i32)}, CallOptions, DL)
       .first;
 }
 
@@ -1037,7 +1081,7 @@ SDValue WebAssemblyTargetLowering::LowerFRAMEADDR(SDValue Op,
 
   DAG.getMachineFunction().getFrameInfo().setFrameAddressIsTaken(true);
   EVT VT = Op.getValueType();
-  unsigned FP =
+  Register FP =
       Subtarget->getRegisterInfo()->getFrameRegister(DAG.getMachineFunction());
   return DAG.getCopyFromReg(DAG.getEntryNode(), SDLoc(Op), FP, VT);
 }
@@ -1249,68 +1293,116 @@ SDValue WebAssemblyTargetLowering::LowerBUILD_VECTOR(SDValue Op,
   const EVT VecT = Op.getValueType();
   const EVT LaneT = Op.getOperand(0).getValueType();
   const size_t Lanes = Op.getNumOperands();
+  bool CanSwizzle = Subtarget->hasUnimplementedSIMD128() && VecT == MVT::v16i8;
+
+  // BUILD_VECTORs are lowered to the instruction that initializes the highest
+  // possible number of lanes at once followed by a sequence of replace_lane
+  // instructions to individually initialize any remaining lanes.
+
+  // TODO: Tune this. For example, lanewise swizzling is very expensive, so
+  // swizzled lanes should be given greater weight.
+
+  // TODO: Investigate building vectors by shuffling together vectors built by
+  // separately specialized means.
+
   auto IsConstant = [](const SDValue &V) {
     return V.getOpcode() == ISD::Constant || V.getOpcode() == ISD::ConstantFP;
   };
 
-  // Find the most common operand, which is approximately the best to splat
-  using Entry = std::pair<SDValue, size_t>;
-  SmallVector<Entry, 16> ValueCounts;
-  size_t NumConst = 0, NumDynamic = 0;
-  for (const SDValue &Lane : Op->op_values()) {
-    if (Lane.isUndef()) {
-      continue;
-    } else if (IsConstant(Lane)) {
-      NumConst++;
-    } else {
-      NumDynamic++;
-    }
-    auto CountIt = std::find_if(ValueCounts.begin(), ValueCounts.end(),
-                                [&Lane](Entry A) { return A.first == Lane; });
-    if (CountIt == ValueCounts.end()) {
-      ValueCounts.emplace_back(Lane, 1);
+  // Returns the source vector and index vector pair if they exist. Checks for:
+  //   (extract_vector_elt
+  //     $src,
+  //     (sign_extend_inreg (extract_vector_elt $indices, $i))
+  //   )
+  auto GetSwizzleSrcs = [](size_t I, const SDValue &Lane) {
+    auto Bail = std::make_pair(SDValue(), SDValue());
+    if (Lane->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
+      return Bail;
+    const SDValue &SwizzleSrc = Lane->getOperand(0);
+    const SDValue &IndexExt = Lane->getOperand(1);
+    if (IndexExt->getOpcode() != ISD::SIGN_EXTEND_INREG)
+      return Bail;
+    const SDValue &Index = IndexExt->getOperand(0);
+    if (Index->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
+      return Bail;
+    const SDValue &SwizzleIndices = Index->getOperand(0);
+    if (SwizzleSrc.getValueType() != MVT::v16i8 ||
+        SwizzleIndices.getValueType() != MVT::v16i8 ||
+        Index->getOperand(1)->getOpcode() != ISD::Constant ||
+        Index->getConstantOperandVal(1) != I)
+      return Bail;
+    return std::make_pair(SwizzleSrc, SwizzleIndices);
+  };
+
+  using ValueEntry = std::pair<SDValue, size_t>;
+  SmallVector<ValueEntry, 16> SplatValueCounts;
+
+  using SwizzleEntry = std::pair<std::pair<SDValue, SDValue>, size_t>;
+  SmallVector<SwizzleEntry, 16> SwizzleCounts;
+
+  auto AddCount = [](auto &Counts, const auto &Val) {
+    auto CountIt = std::find_if(Counts.begin(), Counts.end(),
+                                [&Val](auto E) { return E.first == Val; });
+    if (CountIt == Counts.end()) {
+      Counts.emplace_back(Val, 1);
     } else {
       CountIt->second++;
     }
+  };
+
+  auto GetMostCommon = [](auto &Counts) {
+    auto CommonIt =
+        std::max_element(Counts.begin(), Counts.end(),
+                         [](auto A, auto B) { return A.second < B.second; });
+    assert(CommonIt != Counts.end() && "Unexpected all-undef build_vector");
+    return *CommonIt;
+  };
+
+  size_t NumConstantLanes = 0;
+
+  // Count eligible lanes for each type of vector creation op
+  for (size_t I = 0; I < Lanes; ++I) {
+    const SDValue &Lane = Op->getOperand(I);
+    if (Lane.isUndef())
+      continue;
+
+    AddCount(SplatValueCounts, Lane);
+
+    if (IsConstant(Lane)) {
+      NumConstantLanes++;
+    } else if (CanSwizzle) {
+      auto SwizzleSrcs = GetSwizzleSrcs(I, Lane);
+      if (SwizzleSrcs.first)
+        AddCount(SwizzleCounts, SwizzleSrcs);
+    }
   }
-  auto CommonIt =
-      std::max_element(ValueCounts.begin(), ValueCounts.end(),
-                       [](Entry A, Entry B) { return A.second < B.second; });
-  assert(CommonIt != ValueCounts.end() && "Unexpected all-undef build_vector");
-  SDValue SplatValue = CommonIt->first;
-  size_t NumCommon = CommonIt->second;
-
-  // If v128.const is available, consider using it instead of a splat
+
+  SDValue SplatValue;
+  size_t NumSplatLanes;
+  std::tie(SplatValue, NumSplatLanes) = GetMostCommon(SplatValueCounts);
+
+  SDValue SwizzleSrc;
+  SDValue SwizzleIndices;
+  size_t NumSwizzleLanes = 0;
+  if (SwizzleCounts.size())
+    std::forward_as_tuple(std::tie(SwizzleSrc, SwizzleIndices),
+                          NumSwizzleLanes) = GetMostCommon(SwizzleCounts);
+
+  // Predicate returning true if the lane is properly initialized by the
+  // original instruction
+  std::function<bool(size_t, const SDValue &)> IsLaneConstructed;
+  SDValue Result;
   if (Subtarget->hasUnimplementedSIMD128()) {
-    // {i32,i64,f32,f64}.const opcode, and value
-    const size_t ConstBytes = 1 + std::max(size_t(4), 16 / Lanes);
-    // SIMD prefix and opcode
-    const size_t SplatBytes = 2;
-    const size_t SplatConstBytes = SplatBytes + ConstBytes;
-    // SIMD prefix, opcode, and lane index
-    const size_t ReplaceBytes = 3;
-    const size_t ReplaceConstBytes = ReplaceBytes + ConstBytes;
-    // SIMD prefix, v128.const opcode, and 128-bit value
-    const size_t VecConstBytes = 18;
-    // Initial v128.const and a replace_lane for each non-const operand
-    const size_t ConstInitBytes = VecConstBytes + NumDynamic * ReplaceBytes;
-    // Initial splat and all necessary replace_lanes
-    const size_t SplatInitBytes =
-        IsConstant(SplatValue)
-            // Initial constant splat
-            ? (SplatConstBytes +
-               // Constant replace_lanes
-               (NumConst - NumCommon) * ReplaceConstBytes +
-               // Dynamic replace_lanes
-               (NumDynamic * ReplaceBytes))
-            // Initial dynamic splat
-            : (SplatBytes +
-               // Constant replace_lanes
-               (NumConst * ReplaceConstBytes) +
-               // Dynamic replace_lanes
-               (NumDynamic - NumCommon) * ReplaceBytes);
-    if (ConstInitBytes < SplatInitBytes) {
-      // Create build_vector that will lower to initial v128.const
+    // Prefer swizzles over vector consts over splats
+    if (NumSwizzleLanes >= NumSplatLanes &&
+        NumSwizzleLanes >= NumConstantLanes) {
+      Result = DAG.getNode(WebAssemblyISD::SWIZZLE, DL, VecT, SwizzleSrc,
+                           SwizzleIndices);
+      auto Swizzled = std::make_pair(SwizzleSrc, SwizzleIndices);
+      IsLaneConstructed = [&, Swizzled](size_t I, const SDValue &Lane) {
+        return Swizzled == GetSwizzleSrcs(I, Lane);
+      };
+    } else if (NumConstantLanes >= NumSplatLanes) {
       SmallVector<SDValue, 16> ConstLanes;
       for (const SDValue &Lane : Op->op_values()) {
         if (IsConstant(Lane)) {
@@ -1321,26 +1413,35 @@ SDValue WebAssemblyTargetLowering::LowerBUILD_VECTOR(SDValue Op,
           ConstLanes.push_back(DAG.getConstant(0, DL, LaneT));
         }
       }
-      SDValue Result = DAG.getBuildVector(VecT, DL, ConstLanes);
-      // Add replace_lane instructions for non-const lanes
-      for (size_t I = 0; I < Lanes; ++I) {
-        const SDValue &Lane = Op->getOperand(I);
-        if (!Lane.isUndef() && !IsConstant(Lane))
-          Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VecT, Result, Lane,
-                               DAG.getConstant(I, DL, MVT::i32));
-      }
-      return Result;
+      Result = DAG.getBuildVector(VecT, DL, ConstLanes);
+      IsLaneConstructed = [&](size_t _, const SDValue &Lane) {
+        return IsConstant(Lane);
+      };
+    }
+  }
+  if (!Result) {
+    // Use a splat, but possibly a load_splat
+    LoadSDNode *SplattedLoad;
+    if (Subtarget->hasUnimplementedSIMD128() &&
+        (SplattedLoad = dyn_cast<LoadSDNode>(SplatValue)) &&
+        SplattedLoad->getMemoryVT() == VecT.getVectorElementType()) {
+      Result = DAG.getNode(WebAssemblyISD::LOAD_SPLAT, DL, VecT, SplatValue);
+    } else {
+      Result = DAG.getSplatBuildVector(VecT, DL, SplatValue);
     }
+    IsLaneConstructed = [&](size_t _, const SDValue &Lane) {
+      return Lane == SplatValue;
+    };
   }
-  // Use a splat for the initial vector
-  SDValue Result = DAG.getSplatBuildVector(VecT, DL, SplatValue);
-  // Add replace_lane instructions for other values
+
+  // Add replace_lane instructions for any unhandled values
   for (size_t I = 0; I < Lanes; ++I) {
     const SDValue &Lane = Op->getOperand(I);
-    if (Lane != SplatValue)
+    if (!Lane.isUndef() && !IsLaneConstructed(I, Lane))
      Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VecT, Result, Lane,
                            DAG.getConstant(I, DL, MVT::i32));
   }
+
   return Result;
 }
 
@@ -1415,11 +1516,6 @@ SDValue WebAssemblyTargetLowering::LowerShift(SDValue Op,
   // Only manually lower vector shifts
   assert(Op.getSimpleValueType().isVector());
 
-  // Expand all vector shifts until V8 fixes its implementation
-  // TODO: remove this once V8 is fixed
-  if (!Subtarget->hasUnimplementedSIMD128())
-    return unrollVectorShift(Op, DAG);
-
   // Unroll non-splat vector shifts
   BuildVectorSDNode *ShiftVec;
   SDValue SplatVal;
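The isVectorLoadExtDesirable() definition added in the @@ -550 hunk also needs a matching override declaration in the class; that declaration belongs in WebAssemblyISelLowering.h, which is not part of this diff. A minimal sketch of what the companion declaration is assumed to look like:

    // Assumed companion declaration in WebAssemblyISelLowering.h (not shown in
    // this diff). The DAG combiner consults this hook before folding a
    // sign/zero-extended vector load into a single extending load, which the
    // @@ -245 hunk marks Legal for v8i16/v8i8, v4i32/v4i16, and v2i64/v2i32
    // when unimplemented-simd128 is enabled.
    bool isVectorLoadExtDesirable(SDValue ExtVal) const override;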