Diffstat (limited to 'llvm/lib/Target/ARM/ARMISelLowering.cpp')
| -rw-r--r-- | llvm/lib/Target/ARM/ARMISelLowering.cpp | 1002 |
1 file changed, 450 insertions, 552 deletions
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp index 75d16a42d020..bd8d6079e1ba 100644 --- a/llvm/lib/Target/ARM/ARMISelLowering.cpp +++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp @@ -83,13 +83,11 @@ #include "llvm/IR/Intrinsics.h" #include "llvm/IR/IntrinsicsARM.h" #include "llvm/IR/Module.h" -#include "llvm/IR/PatternMatch.h" #include "llvm/IR/Type.h" #include "llvm/IR/User.h" #include "llvm/IR/Value.h" #include "llvm/MC/MCInstrDesc.h" #include "llvm/MC/MCInstrItineraries.h" -#include "llvm/MC/MCRegisterInfo.h" #include "llvm/MC/MCSchedule.h" #include "llvm/Support/AtomicOrdering.h" #include "llvm/Support/BranchProbability.h" @@ -151,6 +149,9 @@ MVEMaxSupportedInterleaveFactor("mve-max-interleave-factor", cl::Hidden, cl::desc("Maximum interleave factor for MVE VLDn to generate."), cl::init(2)); +/// Value type used for "flags" operands / results (either CPSR or FPSCR_NZCV). +constexpr MVT FlagsVT = MVT::i32; + // The APCS parameter registers. static const MCPhysReg GPRArgRegs[] = { ARM::R0, ARM::R1, ARM::R2, ARM::R3 @@ -803,6 +804,11 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM, setAllExpand(MVT::bf16); if (!Subtarget->hasFullFP16()) setOperationAction(ISD::BITCAST, MVT::bf16, Custom); + } else { + setOperationAction(ISD::BF16_TO_FP, MVT::f32, Expand); + setOperationAction(ISD::BF16_TO_FP, MVT::f64, Expand); + setOperationAction(ISD::FP_TO_BF16, MVT::f32, Custom); + setOperationAction(ISD::FP_TO_BF16, MVT::f64, Custom); } for (MVT VT : MVT::fixedlen_vector_valuetypes()) { @@ -1109,12 +1115,15 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM, for (MVT VT : MVT::fp_valuetypes()) { setLoadExtAction(ISD::EXTLOAD, VT, MVT::f32, Expand); setLoadExtAction(ISD::EXTLOAD, VT, MVT::f16, Expand); + setLoadExtAction(ISD::EXTLOAD, VT, MVT::bf16, Expand); } // ... or truncating stores setTruncStoreAction(MVT::f64, MVT::f32, Expand); setTruncStoreAction(MVT::f32, MVT::f16, Expand); setTruncStoreAction(MVT::f64, MVT::f16, Expand); + setTruncStoreAction(MVT::f32, MVT::bf16, Expand); + setTruncStoreAction(MVT::f64, MVT::bf16, Expand); // ARM does not have i1 sign extending load. for (MVT VT : MVT::integer_valuetypes()) @@ -1635,8 +1644,9 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM, // Prefer likely predicted branches to selects on out-of-order cores. PredictableSelectIsExpensive = Subtarget->getSchedModel().isOutOfOrder(); - setPrefLoopAlignment(Align(1ULL << Subtarget->getPrefLoopLogAlignment())); - setPrefFunctionAlignment(Align(1ULL << Subtarget->getPrefLoopLogAlignment())); + setPrefLoopAlignment(Align(1ULL << Subtarget->getPreferBranchLogAlignment())); + setPrefFunctionAlignment( + Align(1ULL << Subtarget->getPreferBranchLogAlignment())); setMinFunctionAlignment(Subtarget->isThumb() ? 
Align(2) : Align(4)); } @@ -1731,14 +1741,14 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const { MAKE_CASE(ARMISD::ASRL) MAKE_CASE(ARMISD::LSRL) MAKE_CASE(ARMISD::LSLL) - MAKE_CASE(ARMISD::SRL_GLUE) - MAKE_CASE(ARMISD::SRA_GLUE) + MAKE_CASE(ARMISD::LSLS) + MAKE_CASE(ARMISD::LSRS1) + MAKE_CASE(ARMISD::ASRS1) MAKE_CASE(ARMISD::RRX) MAKE_CASE(ARMISD::ADDC) MAKE_CASE(ARMISD::ADDE) MAKE_CASE(ARMISD::SUBC) MAKE_CASE(ARMISD::SUBE) - MAKE_CASE(ARMISD::LSLS) MAKE_CASE(ARMISD::VMOVRRD) MAKE_CASE(ARMISD::VMOVDRR) MAKE_CASE(ARMISD::VMOVhr) @@ -2324,6 +2334,59 @@ std::pair<SDValue, MachinePointerInfo> ARMTargetLowering::computeAddrForCallArg( return std::make_pair(DstAddr, DstInfo); } +// Returns the type of copying which is required to set up a byval argument to +// a tail-called function. This isn't needed for non-tail calls, because they +// always need the equivalent of CopyOnce, but tail-calls sometimes need two to +// avoid clobbering another argument (CopyViaTemp), and sometimes can be +// optimised to zero copies when forwarding an argument from the caller's +// caller (NoCopy). +ARMTargetLowering::ByValCopyKind ARMTargetLowering::ByValNeedsCopyForTailCall( + SelectionDAG &DAG, SDValue Src, SDValue Dst, ISD::ArgFlagsTy Flags) const { + MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo(); + ARMFunctionInfo *AFI = DAG.getMachineFunction().getInfo<ARMFunctionInfo>(); + + // Globals are always safe to copy from. + if (isa<GlobalAddressSDNode>(Src) || isa<ExternalSymbolSDNode>(Src)) + return CopyOnce; + + // Can only analyse frame index nodes, conservatively assume we need a + // temporary. + auto *SrcFrameIdxNode = dyn_cast<FrameIndexSDNode>(Src); + auto *DstFrameIdxNode = dyn_cast<FrameIndexSDNode>(Dst); + if (!SrcFrameIdxNode || !DstFrameIdxNode) + return CopyViaTemp; + + int SrcFI = SrcFrameIdxNode->getIndex(); + int DstFI = DstFrameIdxNode->getIndex(); + assert(MFI.isFixedObjectIndex(DstFI) && + "byval passed in non-fixed stack slot"); + + int64_t SrcOffset = MFI.getObjectOffset(SrcFI); + int64_t DstOffset = MFI.getObjectOffset(DstFI); + + // If the source is in the local frame, then the copy to the argument memory + // is always valid. + bool FixedSrc = MFI.isFixedObjectIndex(SrcFI); + if (!FixedSrc || + (FixedSrc && SrcOffset < -(int64_t)AFI->getArgRegsSaveSize())) + return CopyOnce; + + // In the case of byval arguments split between registers and the stack, + // computeAddrForCallArg returns a FrameIndex which corresponds only to the + // stack portion, but the Src SDValue will refer to the full value, including + // the local stack memory that the register portion gets stored into. We only + // need to compare them for equality, so normalise on the full value version. + uint64_t RegSize = Flags.getByValSize() - MFI.getObjectSize(DstFI); + DstOffset -= RegSize; + + // If the value is already in the correct location, then no copying is + // needed. If not, then we need to copy via a temporary. + if (SrcOffset == DstOffset) + return NoCopy; + else + return CopyViaTemp; +} + void ARMTargetLowering::PassF64ArgInRegs(const SDLoc &dl, SelectionDAG &DAG, SDValue Chain, SDValue &Arg, RegsToPassVector &RegsToPass, @@ -2379,6 +2442,7 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, MachineFunction &MF = DAG.getMachineFunction(); ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); + MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo(); MachineFunction::CallSiteInfo CSInfo; bool isStructRet = (Outs.empty()) ? 
false : Outs[0].Flags.isSRet(); bool isThisReturn = false; @@ -2407,8 +2471,8 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, isTailCall = false; // For both the non-secure calls and the returns from a CMSE entry function, - // the function needs to do some extra work afte r the call, or before the - // return, respectively, thus it cannot end with atail call + // the function needs to do some extra work after the call, or before the + // return, respectively, thus it cannot end with a tail call if (isCmseNSCall || AFI->isCmseNSEntryFunction()) isTailCall = false; @@ -2461,8 +2525,9 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, // Since callee will pop argument stack as a tail call, we must keep the // popped size 16-byte aligned. - Align StackAlign = DAG.getDataLayout().getStackAlignment(); - NumBytes = alignTo(NumBytes, StackAlign); + MaybeAlign StackAlign = DAG.getDataLayout().getStackAlignment(); + assert(StackAlign && "data layout string is missing stack alignment"); + NumBytes = alignTo(NumBytes, *StackAlign); // SPDiff will be negative if this tail call requires more space than we // would automatically have in our incoming argument space. Positive if we @@ -2490,6 +2555,66 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, RegsToPassVector RegsToPass; SmallVector<SDValue, 8> MemOpChains; + // If we are doing a tail-call, any byval arguments will be written to stack + // space which was used for incoming arguments. If any the values being used + // are incoming byval arguments to this function, then they might be + // overwritten by the stores of the outgoing arguments. To avoid this, we + // need to make a temporary copy of them in local stack space, then copy back + // to the argument area. + DenseMap<unsigned, SDValue> ByValTemporaries; + SDValue ByValTempChain; + if (isTailCall) { + SmallVector<SDValue, 8> ByValCopyChains; + for (const CCValAssign &VA : ArgLocs) { + unsigned ArgIdx = VA.getValNo(); + SDValue Src = OutVals[ArgIdx]; + ISD::ArgFlagsTy Flags = Outs[ArgIdx].Flags; + + if (!Flags.isByVal()) + continue; + + SDValue Dst; + MachinePointerInfo DstInfo; + std::tie(Dst, DstInfo) = + computeAddrForCallArg(dl, DAG, VA, SDValue(), true, SPDiff); + ByValCopyKind Copy = ByValNeedsCopyForTailCall(DAG, Src, Dst, Flags); + + if (Copy == NoCopy) { + // If the argument is already at the correct offset on the stack + // (because we are forwarding a byval argument from our caller), we + // don't need any copying. + continue; + } else if (Copy == CopyOnce) { + // If the argument is in our local stack frame, no other argument + // preparation can clobber it, so we can copy it to the final location + // later. + ByValTemporaries[ArgIdx] = Src; + } else { + assert(Copy == CopyViaTemp && "unexpected enum value"); + // If we might be copying this argument from the outgoing argument + // stack area, we need to copy via a temporary in the local stack + // frame. 
+ int TempFrameIdx = MFI.CreateStackObject( + Flags.getByValSize(), Flags.getNonZeroByValAlign(), false); + SDValue Temp = + DAG.getFrameIndex(TempFrameIdx, getPointerTy(DAG.getDataLayout())); + + SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), dl, MVT::i32); + SDValue AlignNode = + DAG.getConstant(Flags.getNonZeroByValAlign().value(), dl, MVT::i32); + + SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue); + SDValue Ops[] = {Chain, Temp, Src, SizeNode, AlignNode}; + ByValCopyChains.push_back( + DAG.getNode(ARMISD::COPY_STRUCT_BYVAL, dl, VTs, Ops)); + ByValTemporaries[ArgIdx] = Temp; + } + } + if (!ByValCopyChains.empty()) + ByValTempChain = + DAG.getNode(ISD::TokenFactor, dl, MVT::Other, ByValCopyChains); + } + // During a tail call, stores to the argument area must happen after all of // the function's incoming arguments have been loaded because they may alias. // This is done by folding in a TokenFactor from LowerFormalArguments, but @@ -2527,6 +2652,9 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, if (isTailCall && VA.isMemLoc() && !AfterFormalArgLoads) { Chain = DAG.getStackArgumentTokenFactor(Chain); + if (ByValTempChain) + Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Chain, + ByValTempChain); AfterFormalArgLoads = true; } @@ -2598,8 +2726,18 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, unsigned ByValArgsCount = CCInfo.getInRegsParamsCount(); unsigned CurByValIdx = CCInfo.getInRegsParamsProcessed(); - if (CurByValIdx < ByValArgsCount) { + SDValue ByValSrc; + bool NeedsStackCopy; + if (ByValTemporaries.contains(realArgIdx)) { + ByValSrc = ByValTemporaries[realArgIdx]; + NeedsStackCopy = true; + } else { + ByValSrc = Arg; + NeedsStackCopy = !isTailCall; + } + // If part of the argument is in registers, load them. + if (CurByValIdx < ByValArgsCount) { unsigned RegBegin, RegEnd; CCInfo.getInRegsParamInfo(CurByValIdx, RegBegin, RegEnd); @@ -2608,7 +2746,7 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, unsigned int i, j; for (i = 0, j = RegBegin; j < RegEnd; i++, j++) { SDValue Const = DAG.getConstant(4*i, dl, MVT::i32); - SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const); + SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, ByValSrc, Const); SDValue Load = DAG.getLoad(PtrVT, dl, Chain, AddArg, MachinePointerInfo(), DAG.InferPtrAlign(AddArg)); @@ -2623,14 +2761,16 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, CCInfo.nextInRegsParam(); } - if (Flags.getByValSize() > 4*offset) { + // If the memory part of the argument isn't already in the correct place + // (which can happen with tail calls), copy it into the argument area. 
+ if (NeedsStackCopy && Flags.getByValSize() > 4 * offset) { auto PtrVT = getPointerTy(DAG.getDataLayout()); SDValue Dst; MachinePointerInfo DstInfo; std::tie(Dst, DstInfo) = computeAddrForCallArg(dl, DAG, VA, StackPtr, isTailCall, SPDiff); SDValue SrcOffset = DAG.getIntPtrConstant(4*offset, dl); - SDValue Src = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, SrcOffset); + SDValue Src = DAG.getNode(ISD::ADD, dl, PtrVT, ByValSrc, SrcOffset); SDValue SizeNode = DAG.getConstant(Flags.getByValSize() - 4*offset, dl, MVT::i32); SDValue AlignNode = @@ -2841,7 +2981,7 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, Ops.push_back(Callee); if (isTailCall) { - Ops.push_back(DAG.getTargetConstant(SPDiff, dl, MVT::i32)); + Ops.push_back(DAG.getSignedTargetConstant(SPDiff, dl, MVT::i32)); } // Add argument registers to the end of the list so that they are known live @@ -2872,17 +3012,16 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, if (InGlue.getNode()) Ops.push_back(InGlue); - SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); if (isTailCall) { MF.getFrameInfo().setHasTailCall(); - SDValue Ret = DAG.getNode(ARMISD::TC_RETURN, dl, NodeTys, Ops); + SDValue Ret = DAG.getNode(ARMISD::TC_RETURN, dl, MVT::Other, Ops); DAG.addNoMergeSiteInfo(Ret.getNode(), CLI.NoMerge); DAG.addCallSiteInfo(Ret.getNode(), std::move(CSInfo)); return Ret; } // Returns a chain and a flag for retval copy to use. - Chain = DAG.getNode(CallOpc, dl, NodeTys, Ops); + Chain = DAG.getNode(CallOpc, dl, {MVT::Other, MVT::Glue}, Ops); DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge); InGlue = Chain.getValue(1); DAG.addCallSiteInfo(Chain.getNode(), std::move(CSInfo)); @@ -2892,7 +3031,7 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, // we need to undo that after it returns to restore the status-quo. bool TailCallOpt = getTargetMachine().Options.GuaranteedTailCallOpt; uint64_t CalleePopBytes = - canGuaranteeTCO(CallConv, TailCallOpt) ? alignTo(NumBytes, 16) : -1ULL; + canGuaranteeTCO(CallConv, TailCallOpt) ? alignTo(NumBytes, 16) : -1U; Chain = DAG.getCALLSEQ_END(Chain, NumBytes, CalleePopBytes, InGlue, dl); if (!Ins.empty()) @@ -2914,7 +3053,7 @@ void ARMTargetLowering::HandleByVal(CCState *State, unsigned &Size, // Byval (as with any stack) slots are always at least 4 byte aligned. Alignment = std::max(Alignment, Align(4)); - unsigned Reg = State->AllocateReg(GPRArgRegs); + MCRegister Reg = State->AllocateReg(GPRArgRegs); if (!Reg) return; @@ -2959,50 +3098,6 @@ void ARMTargetLowering::HandleByVal(CCState *State, unsigned &Size, Size = std::max<int>(Size - Excess, 0); } -/// MatchingStackOffset - Return true if the given stack call argument is -/// already available in the same position (relatively) of the caller's -/// incoming argument stack. 
-static -bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags, - MachineFrameInfo &MFI, const MachineRegisterInfo *MRI, - const TargetInstrInfo *TII) { - unsigned Bytes = Arg.getValueSizeInBits() / 8; - int FI = std::numeric_limits<int>::max(); - if (Arg.getOpcode() == ISD::CopyFromReg) { - Register VR = cast<RegisterSDNode>(Arg.getOperand(1))->getReg(); - if (!VR.isVirtual()) - return false; - MachineInstr *Def = MRI->getVRegDef(VR); - if (!Def) - return false; - if (!Flags.isByVal()) { - if (!TII->isLoadFromStackSlot(*Def, FI)) - return false; - } else { - return false; - } - } else if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Arg)) { - if (Flags.isByVal()) - // ByVal argument is passed in as a pointer but it's now being - // dereferenced. e.g. - // define @foo(%struct.X* %A) { - // tail call @bar(%struct.X* byval %A) - // } - return false; - SDValue Ptr = Ld->getBasePtr(); - FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(Ptr); - if (!FINode) - return false; - FI = FINode->getIndex(); - } else - return false; - - assert(FI != std::numeric_limits<int>::max()); - if (!MFI.isFixedObjectIndex(FI)) - return false; - return Offset == MFI.getObjectOffset(FI) && Bytes == MFI.getObjectSize(FI); -} - /// IsEligibleForTailCallOptimization - Check whether the call is eligible /// for tail call optimization. Targets which want to do tail call /// optimization should implement this function. Note that this function also @@ -3024,19 +3119,30 @@ bool ARMTargetLowering::IsEligibleForTailCallOptimization( assert(Subtarget->supportsTailCall()); - // Indirect tail calls cannot be optimized for Thumb1 if the args - // to the call take up r0-r3. The reason is that there are no legal registers - // left to hold the pointer to the function to be called. - // Similarly, if the function uses return address sign and authentication, - // r12 is needed to hold the PAC and is not available to hold the callee - // address. - if (Outs.size() >= 4 && - (!isa<GlobalAddressSDNode>(Callee.getNode()) || isIndirect)) { - if (Subtarget->isThumb1Only()) - return false; - // Conservatively assume the function spills LR. - if (MF.getInfo<ARMFunctionInfo>()->shouldSignReturnAddress(true)) + // Indirect tail-calls require a register to hold the target address. That + // register must be: + // * Allocatable (i.e. r0-r7 if the target is Thumb1). + // * Not callee-saved, so must be one of r0-r3 or r12. + // * Not used to hold an argument to the tail-called function, which might be + // in r0-r3. + // * Not used to hold the return address authentication code, which is in r12 + // if enabled. + // Sometimes, no register matches all of these conditions, so we can't do a + // tail-call. + if (!isa<GlobalAddressSDNode>(Callee.getNode()) || isIndirect) { + SmallSet<MCPhysReg, 5> AddressRegisters; + for (Register R : {ARM::R0, ARM::R1, ARM::R2, ARM::R3}) + AddressRegisters.insert(R); + if (!(Subtarget->isThumb1Only() || + MF.getInfo<ARMFunctionInfo>()->shouldSignReturnAddress(true))) + AddressRegisters.insert(ARM::R12); + for (const CCValAssign &AL : ArgLocs) + if (AL.isRegLoc()) + AddressRegisters.erase(AL.getLocReg()); + if (AddressRegisters.empty()) { + LLVM_DEBUG(dbgs() << "false (no reg to hold function pointer)\n"); return false; + } } // Look for obvious safe cases to perform tail call optimization that do not @@ -3045,18 +3151,26 @@ bool ARMTargetLowering::IsEligibleForTailCallOptimization( // Exception-handling functions need a special set of instructions to indicate // a return to the hardware. 
Tail-calling another function would probably // break this. - if (CallerF.hasFnAttribute("interrupt")) + if (CallerF.hasFnAttribute("interrupt")) { + LLVM_DEBUG(dbgs() << "false (interrupt attribute)\n"); return false; + } - if (canGuaranteeTCO(CalleeCC, getTargetMachine().Options.GuaranteedTailCallOpt)) + if (canGuaranteeTCO(CalleeCC, + getTargetMachine().Options.GuaranteedTailCallOpt)) { + LLVM_DEBUG(dbgs() << (CalleeCC == CallerCC ? "true" : "false") + << " (guaranteed tail-call CC)\n"); return CalleeCC == CallerCC; + } // Also avoid sibcall optimization if either caller or callee uses struct // return semantics. bool isCalleeStructRet = Outs.empty() ? false : Outs[0].Flags.isSRet(); bool isCallerStructRet = MF.getFunction().hasStructRetAttr(); - if (isCalleeStructRet || isCallerStructRet) + if (isCalleeStructRet != isCallerStructRet) { + LLVM_DEBUG(dbgs() << "false (struct-ret)\n"); return false; + } // Externally-defined functions with weak linkage should not be // tail-called on ARM when the OS does not support dynamic @@ -3069,8 +3183,11 @@ bool ARMTargetLowering::IsEligibleForTailCallOptimization( const GlobalValue *GV = G->getGlobal(); const Triple &TT = getTargetMachine().getTargetTriple(); if (GV->hasExternalWeakLinkage() && - (!TT.isOSWindows() || TT.isOSBinFormatELF() || TT.isOSBinFormatMachO())) + (!TT.isOSWindows() || TT.isOSBinFormatELF() || + TT.isOSBinFormatMachO())) { + LLVM_DEBUG(dbgs() << "false (external weak linkage)\n"); return false; + } } // Check that the call results are passed in the same way. @@ -3079,70 +3196,44 @@ bool ARMTargetLowering::IsEligibleForTailCallOptimization( getEffectiveCallingConv(CalleeCC, isVarArg), getEffectiveCallingConv(CallerCC, CallerF.isVarArg()), MF, C, Ins, CCAssignFnForReturn(CalleeCC, isVarArg), - CCAssignFnForReturn(CallerCC, CallerF.isVarArg()))) + CCAssignFnForReturn(CallerCC, CallerF.isVarArg()))) { + LLVM_DEBUG(dbgs() << "false (incompatible results)\n"); return false; + } // The callee has to preserve all registers the caller needs to preserve. const ARMBaseRegisterInfo *TRI = Subtarget->getRegisterInfo(); const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC); if (CalleeCC != CallerCC) { const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC); - if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved)) + if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved)) { + LLVM_DEBUG(dbgs() << "false (not all registers preserved)\n"); return false; + } } - // If Caller's vararg or byval argument has been split between registers and - // stack, do not perform tail call, since part of the argument is in caller's - // local frame. + // If Caller's vararg argument has been split between registers and stack, do + // not perform tail call, since part of the argument is in caller's local + // frame. const ARMFunctionInfo *AFI_Caller = MF.getInfo<ARMFunctionInfo>(); - if (AFI_Caller->getArgRegsSaveSize()) + if (CLI.IsVarArg && AFI_Caller->getArgRegsSaveSize()) { + LLVM_DEBUG(dbgs() << "false (arg reg save area)\n"); return false; + } // If the callee takes no arguments then go on to check the results of the // call. - if (!Outs.empty()) { - if (CCInfo.getStackSize()) { - // Check if the arguments are already laid out in the right way as - // the caller's fixed stack objects. 
- MachineFrameInfo &MFI = MF.getFrameInfo(); - const MachineRegisterInfo *MRI = &MF.getRegInfo(); - const TargetInstrInfo *TII = Subtarget->getInstrInfo(); - for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size(); - i != e; - ++i, ++realArgIdx) { - CCValAssign &VA = ArgLocs[i]; - EVT RegVT = VA.getLocVT(); - SDValue Arg = OutVals[realArgIdx]; - ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags; - if (VA.getLocInfo() == CCValAssign::Indirect) - return false; - if (VA.needsCustom() && (RegVT == MVT::f64 || RegVT == MVT::v2f64)) { - // f64 and vector types are split into multiple registers or - // register/stack-slot combinations. The types will not match - // the registers; give up on memory f64 refs until we figure - // out what to do about this. - if (!VA.isRegLoc()) - return false; - if (!ArgLocs[++i].isRegLoc()) - return false; - if (RegVT == MVT::v2f64) { - if (!ArgLocs[++i].isRegLoc()) - return false; - if (!ArgLocs[++i].isRegLoc()) - return false; - } - } else if (!VA.isRegLoc()) { - if (!MatchingStackOffset(Arg, VA.getLocMemOffset(), Flags, - MFI, MRI, TII)) - return false; - } - } - } - - const MachineRegisterInfo &MRI = MF.getRegInfo(); - if (!parametersInCSRMatch(MRI, CallerPreserved, ArgLocs, OutVals)) - return false; + const MachineRegisterInfo &MRI = MF.getRegInfo(); + if (!parametersInCSRMatch(MRI, CallerPreserved, ArgLocs, OutVals)) { + LLVM_DEBUG(dbgs() << "false (parameters in CSRs do not match)\n"); + return false; } + // If the stack arguments for this call do not fit into our own save area then + // the call cannot be made tail. + if (CCInfo.getStackSize() > AFI_Caller->getArgumentStackSize()) + return false; + + LLVM_DEBUG(dbgs() << "true\n"); return true; } @@ -3150,7 +3241,7 @@ bool ARMTargetLowering::CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF, bool isVarArg, const SmallVectorImpl<ISD::OutputArg> &Outs, - LLVMContext &Context) const { + LLVMContext &Context, const Type *RetTy) const { SmallVector<CCValAssign, 16> RVLocs; CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context); return CCInfo.CheckReturn(Outs, CCAssignFnForReturn(CallConv, isVarArg)); @@ -3373,7 +3464,7 @@ bool ARMTargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const { return false; SDValue TCChain = Chain; - SDNode *Copy = *N->use_begin(); + SDNode *Copy = *N->user_begin(); if (Copy->getOpcode() == ISD::CopyToReg) { // If the copy has a glue operand, we conservatively assume it isn't safe to // perform a tail call. @@ -3384,7 +3475,7 @@ bool ARMTargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const { SDNode *VMov = Copy; // f64 returned in a pair of GPRs. SmallPtrSet<SDNode*, 2> Copies; - for (SDNode *U : VMov->uses()) { + for (SDNode *U : VMov->users()) { if (U->getOpcode() != ISD::CopyToReg) return false; Copies.insert(U); @@ -3392,7 +3483,7 @@ bool ARMTargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const { if (Copies.size() > 2) return false; - for (SDNode *U : VMov->uses()) { + for (SDNode *U : VMov->users()) { SDValue UseChain = U->getOperand(0); if (Copies.count(UseChain.getNode())) // Second CopyToReg @@ -3411,7 +3502,7 @@ bool ARMTargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const { // f32 returned in a single GPR. 
if (!Copy->hasOneUse()) return false; - Copy = *Copy->use_begin(); + Copy = *Copy->user_begin(); if (Copy->getOpcode() != ISD::CopyToReg || !Copy->hasNUsesOfValue(1, 0)) return false; // If the copy has a glue operand, we conservatively assume it isn't safe to @@ -3424,7 +3515,7 @@ bool ARMTargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const { } bool HasRet = false; - for (const SDNode *U : Copy->uses()) { + for (const SDNode *U : Copy->users()) { if (U->getOpcode() != ARMISD::RET_GLUE && U->getOpcode() != ARMISD::INTRET_GLUE) return false; @@ -4701,8 +4792,9 @@ SDValue ARMTargetLowering::LowerFormalArguments( if (canGuaranteeTCO(CallConv, TailCallOpt)) { // The only way to guarantee a tail call is if the callee restores its // argument area, but it must also keep the stack aligned when doing so. - const DataLayout &DL = DAG.getDataLayout(); - StackArgSize = alignTo(StackArgSize, DL.getStackAlignment()); + MaybeAlign StackAlign = DAG.getDataLayout().getStackAlignment(); + assert(StackAlign && "data layout string is missing stack alignment"); + StackArgSize = alignTo(StackArgSize, *StackAlign); AFI->setArgumentStackToRestore(StackArgSize); } @@ -4840,14 +4932,11 @@ SDValue ARMTargetLowering::getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC, CC == ISD::SETUGT && isa<ConstantSDNode>(LHS.getOperand(1)) && LHS.getConstantOperandVal(1) < 31) { unsigned ShiftAmt = LHS.getConstantOperandVal(1) + 1; - SDValue Shift = DAG.getNode(ARMISD::LSLS, dl, - DAG.getVTList(MVT::i32, MVT::i32), - LHS.getOperand(0), - DAG.getConstant(ShiftAmt, dl, MVT::i32)); - SDValue Chain = DAG.getCopyToReg(DAG.getEntryNode(), dl, ARM::CPSR, - Shift.getValue(1), SDValue()); + SDValue Shift = + DAG.getNode(ARMISD::LSLS, dl, DAG.getVTList(MVT::i32, FlagsVT), + LHS.getOperand(0), DAG.getConstant(ShiftAmt, dl, MVT::i32)); ARMcc = DAG.getConstant(ARMCC::HI, dl, MVT::i32); - return Chain.getValue(1); + return Shift.getValue(1); } ARMCC::CondCodes CondCode = IntCCToARMCC(CC); @@ -4879,7 +4968,7 @@ SDValue ARMTargetLowering::getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC, break; } ARMcc = DAG.getConstant(CondCode, dl, MVT::i32); - return DAG.getNode(CompareType, dl, MVT::Glue, LHS, RHS); + return DAG.getNode(CompareType, dl, FlagsVT, LHS, RHS); } /// Returns a appropriate VFP CMP (fcmp{s|d}+fmstat) for the given operands. @@ -4887,35 +4976,14 @@ SDValue ARMTargetLowering::getVFPCmp(SDValue LHS, SDValue RHS, SelectionDAG &DAG, const SDLoc &dl, bool Signaling) const { assert(Subtarget->hasFP64() || RHS.getValueType() != MVT::f64); - SDValue Cmp; + SDValue Flags; if (!isFloatingPointZero(RHS)) - Cmp = DAG.getNode(Signaling ? ARMISD::CMPFPE : ARMISD::CMPFP, - dl, MVT::Glue, LHS, RHS); + Flags = DAG.getNode(Signaling ? ARMISD::CMPFPE : ARMISD::CMPFP, dl, FlagsVT, + LHS, RHS); else - Cmp = DAG.getNode(Signaling ? ARMISD::CMPFPEw0 : ARMISD::CMPFPw0, - dl, MVT::Glue, LHS); - return DAG.getNode(ARMISD::FMSTAT, dl, MVT::Glue, Cmp); -} - -/// duplicateCmp - Glue values can have only one use, so this function -/// duplicates a comparison node. 
-SDValue -ARMTargetLowering::duplicateCmp(SDValue Cmp, SelectionDAG &DAG) const { - unsigned Opc = Cmp.getOpcode(); - SDLoc DL(Cmp); - if (Opc == ARMISD::CMP || Opc == ARMISD::CMPZ) - return DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0),Cmp.getOperand(1)); - - assert(Opc == ARMISD::FMSTAT && "unexpected comparison operation"); - Cmp = Cmp.getOperand(0); - Opc = Cmp.getOpcode(); - if (Opc == ARMISD::CMPFP) - Cmp = DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0),Cmp.getOperand(1)); - else { - assert(Opc == ARMISD::CMPFPw0 && "unexpected operand of FMSTAT"); - Cmp = DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0)); - } - return DAG.getNode(ARMISD::FMSTAT, DL, MVT::Glue, Cmp); + Flags = DAG.getNode(Signaling ? ARMISD::CMPFPEw0 : ARMISD::CMPFPw0, dl, + FlagsVT, LHS); + return DAG.getNode(ARMISD::FMSTAT, dl, FlagsVT, Flags); } // This function returns three things: the arithmetic computation itself @@ -4943,7 +5011,7 @@ ARMTargetLowering::getARMXALUOOp(SDValue Op, SelectionDAG &DAG, case ISD::SADDO: ARMcc = DAG.getConstant(ARMCC::VC, dl, MVT::i32); Value = DAG.getNode(ISD::ADD, dl, Op.getValueType(), LHS, RHS); - OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, Value, LHS); + OverflowCmp = DAG.getNode(ARMISD::CMP, dl, FlagsVT, Value, LHS); break; case ISD::UADDO: ARMcc = DAG.getConstant(ARMCC::HS, dl, MVT::i32); @@ -4952,17 +5020,17 @@ ARMTargetLowering::getARMXALUOOp(SDValue Op, SelectionDAG &DAG, Value = DAG.getNode(ARMISD::ADDC, dl, DAG.getVTList(Op.getValueType(), MVT::i32), LHS, RHS) .getValue(0); - OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, Value, LHS); + OverflowCmp = DAG.getNode(ARMISD::CMP, dl, FlagsVT, Value, LHS); break; case ISD::SSUBO: ARMcc = DAG.getConstant(ARMCC::VC, dl, MVT::i32); Value = DAG.getNode(ISD::SUB, dl, Op.getValueType(), LHS, RHS); - OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, LHS, RHS); + OverflowCmp = DAG.getNode(ARMISD::CMP, dl, FlagsVT, LHS, RHS); break; case ISD::USUBO: ARMcc = DAG.getConstant(ARMCC::HS, dl, MVT::i32); Value = DAG.getNode(ISD::SUB, dl, Op.getValueType(), LHS, RHS); - OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, LHS, RHS); + OverflowCmp = DAG.getNode(ARMISD::CMP, dl, FlagsVT, LHS, RHS); break; case ISD::UMULO: // We generate a UMUL_LOHI and then check if the high word is 0. @@ -4970,7 +5038,7 @@ ARMTargetLowering::getARMXALUOOp(SDValue Op, SelectionDAG &DAG, Value = DAG.getNode(ISD::UMUL_LOHI, dl, DAG.getVTList(Op.getValueType(), Op.getValueType()), LHS, RHS); - OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, Value.getValue(1), + OverflowCmp = DAG.getNode(ARMISD::CMP, dl, FlagsVT, Value.getValue(1), DAG.getConstant(0, dl, MVT::i32)); Value = Value.getValue(0); // We only want the low 32 bits for the result. break; @@ -4981,7 +5049,7 @@ ARMTargetLowering::getARMXALUOOp(SDValue Op, SelectionDAG &DAG, Value = DAG.getNode(ISD::SMUL_LOHI, dl, DAG.getVTList(Op.getValueType(), Op.getValueType()), LHS, RHS); - OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, Value.getValue(1), + OverflowCmp = DAG.getNode(ARMISD::CMP, dl, FlagsVT, Value.getValue(1), DAG.getNode(ISD::SRA, dl, Op.getValueType(), Value.getValue(0), DAG.getConstant(31, dl, MVT::i32))); @@ -5001,15 +5069,14 @@ ARMTargetLowering::LowerSignedALUO(SDValue Op, SelectionDAG &DAG) const { SDValue Value, OverflowCmp; SDValue ARMcc; std::tie(Value, OverflowCmp) = getARMXALUOOp(Op, DAG, ARMcc); - SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); SDLoc dl(Op); // We use 0 and 1 as false and true values. 
SDValue TVal = DAG.getConstant(1, dl, MVT::i32); SDValue FVal = DAG.getConstant(0, dl, MVT::i32); EVT VT = Op.getValueType(); - SDValue Overflow = DAG.getNode(ARMISD::CMOV, dl, VT, TVal, FVal, - ARMcc, CCR, OverflowCmp); + SDValue Overflow = + DAG.getNode(ARMISD::CMOV, dl, VT, TVal, FVal, ARMcc, OverflowCmp); SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32); return DAG.getNode(ISD::MERGE_VALUES, dl, VTs, Value, Overflow); @@ -5146,11 +5213,9 @@ SDValue ARMTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { SDValue Value, OverflowCmp; SDValue ARMcc; std::tie(Value, OverflowCmp) = getARMXALUOOp(Cond, DAG, ARMcc); - SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); EVT VT = Op.getValueType(); - return getCMOV(dl, VT, SelectTrue, SelectFalse, ARMcc, CCR, - OverflowCmp, DAG); + return getCMOV(dl, VT, SelectTrue, SelectFalse, ARMcc, OverflowCmp, DAG); } // Convert: @@ -5178,14 +5243,9 @@ SDValue ARMTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { False = SelectTrue; } - if (True.getNode() && False.getNode()) { - EVT VT = Op.getValueType(); - SDValue ARMcc = Cond.getOperand(2); - SDValue CCR = Cond.getOperand(3); - SDValue Cmp = duplicateCmp(Cond.getOperand(4), DAG); - assert(True.getValueType() == VT); - return getCMOV(dl, VT, True, False, ARMcc, CCR, Cmp, DAG); - } + if (True.getNode() && False.getNode()) + return getCMOV(dl, Op.getValueType(), True, False, Cond.getOperand(2), + Cond.getOperand(3), DAG); } } @@ -5250,8 +5310,8 @@ static void checkVSELConstraints(ISD::CondCode CC, ARMCC::CondCodes &CondCode, } SDValue ARMTargetLowering::getCMOV(const SDLoc &dl, EVT VT, SDValue FalseVal, - SDValue TrueVal, SDValue ARMcc, SDValue CCR, - SDValue Cmp, SelectionDAG &DAG) const { + SDValue TrueVal, SDValue ARMcc, + SDValue Flags, SelectionDAG &DAG) const { if (!Subtarget->hasFP64() && VT == MVT::f64) { FalseVal = DAG.getNode(ARMISD::VMOVRRD, dl, DAG.getVTList(MVT::i32, MVT::i32), FalseVal); @@ -5264,15 +5324,13 @@ SDValue ARMTargetLowering::getCMOV(const SDLoc &dl, EVT VT, SDValue FalseVal, SDValue FalseHigh = FalseVal.getValue(1); SDValue Low = DAG.getNode(ARMISD::CMOV, dl, MVT::i32, FalseLow, TrueLow, - ARMcc, CCR, Cmp); + ARMcc, Flags); SDValue High = DAG.getNode(ARMISD::CMOV, dl, MVT::i32, FalseHigh, TrueHigh, - ARMcc, CCR, duplicateCmp(Cmp, DAG)); + ARMcc, Flags); return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Low, High); - } else { - return DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMcc, CCR, - Cmp); } + return DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMcc, Flags); } static bool isGTorGE(ISD::CondCode CC) { @@ -5545,12 +5603,11 @@ SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const { } SDValue ARMcc; - SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl); // Choose GE over PL, which vsel does now support if (ARMcc->getAsZExtVal() == ARMCC::PL) ARMcc = DAG.getConstant(ARMCC::GE, dl, MVT::i32); - return getCMOV(dl, VT, FalseVal, TrueVal, ARMcc, CCR, Cmp, DAG); + return getCMOV(dl, VT, FalseVal, TrueVal, ARMcc, Cmp, DAG); } ARMCC::CondCodes CondCode, CondCode2; @@ -5580,13 +5637,10 @@ SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const { SDValue ARMcc = DAG.getConstant(CondCode, dl, MVT::i32); SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl); - SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); - SDValue Result = getCMOV(dl, VT, FalseVal, TrueVal, ARMcc, CCR, Cmp, DAG); + SDValue Result = getCMOV(dl, VT, FalseVal, TrueVal, 
ARMcc, Cmp, DAG); if (CondCode2 != ARMCC::AL) { SDValue ARMcc2 = DAG.getConstant(CondCode2, dl, MVT::i32); - // FIXME: Needs another CMP because flag can have but one use. - SDValue Cmp2 = getVFPCmp(LHS, RHS, DAG, dl); - Result = getCMOV(dl, VT, Result, TrueVal, ARMcc2, CCR, Cmp2, DAG); + Result = getCMOV(dl, VT, Result, TrueVal, ARMcc2, Cmp, DAG); } return Result; } @@ -5687,9 +5741,8 @@ ARMTargetLowering::OptimizeVFPBrcond(SDValue Op, SelectionDAG &DAG) const { RHS = DAG.getNode(ISD::AND, dl, MVT::i32, bitcastf32Toi32(RHS, DAG), Mask); SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl); - SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); - return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other, - Chain, Dest, ARMcc, CCR, Cmp); + return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other, Chain, Dest, ARMcc, + Cmp); } SDValue LHS1, LHS2; @@ -5700,9 +5753,8 @@ ARMTargetLowering::OptimizeVFPBrcond(SDValue Op, SelectionDAG &DAG) const { RHS2 = DAG.getNode(ISD::AND, dl, MVT::i32, RHS2, Mask); ARMCC::CondCodes CondCode = IntCCToARMCC(CC); ARMcc = DAG.getConstant(CondCode, dl, MVT::i32); - SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Glue); SDValue Ops[] = { Chain, ARMcc, LHS1, LHS2, RHS1, RHS2, Dest }; - return DAG.getNode(ARMISD::BCC_i64, dl, VTList, Ops); + return DAG.getNode(ARMISD::BCC_i64, dl, MVT::Other, Ops); } return SDValue(); @@ -5736,9 +5788,8 @@ SDValue ARMTargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const { (ARMCC::CondCodes)cast<const ConstantSDNode>(ARMcc)->getZExtValue(); CondCode = ARMCC::getOppositeCondition(CondCode); ARMcc = DAG.getConstant(CondCode, SDLoc(ARMcc), MVT::i32); - SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); - return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other, Chain, Dest, ARMcc, CCR, + return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other, Chain, Dest, ARMcc, OverflowCmp); } @@ -5790,18 +5841,15 @@ SDValue ARMTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const { CondCode = ARMCC::getOppositeCondition(CondCode); ARMcc = DAG.getConstant(CondCode, SDLoc(ARMcc), MVT::i32); } - SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); - return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other, Chain, Dest, ARMcc, CCR, + return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other, Chain, Dest, ARMcc, OverflowCmp); } if (LHS.getValueType() == MVT::i32) { SDValue ARMcc; SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl); - SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); - return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other, - Chain, Dest, ARMcc, CCR, Cmp); + return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other, Chain, Dest, ARMcc, Cmp); } if (getTargetMachine().Options.UnsafeFPMath && @@ -5816,14 +5864,12 @@ SDValue ARMTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const { SDValue ARMcc = DAG.getConstant(CondCode, dl, MVT::i32); SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl); - SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); - SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Glue); - SDValue Ops[] = { Chain, Dest, ARMcc, CCR, Cmp }; - SDValue Res = DAG.getNode(ARMISD::BRCOND, dl, VTList, Ops); + SDValue Ops[] = {Chain, Dest, ARMcc, Cmp}; + SDValue Res = DAG.getNode(ARMISD::BRCOND, dl, MVT::Other, Ops); if (CondCode2 != ARMCC::AL) { ARMcc = DAG.getConstant(CondCode2, dl, MVT::i32); - SDValue Ops[] = { Res, Dest, ARMcc, CCR, Res.getValue(1) }; - Res = DAG.getNode(ARMISD::BRCOND, dl, VTList, Ops); + SDValue Ops[] = {Res, Dest, ARMcc, Cmp}; + Res = DAG.getNode(ARMISD::BRCOND, dl, MVT::Other, Ops); } return Res; } @@ -5966,7 +6012,7 @@ static SDValue 
LowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG, DAG.getConstant((1 << BW) - 1, DL, VT)); if (IsSigned) Max = DAG.getNode(ISD::SMAX, DL, VT, Max, - DAG.getConstant(-(1 << BW), DL, VT)); + DAG.getSignedConstant(-(1 << BW), DL, VT)); return Max; } @@ -6263,10 +6309,13 @@ SDValue ARMTargetLowering::ExpandBITCAST(SDNode *N, SelectionDAG &DAG, DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), MVT::i32, Op)); if ((DstVT == MVT::i16 || DstVT == MVT::i32) && - (SrcVT == MVT::f16 || SrcVT == MVT::bf16)) + (SrcVT == MVT::f16 || SrcVT == MVT::bf16)) { + if (Subtarget->hasFullFP16() && !Subtarget->hasBF16()) + Op = DAG.getBitcast(MVT::f16, Op); return DAG.getNode( ISD::TRUNCATE, SDLoc(N), DstVT, MoveFromHPR(SDLoc(N), DAG, MVT::i32, SrcVT.getSimpleVT(), Op)); + } if (!(SrcVT == MVT::i64 || DstVT == MVT::i64)) return SDValue(); @@ -6328,7 +6377,6 @@ SDValue ARMTargetLowering::LowerShiftRightParts(SDValue Op, SDValue ShOpHi = Op.getOperand(1); SDValue ShAmt = Op.getOperand(2); SDValue ARMcc; - SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); unsigned Opc = (Op.getOpcode() == ISD::SRA_PARTS) ? ISD::SRA : ISD::SRL; assert(Op.getOpcode() == ISD::SRA_PARTS || Op.getOpcode() == ISD::SRL_PARTS); @@ -6343,8 +6391,8 @@ SDValue ARMTargetLowering::LowerShiftRightParts(SDValue Op, SDValue LoBigShift = DAG.getNode(Opc, dl, VT, ShOpHi, ExtraShAmt); SDValue CmpLo = getARMCmp(ExtraShAmt, DAG.getConstant(0, dl, MVT::i32), ISD::SETGE, ARMcc, DAG, dl); - SDValue Lo = DAG.getNode(ARMISD::CMOV, dl, VT, LoSmallShift, LoBigShift, - ARMcc, CCR, CmpLo); + SDValue Lo = + DAG.getNode(ARMISD::CMOV, dl, VT, LoSmallShift, LoBigShift, ARMcc, CmpLo); SDValue HiSmallShift = DAG.getNode(Opc, dl, VT, ShOpHi, ShAmt); SDValue HiBigShift = Opc == ISD::SRA @@ -6353,8 +6401,8 @@ SDValue ARMTargetLowering::LowerShiftRightParts(SDValue Op, : DAG.getConstant(0, dl, VT); SDValue CmpHi = getARMCmp(ExtraShAmt, DAG.getConstant(0, dl, MVT::i32), ISD::SETGE, ARMcc, DAG, dl); - SDValue Hi = DAG.getNode(ARMISD::CMOV, dl, VT, HiSmallShift, HiBigShift, - ARMcc, CCR, CmpHi); + SDValue Hi = + DAG.getNode(ARMISD::CMOV, dl, VT, HiSmallShift, HiBigShift, ARMcc, CmpHi); SDValue Ops[2] = { Lo, Hi }; return DAG.getMergeValues(Ops, dl); @@ -6372,7 +6420,6 @@ SDValue ARMTargetLowering::LowerShiftLeftParts(SDValue Op, SDValue ShOpHi = Op.getOperand(1); SDValue ShAmt = Op.getOperand(2); SDValue ARMcc; - SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); assert(Op.getOpcode() == ISD::SHL_PARTS); SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, @@ -6386,14 +6433,14 @@ SDValue ARMTargetLowering::LowerShiftLeftParts(SDValue Op, SDValue HiBigShift = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ExtraShAmt); SDValue CmpHi = getARMCmp(ExtraShAmt, DAG.getConstant(0, dl, MVT::i32), ISD::SETGE, ARMcc, DAG, dl); - SDValue Hi = DAG.getNode(ARMISD::CMOV, dl, VT, HiSmallShift, HiBigShift, - ARMcc, CCR, CmpHi); + SDValue Hi = + DAG.getNode(ARMISD::CMOV, dl, VT, HiSmallShift, HiBigShift, ARMcc, CmpHi); SDValue CmpLo = getARMCmp(ExtraShAmt, DAG.getConstant(0, dl, MVT::i32), ISD::SETGE, ARMcc, DAG, dl); SDValue LoSmallShift = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ShAmt); SDValue Lo = DAG.getNode(ARMISD::CMOV, dl, VT, LoSmallShift, - DAG.getConstant(0, dl, VT), ARMcc, CCR, CmpLo); + DAG.getConstant(0, dl, VT), ARMcc, CmpLo); SDValue Ops[2] = { Lo, Hi }; return DAG.getMergeValues(Ops, dl); @@ -6765,10 +6812,10 @@ static SDValue Expand64BitShift(SDNode *N, SelectionDAG &DAG, SDValue Lo, Hi; std::tie(Lo, Hi) = DAG.SplitScalar(N->getOperand(0), dl, MVT::i32, MVT::i32); - // First, build a 
SRA_GLUE/SRL_GLUE op, which shifts the top part by one and - // captures the result into a carry flag. - unsigned Opc = N->getOpcode() == ISD::SRL ? ARMISD::SRL_GLUE:ARMISD::SRA_GLUE; - Hi = DAG.getNode(Opc, dl, DAG.getVTList(MVT::i32, MVT::Glue), Hi); + // First, build a LSRS1/ASRS1 op, which shifts the top part by one and + // captures the shifted out bit into a carry flag. + unsigned Opc = N->getOpcode() == ISD::SRL ? ARMISD::LSRS1 : ARMISD::ASRS1; + Hi = DAG.getNode(Opc, dl, DAG.getVTList(MVT::i32, FlagsVT), Hi); // The low part is an ARMISD::RRX operand, which shifts the carry in. Lo = DAG.getNode(ARMISD::RRX, dl, MVT::i32, Lo, Hi.getValue(1)); @@ -6980,11 +7027,8 @@ static SDValue LowerSETCCCARRY(SDValue Op, SelectionDAG &DAG) { SDValue TVal = DAG.getConstant(1, DL, MVT::i32); SDValue ARMcc = DAG.getConstant( IntCCToARMCC(cast<CondCodeSDNode>(Cond)->get()), DL, MVT::i32); - SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); - SDValue Chain = DAG.getCopyToReg(DAG.getEntryNode(), DL, ARM::CPSR, - Cmp.getValue(1), SDValue()); return DAG.getNode(ARMISD::CMOV, DL, Op.getValueType(), FVal, TVal, ARMcc, - CCR, Chain.getValue(1)); + Cmp.getValue(1)); } /// isVMOVModifiedImm - Check if the specified splat value corresponds to a @@ -7111,19 +7155,6 @@ static SDValue isVMOVModifiedImm(uint64_t SplatBits, uint64_t SplatUndef, ImmMask <<= 1; } - if (DAG.getDataLayout().isBigEndian()) { - // Reverse the order of elements within the vector. - unsigned BytesPerElem = VectorVT.getScalarSizeInBits() / 8; - unsigned Mask = (1 << BytesPerElem) - 1; - unsigned NumElems = 8 / BytesPerElem; - unsigned NewImm = 0; - for (unsigned ElemNum = 0; ElemNum < NumElems; ++ElemNum) { - unsigned Elem = ((Imm >> ElemNum * BytesPerElem) & Mask); - NewImm |= Elem << (NumElems - ElemNum - 1) * BytesPerElem; - } - Imm = NewImm; - } - // Op=1, Cmode=1110. OpCmode = 0x1e; VT = is128Bits ? MVT::v2i64 : MVT::v1i64; @@ -7879,6 +7910,8 @@ static bool IsQRMVEInstruction(const SDNode *N, const SDNode *Op) { case ISD::MUL: case ISD::SADDSAT: case ISD::UADDSAT: + case ISD::AVGFLOORS: + case ISD::AVGFLOORU: return true; case ISD::SUB: case ISD::SSUBSAT: @@ -7936,7 +7969,7 @@ SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG, // generate a vdup of the constant. if (ST->hasMVEIntegerOps() && VT.getScalarSizeInBits() == SplatBitSize && (SplatBitSize == 8 || SplatBitSize == 16 || SplatBitSize == 32) && - all_of(BVN->uses(), + all_of(BVN->users(), [BVN](const SDNode *U) { return IsQRMVEInstruction(U, BVN); })) { EVT DupVT = SplatBitSize == 32 ? MVT::v4i32 : SplatBitSize == 16 ? MVT::v8i16 @@ -7956,7 +7989,7 @@ SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG, if (Val.getNode()) { SDValue Vmov = DAG.getNode(ARMISD::VMOVIMM, dl, VmovVT, Val); - return DAG.getNode(ISD::BITCAST, dl, VT, Vmov); + return DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, VT, Vmov); } // Try an immediate VMVN. @@ -7966,7 +7999,7 @@ SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG, VT, ST->hasMVEIntegerOps() ? MVEVMVNModImm : VMVNModImm); if (Val.getNode()) { SDValue Vmov = DAG.getNode(ARMISD::VMVNIMM, dl, VmovVT, Val); - return DAG.getNode(ISD::BITCAST, dl, VT, Vmov); + return DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, VT, Vmov); } // Use vmov.f32 to materialize other v2f32 and v4f32 splats. 
@@ -8018,7 +8051,6 @@ SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG, if (!isa<ConstantFPSDNode>(V) && !isa<ConstantSDNode>(V)) isConstant = false; - ValueCounts.insert(std::make_pair(V, 0)); unsigned &Count = ValueCounts[V]; // Is this value dominant? (takes up more than half of the lanes) @@ -8335,9 +8367,10 @@ SDValue ARMTargetLowering::ReconstructShuffle(SDValue Op, } // Final check before we try to actually produce a shuffle. - LLVM_DEBUG(for (auto Src - : Sources) - assert(Src.ShuffleVec.getValueType() == ShuffleVT);); + LLVM_DEBUG({ + for (auto Src : Sources) + assert(Src.ShuffleVec.getValueType() == ShuffleVT); + }); // The stars all align, our next step is to produce the mask for the shuffle. SmallVector<int, 8> Mask(ShuffleVT.getVectorNumElements(), -1); @@ -8542,7 +8575,7 @@ static SDValue LowerVECTOR_SHUFFLEv8i8(SDValue Op, SmallVector<SDValue, 8> VTBLMask; for (int I : ShuffleMask) - VTBLMask.push_back(DAG.getConstant(I, DL, MVT::i32)); + VTBLMask.push_back(DAG.getSignedConstant(I, DL, MVT::i32)); if (V2.getNode()->isUndef()) return DAG.getNode(ARMISD::VTBL1, DL, MVT::v8i8, V1, @@ -9210,7 +9243,7 @@ static SDValue LowerCONCAT_VECTORS_i1(SDValue Op, SelectionDAG &DAG, }; // Concat each pair of subvectors and pack into the lower half of the array. - SmallVector<SDValue> ConcatOps(Op->op_begin(), Op->op_end()); + SmallVector<SDValue> ConcatOps(Op->ops()); while (ConcatOps.size() > 1) { for (unsigned I = 0, E = ConcatOps.size(); I != E; I += 2) { SDValue V1 = ConcatOps[I]; @@ -10467,33 +10500,42 @@ static void ReplaceREADCYCLECOUNTER(SDNode *N, Results.push_back(Cycles32.getValue(1)); } -static SDValue createGPRPairNode(SelectionDAG &DAG, SDValue V) { - SDLoc dl(V.getNode()); - auto [VLo, VHi] = DAG.SplitScalar(V, dl, MVT::i32, MVT::i32); - bool isBigEndian = DAG.getDataLayout().isBigEndian(); - if (isBigEndian) - std::swap (VLo, VHi); +static SDValue createGPRPairNode2xi32(SelectionDAG &DAG, SDValue V0, + SDValue V1) { + SDLoc dl(V0.getNode()); SDValue RegClass = DAG.getTargetConstant(ARM::GPRPairRegClassID, dl, MVT::i32); SDValue SubReg0 = DAG.getTargetConstant(ARM::gsub_0, dl, MVT::i32); SDValue SubReg1 = DAG.getTargetConstant(ARM::gsub_1, dl, MVT::i32); - const SDValue Ops[] = { RegClass, VLo, SubReg0, VHi, SubReg1 }; + const SDValue Ops[] = {RegClass, V0, SubReg0, V1, SubReg1}; return SDValue( DAG.getMachineNode(TargetOpcode::REG_SEQUENCE, dl, MVT::Untyped, Ops), 0); } +static SDValue createGPRPairNodei64(SelectionDAG &DAG, SDValue V) { + SDLoc dl(V.getNode()); + auto [VLo, VHi] = DAG.SplitScalar(V, dl, MVT::i32, MVT::i32); + bool isBigEndian = DAG.getDataLayout().isBigEndian(); + if (isBigEndian) + std::swap(VLo, VHi); + return createGPRPairNode2xi32(DAG, VLo, VHi); +} + static void ReplaceCMP_SWAP_64Results(SDNode *N, - SmallVectorImpl<SDValue> & Results, - SelectionDAG &DAG) { + SmallVectorImpl<SDValue> &Results, + SelectionDAG &DAG) { assert(N->getValueType(0) == MVT::i64 && "AtomicCmpSwap on types less than 64 should be legal"); - SDValue Ops[] = {N->getOperand(1), - createGPRPairNode(DAG, N->getOperand(2)), - createGPRPairNode(DAG, N->getOperand(3)), - N->getOperand(0)}; + SDValue Ops[] = { + createGPRPairNode2xi32(DAG, N->getOperand(1), + DAG.getUNDEF(MVT::i32)), // pointer, temp + createGPRPairNodei64(DAG, N->getOperand(2)), // expected + createGPRPairNodei64(DAG, N->getOperand(3)), // new + N->getOperand(0), // chain in + }; SDNode *CmpSwap = DAG.getMachineNode( ARM::CMP_SWAP_64, SDLoc(N), - DAG.getVTList(MVT::Untyped, MVT::i32, 
MVT::Other), Ops); + DAG.getVTList(MVT::Untyped, MVT::Untyped, MVT::Other), Ops); MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand(); DAG.setNodeMemRefs(cast<MachineSDNode>(CmpSwap), {MemOp}); @@ -10536,21 +10578,14 @@ SDValue ARMTargetLowering::LowerFSETCC(SDValue Op, SelectionDAG &DAG) const { ARMCC::CondCodes CondCode, CondCode2; FPCCToARMCC(CC, CondCode, CondCode2); - // FIXME: Chain is not handled correctly here. Currently the FPSCR is implicit - // in CMPFP and CMPFPE, but instead it should be made explicit by these - // instructions using a chain instead of glue. This would also fix the problem - // here (and also in LowerSELECT_CC) where we generate two comparisons when - // CondCode2 != AL. SDValue True = DAG.getConstant(1, dl, VT); SDValue False = DAG.getConstant(0, dl, VT); SDValue ARMcc = DAG.getConstant(CondCode, dl, MVT::i32); - SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl, IsSignaling); - SDValue Result = getCMOV(dl, VT, False, True, ARMcc, CCR, Cmp, DAG); + SDValue Result = getCMOV(dl, VT, False, True, ARMcc, Cmp, DAG); if (CondCode2 != ARMCC::AL) { ARMcc = DAG.getConstant(CondCode2, dl, MVT::i32); - Cmp = getVFPCmp(LHS, RHS, DAG, dl, IsSignaling); - Result = getCMOV(dl, VT, Result, True, ARMcc, CCR, Cmp, DAG); + Result = getCMOV(dl, VT, Result, True, ARMcc, Cmp, DAG); } return DAG.getMergeValues({Result, Chain}, dl); } @@ -10564,6 +10599,17 @@ SDValue ARMTargetLowering::LowerSPONENTRY(SDValue Op, SelectionDAG &DAG) const { return DAG.getFrameIndex(FI, VT); } +SDValue ARMTargetLowering::LowerFP_TO_BF16(SDValue Op, + SelectionDAG &DAG) const { + SDLoc DL(Op); + MakeLibCallOptions CallOptions; + MVT SVT = Op.getOperand(0).getSimpleValueType(); + RTLIB::Libcall LC = RTLIB::getFPROUND(SVT, MVT::bf16); + SDValue Res = + makeLibCall(DAG, LC, MVT::f32, Op.getOperand(0), CallOptions, DL).first; + return DAG.getBitcast(MVT::i32, Res); +} + SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { LLVM_DEBUG(dbgs() << "Lowering node: "; Op.dump()); switch (Op.getOpcode()) { @@ -10689,6 +10735,8 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::STRICT_FSETCCS: return LowerFSETCC(Op, DAG); case ISD::SPONENTRY: return LowerSPONENTRY(Op, DAG); + case ISD::FP_TO_BF16: + return LowerFP_TO_BF16(Op, DAG); case ARMISD::WIN__DBZCHK: return SDValue(); } } @@ -13803,6 +13851,14 @@ ARMTargetLowering::isDesirableToCommuteWithShift(const SDNode *N, N->getOpcode() == ISD::SRL) && "Expected shift op"); + SDValue ShiftLHS = N->getOperand(0); + if (!ShiftLHS->hasOneUse()) + return false; + + if (ShiftLHS.getOpcode() == ISD::SIGN_EXTEND && + !ShiftLHS.getOperand(0)->hasOneUse()) + return false; + if (Level == BeforeLegalizeTypes) return true; @@ -13938,7 +13994,7 @@ static SDValue PerformSHLSimplify(SDNode *N, return SDValue(); // Check that all the users could perform the shl themselves. 
- for (auto *U : N->uses()) { + for (auto *U : N->users()) { switch(U->getOpcode()) { default: return SDValue(); @@ -14435,9 +14491,9 @@ static SDValue PerformANDCombine(SDNode *N, DAG, dl, VbicVT, VT, OtherModImm); if (Val.getNode()) { SDValue Input = - DAG.getNode(ISD::BITCAST, dl, VbicVT, N->getOperand(0)); + DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, VbicVT, N->getOperand(0)); SDValue Vbic = DAG.getNode(ARMISD::VBICIMM, dl, VbicVT, Input, Val); - return DAG.getNode(ISD::BITCAST, dl, VT, Vbic); + return DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, VT, Vbic); } } } @@ -14731,9 +14787,9 @@ static SDValue PerformORCombine(SDNode *N, SplatBitSize, DAG, dl, VorrVT, VT, OtherModImm); if (Val.getNode()) { SDValue Input = - DAG.getNode(ISD::BITCAST, dl, VorrVT, N->getOperand(0)); + DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, VorrVT, N->getOperand(0)); SDValue Vorr = DAG.getNode(ARMISD::VORRIMM, dl, VorrVT, Input, Val); - return DAG.getNode(ISD::BITCAST, dl, VT, Vorr); + return DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, VT, Vorr); } } } @@ -14980,7 +15036,7 @@ static SDValue PerformBFICombine(SDNode *N, SelectionDAG &DAG) { } // Check that N is CMPZ(CSINC(0, 0, CC, X)), -// or CMPZ(CMOV(1, 0, CC, $cpsr, X)) +// or CMPZ(CMOV(1, 0, CC, X)) // return X if valid. static SDValue IsCMPZCSINC(SDNode *Cmp, ARMCC::CondCodes &CC) { if (Cmp->getOpcode() != ARMISD::CMPZ || !isNullConstant(Cmp->getOperand(1))) @@ -15004,22 +15060,22 @@ static SDValue IsCMPZCSINC(SDNode *Cmp, ARMCC::CondCodes &CC) { if (CSInc.getOpcode() == ARMISD::CMOV && isOneConstant(CSInc.getOperand(0)) && isNullConstant(CSInc.getOperand(1)) && CSInc->hasOneUse()) { CC = (ARMCC::CondCodes)CSInc.getConstantOperandVal(2); - return CSInc.getOperand(4); + return CSInc.getOperand(3); } if (CSInc.getOpcode() == ARMISD::CMOV && isOneConstant(CSInc.getOperand(1)) && isNullConstant(CSInc.getOperand(0)) && CSInc->hasOneUse()) { CC = ARMCC::getOppositeCondition( (ARMCC::CondCodes)CSInc.getConstantOperandVal(2)); - return CSInc.getOperand(4); + return CSInc.getOperand(3); } return SDValue(); } static SDValue PerformCMPZCombine(SDNode *N, SelectionDAG &DAG) { // Given CMPZ(CSINC(C, 0, 0, EQ), 0), we can just use C directly. As in - // t92: glue = ARMISD::CMPZ t74, 0 + // t92: flags = ARMISD::CMPZ t74, 0 // t93: i32 = ARMISD::CSINC 0, 0, 1, t92 - // t96: glue = ARMISD::CMPZ t93, 0 + // t96: flags = ARMISD::CMPZ t93, 0 // t114: i32 = ARMISD::CSINV 0, 0, 0, t96 ARMCC::CondCodes Cond; if (SDValue C = IsCMPZCSINC(N, Cond)) @@ -15124,9 +15180,9 @@ static SDValue PerformVMOVRRDCombine(SDNode *N, SDValue Op0, Op1; while (BV.getOpcode() == ISD::INSERT_VECTOR_ELT) { if (isa<ConstantSDNode>(BV.getOperand(2))) { - if (BV.getConstantOperandVal(2) == Offset) + if (BV.getConstantOperandVal(2) == Offset && !Op0) Op0 = BV.getOperand(1); - if (BV.getConstantOperandVal(2) == Offset + 1) + if (BV.getConstantOperandVal(2) == Offset + 1 && !Op1) Op1 = BV.getOperand(1); } BV = BV.getOperand(0); @@ -15324,7 +15380,7 @@ PerformARMBUILD_VECTORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) { assert(EltVT == MVT::f32 && "Unexpected type!"); // Check 1.2. 
- SDNode *Use = *N->use_begin(); + SDNode *Use = *N->user_begin(); if (Use->getOpcode() != ISD::BITCAST || Use->getValueType(0).isFloatingPoint()) return SDValue(); @@ -15434,6 +15490,9 @@ static SDValue PerformVECTOR_REG_CASTCombine(SDNode *N, SelectionDAG &DAG, if (ST->isLittle()) return DAG.getNode(ISD::BITCAST, dl, VT, Op); + // VT VECTOR_REG_CAST (VT Op) -> Op + if (Op.getValueType() == VT) + return Op; // VECTOR_REG_CAST undef -> undef if (Op.isUndef()) return DAG.getUNDEF(VT); @@ -15526,9 +15585,8 @@ PerformExtractEltToVMOVRRD(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) { !isa<ConstantSDNode>(Ext.getOperand(1)) || Ext.getConstantOperandVal(1) % 2 != 0) return SDValue(); - if (Ext->use_size() == 1 && - (Ext->use_begin()->getOpcode() == ISD::SINT_TO_FP || - Ext->use_begin()->getOpcode() == ISD::UINT_TO_FP)) + if (Ext->hasOneUse() && (Ext->user_begin()->getOpcode() == ISD::SINT_TO_FP || + Ext->user_begin()->getOpcode() == ISD::UINT_TO_FP)) return SDValue(); SDValue Op0 = Ext.getOperand(0); @@ -15539,24 +15597,24 @@ PerformExtractEltToVMOVRRD(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) { return SDValue(); // Find another extract, of Lane + 1 - auto OtherIt = find_if(Op0->uses(), [&](SDNode *V) { + auto OtherIt = find_if(Op0->users(), [&](SDNode *V) { return V->getOpcode() == ISD::EXTRACT_VECTOR_ELT && isa<ConstantSDNode>(V->getOperand(1)) && V->getConstantOperandVal(1) == Lane + 1 && V->getOperand(0).getResNo() == ResNo; }); - if (OtherIt == Op0->uses().end()) + if (OtherIt == Op0->users().end()) return SDValue(); // For float extracts, we need to be converting to a i32 for both vector // lanes. SDValue OtherExt(*OtherIt, 0); if (OtherExt.getValueType() != MVT::i32) { - if (OtherExt->use_size() != 1 || - OtherExt->use_begin()->getOpcode() != ISD::BITCAST || - OtherExt->use_begin()->getValueType(0) != MVT::i32) + if (!OtherExt->hasOneUse() || + OtherExt->user_begin()->getOpcode() != ISD::BITCAST || + OtherExt->user_begin()->getValueType(0) != MVT::i32) return SDValue(); - OtherExt = SDValue(*OtherExt->use_begin(), 0); + OtherExt = SDValue(*OtherExt->user_begin(), 0); } // Convert the type to a f64 and extract with a VMOVRRD. @@ -16166,14 +16224,12 @@ static SDValue CombineBaseUpdate(SDNode *N, SmallVector<BaseUpdateUser, 8> BaseUpdates; // Search for a use of the address operand that is an increment. - for (SDNode::use_iterator UI = Addr.getNode()->use_begin(), - UE = Addr.getNode()->use_end(); UI != UE; ++UI) { - SDNode *User = *UI; - if (UI.getUse().getResNo() != Addr.getResNo() || - User->getNumOperands() != 2) + for (SDUse &Use : Addr->uses()) { + SDNode *User = Use.getUser(); + if (Use.getResNo() != Addr.getResNo() || User->getNumOperands() != 2) continue; - SDValue Inc = User->getOperand(UI.getOperandNo() == 1 ? 0 : 1); + SDValue Inc = User->getOperand(Use.getOperandNo() == 1 ? 
0 : 1); unsigned ConstInc = getPointerConstIncrement(User->getOpcode(), Addr, Inc, DCI.DAG); @@ -16188,15 +16244,14 @@ static SDValue CombineBaseUpdate(SDNode *N, if (findPointerConstIncrement(Addr.getNode(), &Base, &CInc)) { unsigned Offset = getPointerConstIncrement(Addr->getOpcode(), Base, CInc, DCI.DAG); - for (SDNode::use_iterator UI = Base->use_begin(), UE = Base->use_end(); - UI != UE; ++UI) { + for (SDUse &Use : Base->uses()) { - SDNode *User = *UI; - if (UI.getUse().getResNo() != Base.getResNo() || User == Addr.getNode() || + SDNode *User = Use.getUser(); + if (Use.getResNo() != Base.getResNo() || User == Addr.getNode() || User->getNumOperands() != 2) continue; - SDValue UserInc = User->getOperand(UI.getOperandNo() == 0 ? 1 : 0); + SDValue UserInc = User->getOperand(Use.getOperandNo() == 0 ? 1 : 0); unsigned UserOffset = getPointerConstIncrement(User->getOpcode(), Base, UserInc, DCI.DAG); @@ -16269,12 +16324,9 @@ static SDValue PerformMVEVLDCombine(SDNode *N, return SDValue(); // Search for a use of the address operand that is an increment. - for (SDNode::use_iterator UI = Addr.getNode()->use_begin(), - UE = Addr.getNode()->use_end(); - UI != UE; ++UI) { - SDNode *User = *UI; - if (User->getOpcode() != ISD::ADD || - UI.getUse().getResNo() != Addr.getResNo()) + for (SDUse &Use : Addr->uses()) { + SDNode *User = Use.getUser(); + if (User->getOpcode() != ISD::ADD || Use.getResNo() != Addr.getResNo()) continue; // Check that the add is independent of the load/store. Otherwise, folding @@ -16404,12 +16456,11 @@ static bool CombineVLDDUP(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) { // First check that all the vldN-lane uses are VDUPLANEs and that the lane // numbers match the load. unsigned VLDLaneNo = VLD->getConstantOperandVal(NumVecs + 3); - for (SDNode::use_iterator UI = VLD->use_begin(), UE = VLD->use_end(); - UI != UE; ++UI) { + for (SDUse &Use : VLD->uses()) { // Ignore uses of the chain result. - if (UI.getUse().getResNo() == NumVecs) + if (Use.getResNo() == NumVecs) continue; - SDNode *User = *UI; + SDNode *User = Use.getUser(); if (User->getOpcode() != ARMISD::VDUPLANE || VLDLaneNo != User->getConstantOperandVal(1)) return false; @@ -16429,14 +16480,12 @@ static bool CombineVLDDUP(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) { VLDMemInt->getMemOperand()); // Update the uses. - for (SDNode::use_iterator UI = VLD->use_begin(), UE = VLD->use_end(); - UI != UE; ++UI) { - unsigned ResNo = UI.getUse().getResNo(); + for (SDUse &Use : VLD->uses()) { + unsigned ResNo = Use.getResNo(); // Ignore uses of the chain result. if (ResNo == NumVecs) continue; - SDNode *User = *UI; - DCI.CombineTo(User, SDValue(VLDDup.getNode(), ResNo)); + DCI.CombineTo(Use.getUser(), SDValue(VLDDup.getNode(), ResNo)); } // Now the vldN-lane intrinsic is dead except for its chain result. @@ -17643,6 +17692,11 @@ SDValue ARMTargetLowering::PerformIntrinsicCombine(SDNode *N, // No immediate versions of these to check for. 
break; + case Intrinsic::arm_neon_vbsl: { + SDLoc dl(N); + return DAG.getNode(ARMISD::VBSP, dl, N->getValueType(0), N->getOperand(1), + N->getOperand(2), N->getOperand(3)); + } case Intrinsic::arm_mve_vqdmlah: case Intrinsic::arm_mve_vqdmlash: case Intrinsic::arm_mve_vqrdmlah: @@ -18102,7 +18156,7 @@ SDValue ARMTargetLowering::PerformCMOVToBFICombine(SDNode *CMOV, SelectionDAG &D SDValue Op0 = CMOV->getOperand(0); SDValue Op1 = CMOV->getOperand(1); auto CC = CMOV->getConstantOperandAPInt(2).getLimitedValue(); - SDValue CmpZ = CMOV->getOperand(4); + SDValue CmpZ = CMOV->getOperand(3); // The compare must be against zero. if (!isNullConstant(CmpZ->getOperand(1))) @@ -18286,9 +18340,9 @@ static SDValue PerformHWLoopCombine(SDNode *N, SelectionDAG &DAG = DCI.DAG; SDValue Elements = Int.getOperand(2); unsigned IntOp = Int->getConstantOperandVal(1); - assert((N->hasOneUse() && N->use_begin()->getOpcode() == ISD::BR) - && "expected single br user"); - SDNode *Br = *N->use_begin(); + assert((N->hasOneUse() && N->user_begin()->getOpcode() == ISD::BR) && + "expected single br user"); + SDNode *Br = *N->user_begin(); SDValue OtherTarget = Br->getOperand(1); // Update the unconditional branch to branch to the given Dest. @@ -18346,12 +18400,11 @@ static SDValue PerformHWLoopCombine(SDNode *N, /// PerformBRCONDCombine - Target-specific DAG combining for ARMISD::BRCOND. SDValue ARMTargetLowering::PerformBRCONDCombine(SDNode *N, SelectionDAG &DAG) const { - SDValue Cmp = N->getOperand(4); + SDValue Cmp = N->getOperand(3); if (Cmp.getOpcode() != ARMISD::CMPZ) // Only looking at NE cases. return SDValue(); - EVT VT = N->getValueType(0); SDLoc dl(N); SDValue LHS = Cmp.getOperand(0); SDValue RHS = Cmp.getOperand(1); @@ -18360,17 +18413,17 @@ ARMTargetLowering::PerformBRCONDCombine(SDNode *N, SelectionDAG &DAG) const { SDValue ARMcc = N->getOperand(2); ARMCC::CondCodes CC = (ARMCC::CondCodes)ARMcc->getAsZExtVal(); - // (brcond Chain BB ne CPSR (cmpz (and (cmov 0 1 CC CPSR Cmp) 1) 0)) - // -> (brcond Chain BB CC CPSR Cmp) + // (brcond Chain BB ne (cmpz (and (cmov 0 1 CC Flags) 1) 0)) + // -> (brcond Chain BB CC Flags) if (CC == ARMCC::NE && LHS.getOpcode() == ISD::AND && LHS->hasOneUse() && LHS->getOperand(0)->getOpcode() == ARMISD::CMOV && LHS->getOperand(0)->hasOneUse() && isNullConstant(LHS->getOperand(0)->getOperand(0)) && isOneConstant(LHS->getOperand(0)->getOperand(1)) && isOneConstant(LHS->getOperand(1)) && isNullConstant(RHS)) { - return DAG.getNode( - ARMISD::BRCOND, dl, VT, Chain, BB, LHS->getOperand(0)->getOperand(2), - LHS->getOperand(0)->getOperand(3), LHS->getOperand(0)->getOperand(4)); + return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other, Chain, BB, + LHS->getOperand(0)->getOperand(2), + LHS->getOperand(0)->getOperand(3)); } return SDValue(); @@ -18379,7 +18432,7 @@ ARMTargetLowering::PerformBRCONDCombine(SDNode *N, SelectionDAG &DAG) const { /// PerformCMOVCombine - Target-specific DAG combining for ARMISD::CMOV. SDValue ARMTargetLowering::PerformCMOVCombine(SDNode *N, SelectionDAG &DAG) const { - SDValue Cmp = N->getOperand(4); + SDValue Cmp = N->getOperand(3); if (Cmp.getOpcode() != ARMISD::CMPZ) // Only looking at EQ and NE cases. return SDValue(); @@ -18419,42 +18472,38 @@ ARMTargetLowering::PerformCMOVCombine(SDNode *N, SelectionDAG &DAG) const { /// FIXME: Turn this into a target neutral optimization? 
SDValue Res; if (CC == ARMCC::NE && FalseVal == RHS && FalseVal != LHS) { - Res = DAG.getNode(ARMISD::CMOV, dl, VT, LHS, TrueVal, ARMcc, - N->getOperand(3), Cmp); + Res = DAG.getNode(ARMISD::CMOV, dl, VT, LHS, TrueVal, ARMcc, Cmp); } else if (CC == ARMCC::EQ && TrueVal == RHS) { SDValue ARMcc; SDValue NewCmp = getARMCmp(LHS, RHS, ISD::SETNE, ARMcc, DAG, dl); - Res = DAG.getNode(ARMISD::CMOV, dl, VT, LHS, FalseVal, ARMcc, - N->getOperand(3), NewCmp); + Res = DAG.getNode(ARMISD::CMOV, dl, VT, LHS, FalseVal, ARMcc, NewCmp); } - // (cmov F T ne CPSR (cmpz (cmov 0 1 CC CPSR Cmp) 0)) - // -> (cmov F T CC CPSR Cmp) + // (cmov F T ne (cmpz (cmov 0 1 CC Flags) 0)) + // -> (cmov F T CC Flags) if (CC == ARMCC::NE && LHS.getOpcode() == ARMISD::CMOV && LHS->hasOneUse() && isNullConstant(LHS->getOperand(0)) && isOneConstant(LHS->getOperand(1)) && isNullConstant(RHS)) { return DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, - LHS->getOperand(2), LHS->getOperand(3), - LHS->getOperand(4)); + LHS->getOperand(2), LHS->getOperand(3)); } if (!VT.isInteger()) return SDValue(); // Fold away an unneccessary CMPZ/CMOV - // CMOV A, B, C1, $cpsr, (CMPZ (CMOV 1, 0, C2, D), 0) -> - // if C1==EQ -> CMOV A, B, C2, $cpsr, D - // if C1==NE -> CMOV A, B, NOT(C2), $cpsr, D + // CMOV A, B, C1, (CMPZ (CMOV 1, 0, C2, D), 0) -> + // if C1==EQ -> CMOV A, B, C2, D + // if C1==NE -> CMOV A, B, NOT(C2), D if (N->getConstantOperandVal(2) == ARMCC::EQ || N->getConstantOperandVal(2) == ARMCC::NE) { ARMCC::CondCodes Cond; - if (SDValue C = IsCMPZCSINC(N->getOperand(4).getNode(), Cond)) { + if (SDValue C = IsCMPZCSINC(N->getOperand(3).getNode(), Cond)) { if (N->getConstantOperandVal(2) == ARMCC::NE) Cond = ARMCC::getOppositeCondition(Cond); return DAG.getNode(N->getOpcode(), SDLoc(N), MVT::i32, N->getOperand(0), N->getOperand(1), - DAG.getTargetConstant(Cond, SDLoc(N), MVT::i32), - N->getOperand(3), C); + DAG.getConstant(Cond, SDLoc(N), MVT::i32), C); } } @@ -18494,10 +18543,8 @@ ARMTargetLowering::PerformCMOVCombine(SDNode *N, SelectionDAG &DAG) const { // CMOV 0, z, !=, (CMPZ x, y) -> CMOV (SUBC x, y), z, !=, (SUBC x, y):1 SDValue Sub = DAG.getNode(ARMISD::SUBC, dl, DAG.getVTList(VT, MVT::i32), LHS, RHS); - SDValue CPSRGlue = DAG.getCopyToReg(DAG.getEntryNode(), dl, ARM::CPSR, - Sub.getValue(1), SDValue()); Res = DAG.getNode(ARMISD::CMOV, dl, VT, Sub, TrueVal, ARMcc, - N->getOperand(3), CPSRGlue.getValue(1)); + Sub.getValue(1)); FalseVal = Sub; } } else if (isNullConstant(TrueVal)) { @@ -18508,11 +18555,9 @@ ARMTargetLowering::PerformCMOVCombine(SDNode *N, SelectionDAG &DAG) const { // CMOV z, 0, ==, (CMPZ x, y) -> CMOV (SUBC x, y), z, !=, (SUBC x, y):1 SDValue Sub = DAG.getNode(ARMISD::SUBC, dl, DAG.getVTList(VT, MVT::i32), LHS, RHS); - SDValue CPSRGlue = DAG.getCopyToReg(DAG.getEntryNode(), dl, ARM::CPSR, - Sub.getValue(1), SDValue()); Res = DAG.getNode(ARMISD::CMOV, dl, VT, Sub, FalseVal, DAG.getConstant(ARMCC::NE, dl, MVT::i32), - N->getOperand(3), CPSRGlue.getValue(1)); + Sub.getValue(1)); FalseVal = Sub; } } @@ -18582,7 +18627,9 @@ static SDValue PerformBITCASTCombine(SDNode *N, // We may have a bitcast of something that has already had this bitcast // combine performed on it, so skip past any VECTOR_REG_CASTs. 
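With the separate CPSR/glue operand gone, ARMISD::CMOV is down to four operands: false value, true value, ARM condition code, and the flags-producing node, which is why the combines above now read operand 3 rather than operand 4. A condensed sketch of matching that layout, in the spirit of IsCMPZCSINC; the function name and the exact guards are illustrative, and the ARMISD/ARMCC declarations are assumed to come from the usual ARM backend headers, as in this file:

  #include "llvm/CodeGen/SelectionDAGNodes.h"
  // Assumes the ARM backend headers declaring ARMISD::CMOV and
  // ARMCC::CondCodes are already included.
  using namespace llvm;

  // Sketch: given CMOV(0, 1, CC, Flags), report CC and return the flags value.
  static SDValue matchBooleanCMOV(SDValue V, ARMCC::CondCodes &CC) {
    if (V.getOpcode() != ARMISD::CMOV || !V->hasOneUse())
      return SDValue();
    if (!isNullConstant(V.getOperand(0)) || !isOneConstant(V.getOperand(1)))
      return SDValue();
    CC = (ARMCC::CondCodes)V.getConstantOperandVal(2); // operand 2: condition
    return V.getOperand(3);                            // operand 3: flags
  }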
- while (Src.getOpcode() == ARMISD::VECTOR_REG_CAST) + if (Src.getOpcode() == ARMISD::VECTOR_REG_CAST && + Src.getOperand(0).getValueType().getScalarSizeInBits() <= + Src.getValueType().getScalarSizeInBits()) Src = Src.getOperand(0); // Bitcast from element-wise VMOV or VMVN doesn't need VREV if the VREV that @@ -19060,6 +19107,10 @@ SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N, return SDValue(); break; } + case ARMISD::VBSP: + if (N->getOperand(1) == N->getOperand(2)) + return N->getOperand(1); + return SDValue(); case ISD::INTRINSIC_VOID: case ISD::INTRINSIC_W_CHAIN: switch (N->getConstantOperandVal(1)) { @@ -19261,149 +19312,6 @@ bool ARMTargetLowering::isFNegFree(EVT VT) const { return false; } -/// Check if Ext1 and Ext2 are extends of the same type, doubling the bitwidth -/// of the vector elements. -static bool areExtractExts(Value *Ext1, Value *Ext2) { - auto areExtDoubled = [](Instruction *Ext) { - return Ext->getType()->getScalarSizeInBits() == - 2 * Ext->getOperand(0)->getType()->getScalarSizeInBits(); - }; - - if (!match(Ext1, m_ZExtOrSExt(m_Value())) || - !match(Ext2, m_ZExtOrSExt(m_Value())) || - !areExtDoubled(cast<Instruction>(Ext1)) || - !areExtDoubled(cast<Instruction>(Ext2))) - return false; - - return true; -} - -/// Check if sinking \p I's operands to I's basic block is profitable, because -/// the operands can be folded into a target instruction, e.g. -/// sext/zext can be folded into vsubl. -bool ARMTargetLowering::shouldSinkOperands(Instruction *I, - SmallVectorImpl<Use *> &Ops) const { - if (!I->getType()->isVectorTy()) - return false; - - if (Subtarget->hasNEON()) { - switch (I->getOpcode()) { - case Instruction::Sub: - case Instruction::Add: { - if (!areExtractExts(I->getOperand(0), I->getOperand(1))) - return false; - Ops.push_back(&I->getOperandUse(0)); - Ops.push_back(&I->getOperandUse(1)); - return true; - } - default: - return false; - } - } - - if (!Subtarget->hasMVEIntegerOps()) - return false; - - auto IsFMSMul = [&](Instruction *I) { - if (!I->hasOneUse()) - return false; - auto *Sub = cast<Instruction>(*I->users().begin()); - return Sub->getOpcode() == Instruction::FSub && Sub->getOperand(1) == I; - }; - auto IsFMS = [&](Instruction *I) { - if (match(I->getOperand(0), m_FNeg(m_Value())) || - match(I->getOperand(1), m_FNeg(m_Value()))) - return true; - return false; - }; - - auto IsSinker = [&](Instruction *I, int Operand) { - switch (I->getOpcode()) { - case Instruction::Add: - case Instruction::Mul: - case Instruction::FAdd: - case Instruction::ICmp: - case Instruction::FCmp: - return true; - case Instruction::FMul: - return !IsFMSMul(I); - case Instruction::Sub: - case Instruction::FSub: - case Instruction::Shl: - case Instruction::LShr: - case Instruction::AShr: - return Operand == 1; - case Instruction::Call: - if (auto *II = dyn_cast<IntrinsicInst>(I)) { - switch (II->getIntrinsicID()) { - case Intrinsic::fma: - return !IsFMS(I); - case Intrinsic::sadd_sat: - case Intrinsic::uadd_sat: - case Intrinsic::arm_mve_add_predicated: - case Intrinsic::arm_mve_mul_predicated: - case Intrinsic::arm_mve_qadd_predicated: - case Intrinsic::arm_mve_vhadd: - case Intrinsic::arm_mve_hadd_predicated: - case Intrinsic::arm_mve_vqdmull: - case Intrinsic::arm_mve_vqdmull_predicated: - case Intrinsic::arm_mve_vqdmulh: - case Intrinsic::arm_mve_qdmulh_predicated: - case Intrinsic::arm_mve_vqrdmulh: - case Intrinsic::arm_mve_qrdmulh_predicated: - case Intrinsic::arm_mve_fma_predicated: - return true; - case Intrinsic::ssub_sat: - case Intrinsic::usub_sat: - 
case Intrinsic::arm_mve_sub_predicated: - case Intrinsic::arm_mve_qsub_predicated: - case Intrinsic::arm_mve_hsub_predicated: - case Intrinsic::arm_mve_vhsub: - return Operand == 1; - default: - return false; - } - } - return false; - default: - return false; - } - }; - - for (auto OpIdx : enumerate(I->operands())) { - Instruction *Op = dyn_cast<Instruction>(OpIdx.value().get()); - // Make sure we are not already sinking this operand - if (!Op || any_of(Ops, [&](Use *U) { return U->get() == Op; })) - continue; - - Instruction *Shuffle = Op; - if (Shuffle->getOpcode() == Instruction::BitCast) - Shuffle = dyn_cast<Instruction>(Shuffle->getOperand(0)); - // We are looking for a splat that can be sunk. - if (!Shuffle || - !match(Shuffle, m_Shuffle( - m_InsertElt(m_Undef(), m_Value(), m_ZeroInt()), - m_Undef(), m_ZeroMask()))) - continue; - if (!IsSinker(I, OpIdx.index())) - continue; - - // All uses of the shuffle should be sunk to avoid duplicating it across gpr - // and vector registers - for (Use &U : Op->uses()) { - Instruction *Insn = cast<Instruction>(U.getUser()); - if (!IsSinker(Insn, U.getOperandNo())) - return false; - } - - Ops.push_back(&Shuffle->getOperandUse(0)); - if (Shuffle != Op) - Ops.push_back(&Op->getOperandUse(0)); - Ops.push_back(&OpIdx.value()); - } - return true; -} - Type *ARMTargetLowering::shouldConvertSplatType(ShuffleVectorInst *SVI) const { if (!Subtarget->hasMVEIntegerOps()) return nullptr; @@ -19436,10 +19344,10 @@ bool ARMTargetLowering::isVectorLoadExtDesirable(SDValue ExtVal) const { // If there's more than one user instruction, the loadext is desirable no // matter what. There can be two uses by the same instruction. if (ExtVal->use_empty() || - !ExtVal->use_begin()->isOnlyUserOf(ExtVal.getNode())) + !ExtVal->user_begin()->isOnlyUserOf(ExtVal.getNode())) return true; - SDNode *U = *ExtVal->use_begin(); + SDNode *U = *ExtVal->user_begin(); if ((U->getOpcode() == ISD::ADD || U->getOpcode() == ISD::SUB || U->getOpcode() == ISD::SHL || U->getOpcode() == ARMISD::VSHLIMM)) return false; @@ -19475,6 +19383,9 @@ bool ARMTargetLowering::allowTruncateForTailCall(Type *Ty1, Type *Ty2) const { /// patterns (and we don't have the non-fused floating point instruction). 
bool ARMTargetLowering::isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, EVT VT) const { + if (Subtarget->useSoftFloat()) + return false; + if (!VT.isSimple()) return false; @@ -20177,14 +20088,13 @@ void ARMTargetLowering::computeKnownBitsForTargetNode(const SDValue Op, // CSINV: KnownOp0 or ~KnownOp1 // CSNEG: KnownOp0 or KnownOp1 * -1 if (Op.getOpcode() == ARMISD::CSINC) - KnownOp1 = KnownBits::computeForAddSub( - /*Add=*/true, /*NSW=*/false, /*NUW=*/false, KnownOp1, - KnownBits::makeConstant(APInt(32, 1))); + KnownOp1 = + KnownBits::add(KnownOp1, KnownBits::makeConstant(APInt(32, 1))); else if (Op.getOpcode() == ARMISD::CSINV) std::swap(KnownOp1.Zero, KnownOp1.One); else if (Op.getOpcode() == ARMISD::CSNEG) - KnownOp1 = KnownBits::mul( - KnownOp1, KnownBits::makeConstant(APInt(32, -1))); + KnownOp1 = KnownBits::mul(KnownOp1, + KnownBits::makeConstant(APInt::getAllOnes(32))); Known = KnownOp0.intersectWith(KnownOp1); break; @@ -20663,7 +20573,7 @@ void ARMTargetLowering::LowerAsmOperandForConstraint(SDValue Op, } return; } - Result = DAG.getTargetConstant(CVal, SDLoc(Op), Op.getValueType()); + Result = DAG.getSignedTargetConstant(CVal, SDLoc(Op), Op.getValueType()); break; } @@ -21049,7 +20959,7 @@ bool ARMTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts); Info.ptrVal = I.getArgOperand(I.arg_size() - 1); Info.offset = 0; - Info.align.reset(); + Info.align = I.getParamAlign(I.arg_size() - 1).valueOrOne(); // volatile loads with NEON intrinsics not supported Info.flags = MachineMemOperand::MOLoad; return true; @@ -21096,7 +21006,7 @@ bool ARMTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts); Info.ptrVal = I.getArgOperand(0); Info.offset = 0; - Info.align.reset(); + Info.align = I.getParamAlign(0).valueOrOne(); // volatile stores with NEON intrinsics not supported Info.flags = MachineMemOperand::MOStore; return true; @@ -21262,30 +21172,26 @@ bool ARMTargetLowering::isExtractSubvectorCheap(EVT ResVT, EVT SrcVT, Instruction *ARMTargetLowering::makeDMB(IRBuilderBase &Builder, ARM_MB::MemBOpt Domain) const { - Module *M = Builder.GetInsertBlock()->getParent()->getParent(); - // First, if the target has no DMB, see what fallback we can use. if (!Subtarget->hasDataBarrier()) { // Some ARMv6 cpus can support data barriers with an mcr instruction. // Thumb1 and pre-v6 ARM mode use a libcall instead and should never get // here. if (Subtarget->hasV6Ops() && !Subtarget->isThumb()) { - Function *MCR = Intrinsic::getDeclaration(M, Intrinsic::arm_mcr); Value* args[6] = {Builder.getInt32(15), Builder.getInt32(0), Builder.getInt32(0), Builder.getInt32(7), Builder.getInt32(10), Builder.getInt32(5)}; - return Builder.CreateCall(MCR, args); + return Builder.CreateIntrinsic(Intrinsic::arm_mcr, {}, args); } else { // Instead of using barriers, atomic accesses on these subtargets use // libcalls. llvm_unreachable("makeDMB on a target so old that it has no barriers"); } } else { - Function *DMB = Intrinsic::getDeclaration(M, Intrinsic::arm_dmb); // Only a full system barrier exists in the M-class architectures. Domain = Subtarget->isMClass() ? 
ARM_MB::SY : Domain; Constant *CDomain = Builder.getInt32(Domain); - return Builder.CreateCall(DMB, CDomain); + return Builder.CreateIntrinsic(Intrinsic::arm_dmb, {}, CDomain); } } @@ -21430,7 +21336,7 @@ bool ARMTargetLowering::shouldInsertFencesForAtomic( return InsertFencesForAtomic; } -bool ARMTargetLowering::useLoadStackGuardNode() const { +bool ARMTargetLowering::useLoadStackGuardNode(const Module &M) const { // ROPI/RWPI are not supported currently. return !Subtarget->isROPI() && !Subtarget->isRWPI(); } @@ -21538,9 +21444,9 @@ Value *ARMTargetLowering::emitLoadLinked(IRBuilderBase &Builder, Type *ValueTy, if (ValueTy->getPrimitiveSizeInBits() == 64) { Intrinsic::ID Int = IsAcquire ? Intrinsic::arm_ldaexd : Intrinsic::arm_ldrexd; - Function *Ldrex = Intrinsic::getDeclaration(M, Int); - Value *LoHi = Builder.CreateCall(Ldrex, Addr, "lohi"); + Value *LoHi = + Builder.CreateIntrinsic(Int, {}, Addr, /*FMFSource=*/nullptr, "lohi"); Value *Lo = Builder.CreateExtractValue(LoHi, 0, "lo"); Value *Hi = Builder.CreateExtractValue(LoHi, 1, "hi"); @@ -21554,8 +21460,7 @@ Value *ARMTargetLowering::emitLoadLinked(IRBuilderBase &Builder, Type *ValueTy, Type *Tys[] = { Addr->getType() }; Intrinsic::ID Int = IsAcquire ? Intrinsic::arm_ldaex : Intrinsic::arm_ldrex; - Function *Ldrex = Intrinsic::getDeclaration(M, Int, Tys); - CallInst *CI = Builder.CreateCall(Ldrex, Addr); + CallInst *CI = Builder.CreateIntrinsic(Int, Tys, Addr); CI->addParamAttr( 0, Attribute::get(M->getContext(), Attribute::ElementType, ValueTy)); @@ -21566,8 +21471,7 @@ void ARMTargetLowering::emitAtomicCmpXchgNoStoreLLBalance( IRBuilderBase &Builder) const { if (!Subtarget->hasV7Ops()) return; - Module *M = Builder.GetInsertBlock()->getParent()->getParent(); - Builder.CreateCall(Intrinsic::getDeclaration(M, Intrinsic::arm_clrex)); + Builder.CreateIntrinsic(Intrinsic::arm_clrex, {}, {}); } Value *ARMTargetLowering::emitStoreConditional(IRBuilderBase &Builder, @@ -21582,19 +21486,18 @@ Value *ARMTargetLowering::emitStoreConditional(IRBuilderBase &Builder, if (Val->getType()->getPrimitiveSizeInBits() == 64) { Intrinsic::ID Int = IsRelease ? Intrinsic::arm_stlexd : Intrinsic::arm_strexd; - Function *Strex = Intrinsic::getDeclaration(M, Int); Type *Int32Ty = Type::getInt32Ty(M->getContext()); Value *Lo = Builder.CreateTrunc(Val, Int32Ty, "lo"); Value *Hi = Builder.CreateTrunc(Builder.CreateLShr(Val, 32), Int32Ty, "hi"); if (!Subtarget->isLittle()) std::swap(Lo, Hi); - return Builder.CreateCall(Strex, {Lo, Hi, Addr}); + return Builder.CreateIntrinsic(Int, {}, {Lo, Hi, Addr}); } Intrinsic::ID Int = IsRelease ? 
Intrinsic::arm_stlex : Intrinsic::arm_strex; Type *Tys[] = { Addr->getType() }; - Function *Strex = Intrinsic::getDeclaration(M, Int, Tys); + Function *Strex = Intrinsic::getOrInsertDeclaration(M, Int, Tys); CallInst *CI = Builder.CreateCall( Strex, {Builder.CreateZExtOrBitCast( @@ -21722,14 +21625,13 @@ bool ARMTargetLowering::lowerInterleavedLoad( static const Intrinsic::ID LoadInts[3] = {Intrinsic::arm_neon_vld2, Intrinsic::arm_neon_vld3, Intrinsic::arm_neon_vld4}; - Function *VldnFunc = - Intrinsic::getDeclaration(LI->getModule(), LoadInts[Factor - 2], Tys); SmallVector<Value *, 2> Ops; Ops.push_back(BaseAddr); Ops.push_back(Builder.getInt32(LI->getAlign().value())); - return Builder.CreateCall(VldnFunc, Ops, "vldN"); + return Builder.CreateIntrinsic(LoadInts[Factor - 2], Tys, Ops, + /*FMFSource=*/nullptr, "vldN"); } else { assert((Factor == 2 || Factor == 4) && "expected interleave factor of 2 or 4 for MVE"); @@ -21737,12 +21639,11 @@ bool ARMTargetLowering::lowerInterleavedLoad( Factor == 2 ? Intrinsic::arm_mve_vld2q : Intrinsic::arm_mve_vld4q; Type *PtrTy = Builder.getPtrTy(LI->getPointerAddressSpace()); Type *Tys[] = {VecTy, PtrTy}; - Function *VldnFunc = - Intrinsic::getDeclaration(LI->getModule(), LoadInts, Tys); SmallVector<Value *, 2> Ops; Ops.push_back(BaseAddr); - return Builder.CreateCall(VldnFunc, Ops, "vldN"); + return Builder.CreateIntrinsic(LoadInts, Tys, Ops, /*FMFSource=*/nullptr, + "vldN"); } }; @@ -21883,14 +21784,11 @@ bool ARMTargetLowering::lowerInterleavedStore(StoreInst *SI, Type *PtrTy = Builder.getPtrTy(SI->getPointerAddressSpace()); Type *Tys[] = {PtrTy, SubVecTy}; - Function *VstNFunc = Intrinsic::getDeclaration( - SI->getModule(), StoreInts[Factor - 2], Tys); - SmallVector<Value *, 6> Ops; Ops.push_back(BaseAddr); append_range(Ops, Shuffles); Ops.push_back(Builder.getInt32(SI->getAlign().value())); - Builder.CreateCall(VstNFunc, Ops); + Builder.CreateIntrinsic(StoreInts[Factor - 2], Tys, Ops); } else { assert((Factor == 2 || Factor == 4) && "expected interleave factor of 2 or 4 for MVE"); @@ -21898,15 +21796,13 @@ bool ARMTargetLowering::lowerInterleavedStore(StoreInst *SI, Factor == 2 ? Intrinsic::arm_mve_vst2q : Intrinsic::arm_mve_vst4q; Type *PtrTy = Builder.getPtrTy(SI->getPointerAddressSpace()); Type *Tys[] = {PtrTy, SubVecTy}; - Function *VstNFunc = - Intrinsic::getDeclaration(SI->getModule(), StoreInts, Tys); SmallVector<Value *, 6> Ops; Ops.push_back(BaseAddr); append_range(Ops, Shuffles); for (unsigned F = 0; F < Factor; F++) { Ops.push_back(Builder.getInt32(F)); - Builder.CreateCall(VstNFunc, Ops); + Builder.CreateIntrinsic(StoreInts, Tys, Ops); Ops.pop_back(); } } @@ -22020,7 +21916,9 @@ Align ARMTargetLowering::getABIAlignmentForCallingConv( // Avoid over-aligning vector parameters. It would require realigning the // stack and waste space for no real benefit. - return std::min(ABITypeAlign, DL.getStackAlignment()); + MaybeAlign StackAlign = DL.getStackAlignment(); + assert(StackAlign && "data layout string is missing stack alignment"); + return std::min(ABITypeAlign, *StackAlign); } /// Return true if a type is an AAPCS-VFP homogeneous aggregate or one of |
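The intrinsic-emission hunks above drop the explicit Intrinsic::getDeclaration / CreateCall pairs in favour of IRBuilderBase::CreateIntrinsic, which resolves the declaration from the intrinsic ID (plus the overload types, when there are any). A minimal sketch of both forms, modelled on the dmb and ldrex emission in this file; the wrapper functions themselves are hypothetical:

  #include "llvm/IR/IRBuilder.h"
  #include "llvm/IR/IntrinsicsARM.h"
  using namespace llvm;

  // Non-overloaded intrinsic: only the ID and the argument list are needed.
  static CallInst *emitDMB(IRBuilderBase &Builder, unsigned Domain) {
    return Builder.CreateIntrinsic(Intrinsic::arm_dmb, {},
                                   Builder.getInt32(Domain));
  }

  // Overloaded intrinsic: the concrete overload types (here the pointer type)
  // are passed alongside the ID, as the ldrex/ldaex hunk does.
  static CallInst *emitLoadExclusive(IRBuilderBase &Builder, Value *Addr,
                                     bool Acquire) {
    Intrinsic::ID ID = Acquire ? Intrinsic::arm_ldaex : Intrinsic::arm_ldrex;
    return Builder.CreateIntrinsic(ID, {Addr->getType()}, Addr);
  }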

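Further up, the known-bits handling for the conditional-select nodes switches to the dedicated KnownBits::add helper, and to APInt::getAllOnes for the CSNEG multiplier. A small self-contained illustration of those helpers outside any DAG context; the function names are made up for the example and the operands are assumed to be 32 bits wide, matching the i32-only ARM nodes:

  #include "llvm/Support/KnownBits.h"
  using namespace llvm;

  // CSINC contributes "operand + 1" on the not-taken path.
  static KnownBits knownBitsOfIncrement(const KnownBits &Op) {
    return KnownBits::add(Op, KnownBits::makeConstant(APInt(32, 1)));
  }

  // CSNEG contributes "operand * -1"; all-ones is the 32-bit encoding of -1.
  static KnownBits knownBitsOfNegate(const KnownBits &Op) {
    return KnownBits::mul(Op, KnownBits::makeConstant(APInt::getAllOnes(32)));
  }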