Diffstat (limited to 'llvm/lib/Target/ARM/ARMISelLowering.cpp')
-rw-r--r-- llvm/lib/Target/ARM/ARMISelLowering.cpp | 1002
1 file changed, 450 insertions(+), 552 deletions(-)
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index 75d16a42d020..bd8d6079e1ba 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -83,13 +83,11 @@
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsARM.h"
#include "llvm/IR/Module.h"
-#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCInstrItineraries.h"
-#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSchedule.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/BranchProbability.h"
@@ -151,6 +149,9 @@ MVEMaxSupportedInterleaveFactor("mve-max-interleave-factor", cl::Hidden,
cl::desc("Maximum interleave factor for MVE VLDn to generate."),
cl::init(2));
+/// Value type used for "flags" operands / results (either CPSR or FPSCR_NZCV).
+constexpr MVT FlagsVT = MVT::i32;
+
// The APCS parameter registers.
static const MCPhysReg GPRArgRegs[] = {
ARM::R0, ARM::R1, ARM::R2, ARM::R3
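
Most of the mechanical churn in this patch follows from the FlagsVT constant introduced above: nodes that produce condition flags (CMP, CMPZ, CMPFP/CMPFPE, FMSTAT, LSLS, LSRS1, ASRS1) now return an ordinary i32-typed value instead of MVT::Glue, and consumers such as CMOV and BRCOND take that value as a normal operand rather than a (CPSR register, glued compare) pair. A minimal sketch of the new shape, assuming DAG, dl, VT, LHS, RHS, TrueVal and FalseVal are in scope as they are in the lowering routines further down:

    // Sketch only, not part of the patch. The flags result is plain data
    // (FlagsVT == MVT::i32), so a single compare can feed any number of users.
    SDValue Flags = DAG.getNode(ARMISD::CMP, dl, FlagsVT, LHS, RHS);
    SDValue ARMcc = DAG.getConstant(ARMCC::EQ, dl, MVT::i32);
    SDValue Sel =
        DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMcc, Flags);

This is why helpers like duplicateCmp and the explicit CPSR copy-to-register dances disappear later in the diff.
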
@@ -803,6 +804,11 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
setAllExpand(MVT::bf16);
if (!Subtarget->hasFullFP16())
setOperationAction(ISD::BITCAST, MVT::bf16, Custom);
+ } else {
+ setOperationAction(ISD::BF16_TO_FP, MVT::f32, Expand);
+ setOperationAction(ISD::BF16_TO_FP, MVT::f64, Expand);
+ setOperationAction(ISD::FP_TO_BF16, MVT::f32, Custom);
+ setOperationAction(ISD::FP_TO_BF16, MVT::f64, Custom);
}
for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
@@ -1109,12 +1115,15 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
for (MVT VT : MVT::fp_valuetypes()) {
setLoadExtAction(ISD::EXTLOAD, VT, MVT::f32, Expand);
setLoadExtAction(ISD::EXTLOAD, VT, MVT::f16, Expand);
+ setLoadExtAction(ISD::EXTLOAD, VT, MVT::bf16, Expand);
}
// ... or truncating stores
setTruncStoreAction(MVT::f64, MVT::f32, Expand);
setTruncStoreAction(MVT::f32, MVT::f16, Expand);
setTruncStoreAction(MVT::f64, MVT::f16, Expand);
+ setTruncStoreAction(MVT::f32, MVT::bf16, Expand);
+ setTruncStoreAction(MVT::f64, MVT::bf16, Expand);
// ARM does not have i1 sign extending load.
for (MVT VT : MVT::integer_valuetypes())
@@ -1635,8 +1644,9 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
// Prefer likely predicted branches to selects on out-of-order cores.
PredictableSelectIsExpensive = Subtarget->getSchedModel().isOutOfOrder();
- setPrefLoopAlignment(Align(1ULL << Subtarget->getPrefLoopLogAlignment()));
- setPrefFunctionAlignment(Align(1ULL << Subtarget->getPrefLoopLogAlignment()));
+ setPrefLoopAlignment(Align(1ULL << Subtarget->getPreferBranchLogAlignment()));
+ setPrefFunctionAlignment(
+ Align(1ULL << Subtarget->getPreferBranchLogAlignment()));
setMinFunctionAlignment(Subtarget->isThumb() ? Align(2) : Align(4));
}
@@ -1731,14 +1741,14 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
MAKE_CASE(ARMISD::ASRL)
MAKE_CASE(ARMISD::LSRL)
MAKE_CASE(ARMISD::LSLL)
- MAKE_CASE(ARMISD::SRL_GLUE)
- MAKE_CASE(ARMISD::SRA_GLUE)
+ MAKE_CASE(ARMISD::LSLS)
+ MAKE_CASE(ARMISD::LSRS1)
+ MAKE_CASE(ARMISD::ASRS1)
MAKE_CASE(ARMISD::RRX)
MAKE_CASE(ARMISD::ADDC)
MAKE_CASE(ARMISD::ADDE)
MAKE_CASE(ARMISD::SUBC)
MAKE_CASE(ARMISD::SUBE)
- MAKE_CASE(ARMISD::LSLS)
MAKE_CASE(ARMISD::VMOVRRD)
MAKE_CASE(ARMISD::VMOVDRR)
MAKE_CASE(ARMISD::VMOVhr)
@@ -2324,6 +2334,59 @@ std::pair<SDValue, MachinePointerInfo> ARMTargetLowering::computeAddrForCallArg(
return std::make_pair(DstAddr, DstInfo);
}
+// Returns the type of copying which is required to set up a byval argument to
+// a tail-called function. This isn't needed for non-tail calls, because they
+// always need the equivalent of CopyOnce, but tail-calls sometimes need two to
+// avoid clobbering another argument (CopyViaTemp), and sometimes can be
+// optimised to zero copies when forwarding an argument from the caller's
+// caller (NoCopy).
+ARMTargetLowering::ByValCopyKind ARMTargetLowering::ByValNeedsCopyForTailCall(
+ SelectionDAG &DAG, SDValue Src, SDValue Dst, ISD::ArgFlagsTy Flags) const {
+ MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
+ ARMFunctionInfo *AFI = DAG.getMachineFunction().getInfo<ARMFunctionInfo>();
+
+ // Globals are always safe to copy from.
+ if (isa<GlobalAddressSDNode>(Src) || isa<ExternalSymbolSDNode>(Src))
+ return CopyOnce;
+
+ // Can only analyse frame index nodes, conservatively assume we need a
+ // temporary.
+ auto *SrcFrameIdxNode = dyn_cast<FrameIndexSDNode>(Src);
+ auto *DstFrameIdxNode = dyn_cast<FrameIndexSDNode>(Dst);
+ if (!SrcFrameIdxNode || !DstFrameIdxNode)
+ return CopyViaTemp;
+
+ int SrcFI = SrcFrameIdxNode->getIndex();
+ int DstFI = DstFrameIdxNode->getIndex();
+ assert(MFI.isFixedObjectIndex(DstFI) &&
+ "byval passed in non-fixed stack slot");
+
+ int64_t SrcOffset = MFI.getObjectOffset(SrcFI);
+ int64_t DstOffset = MFI.getObjectOffset(DstFI);
+
+ // If the source is in the local frame, then the copy to the argument memory
+ // is always valid.
+ bool FixedSrc = MFI.isFixedObjectIndex(SrcFI);
+ if (!FixedSrc ||
+ (FixedSrc && SrcOffset < -(int64_t)AFI->getArgRegsSaveSize()))
+ return CopyOnce;
+
+ // In the case of byval arguments split between registers and the stack,
+ // computeAddrForCallArg returns a FrameIndex which corresponds only to the
+ // stack portion, but the Src SDValue will refer to the full value, including
+ // the local stack memory that the register portion gets stored into. We only
+ // need to compare them for equality, so normalise on the full value version.
+ uint64_t RegSize = Flags.getByValSize() - MFI.getObjectSize(DstFI);
+ DstOffset -= RegSize;
+
+ // If the value is already in the correct location, then no copying is
+ // needed. If not, then we need to copy via a temporary.
+ if (SrcOffset == DstOffset)
+ return NoCopy;
+ else
+ return CopyViaTemp;
+}
+
void ARMTargetLowering::PassF64ArgInRegs(const SDLoc &dl, SelectionDAG &DAG,
SDValue Chain, SDValue &Arg,
RegsToPassVector &RegsToPass,
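
The three ByValCopyKind values above map onto where the byval source lives relative to the outgoing argument area. A hypothetical source-level illustration (AAPCS-style, not taken from the patch) of calls that, if lowered as tail calls, would fall into each class:

    // Sketch only; 'Big' is large enough to be passed byval on the stack.
    struct Big { int words[16]; };
    void callee(Big b);
    Big g;

    void fromLocal()            { Big t = g; callee(t); } // local frame -> CopyOnce
    void forwards(Big b)        { callee(b); }            // same slot   -> NoCopy
    void reorders(Big a, Big b) { callee(b); }            // moved slot  -> CopyViaTemp

In the last case the source still lives in the incoming argument area but at a different offset than its destination, so LowerCall below stages it through a temporary stack object before the outgoing stores.
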
@@ -2379,6 +2442,7 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
MachineFunction &MF = DAG.getMachineFunction();
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
MachineFunction::CallSiteInfo CSInfo;
bool isStructRet = (Outs.empty()) ? false : Outs[0].Flags.isSRet();
bool isThisReturn = false;
@@ -2407,8 +2471,8 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
isTailCall = false;
// For both the non-secure calls and the returns from a CMSE entry function,
- // the function needs to do some extra work afte r the call, or before the
- // return, respectively, thus it cannot end with atail call
+ // the function needs to do some extra work after the call, or before the
+ // return, respectively, thus it cannot end with a tail call
if (isCmseNSCall || AFI->isCmseNSEntryFunction())
isTailCall = false;
@@ -2461,8 +2525,9 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// Since callee will pop argument stack as a tail call, we must keep the
// popped size 16-byte aligned.
- Align StackAlign = DAG.getDataLayout().getStackAlignment();
- NumBytes = alignTo(NumBytes, StackAlign);
+ MaybeAlign StackAlign = DAG.getDataLayout().getStackAlignment();
+ assert(StackAlign && "data layout string is missing stack alignment");
+ NumBytes = alignTo(NumBytes, *StackAlign);
// SPDiff will be negative if this tail call requires more space than we
// would automatically have in our incoming argument space. Positive if we
@@ -2490,6 +2555,66 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
RegsToPassVector RegsToPass;
SmallVector<SDValue, 8> MemOpChains;
+ // If we are doing a tail-call, any byval arguments will be written to stack
+ // space which was used for incoming arguments. If any of the values being used
+ // are incoming byval arguments to this function, then they might be
+ // overwritten by the stores of the outgoing arguments. To avoid this, we
+ // need to make a temporary copy of them in local stack space, then copy back
+ // to the argument area.
+ DenseMap<unsigned, SDValue> ByValTemporaries;
+ SDValue ByValTempChain;
+ if (isTailCall) {
+ SmallVector<SDValue, 8> ByValCopyChains;
+ for (const CCValAssign &VA : ArgLocs) {
+ unsigned ArgIdx = VA.getValNo();
+ SDValue Src = OutVals[ArgIdx];
+ ISD::ArgFlagsTy Flags = Outs[ArgIdx].Flags;
+
+ if (!Flags.isByVal())
+ continue;
+
+ SDValue Dst;
+ MachinePointerInfo DstInfo;
+ std::tie(Dst, DstInfo) =
+ computeAddrForCallArg(dl, DAG, VA, SDValue(), true, SPDiff);
+ ByValCopyKind Copy = ByValNeedsCopyForTailCall(DAG, Src, Dst, Flags);
+
+ if (Copy == NoCopy) {
+ // If the argument is already at the correct offset on the stack
+ // (because we are forwarding a byval argument from our caller), we
+ // don't need any copying.
+ continue;
+ } else if (Copy == CopyOnce) {
+ // If the argument is in our local stack frame, no other argument
+ // preparation can clobber it, so we can copy it to the final location
+ // later.
+ ByValTemporaries[ArgIdx] = Src;
+ } else {
+ assert(Copy == CopyViaTemp && "unexpected enum value");
+ // If we might be copying this argument from the outgoing argument
+ // stack area, we need to copy via a temporary in the local stack
+ // frame.
+ int TempFrameIdx = MFI.CreateStackObject(
+ Flags.getByValSize(), Flags.getNonZeroByValAlign(), false);
+ SDValue Temp =
+ DAG.getFrameIndex(TempFrameIdx, getPointerTy(DAG.getDataLayout()));
+
+ SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), dl, MVT::i32);
+ SDValue AlignNode =
+ DAG.getConstant(Flags.getNonZeroByValAlign().value(), dl, MVT::i32);
+
+ SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
+ SDValue Ops[] = {Chain, Temp, Src, SizeNode, AlignNode};
+ ByValCopyChains.push_back(
+ DAG.getNode(ARMISD::COPY_STRUCT_BYVAL, dl, VTs, Ops));
+ ByValTemporaries[ArgIdx] = Temp;
+ }
+ }
+ if (!ByValCopyChains.empty())
+ ByValTempChain =
+ DAG.getNode(ISD::TokenFactor, dl, MVT::Other, ByValCopyChains);
+ }
+
// During a tail call, stores to the argument area must happen after all of
// the function's incoming arguments have been loaded because they may alias.
// This is done by folding in a TokenFactor from LowerFormalArguments, but
@@ -2527,6 +2652,9 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
if (isTailCall && VA.isMemLoc() && !AfterFormalArgLoads) {
Chain = DAG.getStackArgumentTokenFactor(Chain);
+ if (ByValTempChain)
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Chain,
+ ByValTempChain);
AfterFormalArgLoads = true;
}
@@ -2598,8 +2726,18 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
unsigned ByValArgsCount = CCInfo.getInRegsParamsCount();
unsigned CurByValIdx = CCInfo.getInRegsParamsProcessed();
- if (CurByValIdx < ByValArgsCount) {
+ SDValue ByValSrc;
+ bool NeedsStackCopy;
+ if (ByValTemporaries.contains(realArgIdx)) {
+ ByValSrc = ByValTemporaries[realArgIdx];
+ NeedsStackCopy = true;
+ } else {
+ ByValSrc = Arg;
+ NeedsStackCopy = !isTailCall;
+ }
+ // If part of the argument is in registers, load them.
+ if (CurByValIdx < ByValArgsCount) {
unsigned RegBegin, RegEnd;
CCInfo.getInRegsParamInfo(CurByValIdx, RegBegin, RegEnd);
@@ -2608,7 +2746,7 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
unsigned int i, j;
for (i = 0, j = RegBegin; j < RegEnd; i++, j++) {
SDValue Const = DAG.getConstant(4*i, dl, MVT::i32);
- SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
+ SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, ByValSrc, Const);
SDValue Load =
DAG.getLoad(PtrVT, dl, Chain, AddArg, MachinePointerInfo(),
DAG.InferPtrAlign(AddArg));
@@ -2623,14 +2761,16 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
CCInfo.nextInRegsParam();
}
- if (Flags.getByValSize() > 4*offset) {
+ // If the memory part of the argument isn't already in the correct place
+ // (which can happen with tail calls), copy it into the argument area.
+ if (NeedsStackCopy && Flags.getByValSize() > 4 * offset) {
auto PtrVT = getPointerTy(DAG.getDataLayout());
SDValue Dst;
MachinePointerInfo DstInfo;
std::tie(Dst, DstInfo) =
computeAddrForCallArg(dl, DAG, VA, StackPtr, isTailCall, SPDiff);
SDValue SrcOffset = DAG.getIntPtrConstant(4*offset, dl);
- SDValue Src = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, SrcOffset);
+ SDValue Src = DAG.getNode(ISD::ADD, dl, PtrVT, ByValSrc, SrcOffset);
SDValue SizeNode = DAG.getConstant(Flags.getByValSize() - 4*offset, dl,
MVT::i32);
SDValue AlignNode =
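
The ByValSrc plumbing above also covers byval arguments that are split between registers and the stack: the register portion is reloaded word by word from ByValSrc, and only the remaining getByValSize() - 4*offset bytes are copied into the argument area (and, for tail calls, only when NeedsStackCopy is set). A hypothetical AAPCS example of such a split, for illustration only:

    // Sketch only. With x in r0, the first 12 bytes of s are passed in r1-r3
    // and the remaining 12 bytes go on the stack.
    struct S { int w[6]; };      // 24 bytes, 4-byte aligned
    void callee(int x, S s);
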
@@ -2841,7 +2981,7 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
Ops.push_back(Callee);
if (isTailCall) {
- Ops.push_back(DAG.getTargetConstant(SPDiff, dl, MVT::i32));
+ Ops.push_back(DAG.getSignedTargetConstant(SPDiff, dl, MVT::i32));
}
// Add argument registers to the end of the list so that they are known live
@@ -2872,17 +3012,16 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
if (InGlue.getNode())
Ops.push_back(InGlue);
- SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
if (isTailCall) {
MF.getFrameInfo().setHasTailCall();
- SDValue Ret = DAG.getNode(ARMISD::TC_RETURN, dl, NodeTys, Ops);
+ SDValue Ret = DAG.getNode(ARMISD::TC_RETURN, dl, MVT::Other, Ops);
DAG.addNoMergeSiteInfo(Ret.getNode(), CLI.NoMerge);
DAG.addCallSiteInfo(Ret.getNode(), std::move(CSInfo));
return Ret;
}
// Returns a chain and a flag for retval copy to use.
- Chain = DAG.getNode(CallOpc, dl, NodeTys, Ops);
+ Chain = DAG.getNode(CallOpc, dl, {MVT::Other, MVT::Glue}, Ops);
DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
InGlue = Chain.getValue(1);
DAG.addCallSiteInfo(Chain.getNode(), std::move(CSInfo));
@@ -2892,7 +3031,7 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// we need to undo that after it returns to restore the status-quo.
bool TailCallOpt = getTargetMachine().Options.GuaranteedTailCallOpt;
uint64_t CalleePopBytes =
- canGuaranteeTCO(CallConv, TailCallOpt) ? alignTo(NumBytes, 16) : -1ULL;
+ canGuaranteeTCO(CallConv, TailCallOpt) ? alignTo(NumBytes, 16) : -1U;
Chain = DAG.getCALLSEQ_END(Chain, NumBytes, CalleePopBytes, InGlue, dl);
if (!Ins.empty())
@@ -2914,7 +3053,7 @@ void ARMTargetLowering::HandleByVal(CCState *State, unsigned &Size,
// Byval (as with any stack) slots are always at least 4 byte aligned.
Alignment = std::max(Alignment, Align(4));
- unsigned Reg = State->AllocateReg(GPRArgRegs);
+ MCRegister Reg = State->AllocateReg(GPRArgRegs);
if (!Reg)
return;
@@ -2959,50 +3098,6 @@ void ARMTargetLowering::HandleByVal(CCState *State, unsigned &Size,
Size = std::max<int>(Size - Excess, 0);
}
-/// MatchingStackOffset - Return true if the given stack call argument is
-/// already available in the same position (relatively) of the caller's
-/// incoming argument stack.
-static
-bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags,
- MachineFrameInfo &MFI, const MachineRegisterInfo *MRI,
- const TargetInstrInfo *TII) {
- unsigned Bytes = Arg.getValueSizeInBits() / 8;
- int FI = std::numeric_limits<int>::max();
- if (Arg.getOpcode() == ISD::CopyFromReg) {
- Register VR = cast<RegisterSDNode>(Arg.getOperand(1))->getReg();
- if (!VR.isVirtual())
- return false;
- MachineInstr *Def = MRI->getVRegDef(VR);
- if (!Def)
- return false;
- if (!Flags.isByVal()) {
- if (!TII->isLoadFromStackSlot(*Def, FI))
- return false;
- } else {
- return false;
- }
- } else if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Arg)) {
- if (Flags.isByVal())
- // ByVal argument is passed in as a pointer but it's now being
- // dereferenced. e.g.
- // define @foo(%struct.X* %A) {
- // tail call @bar(%struct.X* byval %A)
- // }
- return false;
- SDValue Ptr = Ld->getBasePtr();
- FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(Ptr);
- if (!FINode)
- return false;
- FI = FINode->getIndex();
- } else
- return false;
-
- assert(FI != std::numeric_limits<int>::max());
- if (!MFI.isFixedObjectIndex(FI))
- return false;
- return Offset == MFI.getObjectOffset(FI) && Bytes == MFI.getObjectSize(FI);
-}
-
/// IsEligibleForTailCallOptimization - Check whether the call is eligible
/// for tail call optimization. Targets which want to do tail call
/// optimization should implement this function. Note that this function also
@@ -3024,19 +3119,30 @@ bool ARMTargetLowering::IsEligibleForTailCallOptimization(
assert(Subtarget->supportsTailCall());
- // Indirect tail calls cannot be optimized for Thumb1 if the args
- // to the call take up r0-r3. The reason is that there are no legal registers
- // left to hold the pointer to the function to be called.
- // Similarly, if the function uses return address sign and authentication,
- // r12 is needed to hold the PAC and is not available to hold the callee
- // address.
- if (Outs.size() >= 4 &&
- (!isa<GlobalAddressSDNode>(Callee.getNode()) || isIndirect)) {
- if (Subtarget->isThumb1Only())
- return false;
- // Conservatively assume the function spills LR.
- if (MF.getInfo<ARMFunctionInfo>()->shouldSignReturnAddress(true))
+ // Indirect tail-calls require a register to hold the target address. That
+ // register must be:
+ // * Allocatable (i.e. r0-r7 if the target is Thumb1).
+ // * Not callee-saved, so must be one of r0-r3 or r12.
+ // * Not used to hold an argument to the tail-called function, which might be
+ // in r0-r3.
+ // * Not used to hold the return address authentication code, which is in r12
+ // if enabled.
+ // Sometimes, no register matches all of these conditions, so we can't do a
+ // tail-call.
+ if (!isa<GlobalAddressSDNode>(Callee.getNode()) || isIndirect) {
+ SmallSet<MCPhysReg, 5> AddressRegisters;
+ for (Register R : {ARM::R0, ARM::R1, ARM::R2, ARM::R3})
+ AddressRegisters.insert(R);
+ if (!(Subtarget->isThumb1Only() ||
+ MF.getInfo<ARMFunctionInfo>()->shouldSignReturnAddress(true)))
+ AddressRegisters.insert(ARM::R12);
+ for (const CCValAssign &AL : ArgLocs)
+ if (AL.isRegLoc())
+ AddressRegisters.erase(AL.getLocReg());
+ if (AddressRegisters.empty()) {
+ LLVM_DEBUG(dbgs() << "false (no reg to hold function pointer)\n");
return false;
+ }
}
// Look for obvious safe cases to perform tail call optimization that do not
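
Concretely, the register-availability check above can reject an indirect tail call simply because every candidate register is already taken. A hypothetical example, assuming a Thumb1-only target:

    // Sketch only. The four int arguments occupy r0-r3, and on Thumb1 neither
    // r12 nor the callee-saved r4-r7 may hold the target address, so there is
    // no register left for 'f' and this is emitted as a normal call.
    using Fn = int (*)(int, int, int, int);
    int trampoline(Fn f, int a, int b, int c) {
      return f(a, b, c, 42);
    }
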
@@ -3045,18 +3151,26 @@ bool ARMTargetLowering::IsEligibleForTailCallOptimization(
// Exception-handling functions need a special set of instructions to indicate
// a return to the hardware. Tail-calling another function would probably
// break this.
- if (CallerF.hasFnAttribute("interrupt"))
+ if (CallerF.hasFnAttribute("interrupt")) {
+ LLVM_DEBUG(dbgs() << "false (interrupt attribute)\n");
return false;
+ }
- if (canGuaranteeTCO(CalleeCC, getTargetMachine().Options.GuaranteedTailCallOpt))
+ if (canGuaranteeTCO(CalleeCC,
+ getTargetMachine().Options.GuaranteedTailCallOpt)) {
+ LLVM_DEBUG(dbgs() << (CalleeCC == CallerCC ? "true" : "false")
+ << " (guaranteed tail-call CC)\n");
return CalleeCC == CallerCC;
+ }
// Also avoid sibcall optimization if either caller or callee uses struct
// return semantics.
bool isCalleeStructRet = Outs.empty() ? false : Outs[0].Flags.isSRet();
bool isCallerStructRet = MF.getFunction().hasStructRetAttr();
- if (isCalleeStructRet || isCallerStructRet)
+ if (isCalleeStructRet != isCallerStructRet) {
+ LLVM_DEBUG(dbgs() << "false (struct-ret)\n");
return false;
+ }
// Externally-defined functions with weak linkage should not be
// tail-called on ARM when the OS does not support dynamic
@@ -3069,8 +3183,11 @@ bool ARMTargetLowering::IsEligibleForTailCallOptimization(
const GlobalValue *GV = G->getGlobal();
const Triple &TT = getTargetMachine().getTargetTriple();
if (GV->hasExternalWeakLinkage() &&
- (!TT.isOSWindows() || TT.isOSBinFormatELF() || TT.isOSBinFormatMachO()))
+ (!TT.isOSWindows() || TT.isOSBinFormatELF() ||
+ TT.isOSBinFormatMachO())) {
+ LLVM_DEBUG(dbgs() << "false (external weak linkage)\n");
return false;
+ }
}
// Check that the call results are passed in the same way.
@@ -3079,70 +3196,44 @@ bool ARMTargetLowering::IsEligibleForTailCallOptimization(
getEffectiveCallingConv(CalleeCC, isVarArg),
getEffectiveCallingConv(CallerCC, CallerF.isVarArg()), MF, C, Ins,
CCAssignFnForReturn(CalleeCC, isVarArg),
- CCAssignFnForReturn(CallerCC, CallerF.isVarArg())))
+ CCAssignFnForReturn(CallerCC, CallerF.isVarArg()))) {
+ LLVM_DEBUG(dbgs() << "false (incompatible results)\n");
return false;
+ }
// The callee has to preserve all registers the caller needs to preserve.
const ARMBaseRegisterInfo *TRI = Subtarget->getRegisterInfo();
const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
if (CalleeCC != CallerCC) {
const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
- if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
+ if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved)) {
+ LLVM_DEBUG(dbgs() << "false (not all registers preserved)\n");
return false;
+ }
}
- // If Caller's vararg or byval argument has been split between registers and
- // stack, do not perform tail call, since part of the argument is in caller's
- // local frame.
+ // If Caller's vararg argument has been split between registers and stack, do
+ // not perform tail call, since part of the argument is in caller's local
+ // frame.
const ARMFunctionInfo *AFI_Caller = MF.getInfo<ARMFunctionInfo>();
- if (AFI_Caller->getArgRegsSaveSize())
+ if (CLI.IsVarArg && AFI_Caller->getArgRegsSaveSize()) {
+ LLVM_DEBUG(dbgs() << "false (arg reg save area)\n");
return false;
+ }
// If the callee takes no arguments then go on to check the results of the
// call.
- if (!Outs.empty()) {
- if (CCInfo.getStackSize()) {
- // Check if the arguments are already laid out in the right way as
- // the caller's fixed stack objects.
- MachineFrameInfo &MFI = MF.getFrameInfo();
- const MachineRegisterInfo *MRI = &MF.getRegInfo();
- const TargetInstrInfo *TII = Subtarget->getInstrInfo();
- for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size();
- i != e;
- ++i, ++realArgIdx) {
- CCValAssign &VA = ArgLocs[i];
- EVT RegVT = VA.getLocVT();
- SDValue Arg = OutVals[realArgIdx];
- ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags;
- if (VA.getLocInfo() == CCValAssign::Indirect)
- return false;
- if (VA.needsCustom() && (RegVT == MVT::f64 || RegVT == MVT::v2f64)) {
- // f64 and vector types are split into multiple registers or
- // register/stack-slot combinations. The types will not match
- // the registers; give up on memory f64 refs until we figure
- // out what to do about this.
- if (!VA.isRegLoc())
- return false;
- if (!ArgLocs[++i].isRegLoc())
- return false;
- if (RegVT == MVT::v2f64) {
- if (!ArgLocs[++i].isRegLoc())
- return false;
- if (!ArgLocs[++i].isRegLoc())
- return false;
- }
- } else if (!VA.isRegLoc()) {
- if (!MatchingStackOffset(Arg, VA.getLocMemOffset(), Flags,
- MFI, MRI, TII))
- return false;
- }
- }
- }
-
- const MachineRegisterInfo &MRI = MF.getRegInfo();
- if (!parametersInCSRMatch(MRI, CallerPreserved, ArgLocs, OutVals))
- return false;
+ const MachineRegisterInfo &MRI = MF.getRegInfo();
+ if (!parametersInCSRMatch(MRI, CallerPreserved, ArgLocs, OutVals)) {
+ LLVM_DEBUG(dbgs() << "false (parameters in CSRs do not match)\n");
+ return false;
}
+ // If the stack arguments for this call do not fit into our own save area then
+ // the call cannot be made tail.
+ if (CCInfo.getStackSize() > AFI_Caller->getArgumentStackSize())
+ return false;
+
+ LLVM_DEBUG(dbgs() << "true\n");
return true;
}
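
The per-argument MatchingStackOffset walk is gone; instead, a call is a tail-call candidate only if its outgoing stack arguments fit within the caller's own incoming argument area (getStackSize() <= getArgumentStackSize()), with byval layout mismatches now handled by the temporary-copy logic in LowerCall. A hypothetical example that fails the new size check:

    // Sketch only (AAPCS). The caller receives no stack arguments, but the
    // callee needs 8 bytes of stack for its 5th and 6th parameters, so the
    // call cannot be emitted as a tail call.
    int callee6(int, int, int, int, int, int);
    int caller2(int a, int b) { return callee6(a, b, 1, 2, 3, 4); }
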
@@ -3150,7 +3241,7 @@ bool
ARMTargetLowering::CanLowerReturn(CallingConv::ID CallConv,
MachineFunction &MF, bool isVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs,
- LLVMContext &Context) const {
+ LLVMContext &Context, const Type *RetTy) const {
SmallVector<CCValAssign, 16> RVLocs;
CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
return CCInfo.CheckReturn(Outs, CCAssignFnForReturn(CallConv, isVarArg));
@@ -3373,7 +3464,7 @@ bool ARMTargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const {
return false;
SDValue TCChain = Chain;
- SDNode *Copy = *N->use_begin();
+ SDNode *Copy = *N->user_begin();
if (Copy->getOpcode() == ISD::CopyToReg) {
// If the copy has a glue operand, we conservatively assume it isn't safe to
// perform a tail call.
@@ -3384,7 +3475,7 @@ bool ARMTargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const {
SDNode *VMov = Copy;
// f64 returned in a pair of GPRs.
SmallPtrSet<SDNode*, 2> Copies;
- for (SDNode *U : VMov->uses()) {
+ for (SDNode *U : VMov->users()) {
if (U->getOpcode() != ISD::CopyToReg)
return false;
Copies.insert(U);
@@ -3392,7 +3483,7 @@ bool ARMTargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const {
if (Copies.size() > 2)
return false;
- for (SDNode *U : VMov->uses()) {
+ for (SDNode *U : VMov->users()) {
SDValue UseChain = U->getOperand(0);
if (Copies.count(UseChain.getNode()))
// Second CopyToReg
@@ -3411,7 +3502,7 @@ bool ARMTargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const {
// f32 returned in a single GPR.
if (!Copy->hasOneUse())
return false;
- Copy = *Copy->use_begin();
+ Copy = *Copy->user_begin();
if (Copy->getOpcode() != ISD::CopyToReg || !Copy->hasNUsesOfValue(1, 0))
return false;
// If the copy has a glue operand, we conservatively assume it isn't safe to
@@ -3424,7 +3515,7 @@ bool ARMTargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const {
}
bool HasRet = false;
- for (const SDNode *U : Copy->uses()) {
+ for (const SDNode *U : Copy->users()) {
if (U->getOpcode() != ARMISD::RET_GLUE &&
U->getOpcode() != ARMISD::INTRET_GLUE)
return false;
@@ -4701,8 +4792,9 @@ SDValue ARMTargetLowering::LowerFormalArguments(
if (canGuaranteeTCO(CallConv, TailCallOpt)) {
// The only way to guarantee a tail call is if the callee restores its
// argument area, but it must also keep the stack aligned when doing so.
- const DataLayout &DL = DAG.getDataLayout();
- StackArgSize = alignTo(StackArgSize, DL.getStackAlignment());
+ MaybeAlign StackAlign = DAG.getDataLayout().getStackAlignment();
+ assert(StackAlign && "data layout string is missing stack alignment");
+ StackArgSize = alignTo(StackArgSize, *StackAlign);
AFI->setArgumentStackToRestore(StackArgSize);
}
@@ -4840,14 +4932,11 @@ SDValue ARMTargetLowering::getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
CC == ISD::SETUGT && isa<ConstantSDNode>(LHS.getOperand(1)) &&
LHS.getConstantOperandVal(1) < 31) {
unsigned ShiftAmt = LHS.getConstantOperandVal(1) + 1;
- SDValue Shift = DAG.getNode(ARMISD::LSLS, dl,
- DAG.getVTList(MVT::i32, MVT::i32),
- LHS.getOperand(0),
- DAG.getConstant(ShiftAmt, dl, MVT::i32));
- SDValue Chain = DAG.getCopyToReg(DAG.getEntryNode(), dl, ARM::CPSR,
- Shift.getValue(1), SDValue());
+ SDValue Shift =
+ DAG.getNode(ARMISD::LSLS, dl, DAG.getVTList(MVT::i32, FlagsVT),
+ LHS.getOperand(0), DAG.getConstant(ShiftAmt, dl, MVT::i32));
ARMcc = DAG.getConstant(ARMCC::HI, dl, MVT::i32);
- return Chain.getValue(1);
+ return Shift.getValue(1);
}
ARMCC::CondCodes CondCode = IntCCToARMCC(CC);
@@ -4879,7 +4968,7 @@ SDValue ARMTargetLowering::getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
break;
}
ARMcc = DAG.getConstant(CondCode, dl, MVT::i32);
- return DAG.getNode(CompareType, dl, MVT::Glue, LHS, RHS);
+ return DAG.getNode(CompareType, dl, FlagsVT, LHS, RHS);
}
/// Returns an appropriate VFP CMP (fcmp{s|d}+fmstat) for the given operands.
@@ -4887,35 +4976,14 @@ SDValue ARMTargetLowering::getVFPCmp(SDValue LHS, SDValue RHS,
SelectionDAG &DAG, const SDLoc &dl,
bool Signaling) const {
assert(Subtarget->hasFP64() || RHS.getValueType() != MVT::f64);
- SDValue Cmp;
+ SDValue Flags;
if (!isFloatingPointZero(RHS))
- Cmp = DAG.getNode(Signaling ? ARMISD::CMPFPE : ARMISD::CMPFP,
- dl, MVT::Glue, LHS, RHS);
+ Flags = DAG.getNode(Signaling ? ARMISD::CMPFPE : ARMISD::CMPFP, dl, FlagsVT,
+ LHS, RHS);
else
- Cmp = DAG.getNode(Signaling ? ARMISD::CMPFPEw0 : ARMISD::CMPFPw0,
- dl, MVT::Glue, LHS);
- return DAG.getNode(ARMISD::FMSTAT, dl, MVT::Glue, Cmp);
-}
-
-/// duplicateCmp - Glue values can have only one use, so this function
-/// duplicates a comparison node.
-SDValue
-ARMTargetLowering::duplicateCmp(SDValue Cmp, SelectionDAG &DAG) const {
- unsigned Opc = Cmp.getOpcode();
- SDLoc DL(Cmp);
- if (Opc == ARMISD::CMP || Opc == ARMISD::CMPZ)
- return DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0),Cmp.getOperand(1));
-
- assert(Opc == ARMISD::FMSTAT && "unexpected comparison operation");
- Cmp = Cmp.getOperand(0);
- Opc = Cmp.getOpcode();
- if (Opc == ARMISD::CMPFP)
- Cmp = DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0),Cmp.getOperand(1));
- else {
- assert(Opc == ARMISD::CMPFPw0 && "unexpected operand of FMSTAT");
- Cmp = DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0));
- }
- return DAG.getNode(ARMISD::FMSTAT, DL, MVT::Glue, Cmp);
+ Flags = DAG.getNode(Signaling ? ARMISD::CMPFPEw0 : ARMISD::CMPFPw0, dl,
+ FlagsVT, LHS);
+ return DAG.getNode(ARMISD::FMSTAT, dl, FlagsVT, Flags);
}
// This function returns three things: the arithmetic computation itself
@@ -4943,7 +5011,7 @@ ARMTargetLowering::getARMXALUOOp(SDValue Op, SelectionDAG &DAG,
case ISD::SADDO:
ARMcc = DAG.getConstant(ARMCC::VC, dl, MVT::i32);
Value = DAG.getNode(ISD::ADD, dl, Op.getValueType(), LHS, RHS);
- OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, Value, LHS);
+ OverflowCmp = DAG.getNode(ARMISD::CMP, dl, FlagsVT, Value, LHS);
break;
case ISD::UADDO:
ARMcc = DAG.getConstant(ARMCC::HS, dl, MVT::i32);
@@ -4952,17 +5020,17 @@ ARMTargetLowering::getARMXALUOOp(SDValue Op, SelectionDAG &DAG,
Value = DAG.getNode(ARMISD::ADDC, dl,
DAG.getVTList(Op.getValueType(), MVT::i32), LHS, RHS)
.getValue(0);
- OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, Value, LHS);
+ OverflowCmp = DAG.getNode(ARMISD::CMP, dl, FlagsVT, Value, LHS);
break;
case ISD::SSUBO:
ARMcc = DAG.getConstant(ARMCC::VC, dl, MVT::i32);
Value = DAG.getNode(ISD::SUB, dl, Op.getValueType(), LHS, RHS);
- OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, LHS, RHS);
+ OverflowCmp = DAG.getNode(ARMISD::CMP, dl, FlagsVT, LHS, RHS);
break;
case ISD::USUBO:
ARMcc = DAG.getConstant(ARMCC::HS, dl, MVT::i32);
Value = DAG.getNode(ISD::SUB, dl, Op.getValueType(), LHS, RHS);
- OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, LHS, RHS);
+ OverflowCmp = DAG.getNode(ARMISD::CMP, dl, FlagsVT, LHS, RHS);
break;
case ISD::UMULO:
// We generate a UMUL_LOHI and then check if the high word is 0.
@@ -4970,7 +5038,7 @@ ARMTargetLowering::getARMXALUOOp(SDValue Op, SelectionDAG &DAG,
Value = DAG.getNode(ISD::UMUL_LOHI, dl,
DAG.getVTList(Op.getValueType(), Op.getValueType()),
LHS, RHS);
- OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, Value.getValue(1),
+ OverflowCmp = DAG.getNode(ARMISD::CMP, dl, FlagsVT, Value.getValue(1),
DAG.getConstant(0, dl, MVT::i32));
Value = Value.getValue(0); // We only want the low 32 bits for the result.
break;
@@ -4981,7 +5049,7 @@ ARMTargetLowering::getARMXALUOOp(SDValue Op, SelectionDAG &DAG,
Value = DAG.getNode(ISD::SMUL_LOHI, dl,
DAG.getVTList(Op.getValueType(), Op.getValueType()),
LHS, RHS);
- OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, Value.getValue(1),
+ OverflowCmp = DAG.getNode(ARMISD::CMP, dl, FlagsVT, Value.getValue(1),
DAG.getNode(ISD::SRA, dl, Op.getValueType(),
Value.getValue(0),
DAG.getConstant(31, dl, MVT::i32)));
@@ -5001,15 +5069,14 @@ ARMTargetLowering::LowerSignedALUO(SDValue Op, SelectionDAG &DAG) const {
SDValue Value, OverflowCmp;
SDValue ARMcc;
std::tie(Value, OverflowCmp) = getARMXALUOOp(Op, DAG, ARMcc);
- SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
SDLoc dl(Op);
// We use 0 and 1 as false and true values.
SDValue TVal = DAG.getConstant(1, dl, MVT::i32);
SDValue FVal = DAG.getConstant(0, dl, MVT::i32);
EVT VT = Op.getValueType();
- SDValue Overflow = DAG.getNode(ARMISD::CMOV, dl, VT, TVal, FVal,
- ARMcc, CCR, OverflowCmp);
+ SDValue Overflow =
+ DAG.getNode(ARMISD::CMOV, dl, VT, TVal, FVal, ARMcc, OverflowCmp);
SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32);
return DAG.getNode(ISD::MERGE_VALUES, dl, VTs, Value, Overflow);
@@ -5146,11 +5213,9 @@ SDValue ARMTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
SDValue Value, OverflowCmp;
SDValue ARMcc;
std::tie(Value, OverflowCmp) = getARMXALUOOp(Cond, DAG, ARMcc);
- SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
EVT VT = Op.getValueType();
- return getCMOV(dl, VT, SelectTrue, SelectFalse, ARMcc, CCR,
- OverflowCmp, DAG);
+ return getCMOV(dl, VT, SelectTrue, SelectFalse, ARMcc, OverflowCmp, DAG);
}
// Convert:
@@ -5178,14 +5243,9 @@ SDValue ARMTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
False = SelectTrue;
}
- if (True.getNode() && False.getNode()) {
- EVT VT = Op.getValueType();
- SDValue ARMcc = Cond.getOperand(2);
- SDValue CCR = Cond.getOperand(3);
- SDValue Cmp = duplicateCmp(Cond.getOperand(4), DAG);
- assert(True.getValueType() == VT);
- return getCMOV(dl, VT, True, False, ARMcc, CCR, Cmp, DAG);
- }
+ if (True.getNode() && False.getNode())
+ return getCMOV(dl, Op.getValueType(), True, False, Cond.getOperand(2),
+ Cond.getOperand(3), DAG);
}
}
@@ -5250,8 +5310,8 @@ static void checkVSELConstraints(ISD::CondCode CC, ARMCC::CondCodes &CondCode,
}
SDValue ARMTargetLowering::getCMOV(const SDLoc &dl, EVT VT, SDValue FalseVal,
- SDValue TrueVal, SDValue ARMcc, SDValue CCR,
- SDValue Cmp, SelectionDAG &DAG) const {
+ SDValue TrueVal, SDValue ARMcc,
+ SDValue Flags, SelectionDAG &DAG) const {
if (!Subtarget->hasFP64() && VT == MVT::f64) {
FalseVal = DAG.getNode(ARMISD::VMOVRRD, dl,
DAG.getVTList(MVT::i32, MVT::i32), FalseVal);
@@ -5264,15 +5324,13 @@ SDValue ARMTargetLowering::getCMOV(const SDLoc &dl, EVT VT, SDValue FalseVal,
SDValue FalseHigh = FalseVal.getValue(1);
SDValue Low = DAG.getNode(ARMISD::CMOV, dl, MVT::i32, FalseLow, TrueLow,
- ARMcc, CCR, Cmp);
+ ARMcc, Flags);
SDValue High = DAG.getNode(ARMISD::CMOV, dl, MVT::i32, FalseHigh, TrueHigh,
- ARMcc, CCR, duplicateCmp(Cmp, DAG));
+ ARMcc, Flags);
return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Low, High);
- } else {
- return DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMcc, CCR,
- Cmp);
}
+ return DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMcc, Flags);
}
static bool isGTorGE(ISD::CondCode CC) {
@@ -5545,12 +5603,11 @@ SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
}
SDValue ARMcc;
- SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
// Choose GE over PL, which vsel does not support
if (ARMcc->getAsZExtVal() == ARMCC::PL)
ARMcc = DAG.getConstant(ARMCC::GE, dl, MVT::i32);
- return getCMOV(dl, VT, FalseVal, TrueVal, ARMcc, CCR, Cmp, DAG);
+ return getCMOV(dl, VT, FalseVal, TrueVal, ARMcc, Cmp, DAG);
}
ARMCC::CondCodes CondCode, CondCode2;
@@ -5580,13 +5637,10 @@ SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
SDValue ARMcc = DAG.getConstant(CondCode, dl, MVT::i32);
SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl);
- SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
- SDValue Result = getCMOV(dl, VT, FalseVal, TrueVal, ARMcc, CCR, Cmp, DAG);
+ SDValue Result = getCMOV(dl, VT, FalseVal, TrueVal, ARMcc, Cmp, DAG);
if (CondCode2 != ARMCC::AL) {
SDValue ARMcc2 = DAG.getConstant(CondCode2, dl, MVT::i32);
- // FIXME: Needs another CMP because flag can have but one use.
- SDValue Cmp2 = getVFPCmp(LHS, RHS, DAG, dl);
- Result = getCMOV(dl, VT, Result, TrueVal, ARMcc2, CCR, Cmp2, DAG);
+ Result = getCMOV(dl, VT, Result, TrueVal, ARMcc2, Cmp, DAG);
}
return Result;
}
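
Because the comparison now produces a reusable FlagsVT value rather than single-use glue, the CondCode2 path above feeds the same Cmp into a second CMOV instead of re-emitting the VFP compare, so predicates that need two ARM condition codes now cost one vcmp/vmrs pair instead of two. A hedged sketch of the double-condition case, using the names from LowerSELECT_CC and assuming a predicate such as SETONE that maps to two condition codes:

    // Sketch only: one VFP compare feeding two conditional moves.
    SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl);      // FMSTAT -> FlagsVT value
    SDValue Result = getCMOV(dl, VT, FalseVal, TrueVal, ARMcc, Cmp, DAG);
    if (CondCode2 != ARMCC::AL)
      Result = getCMOV(dl, VT, Result, TrueVal,
                       DAG.getConstant(CondCode2, dl, MVT::i32), Cmp, DAG);
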
@@ -5687,9 +5741,8 @@ ARMTargetLowering::OptimizeVFPBrcond(SDValue Op, SelectionDAG &DAG) const {
RHS = DAG.getNode(ISD::AND, dl, MVT::i32,
bitcastf32Toi32(RHS, DAG), Mask);
SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
- SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
- return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other,
- Chain, Dest, ARMcc, CCR, Cmp);
+ return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other, Chain, Dest, ARMcc,
+ Cmp);
}
SDValue LHS1, LHS2;
@@ -5700,9 +5753,8 @@ ARMTargetLowering::OptimizeVFPBrcond(SDValue Op, SelectionDAG &DAG) const {
RHS2 = DAG.getNode(ISD::AND, dl, MVT::i32, RHS2, Mask);
ARMCC::CondCodes CondCode = IntCCToARMCC(CC);
ARMcc = DAG.getConstant(CondCode, dl, MVT::i32);
- SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Glue);
SDValue Ops[] = { Chain, ARMcc, LHS1, LHS2, RHS1, RHS2, Dest };
- return DAG.getNode(ARMISD::BCC_i64, dl, VTList, Ops);
+ return DAG.getNode(ARMISD::BCC_i64, dl, MVT::Other, Ops);
}
return SDValue();
@@ -5736,9 +5788,8 @@ SDValue ARMTargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
(ARMCC::CondCodes)cast<const ConstantSDNode>(ARMcc)->getZExtValue();
CondCode = ARMCC::getOppositeCondition(CondCode);
ARMcc = DAG.getConstant(CondCode, SDLoc(ARMcc), MVT::i32);
- SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
- return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other, Chain, Dest, ARMcc, CCR,
+ return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other, Chain, Dest, ARMcc,
OverflowCmp);
}
@@ -5790,18 +5841,15 @@ SDValue ARMTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
CondCode = ARMCC::getOppositeCondition(CondCode);
ARMcc = DAG.getConstant(CondCode, SDLoc(ARMcc), MVT::i32);
}
- SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
- return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other, Chain, Dest, ARMcc, CCR,
+ return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other, Chain, Dest, ARMcc,
OverflowCmp);
}
if (LHS.getValueType() == MVT::i32) {
SDValue ARMcc;
SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
- SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
- return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other,
- Chain, Dest, ARMcc, CCR, Cmp);
+ return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other, Chain, Dest, ARMcc, Cmp);
}
if (getTargetMachine().Options.UnsafeFPMath &&
@@ -5816,14 +5864,12 @@ SDValue ARMTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
SDValue ARMcc = DAG.getConstant(CondCode, dl, MVT::i32);
SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl);
- SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
- SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Glue);
- SDValue Ops[] = { Chain, Dest, ARMcc, CCR, Cmp };
- SDValue Res = DAG.getNode(ARMISD::BRCOND, dl, VTList, Ops);
+ SDValue Ops[] = {Chain, Dest, ARMcc, Cmp};
+ SDValue Res = DAG.getNode(ARMISD::BRCOND, dl, MVT::Other, Ops);
if (CondCode2 != ARMCC::AL) {
ARMcc = DAG.getConstant(CondCode2, dl, MVT::i32);
- SDValue Ops[] = { Res, Dest, ARMcc, CCR, Res.getValue(1) };
- Res = DAG.getNode(ARMISD::BRCOND, dl, VTList, Ops);
+ SDValue Ops[] = {Res, Dest, ARMcc, Cmp};
+ Res = DAG.getNode(ARMISD::BRCOND, dl, MVT::Other, Ops);
}
return Res;
}
@@ -5966,7 +6012,7 @@ static SDValue LowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG,
DAG.getConstant((1 << BW) - 1, DL, VT));
if (IsSigned)
Max = DAG.getNode(ISD::SMAX, DL, VT, Max,
- DAG.getConstant(-(1 << BW), DL, VT));
+ DAG.getSignedConstant(-(1 << BW), DL, VT));
return Max;
}
@@ -6263,10 +6309,13 @@ SDValue ARMTargetLowering::ExpandBITCAST(SDNode *N, SelectionDAG &DAG,
DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), MVT::i32, Op));
if ((DstVT == MVT::i16 || DstVT == MVT::i32) &&
- (SrcVT == MVT::f16 || SrcVT == MVT::bf16))
+ (SrcVT == MVT::f16 || SrcVT == MVT::bf16)) {
+ if (Subtarget->hasFullFP16() && !Subtarget->hasBF16())
+ Op = DAG.getBitcast(MVT::f16, Op);
return DAG.getNode(
ISD::TRUNCATE, SDLoc(N), DstVT,
MoveFromHPR(SDLoc(N), DAG, MVT::i32, SrcVT.getSimpleVT(), Op));
+ }
if (!(SrcVT == MVT::i64 || DstVT == MVT::i64))
return SDValue();
@@ -6328,7 +6377,6 @@ SDValue ARMTargetLowering::LowerShiftRightParts(SDValue Op,
SDValue ShOpHi = Op.getOperand(1);
SDValue ShAmt = Op.getOperand(2);
SDValue ARMcc;
- SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
unsigned Opc = (Op.getOpcode() == ISD::SRA_PARTS) ? ISD::SRA : ISD::SRL;
assert(Op.getOpcode() == ISD::SRA_PARTS || Op.getOpcode() == ISD::SRL_PARTS);
@@ -6343,8 +6391,8 @@ SDValue ARMTargetLowering::LowerShiftRightParts(SDValue Op,
SDValue LoBigShift = DAG.getNode(Opc, dl, VT, ShOpHi, ExtraShAmt);
SDValue CmpLo = getARMCmp(ExtraShAmt, DAG.getConstant(0, dl, MVT::i32),
ISD::SETGE, ARMcc, DAG, dl);
- SDValue Lo = DAG.getNode(ARMISD::CMOV, dl, VT, LoSmallShift, LoBigShift,
- ARMcc, CCR, CmpLo);
+ SDValue Lo =
+ DAG.getNode(ARMISD::CMOV, dl, VT, LoSmallShift, LoBigShift, ARMcc, CmpLo);
SDValue HiSmallShift = DAG.getNode(Opc, dl, VT, ShOpHi, ShAmt);
SDValue HiBigShift = Opc == ISD::SRA
@@ -6353,8 +6401,8 @@ SDValue ARMTargetLowering::LowerShiftRightParts(SDValue Op,
: DAG.getConstant(0, dl, VT);
SDValue CmpHi = getARMCmp(ExtraShAmt, DAG.getConstant(0, dl, MVT::i32),
ISD::SETGE, ARMcc, DAG, dl);
- SDValue Hi = DAG.getNode(ARMISD::CMOV, dl, VT, HiSmallShift, HiBigShift,
- ARMcc, CCR, CmpHi);
+ SDValue Hi =
+ DAG.getNode(ARMISD::CMOV, dl, VT, HiSmallShift, HiBigShift, ARMcc, CmpHi);
SDValue Ops[2] = { Lo, Hi };
return DAG.getMergeValues(Ops, dl);
@@ -6372,7 +6420,6 @@ SDValue ARMTargetLowering::LowerShiftLeftParts(SDValue Op,
SDValue ShOpHi = Op.getOperand(1);
SDValue ShAmt = Op.getOperand(2);
SDValue ARMcc;
- SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
assert(Op.getOpcode() == ISD::SHL_PARTS);
SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32,
@@ -6386,14 +6433,14 @@ SDValue ARMTargetLowering::LowerShiftLeftParts(SDValue Op,
SDValue HiBigShift = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ExtraShAmt);
SDValue CmpHi = getARMCmp(ExtraShAmt, DAG.getConstant(0, dl, MVT::i32),
ISD::SETGE, ARMcc, DAG, dl);
- SDValue Hi = DAG.getNode(ARMISD::CMOV, dl, VT, HiSmallShift, HiBigShift,
- ARMcc, CCR, CmpHi);
+ SDValue Hi =
+ DAG.getNode(ARMISD::CMOV, dl, VT, HiSmallShift, HiBigShift, ARMcc, CmpHi);
SDValue CmpLo = getARMCmp(ExtraShAmt, DAG.getConstant(0, dl, MVT::i32),
ISD::SETGE, ARMcc, DAG, dl);
SDValue LoSmallShift = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ShAmt);
SDValue Lo = DAG.getNode(ARMISD::CMOV, dl, VT, LoSmallShift,
- DAG.getConstant(0, dl, VT), ARMcc, CCR, CmpLo);
+ DAG.getConstant(0, dl, VT), ARMcc, CmpLo);
SDValue Ops[2] = { Lo, Hi };
return DAG.getMergeValues(Ops, dl);
@@ -6765,10 +6812,10 @@ static SDValue Expand64BitShift(SDNode *N, SelectionDAG &DAG,
SDValue Lo, Hi;
std::tie(Lo, Hi) = DAG.SplitScalar(N->getOperand(0), dl, MVT::i32, MVT::i32);
- // First, build a SRA_GLUE/SRL_GLUE op, which shifts the top part by one and
- // captures the result into a carry flag.
- unsigned Opc = N->getOpcode() == ISD::SRL ? ARMISD::SRL_GLUE:ARMISD::SRA_GLUE;
- Hi = DAG.getNode(Opc, dl, DAG.getVTList(MVT::i32, MVT::Glue), Hi);
+ // First, build a LSRS1/ASRS1 op, which shifts the top part by one and
+ // captures the shifted out bit into a carry flag.
+ unsigned Opc = N->getOpcode() == ISD::SRL ? ARMISD::LSRS1 : ARMISD::ASRS1;
+ Hi = DAG.getNode(Opc, dl, DAG.getVTList(MVT::i32, FlagsVT), Hi);
// The low part is an ARMISD::RRX operand, which shifts the carry in.
Lo = DAG.getNode(ARMISD::RRX, dl, MVT::i32, Lo, Hi.getValue(1));
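
The renamed LSRS1/ASRS1 nodes make the flag producer explicit: the shift-by-one of the high word defines a FlagsVT carry value, which the RRX on the low word then consumes. A hedged illustration of the pattern this expansion is aimed at (expected code shape only, little-endian r0/r1 pairing assumed):

    // Sketch only, not part of the patch.
    #include <cstdint>
    uint64_t lsr1(uint64_t x) { return x >> 1; }
    // expected shape:   lsrs r1, r1, #1   @ high word; bit 0 goes to carry
    //                   rrx  r0, r0       @ rotate the carry into the low word
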
@@ -6980,11 +7027,8 @@ static SDValue LowerSETCCCARRY(SDValue Op, SelectionDAG &DAG) {
SDValue TVal = DAG.getConstant(1, DL, MVT::i32);
SDValue ARMcc = DAG.getConstant(
IntCCToARMCC(cast<CondCodeSDNode>(Cond)->get()), DL, MVT::i32);
- SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
- SDValue Chain = DAG.getCopyToReg(DAG.getEntryNode(), DL, ARM::CPSR,
- Cmp.getValue(1), SDValue());
return DAG.getNode(ARMISD::CMOV, DL, Op.getValueType(), FVal, TVal, ARMcc,
- CCR, Chain.getValue(1));
+ Cmp.getValue(1));
}
/// isVMOVModifiedImm - Check if the specified splat value corresponds to a
@@ -7111,19 +7155,6 @@ static SDValue isVMOVModifiedImm(uint64_t SplatBits, uint64_t SplatUndef,
ImmMask <<= 1;
}
- if (DAG.getDataLayout().isBigEndian()) {
- // Reverse the order of elements within the vector.
- unsigned BytesPerElem = VectorVT.getScalarSizeInBits() / 8;
- unsigned Mask = (1 << BytesPerElem) - 1;
- unsigned NumElems = 8 / BytesPerElem;
- unsigned NewImm = 0;
- for (unsigned ElemNum = 0; ElemNum < NumElems; ++ElemNum) {
- unsigned Elem = ((Imm >> ElemNum * BytesPerElem) & Mask);
- NewImm |= Elem << (NumElems - ElemNum - 1) * BytesPerElem;
- }
- Imm = NewImm;
- }
-
// Op=1, Cmode=1110.
OpCmode = 0x1e;
VT = is128Bits ? MVT::v2i64 : MVT::v1i64;
@@ -7879,6 +7910,8 @@ static bool IsQRMVEInstruction(const SDNode *N, const SDNode *Op) {
case ISD::MUL:
case ISD::SADDSAT:
case ISD::UADDSAT:
+ case ISD::AVGFLOORS:
+ case ISD::AVGFLOORU:
return true;
case ISD::SUB:
case ISD::SSUBSAT:
@@ -7936,7 +7969,7 @@ SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
// generate a vdup of the constant.
if (ST->hasMVEIntegerOps() && VT.getScalarSizeInBits() == SplatBitSize &&
(SplatBitSize == 8 || SplatBitSize == 16 || SplatBitSize == 32) &&
- all_of(BVN->uses(),
+ all_of(BVN->users(),
[BVN](const SDNode *U) { return IsQRMVEInstruction(U, BVN); })) {
EVT DupVT = SplatBitSize == 32 ? MVT::v4i32
: SplatBitSize == 16 ? MVT::v8i16
@@ -7956,7 +7989,7 @@ SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
if (Val.getNode()) {
SDValue Vmov = DAG.getNode(ARMISD::VMOVIMM, dl, VmovVT, Val);
- return DAG.getNode(ISD::BITCAST, dl, VT, Vmov);
+ return DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, VT, Vmov);
}
// Try an immediate VMVN.
@@ -7966,7 +7999,7 @@ SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
VT, ST->hasMVEIntegerOps() ? MVEVMVNModImm : VMVNModImm);
if (Val.getNode()) {
SDValue Vmov = DAG.getNode(ARMISD::VMVNIMM, dl, VmovVT, Val);
- return DAG.getNode(ISD::BITCAST, dl, VT, Vmov);
+ return DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, VT, Vmov);
}
// Use vmov.f32 to materialize other v2f32 and v4f32 splats.
@@ -8018,7 +8051,6 @@ SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
if (!isa<ConstantFPSDNode>(V) && !isa<ConstantSDNode>(V))
isConstant = false;
- ValueCounts.insert(std::make_pair(V, 0));
unsigned &Count = ValueCounts[V];
// Is this value dominant? (takes up more than half of the lanes)
@@ -8335,9 +8367,10 @@ SDValue ARMTargetLowering::ReconstructShuffle(SDValue Op,
}
// Final check before we try to actually produce a shuffle.
- LLVM_DEBUG(for (auto Src
- : Sources)
- assert(Src.ShuffleVec.getValueType() == ShuffleVT););
+ LLVM_DEBUG({
+ for (auto Src : Sources)
+ assert(Src.ShuffleVec.getValueType() == ShuffleVT);
+ });
// The stars all align, our next step is to produce the mask for the shuffle.
SmallVector<int, 8> Mask(ShuffleVT.getVectorNumElements(), -1);
@@ -8542,7 +8575,7 @@ static SDValue LowerVECTOR_SHUFFLEv8i8(SDValue Op,
SmallVector<SDValue, 8> VTBLMask;
for (int I : ShuffleMask)
- VTBLMask.push_back(DAG.getConstant(I, DL, MVT::i32));
+ VTBLMask.push_back(DAG.getSignedConstant(I, DL, MVT::i32));
if (V2.getNode()->isUndef())
return DAG.getNode(ARMISD::VTBL1, DL, MVT::v8i8, V1,
@@ -9210,7 +9243,7 @@ static SDValue LowerCONCAT_VECTORS_i1(SDValue Op, SelectionDAG &DAG,
};
// Concat each pair of subvectors and pack into the lower half of the array.
- SmallVector<SDValue> ConcatOps(Op->op_begin(), Op->op_end());
+ SmallVector<SDValue> ConcatOps(Op->ops());
while (ConcatOps.size() > 1) {
for (unsigned I = 0, E = ConcatOps.size(); I != E; I += 2) {
SDValue V1 = ConcatOps[I];
@@ -10467,33 +10500,42 @@ static void ReplaceREADCYCLECOUNTER(SDNode *N,
Results.push_back(Cycles32.getValue(1));
}
-static SDValue createGPRPairNode(SelectionDAG &DAG, SDValue V) {
- SDLoc dl(V.getNode());
- auto [VLo, VHi] = DAG.SplitScalar(V, dl, MVT::i32, MVT::i32);
- bool isBigEndian = DAG.getDataLayout().isBigEndian();
- if (isBigEndian)
- std::swap (VLo, VHi);
+static SDValue createGPRPairNode2xi32(SelectionDAG &DAG, SDValue V0,
+ SDValue V1) {
+ SDLoc dl(V0.getNode());
SDValue RegClass =
DAG.getTargetConstant(ARM::GPRPairRegClassID, dl, MVT::i32);
SDValue SubReg0 = DAG.getTargetConstant(ARM::gsub_0, dl, MVT::i32);
SDValue SubReg1 = DAG.getTargetConstant(ARM::gsub_1, dl, MVT::i32);
- const SDValue Ops[] = { RegClass, VLo, SubReg0, VHi, SubReg1 };
+ const SDValue Ops[] = {RegClass, V0, SubReg0, V1, SubReg1};
return SDValue(
DAG.getMachineNode(TargetOpcode::REG_SEQUENCE, dl, MVT::Untyped, Ops), 0);
}
+static SDValue createGPRPairNodei64(SelectionDAG &DAG, SDValue V) {
+ SDLoc dl(V.getNode());
+ auto [VLo, VHi] = DAG.SplitScalar(V, dl, MVT::i32, MVT::i32);
+ bool isBigEndian = DAG.getDataLayout().isBigEndian();
+ if (isBigEndian)
+ std::swap(VLo, VHi);
+ return createGPRPairNode2xi32(DAG, VLo, VHi);
+}
+
static void ReplaceCMP_SWAP_64Results(SDNode *N,
- SmallVectorImpl<SDValue> & Results,
- SelectionDAG &DAG) {
+ SmallVectorImpl<SDValue> &Results,
+ SelectionDAG &DAG) {
assert(N->getValueType(0) == MVT::i64 &&
"AtomicCmpSwap on types less than 64 should be legal");
- SDValue Ops[] = {N->getOperand(1),
- createGPRPairNode(DAG, N->getOperand(2)),
- createGPRPairNode(DAG, N->getOperand(3)),
- N->getOperand(0)};
+ SDValue Ops[] = {
+ createGPRPairNode2xi32(DAG, N->getOperand(1),
+ DAG.getUNDEF(MVT::i32)), // pointer, temp
+ createGPRPairNodei64(DAG, N->getOperand(2)), // expected
+ createGPRPairNodei64(DAG, N->getOperand(3)), // new
+ N->getOperand(0), // chain in
+ };
SDNode *CmpSwap = DAG.getMachineNode(
ARM::CMP_SWAP_64, SDLoc(N),
- DAG.getVTList(MVT::Untyped, MVT::i32, MVT::Other), Ops);
+ DAG.getVTList(MVT::Untyped, MVT::Untyped, MVT::Other), Ops);
MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
DAG.setNodeMemRefs(cast<MachineSDNode>(CmpSwap), {MemOp});
@@ -10536,21 +10578,14 @@ SDValue ARMTargetLowering::LowerFSETCC(SDValue Op, SelectionDAG &DAG) const {
ARMCC::CondCodes CondCode, CondCode2;
FPCCToARMCC(CC, CondCode, CondCode2);
- // FIXME: Chain is not handled correctly here. Currently the FPSCR is implicit
- // in CMPFP and CMPFPE, but instead it should be made explicit by these
- // instructions using a chain instead of glue. This would also fix the problem
- // here (and also in LowerSELECT_CC) where we generate two comparisons when
- // CondCode2 != AL.
SDValue True = DAG.getConstant(1, dl, VT);
SDValue False = DAG.getConstant(0, dl, VT);
SDValue ARMcc = DAG.getConstant(CondCode, dl, MVT::i32);
- SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl, IsSignaling);
- SDValue Result = getCMOV(dl, VT, False, True, ARMcc, CCR, Cmp, DAG);
+ SDValue Result = getCMOV(dl, VT, False, True, ARMcc, Cmp, DAG);
if (CondCode2 != ARMCC::AL) {
ARMcc = DAG.getConstant(CondCode2, dl, MVT::i32);
- Cmp = getVFPCmp(LHS, RHS, DAG, dl, IsSignaling);
- Result = getCMOV(dl, VT, Result, True, ARMcc, CCR, Cmp, DAG);
+ Result = getCMOV(dl, VT, Result, True, ARMcc, Cmp, DAG);
}
return DAG.getMergeValues({Result, Chain}, dl);
}
@@ -10564,6 +10599,17 @@ SDValue ARMTargetLowering::LowerSPONENTRY(SDValue Op, SelectionDAG &DAG) const {
return DAG.getFrameIndex(FI, VT);
}
+SDValue ARMTargetLowering::LowerFP_TO_BF16(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDLoc DL(Op);
+ MakeLibCallOptions CallOptions;
+ MVT SVT = Op.getOperand(0).getSimpleValueType();
+ RTLIB::Libcall LC = RTLIB::getFPROUND(SVT, MVT::bf16);
+ SDValue Res =
+ makeLibCall(DAG, LC, MVT::f32, Op.getOperand(0), CallOptions, DL).first;
+ return DAG.getBitcast(MVT::i32, Res);
+}
+
SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
LLVM_DEBUG(dbgs() << "Lowering node: "; Op.dump());
switch (Op.getOpcode()) {
@@ -10689,6 +10735,8 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::STRICT_FSETCCS: return LowerFSETCC(Op, DAG);
case ISD::SPONENTRY:
return LowerSPONENTRY(Op, DAG);
+ case ISD::FP_TO_BF16:
+ return LowerFP_TO_BF16(Op, DAG);
case ARMISD::WIN__DBZCHK: return SDValue();
}
}
@@ -13803,6 +13851,14 @@ ARMTargetLowering::isDesirableToCommuteWithShift(const SDNode *N,
N->getOpcode() == ISD::SRL) &&
"Expected shift op");
+ SDValue ShiftLHS = N->getOperand(0);
+ if (!ShiftLHS->hasOneUse())
+ return false;
+
+ if (ShiftLHS.getOpcode() == ISD::SIGN_EXTEND &&
+ !ShiftLHS.getOperand(0)->hasOneUse())
+ return false;
+
if (Level == BeforeLegalizeTypes)
return true;
@@ -13938,7 +13994,7 @@ static SDValue PerformSHLSimplify(SDNode *N,
return SDValue();
// Check that all the users could perform the shl themselves.
- for (auto *U : N->uses()) {
+ for (auto *U : N->users()) {
switch(U->getOpcode()) {
default:
return SDValue();
@@ -14435,9 +14491,9 @@ static SDValue PerformANDCombine(SDNode *N,
DAG, dl, VbicVT, VT, OtherModImm);
if (Val.getNode()) {
SDValue Input =
- DAG.getNode(ISD::BITCAST, dl, VbicVT, N->getOperand(0));
+ DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, VbicVT, N->getOperand(0));
SDValue Vbic = DAG.getNode(ARMISD::VBICIMM, dl, VbicVT, Input, Val);
- return DAG.getNode(ISD::BITCAST, dl, VT, Vbic);
+ return DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, VT, Vbic);
}
}
}
@@ -14731,9 +14787,9 @@ static SDValue PerformORCombine(SDNode *N,
SplatBitSize, DAG, dl, VorrVT, VT, OtherModImm);
if (Val.getNode()) {
SDValue Input =
- DAG.getNode(ISD::BITCAST, dl, VorrVT, N->getOperand(0));
+ DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, VorrVT, N->getOperand(0));
SDValue Vorr = DAG.getNode(ARMISD::VORRIMM, dl, VorrVT, Input, Val);
- return DAG.getNode(ISD::BITCAST, dl, VT, Vorr);
+ return DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, VT, Vorr);
}
}
}
@@ -14980,7 +15036,7 @@ static SDValue PerformBFICombine(SDNode *N, SelectionDAG &DAG) {
}
// Check that N is CMPZ(CSINC(0, 0, CC, X)),
-// or CMPZ(CMOV(1, 0, CC, $cpsr, X))
+// or CMPZ(CMOV(1, 0, CC, X))
// return X if valid.
static SDValue IsCMPZCSINC(SDNode *Cmp, ARMCC::CondCodes &CC) {
if (Cmp->getOpcode() != ARMISD::CMPZ || !isNullConstant(Cmp->getOperand(1)))
@@ -15004,22 +15060,22 @@ static SDValue IsCMPZCSINC(SDNode *Cmp, ARMCC::CondCodes &CC) {
if (CSInc.getOpcode() == ARMISD::CMOV && isOneConstant(CSInc.getOperand(0)) &&
isNullConstant(CSInc.getOperand(1)) && CSInc->hasOneUse()) {
CC = (ARMCC::CondCodes)CSInc.getConstantOperandVal(2);
- return CSInc.getOperand(4);
+ return CSInc.getOperand(3);
}
if (CSInc.getOpcode() == ARMISD::CMOV && isOneConstant(CSInc.getOperand(1)) &&
isNullConstant(CSInc.getOperand(0)) && CSInc->hasOneUse()) {
CC = ARMCC::getOppositeCondition(
(ARMCC::CondCodes)CSInc.getConstantOperandVal(2));
- return CSInc.getOperand(4);
+ return CSInc.getOperand(3);
}
return SDValue();
}
static SDValue PerformCMPZCombine(SDNode *N, SelectionDAG &DAG) {
// Given CMPZ(CSINC(C, 0, 0, EQ), 0), we can just use C directly. As in
- // t92: glue = ARMISD::CMPZ t74, 0
+ // t92: flags = ARMISD::CMPZ t74, 0
// t93: i32 = ARMISD::CSINC 0, 0, 1, t92
- // t96: glue = ARMISD::CMPZ t93, 0
+ // t96: flags = ARMISD::CMPZ t93, 0
// t114: i32 = ARMISD::CSINV 0, 0, 0, t96
ARMCC::CondCodes Cond;
if (SDValue C = IsCMPZCSINC(N, Cond))
@@ -15124,9 +15180,9 @@ static SDValue PerformVMOVRRDCombine(SDNode *N,
SDValue Op0, Op1;
while (BV.getOpcode() == ISD::INSERT_VECTOR_ELT) {
if (isa<ConstantSDNode>(BV.getOperand(2))) {
- if (BV.getConstantOperandVal(2) == Offset)
+ if (BV.getConstantOperandVal(2) == Offset && !Op0)
Op0 = BV.getOperand(1);
- if (BV.getConstantOperandVal(2) == Offset + 1)
+ if (BV.getConstantOperandVal(2) == Offset + 1 && !Op1)
Op1 = BV.getOperand(1);
}
BV = BV.getOperand(0);
@@ -15324,7 +15380,7 @@ PerformARMBUILD_VECTORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
assert(EltVT == MVT::f32 && "Unexpected type!");
// Check 1.2.
- SDNode *Use = *N->use_begin();
+ SDNode *Use = *N->user_begin();
if (Use->getOpcode() != ISD::BITCAST ||
Use->getValueType(0).isFloatingPoint())
return SDValue();
@@ -15434,6 +15490,9 @@ static SDValue PerformVECTOR_REG_CASTCombine(SDNode *N, SelectionDAG &DAG,
if (ST->isLittle())
return DAG.getNode(ISD::BITCAST, dl, VT, Op);
+ // VT VECTOR_REG_CAST (VT Op) -> Op
+ if (Op.getValueType() == VT)
+ return Op;
// VECTOR_REG_CAST undef -> undef
if (Op.isUndef())
return DAG.getUNDEF(VT);
@@ -15526,9 +15585,8 @@ PerformExtractEltToVMOVRRD(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
!isa<ConstantSDNode>(Ext.getOperand(1)) ||
Ext.getConstantOperandVal(1) % 2 != 0)
return SDValue();
- if (Ext->use_size() == 1 &&
- (Ext->use_begin()->getOpcode() == ISD::SINT_TO_FP ||
- Ext->use_begin()->getOpcode() == ISD::UINT_TO_FP))
+ if (Ext->hasOneUse() && (Ext->user_begin()->getOpcode() == ISD::SINT_TO_FP ||
+ Ext->user_begin()->getOpcode() == ISD::UINT_TO_FP))
return SDValue();
SDValue Op0 = Ext.getOperand(0);
@@ -15539,24 +15597,24 @@ PerformExtractEltToVMOVRRD(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
return SDValue();
// Find another extract, of Lane + 1
- auto OtherIt = find_if(Op0->uses(), [&](SDNode *V) {
+ auto OtherIt = find_if(Op0->users(), [&](SDNode *V) {
return V->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
isa<ConstantSDNode>(V->getOperand(1)) &&
V->getConstantOperandVal(1) == Lane + 1 &&
V->getOperand(0).getResNo() == ResNo;
});
- if (OtherIt == Op0->uses().end())
+ if (OtherIt == Op0->users().end())
return SDValue();
// For float extracts, we need to be converting to a i32 for both vector
// lanes.
SDValue OtherExt(*OtherIt, 0);
if (OtherExt.getValueType() != MVT::i32) {
- if (OtherExt->use_size() != 1 ||
- OtherExt->use_begin()->getOpcode() != ISD::BITCAST ||
- OtherExt->use_begin()->getValueType(0) != MVT::i32)
+ if (!OtherExt->hasOneUse() ||
+ OtherExt->user_begin()->getOpcode() != ISD::BITCAST ||
+ OtherExt->user_begin()->getValueType(0) != MVT::i32)
return SDValue();
- OtherExt = SDValue(*OtherExt->use_begin(), 0);
+ OtherExt = SDValue(*OtherExt->user_begin(), 0);
}
// Convert the type to a f64 and extract with a VMOVRRD.
@@ -16166,14 +16224,12 @@ static SDValue CombineBaseUpdate(SDNode *N,
SmallVector<BaseUpdateUser, 8> BaseUpdates;
// Search for a use of the address operand that is an increment.
- for (SDNode::use_iterator UI = Addr.getNode()->use_begin(),
- UE = Addr.getNode()->use_end(); UI != UE; ++UI) {
- SDNode *User = *UI;
- if (UI.getUse().getResNo() != Addr.getResNo() ||
- User->getNumOperands() != 2)
+ for (SDUse &Use : Addr->uses()) {
+ SDNode *User = Use.getUser();
+ if (Use.getResNo() != Addr.getResNo() || User->getNumOperands() != 2)
continue;
- SDValue Inc = User->getOperand(UI.getOperandNo() == 1 ? 0 : 1);
+ SDValue Inc = User->getOperand(Use.getOperandNo() == 1 ? 0 : 1);
unsigned ConstInc =
getPointerConstIncrement(User->getOpcode(), Addr, Inc, DCI.DAG);
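The use_begin()/use_size() rewrites above and the use_iterator loops converted here and in the next few hunks follow the same SelectionDAG distinction: users() visits the consuming nodes, while uses() visits SDUse edges, which also carry the result and operand numbers. A short sketch of both idioms, with illustrative names only:

  // When only the consuming node matters:
  for (SDNode *User : N->users())
    if (User->getOpcode() == ISD::ADD)
      ; // inspect or combine with the user here

  // When the edge itself matters (which result is used, and in which slot):
  for (SDUse &U : N->uses()) {
    unsigned ResNo = U.getResNo();    // result number of N being consumed
    unsigned OpNo = U.getOperandNo(); // operand index within the user
    SDNode *User = U.getUser();       // the consuming node
    (void)ResNo; (void)OpNo; (void)User;
  }

Likewise, hasOneUse() together with user_begin() replaces the older use_size() == 1 / use_begin() pattern.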
@@ -16188,15 +16244,14 @@ static SDValue CombineBaseUpdate(SDNode *N,
if (findPointerConstIncrement(Addr.getNode(), &Base, &CInc)) {
unsigned Offset =
getPointerConstIncrement(Addr->getOpcode(), Base, CInc, DCI.DAG);
- for (SDNode::use_iterator UI = Base->use_begin(), UE = Base->use_end();
- UI != UE; ++UI) {
- SDNode *User = *UI;
- if (UI.getUse().getResNo() != Base.getResNo() || User == Addr.getNode() ||
+ for (SDUse &Use : Base->uses()) {
+ SDNode *User = Use.getUser();
+ if (Use.getResNo() != Base.getResNo() || User == Addr.getNode() ||
User->getNumOperands() != 2)
continue;
- SDValue UserInc = User->getOperand(UI.getOperandNo() == 0 ? 1 : 0);
+ SDValue UserInc = User->getOperand(Use.getOperandNo() == 0 ? 1 : 0);
unsigned UserOffset =
getPointerConstIncrement(User->getOpcode(), Base, UserInc, DCI.DAG);
@@ -16269,12 +16324,9 @@ static SDValue PerformMVEVLDCombine(SDNode *N,
return SDValue();
// Search for a use of the address operand that is an increment.
- for (SDNode::use_iterator UI = Addr.getNode()->use_begin(),
- UE = Addr.getNode()->use_end();
- UI != UE; ++UI) {
- SDNode *User = *UI;
- if (User->getOpcode() != ISD::ADD ||
- UI.getUse().getResNo() != Addr.getResNo())
+ for (SDUse &Use : Addr->uses()) {
+ SDNode *User = Use.getUser();
+ if (User->getOpcode() != ISD::ADD || Use.getResNo() != Addr.getResNo())
continue;
// Check that the add is independent of the load/store. Otherwise, folding
@@ -16404,12 +16456,11 @@ static bool CombineVLDDUP(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
// First check that all the vldN-lane uses are VDUPLANEs and that the lane
// numbers match the load.
unsigned VLDLaneNo = VLD->getConstantOperandVal(NumVecs + 3);
- for (SDNode::use_iterator UI = VLD->use_begin(), UE = VLD->use_end();
- UI != UE; ++UI) {
+ for (SDUse &Use : VLD->uses()) {
// Ignore uses of the chain result.
- if (UI.getUse().getResNo() == NumVecs)
+ if (Use.getResNo() == NumVecs)
continue;
- SDNode *User = *UI;
+ SDNode *User = Use.getUser();
if (User->getOpcode() != ARMISD::VDUPLANE ||
VLDLaneNo != User->getConstantOperandVal(1))
return false;
@@ -16429,14 +16480,12 @@ static bool CombineVLDDUP(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
VLDMemInt->getMemOperand());
// Update the uses.
- for (SDNode::use_iterator UI = VLD->use_begin(), UE = VLD->use_end();
- UI != UE; ++UI) {
- unsigned ResNo = UI.getUse().getResNo();
+ for (SDUse &Use : VLD->uses()) {
+ unsigned ResNo = Use.getResNo();
// Ignore uses of the chain result.
if (ResNo == NumVecs)
continue;
- SDNode *User = *UI;
- DCI.CombineTo(User, SDValue(VLDDup.getNode(), ResNo));
+ DCI.CombineTo(Use.getUser(), SDValue(VLDDup.getNode(), ResNo));
}
// Now the vldN-lane intrinsic is dead except for its chain result.
@@ -17643,6 +17692,11 @@ SDValue ARMTargetLowering::PerformIntrinsicCombine(SDNode *N,
// No immediate versions of these to check for.
break;
+ case Intrinsic::arm_neon_vbsl: {
+ SDLoc dl(N);
+ return DAG.getNode(ARMISD::VBSP, dl, N->getValueType(0), N->getOperand(1),
+ N->getOperand(2), N->getOperand(3));
+ }
case Intrinsic::arm_mve_vqdmlah:
case Intrinsic::arm_mve_vqdmlash:
case Intrinsic::arm_mve_vqrdmlah:
@@ -18102,7 +18156,7 @@ SDValue ARMTargetLowering::PerformCMOVToBFICombine(SDNode *CMOV, SelectionDAG &D
SDValue Op0 = CMOV->getOperand(0);
SDValue Op1 = CMOV->getOperand(1);
auto CC = CMOV->getConstantOperandAPInt(2).getLimitedValue();
- SDValue CmpZ = CMOV->getOperand(4);
+ SDValue CmpZ = CMOV->getOperand(3);
// The compare must be against zero.
if (!isNullConstant(CmpZ->getOperand(1)))
@@ -18286,9 +18340,9 @@ static SDValue PerformHWLoopCombine(SDNode *N,
SelectionDAG &DAG = DCI.DAG;
SDValue Elements = Int.getOperand(2);
unsigned IntOp = Int->getConstantOperandVal(1);
- assert((N->hasOneUse() && N->use_begin()->getOpcode() == ISD::BR)
- && "expected single br user");
- SDNode *Br = *N->use_begin();
+ assert((N->hasOneUse() && N->user_begin()->getOpcode() == ISD::BR) &&
+ "expected single br user");
+ SDNode *Br = *N->user_begin();
SDValue OtherTarget = Br->getOperand(1);
// Update the unconditional branch to branch to the given Dest.
@@ -18346,12 +18400,11 @@ static SDValue PerformHWLoopCombine(SDNode *N,
/// PerformBRCONDCombine - Target-specific DAG combining for ARMISD::BRCOND.
SDValue
ARMTargetLowering::PerformBRCONDCombine(SDNode *N, SelectionDAG &DAG) const {
- SDValue Cmp = N->getOperand(4);
+ SDValue Cmp = N->getOperand(3);
if (Cmp.getOpcode() != ARMISD::CMPZ)
// Only looking at NE cases.
return SDValue();
- EVT VT = N->getValueType(0);
SDLoc dl(N);
SDValue LHS = Cmp.getOperand(0);
SDValue RHS = Cmp.getOperand(1);
@@ -18360,17 +18413,17 @@ ARMTargetLowering::PerformBRCONDCombine(SDNode *N, SelectionDAG &DAG) const {
SDValue ARMcc = N->getOperand(2);
ARMCC::CondCodes CC = (ARMCC::CondCodes)ARMcc->getAsZExtVal();
- // (brcond Chain BB ne CPSR (cmpz (and (cmov 0 1 CC CPSR Cmp) 1) 0))
- // -> (brcond Chain BB CC CPSR Cmp)
+ // (brcond Chain BB ne (cmpz (and (cmov 0 1 CC Flags) 1) 0))
+ // -> (brcond Chain BB CC Flags)
if (CC == ARMCC::NE && LHS.getOpcode() == ISD::AND && LHS->hasOneUse() &&
LHS->getOperand(0)->getOpcode() == ARMISD::CMOV &&
LHS->getOperand(0)->hasOneUse() &&
isNullConstant(LHS->getOperand(0)->getOperand(0)) &&
isOneConstant(LHS->getOperand(0)->getOperand(1)) &&
isOneConstant(LHS->getOperand(1)) && isNullConstant(RHS)) {
- return DAG.getNode(
- ARMISD::BRCOND, dl, VT, Chain, BB, LHS->getOperand(0)->getOperand(2),
- LHS->getOperand(0)->getOperand(3), LHS->getOperand(0)->getOperand(4));
+ return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other, Chain, BB,
+ LHS->getOperand(0)->getOperand(2),
+ LHS->getOperand(0)->getOperand(3));
}
return SDValue();
@@ -18379,7 +18432,7 @@ ARMTargetLowering::PerformBRCONDCombine(SDNode *N, SelectionDAG &DAG) const {
/// PerformCMOVCombine - Target-specific DAG combining for ARMISD::CMOV.
SDValue
ARMTargetLowering::PerformCMOVCombine(SDNode *N, SelectionDAG &DAG) const {
- SDValue Cmp = N->getOperand(4);
+ SDValue Cmp = N->getOperand(3);
if (Cmp.getOpcode() != ARMISD::CMPZ)
// Only looking at EQ and NE cases.
return SDValue();
@@ -18419,42 +18472,38 @@ ARMTargetLowering::PerformCMOVCombine(SDNode *N, SelectionDAG &DAG) const {
/// FIXME: Turn this into a target neutral optimization?
SDValue Res;
if (CC == ARMCC::NE && FalseVal == RHS && FalseVal != LHS) {
- Res = DAG.getNode(ARMISD::CMOV, dl, VT, LHS, TrueVal, ARMcc,
- N->getOperand(3), Cmp);
+ Res = DAG.getNode(ARMISD::CMOV, dl, VT, LHS, TrueVal, ARMcc, Cmp);
} else if (CC == ARMCC::EQ && TrueVal == RHS) {
SDValue ARMcc;
SDValue NewCmp = getARMCmp(LHS, RHS, ISD::SETNE, ARMcc, DAG, dl);
- Res = DAG.getNode(ARMISD::CMOV, dl, VT, LHS, FalseVal, ARMcc,
- N->getOperand(3), NewCmp);
+ Res = DAG.getNode(ARMISD::CMOV, dl, VT, LHS, FalseVal, ARMcc, NewCmp);
}
- // (cmov F T ne CPSR (cmpz (cmov 0 1 CC CPSR Cmp) 0))
- // -> (cmov F T CC CPSR Cmp)
+ // (cmov F T ne (cmpz (cmov 0 1 CC Flags) 0))
+ // -> (cmov F T CC Flags)
if (CC == ARMCC::NE && LHS.getOpcode() == ARMISD::CMOV && LHS->hasOneUse() &&
isNullConstant(LHS->getOperand(0)) && isOneConstant(LHS->getOperand(1)) &&
isNullConstant(RHS)) {
return DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal,
- LHS->getOperand(2), LHS->getOperand(3),
- LHS->getOperand(4));
+ LHS->getOperand(2), LHS->getOperand(3));
}
if (!VT.isInteger())
return SDValue();
// Fold away an unnecessary CMPZ/CMOV
- // CMOV A, B, C1, $cpsr, (CMPZ (CMOV 1, 0, C2, D), 0) ->
- // if C1==EQ -> CMOV A, B, C2, $cpsr, D
- // if C1==NE -> CMOV A, B, NOT(C2), $cpsr, D
+ // CMOV A, B, C1, (CMPZ (CMOV 1, 0, C2, D), 0) ->
+ // if C1==EQ -> CMOV A, B, C2, D
+ // if C1==NE -> CMOV A, B, NOT(C2), D
if (N->getConstantOperandVal(2) == ARMCC::EQ ||
N->getConstantOperandVal(2) == ARMCC::NE) {
ARMCC::CondCodes Cond;
- if (SDValue C = IsCMPZCSINC(N->getOperand(4).getNode(), Cond)) {
+ if (SDValue C = IsCMPZCSINC(N->getOperand(3).getNode(), Cond)) {
if (N->getConstantOperandVal(2) == ARMCC::NE)
Cond = ARMCC::getOppositeCondition(Cond);
return DAG.getNode(N->getOpcode(), SDLoc(N), MVT::i32, N->getOperand(0),
N->getOperand(1),
- DAG.getTargetConstant(Cond, SDLoc(N), MVT::i32),
- N->getOperand(3), C);
+ DAG.getConstant(Cond, SDLoc(N), MVT::i32), C);
}
}
@@ -18494,10 +18543,8 @@ ARMTargetLowering::PerformCMOVCombine(SDNode *N, SelectionDAG &DAG) const {
// CMOV 0, z, !=, (CMPZ x, y) -> CMOV (SUBC x, y), z, !=, (SUBC x, y):1
SDValue Sub =
DAG.getNode(ARMISD::SUBC, dl, DAG.getVTList(VT, MVT::i32), LHS, RHS);
- SDValue CPSRGlue = DAG.getCopyToReg(DAG.getEntryNode(), dl, ARM::CPSR,
- Sub.getValue(1), SDValue());
Res = DAG.getNode(ARMISD::CMOV, dl, VT, Sub, TrueVal, ARMcc,
- N->getOperand(3), CPSRGlue.getValue(1));
+ Sub.getValue(1));
FalseVal = Sub;
}
} else if (isNullConstant(TrueVal)) {
@@ -18508,11 +18555,9 @@ ARMTargetLowering::PerformCMOVCombine(SDNode *N, SelectionDAG &DAG) const {
// CMOV z, 0, ==, (CMPZ x, y) -> CMOV (SUBC x, y), z, !=, (SUBC x, y):1
SDValue Sub =
DAG.getNode(ARMISD::SUBC, dl, DAG.getVTList(VT, MVT::i32), LHS, RHS);
- SDValue CPSRGlue = DAG.getCopyToReg(DAG.getEntryNode(), dl, ARM::CPSR,
- Sub.getValue(1), SDValue());
Res = DAG.getNode(ARMISD::CMOV, dl, VT, Sub, FalseVal,
DAG.getConstant(ARMCC::NE, dl, MVT::i32),
- N->getOperand(3), CPSRGlue.getValue(1));
+ Sub.getValue(1));
FalseVal = Sub;
}
}
@@ -18582,7 +18627,9 @@ static SDValue PerformBITCASTCombine(SDNode *N,
// We may have a bitcast of something that has already had this bitcast
// combine performed on it, so skip past any VECTOR_REG_CASTs.
- while (Src.getOpcode() == ARMISD::VECTOR_REG_CAST)
+ if (Src.getOpcode() == ARMISD::VECTOR_REG_CAST &&
+ Src.getOperand(0).getValueType().getScalarSizeInBits() <=
+ Src.getValueType().getScalarSizeInBits())
Src = Src.getOperand(0);
// Bitcast from element-wise VMOV or VMVN doesn't need VREV if the VREV that
@@ -19060,6 +19107,10 @@ SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N,
return SDValue();
break;
}
+ case ARMISD::VBSP:
+ if (N->getOperand(1) == N->getOperand(2))
+ return N->getOperand(1);
+ return SDValue();
case ISD::INTRINSIC_VOID:
case ISD::INTRINSIC_W_CHAIN:
switch (N->getConstantOperandVal(1)) {
@@ -19261,149 +19312,6 @@ bool ARMTargetLowering::isFNegFree(EVT VT) const {
return false;
}
-/// Check if Ext1 and Ext2 are extends of the same type, doubling the bitwidth
-/// of the vector elements.
-static bool areExtractExts(Value *Ext1, Value *Ext2) {
- auto areExtDoubled = [](Instruction *Ext) {
- return Ext->getType()->getScalarSizeInBits() ==
- 2 * Ext->getOperand(0)->getType()->getScalarSizeInBits();
- };
-
- if (!match(Ext1, m_ZExtOrSExt(m_Value())) ||
- !match(Ext2, m_ZExtOrSExt(m_Value())) ||
- !areExtDoubled(cast<Instruction>(Ext1)) ||
- !areExtDoubled(cast<Instruction>(Ext2)))
- return false;
-
- return true;
-}
-
-/// Check if sinking \p I's operands to I's basic block is profitable, because
-/// the operands can be folded into a target instruction, e.g.
-/// sext/zext can be folded into vsubl.
-bool ARMTargetLowering::shouldSinkOperands(Instruction *I,
- SmallVectorImpl<Use *> &Ops) const {
- if (!I->getType()->isVectorTy())
- return false;
-
- if (Subtarget->hasNEON()) {
- switch (I->getOpcode()) {
- case Instruction::Sub:
- case Instruction::Add: {
- if (!areExtractExts(I->getOperand(0), I->getOperand(1)))
- return false;
- Ops.push_back(&I->getOperandUse(0));
- Ops.push_back(&I->getOperandUse(1));
- return true;
- }
- default:
- return false;
- }
- }
-
- if (!Subtarget->hasMVEIntegerOps())
- return false;
-
- auto IsFMSMul = [&](Instruction *I) {
- if (!I->hasOneUse())
- return false;
- auto *Sub = cast<Instruction>(*I->users().begin());
- return Sub->getOpcode() == Instruction::FSub && Sub->getOperand(1) == I;
- };
- auto IsFMS = [&](Instruction *I) {
- if (match(I->getOperand(0), m_FNeg(m_Value())) ||
- match(I->getOperand(1), m_FNeg(m_Value())))
- return true;
- return false;
- };
-
- auto IsSinker = [&](Instruction *I, int Operand) {
- switch (I->getOpcode()) {
- case Instruction::Add:
- case Instruction::Mul:
- case Instruction::FAdd:
- case Instruction::ICmp:
- case Instruction::FCmp:
- return true;
- case Instruction::FMul:
- return !IsFMSMul(I);
- case Instruction::Sub:
- case Instruction::FSub:
- case Instruction::Shl:
- case Instruction::LShr:
- case Instruction::AShr:
- return Operand == 1;
- case Instruction::Call:
- if (auto *II = dyn_cast<IntrinsicInst>(I)) {
- switch (II->getIntrinsicID()) {
- case Intrinsic::fma:
- return !IsFMS(I);
- case Intrinsic::sadd_sat:
- case Intrinsic::uadd_sat:
- case Intrinsic::arm_mve_add_predicated:
- case Intrinsic::arm_mve_mul_predicated:
- case Intrinsic::arm_mve_qadd_predicated:
- case Intrinsic::arm_mve_vhadd:
- case Intrinsic::arm_mve_hadd_predicated:
- case Intrinsic::arm_mve_vqdmull:
- case Intrinsic::arm_mve_vqdmull_predicated:
- case Intrinsic::arm_mve_vqdmulh:
- case Intrinsic::arm_mve_qdmulh_predicated:
- case Intrinsic::arm_mve_vqrdmulh:
- case Intrinsic::arm_mve_qrdmulh_predicated:
- case Intrinsic::arm_mve_fma_predicated:
- return true;
- case Intrinsic::ssub_sat:
- case Intrinsic::usub_sat:
- case Intrinsic::arm_mve_sub_predicated:
- case Intrinsic::arm_mve_qsub_predicated:
- case Intrinsic::arm_mve_hsub_predicated:
- case Intrinsic::arm_mve_vhsub:
- return Operand == 1;
- default:
- return false;
- }
- }
- return false;
- default:
- return false;
- }
- };
-
- for (auto OpIdx : enumerate(I->operands())) {
- Instruction *Op = dyn_cast<Instruction>(OpIdx.value().get());
- // Make sure we are not already sinking this operand
- if (!Op || any_of(Ops, [&](Use *U) { return U->get() == Op; }))
- continue;
-
- Instruction *Shuffle = Op;
- if (Shuffle->getOpcode() == Instruction::BitCast)
- Shuffle = dyn_cast<Instruction>(Shuffle->getOperand(0));
- // We are looking for a splat that can be sunk.
- if (!Shuffle ||
- !match(Shuffle, m_Shuffle(
- m_InsertElt(m_Undef(), m_Value(), m_ZeroInt()),
- m_Undef(), m_ZeroMask())))
- continue;
- if (!IsSinker(I, OpIdx.index()))
- continue;
-
- // All uses of the shuffle should be sunk to avoid duplicating it across gpr
- // and vector registers
- for (Use &U : Op->uses()) {
- Instruction *Insn = cast<Instruction>(U.getUser());
- if (!IsSinker(Insn, U.getOperandNo()))
- return false;
- }
-
- Ops.push_back(&Shuffle->getOperandUse(0));
- if (Shuffle != Op)
- Ops.push_back(&Op->getOperandUse(0));
- Ops.push_back(&OpIdx.value());
- }
- return true;
-}
-
Type *ARMTargetLowering::shouldConvertSplatType(ShuffleVectorInst *SVI) const {
if (!Subtarget->hasMVEIntegerOps())
return nullptr;
@@ -19436,10 +19344,10 @@ bool ARMTargetLowering::isVectorLoadExtDesirable(SDValue ExtVal) const {
// If there's more than one user instruction, the loadext is desirable no
// matter what. There can be two uses by the same instruction.
if (ExtVal->use_empty() ||
- !ExtVal->use_begin()->isOnlyUserOf(ExtVal.getNode()))
+ !ExtVal->user_begin()->isOnlyUserOf(ExtVal.getNode()))
return true;
- SDNode *U = *ExtVal->use_begin();
+ SDNode *U = *ExtVal->user_begin();
if ((U->getOpcode() == ISD::ADD || U->getOpcode() == ISD::SUB ||
U->getOpcode() == ISD::SHL || U->getOpcode() == ARMISD::VSHLIMM))
return false;
@@ -19475,6 +19383,9 @@ bool ARMTargetLowering::allowTruncateForTailCall(Type *Ty1, Type *Ty2) const {
/// patterns (and we don't have the non-fused floating point instruction).
bool ARMTargetLowering::isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
EVT VT) const {
+ if (Subtarget->useSoftFloat())
+ return false;
+
if (!VT.isSimple())
return false;
@@ -20177,14 +20088,13 @@ void ARMTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
// CSINV: KnownOp0 or ~KnownOp1
// CSNEG: KnownOp0 or KnownOp1 * -1
if (Op.getOpcode() == ARMISD::CSINC)
- KnownOp1 = KnownBits::computeForAddSub(
- /*Add=*/true, /*NSW=*/false, /*NUW=*/false, KnownOp1,
- KnownBits::makeConstant(APInt(32, 1)));
+ KnownOp1 =
+ KnownBits::add(KnownOp1, KnownBits::makeConstant(APInt(32, 1)));
else if (Op.getOpcode() == ARMISD::CSINV)
std::swap(KnownOp1.Zero, KnownOp1.One);
else if (Op.getOpcode() == ARMISD::CSNEG)
- KnownOp1 = KnownBits::mul(
- KnownOp1, KnownBits::makeConstant(APInt(32, -1)));
+ KnownOp1 = KnownBits::mul(KnownOp1,
+ KnownBits::makeConstant(APInt::getAllOnes(32)));
Known = KnownOp0.intersectWith(KnownOp1);
break;
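The CSINC case now uses the dedicated KnownBits::add helper instead of the generic computeForAddSub call, since the second input of CSINC is op1 plus one. A small, illustrative-only sketch of that helper:

  // Known bits of (op1 + 1): add the constant 1 to op1's known bits.
  KnownBits One = KnownBits::makeConstant(APInt(32, 1));
  KnownBits IncrementedOp1 = KnownBits::add(KnownOp1, One);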
@@ -20663,7 +20573,7 @@ void ARMTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
}
return;
}
- Result = DAG.getTargetConstant(CVal, SDLoc(Op), Op.getValueType());
+ Result = DAG.getSignedTargetConstant(CVal, SDLoc(Op), Op.getValueType());
break;
}
@@ -21049,7 +20959,7 @@ bool ARMTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
Info.ptrVal = I.getArgOperand(I.arg_size() - 1);
Info.offset = 0;
- Info.align.reset();
+ Info.align = I.getParamAlign(I.arg_size() - 1).valueOrOne();
// volatile loads with NEON intrinsics not supported
Info.flags = MachineMemOperand::MOLoad;
return true;
@@ -21096,7 +21006,7 @@ bool ARMTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
Info.ptrVal = I.getArgOperand(0);
Info.offset = 0;
- Info.align.reset();
+ Info.align = I.getParamAlign(0).valueOrOne();
// volatile stores with NEON intrinsics not supported
Info.flags = MachineMemOperand::MOStore;
return true;
@@ -21262,30 +21172,26 @@ bool ARMTargetLowering::isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
Instruction *ARMTargetLowering::makeDMB(IRBuilderBase &Builder,
ARM_MB::MemBOpt Domain) const {
- Module *M = Builder.GetInsertBlock()->getParent()->getParent();
-
// First, if the target has no DMB, see what fallback we can use.
if (!Subtarget->hasDataBarrier()) {
// Some ARMv6 cpus can support data barriers with an mcr instruction.
// Thumb1 and pre-v6 ARM mode use a libcall instead and should never get
// here.
if (Subtarget->hasV6Ops() && !Subtarget->isThumb()) {
- Function *MCR = Intrinsic::getDeclaration(M, Intrinsic::arm_mcr);
Value* args[6] = {Builder.getInt32(15), Builder.getInt32(0),
Builder.getInt32(0), Builder.getInt32(7),
Builder.getInt32(10), Builder.getInt32(5)};
- return Builder.CreateCall(MCR, args);
+ return Builder.CreateIntrinsic(Intrinsic::arm_mcr, {}, args);
} else {
// Instead of using barriers, atomic accesses on these subtargets use
// libcalls.
llvm_unreachable("makeDMB on a target so old that it has no barriers");
}
} else {
- Function *DMB = Intrinsic::getDeclaration(M, Intrinsic::arm_dmb);
// Only a full system barrier exists in the M-class architectures.
Domain = Subtarget->isMClass() ? ARM_MB::SY : Domain;
Constant *CDomain = Builder.getInt32(Domain);
- return Builder.CreateCall(DMB, CDomain);
+ return Builder.CreateIntrinsic(Intrinsic::arm_dmb, {}, CDomain);
}
}
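The barrier, exclusive-access, and interleaved load/store hunks in this section all apply the same IRBuilder cleanup: instead of fetching the intrinsic declaration (Intrinsic::getDeclaration, since renamed to getOrInsertDeclaration) and invoking it with CreateCall, the call is emitted directly through CreateIntrinsic. A minimal before/after sketch for the DMB case, assuming a Builder already positioned inside some function:

  // Before: look up the declaration, then call it.
  Module *M = Builder.GetInsertBlock()->getModule();
  Function *Dmb = Intrinsic::getOrInsertDeclaration(M, Intrinsic::arm_dmb);
  Value *Domain = Builder.getInt32(ARM_MB::SY);
  Builder.CreateCall(Dmb, Domain);

  // After: one step; the empty {} is the list of overload types, and a name
  // or FMF source can be supplied for value-producing intrinsics.
  Builder.CreateIntrinsic(Intrinsic::arm_dmb, {}, Domain);

For overloaded intrinsics such as arm_ldrex or the NEON/MVE vldN and vstN families, the overload types go in that second argument, mirroring the Tys array previously passed to the declaration lookup.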
@@ -21430,7 +21336,7 @@ bool ARMTargetLowering::shouldInsertFencesForAtomic(
return InsertFencesForAtomic;
}
-bool ARMTargetLowering::useLoadStackGuardNode() const {
+bool ARMTargetLowering::useLoadStackGuardNode(const Module &M) const {
// ROPI/RWPI are not supported currently.
return !Subtarget->isROPI() && !Subtarget->isRWPI();
}
@@ -21538,9 +21444,9 @@ Value *ARMTargetLowering::emitLoadLinked(IRBuilderBase &Builder, Type *ValueTy,
if (ValueTy->getPrimitiveSizeInBits() == 64) {
Intrinsic::ID Int =
IsAcquire ? Intrinsic::arm_ldaexd : Intrinsic::arm_ldrexd;
- Function *Ldrex = Intrinsic::getDeclaration(M, Int);
- Value *LoHi = Builder.CreateCall(Ldrex, Addr, "lohi");
+ Value *LoHi =
+ Builder.CreateIntrinsic(Int, {}, Addr, /*FMFSource=*/nullptr, "lohi");
Value *Lo = Builder.CreateExtractValue(LoHi, 0, "lo");
Value *Hi = Builder.CreateExtractValue(LoHi, 1, "hi");
@@ -21554,8 +21460,7 @@ Value *ARMTargetLowering::emitLoadLinked(IRBuilderBase &Builder, Type *ValueTy,
Type *Tys[] = { Addr->getType() };
Intrinsic::ID Int = IsAcquire ? Intrinsic::arm_ldaex : Intrinsic::arm_ldrex;
- Function *Ldrex = Intrinsic::getDeclaration(M, Int, Tys);
- CallInst *CI = Builder.CreateCall(Ldrex, Addr);
+ CallInst *CI = Builder.CreateIntrinsic(Int, Tys, Addr);
CI->addParamAttr(
0, Attribute::get(M->getContext(), Attribute::ElementType, ValueTy));
@@ -21566,8 +21471,7 @@ void ARMTargetLowering::emitAtomicCmpXchgNoStoreLLBalance(
IRBuilderBase &Builder) const {
if (!Subtarget->hasV7Ops())
return;
- Module *M = Builder.GetInsertBlock()->getParent()->getParent();
- Builder.CreateCall(Intrinsic::getDeclaration(M, Intrinsic::arm_clrex));
+ Builder.CreateIntrinsic(Intrinsic::arm_clrex, {}, {});
}
Value *ARMTargetLowering::emitStoreConditional(IRBuilderBase &Builder,
@@ -21582,19 +21486,18 @@ Value *ARMTargetLowering::emitStoreConditional(IRBuilderBase &Builder,
if (Val->getType()->getPrimitiveSizeInBits() == 64) {
Intrinsic::ID Int =
IsRelease ? Intrinsic::arm_stlexd : Intrinsic::arm_strexd;
- Function *Strex = Intrinsic::getDeclaration(M, Int);
Type *Int32Ty = Type::getInt32Ty(M->getContext());
Value *Lo = Builder.CreateTrunc(Val, Int32Ty, "lo");
Value *Hi = Builder.CreateTrunc(Builder.CreateLShr(Val, 32), Int32Ty, "hi");
if (!Subtarget->isLittle())
std::swap(Lo, Hi);
- return Builder.CreateCall(Strex, {Lo, Hi, Addr});
+ return Builder.CreateIntrinsic(Int, {}, {Lo, Hi, Addr});
}
Intrinsic::ID Int = IsRelease ? Intrinsic::arm_stlex : Intrinsic::arm_strex;
Type *Tys[] = { Addr->getType() };
- Function *Strex = Intrinsic::getDeclaration(M, Int, Tys);
+ Function *Strex = Intrinsic::getOrInsertDeclaration(M, Int, Tys);
CallInst *CI = Builder.CreateCall(
Strex, {Builder.CreateZExtOrBitCast(
@@ -21722,14 +21625,13 @@ bool ARMTargetLowering::lowerInterleavedLoad(
static const Intrinsic::ID LoadInts[3] = {Intrinsic::arm_neon_vld2,
Intrinsic::arm_neon_vld3,
Intrinsic::arm_neon_vld4};
- Function *VldnFunc =
- Intrinsic::getDeclaration(LI->getModule(), LoadInts[Factor - 2], Tys);
SmallVector<Value *, 2> Ops;
Ops.push_back(BaseAddr);
Ops.push_back(Builder.getInt32(LI->getAlign().value()));
- return Builder.CreateCall(VldnFunc, Ops, "vldN");
+ return Builder.CreateIntrinsic(LoadInts[Factor - 2], Tys, Ops,
+ /*FMFSource=*/nullptr, "vldN");
} else {
assert((Factor == 2 || Factor == 4) &&
"expected interleave factor of 2 or 4 for MVE");
@@ -21737,12 +21639,11 @@ bool ARMTargetLowering::lowerInterleavedLoad(
Factor == 2 ? Intrinsic::arm_mve_vld2q : Intrinsic::arm_mve_vld4q;
Type *PtrTy = Builder.getPtrTy(LI->getPointerAddressSpace());
Type *Tys[] = {VecTy, PtrTy};
- Function *VldnFunc =
- Intrinsic::getDeclaration(LI->getModule(), LoadInts, Tys);
SmallVector<Value *, 2> Ops;
Ops.push_back(BaseAddr);
- return Builder.CreateCall(VldnFunc, Ops, "vldN");
+ return Builder.CreateIntrinsic(LoadInts, Tys, Ops, /*FMFSource=*/nullptr,
+ "vldN");
}
};
@@ -21883,14 +21784,11 @@ bool ARMTargetLowering::lowerInterleavedStore(StoreInst *SI,
Type *PtrTy = Builder.getPtrTy(SI->getPointerAddressSpace());
Type *Tys[] = {PtrTy, SubVecTy};
- Function *VstNFunc = Intrinsic::getDeclaration(
- SI->getModule(), StoreInts[Factor - 2], Tys);
-
SmallVector<Value *, 6> Ops;
Ops.push_back(BaseAddr);
append_range(Ops, Shuffles);
Ops.push_back(Builder.getInt32(SI->getAlign().value()));
- Builder.CreateCall(VstNFunc, Ops);
+ Builder.CreateIntrinsic(StoreInts[Factor - 2], Tys, Ops);
} else {
assert((Factor == 2 || Factor == 4) &&
"expected interleave factor of 2 or 4 for MVE");
@@ -21898,15 +21796,13 @@ bool ARMTargetLowering::lowerInterleavedStore(StoreInst *SI,
Factor == 2 ? Intrinsic::arm_mve_vst2q : Intrinsic::arm_mve_vst4q;
Type *PtrTy = Builder.getPtrTy(SI->getPointerAddressSpace());
Type *Tys[] = {PtrTy, SubVecTy};
- Function *VstNFunc =
- Intrinsic::getDeclaration(SI->getModule(), StoreInts, Tys);
SmallVector<Value *, 6> Ops;
Ops.push_back(BaseAddr);
append_range(Ops, Shuffles);
for (unsigned F = 0; F < Factor; F++) {
Ops.push_back(Builder.getInt32(F));
- Builder.CreateCall(VstNFunc, Ops);
+ Builder.CreateIntrinsic(StoreInts, Tys, Ops);
Ops.pop_back();
}
}
@@ -22020,7 +21916,9 @@ Align ARMTargetLowering::getABIAlignmentForCallingConv(
// Avoid over-aligning vector parameters. It would require realigning the
// stack and waste space for no real benefit.
- return std::min(ABITypeAlign, DL.getStackAlignment());
+ MaybeAlign StackAlign = DL.getStackAlignment();
+ assert(StackAlign && "data layout string is missing stack alignment");
+ return std::min(ABITypeAlign, *StackAlign);
}
/// Return true if a type is an AAPCS-VFP homogeneous aggregate or one of