about summary refs log tree commit diff
path: root/lib/Target/Hexagon/HexagonISelLowering.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'lib/Target/Hexagon/HexagonISelLowering.cpp')
-rw-r--r--  lib/Target/Hexagon/HexagonISelLowering.cpp  1937
1 files changed, 906 insertions, 1031 deletions
diff --git a/lib/Target/Hexagon/HexagonISelLowering.cpp b/lib/Target/Hexagon/HexagonISelLowering.cpp
index 0e0da2ddc400..604d84994b6c 100644
--- a/lib/Target/Hexagon/HexagonISelLowering.cpp
+++ b/lib/Target/Hexagon/HexagonISelLowering.cpp
@@ -40,6 +40,7 @@
#include "llvm/IR/InlineAsm.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
@@ -103,427 +104,52 @@ static cl::opt<int> MaxStoresPerMemsetOptSizeCL("max-store-memset-Os",
cl::Hidden, cl::ZeroOrMore, cl::init(4),
cl::desc("Max #stores to inline memset"));
+static cl::opt<bool> AlignLoads("hexagon-align-loads",
+ cl::Hidden, cl::init(false),
+ cl::desc("Rewrite unaligned loads as a pair of aligned loads"));
+
namespace {
class HexagonCCState : public CCState {
- unsigned NumNamedVarArgParams;
+ unsigned NumNamedVarArgParams = 0;
public:
- HexagonCCState(CallingConv::ID CC, bool isVarArg, MachineFunction &MF,
+ HexagonCCState(CallingConv::ID CC, bool IsVarArg, MachineFunction &MF,
SmallVectorImpl<CCValAssign> &locs, LLVMContext &C,
- int NumNamedVarArgParams)
- : CCState(CC, isVarArg, MF, locs, C),
- NumNamedVarArgParams(NumNamedVarArgParams) {}
-
+ unsigned NumNamedArgs)
+ : CCState(CC, IsVarArg, MF, locs, C),
+ NumNamedVarArgParams(NumNamedArgs) {}
unsigned getNumNamedVarArgParams() const { return NumNamedVarArgParams; }
};
- enum StridedLoadKind {
- Even = 0,
- Odd,
- NoPattern
- };
-
} // end anonymous namespace
-// Implement calling convention for Hexagon.
-
-static const MVT LegalV64[] = { MVT::v64i8, MVT::v32i16, MVT::v16i32 };
-static const MVT LegalW64[] = { MVT::v128i8, MVT::v64i16, MVT::v32i32 };
-static const MVT LegalV128[] = { MVT::v128i8, MVT::v64i16, MVT::v32i32 };
-static const MVT LegalW128[] = { MVT::v256i8, MVT::v128i16, MVT::v64i32 };
-
-static bool
-CC_Hexagon(unsigned ValNo, MVT ValVT,
- MVT LocVT, CCValAssign::LocInfo LocInfo,
- ISD::ArgFlagsTy ArgFlags, CCState &State);
-
-static bool
-CC_Hexagon32(unsigned ValNo, MVT ValVT,
- MVT LocVT, CCValAssign::LocInfo LocInfo,
- ISD::ArgFlagsTy ArgFlags, CCState &State);
-
-static bool
-CC_Hexagon64(unsigned ValNo, MVT ValVT,
- MVT LocVT, CCValAssign::LocInfo LocInfo,
- ISD::ArgFlagsTy ArgFlags, CCState &State);
-
-static bool
-CC_HexagonVector(unsigned ValNo, MVT ValVT,
- MVT LocVT, CCValAssign::LocInfo LocInfo,
- ISD::ArgFlagsTy ArgFlags, CCState &State);
-
-static bool
-RetCC_Hexagon(unsigned ValNo, MVT ValVT,
- MVT LocVT, CCValAssign::LocInfo LocInfo,
- ISD::ArgFlagsTy ArgFlags, CCState &State);
-
-static bool
-RetCC_Hexagon32(unsigned ValNo, MVT ValVT,
- MVT LocVT, CCValAssign::LocInfo LocInfo,
- ISD::ArgFlagsTy ArgFlags, CCState &State);
-
-static bool
-RetCC_Hexagon64(unsigned ValNo, MVT ValVT,
- MVT LocVT, CCValAssign::LocInfo LocInfo,
- ISD::ArgFlagsTy ArgFlags, CCState &State);
-
-static bool
-RetCC_HexagonVector(unsigned ValNo, MVT ValVT,
- MVT LocVT, CCValAssign::LocInfo LocInfo,
- ISD::ArgFlagsTy ArgFlags, CCState &State);
-
-static bool
-CC_Hexagon_VarArg (unsigned ValNo, MVT ValVT,
- MVT LocVT, CCValAssign::LocInfo LocInfo,
- ISD::ArgFlagsTy ArgFlags, CCState &State) {
- HexagonCCState &HState = static_cast<HexagonCCState &>(State);
-
- if (ValNo < HState.getNumNamedVarArgParams()) {
- // Deal with named arguments.
- return CC_Hexagon(ValNo, ValVT, LocVT, LocInfo, ArgFlags, State);
- }
-
- // Deal with un-named arguments.
- unsigned Offset;
- if (ArgFlags.isByVal()) {
- // If pass-by-value, the size allocated on stack is decided
- // by ArgFlags.getByValSize(), not by the size of LocVT.
- Offset = State.AllocateStack(ArgFlags.getByValSize(),
- ArgFlags.getByValAlign());
- State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
- return false;
- }
- if (LocVT == MVT::i1 || LocVT == MVT::i8 || LocVT == MVT::i16) {
- LocVT = MVT::i32;
- ValVT = MVT::i32;
- if (ArgFlags.isSExt())
- LocInfo = CCValAssign::SExt;
- else if (ArgFlags.isZExt())
- LocInfo = CCValAssign::ZExt;
- else
- LocInfo = CCValAssign::AExt;
- }
- if (LocVT == MVT::i32 || LocVT == MVT::f32) {
- Offset = State.AllocateStack(4, 4);
- State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
- return false;
- }
- if (LocVT == MVT::i64 || LocVT == MVT::f64) {
- Offset = State.AllocateStack(8, 8);
- State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
- return false;
- }
- if (LocVT == MVT::v2i64 || LocVT == MVT::v4i32 || LocVT == MVT::v8i16 ||
- LocVT == MVT::v16i8) {
- Offset = State.AllocateStack(16, 16);
- State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
- return false;
- }
- if (LocVT == MVT::v4i64 || LocVT == MVT::v8i32 || LocVT == MVT::v16i16 ||
- LocVT == MVT::v32i8) {
- Offset = State.AllocateStack(32, 32);
- State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
- return false;
- }
- if (LocVT == MVT::v16i32 || LocVT == MVT::v32i16 ||
- LocVT == MVT::v64i8 || LocVT == MVT::v512i1) {
- Offset = State.AllocateStack(64, 64);
- State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
- return false;
- }
- if (LocVT == MVT::v32i32 || LocVT == MVT::v64i16 ||
- LocVT == MVT::v128i8 || LocVT == MVT::v1024i1) {
- Offset = State.AllocateStack(128, 128);
- State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
- return false;
- }
- if (LocVT == MVT::v64i32 || LocVT == MVT::v128i16 ||
- LocVT == MVT::v256i8) {
- Offset = State.AllocateStack(256, 256);
- State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
- return false;
- }
-
- llvm_unreachable(nullptr);
-}
-
-static bool CC_Hexagon (unsigned ValNo, MVT ValVT, MVT LocVT,
- CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State) {
- if (ArgFlags.isByVal()) {
- // Passed on stack.
- unsigned Offset = State.AllocateStack(ArgFlags.getByValSize(),
- ArgFlags.getByValAlign());
- State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
- return false;
- }
-
- if (LocVT == MVT::i1) {
- LocVT = MVT::i32;
- } else if (LocVT == MVT::i8 || LocVT == MVT::i16) {
- LocVT = MVT::i32;
- ValVT = MVT::i32;
- if (ArgFlags.isSExt())
- LocInfo = CCValAssign::SExt;
- else if (ArgFlags.isZExt())
- LocInfo = CCValAssign::ZExt;
- else
- LocInfo = CCValAssign::AExt;
- } else if (LocVT == MVT::v4i8 || LocVT == MVT::v2i16) {
- LocVT = MVT::i32;
- LocInfo = CCValAssign::BCvt;
- } else if (LocVT == MVT::v8i8 || LocVT == MVT::v4i16 || LocVT == MVT::v2i32) {
- LocVT = MVT::i64;
- LocInfo = CCValAssign::BCvt;
- }
-
- if (LocVT == MVT::i32 || LocVT == MVT::f32) {
- if (!CC_Hexagon32(ValNo, ValVT, LocVT, LocInfo, ArgFlags, State))
- return false;
- }
-
- if (LocVT == MVT::i64 || LocVT == MVT::f64) {
- if (!CC_Hexagon64(ValNo, ValVT, LocVT, LocInfo, ArgFlags, State))
- return false;
- }
-
- if (LocVT == MVT::v8i32 || LocVT == MVT::v16i16 || LocVT == MVT::v32i8) {
- unsigned Offset = State.AllocateStack(ArgFlags.getByValSize(), 32);
- State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
- return false;
- }
-
- auto &HST = State.getMachineFunction().getSubtarget<HexagonSubtarget>();
- if (HST.isHVXVectorType(LocVT)) {
- if (!CC_HexagonVector(ValNo, ValVT, LocVT, LocInfo, ArgFlags, State))
- return false;
- }
-
- return true; // CC didn't match.
-}
+// Implement calling convention for Hexagon.
-static bool CC_Hexagon32(unsigned ValNo, MVT ValVT,
- MVT LocVT, CCValAssign::LocInfo LocInfo,
- ISD::ArgFlagsTy ArgFlags, CCState &State) {
- static const MCPhysReg RegList[] = {
- Hexagon::R0, Hexagon::R1, Hexagon::R2, Hexagon::R3, Hexagon::R4,
- Hexagon::R5
+static bool CC_SkipOdd(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
+ CCValAssign::LocInfo &LocInfo,
+ ISD::ArgFlagsTy &ArgFlags, CCState &State) {
+ static const MCPhysReg ArgRegs[] = {
+ Hexagon::R0, Hexagon::R1, Hexagon::R2,
+ Hexagon::R3, Hexagon::R4, Hexagon::R5
};
- if (unsigned Reg = State.AllocateReg(RegList)) {
- State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
- return false;
- }
+ const unsigned NumArgRegs = array_lengthof(ArgRegs);
+ unsigned RegNum = State.getFirstUnallocated(ArgRegs);
- unsigned Offset = State.AllocateStack(4, 4);
- State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
- return false;
-}
-
-static bool CC_Hexagon64(unsigned ValNo, MVT ValVT,
- MVT LocVT, CCValAssign::LocInfo LocInfo,
- ISD::ArgFlagsTy ArgFlags, CCState &State) {
- if (unsigned Reg = State.AllocateReg(Hexagon::D0)) {
- State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
- return false;
- }
-
- static const MCPhysReg RegList1[] = {
- Hexagon::D1, Hexagon::D2
- };
- static const MCPhysReg RegList2[] = {
- Hexagon::R1, Hexagon::R3
- };
- if (unsigned Reg = State.AllocateReg(RegList1, RegList2)) {
- State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
- return false;
- }
+ // RegNum is an index into ArgRegs: skip a register if RegNum is odd.
+ if (RegNum != NumArgRegs && RegNum % 2 == 1)
+ State.AllocateReg(ArgRegs[RegNum]);
- unsigned Offset = State.AllocateStack(8, 8, Hexagon::D2);
- State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
+ // Always return false here, as this function only makes sure that the first
+ // unallocated register has an even register number and does not actually
+ // allocate a register for the current argument.
return false;
}
-static bool CC_HexagonVector(unsigned ValNo, MVT ValVT,
- MVT LocVT, CCValAssign::LocInfo LocInfo,
- ISD::ArgFlagsTy ArgFlags, CCState &State) {
- static const MCPhysReg VecLstS[] = {
- Hexagon::V0, Hexagon::V1, Hexagon::V2, Hexagon::V3, Hexagon::V4,
- Hexagon::V5, Hexagon::V6, Hexagon::V7, Hexagon::V8, Hexagon::V9,
- Hexagon::V10, Hexagon::V11, Hexagon::V12, Hexagon::V13, Hexagon::V14,
- Hexagon::V15
- };
- static const MCPhysReg VecLstD[] = {
- Hexagon::W0, Hexagon::W1, Hexagon::W2, Hexagon::W3, Hexagon::W4,
- Hexagon::W5, Hexagon::W6, Hexagon::W7
- };
- auto &MF = State.getMachineFunction();
- auto &HST = MF.getSubtarget<HexagonSubtarget>();
-
- if (HST.useHVX64BOps() &&
- (LocVT == MVT::v16i32 || LocVT == MVT::v32i16 ||
- LocVT == MVT::v64i8 || LocVT == MVT::v512i1)) {
- if (unsigned Reg = State.AllocateReg(VecLstS)) {
- State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
- return false;
- }
- unsigned Offset = State.AllocateStack(64, 64);
- State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
- return false;
- }
- if (HST.useHVX64BOps() && (LocVT == MVT::v32i32 ||
- LocVT == MVT::v64i16 || LocVT == MVT::v128i8)) {
- if (unsigned Reg = State.AllocateReg(VecLstD)) {
- State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
- return false;
- }
- unsigned Offset = State.AllocateStack(128, 128);
- State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
- return false;
- }
- // 128B Mode
- if (HST.useHVX128BOps() && (LocVT == MVT::v64i32 ||
- LocVT == MVT::v128i16 || LocVT == MVT::v256i8)) {
- if (unsigned Reg = State.AllocateReg(VecLstD)) {
- State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
- return false;
- }
- unsigned Offset = State.AllocateStack(256, 256);
- State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
- return false;
- }
- if (HST.useHVX128BOps() &&
- (LocVT == MVT::v32i32 || LocVT == MVT::v64i16 ||
- LocVT == MVT::v128i8 || LocVT == MVT::v1024i1)) {
- if (unsigned Reg = State.AllocateReg(VecLstS)) {
- State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
- return false;
- }
- unsigned Offset = State.AllocateStack(128, 128);
- State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
- return false;
- }
- return true;
-}
-
-static bool RetCC_Hexagon(unsigned ValNo, MVT ValVT,
- MVT LocVT, CCValAssign::LocInfo LocInfo,
- ISD::ArgFlagsTy ArgFlags, CCState &State) {
- auto &MF = State.getMachineFunction();
- auto &HST = MF.getSubtarget<HexagonSubtarget>();
-
- if (LocVT == MVT::i1) {
- // Return values of type MVT::i1 still need to be assigned to R0, but
- // the value type needs to remain i1. LowerCallResult will deal with it,
- // but it needs to recognize i1 as the value type.
- LocVT = MVT::i32;
- } else if (LocVT == MVT::i8 || LocVT == MVT::i16) {
- LocVT = MVT::i32;
- ValVT = MVT::i32;
- if (ArgFlags.isSExt())
- LocInfo = CCValAssign::SExt;
- else if (ArgFlags.isZExt())
- LocInfo = CCValAssign::ZExt;
- else
- LocInfo = CCValAssign::AExt;
- } else if (LocVT == MVT::v4i8 || LocVT == MVT::v2i16) {
- LocVT = MVT::i32;
- LocInfo = CCValAssign::BCvt;
- } else if (LocVT == MVT::v8i8 || LocVT == MVT::v4i16 || LocVT == MVT::v2i32) {
- LocVT = MVT::i64;
- LocInfo = CCValAssign::BCvt;
- } else if (LocVT == MVT::v64i8 || LocVT == MVT::v32i16 ||
- LocVT == MVT::v16i32 || LocVT == MVT::v512i1) {
- LocVT = MVT::v16i32;
- ValVT = MVT::v16i32;
- LocInfo = CCValAssign::Full;
- } else if (LocVT == MVT::v128i8 || LocVT == MVT::v64i16 ||
- LocVT == MVT::v32i32 ||
- (LocVT == MVT::v1024i1 && HST.useHVX128BOps())) {
- LocVT = MVT::v32i32;
- ValVT = MVT::v32i32;
- LocInfo = CCValAssign::Full;
- } else if (LocVT == MVT::v256i8 || LocVT == MVT::v128i16 ||
- LocVT == MVT::v64i32) {
- LocVT = MVT::v64i32;
- ValVT = MVT::v64i32;
- LocInfo = CCValAssign::Full;
- }
- if (LocVT == MVT::i32 || LocVT == MVT::f32) {
- if (!RetCC_Hexagon32(ValNo, ValVT, LocVT, LocInfo, ArgFlags, State))
- return false;
- }
-
- if (LocVT == MVT::i64 || LocVT == MVT::f64) {
- if (!RetCC_Hexagon64(ValNo, ValVT, LocVT, LocInfo, ArgFlags, State))
- return false;
- }
- if (LocVT == MVT::v16i32 || LocVT == MVT::v32i32 || LocVT == MVT::v64i32) {
- if (!RetCC_HexagonVector(ValNo, ValVT, LocVT, LocInfo, ArgFlags, State))
- return false;
- }
- return true; // CC didn't match.
-}
-
-static bool RetCC_Hexagon32(unsigned ValNo, MVT ValVT,
- MVT LocVT, CCValAssign::LocInfo LocInfo,
- ISD::ArgFlagsTy ArgFlags, CCState &State) {
- if (LocVT == MVT::i32 || LocVT == MVT::f32) {
- // Note that use of registers beyond R1 is not ABI compliant. However there
- // are (experimental) IR passes which generate internal functions that
- // return structs using these additional registers.
- static const uint16_t RegList[] = { Hexagon::R0, Hexagon::R1,
- Hexagon::R2, Hexagon::R3,
- Hexagon::R4, Hexagon::R5 };
- if (unsigned Reg = State.AllocateReg(RegList)) {
- State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
- return false;
- }
- }
-
- return true;
-}
-
-static bool RetCC_Hexagon64(unsigned ValNo, MVT ValVT,
- MVT LocVT, CCValAssign::LocInfo LocInfo,
- ISD::ArgFlagsTy ArgFlags, CCState &State) {
- if (LocVT == MVT::i64 || LocVT == MVT::f64) {
- if (unsigned Reg = State.AllocateReg(Hexagon::D0)) {
- State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
- return false;
- }
- }
-
- return true;
-}
+#include "HexagonGenCallingConv.inc"
-static bool RetCC_HexagonVector(unsigned ValNo, MVT ValVT,
- MVT LocVT, CCValAssign::LocInfo LocInfo,
- ISD::ArgFlagsTy ArgFlags, CCState &State) {
- auto &MF = State.getMachineFunction();
- auto &HST = MF.getSubtarget<HexagonSubtarget>();
-
- if (LocVT == MVT::v16i32) {
- if (unsigned Reg = State.AllocateReg(Hexagon::V0)) {
- State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
- return false;
- }
- } else if (LocVT == MVT::v32i32) {
- unsigned Req = HST.useHVX128BOps() ? Hexagon::V0 : Hexagon::W0;
- if (unsigned Reg = State.AllocateReg(Req)) {
- State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
- return false;
- }
- } else if (LocVT == MVT::v64i32) {
- if (unsigned Reg = State.AllocateReg(Hexagon::W0)) {
- State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
- return false;
- }
- }
-
- return true;
-}
void HexagonTargetLowering::promoteLdStType(MVT VT, MVT PromotedLdStVT) {
if (VT != PromotedLdStVT) {
@@ -558,11 +184,14 @@ static SDValue CreateCopyOfByValArgument(SDValue Src, SDValue Dst,
bool
HexagonTargetLowering::CanLowerReturn(
- CallingConv::ID CallConv, MachineFunction &MF, bool isVarArg,
+ CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs,
LLVMContext &Context) const {
SmallVector<CCValAssign, 16> RVLocs;
- CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
+ CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context);
+
+ if (MF.getSubtarget<HexagonSubtarget>().useHVXOps())
+ return CCInfo.CheckReturn(Outs, RetCC_Hexagon_HVX);
return CCInfo.CheckReturn(Outs, RetCC_Hexagon);
}
@@ -571,7 +200,7 @@ HexagonTargetLowering::CanLowerReturn(
// the value is stored in memory pointed by a pointer passed by caller.
SDValue
HexagonTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
- bool isVarArg,
+ bool IsVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<SDValue> &OutVals,
const SDLoc &dl, SelectionDAG &DAG) const {
@@ -579,11 +208,14 @@ HexagonTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
SmallVector<CCValAssign, 16> RVLocs;
// CCState - Info about the registers and stack slot.
- CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
+ CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs,
*DAG.getContext());
// Analyze return values of ISD::RET
- CCInfo.AnalyzeReturn(Outs, RetCC_Hexagon);
+ if (Subtarget.useHVXOps())
+ CCInfo.AnalyzeReturn(Outs, RetCC_Hexagon_HVX);
+ else
+ CCInfo.AnalyzeReturn(Outs, RetCC_Hexagon);
SDValue Flag;
SmallVector<SDValue, 4> RetOps(1, Chain);
@@ -624,17 +256,20 @@ bool HexagonTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
/// being lowered. Returns a SDNode with the same number of values as the
/// ISD::CALL.
SDValue HexagonTargetLowering::LowerCallResult(
- SDValue Chain, SDValue Glue, CallingConv::ID CallConv, bool isVarArg,
+ SDValue Chain, SDValue Glue, CallingConv::ID CallConv, bool IsVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
const SmallVectorImpl<SDValue> &OutVals, SDValue Callee) const {
// Assign locations to each value returned by this call.
SmallVector<CCValAssign, 16> RVLocs;
- CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
+ CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs,
*DAG.getContext());
- CCInfo.AnalyzeCallResult(Ins, RetCC_Hexagon);
+ if (Subtarget.useHVXOps())
+ CCInfo.AnalyzeCallResult(Ins, RetCC_Hexagon_HVX);
+ else
+ CCInfo.AnalyzeCallResult(Ins, RetCC_Hexagon);
// Copy all of the result registers out of their specified physreg.
for (unsigned i = 0; i != RVLocs.size(); ++i) {
@@ -683,67 +318,57 @@ HexagonTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
SDValue Chain = CLI.Chain;
SDValue Callee = CLI.Callee;
- bool &IsTailCall = CLI.IsTailCall;
CallingConv::ID CallConv = CLI.CallConv;
bool IsVarArg = CLI.IsVarArg;
bool DoesNotReturn = CLI.DoesNotReturn;
- bool IsStructRet = (Outs.empty()) ? false : Outs[0].Flags.isSRet();
+ bool IsStructRet = Outs.empty() ? false : Outs[0].Flags.isSRet();
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo &MFI = MF.getFrameInfo();
auto PtrVT = getPointerTy(MF.getDataLayout());
- // Check for varargs.
- unsigned NumNamedVarArgParams = -1U;
- if (GlobalAddressSDNode *GAN = dyn_cast<GlobalAddressSDNode>(Callee)) {
- const GlobalValue *GV = GAN->getGlobal();
- Callee = DAG.getTargetGlobalAddress(GV, dl, MVT::i32);
- if (const Function* F = dyn_cast<Function>(GV)) {
- // If a function has zero args and is a vararg function, that's
- // disallowed so it must be an undeclared function. Do not assume
- // varargs if the callee is undefined.
- if (F->isVarArg() && F->getFunctionType()->getNumParams() != 0)
- NumNamedVarArgParams = F->getFunctionType()->getNumParams();
- }
- }
+ unsigned NumParams = CLI.CS.getInstruction()
+ ? CLI.CS.getFunctionType()->getNumParams()
+ : 0;
+ if (GlobalAddressSDNode *GAN = dyn_cast<GlobalAddressSDNode>(Callee))
+ Callee = DAG.getTargetGlobalAddress(GAN->getGlobal(), dl, MVT::i32);
// Analyze operands of the call, assigning locations to each operand.
SmallVector<CCValAssign, 16> ArgLocs;
- HexagonCCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), ArgLocs,
- *DAG.getContext(), NumNamedVarArgParams);
+ HexagonCCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext(),
+ NumParams);
- if (IsVarArg)
- CCInfo.AnalyzeCallOperands(Outs, CC_Hexagon_VarArg);
+ if (Subtarget.useHVXOps())
+ CCInfo.AnalyzeCallOperands(Outs, CC_Hexagon_HVX);
else
CCInfo.AnalyzeCallOperands(Outs, CC_Hexagon);
auto Attr = MF.getFunction().getFnAttribute("disable-tail-calls");
if (Attr.getValueAsString() == "true")
- IsTailCall = false;
+ CLI.IsTailCall = false;
- if (IsTailCall) {
+ if (CLI.IsTailCall) {
bool StructAttrFlag = MF.getFunction().hasStructRetAttr();
- IsTailCall = IsEligibleForTailCallOptimization(Callee, CallConv,
- IsVarArg, IsStructRet,
- StructAttrFlag,
- Outs, OutVals, Ins, DAG);
+ CLI.IsTailCall = IsEligibleForTailCallOptimization(Callee, CallConv,
+ IsVarArg, IsStructRet, StructAttrFlag, Outs,
+ OutVals, Ins, DAG);
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
CCValAssign &VA = ArgLocs[i];
if (VA.isMemLoc()) {
- IsTailCall = false;
+ CLI.IsTailCall = false;
break;
}
}
- DEBUG(dbgs() << (IsTailCall ? "Eligible for Tail Call\n"
- : "Argument must be passed on stack. "
- "Not eligible for Tail Call\n"));
+ LLVM_DEBUG(dbgs() << (CLI.IsTailCall ? "Eligible for Tail Call\n"
+ : "Argument must be passed on stack. "
+ "Not eligible for Tail Call\n"));
}
// Get a count of how many bytes are to be pushed on the stack.
unsigned NumBytes = CCInfo.getNextStackOffset();
SmallVector<std::pair<unsigned, SDValue>, 16> RegsToPass;
SmallVector<SDValue, 8> MemOpChains;
- auto &HRI = *Subtarget.getRegisterInfo();
+ const HexagonRegisterInfo &HRI = *Subtarget.getRegisterInfo();
SDValue StackPtr =
DAG.getCopyFromReg(Chain, dl, HRI.getStackRegister(), PtrVT);
@@ -789,7 +414,7 @@ HexagonTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
VA.getLocVT().getStoreSizeInBits() >> 3);
if (Flags.isByVal()) {
// The argument is a struct passed by value. According to LLVM, "Arg"
- // is is pointer.
+ // is a pointer.
MemOpChains.push_back(CreateCopyOfByValArgument(Arg, MemAddr, Chain,
Flags, DAG, dl));
} else {
@@ -807,14 +432,10 @@ HexagonTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
}
- if (NeedsArgAlign && Subtarget.hasV60TOps()) {
- DEBUG(dbgs() << "Function needs byte stack align due to call args\n");
- // V6 vectors passed by value have 64 or 128 byte alignment depending
- // on whether we are 64 byte vector mode or 128 byte.
- bool UseHVX128B = Subtarget.useHVX128BOps();
- assert(Subtarget.useHVXOps());
- const unsigned ObjAlign = UseHVX128B ? 128 : 64;
- LargestAlignSeen = std::max(LargestAlignSeen, ObjAlign);
+ if (NeedsArgAlign && Subtarget.hasV60Ops()) {
+ LLVM_DEBUG(dbgs() << "Function needs byte stack align due to call args\n");
+ unsigned VecAlign = HRI.getSpillAlignment(Hexagon::HvxVRRegClass);
+ LargestAlignSeen = std::max(LargestAlignSeen, VecAlign);
MFI.ensureMaxAlignment(LargestAlignSeen);
}
// Transform all store nodes into one single node because all store
@@ -823,7 +444,7 @@ HexagonTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
SDValue Glue;
- if (!IsTailCall) {
+ if (!CLI.IsTailCall) {
Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
Glue = Chain.getValue(1);
}
@@ -832,7 +453,7 @@ HexagonTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// chain and flag operands which copy the outgoing args into registers.
// The Glue is necessary since all emitted instructions must be
// stuck together.
- if (!IsTailCall) {
+ if (!CLI.IsTailCall) {
for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
RegsToPass[i].second, Glue);
@@ -891,7 +512,7 @@ HexagonTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
if (Glue.getNode())
Ops.push_back(Glue);
- if (IsTailCall) {
+ if (CLI.IsTailCall) {
MFI.setHasTailCall();
return DAG.getNode(HexagonISD::TC_RETURN, dl, NodeTys, Ops);
}
@@ -916,66 +537,36 @@ HexagonTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
InVals, OutVals, Callee);
}
-static bool getIndexedAddressParts(SDNode *Ptr, EVT VT,
- SDValue &Base, SDValue &Offset,
- bool &IsInc, SelectionDAG &DAG) {
- if (Ptr->getOpcode() != ISD::ADD)
- return false;
-
- auto &HST = static_cast<const HexagonSubtarget&>(DAG.getSubtarget());
-
- bool ValidHVX128BType =
- HST.useHVX128BOps() && (VT == MVT::v32i32 ||
- VT == MVT::v64i16 || VT == MVT::v128i8);
- bool ValidHVXType =
- HST.useHVX64BOps() && (VT == MVT::v16i32 ||
- VT == MVT::v32i16 || VT == MVT::v64i8);
-
- if (ValidHVX128BType || ValidHVXType || VT == MVT::i64 || VT == MVT::i32 ||
- VT == MVT::i16 || VT == MVT::i8) {
- IsInc = (Ptr->getOpcode() == ISD::ADD);
- Base = Ptr->getOperand(0);
- Offset = Ptr->getOperand(1);
- // Ensure that Offset is a constant.
- return isa<ConstantSDNode>(Offset);
- }
-
- return false;
-}
-
-/// getPostIndexedAddressParts - returns true by value, base pointer and
-/// offset pointer and addressing mode by reference if this node can be
-/// combined with a load / store to form a post-indexed load / store.
+/// Returns true by value, base pointer and offset pointer and addressing
+/// mode by reference if this node can be combined with a load / store to
+/// form a post-indexed load / store.
bool HexagonTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op,
- SDValue &Base,
- SDValue &Offset,
- ISD::MemIndexedMode &AM,
- SelectionDAG &DAG) const
-{
- EVT VT;
-
- if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
- VT = LD->getMemoryVT();
- } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
- VT = ST->getMemoryVT();
- if (ST->getValue().getValueType() == MVT::i64 && ST->isTruncatingStore())
- return false;
- } else {
+ SDValue &Base, SDValue &Offset, ISD::MemIndexedMode &AM,
+ SelectionDAG &DAG) const {
+ LSBaseSDNode *LSN = dyn_cast<LSBaseSDNode>(N);
+ if (!LSN)
+ return false;
+ EVT VT = LSN->getMemoryVT();
+ if (!VT.isSimple())
+ return false;
+ bool IsLegalType = VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32 ||
+ VT == MVT::i64 || VT == MVT::f32 || VT == MVT::f64 ||
+ VT == MVT::v2i16 || VT == MVT::v2i32 || VT == MVT::v4i8 ||
+ VT == MVT::v4i16 || VT == MVT::v8i8 ||
+ Subtarget.isHVXVectorType(VT.getSimpleVT());
+ if (!IsLegalType)
return false;
- }
- bool IsInc = false;
- bool isLegal = getIndexedAddressParts(Op, VT, Base, Offset, IsInc, DAG);
- if (isLegal) {
- auto &HII = *Subtarget.getInstrInfo();
- int32_t OffsetVal = cast<ConstantSDNode>(Offset.getNode())->getSExtValue();
- if (HII.isValidAutoIncImm(VT, OffsetVal)) {
- AM = IsInc ? ISD::POST_INC : ISD::POST_DEC;
- return true;
- }
- }
+ if (Op->getOpcode() != ISD::ADD)
+ return false;
+ Base = Op->getOperand(0);
+ Offset = Op->getOperand(1);
+ if (!isa<ConstantSDNode>(Offset.getNode()))
+ return false;
+ AM = ISD::POST_INC;
- return false;
+ int32_t V = cast<ConstantSDNode>(Offset.getNode())->getSExtValue();
+ return Subtarget.getInstrInfo()->isValidAutoIncImm(VT, V);
}
SDValue
@@ -1080,7 +671,7 @@ HexagonTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
if (A == 0)
A = HFI.getStackAlignment();
- DEBUG({
+ LLVM_DEBUG({
dbgs () << __func__ << " Align: " << A << " Size: ";
Size.getNode()->dump(&DAG);
dbgs() << "\n";
@@ -1095,20 +686,22 @@ HexagonTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
}
SDValue HexagonTargetLowering::LowerFormalArguments(
- SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
+ SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo &MFI = MF.getFrameInfo();
- MachineRegisterInfo &RegInfo = MF.getRegInfo();
- auto &FuncInfo = *MF.getInfo<HexagonMachineFunctionInfo>();
+ MachineRegisterInfo &MRI = MF.getRegInfo();
// Assign locations to all of the incoming arguments.
SmallVector<CCValAssign, 16> ArgLocs;
- CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
- *DAG.getContext());
+ HexagonCCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext(),
+ MF.getFunction().getFunctionType()->getNumParams());
- CCInfo.AnalyzeFormalArguments(Ins, CC_Hexagon);
+ if (Subtarget.useHVXOps())
+ CCInfo.AnalyzeFormalArguments(Ins, CC_Hexagon_HVX);
+ else
+ CCInfo.AnalyzeFormalArguments(Ins, CC_Hexagon);
// For LLVM, in the case when returning a struct by value (>8byte),
// the first argument is a pointer that points to the location on caller's
@@ -1117,110 +710,62 @@ SDValue HexagonTargetLowering::LowerFormalArguments(
// equal to) 8 bytes. If not, no address will be passed into callee and
// callee return the result direclty through R0/R1.
- SmallVector<SDValue, 8> MemOps;
+ auto &HMFI = *MF.getInfo<HexagonMachineFunctionInfo>();
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
CCValAssign &VA = ArgLocs[i];
ISD::ArgFlagsTy Flags = Ins[i].Flags;
- unsigned ObjSize;
- unsigned StackLocation;
- int FI;
-
- if ( (VA.isRegLoc() && !Flags.isByVal())
- || (VA.isRegLoc() && Flags.isByVal() && Flags.getByValSize() > 8)) {
- // Arguments passed in registers
- // 1. int, long long, ptr args that get allocated in register.
- // 2. Large struct that gets an register to put its address in.
- EVT RegVT = VA.getLocVT();
- if (RegVT == MVT::i8 || RegVT == MVT::i16 ||
- RegVT == MVT::i32 || RegVT == MVT::f32) {
- unsigned VReg =
- RegInfo.createVirtualRegister(&Hexagon::IntRegsRegClass);
- RegInfo.addLiveIn(VA.getLocReg(), VReg);
- if (VA.getLocInfo() == CCValAssign::BCvt)
- RegVT = VA.getValVT();
- SDValue Copy = DAG.getCopyFromReg(Chain, dl, VReg, RegVT);
- // Treat values of type MVT::i1 specially: they are passed in
- // registers of type i32, but they need to remain as values of
- // type i1 for consistency of the argument lowering.
- if (VA.getValVT() == MVT::i1) {
- // Generate a copy into a predicate register and use the value
- // of the register as the "InVal".
- unsigned PReg =
- RegInfo.createVirtualRegister(&Hexagon::PredRegsRegClass);
- SDNode *T = DAG.getMachineNode(Hexagon::C2_tfrrp, dl, MVT::i1,
- Copy.getValue(0));
- Copy = DAG.getCopyToReg(Copy.getValue(1), dl, PReg, SDValue(T, 0));
- Copy = DAG.getCopyFromReg(Copy, dl, PReg, MVT::i1);
- }
- InVals.push_back(Copy);
- Chain = Copy.getValue(1);
- } else if (RegVT == MVT::i64 || RegVT == MVT::f64) {
- unsigned VReg =
- RegInfo.createVirtualRegister(&Hexagon::DoubleRegsRegClass);
- RegInfo.addLiveIn(VA.getLocReg(), VReg);
- if (VA.getLocInfo() == CCValAssign::BCvt)
- RegVT = VA.getValVT();
- InVals.push_back(DAG.getCopyFromReg(Chain, dl, VReg, RegVT));
-
- // Single Vector
- } else if ((RegVT == MVT::v16i32 ||
- RegVT == MVT::v32i16 || RegVT == MVT::v64i8)) {
- unsigned VReg =
- RegInfo.createVirtualRegister(&Hexagon::HvxVRRegClass);
- RegInfo.addLiveIn(VA.getLocReg(), VReg);
- InVals.push_back(DAG.getCopyFromReg(Chain, dl, VReg, RegVT));
- } else if (Subtarget.useHVX128BOps() &&
- ((RegVT == MVT::v32i32 ||
- RegVT == MVT::v64i16 || RegVT == MVT::v128i8))) {
- unsigned VReg =
- RegInfo.createVirtualRegister(&Hexagon::HvxVRRegClass);
- RegInfo.addLiveIn(VA.getLocReg(), VReg);
- InVals.push_back(DAG.getCopyFromReg(Chain, dl, VReg, RegVT));
-
- // Double Vector
- } else if ((RegVT == MVT::v32i32 ||
- RegVT == MVT::v64i16 || RegVT == MVT::v128i8)) {
- unsigned VReg =
- RegInfo.createVirtualRegister(&Hexagon::HvxWRRegClass);
- RegInfo.addLiveIn(VA.getLocReg(), VReg);
- InVals.push_back(DAG.getCopyFromReg(Chain, dl, VReg, RegVT));
- } else if (Subtarget.useHVX128BOps() &&
- ((RegVT == MVT::v64i32 ||
- RegVT == MVT::v128i16 || RegVT == MVT::v256i8))) {
- unsigned VReg =
- RegInfo.createVirtualRegister(&Hexagon::HvxWRRegClass);
- RegInfo.addLiveIn(VA.getLocReg(), VReg);
- InVals.push_back(DAG.getCopyFromReg(Chain, dl, VReg, RegVT));
- } else if (RegVT == MVT::v512i1 || RegVT == MVT::v1024i1) {
- assert(0 && "need to support VecPred regs");
- unsigned VReg =
- RegInfo.createVirtualRegister(&Hexagon::HvxQRRegClass);
- RegInfo.addLiveIn(VA.getLocReg(), VReg);
- InVals.push_back(DAG.getCopyFromReg(Chain, dl, VReg, RegVT));
+ bool ByVal = Flags.isByVal();
+
+ // Arguments passed in registers:
+ // 1. 32- and 64-bit values and HVX vectors are passed directly,
+ // 2. Large structs are passed via an address, and the address is
+ // passed in a register.
+ if (VA.isRegLoc() && ByVal && Flags.getByValSize() <= 8)
+ llvm_unreachable("ByValSize must be bigger than 8 bytes");
+
+ bool InReg = VA.isRegLoc() &&
+ (!ByVal || (ByVal && Flags.getByValSize() > 8));
+
+ if (InReg) {
+ MVT RegVT = VA.getLocVT();
+ if (VA.getLocInfo() == CCValAssign::BCvt)
+ RegVT = VA.getValVT();
+
+ const TargetRegisterClass *RC = getRegClassFor(RegVT);
+ unsigned VReg = MRI.createVirtualRegister(RC);
+ SDValue Copy = DAG.getCopyFromReg(Chain, dl, VReg, RegVT);
+
+ // Treat values of type MVT::i1 specially: they are passed in
+ // registers of type i32, but they need to remain as values of
+ // type i1 for consistency of the argument lowering.
+ if (VA.getValVT() == MVT::i1) {
+ assert(RegVT.getSizeInBits() <= 32);
+ SDValue T = DAG.getNode(ISD::AND, dl, RegVT,
+ Copy, DAG.getConstant(1, dl, RegVT));
+ Copy = DAG.getSetCC(dl, MVT::i1, T, DAG.getConstant(0, dl, RegVT),
+ ISD::SETNE);
} else {
- assert (0);
+#ifndef NDEBUG
+ unsigned RegSize = RegVT.getSizeInBits();
+ assert(RegSize == 32 || RegSize == 64 ||
+ Subtarget.isHVXVectorType(RegVT));
+#endif
}
- } else if (VA.isRegLoc() && Flags.isByVal() && Flags.getByValSize() <= 8) {
- assert (0 && "ByValSize must be bigger than 8 bytes");
+ InVals.push_back(Copy);
+ MRI.addLiveIn(VA.getLocReg(), VReg);
} else {
- // Sanity check.
- assert(VA.isMemLoc());
-
- if (Flags.isByVal()) {
- // If it's a byval parameter, then we need to compute the
- // "real" size, not the size of the pointer.
- ObjSize = Flags.getByValSize();
- } else {
- ObjSize = VA.getLocVT().getStoreSizeInBits() >> 3;
- }
+ assert(VA.isMemLoc() && "Argument should be passed in memory");
- StackLocation = HEXAGON_LRFP_SIZE + VA.getLocMemOffset();
- // Create the frame index object for this incoming parameter...
- FI = MFI.CreateFixedObject(ObjSize, StackLocation, true);
+ // If it's a byval parameter, then we need to compute the
+ // "real" size, not the size of the pointer.
+ unsigned ObjSize = Flags.isByVal()
+ ? Flags.getByValSize()
+ : VA.getLocVT().getStoreSizeInBits() / 8;
- // Create the SelectionDAG nodes cordl, responding to a load
- // from this parameter.
+ // Create the frame index object for this incoming parameter.
+ int Offset = HEXAGON_LRFP_SIZE + VA.getLocMemOffset();
+ int FI = MFI.CreateFixedObject(ObjSize, Offset, true);
SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
if (Flags.isByVal()) {
@@ -1229,22 +774,19 @@ SDValue HexagonTargetLowering::LowerFormalArguments(
// location.
InVals.push_back(FIN);
} else {
- InVals.push_back(
- DAG.getLoad(VA.getValVT(), dl, Chain, FIN, MachinePointerInfo()));
+ SDValue L = DAG.getLoad(VA.getValVT(), dl, Chain, FIN,
+ MachinePointerInfo::getFixedStack(MF, FI, 0));
+ InVals.push_back(L);
}
}
}
- if (!MemOps.empty())
- Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
- if (isVarArg) {
+ if (IsVarArg) {
// This will point to the next argument passed via stack.
- int FrameIndex = MFI.CreateFixedObject(Hexagon_PointerSize,
- HEXAGON_LRFP_SIZE +
- CCInfo.getNextStackOffset(),
- true);
- FuncInfo.setVarArgsFrameIndex(FrameIndex);
+ int Offset = HEXAGON_LRFP_SIZE + CCInfo.getNextStackOffset();
+ int FI = MFI.CreateFixedObject(Hexagon_PointerSize, Offset, true);
+ HMFI.setVarArgsFrameIndex(FI);
}
return Chain;
@@ -1262,66 +804,62 @@ HexagonTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {
MachinePointerInfo(SV));
}
-static bool isSExtFree(SDValue N) {
- // A sign-extend of a truncate of a sign-extend is free.
- if (N.getOpcode() == ISD::TRUNCATE &&
- N.getOperand(0).getOpcode() == ISD::AssertSext)
- return true;
- // We have sign-extended loads.
- if (N.getOpcode() == ISD::LOAD)
- return true;
- return false;
-}
-
SDValue HexagonTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
- SDLoc dl(Op);
-
+ const SDLoc &dl(Op);
SDValue LHS = Op.getOperand(0);
SDValue RHS = Op.getOperand(1);
- if (Subtarget.useHVXOps() && Subtarget.isHVXVectorType(ty(LHS)))
- return LowerHvxSetCC(Op, DAG);
-
- SDValue Cmp = Op.getOperand(2);
- ISD::CondCode CC = cast<CondCodeSDNode>(Cmp)->get();
-
- EVT VT = Op.getValueType();
- EVT LHSVT = LHS.getValueType();
- EVT RHSVT = RHS.getValueType();
+ ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
+ MVT ResTy = ty(Op);
+ MVT OpTy = ty(LHS);
- if (LHSVT == MVT::v2i16) {
- assert(ISD::isSignedIntSetCC(CC) || ISD::isUnsignedIntSetCC(CC));
- unsigned ExtOpc = ISD::isSignedIntSetCC(CC) ? ISD::SIGN_EXTEND
- : ISD::ZERO_EXTEND;
- SDValue LX = DAG.getNode(ExtOpc, dl, MVT::v2i32, LHS);
- SDValue RX = DAG.getNode(ExtOpc, dl, MVT::v2i32, RHS);
- SDValue SC = DAG.getNode(ISD::SETCC, dl, MVT::v2i1, LX, RX, Cmp);
- return SC;
+ if (OpTy == MVT::v2i16 || OpTy == MVT::v4i8) {
+ MVT ElemTy = OpTy.getVectorElementType();
+ assert(ElemTy.isScalarInteger());
+ MVT WideTy = MVT::getVectorVT(MVT::getIntegerVT(2*ElemTy.getSizeInBits()),
+ OpTy.getVectorNumElements());
+ return DAG.getSetCC(dl, ResTy,
+ DAG.getSExtOrTrunc(LHS, SDLoc(LHS), WideTy),
+ DAG.getSExtOrTrunc(RHS, SDLoc(RHS), WideTy), CC);
}
// Treat all other vector types as legal.
- if (VT.isVector())
+ if (ResTy.isVector())
return Op;
- // Equals and not equals should use sign-extend, not zero-extend, since
- // we can represent small negative values in the compare instructions.
+ // Comparisons of short integers should use sign-extend, not zero-extend,
+ // since we can represent small negative values in the compare instructions.
// The LLVM default is to use zero-extend arbitrarily in these cases.
- if ((CC == ISD::SETEQ || CC == ISD::SETNE) &&
- (RHSVT == MVT::i8 || RHSVT == MVT::i16) &&
- (LHSVT == MVT::i8 || LHSVT == MVT::i16)) {
- ConstantSDNode *C = dyn_cast<ConstantSDNode>(RHS);
- if (C && C->getAPIntValue().isNegative()) {
- LHS = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i32, LHS);
- RHS = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i32, RHS);
- return DAG.getNode(ISD::SETCC, dl, Op.getValueType(),
- LHS, RHS, Op.getOperand(2));
- }
- if (isSExtFree(LHS) || isSExtFree(RHS)) {
- LHS = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i32, LHS);
- RHS = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i32, RHS);
- return DAG.getNode(ISD::SETCC, dl, Op.getValueType(),
- LHS, RHS, Op.getOperand(2));
+ auto isSExtFree = [this](SDValue N) {
+ switch (N.getOpcode()) {
+ case ISD::TRUNCATE: {
+ // A sign-extend of a truncate of a sign-extend is free.
+ SDValue Op = N.getOperand(0);
+ if (Op.getOpcode() != ISD::AssertSext)
+ return false;
+ EVT OrigTy = cast<VTSDNode>(Op.getOperand(1))->getVT();
+ unsigned ThisBW = ty(N).getSizeInBits();
+ unsigned OrigBW = OrigTy.getSizeInBits();
+ // The type that was sign-extended to get the AssertSext must be
+ // narrower than the type of N (so that N has still the same value
+ // as the original).
+ return ThisBW >= OrigBW;
+ }
+ case ISD::LOAD:
+ // We have sign-extended loads.
+ return true;
}
+ return false;
+ };
+
+ if (OpTy == MVT::i8 || OpTy == MVT::i16) {
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(RHS);
+ bool IsNegative = C && C->getAPIntValue().isNegative();
+ if (IsNegative || isSExtFree(LHS) || isSExtFree(RHS))
+ return DAG.getSetCC(dl, ResTy,
+ DAG.getSExtOrTrunc(LHS, SDLoc(LHS), MVT::i32),
+ DAG.getSExtOrTrunc(RHS, SDLoc(RHS), MVT::i32), CC);
}
+
return SDValue();
}
@@ -1393,8 +931,7 @@ HexagonTargetLowering::LowerConstantPool(SDValue Op, SelectionDAG &DAG) const {
else if (isVTi1Type)
T = DAG.getTargetConstantPool(CVal, ValTy, Align, Offset, TF);
else
- T = DAG.getTargetConstantPool(CPN->getConstVal(), ValTy, Align, Offset,
- TF);
+ T = DAG.getTargetConstantPool(CPN->getConstVal(), ValTy, Align, Offset, TF);
assert(cast<ConstantPoolSDNode>(T)->getTargetFlags() == TF &&
"Inconsistent target flag encountered");
@@ -1480,7 +1017,7 @@ HexagonTargetLowering::LowerGLOBALADDRESS(SDValue Op, SelectionDAG &DAG) const {
if (RM == Reloc::Static) {
SDValue GA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, Offset);
const GlobalObject *GO = GV->getBaseObject();
- if (GO && HLOF.isGlobalInSmallSection(GO, HTM))
+ if (GO && Subtarget.useSmallData() && HLOF.isGlobalInSmallSection(GO, HTM))
return DAG.getNode(HexagonISD::CONST32_GP, dl, PtrVT, GA);
return DAG.getNode(HexagonISD::CONST32, dl, PtrVT, GA);
}
@@ -1688,13 +1225,15 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM,
const HexagonSubtarget &ST)
: TargetLowering(TM), HTM(static_cast<const HexagonTargetMachine&>(TM)),
Subtarget(ST) {
- bool IsV4 = !Subtarget.hasV5TOps();
+ bool IsV4 = !Subtarget.hasV5Ops();
auto &HRI = *Subtarget.getRegisterInfo();
setPrefLoopAlignment(4);
setPrefFunctionAlignment(4);
setMinFunctionAlignment(2);
setStackPointerRegisterToSaveRestore(HRI.getStackRegister());
+ setBooleanContents(TargetLoweringBase::UndefinedBooleanContent);
+ setBooleanVectorContents(TargetLoweringBase::UndefinedBooleanContent);
setMaxAtomicSizeInBitsSupported(64);
setMinCmpXchgSizeInBits(32);
@@ -1728,45 +1267,11 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM,
addRegisterClass(MVT::v4i16, &Hexagon::DoubleRegsRegClass);
addRegisterClass(MVT::v2i32, &Hexagon::DoubleRegsRegClass);
- if (Subtarget.hasV5TOps()) {
+ if (Subtarget.hasV5Ops()) {
addRegisterClass(MVT::f32, &Hexagon::IntRegsRegClass);
addRegisterClass(MVT::f64, &Hexagon::DoubleRegsRegClass);
}
- if (Subtarget.hasV60TOps()) {
- if (Subtarget.useHVX64BOps()) {
- addRegisterClass(MVT::v64i8, &Hexagon::HvxVRRegClass);
- addRegisterClass(MVT::v32i16, &Hexagon::HvxVRRegClass);
- addRegisterClass(MVT::v16i32, &Hexagon::HvxVRRegClass);
- addRegisterClass(MVT::v128i8, &Hexagon::HvxWRRegClass);
- addRegisterClass(MVT::v64i16, &Hexagon::HvxWRRegClass);
- addRegisterClass(MVT::v32i32, &Hexagon::HvxWRRegClass);
- // These "short" boolean vector types should be legal because
- // they will appear as results of vector compares. If they were
- // not legal, type legalization would try to make them legal
- // and that would require using operations that do not use or
- // produce such types. That, in turn, would imply using custom
- // nodes, which would be unoptimizable by the DAG combiner.
- // The idea is to rely on target-independent operations as much
- // as possible.
- addRegisterClass(MVT::v16i1, &Hexagon::HvxQRRegClass);
- addRegisterClass(MVT::v32i1, &Hexagon::HvxQRRegClass);
- addRegisterClass(MVT::v64i1, &Hexagon::HvxQRRegClass);
- addRegisterClass(MVT::v512i1, &Hexagon::HvxQRRegClass);
- } else if (Subtarget.useHVX128BOps()) {
- addRegisterClass(MVT::v128i8, &Hexagon::HvxVRRegClass);
- addRegisterClass(MVT::v64i16, &Hexagon::HvxVRRegClass);
- addRegisterClass(MVT::v32i32, &Hexagon::HvxVRRegClass);
- addRegisterClass(MVT::v256i8, &Hexagon::HvxWRRegClass);
- addRegisterClass(MVT::v128i16, &Hexagon::HvxWRRegClass);
- addRegisterClass(MVT::v64i32, &Hexagon::HvxWRRegClass);
- addRegisterClass(MVT::v32i1, &Hexagon::HvxQRRegClass);
- addRegisterClass(MVT::v64i1, &Hexagon::HvxQRRegClass);
- addRegisterClass(MVT::v128i1, &Hexagon::HvxQRRegClass);
- addRegisterClass(MVT::v1024i1, &Hexagon::HvxQRRegClass);
- }
- }
-
//
// Handling of scalar operations.
//
@@ -1801,13 +1306,16 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::BlockAddress, MVT::i32, Custom);
// Hexagon needs to optimize cases with negative constants.
- setOperationAction(ISD::SETCC, MVT::i8, Custom);
- setOperationAction(ISD::SETCC, MVT::i16, Custom);
+ setOperationAction(ISD::SETCC, MVT::i8, Custom);
+ setOperationAction(ISD::SETCC, MVT::i16, Custom);
+ setOperationAction(ISD::SETCC, MVT::v4i8, Custom);
+ setOperationAction(ISD::SETCC, MVT::v2i16, Custom);
// VASTART needs to be custom lowered to use the VarArgsFrameIndex.
setOperationAction(ISD::VASTART, MVT::Other, Custom);
setOperationAction(ISD::VAEND, MVT::Other, Expand);
setOperationAction(ISD::VAARG, MVT::Other, Expand);
+ setOperationAction(ISD::VACOPY, MVT::Other, Expand);
setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
@@ -1819,35 +1327,21 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM,
setMinimumJumpTableEntries(std::numeric_limits<int>::max());
setOperationAction(ISD::BR_JT, MVT::Other, Expand);
- // Hexagon has instructions for add/sub with carry. The problem with
- // modeling these instructions is that they produce 2 results: Rdd and Px.
- // To model the update of Px, we will have to use Defs[p0..p3] which will
- // cause any predicate live range to spill. So, we pretend we dont't have
- // these instructions.
- setOperationAction(ISD::ADDE, MVT::i8, Expand);
- setOperationAction(ISD::ADDE, MVT::i16, Expand);
- setOperationAction(ISD::ADDE, MVT::i32, Expand);
- setOperationAction(ISD::ADDE, MVT::i64, Expand);
- setOperationAction(ISD::SUBE, MVT::i8, Expand);
- setOperationAction(ISD::SUBE, MVT::i16, Expand);
- setOperationAction(ISD::SUBE, MVT::i32, Expand);
- setOperationAction(ISD::SUBE, MVT::i64, Expand);
- setOperationAction(ISD::ADDC, MVT::i8, Expand);
- setOperationAction(ISD::ADDC, MVT::i16, Expand);
- setOperationAction(ISD::ADDC, MVT::i32, Expand);
- setOperationAction(ISD::ADDC, MVT::i64, Expand);
- setOperationAction(ISD::SUBC, MVT::i8, Expand);
- setOperationAction(ISD::SUBC, MVT::i16, Expand);
- setOperationAction(ISD::SUBC, MVT::i32, Expand);
- setOperationAction(ISD::SUBC, MVT::i64, Expand);
-
- // Only add and sub that detect overflow are the saturating ones.
+ setOperationAction(ISD::ABS, MVT::i32, Legal);
+ setOperationAction(ISD::ABS, MVT::i64, Legal);
+
+ // Hexagon has A4_addp_c and A4_subp_c that take and generate a carry bit,
+ // but they only operate on i64.
for (MVT VT : MVT::integer_valuetypes()) {
- setOperationAction(ISD::UADDO, VT, Expand);
- setOperationAction(ISD::SADDO, VT, Expand);
- setOperationAction(ISD::USUBO, VT, Expand);
- setOperationAction(ISD::SSUBO, VT, Expand);
+ setOperationAction(ISD::UADDO, VT, Expand);
+ setOperationAction(ISD::USUBO, VT, Expand);
+ setOperationAction(ISD::SADDO, VT, Expand);
+ setOperationAction(ISD::SSUBO, VT, Expand);
+ setOperationAction(ISD::ADDCARRY, VT, Expand);
+ setOperationAction(ISD::SUBCARRY, VT, Expand);
}
+ setOperationAction(ISD::ADDCARRY, MVT::i64, Custom);
+ setOperationAction(ISD::SUBCARRY, MVT::i64, Custom);
setOperationAction(ISD::CTLZ, MVT::i8, Promote);
setOperationAction(ISD::CTLZ, MVT::i16, Promote);
@@ -1865,22 +1359,21 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::BITREVERSE, MVT::i64, Legal);
setOperationAction(ISD::BSWAP, MVT::i32, Legal);
setOperationAction(ISD::BSWAP, MVT::i64, Legal);
- setOperationAction(ISD::MUL, MVT::i64, Legal);
for (unsigned IntExpOp :
- { ISD::SDIV, ISD::UDIV, ISD::SREM, ISD::UREM,
- ISD::SDIVREM, ISD::UDIVREM, ISD::ROTL, ISD::ROTR,
- ISD::SHL_PARTS, ISD::SRA_PARTS, ISD::SRL_PARTS,
- ISD::SMUL_LOHI, ISD::UMUL_LOHI }) {
- setOperationAction(IntExpOp, MVT::i32, Expand);
- setOperationAction(IntExpOp, MVT::i64, Expand);
+ {ISD::SDIV, ISD::UDIV, ISD::SREM, ISD::UREM,
+ ISD::SDIVREM, ISD::UDIVREM, ISD::ROTL, ISD::ROTR,
+ ISD::SHL_PARTS, ISD::SRA_PARTS, ISD::SRL_PARTS,
+ ISD::SMUL_LOHI, ISD::UMUL_LOHI}) {
+ for (MVT VT : MVT::integer_valuetypes())
+ setOperationAction(IntExpOp, VT, Expand);
}
for (unsigned FPExpOp :
{ISD::FDIV, ISD::FREM, ISD::FSQRT, ISD::FSIN, ISD::FCOS, ISD::FSINCOS,
ISD::FPOW, ISD::FCOPYSIGN}) {
- setOperationAction(FPExpOp, MVT::f32, Expand);
- setOperationAction(FPExpOp, MVT::f64, Expand);
+ for (MVT VT : MVT::fp_valuetypes())
+ setOperationAction(FPExpOp, VT, Expand);
}
// No extending loads from i32.
@@ -1920,10 +1413,9 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM,
// either "custom" or "legal" for specific cases.
static const unsigned VectExpOps[] = {
// Integer arithmetic:
- ISD::ADD, ISD::SUB, ISD::MUL, ISD::SDIV, ISD::UDIV,
- ISD::SREM, ISD::UREM, ISD::SDIVREM, ISD::UDIVREM, ISD::ADDC,
- ISD::SUBC, ISD::SADDO, ISD::UADDO, ISD::SSUBO, ISD::USUBO,
- ISD::SMUL_LOHI, ISD::UMUL_LOHI,
+ ISD::ADD, ISD::SUB, ISD::MUL, ISD::SDIV, ISD::UDIV,
+ ISD::SREM, ISD::UREM, ISD::SDIVREM, ISD::UDIVREM, ISD::SADDO,
+ ISD::UADDO, ISD::SSUBO, ISD::USUBO, ISD::SMUL_LOHI, ISD::UMUL_LOHI,
// Logical/bit:
ISD::AND, ISD::OR, ISD::XOR, ISD::ROTL, ISD::ROTR,
ISD::CTPOP, ISD::CTLZ, ISD::CTTZ,
@@ -1970,16 +1462,16 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM,
// Extending loads from (native) vectors of i8 into (native) vectors of i16
// are legal.
- setLoadExtAction(ISD::EXTLOAD, MVT::v2i16, MVT::v2i8, Legal);
+ setLoadExtAction(ISD::EXTLOAD, MVT::v2i16, MVT::v2i8, Legal);
setLoadExtAction(ISD::ZEXTLOAD, MVT::v2i16, MVT::v2i8, Legal);
setLoadExtAction(ISD::SEXTLOAD, MVT::v2i16, MVT::v2i8, Legal);
- setLoadExtAction(ISD::EXTLOAD, MVT::v4i16, MVT::v4i8, Legal);
+ setLoadExtAction(ISD::EXTLOAD, MVT::v4i16, MVT::v4i8, Legal);
setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i16, MVT::v4i8, Legal);
setLoadExtAction(ISD::SEXTLOAD, MVT::v4i16, MVT::v4i8, Legal);
// Types natively supported:
- for (MVT NativeVT : {MVT::v32i1, MVT::v64i1, MVT::v4i8, MVT::v8i8, MVT::v2i16,
- MVT::v4i16, MVT::v1i32, MVT::v2i32, MVT::v1i64}) {
+ for (MVT NativeVT : {MVT::v8i1, MVT::v4i1, MVT::v2i1, MVT::v4i8,
+ MVT::v8i8, MVT::v2i16, MVT::v4i16, MVT::v2i32}) {
setOperationAction(ISD::BUILD_VECTOR, NativeVT, Custom);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, NativeVT, Custom);
setOperationAction(ISD::INSERT_VECTOR_ELT, NativeVT, Custom);
@@ -1995,19 +1487,34 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::XOR, NativeVT, Legal);
}
+ // Custom lower unaligned loads.
+ for (MVT VecVT : {MVT::i32, MVT::v4i8, MVT::i64, MVT::v8i8,
+ MVT::v2i16, MVT::v4i16, MVT::v2i32}) {
+ setOperationAction(ISD::LOAD, VecVT, Custom);
+ }
+
+ for (MVT VT : {MVT::v2i16, MVT::v4i8, MVT::v2i32, MVT::v4i16, MVT::v2i32}) {
+ setCondCodeAction(ISD::SETLT, VT, Expand);
+ setCondCodeAction(ISD::SETLE, VT, Expand);
+ setCondCodeAction(ISD::SETULT, VT, Expand);
+ setCondCodeAction(ISD::SETULE, VT, Expand);
+ }
+
+ // Custom-lower bitcasts from i8 to v8i1.
+ setOperationAction(ISD::BITCAST, MVT::i8, Custom);
setOperationAction(ISD::SETCC, MVT::v2i16, Custom);
setOperationAction(ISD::VSELECT, MVT::v2i16, Custom);
+ setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4i8, Custom);
setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4i16, Custom);
setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v8i8, Custom);
- auto setPromoteTo = [this] (unsigned Opc, MVT FromTy, MVT ToTy) {
- setOperationAction(Opc, FromTy, Promote);
- AddPromotedToType(Opc, FromTy, ToTy);
- };
-
// Subtarget-specific operation actions.
//
- if (Subtarget.hasV5TOps()) {
+ if (Subtarget.hasV60Ops()) {
+ setOperationAction(ISD::ROTL, MVT::i32, Custom);
+ setOperationAction(ISD::ROTL, MVT::i64, Custom);
+ }
+ if (Subtarget.hasV5Ops()) {
setOperationAction(ISD::FMA, MVT::f64, Expand);
setOperationAction(ISD::FADD, MVT::f64, Expand);
setOperationAction(ISD::FSUB, MVT::f64, Expand);
@@ -2061,71 +1568,14 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM,
// Handling of indexed loads/stores: default is "expand".
//
- for (MVT VT : {MVT::i8, MVT::i16, MVT::i32, MVT::i64}) {
+ for (MVT VT : {MVT::i8, MVT::i16, MVT::i32, MVT::i64, MVT::f32, MVT::f64,
+ MVT::v2i16, MVT::v2i32, MVT::v4i8, MVT::v4i16, MVT::v8i8}) {
setIndexedLoadAction(ISD::POST_INC, VT, Legal);
setIndexedStoreAction(ISD::POST_INC, VT, Legal);
}
- if (Subtarget.useHVXOps()) {
- bool Use64b = Subtarget.useHVX64BOps();
- ArrayRef<MVT> LegalV = Use64b ? LegalV64 : LegalV128;
- ArrayRef<MVT> LegalW = Use64b ? LegalW64 : LegalW128;
- MVT ByteV = Use64b ? MVT::v64i8 : MVT::v128i8;
- MVT ByteW = Use64b ? MVT::v128i8 : MVT::v256i8;
-
- setOperationAction(ISD::VECTOR_SHUFFLE, ByteV, Legal);
- setOperationAction(ISD::VECTOR_SHUFFLE, ByteW, Legal);
- setOperationAction(ISD::CONCAT_VECTORS, ByteW, Legal);
- setOperationAction(ISD::AND, ByteV, Legal);
- setOperationAction(ISD::OR, ByteV, Legal);
- setOperationAction(ISD::XOR, ByteV, Legal);
-
- for (MVT T : LegalV) {
- setIndexedLoadAction(ISD::POST_INC, T, Legal);
- setIndexedStoreAction(ISD::POST_INC, T, Legal);
-
- setOperationAction(ISD::ADD, T, Legal);
- setOperationAction(ISD::SUB, T, Legal);
- if (T != ByteV) {
- setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, T, Legal);
- setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, T, Legal);
- }
-
- setOperationAction(ISD::MUL, T, Custom);
- setOperationAction(ISD::SETCC, T, Custom);
- setOperationAction(ISD::BUILD_VECTOR, T, Custom);
- setOperationAction(ISD::INSERT_SUBVECTOR, T, Custom);
- setOperationAction(ISD::INSERT_VECTOR_ELT, T, Custom);
- setOperationAction(ISD::EXTRACT_SUBVECTOR, T, Custom);
- setOperationAction(ISD::EXTRACT_VECTOR_ELT, T, Custom);
- if (T != ByteV)
- setOperationAction(ISD::ANY_EXTEND_VECTOR_INREG, T, Custom);
- }
-
- for (MVT T : LegalV) {
- if (T == ByteV)
- continue;
- // Promote all shuffles and concats to operate on vectors of bytes.
- setPromoteTo(ISD::VECTOR_SHUFFLE, T, ByteV);
- setPromoteTo(ISD::CONCAT_VECTORS, T, ByteV);
- setPromoteTo(ISD::AND, T, ByteV);
- setPromoteTo(ISD::OR, T, ByteV);
- setPromoteTo(ISD::XOR, T, ByteV);
- }
-
- for (MVT T : LegalW) {
- // Custom-lower BUILD_VECTOR for vector pairs. The standard (target-
- // independent) handling of it would convert it to a load, which is
- // not always the optimal choice.
- setOperationAction(ISD::BUILD_VECTOR, T, Custom);
-
- if (T == ByteW)
- continue;
- // Promote all shuffles and concats to operate on vectors of bytes.
- setPromoteTo(ISD::VECTOR_SHUFFLE, T, ByteW);
- setPromoteTo(ISD::CONCAT_VECTORS, T, ByteW);
- }
- }
+ if (Subtarget.useHVXOps())
+ initializeHVXLowering();
computeRegisterProperties(&HRI);
@@ -2195,7 +1645,7 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM,
setLibcallName(RTLIB::DIV_F32, "__hexagon_divsf3");
}
- if (Subtarget.hasV5TOps()) {
+ if (Subtarget.hasV5Ops()) {
if (FastMath)
setLibcallName(RTLIB::SQRT_F32, "__hexagon_fast2_sqrtf");
else
@@ -2242,6 +1692,8 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM,
const char* HexagonTargetLowering::getTargetNodeName(unsigned Opcode) const {
switch ((HexagonISD::NodeType)Opcode) {
+ case HexagonISD::ADDC: return "HexagonISD::ADDC";
+ case HexagonISD::SUBC: return "HexagonISD::SUBC";
case HexagonISD::ALLOCA: return "HexagonISD::ALLOCA";
case HexagonISD::AT_GOT: return "HexagonISD::AT_GOT";
case HexagonISD::AT_PCREL: return "HexagonISD::AT_PCREL";
@@ -2255,16 +1707,12 @@ const char* HexagonTargetLowering::getTargetNodeName(unsigned Opcode) const {
case HexagonISD::CP: return "HexagonISD::CP";
case HexagonISD::DCFETCH: return "HexagonISD::DCFETCH";
case HexagonISD::EH_RETURN: return "HexagonISD::EH_RETURN";
+ case HexagonISD::TSTBIT: return "HexagonISD::TSTBIT";
case HexagonISD::EXTRACTU: return "HexagonISD::EXTRACTU";
- case HexagonISD::EXTRACTURP: return "HexagonISD::EXTRACTURP";
case HexagonISD::INSERT: return "HexagonISD::INSERT";
- case HexagonISD::INSERTRP: return "HexagonISD::INSERTRP";
case HexagonISD::JT: return "HexagonISD::JT";
case HexagonISD::RET_FLAG: return "HexagonISD::RET_FLAG";
case HexagonISD::TC_RETURN: return "HexagonISD::TC_RETURN";
- case HexagonISD::VCOMBINE: return "HexagonISD::VCOMBINE";
- case HexagonISD::VPACKE: return "HexagonISD::VPACKE";
- case HexagonISD::VPACKO: return "HexagonISD::VPACKO";
case HexagonISD::VASL: return "HexagonISD::VASL";
case HexagonISD::VASR: return "HexagonISD::VASR";
case HexagonISD::VLSR: return "HexagonISD::VLSR";
@@ -2274,11 +1722,97 @@ const char* HexagonTargetLowering::getTargetNodeName(unsigned Opcode) const {
case HexagonISD::VROR: return "HexagonISD::VROR";
case HexagonISD::READCYCLE: return "HexagonISD::READCYCLE";
case HexagonISD::VZERO: return "HexagonISD::VZERO";
+ case HexagonISD::VSPLATW: return "HexagonISD::VSPLATW";
+ case HexagonISD::D2P: return "HexagonISD::D2P";
+ case HexagonISD::P2D: return "HexagonISD::P2D";
+ case HexagonISD::V2Q: return "HexagonISD::V2Q";
+ case HexagonISD::Q2V: return "HexagonISD::Q2V";
+ case HexagonISD::QCAT: return "HexagonISD::QCAT";
+ case HexagonISD::QTRUE: return "HexagonISD::QTRUE";
+ case HexagonISD::QFALSE: return "HexagonISD::QFALSE";
+ case HexagonISD::TYPECAST: return "HexagonISD::TYPECAST";
+ case HexagonISD::VALIGN: return "HexagonISD::VALIGN";
+ case HexagonISD::VALIGNADDR: return "HexagonISD::VALIGNADDR";
case HexagonISD::OP_END: break;
}
return nullptr;
}
+// Bit-reverse Load Intrinsic: Check if the instruction is a bit reverse load
+// intrinsic.
+static bool isBrevLdIntrinsic(const Value *Inst) {
+ unsigned ID = cast<IntrinsicInst>(Inst)->getIntrinsicID();
+ return (ID == Intrinsic::hexagon_L2_loadrd_pbr ||
+ ID == Intrinsic::hexagon_L2_loadri_pbr ||
+ ID == Intrinsic::hexagon_L2_loadrh_pbr ||
+ ID == Intrinsic::hexagon_L2_loadruh_pbr ||
+ ID == Intrinsic::hexagon_L2_loadrb_pbr ||
+ ID == Intrinsic::hexagon_L2_loadrub_pbr);
+}
+
+// Bit-reverse Load Intrinsic :Crawl up and figure out the object from previous
+// instruction. So far we only handle bitcast, extract value and bit reverse
+// load intrinsic instructions. Should we handle CGEP ?
+static Value *getBrevLdObject(Value *V) {
+ if (Operator::getOpcode(V) == Instruction::ExtractValue ||
+ Operator::getOpcode(V) == Instruction::BitCast)
+ V = cast<Operator>(V)->getOperand(0);
+ else if (isa<IntrinsicInst>(V) && isBrevLdIntrinsic(V))
+ V = cast<Instruction>(V)->getOperand(0);
+ return V;
+}
+
+// Bit-reverse Load Intrinsic: For a PHI Node return either an incoming edge or
+// a back edge. If the back edge comes from the intrinsic itself, the incoming
+// edge is returned.
+static Value *returnEdge(const PHINode *PN, Value *IntrBaseVal) {
+ const BasicBlock *Parent = PN->getParent();
+ int Idx = -1;
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i < e; ++i) {
+ BasicBlock *Blk = PN->getIncomingBlock(i);
+ // Determine if the back edge is originated from intrinsic.
+ if (Blk == Parent) {
+ Value *BackEdgeVal = PN->getIncomingValue(i);
+ Value *BaseVal;
+ // Loop over till we return the same Value or we hit the IntrBaseVal.
+ do {
+ BaseVal = BackEdgeVal;
+ BackEdgeVal = getBrevLdObject(BackEdgeVal);
+ } while ((BaseVal != BackEdgeVal) && (IntrBaseVal != BackEdgeVal));
+ // If the getBrevLdObject returns IntrBaseVal, we should return the
+ // incoming edge.
+ if (IntrBaseVal == BackEdgeVal)
+ continue;
+ Idx = i;
+ break;
+ } else // Set the node to incoming edge.
+ Idx = i;
+ }
+ assert(Idx >= 0 && "Unexpected index to incoming argument in PHI");
+ return PN->getIncomingValue(Idx);
+}
+
+// Bit-reverse Load Intrinsic: Figure out the underlying object the base
+// pointer points to, for the bit-reverse load intrinsic. Setting this to
+// memoperand might help alias analysis to figure out the dependencies.
+static Value *getUnderLyingObjectForBrevLdIntr(Value *V) {
+ Value *IntrBaseVal = V;
+ Value *BaseVal;
+ // Loop over till we return the same Value, implies we either figure out
+ // the object or we hit a PHI
+ do {
+ BaseVal = V;
+ V = getBrevLdObject(V);
+ } while (BaseVal != V);
+
+ // Identify the object from PHINode.
+ if (const PHINode *PN = dyn_cast<PHINode>(V))
+ return returnEdge(PN, IntrBaseVal);
+ // For non PHI nodes, the object is the last value returned by getBrevLdObject
+ else
+ return V;
+}
+
/// Given an intrinsic, checks if on the target the intrinsic will need to map
/// to a MemIntrinsicNode (touches memory). If this is the case, it returns
/// true and store the intrinsic information into the IntrinsicInfo that was
@@ -2288,6 +1822,32 @@ bool HexagonTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
MachineFunction &MF,
unsigned Intrinsic) const {
switch (Intrinsic) {
+ case Intrinsic::hexagon_L2_loadrd_pbr:
+ case Intrinsic::hexagon_L2_loadri_pbr:
+ case Intrinsic::hexagon_L2_loadrh_pbr:
+ case Intrinsic::hexagon_L2_loadruh_pbr:
+ case Intrinsic::hexagon_L2_loadrb_pbr:
+ case Intrinsic::hexagon_L2_loadrub_pbr: {
+ Info.opc = ISD::INTRINSIC_W_CHAIN;
+ auto &DL = I.getCalledFunction()->getParent()->getDataLayout();
+ auto &Cont = I.getCalledFunction()->getParent()->getContext();
+ // The intrinsic function call is of the form { ElTy, i8* }
+ // @llvm.hexagon.L2.loadXX.pbr(i8*, i32). The pointer and memory access type
+ // should be derived from ElTy.
+ PointerType *PtrTy = I.getCalledFunction()
+ ->getReturnType()
+ ->getContainedType(0)
+ ->getPointerTo();
+ Info.memVT = MVT::getVT(PtrTy->getElementType());
+ llvm::Value *BasePtrVal = I.getOperand(0);
+ Info.ptrVal = getUnderLyingObjectForBrevLdIntr(BasePtrVal);
+ // The offset value comes through Modifier register. For now, assume the
+ // offset is 0.
+ Info.offset = 0;
+ Info.align = DL.getABITypeAlignment(Info.memVT.getTypeForEVT(Cont));
+ Info.flags = MachineMemOperand::MOLoad;
+ return true;
+ }
case Intrinsic::hexagon_V6_vgathermw:
case Intrinsic::hexagon_V6_vgathermw_128B:
case Intrinsic::hexagon_V6_vgathermh:
@@ -2319,17 +1879,13 @@ bool HexagonTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
}
bool HexagonTargetLowering::isTruncateFree(Type *Ty1, Type *Ty2) const {
- EVT MTy1 = EVT::getEVT(Ty1);
- EVT MTy2 = EVT::getEVT(Ty2);
- if (!MTy1.isSimple() || !MTy2.isSimple())
- return false;
- return (MTy1.getSimpleVT() == MVT::i64) && (MTy2.getSimpleVT() == MVT::i32);
+ return isTruncateFree(EVT::getEVT(Ty1), EVT::getEVT(Ty2));
}
bool HexagonTargetLowering::isTruncateFree(EVT VT1, EVT VT2) const {
if (!VT1.isSimple() || !VT2.isSimple())
return false;
- return (VT1.getSimpleVT() == MVT::i64) && (VT2.getSimpleVT() == MVT::i32);
+ return VT1.getSimpleVT() == MVT::i64 && VT2.getSimpleVT() == MVT::i32;
}
bool HexagonTargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const {
@@ -2372,126 +1928,199 @@ HexagonTargetLowering::getPreferredVectorAction(EVT VT) const {
return TargetLoweringBase::TypeSplitVector;
}
+std::pair<SDValue, int>
+HexagonTargetLowering::getBaseAndOffset(SDValue Addr) const {
+ if (Addr.getOpcode() == ISD::ADD) {
+ SDValue Op1 = Addr.getOperand(1);
+ if (auto *CN = dyn_cast<const ConstantSDNode>(Op1.getNode()))
+ return { Addr.getOperand(0), CN->getSExtValue() };
+ }
+ return { Addr, 0 };
+}
+
// Lower a vector shuffle (V1, V2, V3). V1 and V2 are the two vectors
// to select data from, V3 is the permutation.
SDValue
HexagonTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG)
const {
- const ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op);
- SDValue V1 = Op.getOperand(0);
- SDValue V2 = Op.getOperand(1);
- SDLoc dl(Op);
- EVT VT = Op.getValueType();
+ const auto *SVN = cast<ShuffleVectorSDNode>(Op);
+ ArrayRef<int> AM = SVN->getMask();
+ assert(AM.size() <= 8 && "Unexpected shuffle mask");
+ unsigned VecLen = AM.size();
- if (V2.isUndef())
- V2 = V1;
-
- if (SVN->isSplat()) {
- int Lane = SVN->getSplatIndex();
- if (Lane == -1) Lane = 0;
-
- // Test if V1 is a SCALAR_TO_VECTOR.
- if (Lane == 0 && V1.getOpcode() == ISD::SCALAR_TO_VECTOR)
- return DAG.getNode(HexagonISD::VSPLAT, dl, VT, V1.getOperand(0));
-
- // Test if V1 is a BUILD_VECTOR which is equivalent to a SCALAR_TO_VECTOR
- // (and probably will turn into a SCALAR_TO_VECTOR once legalization
- // reaches it).
- if (Lane == 0 && V1.getOpcode() == ISD::BUILD_VECTOR &&
- !isa<ConstantSDNode>(V1.getOperand(0))) {
- bool IsScalarToVector = true;
- for (unsigned i = 1, e = V1.getNumOperands(); i != e; ++i) {
- if (!V1.getOperand(i).isUndef()) {
- IsScalarToVector = false;
- break;
- }
- }
- if (IsScalarToVector)
- return DAG.getNode(HexagonISD::VSPLAT, dl, VT, V1.getOperand(0));
+ MVT VecTy = ty(Op);
+ assert(!Subtarget.isHVXVectorType(VecTy, true) &&
+ "HVX shuffles should be legal");
+ assert(VecTy.getSizeInBits() <= 64 && "Unexpected vector length");
+
+ SDValue Op0 = Op.getOperand(0);
+ SDValue Op1 = Op.getOperand(1);
+ const SDLoc &dl(Op);
+
+ // If the inputs are not the same as the output, bail. This is not an
+ // error situation, but complicates the handling and the default expansion
+ // (into BUILD_VECTOR) should be adequate.
+ if (ty(Op0) != VecTy || ty(Op1) != VecTy)
+ return SDValue();
+
+ // Normalize the mask so that the first non-negative index comes from
+ // the first operand.
+ SmallVector<int,8> Mask(AM.begin(), AM.end());
+ unsigned F = llvm::find_if(AM, [](int M) { return M >= 0; }) - AM.data();
+ if (F == AM.size())
+ return DAG.getUNDEF(VecTy);
+ if (AM[F] >= int(VecLen)) {
+ ShuffleVectorSDNode::commuteMask(Mask);
+ std::swap(Op0, Op1);
+ }
+
+ // Express the shuffle mask in terms of bytes.
+ SmallVector<int,8> ByteMask;
+ unsigned ElemBytes = VecTy.getVectorElementType().getSizeInBits() / 8;
+ for (unsigned i = 0, e = Mask.size(); i != e; ++i) {
+ int M = Mask[i];
+ if (M < 0) {
+ for (unsigned j = 0; j != ElemBytes; ++j)
+ ByteMask.push_back(-1);
+ } else {
+ for (unsigned j = 0; j != ElemBytes; ++j)
+ ByteMask.push_back(M*ElemBytes + j);
}
- return DAG.getNode(HexagonISD::VSPLAT, dl, VT,
- DAG.getConstant(Lane, dl, MVT::i32));
}
+ assert(ByteMask.size() <= 8);
+
+ // All non-undef (non-negative) indexes are well within [0..127], so they
+ // fit in a single byte. Build two 64-bit words:
+ // - MaskIdx where each byte is the corresponding index (for non-negative
+ // indexes), and 0xFF for negative indexes, and
+ // - MaskUnd that has 0xFF for each negative index.
+ uint64_t MaskIdx = 0;
+ uint64_t MaskUnd = 0;
+ for (unsigned i = 0, e = ByteMask.size(); i != e; ++i) {
+ unsigned S = 8*i;
+ uint64_t M = ByteMask[i] & 0xFF;
+ if (M == 0xFF)
+ MaskUnd |= M << S;
+ MaskIdx |= M << S;
+ }
+
+ if (ByteMask.size() == 4) {
+ // Identity.
+ if (MaskIdx == (0x03020100 | MaskUnd))
+ return Op0;
+ // Byte swap.
+ if (MaskIdx == (0x00010203 | MaskUnd)) {
+ SDValue T0 = DAG.getBitcast(MVT::i32, Op0);
+ SDValue T1 = DAG.getNode(ISD::BSWAP, dl, MVT::i32, T0);
+ return DAG.getBitcast(VecTy, T1);
+ }
- // FIXME: We need to support more general vector shuffles. See
- // below the comment from the ARM backend that deals in the general
- // case with the vector shuffles. For now, let expand handle these.
- return SDValue();
+ // Byte packs.
+ SDValue Concat10 = DAG.getNode(HexagonISD::COMBINE, dl,
+ typeJoin({ty(Op1), ty(Op0)}), {Op1, Op0});
+ if (MaskIdx == (0x06040200 | MaskUnd))
+ return getInstr(Hexagon::S2_vtrunehb, dl, VecTy, {Concat10}, DAG);
+ if (MaskIdx == (0x07050301 | MaskUnd))
+ return getInstr(Hexagon::S2_vtrunohb, dl, VecTy, {Concat10}, DAG);
+
+ SDValue Concat01 = DAG.getNode(HexagonISD::COMBINE, dl,
+ typeJoin({ty(Op0), ty(Op1)}), {Op0, Op1});
+ if (MaskIdx == (0x02000604 | MaskUnd))
+ return getInstr(Hexagon::S2_vtrunehb, dl, VecTy, {Concat01}, DAG);
+ if (MaskIdx == (0x03010705 | MaskUnd))
+ return getInstr(Hexagon::S2_vtrunohb, dl, VecTy, {Concat01}, DAG);
+ }
+
+ if (ByteMask.size() == 8) {
+ // Identity.
+ if (MaskIdx == (0x0706050403020100ull | MaskUnd))
+ return Op0;
+ // Byte swap.
+ if (MaskIdx == (0x0001020304050607ull | MaskUnd)) {
+ SDValue T0 = DAG.getBitcast(MVT::i64, Op0);
+ SDValue T1 = DAG.getNode(ISD::BSWAP, dl, MVT::i64, T0);
+ return DAG.getBitcast(VecTy, T1);
+ }
- // If the shuffle is not directly supported and it has 4 elements, use
- // the PerfectShuffle-generated table to synthesize it from other shuffles.
-}
+ // Halfword picks.
+ if (MaskIdx == (0x0d0c050409080100ull | MaskUnd))
+ return getInstr(Hexagon::S2_shuffeh, dl, VecTy, {Op1, Op0}, DAG);
+ if (MaskIdx == (0x0f0e07060b0a0302ull | MaskUnd))
+ return getInstr(Hexagon::S2_shuffoh, dl, VecTy, {Op1, Op0}, DAG);
+ if (MaskIdx == (0x0d0c090805040100ull | MaskUnd))
+ return getInstr(Hexagon::S2_vtrunewh, dl, VecTy, {Op1, Op0}, DAG);
+ if (MaskIdx == (0x0f0e0b0a07060302ull | MaskUnd))
+ return getInstr(Hexagon::S2_vtrunowh, dl, VecTy, {Op1, Op0}, DAG);
+ if (MaskIdx == (0x0706030205040100ull | MaskUnd)) {
+ VectorPair P = opSplit(Op0, dl, DAG);
+ return getInstr(Hexagon::S2_packhl, dl, VecTy, {P.second, P.first}, DAG);
+ }
-// If BUILD_VECTOR has same base element repeated several times,
-// report true.
-static bool isCommonSplatElement(BuildVectorSDNode *BVN) {
- unsigned NElts = BVN->getNumOperands();
- SDValue V0 = BVN->getOperand(0);
+ // Byte packs.
+ if (MaskIdx == (0x0e060c040a020800ull | MaskUnd))
+ return getInstr(Hexagon::S2_shuffeb, dl, VecTy, {Op1, Op0}, DAG);
+ if (MaskIdx == (0x0f070d050b030901ull | MaskUnd))
+ return getInstr(Hexagon::S2_shuffob, dl, VecTy, {Op1, Op0}, DAG);
+ }
- for (unsigned i = 1, e = NElts; i != e; ++i) {
- if (BVN->getOperand(i) != V0)
- return false;
+ return SDValue();
+}
+
+// Create a Hexagon-specific node for shifting a vector by an integer.
+SDValue
+HexagonTargetLowering::getVectorShiftByInt(SDValue Op, SelectionDAG &DAG)
+ const {
+ if (auto *BVN = dyn_cast<BuildVectorSDNode>(Op.getOperand(1).getNode())) {
+ if (SDValue S = BVN->getSplatValue()) {
+ unsigned NewOpc;
+ switch (Op.getOpcode()) {
+ case ISD::SHL:
+ NewOpc = HexagonISD::VASL;
+ break;
+ case ISD::SRA:
+ NewOpc = HexagonISD::VASR;
+ break;
+ case ISD::SRL:
+ NewOpc = HexagonISD::VLSR;
+ break;
+ default:
+ llvm_unreachable("Unexpected shift opcode");
+ }
+ return DAG.getNode(NewOpc, SDLoc(Op), ty(Op), Op.getOperand(0), S);
+ }
}
- return true;
+
+ return SDValue();
}
-// Lower a vector shift. Try to convert
-// <VT> = SHL/SRA/SRL <VT> by <VT> to Hexagon specific
-// <VT> = SHL/SRA/SRL <VT> by <IT/i32>.
SDValue
HexagonTargetLowering::LowerVECTOR_SHIFT(SDValue Op, SelectionDAG &DAG) const {
- BuildVectorSDNode *BVN = nullptr;
- SDValue V1 = Op.getOperand(0);
- SDValue V2 = Op.getOperand(1);
- SDValue V3;
- SDLoc dl(Op);
- EVT VT = Op.getValueType();
+ return getVectorShiftByInt(Op, DAG);
+}
- if ((BVN = dyn_cast<BuildVectorSDNode>(V1.getNode())) &&
- isCommonSplatElement(BVN))
- V3 = V2;
- else if ((BVN = dyn_cast<BuildVectorSDNode>(V2.getNode())) &&
- isCommonSplatElement(BVN))
- V3 = V1;
- else
- return SDValue();
+SDValue
+HexagonTargetLowering::LowerROTL(SDValue Op, SelectionDAG &DAG) const {
+ if (isa<ConstantSDNode>(Op.getOperand(1).getNode()))
+ return Op;
+ return SDValue();
+}
- SDValue CommonSplat = BVN->getOperand(0);
- SDValue Result;
+SDValue
+HexagonTargetLowering::LowerBITCAST(SDValue Op, SelectionDAG &DAG) const {
+ MVT ResTy = ty(Op);
+ SDValue InpV = Op.getOperand(0);
+ MVT InpTy = ty(InpV);
+ assert(ResTy.getSizeInBits() == InpTy.getSizeInBits());
+ const SDLoc &dl(Op);
- if (VT.getSimpleVT() == MVT::v4i16) {
- switch (Op.getOpcode()) {
- case ISD::SRA:
- Result = DAG.getNode(HexagonISD::VASR, dl, VT, V3, CommonSplat);
- break;
- case ISD::SHL:
- Result = DAG.getNode(HexagonISD::VASL, dl, VT, V3, CommonSplat);
- break;
- case ISD::SRL:
- Result = DAG.getNode(HexagonISD::VLSR, dl, VT, V3, CommonSplat);
- break;
- default:
- return SDValue();
- }
- } else if (VT.getSimpleVT() == MVT::v2i32) {
- switch (Op.getOpcode()) {
- case ISD::SRA:
- Result = DAG.getNode(HexagonISD::VASR, dl, VT, V3, CommonSplat);
- break;
- case ISD::SHL:
- Result = DAG.getNode(HexagonISD::VASL, dl, VT, V3, CommonSplat);
- break;
- case ISD::SRL:
- Result = DAG.getNode(HexagonISD::VLSR, dl, VT, V3, CommonSplat);
- break;
- default:
- return SDValue();
- }
- } else {
- return SDValue();
+ // Handle conversion from i8 to v8i1.
+ if (ResTy == MVT::v8i1) {
+ SDValue Sc = DAG.getBitcast(tyScalar(InpTy), InpV);
+ SDValue Ext = DAG.getZExtOrTrunc(Sc, dl, MVT::i32);
+ return getInstr(Hexagon::C2_tfrrp, dl, ResTy, Ext, DAG);
}
- return DAG.getNode(ISD::BITCAST, dl, VT, Result);
+ return SDValue();
}
bool
@@ -2509,9 +2138,10 @@ HexagonTargetLowering::getBuildVectorConstInts(ArrayRef<SDValue> Values,
Consts[i] = ConstantInt::get(IntTy, 0);
continue;
}
+ // Make sure to always cast to IntTy.
if (auto *CN = dyn_cast<ConstantSDNode>(V.getNode())) {
const ConstantInt *CI = CN->getConstantIntValue();
- Consts[i] = const_cast<ConstantInt*>(CI);
+ Consts[i] = ConstantInt::get(IntTy, CI->getValue().getSExtValue());
} else if (auto *CN = dyn_cast<ConstantFPSDNode>(V.getNode())) {
const ConstantFP *CF = CN->getConstantFPValue();
APInt A = CF->getValueAPF().bitcastToAPInt();
@@ -2550,8 +2180,8 @@ HexagonTargetLowering::buildVector32(ArrayRef<SDValue> Elem, const SDLoc &dl,
Consts[1]->getZExtValue() << 16;
return DAG.getBitcast(MVT::v2i16, DAG.getConstant(V, dl, MVT::i32));
}
- SDValue N = getNode(Hexagon::A2_combine_ll, dl, MVT::i32,
- {Elem[1], Elem[0]}, DAG);
+ SDValue N = getInstr(Hexagon::A2_combine_ll, dl, MVT::i32,
+ {Elem[1], Elem[0]}, DAG);
return DAG.getBitcast(MVT::v2i16, N);
}
@@ -2596,7 +2226,7 @@ HexagonTargetLowering::buildVector32(ArrayRef<SDValue> Elem, const SDLoc &dl,
SDValue B0 = DAG.getNode(ISD::OR, dl, MVT::i32, {Vs[0], T0});
SDValue B1 = DAG.getNode(ISD::OR, dl, MVT::i32, {Vs[2], T1});
- SDValue R = getNode(Hexagon::A2_combine_ll, dl, MVT::i32, {B1, B0}, DAG);
+ SDValue R = getInstr(Hexagon::A2_combine_ll, dl, MVT::i32, {B1, B0}, DAG);
return DAG.getBitcast(MVT::v4i8, R);
}
@@ -2651,7 +2281,7 @@ HexagonTargetLowering::buildVector64(ArrayRef<SDValue> Elem, const SDLoc &dl,
uint64_t Mask = (ElemTy == MVT::i8) ? 0xFFull
: (ElemTy == MVT::i16) ? 0xFFFFull : 0xFFFFFFFFull;
for (unsigned i = 0; i != Num; ++i)
- Val = (Val << W) | (Consts[i]->getZExtValue() & Mask);
+ Val = (Val << W) | (Consts[Num-1-i]->getZExtValue() & Mask);
SDValue V0 = DAG.getConstant(Val, dl, MVT::i64);
return DAG.getBitcast(VecTy, V0);
}
@@ -2677,8 +2307,56 @@ HexagonTargetLowering::extractVector(SDValue VecV, SDValue IdxV,
unsigned VecWidth = VecTy.getSizeInBits();
unsigned ValWidth = ValTy.getSizeInBits();
unsigned ElemWidth = VecTy.getVectorElementType().getSizeInBits();
- assert(VecWidth == 32 || VecWidth == 64);
assert((VecWidth % ElemWidth) == 0);
+ auto *IdxN = dyn_cast<ConstantSDNode>(IdxV);
+
+ // Special case for v{8,4,2}i1 (the only boolean vectors legal in Hexagon
+ // without any coprocessors).
+ if (ElemWidth == 1) {
+ assert(VecWidth == VecTy.getVectorNumElements() && "Sanity failure");
+ assert(VecWidth == 8 || VecWidth == 4 || VecWidth == 2);
+ // Check if this is an extract of the lowest bit.
+ if (IdxN) {
+ // Extracting the lowest bit is a no-op, but it changes the type,
+ // so it must be kept as an operation to avoid errors related to
+ // type mismatches.
+ if (IdxN->isNullValue() && ValTy.getSizeInBits() == 1)
+ return DAG.getNode(HexagonISD::TYPECAST, dl, MVT::i1, VecV);
+ }
+
+ // If the value extracted is a single bit, use tstbit.
+ if (ValWidth == 1) {
+ SDValue A0 = getInstr(Hexagon::C2_tfrpr, dl, MVT::i32, {VecV}, DAG);
+ SDValue M0 = DAG.getConstant(8 / VecWidth, dl, MVT::i32);
+ SDValue I0 = DAG.getNode(ISD::MUL, dl, MVT::i32, IdxV, M0);
+ return DAG.getNode(HexagonISD::TSTBIT, dl, MVT::i1, A0, I0);
+ }
+
+ // Each bool vector (v2i1, v4i1, v8i1) always occupies 8 bits in
+ // a predicate register. The elements of the vector are repeated
+ // in the register (if necessary) so that the total number is 8.
+ // The extracted subvector will need to be expanded in such a way.
+ unsigned Scale = VecWidth / ValWidth;
+
+ // Generate (p2d VecV) >> 8*Idx to move the interesting bytes to
+ // position 0.
+ assert(ty(IdxV) == MVT::i32);
+ SDValue S0 = DAG.getNode(ISD::MUL, dl, MVT::i32, IdxV,
+ DAG.getConstant(8*Scale, dl, MVT::i32));
+ SDValue T0 = DAG.getNode(HexagonISD::P2D, dl, MVT::i64, VecV);
+ SDValue T1 = DAG.getNode(ISD::SRL, dl, MVT::i64, T0, S0);
+ while (Scale > 1) {
+ // The longest possible subvector is at most 32 bits, so it is always
+ // contained in the low subregister.
+ T1 = DAG.getTargetExtractSubreg(Hexagon::isub_lo, dl, MVT::i32, T1);
+ T1 = expandPredicate(T1, dl, DAG);
+ Scale /= 2;
+ }
+
+ return DAG.getNode(HexagonISD::D2P, dl, ResTy, T1);
+ }
+
+ assert(VecWidth == 32 || VecWidth == 64);
// Cast everything to scalar integer types.
MVT ScalarTy = tyScalar(VecTy);
@@ -2687,8 +2365,8 @@ HexagonTargetLowering::extractVector(SDValue VecV, SDValue IdxV,
SDValue WidthV = DAG.getConstant(ValWidth, dl, MVT::i32);
SDValue ExtV;
- if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(IdxV)) {
- unsigned Off = C->getZExtValue() * ElemWidth;
+ if (IdxN) {
+ unsigned Off = IdxN->getZExtValue() * ElemWidth;
if (VecWidth == 64 && ValWidth == 32) {
assert(Off == 0 || Off == 32);
unsigned SubIdx = Off == 0 ? Hexagon::isub_lo : Hexagon::isub_hi;
@@ -2707,11 +2385,8 @@ HexagonTargetLowering::extractVector(SDValue VecV, SDValue IdxV,
IdxV = DAG.getZExtOrTrunc(IdxV, dl, MVT::i32);
SDValue OffV = DAG.getNode(ISD::MUL, dl, MVT::i32, IdxV,
DAG.getConstant(ElemWidth, dl, MVT::i32));
- // EXTRACTURP takes width/offset in a 64-bit pair.
- SDValue CombV = DAG.getNode(HexagonISD::COMBINE, dl, MVT::i64,
- {WidthV, OffV});
- ExtV = DAG.getNode(HexagonISD::EXTRACTURP, dl, ScalarTy,
- {VecV, CombV});
+ ExtV = DAG.getNode(HexagonISD::EXTRACTU, dl, ScalarTy,
+ {VecV, WidthV, OffV});
}
// Cast ExtV to the requested result type.
@@ -2725,6 +2400,33 @@ HexagonTargetLowering::insertVector(SDValue VecV, SDValue ValV, SDValue IdxV,
const SDLoc &dl, MVT ValTy,
SelectionDAG &DAG) const {
MVT VecTy = ty(VecV);
+ if (VecTy.getVectorElementType() == MVT::i1) {
+ MVT ValTy = ty(ValV);
+ assert(ValTy.getVectorElementType() == MVT::i1);
+ SDValue ValR = DAG.getNode(HexagonISD::P2D, dl, MVT::i64, ValV);
+ unsigned VecLen = VecTy.getVectorNumElements();
+ unsigned Scale = VecLen / ValTy.getVectorNumElements();
+ assert(Scale > 1);
+
+ for (unsigned R = Scale; R > 1; R /= 2) {
+ ValR = contractPredicate(ValR, dl, DAG);
+ ValR = DAG.getNode(HexagonISD::COMBINE, dl, MVT::i64,
+ DAG.getUNDEF(MVT::i32), ValR);
+ }
+ // The longest possible subvector is at most 32 bits, so it is always
+ // contained in the low subregister.
+ ValR = DAG.getTargetExtractSubreg(Hexagon::isub_lo, dl, MVT::i32, ValR);
+
+ unsigned ValBytes = 64 / Scale;
+ SDValue Width = DAG.getConstant(ValBytes*8, dl, MVT::i32);
+ SDValue Idx = DAG.getNode(ISD::MUL, dl, MVT::i32, IdxV,
+ DAG.getConstant(8, dl, MVT::i32));
+ SDValue VecR = DAG.getNode(HexagonISD::P2D, dl, MVT::i64, VecV);
+ SDValue Ins = DAG.getNode(HexagonISD::INSERT, dl, MVT::i32,
+ {VecR, ValR, Width, Idx});
+ return DAG.getNode(HexagonISD::D2P, dl, VecTy, Ins);
+ }
+
unsigned VecWidth = VecTy.getSizeInBits();
unsigned ValWidth = ValTy.getSizeInBits();
assert(VecWidth == 32 || VecWidth == 64);
@@ -2752,17 +2454,32 @@ HexagonTargetLowering::insertVector(SDValue VecV, SDValue ValV, SDValue IdxV,
if (ty(IdxV) != MVT::i32)
IdxV = DAG.getZExtOrTrunc(IdxV, dl, MVT::i32);
SDValue OffV = DAG.getNode(ISD::MUL, dl, MVT::i32, IdxV, WidthV);
- // INSERTRP takes width/offset in a 64-bit pair.
- SDValue CombV = DAG.getNode(HexagonISD::COMBINE, dl, MVT::i64,
- {WidthV, OffV});
- InsV = DAG.getNode(HexagonISD::INSERTRP, dl, ScalarTy,
- {VecV, ValV, CombV});
+ InsV = DAG.getNode(HexagonISD::INSERT, dl, ScalarTy,
+ {VecV, ValV, WidthV, OffV});
}
return DAG.getNode(ISD::BITCAST, dl, VecTy, InsV);
}
SDValue
+HexagonTargetLowering::expandPredicate(SDValue Vec32, const SDLoc &dl,
+ SelectionDAG &DAG) const {
+ assert(ty(Vec32).getSizeInBits() == 32);
+ if (isUndef(Vec32))
+ return DAG.getUNDEF(MVT::i64);
+ return getInstr(Hexagon::S2_vsxtbh, dl, MVT::i64, {Vec32}, DAG);
+}
+
+SDValue
+HexagonTargetLowering::contractPredicate(SDValue Vec64, const SDLoc &dl,
+ SelectionDAG &DAG) const {
+ assert(ty(Vec64).getSizeInBits() == 64);
+ if (isUndef(Vec64))
+ return DAG.getUNDEF(MVT::i32);
+ return getInstr(Hexagon::S2_vtrunehb, dl, MVT::i32, {Vec64}, DAG);
+}
+
+SDValue
HexagonTargetLowering::getZero(const SDLoc &dl, MVT Ty, SelectionDAG &DAG)
const {
if (Ty.isVector()) {
@@ -2784,18 +2501,34 @@ SDValue
HexagonTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
MVT VecTy = ty(Op);
unsigned BW = VecTy.getSizeInBits();
-
- if (Subtarget.useHVXOps() && Subtarget.isHVXVectorType(VecTy, true))
- return LowerHvxBuildVector(Op, DAG);
-
- if (BW == 32 || BW == 64) {
- const SDLoc &dl(Op);
- SmallVector<SDValue,8> Ops;
- for (unsigned i = 0, e = Op.getNumOperands(); i != e; ++i)
- Ops.push_back(Op.getOperand(i));
- if (BW == 32)
- return buildVector32(Ops, dl, VecTy, DAG);
+ const SDLoc &dl(Op);
+ SmallVector<SDValue,8> Ops;
+ for (unsigned i = 0, e = Op.getNumOperands(); i != e; ++i)
+ Ops.push_back(Op.getOperand(i));
+
+ if (BW == 32)
+ return buildVector32(Ops, dl, VecTy, DAG);
+ if (BW == 64)
return buildVector64(Ops, dl, VecTy, DAG);
+
+ if (VecTy == MVT::v8i1 || VecTy == MVT::v4i1 || VecTy == MVT::v2i1) {
+ // For each i1 element in the resulting predicate register, put 1
+ // shifted by the index of the element into a general-purpose register,
+ // then or them together and transfer it back into a predicate register.
+ SDValue Rs[8];
+ SDValue Z = getZero(dl, MVT::i32, DAG);
+ // Always produce 8 bits, repeat inputs if necessary.
+ unsigned Rep = 8 / VecTy.getVectorNumElements();
+ for (unsigned i = 0; i != 8; ++i) {
+ SDValue S = DAG.getConstant(1ull << i, dl, MVT::i32);
+ Rs[i] = DAG.getSelect(dl, MVT::i32, Ops[i/Rep], S, Z);
+ }
+ for (ArrayRef<SDValue> A(Rs); A.size() != 1; A = A.drop_back(A.size()/2)) {
+ for (unsigned i = 0, e = A.size()/2; i != e; ++i)
+ Rs[i] = DAG.getNode(ISD::OR, dl, MVT::i32, Rs[2*i], Rs[2*i+1]);
+ }
+ // Move the value directly to a predicate register.
+ return getInstr(Hexagon::C2_tfrrp, dl, VecTy, {Rs[0]}, DAG);
}
return SDValue();
@@ -2805,14 +2538,64 @@ SDValue
HexagonTargetLowering::LowerCONCAT_VECTORS(SDValue Op,
SelectionDAG &DAG) const {
MVT VecTy = ty(Op);
- assert(!Subtarget.useHVXOps() || !Subtarget.isHVXVectorType(VecTy));
-
+ const SDLoc &dl(Op);
if (VecTy.getSizeInBits() == 64) {
assert(Op.getNumOperands() == 2);
- return DAG.getNode(HexagonISD::COMBINE, SDLoc(Op), VecTy, Op.getOperand(1),
+ return DAG.getNode(HexagonISD::COMBINE, dl, VecTy, Op.getOperand(1),
Op.getOperand(0));
}
+ MVT ElemTy = VecTy.getVectorElementType();
+ if (ElemTy == MVT::i1) {
+ assert(VecTy == MVT::v2i1 || VecTy == MVT::v4i1 || VecTy == MVT::v8i1);
+ MVT OpTy = ty(Op.getOperand(0));
+ // Scale is how many times the operands need to be contracted to match
+ // the representation in the target register.
+ unsigned Scale = VecTy.getVectorNumElements() / OpTy.getVectorNumElements();
+ assert(Scale == Op.getNumOperands() && Scale > 1);
+
+ // First, convert all bool vectors to integers, then generate pairwise
+ // inserts to form values of doubled length. Up until there are only
+ // two values left to concatenate, all of these values will fit in a
+ // 32-bit integer, so keep them as i32 to use 32-bit inserts.
+ SmallVector<SDValue,4> Words[2];
+ unsigned IdxW = 0;
+
+ for (SDValue P : Op.getNode()->op_values()) {
+ SDValue W = DAG.getNode(HexagonISD::P2D, dl, MVT::i64, P);
+ for (unsigned R = Scale; R > 1; R /= 2) {
+ W = contractPredicate(W, dl, DAG);
+ W = DAG.getNode(HexagonISD::COMBINE, dl, MVT::i64,
+ DAG.getUNDEF(MVT::i32), W);
+ }
+ W = DAG.getTargetExtractSubreg(Hexagon::isub_lo, dl, MVT::i32, W);
+ Words[IdxW].push_back(W);
+ }
+
+ while (Scale > 2) {
+ SDValue WidthV = DAG.getConstant(64 / Scale, dl, MVT::i32);
+ Words[IdxW ^ 1].clear();
+
+ for (unsigned i = 0, e = Words[IdxW].size(); i != e; i += 2) {
+ SDValue W0 = Words[IdxW][i], W1 = Words[IdxW][i+1];
+ // Insert W1 into W0 right next to the significant bits of W0.
+ SDValue T = DAG.getNode(HexagonISD::INSERT, dl, MVT::i32,
+ {W0, W1, WidthV, WidthV});
+ Words[IdxW ^ 1].push_back(T);
+ }
+ IdxW ^= 1;
+ Scale /= 2;
+ }
+
+ // Another sanity check. At this point there should only be two words
+ // left, and Scale should be 2.
+ assert(Scale == 2 && Words[IdxW].size() == 2);
+
+ SDValue WW = DAG.getNode(HexagonISD::COMBINE, dl, MVT::i64,
+ Words[IdxW][1], Words[IdxW][0]);
+ return DAG.getNode(HexagonISD::D2P, dl, VecTy, WW);
+ }
+
return SDValue();
}
@@ -2820,10 +2603,6 @@ SDValue
HexagonTargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
SelectionDAG &DAG) const {
SDValue Vec = Op.getOperand(0);
- MVT VecTy = ty(Vec);
- if (Subtarget.useHVXOps() && Subtarget.isHVXVectorType(VecTy))
- return LowerHvxExtractElement(Op, DAG);
-
MVT ElemTy = ty(Vec).getVectorElementType();
return extractVector(Vec, Op.getOperand(1), SDLoc(Op), ElemTy, ty(Op), DAG);
}
@@ -2831,31 +2610,20 @@ HexagonTargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
SDValue
HexagonTargetLowering::LowerEXTRACT_SUBVECTOR(SDValue Op,
SelectionDAG &DAG) const {
- SDValue Vec = Op.getOperand(0);
- MVT VecTy = ty(Vec);
- if (Subtarget.useHVXOps() && Subtarget.isHVXVectorType(VecTy))
- return LowerHvxExtractSubvector(Op, DAG);
-
- return extractVector(Vec, Op.getOperand(1), SDLoc(Op), ty(Op), ty(Op), DAG);
+ return extractVector(Op.getOperand(0), Op.getOperand(1), SDLoc(Op),
+ ty(Op), ty(Op), DAG);
}
SDValue
HexagonTargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
SelectionDAG &DAG) const {
- MVT VecTy = ty(Op);
- if (Subtarget.useHVXOps() && Subtarget.isHVXVectorType(VecTy))
- return LowerHvxInsertElement(Op, DAG);
-
return insertVector(Op.getOperand(0), Op.getOperand(1), Op.getOperand(2),
- SDLoc(Op), VecTy.getVectorElementType(), DAG);
+ SDLoc(Op), ty(Op).getVectorElementType(), DAG);
}
SDValue
HexagonTargetLowering::LowerINSERT_SUBVECTOR(SDValue Op,
SelectionDAG &DAG) const {
- if (Subtarget.useHVXOps() && Subtarget.isHVXVectorType(ty(Op)))
- return LowerHvxInsertSubvector(Op, DAG);
-
SDValue ValV = Op.getOperand(1);
return insertVector(Op.getOperand(0), ValV, Op.getOperand(2),
SDLoc(Op), ty(ValV), DAG);
@@ -2875,6 +2643,109 @@ HexagonTargetLowering::allowTruncateForTailCall(Type *Ty1, Type *Ty2) const {
}
SDValue
+HexagonTargetLowering::LowerUnalignedLoad(SDValue Op, SelectionDAG &DAG)
+ const {
+ LoadSDNode *LN = cast<LoadSDNode>(Op.getNode());
+ unsigned HaveAlign = LN->getAlignment();
+ MVT LoadTy = ty(Op);
+ unsigned NeedAlign = Subtarget.getTypeAlignment(LoadTy);
+ if (HaveAlign >= NeedAlign)
+ return Op;
+
+ const SDLoc &dl(Op);
+ const DataLayout &DL = DAG.getDataLayout();
+ LLVMContext &Ctx = *DAG.getContext();
+ unsigned AS = LN->getAddressSpace();
+
+ // If load aligning is disabled, or if the load can be broken up into two
+ // smaller legal loads, use the default (target-independent) expansion.
+ bool DoDefault = false;
+ // Handle it in the default way if this is an indexed load.
+ if (!LN->isUnindexed())
+ DoDefault = true;
+
+ if (!AlignLoads) {
+ if (allowsMemoryAccess(Ctx, DL, LN->getMemoryVT(), AS, HaveAlign))
+ return Op;
+ DoDefault = true;
+ }
+ if (!DoDefault && 2*HaveAlign == NeedAlign) {
+ // The PartTy is the equivalent of "getLoadableTypeOfSize(HaveAlign)".
+ MVT PartTy = HaveAlign <= 8 ? MVT::getIntegerVT(8*HaveAlign)
+ : MVT::getVectorVT(MVT::i8, HaveAlign);
+ DoDefault = allowsMemoryAccess(Ctx, DL, PartTy, AS, HaveAlign);
+ }
+ if (DoDefault) {
+ std::pair<SDValue, SDValue> P = expandUnalignedLoad(LN, DAG);
+ return DAG.getMergeValues({P.first, P.second}, dl);
+ }
+
+ // The code below generates two loads, both aligned as NeedAlign, and
+ // with the distance of NeedAlign between them. For that to cover the
+ // bits that need to be loaded (and without overlapping), the size of
+ // the loads should be equal to NeedAlign. This is true for all loadable
+ // types, but add an assertion in case something changes in the future.
+ assert(LoadTy.getSizeInBits() == 8*NeedAlign);
+
+ unsigned LoadLen = NeedAlign;
+ SDValue Base = LN->getBasePtr();
+ SDValue Chain = LN->getChain();
+ auto BO = getBaseAndOffset(Base);
+ unsigned BaseOpc = BO.first.getOpcode();
+ if (BaseOpc == HexagonISD::VALIGNADDR && BO.second % LoadLen == 0)
+ return Op;
+
+ if (BO.second % LoadLen != 0) {
+ BO.first = DAG.getNode(ISD::ADD, dl, MVT::i32, BO.first,
+ DAG.getConstant(BO.second % LoadLen, dl, MVT::i32));
+ BO.second -= BO.second % LoadLen;
+ }
+ SDValue BaseNoOff = (BaseOpc != HexagonISD::VALIGNADDR)
+ ? DAG.getNode(HexagonISD::VALIGNADDR, dl, MVT::i32, BO.first,
+ DAG.getConstant(NeedAlign, dl, MVT::i32))
+ : BO.first;
+ SDValue Base0 = DAG.getMemBasePlusOffset(BaseNoOff, BO.second, dl);
+ SDValue Base1 = DAG.getMemBasePlusOffset(BaseNoOff, BO.second+LoadLen, dl);
+
+ MachineMemOperand *WideMMO = nullptr;
+ if (MachineMemOperand *MMO = LN->getMemOperand()) {
+ MachineFunction &MF = DAG.getMachineFunction();
+ WideMMO = MF.getMachineMemOperand(MMO->getPointerInfo(), MMO->getFlags(),
+ 2*LoadLen, LoadLen, MMO->getAAInfo(), MMO->getRanges(),
+ MMO->getSyncScopeID(), MMO->getOrdering(),
+ MMO->getFailureOrdering());
+ }
+
+ SDValue Load0 = DAG.getLoad(LoadTy, dl, Chain, Base0, WideMMO);
+ SDValue Load1 = DAG.getLoad(LoadTy, dl, Chain, Base1, WideMMO);
+
+ SDValue Aligned = DAG.getNode(HexagonISD::VALIGN, dl, LoadTy,
+ {Load1, Load0, BaseNoOff.getOperand(0)});
+ SDValue NewChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ Load0.getValue(1), Load1.getValue(1));
+ SDValue M = DAG.getMergeValues({Aligned, NewChain}, dl);
+ return M;
+}
+
+SDValue
+HexagonTargetLowering::LowerAddSubCarry(SDValue Op, SelectionDAG &DAG) const {
+ const SDLoc &dl(Op);
+ unsigned Opc = Op.getOpcode();
+ SDValue X = Op.getOperand(0), Y = Op.getOperand(1), C = Op.getOperand(2);
+
+ if (Opc == ISD::ADDCARRY)
+ return DAG.getNode(HexagonISD::ADDC, dl, Op.getNode()->getVTList(),
+ { X, Y, C });
+
+ EVT CarryTy = C.getValueType();
+ SDValue SubC = DAG.getNode(HexagonISD::SUBC, dl, Op.getNode()->getVTList(),
+ { X, Y, DAG.getLogicalNOT(dl, C, CarryTy) });
+ SDValue Out[] = { SubC.getValue(0),
+ DAG.getLogicalNOT(dl, SubC.getValue(1), CarryTy) };
+ return DAG.getMergeValues(Out, dl);
+}
+
+SDValue
HexagonTargetLowering::LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const {
SDValue Chain = Op.getOperand(0);
SDValue Offset = Op.getOperand(1);
@@ -2904,6 +2775,17 @@ HexagonTargetLowering::LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const {
SDValue
HexagonTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
unsigned Opc = Op.getOpcode();
+
+ // Handle INLINEASM first.
+ if (Opc == ISD::INLINEASM)
+ return LowerINLINEASM(Op, DAG);
+
+ if (isHvxOperation(Op)) {
+ // If HVX lowering returns nothing, try the default lowering.
+ if (SDValue V = LowerHvxOperation(Op, DAG))
+ return V;
+ }
+
switch (Opc) {
default:
#ifndef NDEBUG
@@ -2919,13 +2801,17 @@ HexagonTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG);
case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG);
case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG);
+ case ISD::BITCAST: return LowerBITCAST(Op, DAG);
+ case ISD::LOAD: return LowerUnalignedLoad(Op, DAG);
+ case ISD::ADDCARRY:
+ case ISD::SUBCARRY: return LowerAddSubCarry(Op, DAG);
case ISD::SRA:
case ISD::SHL:
case ISD::SRL: return LowerVECTOR_SHIFT(Op, DAG);
+ case ISD::ROTL: return LowerROTL(Op, DAG);
case ISD::ConstantPool: return LowerConstantPool(Op, DAG);
case ISD::JumpTable: return LowerJumpTable(Op, DAG);
case ISD::EH_RETURN: return LowerEH_RETURN(Op, DAG);
- // Frame & Return address. Currently unimplemented.
case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG);
case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG);
case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG);
@@ -2939,17 +2825,35 @@ HexagonTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::VSELECT: return LowerVSELECT(Op, DAG);
case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
case ISD::INTRINSIC_VOID: return LowerINTRINSIC_VOID(Op, DAG);
- case ISD::INLINEASM: return LowerINLINEASM(Op, DAG);
case ISD::PREFETCH: return LowerPREFETCH(Op, DAG);
case ISD::READCYCLECOUNTER: return LowerREADCYCLECOUNTER(Op, DAG);
- case ISD::MUL:
- if (Subtarget.useHVXOps())
- return LowerHvxMul(Op, DAG);
break;
}
+
return SDValue();
}
+void
+HexagonTargetLowering::ReplaceNodeResults(SDNode *N,
+ SmallVectorImpl<SDValue> &Results,
+ SelectionDAG &DAG) const {
+ const SDLoc &dl(N);
+ switch (N->getOpcode()) {
+ case ISD::SRL:
+ case ISD::SRA:
+ case ISD::SHL:
+ return;
+ case ISD::BITCAST:
+ // Handle a bitcast from v8i1 to i8.
+ if (N->getValueType(0) == MVT::i8) {
+ SDValue P = getInstr(Hexagon::C2_tfrpr, dl, MVT::i32,
+ N->getOperand(0), DAG);
+ Results.push_back(P);
+ }
+ break;
+ }
+}
+
/// Returns relocation base for the given PIC jumptable.
SDValue
HexagonTargetLowering::getPICJumpTableRelocBase(SDValue Table,
@@ -3023,7 +2927,7 @@ HexagonTargetLowering::getRegForInlineAsmConstraint(
case 512:
return {0u, &Hexagon::HvxVRRegClass};
case 1024:
- if (Subtarget.hasV60TOps() && Subtarget.useHVX128BOps())
+ if (Subtarget.hasV60Ops() && Subtarget.useHVX128BOps())
return {0u, &Hexagon::HvxVRRegClass};
return {0u, &Hexagon::HvxWRRegClass};
case 2048:
@@ -3042,7 +2946,7 @@ HexagonTargetLowering::getRegForInlineAsmConstraint(
/// specified FP immediate natively. If false, the legalizer will
/// materialize the FP immediate as a load from a constant pool.
bool HexagonTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
- return Subtarget.hasV5TOps();
+ return Subtarget.hasV5Ops();
}
/// isLegalAddressingMode - Return true if the addressing mode represented by
@@ -3104,9 +3008,9 @@ bool HexagonTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
bool HexagonTargetLowering::IsEligibleForTailCallOptimization(
SDValue Callee,
CallingConv::ID CalleeCC,
- bool isVarArg,
- bool isCalleeStructRet,
- bool isCallerStructRet,
+ bool IsVarArg,
+ bool IsCalleeStructRet,
+ bool IsCallerStructRet,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<SDValue> &OutVals,
const SmallVectorImpl<ISD::InputArg> &Ins,
@@ -3137,12 +3041,12 @@ bool HexagonTargetLowering::IsEligibleForTailCallOptimization(
}
// Do not tail call optimize vararg calls.
- if (isVarArg)
+ if (IsVarArg)
return false;
// Also avoid tail call optimization if either caller or callee uses struct
// return semantics.
- if (isCalleeStructRet || isCallerStructRet)
+ if (IsCalleeStructRet || IsCallerStructRet)
return false;
// In addition to the cases above, we also disable Tail Call Optimization if
@@ -3185,54 +3089,25 @@ bool HexagonTargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
unsigned AS, unsigned Align, bool *Fast) const {
if (Fast)
*Fast = false;
-
- switch (VT.getSimpleVT().SimpleTy) {
- default:
- return false;
- case MVT::v64i8:
- case MVT::v128i8:
- case MVT::v256i8:
- case MVT::v32i16:
- case MVT::v64i16:
- case MVT::v128i16:
- case MVT::v16i32:
- case MVT::v32i32:
- case MVT::v64i32:
- return true;
- }
- return false;
+ return Subtarget.isHVXVectorType(VT.getSimpleVT());
}
std::pair<const TargetRegisterClass*, uint8_t>
HexagonTargetLowering::findRepresentativeClass(const TargetRegisterInfo *TRI,
MVT VT) const {
- const TargetRegisterClass *RRC = nullptr;
+ if (Subtarget.isHVXVectorType(VT, true)) {
+ unsigned BitWidth = VT.getSizeInBits();
+ unsigned VecWidth = Subtarget.getVectorLength() * 8;
- uint8_t Cost = 1;
- switch (VT.SimpleTy) {
- default:
- return TargetLowering::findRepresentativeClass(TRI, VT);
- case MVT::v64i8:
- case MVT::v32i16:
- case MVT::v16i32:
- RRC = &Hexagon::HvxVRRegClass;
- break;
- case MVT::v128i8:
- case MVT::v64i16:
- case MVT::v32i32:
- if (Subtarget.hasV60TOps() && Subtarget.useHVXOps() &&
- Subtarget.useHVX128BOps())
- RRC = &Hexagon::HvxVRRegClass;
- else
- RRC = &Hexagon::HvxWRRegClass;
- break;
- case MVT::v256i8:
- case MVT::v128i16:
- case MVT::v64i32:
- RRC = &Hexagon::HvxWRRegClass;
- break;
+ if (VT.getVectorElementType() == MVT::i1)
+ return std::make_pair(&Hexagon::HvxQRRegClass, 1);
+ if (BitWidth == VecWidth)
+ return std::make_pair(&Hexagon::HvxVRRegClass, 1);
+ assert(BitWidth == 2 * VecWidth);
+ return std::make_pair(&Hexagon::HvxWRRegClass, 1);
}
- return std::make_pair(RRC, Cost);
+
+ return TargetLowering::findRepresentativeClass(TRI, VT);
}
Value *HexagonTargetLowering::emitLoadLinked(IRBuilder<> &Builder, Value *Addr,