Diffstat (limited to 'contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp')
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp  224
1 file changed, 214 insertions(+), 10 deletions(-)
diff --git a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp
index 204950f9010e..7cda915fffe9 100644
--- a/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp
@@ -9,6 +9,7 @@
#include "HexagonISelLowering.h"
#include "HexagonRegisterInfo.h"
#include "HexagonSubtarget.h"
+#include "llvm/IR/IntrinsicsHexagon.h"
#include "llvm/Support/CommandLine.h"
using namespace llvm;
@@ -39,7 +40,6 @@ HexagonTargetLowering::initializeHVXLowering() {
addRegisterClass(MVT::v16i1, &Hexagon::HvxQRRegClass);
addRegisterClass(MVT::v32i1, &Hexagon::HvxQRRegClass);
addRegisterClass(MVT::v64i1, &Hexagon::HvxQRRegClass);
- addRegisterClass(MVT::v512i1, &Hexagon::HvxQRRegClass);
} else if (Subtarget.useHVX128BOps()) {
addRegisterClass(MVT::v128i8, &Hexagon::HvxVRRegClass);
addRegisterClass(MVT::v64i16, &Hexagon::HvxVRRegClass);
@@ -50,7 +50,6 @@ HexagonTargetLowering::initializeHVXLowering() {
addRegisterClass(MVT::v32i1, &Hexagon::HvxQRRegClass);
addRegisterClass(MVT::v64i1, &Hexagon::HvxQRRegClass);
addRegisterClass(MVT::v128i1, &Hexagon::HvxQRRegClass);
- addRegisterClass(MVT::v1024i1, &Hexagon::HvxQRRegClass);
}
// Set up operation actions.
@@ -66,8 +65,18 @@ HexagonTargetLowering::initializeHVXLowering() {
AddPromotedToType(Opc, FromTy, ToTy);
};
- setOperationAction(ISD::VECTOR_SHUFFLE, ByteV, Legal);
- setOperationAction(ISD::VECTOR_SHUFFLE, ByteW, Legal);
+ // Handle bitcasts of vector predicates to scalars (e.g. v32i1 to i32).
+ // Note: v16i1 -> i16 is handled in type legalization instead of op
+ // legalization.
+ setOperationAction(ISD::BITCAST, MVT::i16, Custom);
+ setOperationAction(ISD::BITCAST, MVT::i32, Custom);
+ setOperationAction(ISD::BITCAST, MVT::i64, Custom);
+ setOperationAction(ISD::BITCAST, MVT::v16i1, Custom);
+ setOperationAction(ISD::BITCAST, MVT::v128i1, Custom);
+ setOperationAction(ISD::BITCAST, MVT::i128, Custom);
+ setOperationAction(ISD::VECTOR_SHUFFLE, ByteV, Legal);
+ setOperationAction(ISD::VECTOR_SHUFFLE, ByteW, Legal);
+ setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
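These custom BITCAST actions feed into LowerHvxBitcast further down. As a hedged illustration in plain C++ (not the lowering itself; the helper name is invented), the scalar model below shows the semantics such a cast must preserve: predicate lane i becomes bit i of the integer result.

// Scalar reference model of a v32i1 -> i32 bitcast (illustration only).
#include <array>
#include <cstdint>

uint32_t bitcastV32I1ToI32(const std::array<bool, 32> &Q) {
  uint32_t R = 0;
  for (unsigned i = 0; i != 32; ++i)
    R |= uint32_t(Q[i]) << i; // predicate lane i -> bit i
  return R;
}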
for (MVT T : LegalV) {
setIndexedLoadAction(ISD::POST_INC, T, Legal);
@@ -194,12 +203,13 @@ HexagonTargetLowering::initializeHVXLowering() {
setOperationAction(ISD::XOR, BoolV, Legal);
}
- if (Use64b)
+ if (Use64b) {
for (MVT T: {MVT::v32i8, MVT::v32i16, MVT::v16i8, MVT::v16i16, MVT::v16i32})
setOperationAction(ISD::SIGN_EXTEND_INREG, T, Legal);
- else
+ } else {
for (MVT T: {MVT::v64i8, MVT::v64i16, MVT::v32i8, MVT::v32i16, MVT::v32i32})
setOperationAction(ISD::SIGN_EXTEND_INREG, T, Legal);
+ }
setTargetDAGCombine(ISD::VSELECT);
}
@@ -283,6 +293,37 @@ HexagonTargetLowering::isHvxPairTy(MVT Ty) const {
Ty.getSizeInBits() == 16 * Subtarget.getVectorLength();
}
+bool
+HexagonTargetLowering::isHvxBoolTy(MVT Ty) const {
+ return Subtarget.isHVXVectorType(Ty, true) &&
+ Ty.getVectorElementType() == MVT::i1;
+}
+
+bool HexagonTargetLowering::allowsHvxMemoryAccess(
+ MVT VecTy, MachineMemOperand::Flags Flags, bool *Fast) const {
+ // Bool vectors are excluded by default, but make it explicit to
+ // emphasize that bool vectors cannot be loaded or stored.
+ // Also, disallow double vector stores (to prevent unnecessary
+ // store widening in DAG combiner).
+ if (VecTy.getSizeInBits() > 8*Subtarget.getVectorLength())
+ return false;
+ if (!Subtarget.isHVXVectorType(VecTy, /*IncludeBool=*/false))
+ return false;
+ if (Fast)
+ *Fast = true;
+ return true;
+}
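A minimal sketch of the size gate above, assuming getVectorLength() returns the vector length in bytes (64 or 128 for HVX): anything wider in bits than one full vector register is refused, which is what keeps the DAG combiner from widening stores into vector pairs.

// Hedged model of the check; HwLenBytes and the name are assumptions.
bool fitsInOneHvxVector(unsigned SizeInBits, unsigned HwLenBytes) {
  return SizeInBits <= 8 * HwLenBytes; // rejects vector pairs and larger
}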
+
+bool HexagonTargetLowering::allowsHvxMisalignedMemoryAccesses(
+ MVT VecTy, MachineMemOperand::Flags Flags, bool *Fast) const {
+ if (!Subtarget.isHVXVectorType(VecTy))
+ return false;
+ // XXX Should this be false? vmemu are a bit slower than vmem.
+ if (Fast)
+ *Fast = true;
+ return true;
+}
+
SDValue
HexagonTargetLowering::convertToByteIndex(SDValue ElemIdx, MVT ElemTy,
SelectionDAG &DAG) const {
@@ -402,10 +443,11 @@ HexagonTargetLowering::buildHvxVectorReg(ArrayRef<SDValue> Values,
ArrayRef<Constant*> Tmp((Constant**)Consts.begin(),
(Constant**)Consts.end());
Constant *CV = ConstantVector::get(Tmp);
- unsigned Align = HwLen;
- SDValue CP = LowerConstantPool(DAG.getConstantPool(CV, VecTy, Align), DAG);
+ Align Alignment(HwLen);
+ SDValue CP =
+ LowerConstantPool(DAG.getConstantPool(CV, VecTy, Alignment), DAG);
return DAG.getLoad(VecTy, dl, DAG.getEntryNode(), CP,
- MachinePointerInfo::getConstantPool(MF), Align);
+ MachinePointerInfo::getConstantPool(MF), Alignment);
}
// A special case is a situation where the vector is built entirely from
@@ -1023,6 +1065,63 @@ HexagonTargetLowering::extendHvxVectorPred(SDValue VecV, const SDLoc &dl,
}
SDValue
+HexagonTargetLowering::compressHvxPred(SDValue VecQ, const SDLoc &dl,
+ MVT ResTy, SelectionDAG &DAG) const {
+ // Given a predicate register VecQ, transfer bits VecQ[0..HwLen-1]
+ // (i.e. the entire predicate register) to bits [0..HwLen-1] of a
+ // vector register. The remaining bits of the vector register are
+ // unspecified.
+
+ MachineFunction &MF = DAG.getMachineFunction();
+ unsigned HwLen = Subtarget.getVectorLength();
+ MVT ByteTy = MVT::getVectorVT(MVT::i8, HwLen);
+ MVT PredTy = ty(VecQ);
+ unsigned PredLen = PredTy.getVectorNumElements();
+ assert(HwLen % PredLen == 0);
+ MVT VecTy = MVT::getVectorVT(MVT::getIntegerVT(8*HwLen/PredLen), PredLen);
+
+ Type *Int8Ty = Type::getInt8Ty(*DAG.getContext());
+ SmallVector<Constant*, 128> Tmp;
+ // Create an array of bytes (hex): 01,02,04,08,10,20,40,80, 01,02,04,08,...
+ // These are bytes with the LSB rotated left with respect to their index.
+ for (unsigned i = 0; i != HwLen/8; ++i) {
+ for (unsigned j = 0; j != 8; ++j)
+ Tmp.push_back(ConstantInt::get(Int8Ty, 1ull << j));
+ }
+ Constant *CV = ConstantVector::get(Tmp);
+ Align Alignment(HwLen);
+ SDValue CP =
+ LowerConstantPool(DAG.getConstantPool(CV, ByteTy, Alignment), DAG);
+ SDValue Bytes =
+ DAG.getLoad(ByteTy, dl, DAG.getEntryNode(), CP,
+ MachinePointerInfo::getConstantPool(MF), Alignment);
+
+ // Select the bytes that correspond to true bits in the vector predicate.
+ SDValue Sel = DAG.getSelect(dl, VecTy, VecQ, DAG.getBitcast(VecTy, Bytes),
+ getZero(dl, VecTy, DAG));
+ // Calculate the OR of all bytes in each group of 8. That will compress
+ // all the individual bits into a single byte.
+ // First, OR groups of 4, via vrmpy with 0x01010101.
+ SDValue All1 =
+ DAG.getSplatBuildVector(MVT::v4i8, dl, DAG.getConstant(1, dl, MVT::i32));
+ SDValue Vrmpy = getInstr(Hexagon::V6_vrmpyub, dl, ByteTy, {Sel, All1}, DAG);
+ // Then rotate the accumulated vector by 4 bytes, and do the final OR.
+ SDValue Rot = getInstr(Hexagon::V6_valignbi, dl, ByteTy,
+ {Vrmpy, Vrmpy, DAG.getTargetConstant(4, dl, MVT::i32)}, DAG);
+ SDValue Vor = DAG.getNode(ISD::OR, dl, ByteTy, {Vrmpy, Rot});
+
+ // Pick every 8th byte and coalesce them at the beginning of the output.
+ // For symmetry, coalesce every 1+8th byte after that, then every 2+8th
+ // byte and so on.
+ SmallVector<int,128> Mask;
+ for (unsigned i = 0; i != HwLen; ++i)
+ Mask.push_back((8*i) % HwLen + i/(HwLen/8));
+ SDValue Collect =
+ DAG.getVectorShuffle(ByteTy, dl, Vor, DAG.getUNDEF(ByteTy), Mask);
+ return DAG.getBitcast(ResTy, Collect);
+}
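A scalar reference model of compressHvxPred may help here. The key point, implicit in the comments above, is that the bytes selected within each group of 8 carry disjoint bits (1 << (i % 8)), so the summation that vrmpyub performs is equivalent to an OR. For HwLen = 64 the final shuffle mask gathers bytes 0,8,...,56, then 1,9,...,57, and so on. The model below assumes one predicate lane per byte (e.g. v128i1 in 128B mode) and is an illustration, not the DAG lowering:

// Pack HwLen predicate lanes into HwLen/8 bytes, lane i -> bit i%8.
#include <cstdint>
#include <vector>

std::vector<uint8_t> compressPredModel(const std::vector<bool> &Q) {
  const unsigned HwLen = Q.size(); // e.g. 64 or 128 lanes
  std::vector<uint8_t> Out(HwLen / 8, 0);
  for (unsigned i = 0; i != HwLen; ++i)
    if (Q[i])
      Out[i / 8] |= uint8_t(1u << (i % 8)); // disjoint bits: add == or
  return Out;
}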
+
+SDValue
HexagonTargetLowering::LowerHvxBuildVector(SDValue Op, SelectionDAG &DAG)
const {
const SDLoc &dl(Op);
@@ -1431,6 +1530,53 @@ HexagonTargetLowering::LowerHvxMulh(SDValue Op, SelectionDAG &DAG) const {
}
SDValue
+HexagonTargetLowering::LowerHvxBitcast(SDValue Op, SelectionDAG &DAG) const {
+ SDValue ValQ = Op.getOperand(0);
+ MVT ResTy = ty(Op);
+ MVT VecTy = ty(ValQ);
+ const SDLoc &dl(Op);
+
+ if (isHvxBoolTy(VecTy) && ResTy.isScalarInteger()) {
+ unsigned HwLen = Subtarget.getVectorLength();
+ MVT WordTy = MVT::getVectorVT(MVT::i32, HwLen/4);
+ SDValue VQ = compressHvxPred(ValQ, dl, WordTy, DAG);
+ unsigned BitWidth = ResTy.getSizeInBits();
+
+ if (BitWidth < 64) {
+ SDValue W0 = extractHvxElementReg(VQ, DAG.getConstant(0, dl, MVT::i32),
+ dl, MVT::i32, DAG);
+ if (BitWidth == 32)
+ return W0;
+ assert(BitWidth < 32u);
+ return DAG.getZExtOrTrunc(W0, dl, ResTy);
+ }
+
+ // The result is >= 64 bits. The only options are 64 or 128.
+ assert(BitWidth == 64 || BitWidth == 128);
+ SmallVector<SDValue,4> Words;
+ for (unsigned i = 0; i != BitWidth/32; ++i) {
+ SDValue W = extractHvxElementReg(
+ VQ, DAG.getConstant(i, dl, MVT::i32), dl, MVT::i32, DAG);
+ Words.push_back(W);
+ }
+ SmallVector<SDValue,2> Combines;
+ assert(Words.size() % 2 == 0);
+ for (unsigned i = 0, e = Words.size(); i < e; i += 2) {
+ SDValue C = DAG.getNode(
+ HexagonISD::COMBINE, dl, MVT::i64, {Words[i+1], Words[i]});
+ Combines.push_back(C);
+ }
+
+ if (BitWidth == 64)
+ return Combines[0];
+
+ return DAG.getNode(ISD::BUILD_PAIR, dl, ResTy, Combines);
+ }
+
+ return Op;
+}
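For results of 64 bits and up, the 32-bit words extracted above are paired high:low through HexagonISD::COMBINE ({Words[i+1], Words[i]}). A one-line scalar model of that pairing, under the assumption that COMBINE places its first operand in the upper half:

// Assumed semantics of HexagonISD::COMBINE on two i32 words.
#include <cstdint>

uint64_t combineWords(uint32_t Hi, uint32_t Lo) {
  return (uint64_t(Hi) << 32) | Lo; // Hi = Words[i+1], Lo = Words[i]
}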
+
+SDValue
HexagonTargetLowering::LowerHvxExtend(SDValue Op, SelectionDAG &DAG) const {
// Sign- and zero-extends are legal.
assert(Op.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG);
@@ -1446,6 +1592,28 @@ HexagonTargetLowering::LowerHvxShift(SDValue Op, SelectionDAG &DAG) const {
}
SDValue
+HexagonTargetLowering::LowerHvxIntrinsic(SDValue Op, SelectionDAG &DAG) const {
+ const SDLoc &dl(Op);
+ MVT ResTy = ty(Op);
+
+ unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+ bool Use64b = Subtarget.useHVX64BOps();
+ unsigned IntPredCast = Use64b ? Intrinsic::hexagon_V6_pred_typecast
+ : Intrinsic::hexagon_V6_pred_typecast_128B;
+ if (IntNo == IntPredCast) {
+ SDValue Vs = Op.getOperand(1);
+ MVT OpTy = ty(Vs);
+ if (isHvxBoolTy(ResTy) && isHvxBoolTy(OpTy)) {
+ if (ResTy == OpTy)
+ return Vs;
+ return DAG.getNode(HexagonISD::TYPECAST, dl, ResTy, Vs);
+ }
+ }
+
+ return Op;
+}
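The fold above treats pred_typecast as a pure reinterpretation: when source and result types match it vanishes, otherwise it survives as a TYPECAST node over the same predicate register. A hedged C++ analogue of that reinterpretation (types and the helper name are illustrative):

// Bit-identical reinterpretation between equally sized types.
#include <cstring>

template <typename To, typename From>
To predTypecast(const From &V) {
  static_assert(sizeof(To) == sizeof(From), "same underlying register");
  To R;
  std::memcpy(&R, &V, sizeof(To)); // no bits change, only the type
  return R;
}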
+
+SDValue
HexagonTargetLowering::SplitHvxPairOp(SDValue Op, SelectionDAG &DAG) const {
assert(!Op.isMachineOpcode());
SmallVector<SDValue,2> OpsL, OpsH;
@@ -1566,7 +1734,7 @@ HexagonTargetLowering::LowerHvxOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::INSERT_VECTOR_ELT: return LowerHvxInsertElement(Op, DAG);
case ISD::EXTRACT_SUBVECTOR: return LowerHvxExtractSubvector(Op, DAG);
case ISD::EXTRACT_VECTOR_ELT: return LowerHvxExtractElement(Op, DAG);
-
+ case ISD::BITCAST: return LowerHvxBitcast(Op, DAG);
case ISD::ANY_EXTEND: return LowerHvxAnyExt(Op, DAG);
case ISD::SIGN_EXTEND: return LowerHvxSignExt(Op, DAG);
case ISD::ZERO_EXTEND: return LowerHvxZeroExt(Op, DAG);
@@ -1580,6 +1748,7 @@ HexagonTargetLowering::LowerHvxOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::ANY_EXTEND_VECTOR_INREG: return LowerHvxExtend(Op, DAG);
case ISD::SETCC:
case ISD::INTRINSIC_VOID: return Op;
+ case ISD::INTRINSIC_WO_CHAIN: return LowerHvxIntrinsic(Op, DAG);
// Unaligned loads will be handled by the default lowering.
case ISD::LOAD: return SDValue();
}
@@ -1589,6 +1758,28 @@ HexagonTargetLowering::LowerHvxOperation(SDValue Op, SelectionDAG &DAG) const {
llvm_unreachable("Unhandled HVX operation");
}
+void
+HexagonTargetLowering::LowerHvxOperationWrapper(SDNode *N,
+ SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const {
+}
+
+void
+HexagonTargetLowering::ReplaceHvxNodeResults(SDNode *N,
+ SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const {
+ unsigned Opc = N->getOpcode();
+ switch (Opc) {
+ case ISD::BITCAST:
+ if (isHvxBoolTy(ty(N->getOperand(0)))) {
+ SDValue Op(N, 0);
+ SDValue C = LowerHvxBitcast(Op, DAG);
+ Results.push_back(C);
+ }
+ break;
+ default:
+ break;
+ }
+}
+
SDValue
HexagonTargetLowering::PerformHvxDAGCombine(SDNode *N, DAGCombinerInfo &DCI)
const {
@@ -1621,3 +1812,16 @@ HexagonTargetLowering::isHvxOperation(SDValue Op) const {
return Subtarget.isHVXVectorType(ty(V), true);
});
}
+
+bool
+HexagonTargetLowering::isHvxOperation(SDNode *N) const {
+ // If the type of any result, or any operand type are HVX vector types,
+ // this is an HVX operation.
+ auto IsHvxTy = [this] (EVT Ty) {
+ return Ty.isSimple() && Subtarget.isHVXVectorType(Ty.getSimpleVT(), true);
+ };
+ auto IsHvxOp = [this] (SDValue Op) {
+ return Subtarget.isHVXVectorType(ty(Op), true);
+ };
+ return llvm::any_of(N->values(), IsHvxTy) || llvm::any_of(N->ops(), IsHvxOp);
+}