diff options
Diffstat (limited to 'include/clang/Basic/arm_neon.td')
-rw-r--r-- | include/clang/Basic/arm_neon.td | 954 |
1 files changed, 598 insertions, 356 deletions
diff --git a/include/clang/Basic/arm_neon.td b/include/clang/Basic/arm_neon.td index b918459f4e4a..0247bb5dd0d7 100644 --- a/include/clang/Basic/arm_neon.td +++ b/include/clang/Basic/arm_neon.td @@ -11,142 +11,265 @@ // file will be generated. See ARM document DUI0348B. // //===----------------------------------------------------------------------===// +// +// Each intrinsic is a subclass of the Inst class. An intrinsic can either +// generate a __builtin_* call or it can expand to a set of generic operations. +// +// The operations are subclasses of Operation providing a list of DAGs, the +// last of which is the return value. The available DAG nodes are documented +// below. +// +//===----------------------------------------------------------------------===// + +// The base Operation class. All operations must subclass this. +class Operation<list<dag> ops=[]> { + list<dag> Ops = ops; + bit Unavailable = 0; +} +// An operation that only contains a single DAG. +class Op<dag op> : Operation<[op]>; +// A shorter version of Operation - takes a list of DAGs. The last of these will +// be the return value. +class LOp<list<dag> ops> : Operation<ops>; + +// These defs and classes are used internally to implement the SetTheory +// expansion and should be ignored. +foreach Index = 0-63 in + def sv##Index; +class MaskExpand; + +//===----------------------------------------------------------------------===// +// Available operations +//===----------------------------------------------------------------------===// + +// DAG arguments can either be operations (documented below) or variables. +// Variables are prefixed with '$'. There are variables for each input argument, +// with the name $pN, where N starts at zero. So the zero'th argument will be +// $p0, the first $p1 etc. + +// op - Binary or unary operator, depending on the number of arguments. The +// operator itself is just treated as a raw string and is not checked. +// example: (op "+", $p0, $p1) -> "__p0 + __p1". 
+// (op "-", $p0) -> "-__p0" +def op; +// call - Invoke another intrinsic. The input types are type checked and +// disambiguated. If there is no intrinsic defined that takes +// the given types (or if there is a type ambiguity) an error is +// generated at tblgen time. The name of the intrinsic is the raw +// name as given to the Inst class (not mangled). +// example: (call "vget_high", $p0) -> "vgetq_high_s16(__p0)" +// (assuming $p0 has type int16x8_t). +def call; +// cast - Perform a cast to a different type. This gets emitted as a static +// C-style cast. For a pure reinterpret cast (T x = *(T*)&y), use +// "bitcast". +// +// The syntax is (cast MOD* VAL). The last argument is the value to +// cast, preceded by a sequence of type modifiers. The target type +// starts off as the type of VAL, and is modified by MOD in sequence. +// The available modifiers are: +// - $X - Take the type of parameter/variable X. For example: +// (cast $p0, $p1) would cast $p1 to the type of $p0. +// - "R" - The type of the return type. +// - A typedef string - A NEON or stdint.h type that is then parsed. +// for example: (cast "uint32x4_t", $p0). +// - "U" - Make the type unsigned. +// - "S" - Make the type signed. +// - "H" - Halve the number of lanes in the type. +// - "D" - Double the number of lanes in the type. +// - "8" - Convert type to an equivalent vector of 8-bit signed +// integers. +// example: (cast "R", "U", $p0) -> "(uint32x4_t)__p0" (assuming the return +// value is of type "int32x4_t". +// (cast $p0, "D", "8", $p1) -> "(int8x16_t)__p1" (assuming __p0 +// has type float64x1_t or any other vector type of 64 bits). +// (cast "int32_t", $p2) -> "(int32_t)__p2" +def cast; +// bitcast - Same as "cast", except a reinterpret-cast is produced: +// (bitcast "T", $p0) -> "*(T*)&__p0". +// The VAL argument is saved to a temporary so it can be used +// as an l-value. +def bitcast; +// dup - Take a scalar argument and create a vector by duplicating it into +// all lanes. 
The type of the vector is the base type of the intrinsic. +// example: (dup $p1) -> "(uint32x2_t) {__p1, __p1}" (assuming the base type +// is uint32x2_t). +def dup; +// splat - Take a vector and a lane index, and return a vector of the same type +// containing repeated instances of the source vector at the lane index. +// example: (splat $p0, $p1) -> +// "__builtin_shufflevector(__p0, __p0, __p1, __p1, __p1, __p1)" +// (assuming __p0 has four elements). +def splat; +// save_temp - Create a temporary (local) variable. The variable takes a name +// based on the zero'th parameter and can be referenced using +// that name in subsequent DAGs in the same operation. The scope +// of a temp is the operation. If a variable with the given name +// already exists, an error will be given at tblgen time. +// example: [(save_temp $var, (call "foo", $p0)), +// (op "+", $var, $p1)] -> +// "int32x2_t __var = foo(__p0); return __var + __p1;" +def save_temp; +// name_replace - Return the name of the current intrinsic with the first +// argument replaced by the second argument. Raises an error if +// the first argument does not exist in the intrinsic name. +// example: (call (name_replace "_high_", "_"), $p0) (to call the non-high +// version of this intrinsic). +def name_replace; +// literal - Create a literal piece of code. The code is treated as a raw +// string, and must be given a type. The type is a stdint.h or +// NEON intrinsic type as given to (cast). +// example: (literal "int32_t", "0") +def literal; +// shuffle - Create a vector shuffle. The syntax is (shuffle ARG0, ARG1, MASK). +// The MASK argument is a set of elements. The elements are generated +// from the two special defs "mask0" and "mask1". "mask0" expands to +// the lane indices in sequence for ARG0, and "mask1" expands to +// the lane indices in sequence for ARG1. They can be used as-is, e.g. 
+// +// (shuffle $p0, $p1, mask0) -> $p0 +// (shuffle $p0, $p1, mask1) -> $p1 +// +// or, more usefully, they can be manipulated using the SetTheory +// operators plus some extra operators defined in the NEON emitter. +// The operators are described below. +// example: (shuffle $p0, $p1, (add (highhalf mask0), (highhalf mask1))) -> +// A concatenation of the high halves of the input vectors. +def shuffle; + +// add, interleave, decimate: These set operators are vanilla SetTheory +// operators and take their normal definition. +def add; +def interleave; +def decimate; +// rotl - Rotate set left by a number of elements. +// example: (rotl mask0, 3) -> [3, 4, 5, 6, 0, 1, 2] +def rotl; +// rotr - Rotate set right by a number of elements. +// example: (rotr mask0, 3) -> [4, 5, 6, 0, 1, 2, 3] +def rotr; +// highhalf - Take only the high half of the input. +// example: (highhalf mask0) -> [4, 5, 6, 7] (assuming mask0 had 8 elements) +def highhalf; +// lowhalf - Take only the low half of the input. +// example: (lowhalf mask0) -> [0, 1, 2, 3] (assuming mask0 had 8 elements) +def lowhalf; +// rev - Perform a variable-width reversal of the elements. The zero'th argument +// is a width in bits to reverse. The lanes this maps to are determined +// based on the element width of the underlying type. 
+// example: (rev 32, mask0) -> [3, 2, 1, 0, 7, 6, 5, 4] (if 8-bit elements) +// example: (rev 32, mask0) -> [1, 0, 3, 2] (if 16-bit elements) +def rev; +// mask0 - The initial sequence of lanes for shuffle ARG0 +def mask0 : MaskExpand; +// mask1 - The initial sequence of lanes for shuffle ARG1 +def mask1 : MaskExpand; + +def OP_NONE : Operation; +def OP_UNAVAILABLE : Operation { + let Unavailable = 1; +} + +//===----------------------------------------------------------------------===// +// Instruction definitions +//===----------------------------------------------------------------------===// -class Op; - -def OP_NONE : Op; -def OP_UNAVAILABLE : Op; -def OP_ADD : Op; -def OP_ADDL : Op; -def OP_ADDLHi : Op; -def OP_ADDW : Op; -def OP_ADDWHi : Op; -def OP_SUB : Op; -def OP_SUBL : Op; -def OP_SUBLHi : Op; -def OP_SUBW : Op; -def OP_SUBWHi : Op; -def OP_MUL : Op; -def OP_MLA : Op; -def OP_MLAL : Op; -def OP_MULLHi : Op; -def OP_MULLHi_N : Op; -def OP_MLALHi : Op; -def OP_MLALHi_N : Op; -def OP_MLS : Op; -def OP_MLSL : Op; -def OP_MLSLHi : Op; -def OP_MLSLHi_N : Op; -def OP_MUL_N : Op; -def OP_MLA_N : Op; -def OP_MLS_N : Op; -def OP_FMLA_N : Op; -def OP_FMLS_N : Op; -def OP_MLAL_N : Op; -def OP_MLSL_N : Op; -def OP_MUL_LN: Op; -def OP_MULX_LN: Op; -def OP_MULL_LN : Op; -def OP_MULLHi_LN : Op; -def OP_MLA_LN: Op; -def OP_MLS_LN: Op; -def OP_MLAL_LN : Op; -def OP_MLALHi_LN : Op; -def OP_MLSL_LN : Op; -def OP_MLSLHi_LN : Op; -def OP_QDMULL_LN : Op; -def OP_QDMULLHi_LN : Op; -def OP_QDMLAL_LN : Op; -def OP_QDMLALHi_LN : Op; -def OP_QDMLSL_LN : Op; -def OP_QDMLSLHi_LN : Op; -def OP_QDMULH_LN : Op; -def OP_QRDMULH_LN : Op; -def OP_FMS_LN : Op; -def OP_FMS_LNQ : Op; -def OP_TRN1 : Op; -def OP_ZIP1 : Op; -def OP_UZP1 : Op; -def OP_TRN2 : Op; -def OP_ZIP2 : Op; -def OP_UZP2 : Op; -def OP_EQ : Op; -def OP_GE : Op; -def OP_LE : Op; -def OP_GT : Op; -def OP_LT : Op; -def OP_NEG : Op; -def OP_NOT : Op; -def OP_AND : Op; -def OP_OR : Op; -def OP_XOR : Op; -def OP_ANDN : Op; -def 
OP_ORN : Op; -def OP_CAST : Op; -def OP_HI : Op; -def OP_LO : Op; -def OP_CONC : Op; -def OP_DUP : Op; -def OP_DUP_LN: Op; -def OP_SEL : Op; -def OP_REV64 : Op; -def OP_REV32 : Op; -def OP_REV16 : Op; -def OP_XTN : Op; -def OP_SQXTUN : Op; -def OP_QXTN : Op; -def OP_VCVT_NA_HI : Op; -def OP_VCVT_EX_HI : Op; -def OP_VCVTX_HI : Op; -def OP_REINT : Op; -def OP_ADDHNHi : Op; -def OP_RADDHNHi : Op; -def OP_SUBHNHi : Op; -def OP_RSUBHNHi : Op; -def OP_ABDL : Op; -def OP_ABDLHi : Op; -def OP_ABA : Op; -def OP_ABAL : Op; -def OP_ABALHi : Op; -def OP_QDMULLHi : Op; -def OP_QDMULLHi_N : Op; -def OP_QDMLALHi : Op; -def OP_QDMLALHi_N : Op; -def OP_QDMLSLHi : Op; -def OP_QDMLSLHi_N : Op; -def OP_DIV : Op; -def OP_LONG_HI : Op; -def OP_NARROW_HI : Op; -def OP_MOVL_HI : Op; -def OP_COPY_LN : Op; -def OP_COPYQ_LN : Op; -def OP_COPY_LNQ : Op; -def OP_SCALAR_MUL_LN : Op; -def OP_SCALAR_MUL_LNQ : Op; -def OP_SCALAR_MULX_LN : Op; -def OP_SCALAR_MULX_LNQ : Op; -def OP_SCALAR_VMULX_LN : Op; -def OP_SCALAR_VMULX_LNQ : Op; -def OP_SCALAR_QDMULL_LN : Op; -def OP_SCALAR_QDMULL_LNQ : Op; -def OP_SCALAR_QDMULH_LN : Op; -def OP_SCALAR_QDMULH_LNQ : Op; -def OP_SCALAR_QRDMULH_LN : Op; -def OP_SCALAR_QRDMULH_LNQ : Op; -def OP_SCALAR_GET_LN : Op; -def OP_SCALAR_SET_LN : Op; - -class Inst <string n, string p, string t, Op o> { +// Every intrinsic subclasses "Inst". An intrinsic has a name, a prototype and +// a sequence of typespecs. +// +// The name is the base name of the intrinsic, for example "vget_lane". This is +// then mangled by the tblgen backend to add type information ("vget_lane_s16"). +// +// A typespec is a sequence of uppercase characters (modifiers) followed by one +// lowercase character. A typespec encodes a particular "base type" of the +// intrinsic. +// +// An example typespec is "Qs" - quad-size short - int16x8_t. The available +// typespec codes are given below. +// +// The string given to an Inst class is a sequence of typespecs. 
The intrinsic +// is instantiated for every typespec in the sequence. For example "sdQsQd". +// +// The prototype is a string that defines the return type of the intrinsic +// and the type of each argument. The return type and every argument gets a +// "modifier" that can change in some way the "base type" of the intrinsic. +// +// The modifier 'd' means "default" and does not modify the base type in any +// way. The available modifiers are given below. +// +// Typespecs +// --------- +// c: char +// s: short +// i: int +// l: long +// k: 128-bit long +// f: float +// h: half-float +// d: double +// +// Typespec modifiers +// ------------------ +// S: scalar, only used for function mangling. +// U: unsigned +// Q: 128b +// H: 128b without mangling 'q' +// P: polynomial +// +// Prototype modifiers +// ------------------- +// prototype: return (arg, arg, ...) +// +// v: void +// t: best-fit integer (int/poly args) +// x: signed integer (int/float args) +// u: unsigned integer (int/float args) +// f: float (int args) +// F: double (int args) +// d: default +// g: default, ignore 'Q' size modifier. +// j: default, force 'Q' size modifier. +// w: double width elements, same num elts +// n: double width elements, half num elts +// h: half width elements, double num elts +// q: half width elements, quad num elts +// e: half width elements, double num elts, unsigned +// m: half width elements, same num elts +// i: constant int +// l: constant uint64 +// s: scalar of element type +// z: scalar of half width element type, signed +// r: scalar of double width element type, signed +// a: scalar of element type (splat to vector type) +// b: scalar of unsigned integer/long type (int/float args) +// $: scalar of signed integer/long type (int/float args) +// y: scalar of float +// o: scalar of double +// k: default elt width, double num elts +// 2,3,4: array of default vectors +// B,C,D: array of default elts, force 'Q' size modifier. 
+// p: pointer type +// c: const pointer type + +// Every intrinsic subclasses Inst. +class Inst <string n, string p, string t, Operation o> { string Name = n; string Prototype = p; string Types = t; - Op Operand = o; + string ArchGuard = ""; + + Operation Operation = o; + bit CartesianProductOfTypes = 0; + bit BigEndianSafe = 0; bit isShift = 0; bit isScalarShift = 0; bit isScalarNarrowShift = 0; bit isVCVT_N = 0; - bit isA64 = 0; - bit isCrypto = 0; + // For immediate checks: the immediate will be assumed to specify the lane of + // a Q register. Only used for intrinsics which end up calling polymorphic + // builtins. + bit isLaneQ = 0; // Certain intrinsics have different names than their representative // instructions. This field allows us to handle this correctly when we @@ -181,59 +304,193 @@ class WInst<string n, string p, string t> : Inst<n, p, t, OP_NONE> {} // WOpInst: Instruction with bit size only suffix (e.g., "8"). // LOpInst: Logical instruction with no bit size suffix. // NoTestOpInst: Intrinsic that has no corresponding instruction. -class SOpInst<string n, string p, string t, Op o> : Inst<n, p, t, o> {} -class IOpInst<string n, string p, string t, Op o> : Inst<n, p, t, o> {} -class WOpInst<string n, string p, string t, Op o> : Inst<n, p, t, o> {} -class LOpInst<string n, string p, string t, Op o> : Inst<n, p, t, o> {} -class NoTestOpInst<string n, string p, string t, Op o> : Inst<n, p, t, o> {} +class SOpInst<string n, string p, string t, Operation o> : Inst<n, p, t, o> {} +class IOpInst<string n, string p, string t, Operation o> : Inst<n, p, t, o> {} +class WOpInst<string n, string p, string t, Operation o> : Inst<n, p, t, o> {} +class LOpInst<string n, string p, string t, Operation o> : Inst<n, p, t, o> {} +class NoTestOpInst<string n, string p, string t, Operation o> : Inst<n, p, t, o> {} -// prototype: return (arg, arg, ...) 
-// v: void -// t: best-fit integer (int/poly args) -// x: signed integer (int/float args) -// u: unsigned integer (int/float args) -// f: float (int args) -// F: double (int args) -// d: default -// g: default, ignore 'Q' size modifier. -// j: default, force 'Q' size modifier. -// w: double width elements, same num elts -// n: double width elements, half num elts -// h: half width elements, double num elts -// q: half width elements, quad num elts -// e: half width elements, double num elts, unsigned -// m: half width elements, same num elts -// i: constant int -// l: constant uint64 -// s: scalar of element type -// z: scalar of half width element type, signed -// r: scalar of double width element type, signed -// a: scalar of element type (splat to vector type) -// b: scalar of unsigned integer/long type (int/float args) -// $: scalar of signed integer/long type (int/float args) -// y: scalar of float -// o: scalar of double -// k: default elt width, double num elts -// 2,3,4: array of default vectors -// B,C,D: array of default elts, force 'Q' size modifier. 
-// p: pointer type -// c: const pointer type +//===----------------------------------------------------------------------===// +// Operations +//===----------------------------------------------------------------------===// -// sizes: -// c: char -// s: short -// i: int -// l: long -// f: float -// h: half-float -// d: double +def OP_ADD : Op<(op "+", $p0, $p1)>; +def OP_ADDL : Op<(op "+", (call "vmovl", $p0), (call "vmovl", $p1))>; +def OP_ADDLHi : Op<(op "+", (call "vmovl_high", $p0), + (call "vmovl_high", $p1))>; +def OP_ADDW : Op<(op "+", $p0, (call "vmovl", $p1))>; +def OP_ADDWHi : Op<(op "+", $p0, (call "vmovl_high", $p1))>; +def OP_SUB : Op<(op "-", $p0, $p1)>; +def OP_SUBL : Op<(op "-", (call "vmovl", $p0), (call "vmovl", $p1))>; +def OP_SUBLHi : Op<(op "-", (call "vmovl_high", $p0), + (call "vmovl_high", $p1))>; +def OP_SUBW : Op<(op "-", $p0, (call "vmovl", $p1))>; +def OP_SUBWHi : Op<(op "-", $p0, (call "vmovl_high", $p1))>; +def OP_MUL : Op<(op "*", $p0, $p1)>; +def OP_MLA : Op<(op "+", $p0, (op "*", $p1, $p2))>; +def OP_MLAL : Op<(op "+", $p0, (call "vmull", $p1, $p2))>; +def OP_MULLHi : Op<(call "vmull", (call "vget_high", $p0), + (call "vget_high", $p1))>; +def OP_MULLHi_P64 : Op<(call "vmull", + (cast "poly64_t", (call "vget_high", $p0)), + (cast "poly64_t", (call "vget_high", $p1)))>; +def OP_MULLHi_N : Op<(call "vmull_n", (call "vget_high", $p0), $p1)>; +def OP_MLALHi : Op<(call "vmlal", $p0, (call "vget_high", $p1), + (call "vget_high", $p2))>; +def OP_MLALHi_N : Op<(call "vmlal_n", $p0, (call "vget_high", $p1), $p2)>; +def OP_MLS : Op<(op "-", $p0, (op "*", $p1, $p2))>; +def OP_MLSL : Op<(op "-", $p0, (call "vmull", $p1, $p2))>; +def OP_MLSLHi : Op<(call "vmlsl", $p0, (call "vget_high", $p1), + (call "vget_high", $p2))>; +def OP_MLSLHi_N : Op<(call "vmlsl_n", $p0, (call "vget_high", $p1), $p2)>; +def OP_MUL_N : Op<(op "*", $p0, (dup $p1))>; +def OP_MLA_N : Op<(op "+", $p0, (op "*", $p1, (dup $p2)))>; +def OP_MLS_N : Op<(op "-", $p0, (op "*", 
$p1, (dup $p2)))>; +def OP_FMLA_N : Op<(call "vfma", $p0, $p1, (dup $p2))>; +def OP_FMLS_N : Op<(call "vfms", $p0, $p1, (dup $p2))>; +def OP_MLAL_N : Op<(op "+", $p0, (call "vmull", $p1, (dup $p2)))>; +def OP_MLSL_N : Op<(op "-", $p0, (call "vmull", $p1, (dup $p2)))>; +def OP_MUL_LN : Op<(op "*", $p0, (splat $p1, $p2))>; +def OP_MULX_LN : Op<(call "vmulx", $p0, (splat $p1, $p2))>; +def OP_MULL_LN : Op<(call "vmull", $p0, (splat $p1, $p2))>; +def OP_MULLHi_LN: Op<(call "vmull", (call "vget_high", $p0), (splat $p1, $p2))>; +def OP_MLA_LN : Op<(op "+", $p0, (op "*", $p1, (splat $p2, $p3)))>; +def OP_MLS_LN : Op<(op "-", $p0, (op "*", $p1, (splat $p2, $p3)))>; +def OP_MLAL_LN : Op<(op "+", $p0, (call "vmull", $p1, (splat $p2, $p3)))>; +def OP_MLALHi_LN: Op<(op "+", $p0, (call "vmull", (call "vget_high", $p1), + (splat $p2, $p3)))>; +def OP_MLSL_LN : Op<(op "-", $p0, (call "vmull", $p1, (splat $p2, $p3)))>; +def OP_MLSLHi_LN : Op<(op "-", $p0, (call "vmull", (call "vget_high", $p1), + (splat $p2, $p3)))>; +def OP_QDMULL_LN : Op<(call "vqdmull", $p0, (splat $p1, $p2))>; +def OP_QDMULLHi_LN : Op<(call "vqdmull", (call "vget_high", $p0), + (splat $p1, $p2))>; +def OP_QDMLAL_LN : Op<(call "vqdmlal", $p0, $p1, (splat $p2, $p3))>; +def OP_QDMLALHi_LN : Op<(call "vqdmlal", $p0, (call "vget_high", $p1), + (splat $p2, $p3))>; +def OP_QDMLSL_LN : Op<(call "vqdmlsl", $p0, $p1, (splat $p2, $p3))>; +def OP_QDMLSLHi_LN : Op<(call "vqdmlsl", $p0, (call "vget_high", $p1), + (splat $p2, $p3))>; +def OP_QDMULH_LN : Op<(call "vqdmulh", $p0, (splat $p1, $p2))>; +def OP_QRDMULH_LN : Op<(call "vqrdmulh", $p0, (splat $p1, $p2))>; +def OP_FMS_LN : Op<(call "vfma_lane", $p0, $p1, (op "-", $p2), $p3)>; +def OP_FMS_LNQ : Op<(call "vfma_laneq", $p0, $p1, (op "-", $p2), $p3)>; +def OP_TRN1 : Op<(shuffle $p0, $p1, (interleave (decimate mask0, 2), + (decimate mask1, 2)))>; +def OP_ZIP1 : Op<(shuffle $p0, $p1, (lowhalf (interleave mask0, mask1)))>; +def OP_UZP1 : Op<(shuffle $p0, $p1, (add (decimate 
mask0, 2), + (decimate mask1, 2)))>; +def OP_TRN2 : Op<(shuffle $p0, $p1, (interleave + (decimate (rotl mask0, 1), 2), + (decimate (rotl mask1, 1), 2)))>; +def OP_ZIP2 : Op<(shuffle $p0, $p1, (highhalf (interleave mask0, mask1)))>; +def OP_UZP2 : Op<(shuffle $p0, $p1, (add (decimate (rotl mask0, 1), 2), + (decimate (rotl mask1, 1), 2)))>; +def OP_EQ : Op<(cast "R", (op "==", $p0, $p1))>; +def OP_GE : Op<(cast "R", (op ">=", $p0, $p1))>; +def OP_LE : Op<(cast "R", (op "<=", $p0, $p1))>; +def OP_GT : Op<(cast "R", (op ">", $p0, $p1))>; +def OP_LT : Op<(cast "R", (op "<", $p0, $p1))>; +def OP_NEG : Op<(op "-", $p0)>; +def OP_NOT : Op<(op "~", $p0)>; +def OP_AND : Op<(op "&", $p0, $p1)>; +def OP_OR : Op<(op "|", $p0, $p1)>; +def OP_XOR : Op<(op "^", $p0, $p1)>; +def OP_ANDN : Op<(op "&", $p0, (op "~", $p1))>; +def OP_ORN : Op<(op "|", $p0, (op "~", $p1))>; +def OP_CAST : Op<(cast "R", $p0)>; +def OP_HI : Op<(shuffle $p0, $p0, (highhalf mask0))>; +def OP_LO : Op<(shuffle $p0, $p0, (lowhalf mask0))>; +def OP_CONC : Op<(shuffle $p0, $p1, (add mask0, mask1))>; +def OP_DUP : Op<(dup $p0)>; +def OP_DUP_LN : Op<(splat $p0, $p1)>; +def OP_SEL : Op<(cast "R", (op "|", + (op "&", $p0, (cast $p0, $p1)), + (op "&", (op "~", $p0), (cast $p0, $p2))))>; +def OP_REV16 : Op<(shuffle $p0, $p0, (rev 16, mask0))>; +def OP_REV32 : Op<(shuffle $p0, $p0, (rev 32, mask0))>; +def OP_REV64 : Op<(shuffle $p0, $p0, (rev 64, mask0))>; +def OP_XTN : Op<(call "vcombine", $p0, (call "vmovn", $p1))>; +def OP_SQXTUN : Op<(call "vcombine", (cast $p0, "U", $p0), + (call "vqmovun", $p1))>; +def OP_QXTN : Op<(call "vcombine", $p0, (call "vqmovn", $p1))>; +def OP_VCVT_NA_HI_F16 : Op<(call "vcombine", $p0, (call "vcvt_f16", $p1))>; +def OP_VCVT_NA_HI_F32 : Op<(call "vcombine", $p0, (call "vcvt_f32_f64", $p1))>; +def OP_VCVT_EX_HI_F32 : Op<(call "vcvt_f32_f16", (call "vget_high", $p0))>; +def OP_VCVT_EX_HI_F64 : Op<(call "vcvt_f64_f32", (call "vget_high", $p0))>; +def OP_VCVTX_HI : Op<(call "vcombine", $p0, 
(call "vcvtx_f32", $p1))>; +def OP_REINT : Op<(cast "R", $p0)>; +def OP_ADDHNHi : Op<(call "vcombine", $p0, (call "vaddhn", $p1, $p2))>; +def OP_RADDHNHi : Op<(call "vcombine", $p0, (call "vraddhn", $p1, $p2))>; +def OP_SUBHNHi : Op<(call "vcombine", $p0, (call "vsubhn", $p1, $p2))>; +def OP_RSUBHNHi : Op<(call "vcombine", $p0, (call "vrsubhn", $p1, $p2))>; +def OP_ABDL : Op<(cast "R", (call "vmovl", (cast $p0, "U", + (call "vabd", $p0, $p1))))>; +def OP_ABDLHi : Op<(call "vabdl", (call "vget_high", $p0), + (call "vget_high", $p1))>; +def OP_ABA : Op<(op "+", $p0, (call "vabd", $p1, $p2))>; +def OP_ABAL : Op<(op "+", $p0, (call "vabdl", $p1, $p2))>; +def OP_ABALHi : Op<(call "vabal", $p0, (call "vget_high", $p1), + (call "vget_high", $p2))>; +def OP_QDMULLHi : Op<(call "vqdmull", (call "vget_high", $p0), + (call "vget_high", $p1))>; +def OP_QDMULLHi_N : Op<(call "vqdmull_n", (call "vget_high", $p0), $p1)>; +def OP_QDMLALHi : Op<(call "vqdmlal", $p0, (call "vget_high", $p1), + (call "vget_high", $p2))>; +def OP_QDMLALHi_N : Op<(call "vqdmlal_n", $p0, (call "vget_high", $p1), $p2)>; +def OP_QDMLSLHi : Op<(call "vqdmlsl", $p0, (call "vget_high", $p1), + (call "vget_high", $p2))>; +def OP_QDMLSLHi_N : Op<(call "vqdmlsl_n", $p0, (call "vget_high", $p1), $p2)>; +def OP_DIV : Op<(op "/", $p0, $p1)>; +def OP_LONG_HI : Op<(cast "R", (call (name_replace "_high_", "_"), + (call "vget_high", $p0), $p1))>; +def OP_NARROW_HI : Op<(cast "R", (call "vcombine", + (cast "R", "H", $p0), + (cast "R", "H", + (call (name_replace "_high_", "_"), + $p1, $p2))))>; +def OP_MOVL_HI : LOp<[(save_temp $a1, (call "vget_high", $p0)), + (cast "R", + (call "vshll_n", $a1, (literal "int32_t", "0")))]>; +def OP_COPY_LN : Op<(call "vset_lane", (call "vget_lane", $p2, $p3), $p0, $p1)>; +def OP_SCALAR_MUL_LN : Op<(op "*", $p0, (call "vget_lane", $p1, $p2))>; +def OP_SCALAR_MULX_LN : Op<(call "vmulx", $p0, (call "vget_lane", $p1, $p2))>; +def OP_SCALAR_VMULX_LN : LOp<[(save_temp $x, (call "vget_lane", 
$p0, + (literal "int32_t", "0"))), + (save_temp $y, (call "vget_lane", $p1, $p2)), + (save_temp $z, (call "vmulx", $x, $y)), + (call "vset_lane", $z, $p0, $p2)]>; +def OP_SCALAR_VMULX_LNQ : LOp<[(save_temp $x, (call "vget_lane", $p0, + (literal "int32_t", "0"))), + (save_temp $y, (call "vget_lane", $p1, $p2)), + (save_temp $z, (call "vmulx", $x, $y)), + (call "vset_lane", $z, $p0, (literal "int32_t", + "0"))]>; +class ScalarMulOp<string opname> : + Op<(call opname, $p0, (call "vget_lane", $p1, $p2))>; + +def OP_SCALAR_QDMULL_LN : ScalarMulOp<"vqdmull">; +def OP_SCALAR_QDMULH_LN : ScalarMulOp<"vqdmulh">; +def OP_SCALAR_QRDMULH_LN : ScalarMulOp<"vqrdmulh">; + +def OP_SCALAR_HALF_GET_LN : Op<(bitcast "float16_t", + (call "vget_lane", + (bitcast "int16x4_t", $p0), $p1))>; +def OP_SCALAR_HALF_GET_LNQ : Op<(bitcast "float16_t", + (call "vget_lane", + (bitcast "int16x8_t", $p0), $p1))>; +def OP_SCALAR_HALF_SET_LN : Op<(bitcast "float16x4_t", + (call "vset_lane", + (bitcast "int16_t", $p0), + (bitcast "int16x4_t", $p1), $p2))>; +def OP_SCALAR_HALF_SET_LNQ : Op<(bitcast "float16x8_t", + (call "vset_lane", + (bitcast "int16_t", $p0), + (bitcast "int16x8_t", $p1), $p2))>; -// size modifiers: -// S: scalar, only used for function mangling. 
-// U: unsigned -// Q: 128b -// H: 128b without mangling 'q' -// P: polynomial +//===----------------------------------------------------------------------===// +// Instructions +//===----------------------------------------------------------------------===// //////////////////////////////////////////////////////////////////////////////// // E.3.1 Addition @@ -398,15 +655,19 @@ def VSET_LANE : IInst<"vset_lane", "dsdi", //////////////////////////////////////////////////////////////////////////////// // E.3.18 Initialize a vector from bit pattern -def VCREATE : NoTestOpInst<"vcreate", "dl", "csihfUcUsUiUlPcPsl", OP_CAST>; +def VCREATE : NoTestOpInst<"vcreate", "dl", "csihfUcUsUiUlPcPsl", OP_CAST> { + let BigEndianSafe = 1; +} //////////////////////////////////////////////////////////////////////////////// // E.3.19 Set all lanes to same value let InstName = "vmov" in { def VDUP_N : WOpInst<"vdup_n", "ds", - "UcUsUicsiPcPsfQUcQUsQUiQcQsQiQPcQPsQflUlQlQUl", OP_DUP>; + "UcUsUicsiPcPshfQUcQUsQUiQcQsQiQPcQPsQhQflUlQlQUl", + OP_DUP>; def VMOV_N : WOpInst<"vmov_n", "ds", - "UcUsUicsiPcPsfQUcQUsQUiQcQsQiQPcQPsQflUlQlQUl", OP_DUP>; + "UcUsUicsiPcPshfQUcQUsQUiQcQsQiQPcQPsQhQflUlQlQUl", + OP_DUP>; } let InstName = "" in def VDUP_LANE: WOpInst<"vdup_lane", "dgi", @@ -530,7 +791,11 @@ def VUZP : WInst<"vuzp", "2dd", "csiUcUsUifPcPsQcQsQiQUcQUsQUiQfQPcQPs">; // E.3.31 Vector reinterpret cast operations def VREINTERPRET : NoTestOpInst<"vreinterpret", "dd", - "csilUcUsUiUlhfPcPsQcQsQiQlQUcQUsQUiQUlQhQfQPcQPs", OP_REINT>; + "csilUcUsUiUlhfPcPsQcQsQiQlQUcQUsQUiQUlQhQfQPcQPs", OP_REINT> { + let CartesianProductOfTypes = 1; + let ArchGuard = "!defined(__aarch64__)"; + let BigEndianSafe = 1; +} //////////////////////////////////////////////////////////////////////////////// // Vector fused multiply-add operations @@ -540,87 +805,66 @@ def VFMA : SInst<"vfma", "dddd", "fQf">; //////////////////////////////////////////////////////////////////////////////// // AArch64 Intrinsics -let isA64 
= 1 in { +let ArchGuard = "defined(__aarch64__)" in { //////////////////////////////////////////////////////////////////////////////// // Load/Store -// With additional QUl, Ql, d, Qd, Pl, QPl type. -def LD1 : WInst<"vld1", "dc", - "QUcQUsQUiQUlQcQsQiQlQhQfQdQPcQPsUcUsUiUlcsilhfdPcPsPlQPl">; -def LD2 : WInst<"vld2", "2c", - "QUcQUsQUiQUlQcQsQiQlQhQfQdQPcQPsUcUsUiUlcsilhfdPcPsPlQPl">; -def LD3 : WInst<"vld3", "3c", - "QUcQUsQUiQUlQcQsQiQlQhQfQdQPcQPsUcUsUiUlcsilhfdPcPsPlQPl">; -def LD4 : WInst<"vld4", "4c", - "QUcQUsQUiQUlQcQsQiQlQhQfQdQPcQPsUcUsUiUlcsilhfdPcPsPlQPl">; -def ST1 : WInst<"vst1", "vpd", - "QUcQUsQUiQUlQcQsQiQlQhQfQdQPcQPsUcUsUiUlcsilhfdPcPsPlQPl">; -def ST2 : WInst<"vst2", "vp2", - "QUcQUsQUiQUlQcQsQiQlQhQfQdQPcQPsUcUsUiUlcsilhfdPcPsPlQPl">; -def ST3 : WInst<"vst3", "vp3", - "QUcQUsQUiQUlQcQsQiQlQhQfQdQPcQPsUcUsUiUlcsilhfdPcPsPlQPl">; -def ST4 : WInst<"vst4", "vp4", - "QUcQUsQUiQUlQcQsQiQlQhQfQdQPcQPsUcUsUiUlcsilhfdPcPsPlQPl">; +def LD1 : WInst<"vld1", "dc", "dQdPlQPl">; +def LD2 : WInst<"vld2", "2c", "QUlQldQdPlQPl">; +def LD3 : WInst<"vld3", "3c", "QUlQldQdPlQPl">; +def LD4 : WInst<"vld4", "4c", "QUlQldQdPlQPl">; +def ST1 : WInst<"vst1", "vpd", "dQdPlQPl">; +def ST2 : WInst<"vst2", "vp2", "QUlQldQdPlQPl">; +def ST3 : WInst<"vst3", "vp3", "QUlQldQdPlQPl">; +def ST4 : WInst<"vst4", "vp4", "QUlQldQdPlQPl">; def LD1_X2 : WInst<"vld1_x2", "2c", - "QUcQUsQUiQUlQcQsQiQlQhQfQdQPcQPsQPlUcUsUiUlcsilhfdPcPsPl">; + "QUcQUsQUiQcQsQiQhQfQPcQPsUcUsUiUlcsilhfPcPsQUlQldQdPlQPl">; def LD3_x3 : WInst<"vld1_x3", "3c", - "QUcQUsQUiQUlQcQsQiQlQhQfQdQPcQPsQPlUcUsUiUlcsilhfdPcPsPl">; + "QUcQUsQUiQcQsQiQhQfQPcQPsUcUsUiUlcsilhfPcPsQUlQldQdPlQPl">; def LD4_x4 : WInst<"vld1_x4", "4c", - "QUcQUsQUiQUlQcQsQiQlQhQfQdQPcQPsQPlUcUsUiUlcsilhfdPcPsPl">; + "QUcQUsQUiQcQsQiQhQfQPcQPsUcUsUiUlcsilhfPcPsQUlQldQdPlQPl">; def ST1_X2 : WInst<"vst1_x2", "vp2", - "QUcQUsQUiQUlQcQsQiQlQhQfQdQPcQPsQPlUcUsUiUlcsilhfdPcPsPl">; + "QUcQUsQUiQcQsQiQhQfQPcQPsUcUsUiUlcsilhfPcPsQUlQldQdPlQPl">; def 
ST1_X3 : WInst<"vst1_x3", "vp3", - "QUcQUsQUiQUlQcQsQiQlQhQfQdQPcQPsQPlUcUsUiUlcsilhfdPcPsPl">; + "QUcQUsQUiQcQsQiQhQfQPcQPsUcUsUiUlcsilhfPcPsQUlQldQdPlQPl">; def ST1_X4 : WInst<"vst1_x4", "vp4", - "QUcQUsQUiQUlQcQsQiQlQhQfQdQPcQPsQPlUcUsUiUlcsilhfdPcPsPl">; - -// With additional QUl, Ql, d, Qd, Pl, QPl type. -def LD1_LANE : WInst<"vld1_lane", "dcdi", - "QUcQUsQUiQUlQcQsQiQlQhQfQdQPcQPsQPlUcUsUiUlcsilhfdPcPsPl">; -def LD2_LANE : WInst<"vld2_lane", "2c2i", - "QUcQUsQUiQUlQcQsQiQlQhQfQdQPcQPsQPlUcUsUiUlcsilhfdPcPsPl">; -def LD3_LANE : WInst<"vld3_lane", "3c3i", - "QUcQUsQUiQUlQcQsQiQlQhQfQdQPcQPsQPlUcUsUiUlcsilhfdPcPsPl">; -def LD4_LANE : WInst<"vld4_lane", "4c4i", - "QUcQUsQUiQUlQcQsQiQlQhQfQdQPcQPsQPlUcUsUiUlcsilhfdPcPsPl">; -def ST1_LANE : WInst<"vst1_lane", "vpdi", - "QUcQUsQUiQUlQcQsQiQlQhQfQdQPcQPsQPlUcUsUiUlcsilhfdPcPsPl">; -def ST2_LANE : WInst<"vst2_lane", "vp2i", - "QUcQUsQUiQUlQcQsQiQlQhQfQdQPcQPsQPlUcUsUiUlcsilhfdPcPsPl">; -def ST3_LANE : WInst<"vst3_lane", "vp3i", - "QUcQUsQUiQUlQcQsQiQlQhQfQdQPcQPsQPlUcUsUiUlcsilhfdPcPsPl">; -def ST4_LANE : WInst<"vst4_lane", "vp4i", - "QUcQUsQUiQUlQcQsQiQlQhQfQdQPcQPsQPlUcUsUiUlcsilhfdPcPsPl">; - -def LD1_DUP : WInst<"vld1_dup", "dc", - "QUcQUsQUiQUlQcQsQiQlQhQfQdQPcQPsQPlUcUsUiUlcsilhfdPcPsPl">; + "QUcQUsQUiQcQsQiQhQfQPcQPsUcUsUiUlcsilhfPcPsQUlQldQdPlQPl">; + +def LD1_LANE : WInst<"vld1_lane", "dcdi", "dQdPlQPl">; +def LD2_LANE : WInst<"vld2_lane", "2c2i", "lUlQcQUcQPcQlQUldQdPlQPl">; +def LD3_LANE : WInst<"vld3_lane", "3c3i", "lUlQcQUcQPcQlQUldQdPlQPl">; +def LD4_LANE : WInst<"vld4_lane", "4c4i", "lUlQcQUcQPcQlQUldQdPlQPl">; +def ST1_LANE : WInst<"vst1_lane", "vpdi", "dQdPlQPl">; +def ST2_LANE : WInst<"vst2_lane", "vp2i", "lUlQcQUcQPcQlQUldQdPlQPl">; +def ST3_LANE : WInst<"vst3_lane", "vp3i", "lUlQcQUcQPcQlQUldQdPlQPl">; +def ST4_LANE : WInst<"vst4_lane", "vp4i", "lUlQcQUcQPcQlQUldQdPlQPl">; + +def LD1_DUP : WInst<"vld1_dup", "dc", "dQdPlQPl">; def LD2_DUP : WInst<"vld2_dup", "2c", - 
"QUcQUsQUiQUlQcQsQiQlQhQfQdQPcQPsQPlUcUsUiUlcsilhfdPcPsPl">; + "QUcQUsQUiQUlQcQsQiQlQhQfQdQPcQPsQPldPl">; def LD3_DUP : WInst<"vld3_dup", "3c", - "QUcQUsQUiQUlQcQsQiQlQhQfQdQPcQPsQPlUcUsUiUlcsilhfdPcPsPl">; + "QUcQUsQUiQUlQcQsQiQlQhQfQdQPcQPsQPldPl">; def LD4_DUP : WInst<"vld4_dup", "4c", - "QUcQUsQUiQUlQcQsQiQlQhQfQdQPcQPsQPlUcUsUiUlcsilhfdPcPsPl">; + "QUcQUsQUiQUlQcQsQiQlQhQfQdQPcQPsQPldPl">; + +def VLDRQ : WInst<"vldrq", "sc", "Pk">; +def VSTRQ : WInst<"vstrq", "vps", "Pk">; //////////////////////////////////////////////////////////////////////////////// // Addition -// With additional d, Qd type. -def ADD : IOpInst<"vadd", "ddd", "csilfdUcUsUiUlQcQsQiQlQfQUcQUsQUiQUlQd", - OP_ADD>; +def ADD : IOpInst<"vadd", "ddd", "dQd", OP_ADD>; //////////////////////////////////////////////////////////////////////////////// // Subtraction -// With additional Qd type. -def SUB : IOpInst<"vsub", "ddd", "csildfUcUsUiUlQcQsQiQlQfQUcQUsQUiQUlQd", - OP_SUB>; +def SUB : IOpInst<"vsub", "ddd", "dQd", OP_SUB>; //////////////////////////////////////////////////////////////////////////////// // Multiplication -// With additional Qd type. -def MUL : IOpInst<"vmul", "ddd", "csifdUcUsUiQcQsQiQfQUcQUsQUiQd", OP_MUL>; -def MLA : IOpInst<"vmla", "dddd", "csifdUcUsUiQcQsQiQfQUcQUsQUiQd", OP_MLA>; -def MLS : IOpInst<"vmls", "dddd", "csifdUcUsUiQcQsQiQfQUcQUsQUiQd", OP_MLS>; +def MUL : IOpInst<"vmul", "ddd", "dQd", OP_MUL>; +def MLA : IOpInst<"vmla", "dddd", "dQd", OP_MLA>; +def MLS : IOpInst<"vmls", "dddd", "dQd", OP_MLS>; //////////////////////////////////////////////////////////////////////////////// // Multiplication Extended @@ -632,34 +876,33 @@ def FDIV : IOpInst<"vdiv", "ddd", "fdQfQd", OP_DIV>; //////////////////////////////////////////////////////////////////////////////// // Vector fused multiply-add operations -// With additional d, Qd type. 
-def FMLA : SInst<"vfma", "dddd", "fdQfQd">; +def FMLA : SInst<"vfma", "dddd", "dQd">; def FMLS : SInst<"vfms", "dddd", "fdQfQd">; //////////////////////////////////////////////////////////////////////////////// -// MUL, FMA, FMS definitions with scalar argument +// MUL, MLA, MLS, FMA, FMS definitions with scalar argument def VMUL_N_A64 : IOpInst<"vmul_n", "dds", "Qd", OP_MUL_N>; -def FMLA_N : SOpInst<"vfma_n", "ddds", "fQf", OP_FMLA_N>; -def FMLS_N : SOpInst<"vfms_n", "ddds", "fQf", OP_FMLS_N>; + +def FMLA_N : SOpInst<"vfma_n", "ddds", "fQfQd", OP_FMLA_N>; +def FMLS_N : SOpInst<"vfms_n", "ddds", "fQfQd", OP_FMLS_N>; + +def MLA_N : SOpInst<"vmla_n", "ddds", "Qd", OP_MLA_N>; +def MLS_N : SOpInst<"vmls_n", "ddds", "Qd", OP_MLS_N>; //////////////////////////////////////////////////////////////////////////////// // Logical operations -// With additional Qd, Ql, QPl type. -def BSL : SInst<"vbsl", "dudd", - "csilUcUsUiUlfdPcPsQcQsQiQlQUcQUsQUiQUlQfQPcQPsQdPlQPl">; +def BSL : SInst<"vbsl", "dudd", "dPlQdQPl">; //////////////////////////////////////////////////////////////////////////////// // Absolute Difference -// With additional Qd type. -def ABD : SInst<"vabd", "ddd", "csiUcUsUifdQcQsQiQUcQUsQUiQfQd">; +def ABD : SInst<"vabd", "ddd", "dQd">; //////////////////////////////////////////////////////////////////////////////// // saturating absolute/negate -// With additional Qd/Ql type. 
-def ABS : SInst<"vabs", "dd", "csilfdQcQsQiQfQlQd">; -def QABS : SInst<"vqabs", "dd", "csilQcQsQiQl">; -def NEG : SOpInst<"vneg", "dd", "csilfdQcQsQiQfQdQl", OP_NEG>; -def QNEG : SInst<"vqneg", "dd", "csilQcQsQiQl">; +def ABS : SInst<"vabs", "dd", "dQdlQl">; +def QABS : SInst<"vqabs", "dd", "lQl">; +def NEG : SOpInst<"vneg", "dd", "dlQdQl", OP_NEG>; +def QNEG : SInst<"vqneg", "dd", "lQl">; //////////////////////////////////////////////////////////////////////////////// // Signed Saturating Accumulated of Unsigned Value @@ -671,9 +914,8 @@ def USQADD : SInst<"vsqadd", "ddd", "UcUsUiUlQUcQUsQUiQUl">; //////////////////////////////////////////////////////////////////////////////// // Reciprocal/Sqrt -// With additional d, Qd type. -def FRECPS : IInst<"vrecps", "ddd", "fdQfQd">; -def FRSQRTS : IInst<"vrsqrts", "ddd", "fdQfQd">; +def FRECPS : IInst<"vrecps", "ddd", "dQd">; +def FRSQRTS : IInst<"vrsqrts", "ddd", "dQd">; //////////////////////////////////////////////////////////////////////////////// // bitwise reverse @@ -693,13 +935,13 @@ def QXTN2 : SOpInst<"vqmovn_high", "qhk", "silUsUiUl", OP_QXTN>; //////////////////////////////////////////////////////////////////////////////// // Converting vectors -def VCVT_HIGH_F16 : SOpInst<"vcvt_high_f16", "qhj", "f", OP_VCVT_NA_HI>; -def VCVT_HIGH_F32_F16 : SOpInst<"vcvt_high_f32", "wk", "h", OP_VCVT_EX_HI>; -def VCVT_F32_F64 : SInst<"vcvt_f32_f64", "fj", "d">; -def VCVT_HIGH_F32_F64 : SOpInst<"vcvt_high_f32", "qfj", "d", OP_VCVT_NA_HI>; +def VCVT_HIGH_F16 : SOpInst<"vcvt_high_f16", "qhj", "f", OP_VCVT_NA_HI_F16>; +def VCVT_HIGH_F32_F16 : SOpInst<"vcvt_high_f32", "wk", "h", OP_VCVT_EX_HI_F32>; +def VCVT_F32_F64 : SInst<"vcvt_f32_f64", "md", "Qd">; +def VCVT_HIGH_F32_F64 : SOpInst<"vcvt_high_f32", "qfj", "d", OP_VCVT_NA_HI_F32>; def VCVT_F64_F32 : SInst<"vcvt_f64_f32", "wd", "f">; def VCVT_F64 : SInst<"vcvt_f64", "Fd", "lUlQlQUl">; -def VCVT_HIGH_F64_F32 : SOpInst<"vcvt_high_f64", "wj", "f", OP_VCVT_EX_HI>; +def 
VCVT_HIGH_F64_F32 : SOpInst<"vcvt_high_f64", "wj", "f", OP_VCVT_EX_HI_F64>; def VCVTX_F32_F64 : SInst<"vcvtx_f32", "fj", "d">; def VCVTX_HIGH_F32_F64 : SOpInst<"vcvtx_high_f32", "qfj", "d", OP_VCVTX_HI>; def FRINTN : SInst<"vrndn", "dd", "fdQfQd">; @@ -711,47 +953,22 @@ def FRINTZ : SInst<"vrnd", "dd", "fdQfQd">; def FRINTI : SInst<"vrndi", "dd", "fdQfQd">; def VCVT_S64 : SInst<"vcvt_s64", "xd", "dQd">; def VCVT_U64 : SInst<"vcvt_u64", "ud", "dQd">; -def FCVTNS_S32 : SInst<"vcvtn_s32", "xd", "fQf">; -def FCVTNS_S64 : SInst<"vcvtn_s64", "xd", "dQd">; -def FCVTNU_S32 : SInst<"vcvtn_u32", "ud", "fQf">; -def FCVTNU_S64 : SInst<"vcvtn_u64", "ud", "dQd">; -def FCVTPS_S32 : SInst<"vcvtp_s32", "xd", "fQf">; -def FCVTPS_S64 : SInst<"vcvtp_s64", "xd", "dQd">; -def FCVTPU_S32 : SInst<"vcvtp_u32", "ud", "fQf">; -def FCVTPU_S64 : SInst<"vcvtp_u64", "ud", "dQd">; -def FCVTMS_S32 : SInst<"vcvtm_s32", "xd", "fQf">; -def FCVTMS_S64 : SInst<"vcvtm_s64", "xd", "dQd">; -def FCVTMU_S32 : SInst<"vcvtm_u32", "ud", "fQf">; -def FCVTMU_S64 : SInst<"vcvtm_u64", "ud", "dQd">; -def FCVTAS_S32 : SInst<"vcvta_s32", "xd", "fQf">; -def FCVTAS_S64 : SInst<"vcvta_s64", "xd", "dQd">; -def FCVTAU_S32 : SInst<"vcvta_u32", "ud", "fQf">; -def FCVTAU_S64 : SInst<"vcvta_u64", "ud", "dQd">; -def FRECPE : SInst<"vrecpe", "dd", "fdUiQfQUiQd">; -def FRSQRTE : SInst<"vrsqrte", "dd", "fdUiQfQUiQd">; +def FRECPE : SInst<"vrecpe", "dd", "dQd">; +def FRSQRTE : SInst<"vrsqrte", "dd", "dQd">; def FSQRT : SInst<"vsqrt", "dd", "fdQfQd">; //////////////////////////////////////////////////////////////////////////////// // Comparison -// With additional Qd, Ql, QPl type. -def FCAGE : IInst<"vcage", "udd", "fdQfQd">; -def FCAGT : IInst<"vcagt", "udd", "fdQfQd">; -def FCALE : IInst<"vcale", "udd", "fdQfQd">; -def FCALT : IInst<"vcalt", "udd", "fdQfQd">; -// With additional Ql, QUl, Qd types. 
-def CMTST : WInst<"vtst", "udd", - "csiUcUsUiPcPsQcQsQiQUcQUsQUiQPcQPslUlQlQUlPlQPl">; -// With additional l, Ul,d, Qd, Ql, QUl, Qd types. -def CFMEQ : SOpInst<"vceq", "udd", - "csilfUcUsUiUlPcQcdQdQsQiQfQUcQUsQUiQUlQlQPcPlQPl", OP_EQ>; -def CFMGE : SOpInst<"vcge", "udd", - "csilfUcUsUiUlQcQsQiQlQfQUcQUsQUiQUldQd", OP_GE>; -def CFMLE : SOpInst<"vcle", "udd", - "csilfUcUsUiUlQcQsQiQlQfQUcQUsQUiQUldQd", OP_LE>; -def CFMGT : SOpInst<"vcgt", "udd", - "csilfUcUsUiUlQcQsQiQlQfQUcQUsQUiQUldQd", OP_GT>; -def CFMLT : SOpInst<"vclt", "udd", - "csilfUcUsUiUlQcQsQiQlQfQUcQUsQUiQUldQd", OP_LT>; +def FCAGE : IInst<"vcage", "udd", "dQd">; +def FCAGT : IInst<"vcagt", "udd", "dQd">; +def FCALE : IInst<"vcale", "udd", "dQd">; +def FCALT : IInst<"vcalt", "udd", "dQd">; +def CMTST : WInst<"vtst", "udd", "lUlPlQlQUlQPl">; +def CFMEQ : SOpInst<"vceq", "udd", "lUldQdQlQUlPlQPl", OP_EQ>; +def CFMGE : SOpInst<"vcge", "udd", "lUldQdQlQUl", OP_GE>; +def CFMLE : SOpInst<"vcle", "udd", "lUldQdQlQUl", OP_LE>; +def CFMGT : SOpInst<"vcgt", "udd", "lUldQdQlQUl", OP_GT>; +def CFMLT : SOpInst<"vclt", "udd", "lUldQdQlQUl", OP_LT>; def CMEQ : SInst<"vceqz", "ud", "csilfUcUsUiUlPcPsPlQcQsQiQlQfQUcQUsQUiQUlQPcQPsdQdQPl">; @@ -762,9 +979,8 @@ def CMLT : SInst<"vcltz", "ud", "csilfdQcQsQiQlQfQd">; //////////////////////////////////////////////////////////////////////////////// // Max/Min Integer -// With additional Qd type. -def MAX : SInst<"vmax", "ddd", "csiUcUsUifdQcQsQiQUcQUsQUiQfQd">; -def MIN : SInst<"vmin", "ddd", "csiUcUsUifdQcQsQiQUcQUsQUiQfQd">; +def MAX : SInst<"vmax", "ddd", "dQd">; +def MIN : SInst<"vmin", "ddd", "dQd">; //////////////////////////////////////////////////////////////////////////////// // MaxNum/MinNum Floating Point @@ -773,9 +989,8 @@ def FMINNM : SInst<"vminnm", "ddd", "fdQfQd">; //////////////////////////////////////////////////////////////////////////////// // Pairwise Max/Min -// With additional Qc Qs Qi QUc QUs QUi Qf Qd types. 
-def MAXP : SInst<"vpmax", "ddd", "csiUcUsUifQcQsQiQUcQUsQUiQfQd">; -def MINP : SInst<"vpmin", "ddd", "csiUcUsUifQcQsQiQUcQUsQUiQfQd">; +def MAXP : SInst<"vpmax", "ddd", "QcQsQiQUcQUsQUiQfQd">; +def MINP : SInst<"vpmin", "ddd", "QcQsQiQUcQUsQUiQfQd">; //////////////////////////////////////////////////////////////////////////////// // Pairwise MaxNum/MinNum Floating Point @@ -784,8 +999,7 @@ def FMINNMP : SInst<"vpminnm", "ddd", "fQfQd">; //////////////////////////////////////////////////////////////////////////////// // Pairwise Addition -// With additional Qc Qs Qi QUc QUs QUi Qf Qd types. -def ADDP : IInst<"vpadd", "ddd", "csiUcUsUifQcQsQiQlQUcQUsQUiQUlQfQd">; +def ADDP : IInst<"vpadd", "ddd", "QcQsQiQlQUcQUsQUiQUlQfQd">; //////////////////////////////////////////////////////////////////////////////// // Shifts by constant @@ -795,11 +1009,8 @@ def SHLL_HIGH_N : SOpInst<"vshll_high_n", "ndi", "HcHsHiHUcHUsHUi", OP_LONG_HI>; //////////////////////////////////////////////////////////////////////////////// -// Shifts with insert, with additional Ql, QPl type. 
-def SRI_N : WInst<"vsri_n", "dddi", - "csilUcUsUiUlPcPsQcQsQiQlQUcQUsQUiQUlQPcQPsPlQPl">; -def SLI_N : WInst<"vsli_n", "dddi", - "csilUcUsUiUlPcPsQcQsQiQlQUcQUsQUiQUlQPcQPsPlQPl">; +def SRI_N : WInst<"vsri_n", "dddi", "PlQPl">; +def SLI_N : WInst<"vsli_n", "dddi", "PlQPl">; // Right shift narrow high def SHRN_HIGH_N : IOpInst<"vshrn_high_n", "hmdi", @@ -854,44 +1065,40 @@ def VQDMLAL_HIGH : SOpInst<"vqdmlal_high", "wwkk", "si", OP_QDMLALHi>; def VQDMLAL_HIGH_N : SOpInst<"vqdmlal_high_n", "wwks", "si", OP_QDMLALHi_N>; def VQDMLSL_HIGH : SOpInst<"vqdmlsl_high", "wwkk", "si", OP_QDMLSLHi>; def VQDMLSL_HIGH_N : SOpInst<"vqdmlsl_high_n", "wwks", "si", OP_QDMLSLHi_N>; +def VMULL_P64 : SInst<"vmull", "rss", "Pl">; +def VMULL_HIGH_P64 : SOpInst<"vmull_high", "rdd", "HPl", OP_MULLHi_P64>; + //////////////////////////////////////////////////////////////////////////////// // Extract or insert element from vector -def GET_LANE : IInst<"vget_lane", "sdi", - "csilPcPsUcUsUiUlQcQsQiQlQUcQUsQUiQUlPcPsQPcQPsfdQfQdPlQPl">; -def SET_LANE : IInst<"vset_lane", "dsdi", - "csilPcPsUcUsUiUlQcQsQiQlQUcQUsQUiQUlPcPsQPcQPsfdQfQdPlQPl">; +def GET_LANE : IInst<"vget_lane", "sdi", "dQdPlQPl">; +def SET_LANE : IInst<"vset_lane", "dsdi", "dQdPlQPl">; def COPY_LANE : IOpInst<"vcopy_lane", "ddidi", - "csilPcPsUcUsUiUlPcPsPlfd", OP_COPY_LN>; + "csilUcUsUiUlPcPsPlfd", OP_COPY_LN>; def COPYQ_LANE : IOpInst<"vcopy_lane", "ddigi", - "QcQsQiQlQUcQUsQUiQUlQPcQPsQfQdQPl", OP_COPYQ_LN>; + "QcQsQiQlQUcQUsQUiQUlQPcQPsQfQdQPl", OP_COPY_LN>; def COPY_LANEQ : IOpInst<"vcopy_laneq", "ddiki", - "csilPcPsPlUcUsUiUlfd", OP_COPY_LNQ>; + "csilPcPsPlUcUsUiUlfd", OP_COPY_LN>; def COPYQ_LANEQ : IOpInst<"vcopy_laneq", "ddidi", "QcQsQiQlQUcQUsQUiQUlQPcQPsQfQdQPl", OP_COPY_LN>; //////////////////////////////////////////////////////////////////////////////// // Set all lanes to same value -def VDUP_LANE1: WOpInst<"vdup_lane", "dgi", - "csilPcPsUcUsUiUlhfdQcQsQiQlQPcQPsQUcQUsQUiQUlQhQfQdPlQPl", - OP_DUP_LN>; -def VDUP_LANE2: 
WOpInst<"vdup_laneq", "dki", - "csilPcPsUcUsUiUlhfdQcQsQiQlQPcQPsQUcQUsQUiQUlQhQfQdPlQPl", +def VDUP_LANE1: WOpInst<"vdup_lane", "dgi", "hdQhQdPlQPl", OP_DUP_LN>; +def VDUP_LANE2: WOpInst<"vdup_laneq", "dji", + "csilUcUsUiUlPcPshfdQcQsQiQlQPcQPsQUcQUsQUiQUlQhQfQdPlQPl", OP_DUP_LN>; -def DUP_N : WOpInst<"vdup_n", "ds", - "UcUsUicsiPcPsfQUcQUsQUiQcQsQiQPcQPsQflUlQlQUldQdPlQPl", - OP_DUP>; -def MOV_N : WOpInst<"vmov_n", "ds", - "UcUsUicsiPcPsfQUcQUsQUiQcQsQiQPcQPsQflUlQlQUldQd", - OP_DUP>; +def DUP_N : WOpInst<"vdup_n", "ds", "dQdPlQPl", OP_DUP>; +def MOV_N : WOpInst<"vmov_n", "ds", "dQd", OP_DUP>; //////////////////////////////////////////////////////////////////////////////// -// Combining vectors, with additional Pl -def COMBINE : NoTestOpInst<"vcombine", "kdd", "csilhfdUcUsUiUlPcPsPl", OP_CONC>; +def COMBINE : NoTestOpInst<"vcombine", "kdd", "dPl", OP_CONC>; //////////////////////////////////////////////////////////////////////////////// -//Initialize a vector from bit pattern, with additional Pl -def CREATE : NoTestOpInst<"vcreate", "dl", "csihfdUcUsUiUlPcPslPl", OP_CAST>; +//Initialize a vector from bit pattern +def CREATE : NoTestOpInst<"vcreate", "dl", "dPl", OP_CAST> { + let BigEndianSafe = 1; +} //////////////////////////////////////////////////////////////////////////////// @@ -901,7 +1108,9 @@ def VMLS_LANEQ : IOpInst<"vmls_laneq", "dddji", "siUsUifQsQiQUsQUiQf", OP_MLS_LN>; def VFMA_LANE : IInst<"vfma_lane", "dddgi", "fdQfQd">; -def VFMA_LANEQ : IInst<"vfma_laneq", "dddji", "fdQfQd">; +def VFMA_LANEQ : IInst<"vfma_laneq", "dddji", "fdQfQd"> { + let isLaneQ = 1; +} def VFMS_LANE : IOpInst<"vfms_lane", "dddgi", "fdQfQd", OP_FMS_LN>; def VFMS_LANEQ : IOpInst<"vfms_laneq", "dddji", "fdQfQd", OP_FMS_LNQ>; @@ -933,7 +1142,7 @@ def VMUL_LANE_A64 : IOpInst<"vmul_lane", "ddgi", "Qd", OP_MUL_LN>; // Note: d type is handled by SCALAR_VMUL_LANEQ def VMUL_LANEQ : IOpInst<"vmul_laneq", "ddji", - "sifUsUiQsQiQfQUsQUiQfQd", OP_MUL_LN>; + "sifUsUiQsQiQUsQUiQfQd", 
OP_MUL_LN>; def VMULL_LANEQ : SOpInst<"vmull_laneq", "wdki", "siUsUi", OP_MULL_LN>; def VMULL_HIGH_LANE : SOpInst<"vmull_high_lane", "wkdi", "siUsUi", OP_MULLHi_LN>; @@ -965,12 +1174,11 @@ def FMINNMV : SInst<"vminnmv", "sd", "fQfQd">; //////////////////////////////////////////////////////////////////////////////// // Newly added Vector Extract for f64 -def VEXT_A64 : WInst<"vext", "dddi", - "cUcPcsUsPsiUilUlfdQcQUcQPcQsQUsQPsQiQUiQlQUlQfQdPlQPl">; +def VEXT_A64 : WInst<"vext", "dddi", "dQdPlQPl">; //////////////////////////////////////////////////////////////////////////////// // Crypto -let isCrypto = 1 in { +let ArchGuard = "__ARM_FEATURE_CRYPTO" in { def AESE : SInst<"vaese", "ddd", "QUc">; def AESD : SInst<"vaesd", "ddd", "QUc">; def AESMC : SInst<"vaesmc", "dd", "QUc">; @@ -990,6 +1198,31 @@ def SHA256SU1 : SInst<"vsha256su1", "dddd", "QUi">; } //////////////////////////////////////////////////////////////////////////////// +// Float -> Int conversions with explicit rounding mode + +let ArchGuard = "__ARM_ARCH >= 8" in { +def FCVTNS_S32 : SInst<"vcvtn_s32", "xd", "fQf">; +def FCVTNU_S32 : SInst<"vcvtn_u32", "ud", "fQf">; +def FCVTPS_S32 : SInst<"vcvtp_s32", "xd", "fQf">; +def FCVTPU_S32 : SInst<"vcvtp_u32", "ud", "fQf">; +def FCVTMS_S32 : SInst<"vcvtm_s32", "xd", "fQf">; +def FCVTMU_S32 : SInst<"vcvtm_u32", "ud", "fQf">; +def FCVTAS_S32 : SInst<"vcvta_s32", "xd", "fQf">; +def FCVTAU_S32 : SInst<"vcvta_u32", "ud", "fQf">; +} + +let ArchGuard = "__ARM_ARCH >= 8 && defined(__aarch64__)" in { +def FCVTNS_S64 : SInst<"vcvtn_s64", "xd", "dQd">; +def FCVTNU_S64 : SInst<"vcvtn_u64", "ud", "dQd">; +def FCVTPS_S64 : SInst<"vcvtp_s64", "xd", "dQd">; +def FCVTPU_S64 : SInst<"vcvtp_u64", "ud", "dQd">; +def FCVTMS_S64 : SInst<"vcvtm_s64", "xd", "dQd">; +def FCVTMU_S64 : SInst<"vcvtm_u64", "ud", "dQd">; +def FCVTAS_S64 : SInst<"vcvta_s64", "xd", "dQd">; +def FCVTAU_S64 : SInst<"vcvta_u64", "ud", "dQd">; +} + 
+//////////////////////////////////////////////////////////////////////////////// // Permutation def VTRN1 : SOpInst<"vtrn1", "ddd", "csiUcUsUifPcPsQcQsQiQlQUcQUsQUiQUlQfQdQPcQPsQPl", OP_TRN1>; @@ -1021,11 +1254,17 @@ def VQTBX4_A64 : WInst<"vqtbx4", "ddDt", "UccPcQUcQcQPc">; //////////////////////////////////////////////////////////////////////////////// // Vector reinterpret cast operations -// With additional d, Qd, pl, Qpl types -def REINTERPRET - : NoTestOpInst<"vreinterpret", "dd", - "csilUcUsUiUlhfdPcPsPlQcQsQiQlQUcQUsQUiQUlQhQfQdQPcQPsQPl", OP_REINT>; +// NeonEmitter implicitly takes the cartesian product of the type string with +// itself during generation so, unlike all other intrinsics, this one should +// include *all* types, not just additional ones. +def VVREINTERPRET + : NoTestOpInst<"vreinterpret", "dd", + "csilUcUsUiUlhfdPcPsPlQcQsQiQlQUcQUsQUiQUlQhQfQdQPcQPsQPlQPk", OP_REINT> { + let CartesianProductOfTypes = 1; + let BigEndianSafe = 1; + let ArchGuard = "__ARM_ARCH >= 8 && defined(__aarch64__)"; +} //////////////////////////////////////////////////////////////////////////////// // Scalar Intrinsics @@ -1042,10 +1281,8 @@ def SCALAR_SUB : SInst<"vsub", "sss", "SlSUl">; def SCALAR_QSUB : SInst<"vqsub", "sss", "ScSsSiSlSUcSUsSUiSUl">; let InstName = "vmov" in { -def VGET_HIGH_A64 : NoTestOpInst<"vget_high", "dk", "csilhfdUcUsUiUlPcPsPl", - OP_HI>; -def VGET_LOW_A64 : NoTestOpInst<"vget_low", "dk", "csilhfdUcUsUiUlPcPsPl", - OP_LO>; +def VGET_HIGH_A64 : NoTestOpInst<"vget_high", "dk", "dPl", OP_HI>; +def VGET_LOW_A64 : NoTestOpInst<"vget_low", "dk", "dPl", OP_LO>; } //////////////////////////////////////////////////////////////////////////////// @@ -1282,11 +1519,11 @@ def SCALAR_UQXTN : SInst<"vqmovn", "zs", "SUsSUiSUl">; // Scalar Floating Point multiply (scalar, by element) def SCALAR_FMUL_LANE : IOpInst<"vmul_lane", "ssdi", "SfSd", OP_SCALAR_MUL_LN>; -def SCALAR_FMUL_LANEQ : IOpInst<"vmul_laneq", "ssji", "SfSd", OP_SCALAR_MUL_LNQ>; +def 
SCALAR_FMUL_LANEQ : IOpInst<"vmul_laneq", "ssji", "SfSd", OP_SCALAR_MUL_LN>; // Scalar Floating Point multiply extended (scalar, by element) def SCALAR_FMULX_LANE : IOpInst<"vmulx_lane", "ssdi", "SfSd", OP_SCALAR_MULX_LN>; -def SCALAR_FMULX_LANEQ : IOpInst<"vmulx_laneq", "ssji", "SfSd", OP_SCALAR_MULX_LNQ>; +def SCALAR_FMULX_LANEQ : IOpInst<"vmulx_laneq", "ssji", "SfSd", OP_SCALAR_MULX_LN>; def SCALAR_VMUL_N : IInst<"vmul_n", "dds", "d">; @@ -1294,7 +1531,9 @@ def SCALAR_VMUL_N : IInst<"vmul_n", "dds", "d">; def SCALAR_VMUL_LANE : IInst<"vmul_lane", "ddgi", "d">; // VMUL_LANEQ d type implemented using scalar mul lane -def SCALAR_VMUL_LANEQ : IInst<"vmul_laneq", "ddji", "d">; +def SCALAR_VMUL_LANEQ : IInst<"vmul_laneq", "ddji", "d"> { + let isLaneQ = 1; +} // VMULX_LANE d type implemented using scalar vmulx_lane def SCALAR_VMULX_LANE : IOpInst<"vmulx_lane", "ddgi", "d", OP_SCALAR_VMULX_LN>; @@ -1312,7 +1551,7 @@ def SCALAR_FMLS_LANEQ : IOpInst<"vfms_laneq", "sssji", "SfSd", OP_FMS_LNQ>; // Signed Saturating Doubling Multiply Long (scalar by element) def SCALAR_SQDMULL_LANE : SOpInst<"vqdmull_lane", "rsdi", "SsSi", OP_SCALAR_QDMULL_LN>; -def SCALAR_SQDMULL_LANEQ : SOpInst<"vqdmull_laneq", "rsji", "SsSi", OP_SCALAR_QDMULL_LNQ>; +def SCALAR_SQDMULL_LANEQ : SOpInst<"vqdmull_laneq", "rsji", "SsSi", OP_SCALAR_QDMULL_LN>; // Signed Saturating Doubling Multiply-Add Long (scalar by element) def SCALAR_SQDMLAL_LANE : SInst<"vqdmlal_lane", "rrsdi", "SsSi">; @@ -1324,15 +1563,18 @@ def SCALAR_SQDMLS_LANEQ : SInst<"vqdmlsl_laneq", "rrsji", "SsSi">; // Scalar Integer Saturating Doubling Multiply Half High (scalar by element) def SCALAR_SQDMULH_LANE : SOpInst<"vqdmulh_lane", "ssdi", "SsSi", OP_SCALAR_QDMULH_LN>; -def SCALAR_SQDMULH_LANEQ : SOpInst<"vqdmulh_laneq", "ssji", "SsSi", OP_SCALAR_QDMULH_LNQ>; +def SCALAR_SQDMULH_LANEQ : SOpInst<"vqdmulh_laneq", "ssji", "SsSi", OP_SCALAR_QDMULH_LN>; // Scalar Integer Saturating Rounding Doubling Multiply Half High def SCALAR_SQRDMULH_LANE 
: SOpInst<"vqrdmulh_lane", "ssdi", "SsSi", OP_SCALAR_QRDMULH_LN>; -def SCALAR_SQRDMULH_LANEQ : SOpInst<"vqrdmulh_laneq", "ssji", "SsSi", OP_SCALAR_QRDMULH_LNQ>; +def SCALAR_SQRDMULH_LANEQ : SOpInst<"vqrdmulh_laneq", "ssji", "SsSi", OP_SCALAR_QRDMULH_LN>; def SCALAR_VDUP_LANE : IInst<"vdup_lane", "sdi", "ScSsSiSlSfSdSUcSUsSUiSUlSPcSPs">; def SCALAR_VDUP_LANEQ : IInst<"vdup_laneq", "sji", "ScSsSiSlSfSdSUcSUsSUiSUlSPcSPs">; -def SCALAR_GET_LANE : IOpInst<"vget_lane", "sdi", "hQh", OP_SCALAR_GET_LN>; -def SCALAR_SET_LANE : IOpInst<"vset_lane", "dsdi", "hQh", OP_SCALAR_SET_LN>; +// FIXME: Rename so it is obvious this only applies to halfs. +def SCALAR_HALF_GET_LANE : IOpInst<"vget_lane", "sdi", "h", OP_SCALAR_HALF_GET_LN>; +def SCALAR_HALF_SET_LANE : IOpInst<"vset_lane", "dsdi", "h", OP_SCALAR_HALF_SET_LN>; +def SCALAR_HALF_GET_LANEQ : IOpInst<"vget_lane", "sdi", "Qh", OP_SCALAR_HALF_GET_LNQ>; +def SCALAR_HALF_SET_LANEQ : IOpInst<"vset_lane", "dsdi", "Qh", OP_SCALAR_HALF_SET_LNQ>; } |