diff options
Diffstat (limited to 'contrib/llvm-project/clang/lib/Headers/amxintrin.h')
-rw-r--r-- | contrib/llvm-project/clang/lib/Headers/amxintrin.h | 67 |
1 files changed, 49 insertions, 18 deletions
diff --git a/contrib/llvm-project/clang/lib/Headers/amxintrin.h b/contrib/llvm-project/clang/lib/Headers/amxintrin.h
index ec601a58e7c3..baa56f5b28e8 100644
--- a/contrib/llvm-project/clang/lib/Headers/amxintrin.h
+++ b/contrib/llvm-project/clang/lib/Headers/amxintrin.h
@@ -22,6 +22,8 @@
   __attribute__((__always_inline__, __nodebug__, __target__("amx-int8")))
 #define __DEFAULT_FN_ATTRS_BF16                                                \
   __attribute__((__always_inline__, __nodebug__, __target__("amx-bf16")))
+#define __DEFAULT_FN_ATTRS_FP16                                                \
+  __attribute__((__always_inline__, __nodebug__, __target__("amx-fp16")))
 
 /// Load tile configuration from a 64-byte memory location specified by
 /// "mem_addr". The tile configuration includes the tile type palette, the
@@ -290,6 +292,13 @@ _tile_dpbf16ps_internal(unsigned short m, unsigned short n, unsigned short k,
   return __builtin_ia32_tdpbf16ps_internal(m, n, k, dst, src1, src2);
 }
 
+/// This is internal intrinsic. C/C++ user should avoid calling it directly.
+static __inline__ _tile1024i __DEFAULT_FN_ATTRS_FP16
+_tile_dpfp16ps_internal(unsigned short m, unsigned short n, unsigned short k,
+                        _tile1024i dst, _tile1024i src1, _tile1024i src2) {
+  return __builtin_ia32_tdpfp16ps_internal(m, n, k, dst, src1, src2);
+}
+
 /// This struct pack the shape and tile data together for user. We suggest
 /// initializing the struct as early as possible, because compiler depends
 /// on the shape information to do configure. The constant value is preferred
@@ -314,8 +323,8 @@ typedef struct __tile1024i_str {
 /// \param stride
 ///    The stride between the rows' data to be loaded in memory.
 __DEFAULT_FN_ATTRS_TILE
-static void __tile_loadd(__tile1024i *dst, const void *base,
-                         __SIZE_TYPE__ stride) {
+static __inline__ void __tile_loadd(__tile1024i *dst, const void *base,
+                                    __SIZE_TYPE__ stride) {
   dst->tile = _tile_loadd_internal(dst->row, dst->col, base, stride);
 }
 
@@ -335,8 +344,8 @@ static void __tile_loadd(__tile1024i *dst, const void *base,
 /// \param stride
 ///    The stride between the rows' data to be loaded in memory.
 __DEFAULT_FN_ATTRS_TILE
-static void __tile_stream_loadd(__tile1024i *dst, const void *base,
-                                __SIZE_TYPE__ stride) {
+static __inline__ void __tile_stream_loadd(__tile1024i *dst, const void *base,
+                                           __SIZE_TYPE__ stride) {
   dst->tile = _tile_loaddt1_internal(dst->row, dst->col, base, stride);
 }
 
@@ -357,8 +366,8 @@ static void __tile_stream_loadd(__tile1024i *dst, const void *base,
 /// \param src1
 ///    The 2nd source tile. Max size is 1024 Bytes.
 __DEFAULT_FN_ATTRS_INT8
-static void __tile_dpbssd(__tile1024i *dst, __tile1024i src0,
-                          __tile1024i src1) {
+static __inline__ void __tile_dpbssd(__tile1024i *dst, __tile1024i src0,
+                                     __tile1024i src1) {
   dst->tile = _tile_dpbssd_internal(src0.row, src1.col, src0.col, dst->tile,
                                     src0.tile, src1.tile);
 }
@@ -380,8 +389,8 @@ static void __tile_dpbssd(__tile1024i *dst, __tile1024i src0,
 /// \param src1
 ///    The 2nd source tile. Max size is 1024 Bytes.
 __DEFAULT_FN_ATTRS_INT8
-static void __tile_dpbsud(__tile1024i *dst, __tile1024i src0,
-                          __tile1024i src1) {
+static __inline__ void __tile_dpbsud(__tile1024i *dst, __tile1024i src0,
+                                     __tile1024i src1) {
   dst->tile = _tile_dpbsud_internal(src0.row, src1.col, src0.col, dst->tile,
                                     src0.tile, src1.tile);
 }
@@ -403,8 +412,8 @@ static void __tile_dpbsud(__tile1024i *dst, __tile1024i src0,
 /// \param src1
 ///    The 2nd source tile. Max size is 1024 Bytes.
 __DEFAULT_FN_ATTRS_INT8
-static void __tile_dpbusd(__tile1024i *dst, __tile1024i src0,
-                          __tile1024i src1) {
+static __inline__ void __tile_dpbusd(__tile1024i *dst, __tile1024i src0,
+                                     __tile1024i src1) {
   dst->tile = _tile_dpbusd_internal(src0.row, src1.col, src0.col, dst->tile,
                                     src0.tile, src1.tile);
 }
@@ -426,8 +435,8 @@ static void __tile_dpbusd(__tile1024i *dst, __tile1024i src0,
 /// \param src1
 ///    The 2nd source tile. Max size is 1024 Bytes.
 __DEFAULT_FN_ATTRS_INT8
-static void __tile_dpbuud(__tile1024i *dst, __tile1024i src0,
-                          __tile1024i src1) {
+static __inline__ void __tile_dpbuud(__tile1024i *dst, __tile1024i src0,
+                                     __tile1024i src1) {
   dst->tile = _tile_dpbuud_internal(src0.row, src1.col, src0.col, dst->tile,
                                     src0.tile, src1.tile);
 }
@@ -439,14 +448,13 @@ static void __tile_dpbuud(__tile1024i *dst, __tile1024i src0,
 ///
 /// This intrinsic corresponds to the <c> TILESTORED </c> instruction.
 ///
-/// \param dst
-///    A destination tile. Max size is 1024 Bytes.
 /// \param base
 ///    A pointer to base address.
 /// \param stride
 ///    The stride between the rows' data to be stored in memory.
 __DEFAULT_FN_ATTRS_TILE
-static void __tile_stored(void *base, __SIZE_TYPE__ stride, __tile1024i src) {
+static __inline__ void __tile_stored(void *base, __SIZE_TYPE__ stride,
+                                     __tile1024i src) {
   _tile_stored_internal(src.row, src.col, base, stride, src.tile);
 }
 
@@ -459,7 +467,7 @@ static void __tile_stored(void *base, __SIZE_TYPE__ stride, __tile1024i src) {
 /// \param dst
 ///    The destination tile to be zero. Max size is 1024 Bytes.
 __DEFAULT_FN_ATTRS_TILE
-static void __tile_zero(__tile1024i *dst) {
+static __inline__ void __tile_zero(__tile1024i *dst) {
   dst->tile = __builtin_ia32_tilezero_internal(dst->row, dst->col);
 }
 
@@ -479,15 +487,38 @@ static void __tile_zero(__tile1024i *dst) {
 /// \param src1
 ///    The 2nd source tile. Max size is 1024 Bytes.
 __DEFAULT_FN_ATTRS_BF16
-static void __tile_dpbf16ps(__tile1024i *dst, __tile1024i src0,
-                            __tile1024i src1) {
+static __inline__ void __tile_dpbf16ps(__tile1024i *dst, __tile1024i src0,
+                                       __tile1024i src1) {
   dst->tile = _tile_dpbf16ps_internal(src0.row, src1.col, src0.col, dst->tile,
                                       src0.tile, src1.tile);
 }
 
+/// Compute dot-product of FP16 (16-bit) floating-point pairs in tiles src0 and
+/// src1, accumulating the intermediate single-precision (32-bit) floating-point
+/// elements with elements in "dst", and store the 32-bit result back to tile
+/// "dst".
+///
+/// \headerfile <immintrin.h>
+///
+/// This intrinsic corresponds to the <c> TDPFP16PS </c> instruction.
+///
+/// \param dst
+///    The destination tile. Max size is 1024 Bytes.
+/// \param src0
+///    The 1st source tile. Max size is 1024 Bytes.
+/// \param src1
+///    The 2nd source tile. Max size is 1024 Bytes.
+__DEFAULT_FN_ATTRS_FP16
+static __inline__ void __tile_dpfp16ps(__tile1024i *dst, __tile1024i src0,
+                                       __tile1024i src1) {
+  dst->tile = _tile_dpfp16ps_internal(src0.row, src1.col, src0.col, dst->tile,
+                                      src0.tile, src1.tile);
+}
+
 #undef __DEFAULT_FN_ATTRS_TILE
 #undef __DEFAULT_FN_ATTRS_INT8
 #undef __DEFAULT_FN_ATTRS_BF16
+#undef __DEFAULT_FN_ATTRS_FP16
 
 #endif /* __x86_64__ */
 #endif /* __AMXINTRIN_H */