ETISS 0.8.0
Extendable Translating Instruction Set Simulator (version 0.8.0)
|
Go to the source code of this file.
Macros | |
#define | __DEFAULT_FN_ATTRS512 |
#define | __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512bf16"))) |
Typedefs | |
typedef short __m512bh | __attribute__((__vector_size__(64), __aligned__(64))) |
typedef unsigned short | __bfloat16 |
Functions | |
static __inline__ float __DEFAULT_FN_ATTRS | _mm_cvtsbh_ss (__bfloat16 __A) |
Convert One BF16 Data to One Single Float Data. More... | |
static __inline__ __m512bh __DEFAULT_FN_ATTRS512 | _mm512_cvtne2ps_pbh (__m512 __A, __m512 __B) |
Convert Two Packed Single Data to One Packed BF16 Data. More... | |
static __inline__ __m512bh __DEFAULT_FN_ATTRS512 | _mm512_mask_cvtne2ps_pbh (__m512bh __W, __mmask32 __U, __m512 __A, __m512 __B) |
Convert Two Packed Single Data to One Packed BF16 Data. More... | |
static __inline__ __m512bh __DEFAULT_FN_ATTRS512 | _mm512_maskz_cvtne2ps_pbh (__mmask32 __U, __m512 __A, __m512 __B) |
Convert Two Packed Single Data to One Packed BF16 Data. More... | |
static __inline__ __m256bh __DEFAULT_FN_ATTRS512 | _mm512_cvtneps_pbh (__m512 __A) |
Convert Packed Single Data to Packed BF16 Data. More... | |
static __inline__ __m256bh __DEFAULT_FN_ATTRS512 | _mm512_mask_cvtneps_pbh (__m256bh __W, __mmask16 __U, __m512 __A) |
Convert Packed Single Data to Packed BF16 Data. More... | |
static __inline__ __m256bh __DEFAULT_FN_ATTRS512 | _mm512_maskz_cvtneps_pbh (__mmask16 __U, __m512 __A) |
Convert Packed Single Data to Packed BF16 Data. More... | |
static __inline__ __m512 __DEFAULT_FN_ATTRS512 | _mm512_dpbf16_ps (__m512 __D, __m512bh __A, __m512bh __B) |
Dot Product of BF16 Pairs Accumulated into Packed Single Precision. More... | |
static __inline__ __m512 __DEFAULT_FN_ATTRS512 | _mm512_mask_dpbf16_ps (__m512 __D, __mmask16 __U, __m512bh __A, __m512bh __B) |
Dot Product of BF16 Pairs Accumulated into Packed Single Precision. More... | |
static __inline__ __m512 __DEFAULT_FN_ATTRS512 | _mm512_maskz_dpbf16_ps (__mmask16 __U, __m512 __D, __m512bh __A, __m512bh __B) |
Dot Product of BF16 Pairs Accumulated into Packed Single Precision. More... | |
static __inline__ __m512 __DEFAULT_FN_ATTRS512 | _mm512_cvtpbh_ps (__m256bh __A) |
Convert Packed BF16 Data to Packed float Data. More... | |
static __inline__ __m512 __DEFAULT_FN_ATTRS512 | _mm512_maskz_cvtpbh_ps (__mmask16 __U, __m256bh __A) |
Convert Packed BF16 Data to Packed float Data using zeroing mask. More... | |
static __inline__ __m512 __DEFAULT_FN_ATTRS512 | _mm512_mask_cvtpbh_ps (__m512 __S, __mmask16 __U, __m256bh __A) |
Convert Packed BF16 Data to Packed float Data using merging mask. More... | |
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512bf16"))) |
Definition at line 23 of file avx512bf16intrin.h.
#define __DEFAULT_FN_ATTRS512 |
Definition at line 20 of file avx512bf16intrin.h.
typedef short __m256bh __attribute__((__vector_size__(32), __aligned__(32))) |
Definition at line 16 of file avx512bf16intrin.h.
typedef unsigned short __bfloat16 |
Definition at line 18 of file avx512bf16intrin.h.
|
static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_cvtne2ps_pbh (__m512 __A, __m512 __B) |
Convert Two Packed Single Data to One Packed BF16 Data.
This intrinsic corresponds to the VCVTNE2PS2BF16 instruction.
__A | A 512-bit vector of [16 x float]. |
__B | A 512-bit vector of [16 x float]. |
Definition at line 53 of file avx512bf16intrin.h.
Referenced by _mm512_mask_cvtne2ps_pbh(), and _mm512_maskz_cvtne2ps_pbh().
|
static __inline__ __m256bh __DEFAULT_FN_ATTRS512 _mm512_cvtneps_pbh (__m512 __A) |
Convert Packed Single Data to Packed BF16 Data.
This intrinsic corresponds to the VCVTNEPS2BF16 instruction.
__A | A 512-bit vector of [16 x float]. |
Definition at line 114 of file avx512bf16intrin.h.
References _mm256_undefined_si256().
|
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_cvtpbh_ps (__m256bh __A) |
Convert Packed BF16 Data to Packed float Data.
__A | A 256-bit vector of [16 x bfloat]. |
Definition at line 236 of file avx512bf16intrin.h.
References _mm512_castsi512_ps(), _mm512_cvtepi16_epi32(), and _mm512_slli_epi32().
|
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_dpbf16_ps (__m512 __D, __m512bh __A, __m512bh __B) |
Dot Product of BF16 Pairs Accumulated into Packed Single Precision.
This intrinsic corresponds to the VDPBF16PS instruction.
__A | A 512-bit vector of [32 x bfloat]. |
__B | A 512-bit vector of [32 x bfloat]. |
__D | A 512-bit vector of [16 x float]. |
Definition at line 175 of file avx512bf16intrin.h.
References __D.
Referenced by _mm512_mask_dpbf16_ps(), and _mm512_maskz_dpbf16_ps().
|
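static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_mask_cvtne2ps_pbh (__m512bh __W, __mmask32 __U, __m512 __A, __m512 __B) |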
Convert Two Packed Single Data to One Packed BF16 Data.
This intrinsic corresponds to the VCVTNE2PS2BF16 instruction.
__A | A 512-bit vector of [16 x float]. |
__B | A 512-bit vector of [16 x float]. |
__W | A 512-bit vector of [32 x bfloat]. |
__U | A 32-bit mask value specifying what is chosen for each element. A 1 means conversion of __A or __B. A 0 means element from __W. |
Definition at line 76 of file avx512bf16intrin.h.
References _mm512_cvtne2ps_pbh().
|
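static __inline__ __m256bh __DEFAULT_FN_ATTRS512 _mm512_mask_cvtneps_pbh (__m256bh __W, __mmask16 __U, __m512 __A) |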
Convert Packed Single Data to Packed BF16 Data.
This intrinsic corresponds to the VCVTNEPS2BF16 instruction.
__A | A 512-bit vector of [16 x float]. |
__W | A 256-bit vector of [16 x bfloat]. |
__U | A 16-bit mask value specifying what is chosen for each element. A 1 means conversion of __A. A 0 means element from __W. |
Definition at line 135 of file avx512bf16intrin.h.
|
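static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_cvtpbh_ps (__m512 __S, __mmask16 __U, __m256bh __A) |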
Convert Packed BF16 Data to Packed float Data using merging mask.
__S | A 512-bit vector of [16 x float]. Elements are copied from __S when the corresponding mask bit is not set. |
__U | A 16-bit mask. |
__A | A 256-bit vector of [16 x bfloat]. |
Definition at line 270 of file avx512bf16intrin.h.
References _mm512_castsi512_ps(), _mm512_cvtepi16_epi32(), and _mm512_mask_slli_epi32().
|
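static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_dpbf16_ps (__m512 __D, __mmask16 __U, __m512bh __A, __m512bh __B) |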
Dot Product of BF16 Pairs Accumulated into Packed Single Precision.
This intrinsic corresponds to the VDPBF16PS instruction.
__A | A 512-bit vector of [32 x bfloat]. |
__B | A 512-bit vector of [32 x bfloat]. |
__D | A 512-bit vector of [16 x float]. |
__U | A 16-bit mask value specifying what is chosen for each element. A 1 means __A and __B's dot product accumulated with __D. A 0 means __D. |
Definition at line 199 of file avx512bf16intrin.h.
References __D, and _mm512_dpbf16_ps().
|
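static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtne2ps_pbh (__mmask32 __U, __m512 __A, __m512 __B) |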
Convert Two Packed Single Data to One Packed BF16 Data.
This intrinsic corresponds to the VCVTNE2PS2BF16 instruction.
__A | A 512-bit vector of [16 x float]. |
__B | A 512-bit vector of [16 x float]. |
__U | A 32-bit mask value specifying what is chosen for each element. A 1 means conversion of __A or __B. A 0 means element is zero. |
Definition at line 98 of file avx512bf16intrin.h.
References _mm512_cvtne2ps_pbh(), and _mm512_setzero_si512().
|
static __inline__ __m256bh __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtneps_pbh (__mmask16 __U, __m512 __A) |
Convert Packed Single Data to Packed BF16 Data.
This intrinsic corresponds to the VCVTNEPS2BF16 instruction.
__A | A 512-bit vector of [16 x float]. |
__U | A 16-bit mask value specifying what is chosen for each element. A 1 means conversion of __A. A 0 means element is zero. |
Definition at line 154 of file avx512bf16intrin.h.
References _mm256_setzero_si256().
|
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtpbh_ps (__mmask16 __U, __m256bh __A) |
Convert Packed BF16 Data to Packed float Data using zeroing mask.
__U | A 16-bit mask. Elements are zeroed out when the corresponding mask bit is not set. |
__A | A 256-bit vector of [16 x bfloat]. |
Definition at line 252 of file avx512bf16intrin.h.
References _mm512_castsi512_ps(), _mm512_maskz_cvtepi16_epi32(), and _mm512_slli_epi32().
|
static |
Dot Product of BF16 Pairs Accumulated into Packed Single Precision.
This intrinsic corresponds to the VDPBF16PS instruction.
__A | A 512-bit vector of [32 x bfloat]. |
__B | A 512-bit vector of [32 x bfloat]. |
__D | A 512-bit vector of [16 x float]. |
__U | A 16-bit mask value specifying what is chosen for each element. A 1 means __A and __B's dot product accumulated with __D. A 0 means 0. |
Definition at line 223 of file avx512bf16intrin.h.
References __D, _mm512_dpbf16_ps(), and _mm512_setzero_si512().
|
static __inline__ float __DEFAULT_FN_ATTRS _mm_cvtsbh_ss (__bfloat16 __A) |
Convert One BF16 Data to One Single Float Data.
This intrinsic does not correspond to a specific instruction.
__A | A single bfloat16 value. |
Definition at line 36 of file avx512bf16intrin.h.