ETISS 0.8.0
Extendable Translating Instruction Set Simulator (version 0.8.0)
|
Go to the source code of this file.
Macros | |
#define | __DEFAULT_FN_ATTRS128 |
#define | __DEFAULT_FN_ATTRS256 |
Typedefs | |
typedef short __m128bh | __attribute__((__vector_size__(16), __aligned__(16))) |
Functions | |
static __inline__ __m128bh __DEFAULT_FN_ATTRS128 | _mm_cvtne2ps_pbh (__m128 __A, __m128 __B) |
Convert Two Packed Single Data to One Packed BF16 Data. More... | |
static __inline__ __m128bh __DEFAULT_FN_ATTRS128 | _mm_mask_cvtne2ps_pbh (__m128bh __W, __mmask8 __U, __m128 __A, __m128 __B) |
Convert Two Packed Single Data to One Packed BF16 Data. More... | |
static __inline__ __m128bh __DEFAULT_FN_ATTRS128 | _mm_maskz_cvtne2ps_pbh (__mmask8 __U, __m128 __A, __m128 __B) |
Convert Two Packed Single Data to One Packed BF16 Data. More... | |
static __inline__ __m256bh __DEFAULT_FN_ATTRS256 | _mm256_cvtne2ps_pbh (__m256 __A, __m256 __B) |
Convert Two Packed Single Data to One Packed BF16 Data. More... | |
static __inline__ __m256bh __DEFAULT_FN_ATTRS256 | _mm256_mask_cvtne2ps_pbh (__m256bh __W, __mmask16 __U, __m256 __A, __m256 __B) |
Convert Two Packed Single Data to One Packed BF16 Data. More... | |
static __inline__ __m256bh __DEFAULT_FN_ATTRS256 | _mm256_maskz_cvtne2ps_pbh (__mmask16 __U, __m256 __A, __m256 __B) |
Convert Two Packed Single Data to One Packed BF16 Data. More... | |
static __inline__ __m128bh __DEFAULT_FN_ATTRS128 | _mm_cvtneps_pbh (__m128 __A) |
Convert Packed Single Data to Packed BF16 Data. More... | |
static __inline__ __m128bh __DEFAULT_FN_ATTRS128 | _mm_mask_cvtneps_pbh (__m128bh __W, __mmask8 __U, __m128 __A) |
Convert Packed Single Data to Packed BF16 Data. More... | |
static __inline__ __m128bh __DEFAULT_FN_ATTRS128 | _mm_maskz_cvtneps_pbh (__mmask8 __U, __m128 __A) |
Convert Packed Single Data to Packed BF16 Data. More... | |
static __inline__ __m128bh __DEFAULT_FN_ATTRS256 | _mm256_cvtneps_pbh (__m256 __A) |
Convert Packed Single Data to Packed BF16 Data. More... | |
static __inline__ __m128bh __DEFAULT_FN_ATTRS256 | _mm256_mask_cvtneps_pbh (__m128bh __W, __mmask8 __U, __m256 __A) |
Convert Packed Single Data to Packed BF16 Data. More... | |
static __inline__ __m128bh __DEFAULT_FN_ATTRS256 | _mm256_maskz_cvtneps_pbh (__mmask8 __U, __m256 __A) |
Convert Packed Single Data to Packed BF16 Data. More... | |
static __inline__ __m128 __DEFAULT_FN_ATTRS128 | _mm_dpbf16_ps (__m128 __D, __m128bh __A, __m128bh __B) |
Dot Product of BF16 Pairs Accumulated into Packed Single Precision. More... | |
static __inline__ __m128 __DEFAULT_FN_ATTRS128 | _mm_mask_dpbf16_ps (__m128 __D, __mmask8 __U, __m128bh __A, __m128bh __B) |
Dot Product of BF16 Pairs Accumulated into Packed Single Precision. More... | |
static __inline__ __m128 __DEFAULT_FN_ATTRS128 | _mm_maskz_dpbf16_ps (__mmask8 __U, __m128 __D, __m128bh __A, __m128bh __B) |
Dot Product of BF16 Pairs Accumulated into Packed Single Precision. More... | |
static __inline__ __m256 __DEFAULT_FN_ATTRS256 | _mm256_dpbf16_ps (__m256 __D, __m256bh __A, __m256bh __B) |
Dot Product of BF16 Pairs Accumulated into Packed Single Precision. More... | |
static __inline__ __m256 __DEFAULT_FN_ATTRS256 | _mm256_mask_dpbf16_ps (__m256 __D, __mmask8 __U, __m256bh __A, __m256bh __B) |
Dot Product of BF16 Pairs Accumulated into Packed Single Precision. More... | |
static __inline__ __m256 __DEFAULT_FN_ATTRS256 | _mm256_maskz_dpbf16_ps (__mmask8 __U, __m256 __D, __m256bh __A, __m256bh __B) |
Dot Product of BF16 Pairs Accumulated into Packed Single Precision. More... | |
static __inline__ __bfloat16 __DEFAULT_FN_ATTRS128 | _mm_cvtness_sbh (float __A) |
Convert One Single float Data to One BF16 Data. More... | |
static __inline__ __m256 __DEFAULT_FN_ATTRS256 | _mm256_cvtpbh_ps (__m128bh __A) |
Convert Packed BF16 Data to Packed float Data. More... | |
static __inline__ __m256 __DEFAULT_FN_ATTRS256 | _mm256_maskz_cvtpbh_ps (__mmask8 __U, __m128bh __A) |
Convert Packed BF16 Data to Packed float Data using zeroing mask. More... | |
static __inline__ __m256 __DEFAULT_FN_ATTRS256 | _mm256_mask_cvtpbh_ps (__m256 __S, __mmask8 __U, __m128bh __A) |
Convert Packed BF16 Data to Packed float Data using merging mask. More... | |
#define __DEFAULT_FN_ATTRS128 |
Definition at line 18 of file avx512vlbf16intrin.h.
#define __DEFAULT_FN_ATTRS256 |
Definition at line 21 of file avx512vlbf16intrin.h.
typedef short __m128bh __attribute__((__vector_size__(16), __aligned__(16))) |
Definition at line 16 of file avx512vlbf16intrin.h.
|
static __inline__ __m256bh __DEFAULT_FN_ATTRS256 _mm256_cvtne2ps_pbh (__m256 __A, __m256 __B) |
Convert Two Packed Single Data to One Packed BF16 Data.
This intrinsic corresponds to the VCVTNE2PS2BF16
instructions.
__A | A 256-bit vector of [8 x float]. |
__B | A 256-bit vector of [8 x float]. |
Definition at line 102 of file avx512vlbf16intrin.h.
Referenced by _mm256_mask_cvtne2ps_pbh(), and _mm256_maskz_cvtne2ps_pbh().
|
static __inline__ __m128bh __DEFAULT_FN_ATTRS256 _mm256_cvtneps_pbh (__m256 __A) |
Convert Packed Single Data to Packed BF16 Data.
This intrinsic corresponds to the VCVTNEPS2BF16
instructions.
__A | A 256-bit vector of [8 x float]. |
Definition at line 222 of file avx512vlbf16intrin.h.
References _mm_undefined_si128().
|
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_cvtpbh_ps (__m128bh __A) |
Convert Packed BF16 Data to Packed float Data.
__A | A 128-bit vector of [8 x bfloat]. |
Definition at line 430 of file avx512vlbf16intrin.h.
References _mm256_castsi256_ps(), _mm256_cvtepi16_epi32(), and _mm256_slli_epi32().
|
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_dpbf16_ps (__m256 __D, __m256bh __A, __m256bh __B) |
Dot Product of BF16 Pairs Accumulated into Packed Single Precision.
This intrinsic corresponds to the VDPBF16PS
instructions.
__A | A 256-bit vector of [16 x bfloat]. |
__B | A 256-bit vector of [16 x bfloat]. |
__D | A 256-bit vector of [8 x float]. |
Definition at line 352 of file avx512vlbf16intrin.h.
References __D.
Referenced by _mm256_mask_dpbf16_ps(), and _mm256_maskz_dpbf16_ps().
|
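static __inline__ __m256bh __DEFAULT_FN_ATTRS256 _mm256_mask_cvtne2ps_pbh (__m256bh __W, __mmask16 __U, __m256 __A, __m256 __B) |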
Convert Two Packed Single Data to One Packed BF16 Data.
This intrinsic corresponds to the VCVTNE2PS2BF16
instructions.
__A | A 256-bit vector of [8 x float]. |
__B | A 256-bit vector of [8 x float]. |
__W | A 256-bit vector of [16 x bfloat]. |
__U | A 16-bit mask value specifying what is chosen for each element. A 1 means conversion of __A or __B. A 0 means element from __W. |
Definition at line 125 of file avx512vlbf16intrin.h.
References _mm256_cvtne2ps_pbh().
|
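static __inline__ __m128bh __DEFAULT_FN_ATTRS256 _mm256_mask_cvtneps_pbh (__m128bh __W, __mmask8 __U, __m256 __A) |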
Convert Packed Single Data to Packed BF16 Data.
This intrinsic corresponds to the VCVTNEPS2BF16
instructions.
__A | A 256-bit vector of [8 x float]. |
__W | A 128-bit vector of [8 x bfloat]. |
__U | An 8-bit mask value specifying what is chosen for each element. A 1 means conversion of __A. A 0 means element from __W. |
Definition at line 243 of file avx512vlbf16intrin.h.
|
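static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_cvtpbh_ps (__m256 __S, __mmask8 __U, __m128bh __A) |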
Convert Packed BF16 Data to Packed float Data using merging mask.
__S | A 256-bit vector of [8 x float]. Elements are copied from __S when the corresponding mask bit is not set. |
__U | An 8-bit mask. Elements are copied from __S when the corresponding mask bit is not set. |
__A | A 128-bit vector of [8 x bfloat]. |
Definition at line 465 of file avx512vlbf16intrin.h.
References _mm256_castsi256_ps(), _mm256_cvtepi16_epi32(), and _mm256_mask_slli_epi32().
|
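static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_dpbf16_ps (__m256 __D, __mmask8 __U, __m256bh __A, __m256bh __B) |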
Dot Product of BF16 Pairs Accumulated into Packed Single Precision.
This intrinsic corresponds to the VDPBF16PS
instructions.
__A | A 256-bit vector of [16 x bfloat]. |
__B | A 256-bit vector of [16 x bfloat]. |
__D | A 256-bit vector of [8 x float]. |
__U | An 8-bit mask value specifying what is chosen for each element. A 1 means __A and __B's dot product accumulated with __D. A 0 means __D. |
Definition at line 376 of file avx512vlbf16intrin.h.
References __D, and _mm256_dpbf16_ps().
|
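static __inline__ __m256bh __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtne2ps_pbh (__mmask16 __U, __m256 __A, __m256 __B) |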
Convert Two Packed Single Data to One Packed BF16 Data.
This intrinsic corresponds to the VCVTNE2PS2BF16
instructions.
__A | A 256-bit vector of [8 x float]. |
__B | A 256-bit vector of [8 x float]. |
__U | A 16-bit mask value specifying what is chosen for each element. A 1 means conversion of __A or __B. A 0 means element is zero. |
Definition at line 147 of file avx512vlbf16intrin.h.
References _mm256_cvtne2ps_pbh(), and _mm256_setzero_si256().
|
static __inline__ __m128bh __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtneps_pbh (__mmask8 __U, __m256 __A) |
Convert Packed Single Data to Packed BF16 Data.
This intrinsic corresponds to the VCVTNEPS2BF16
instructions.
__A | A 256-bit vector of [8 x float]. |
__U | An 8-bit mask value specifying what is chosen for each element. A 1 means conversion of __A. A 0 means element is zero. |
Definition at line 262 of file avx512vlbf16intrin.h.
References _mm_setzero_si128().
|
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtpbh_ps (__mmask8 __U, __m128bh __A) |
Convert Packed BF16 Data to Packed float Data using zeroing mask.
__U | An 8-bit mask. Elements are zeroed out when the corresponding mask bit is not set. |
__A | A 128-bit vector of [8 x bfloat]. |
Definition at line 446 of file avx512vlbf16intrin.h.
References _mm256_castsi256_ps(), _mm256_maskz_cvtepi16_epi32(), and _mm256_slli_epi32().
|
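static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_dpbf16_ps (__mmask8 __U, __m256 __D, __m256bh __A, __m256bh __B) |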
Dot Product of BF16 Pairs Accumulated into Packed Single Precision.
This intrinsic corresponds to the VDPBF16PS
instructions.
__A | A 256-bit vector of [16 x bfloat]. |
__B | A 256-bit vector of [16 x bfloat]. |
__D | A 256-bit vector of [8 x float]. |
__U | An 8-bit mask value specifying what is chosen for each element. A 1 means __A and __B's dot product accumulated with __D. A 0 means 0. |
Definition at line 400 of file avx512vlbf16intrin.h.
References __D, _mm256_dpbf16_ps(), and _mm256_setzero_si256().
|
static __inline__ __m128bh __DEFAULT_FN_ATTRS128 _mm_cvtne2ps_pbh (__m128 __A, __m128 __B) |
Convert Two Packed Single Data to One Packed BF16 Data.
This intrinsic corresponds to the VCVTNE2PS2BF16
instructions.
__A | A 128-bit vector of [4 x float]. |
__B | A 128-bit vector of [4 x float]. |
Definition at line 38 of file avx512vlbf16intrin.h.
Referenced by _mm_mask_cvtne2ps_pbh(), and _mm_maskz_cvtne2ps_pbh().
|
static __inline__ __m128bh __DEFAULT_FN_ATTRS128 _mm_cvtneps_pbh (__m128 __A) |
Convert Packed Single Data to Packed BF16 Data.
This intrinsic corresponds to the VCVTNEPS2BF16
instructions.
__A | A 128-bit vector of [4 x float]. |
Definition at line 164 of file avx512vlbf16intrin.h.
References _mm_undefined_si128().
|
static __inline__ __bfloat16 __DEFAULT_FN_ATTRS128 _mm_cvtness_sbh (float __A) |
Convert One Single float Data to One BF16 Data.
This intrinsic corresponds to the VCVTNEPS2BF16
instructions.
__A | A single-precision floating-point value. |
Definition at line 416 of file avx512vlbf16intrin.h.
References _mm_undefined_si128().
|
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_dpbf16_ps (__m128 __D, __m128bh __A, __m128bh __B) |
Dot Product of BF16 Pairs Accumulated into Packed Single Precision.
This intrinsic corresponds to the VDPBF16PS
instructions.
__A | A 128-bit vector of [8 x bfloat]. |
__B | A 128-bit vector of [8 x bfloat]. |
__D | A 128-bit vector of [4 x float]. |
Definition at line 283 of file avx512vlbf16intrin.h.
References __D.
Referenced by _mm_mask_dpbf16_ps(), and _mm_maskz_dpbf16_ps().
|
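static __inline__ __m128bh __DEFAULT_FN_ATTRS128 _mm_mask_cvtne2ps_pbh (__m128bh __W, __mmask8 __U, __m128 __A, __m128 __B) |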
Convert Two Packed Single Data to One Packed BF16 Data.
This intrinsic corresponds to the VCVTNE2PS2BF16
instructions.
__A | A 128-bit vector of [4 x float]. |
__B | A 128-bit vector of [4 x float]. |
__W | A 128-bit vector of [8 x bfloat]. |
__U | An 8-bit mask value specifying what is chosen for each element. A 1 means conversion of __A or __B. A 0 means element from __W. |
Definition at line 61 of file avx512vlbf16intrin.h.
References _mm_cvtne2ps_pbh().
|
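static __inline__ __m128bh __DEFAULT_FN_ATTRS128 _mm_mask_cvtneps_pbh (__m128bh __W, __mmask8 __U, __m128 __A) |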
Convert Packed Single Data to Packed BF16 Data.
This intrinsic corresponds to the VCVTNEPS2BF16
instructions.
__A | A 128-bit vector of [4 x float]. |
__W | A 128-bit vector of [8 x bfloat]. |
__U | A 4-bit mask value specifying what is chosen for each element. A 1 means conversion of __A. A 0 means element from __W. |
Definition at line 186 of file avx512vlbf16intrin.h.
|
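static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_dpbf16_ps (__m128 __D, __mmask8 __U, __m128bh __A, __m128bh __B) |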
Dot Product of BF16 Pairs Accumulated into Packed Single Precision.
This intrinsic corresponds to the VDPBF16PS
instructions.
__A | A 128-bit vector of [8 x bfloat]. |
__B | A 128-bit vector of [8 x bfloat]. |
__D | A 128-bit vector of [4 x float]. |
__U | An 8-bit mask value specifying what is chosen for each element. A 1 means __A and __B's dot product accumulated with __D. A 0 means __D. |
Definition at line 307 of file avx512vlbf16intrin.h.
References __D, and _mm_dpbf16_ps().
|
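static __inline__ __m128bh __DEFAULT_FN_ATTRS128 _mm_maskz_cvtne2ps_pbh (__mmask8 __U, __m128 __A, __m128 __B) |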
Convert Two Packed Single Data to One Packed BF16 Data.
This intrinsic corresponds to the VCVTNE2PS2BF16
instructions.
__A | A 128-bit vector of [4 x float]. |
__B | A 128-bit vector of [4 x float]. |
__U | An 8-bit mask value specifying what is chosen for each element. A 1 means conversion of __A or __B. A 0 means element is zero. |
Definition at line 83 of file avx512vlbf16intrin.h.
References _mm_cvtne2ps_pbh(), and _mm_setzero_si128().
|
static __inline__ __m128bh __DEFAULT_FN_ATTRS128 _mm_maskz_cvtneps_pbh (__mmask8 __U, __m128 __A) |
Convert Packed Single Data to Packed BF16 Data.
This intrinsic corresponds to the VCVTNEPS2BF16
instructions.
__A | A 128-bit vector of [4 x float]. |
__U | A 4-bit mask value specifying what is chosen for each element. A 1 means conversion of __A. A 0 means element is zero. |
Definition at line 206 of file avx512vlbf16intrin.h.
References _mm_setzero_si128().
|
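static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_dpbf16_ps (__mmask8 __U, __m128 __D, __m128bh __A, __m128bh __B) |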
Dot Product of BF16 Pairs Accumulated into Packed Single Precision.
This intrinsic corresponds to the VDPBF16PS
instructions.
__A | A 128-bit vector of [8 x bfloat]. |
__B | A 128-bit vector of [8 x bfloat]. |
__D | A 128-bit vector of [4 x float]. |
__U | An 8-bit mask value specifying what is chosen for each element. A 1 means __A and __B's dot product accumulated with __D. A 0 means 0. |
Definition at line 331 of file avx512vlbf16intrin.h.
References __D, _mm_dpbf16_ps(), and _mm_setzero_si128().