This graph shows which files directly or indirectly include this file:

Macros
#define	__DEFAULT_FN_ATTRS512

#define	__DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512bf16")))

Typedefs
typedef short __m512bh	__attribute__((__vector_size__(64), __aligned__(64)))

typedef unsigned short	__bfloat16

Functions
static __inline__ float __DEFAULT_FN_ATTRS	_mm_cvtsbh_ss (__bfloat16 __A)
	Convert One BF16 Data to One Single Float Data.

static __inline__ __m512bh __DEFAULT_FN_ATTRS512	_mm512_cvtne2ps_pbh (__m512 __A, __m512 __B)
	Convert Two Packed Single Data to One Packed BF16 Data.

static __inline__ __m512bh __DEFAULT_FN_ATTRS512	_mm512_mask_cvtne2ps_pbh (__m512bh __W, __mmask32 __U, __m512 __A, __m512 __B)
	Convert Two Packed Single Data to One Packed BF16 Data.

static __inline__ __m512bh __DEFAULT_FN_ATTRS512	_mm512_maskz_cvtne2ps_pbh (__mmask32 __U, __m512 __A, __m512 __B)
	Convert Two Packed Single Data to One Packed BF16 Data.

static __inline__ __m256bh __DEFAULT_FN_ATTRS512	_mm512_cvtneps_pbh (__m512 __A)
	Convert Packed Single Data to Packed BF16 Data.

static __inline__ __m256bh __DEFAULT_FN_ATTRS512	_mm512_mask_cvtneps_pbh (__m256bh __W, __mmask16 __U, __m512 __A)
	Convert Packed Single Data to Packed BF16 Data.

static __inline__ __m256bh __DEFAULT_FN_ATTRS512	_mm512_maskz_cvtneps_pbh (__mmask16 __U, __m512 __A)
	Convert Packed Single Data to Packed BF16 Data.

static __inline__ __m512 __DEFAULT_FN_ATTRS512	_mm512_dpbf16_ps (__m512 __D, __m512bh __A, __m512bh __B)
	Dot Product of BF16 Pairs Accumulated into Packed Single Precision.

static __inline__ __m512 __DEFAULT_FN_ATTRS512	_mm512_mask_dpbf16_ps (__m512 __D, __mmask16 __U, __m512bh __A, __m512bh __B)
	Dot Product of BF16 Pairs Accumulated into Packed Single Precision.

static __inline__ __m512 __DEFAULT_FN_ATTRS512	_mm512_maskz_dpbf16_ps (__mmask16 __U, __m512 __D, __m512bh __A, __m512bh __B)
	Dot Product of BF16 Pairs Accumulated into Packed Single Precision.

static __inline__ __m512 __DEFAULT_FN_ATTRS512	_mm512_cvtpbh_ps (__m256bh __A)
	Convert Packed BF16 Data to Packed float Data.

static __inline__ __m512 __DEFAULT_FN_ATTRS512	_mm512_maskz_cvtpbh_ps (__mmask16 __U, __m256bh __A)
	Convert Packed BF16 Data to Packed float Data using zeroing mask.

static __inline__ __m512 __DEFAULT_FN_ATTRS512	_mm512_mask_cvtpbh_ps (__m512 __S, __mmask16 __U, __m256bh __A)
	Convert Packed BF16 Data to Packed float Data using merging mask.

Macro Definition Documentation

◆ __DEFAULT_FN_ATTRS

#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512bf16")))

Definition at line 23 of file avx512bf16intrin.h.

◆ __DEFAULT_FN_ATTRS512

#define __DEFAULT_FN_ATTRS512

Value:

__attribute__((__always_inline__, __nodebug__, __target__("avx512bf16"), \

__min_vector_width__(512)))

__attribute__

short __m512bh __attribute__((__vector_size__(64), __aligned__(64)))

Definition avx512bf16intrin.h:16

Definition at line 20 of file avx512bf16intrin.h.

Typedef Documentation

◆ __attribute__

typedef short __m256bh __attribute__((__vector_size__(32), __aligned__(32)))

Definition at line 16 of file avx512bf16intrin.h.

◆ __bfloat16

typedef unsigned short __bfloat16

Definition at line 18 of file avx512bf16intrin.h.

Function Documentation

◆ _mm512_cvtne2ps_pbh()

static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_cvtne2ps_pbh	(	__m512	__A,
		__m512	__B
	)

static

Convert Two Packed Single Data to One Packed BF16 Data.

This intrinsic corresponds to the VCVTNE2PS2BF16 instructions.

Parameters

__A	A 512-bit vector of [16 x float].
__B	A 512-bit vector of [16 x float].

Returns: A 512-bit vector of [32 x bfloat] whose lower 256 bits come from conversion of __B, and higher 256 bits come from conversion of __A.

Definition at line 53 of file avx512bf16intrin.h.

Referenced by _mm512_mask_cvtne2ps_pbh(), and _mm512_maskz_cvtne2ps_pbh().

Here is the caller graph for this function:

◆ _mm512_cvtneps_pbh()

static __inline__ __m256bh __DEFAULT_FN_ATTRS512 _mm512_cvtneps_pbh ( __m512 __A )

static

Convert Packed Single Data to Packed BF16 Data.

This intrinsic corresponds to the VCVTNEPS2BF16 instructions.

Parameters

__A	A 512-bit vector of [16 x float].

Returns: A 256-bit vector of [16 x bfloat] that comes from the conversion of __A.

Definition at line 114 of file avx512bf16intrin.h.

References _mm256_undefined_si256().

Here is the call graph for this function:

◆ _mm512_cvtpbh_ps()

static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_cvtpbh_ps ( __m256bh __A )

static

Convert Packed BF16 Data to Packed float Data.

Parameters

__A	A 256-bit vector of [16 x bfloat].

Returns: A 512-bit vector of [16 x float] that comes from the conversion of __A.

Definition at line 236 of file avx512bf16intrin.h.

References _mm512_castsi512_ps(), _mm512_cvtepi16_epi32(), and _mm512_slli_epi32().

Here is the call graph for this function:

◆ _mm512_dpbf16_ps()

static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_dpbf16_ps	(	__m512	__D,
		__m512bh	__A,
		__m512bh	__B
	)

static

Dot Product of BF16 Pairs Accumulated into Packed Single Precision.

This intrinsic corresponds to the VDPBF16PS instructions.

Parameters

__A	A 512-bit vector of [32 x bfloat].
__B	A 512-bit vector of [32 x bfloat].
__D	A 512-bit vector of [16 x float].

Returns: A 512-bit vector of [16 x float] that comes from the dot product of __A and __B accumulated with __D.

Definition at line 175 of file avx512bf16intrin.h.

References __D.

Referenced by _mm512_mask_dpbf16_ps(), and _mm512_maskz_dpbf16_ps().

Here is the caller graph for this function:

◆ _mm512_mask_cvtne2ps_pbh()

static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_mask_cvtne2ps_pbh	(	__m512bh	__W,
		__mmask32	__U,
		__m512	__A,
		__m512	__B
	)

static

Convert Two Packed Single Data to One Packed BF16 Data.

This intrinsic corresponds to the VCVTNE2PS2BF16 instructions.

Parameters

__A	A 512-bit vector of [16 x float].
__B	A 512-bit vector of [16 x float].
__W	A 512-bit vector of [32 x bfloat].
__U	A 32-bit mask value specifying what is chosen for each element. A 1 means conversion of __A or __B. A 0 means element from __W.

Returns: A 512-bit vector of [32 x bfloat] whose lower 256 bits come from conversion of __B, and higher 256 bits come from conversion of __A.

Definition at line 76 of file avx512bf16intrin.h.

References _mm512_cvtne2ps_pbh().

Here is the call graph for this function:

◆ _mm512_mask_cvtneps_pbh()

static __inline__ __m256bh __DEFAULT_FN_ATTRS512 _mm512_mask_cvtneps_pbh	(	__m256bh	__W,
		__mmask16	__U,
		__m512	__A
	)

static

Convert Packed Single Data to Packed BF16 Data.

This intrinsic corresponds to the VCVTNEPS2BF16 instructions.

Parameters

__A	A 512-bit vector of [16 x float].
__W	A 256-bit vector of [16 x bfloat].
__U	A 16-bit mask value specifying what is chosen for each element. A 1 means conversion of __A. A 0 means element from __W.

Returns: A 256-bit vector of [16 x bfloat] that comes from the conversion of __A.

Definition at line 135 of file avx512bf16intrin.h.

◆ _mm512_mask_cvtpbh_ps()

static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_cvtpbh_ps	(	__m512	__S,
		__mmask16	__U,
		__m256bh	__A
	)

static

Convert Packed BF16 Data to Packed float Data using merging mask.

Parameters

__S	A 512-bit vector of [16 x float]. Elements are copied from __S when the corresponding mask bit is not set.
__U	A 16-bit mask.
__A	A 256-bit vector of [16 x bfloat].

Returns: A 512-bit vector of [16 x float] that comes from the conversion of __A.

Definition at line 270 of file avx512bf16intrin.h.

References _mm512_castsi512_ps(), _mm512_cvtepi16_epi32(), and _mm512_mask_slli_epi32().

Here is the call graph for this function:

◆ _mm512_mask_dpbf16_ps()

static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_dpbf16_ps	(	__m512	__D,
		__mmask16	__U,
		__m512bh	__A,
		__m512bh	__B
	)

static

Dot Product of BF16 Pairs Accumulated into Packed Single Precision.

This intrinsic corresponds to the VDPBF16PS instructions.

Parameters

__A	A 512-bit vector of [32 x bfloat].
__B	A 512-bit vector of [32 x bfloat].
__D	A 512-bit vector of [16 x float].
__U	A 16-bit mask value specifying what is chosen for each element. A 1 means __A and __B's dot product accumulated with __D. A 0 means __D.

Returns: A 512-bit vector of [16 x float] that comes from the dot product of __A and __B accumulated with __D.

Definition at line 199 of file avx512bf16intrin.h.

References __D, and _mm512_dpbf16_ps().

Here is the call graph for this function:

◆ _mm512_maskz_cvtne2ps_pbh()

static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtne2ps_pbh	(	__mmask32	__U,
		__m512	__A,
		__m512	__B
	)

static

Convert Two Packed Single Data to One Packed BF16 Data.

This intrinsic corresponds to the VCVTNE2PS2BF16 instructions.

Parameters

__A	A 512-bit vector of [16 x float].
__B	A 512-bit vector of [16 x float].
__U	A 32-bit mask value specifying what is chosen for each element. A 1 means conversion of __A or __B. A 0 means element is zero.

Returns: A 512-bit vector of [32 x bfloat] whose lower 256 bits come from conversion of __B, and higher 256 bits come from conversion of __A.

Definition at line 98 of file avx512bf16intrin.h.

References _mm512_cvtne2ps_pbh(), and _mm512_setzero_si512().

Here is the call graph for this function:

◆ _mm512_maskz_cvtneps_pbh()

static __inline__ __m256bh __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtneps_pbh	(	__mmask16	__U,
		__m512	__A
	)

static

Convert Packed Single Data to Packed BF16 Data.

This intrinsic corresponds to the VCVTNEPS2BF16 instructions.

Parameters

__A	A 512-bit vector of [16 x float].
__U	A 16-bit mask value specifying what is chosen for each element. A 1 means conversion of __A. A 0 means element is zero.

Returns: A 256-bit vector of [16 x bfloat] that comes from the conversion of __A.

Definition at line 154 of file avx512bf16intrin.h.

References _mm256_setzero_si256().

Here is the call graph for this function:

◆ _mm512_maskz_cvtpbh_ps()

static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtpbh_ps	(	__mmask16	__U,
		__m256bh	__A
	)

static

Convert Packed BF16 Data to Packed float Data using zeroing mask.

Parameters

__U	A 16-bit mask. Elements are zeroed out when the corresponding mask bit is not set.
__A	A 256-bit vector of [16 x bfloat].

Returns: A 512-bit vector of [16 x float] that comes from the conversion of __A.

Definition at line 252 of file avx512bf16intrin.h.

References _mm512_castsi512_ps(), _mm512_maskz_cvtepi16_epi32(), and _mm512_slli_epi32().

Here is the call graph for this function:

◆ _mm512_maskz_dpbf16_ps()

static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_dpbf16_ps	(	__mmask16	__U,
		__m512	__D,
		__m512bh	__A,
		__m512bh	__B
	)

static

Dot Product of BF16 Pairs Accumulated into Packed Single Precision.

This intrinsic corresponds to the VDPBF16PS instructions.

Parameters

__A	A 512-bit vector of [32 x bfloat].
__B	A 512-bit vector of [32 x bfloat].
__D	A 512-bit vector of [16 x float].
__U	A 16-bit mask value specifying what is chosen for each element. A 1 means __A and __B's dot product accumulated with __D. A 0 means 0.

Returns: A 512-bit vector of [16 x float] that comes from the dot product of __A and __B accumulated with __D.

Definition at line 223 of file avx512bf16intrin.h.

References __D, _mm512_dpbf16_ps(), and _mm512_setzero_si512().

Here is the call graph for this function:

◆ _mm_cvtsbh_ss()

static __inline__ float __DEFAULT_FN_ATTRS _mm_cvtsbh_ss ( __bfloat16 __A )

static

Convert One BF16 Data to One Single Float Data.

This intrinsic does not correspond to a specific instruction.

Parameters

__A	A bfloat data.

Returns: A float value whose sign and exponent fields remain unchanged, and whose fraction field is extended to 23 bits.

Definition at line 36 of file avx512bf16intrin.h.

Macros

Typedefs

Functions

Macro Definition Documentation

◆ __DEFAULT_FN_ATTRS

◆ __DEFAULT_FN_ATTRS512

Typedef Documentation

◆ __attribute__

◆ __bfloat16

Function Documentation

◆ _mm512_cvtne2ps_pbh()

◆ _mm512_cvtneps_pbh()

◆ _mm512_cvtpbh_ps()

◆ _mm512_dpbf16_ps()

◆ _mm512_mask_cvtne2ps_pbh()

◆ _mm512_mask_cvtneps_pbh()

◆ _mm512_mask_cvtpbh_ps()

◆ _mm512_mask_dpbf16_ps()

◆ _mm512_maskz_cvtne2ps_pbh()

◆ _mm512_maskz_cvtneps_pbh()

◆ _mm512_maskz_cvtpbh_ps()

◆ _mm512_maskz_dpbf16_ps()

◆ _mm_cvtsbh_ss()

◆ __attribute__