/* Attribute set attached to each intrinsic declared with __DEFAULT_FN_ATTRS:
 * always inlined, no debug info, compiled for the "sse3" target feature, and
 * a minimum vector width of 128. */
16 #define __DEFAULT_FN_ATTRS \
17 __attribute__((__always_inline__, __nodebug__, __target__("sse3"), __min_vector_width__(128)))
36 return (__m128i)__builtin_ia32_lddqu((
char const *)
__p);
55 return __builtin_ia32_addsubps((__v4sf)
__a, (__v4sf)
__b);
78 return __builtin_ia32_haddps((__v4sf)
__a, (__v4sf)
__b);
101 return __builtin_ia32_hsubps((__v4sf)
__a, (__v4sf)
__b);
123 return __builtin_shufflevector((__v4sf)
__a, (__v4sf)
__a, 1, 1, 3, 3);
144 return __builtin_shufflevector((__v4sf)
__a, (__v4sf)
__a, 0, 0, 2, 2);
163 return __builtin_ia32_addsubpd((__v2df)
__a, (__v2df)
__b);
186 return __builtin_ia32_haddpd((__v2df)
__a, (__v2df)
__b);
209 return __builtin_ia32_hsubpd((__v2df)
__a, (__v2df)
__b);
/* _mm_loaddup_pd(dp) is an alias: it expands directly to _mm_load1_pd(dp),
 * which is not defined in this section. */
227 #define _mm_loaddup_pd(dp) _mm_load1_pd(dp)
245 return __builtin_shufflevector((__v2df)
__a, (__v2df)
__a, 0, 0);
266 __builtin_ia32_monitor(
__p, __extensions, __hints);
285 __builtin_ia32_mwait(__extensions, __hints);
288 #undef __DEFAULT_FN_ATTRS
static __inline unsigned char unsigned int unsigned int unsigned int * __p
static __inline__ vector float vector float __b
static __inline__ void int __a
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_lddqu_si128(__m128i const *__p)
Loads data from an unaligned memory location to elements in a 128-bit vector.
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_hadd_pd(__m128d __a, __m128d __b)
Horizontally adds the pairs of values contained in two 128-bit vectors of [2 x double].
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_movedup_pd(__m128d __a)
Moves and duplicates the double-precision value in the lower bits of a 128-bit vector of [2 x double] to double-precision values stored in a 128-bit vector of [2 x double].
#define __DEFAULT_FN_ATTRS
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_hadd_ps(__m128 __a, __m128 __b)
Horizontally adds the adjacent pairs of values contained in two 128-bit vectors of [4 x float].
static __inline__ void __DEFAULT_FN_ATTRS _mm_mwait(unsigned __extensions, unsigned __hints)
Used with the MONITOR instruction to wait while the processor is in the monitor event pending state.
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_addsub_pd(__m128d __a, __m128d __b)
Subtracts the even-indexed values and adds the odd-indexed values of two 128-bit vectors of [2 x double] (the second operand is subtracted from or added to the first).
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_hsub_pd(__m128d __a, __m128d __b)
Horizontally subtracts the pairs of values contained in two 128-bit vectors of [2 x double].
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_movehdup_ps(__m128 __a)
Moves and duplicates odd-indexed values from a 128-bit vector of [4 x float] to float values stored in a 128-bit vector of [4 x float].
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_moveldup_ps(__m128 __a)
Duplicates even-indexed values from a 128-bit vector of [4 x float] to float values stored in a 128-bit vector of [4 x float].
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_addsub_ps(__m128 __a, __m128 __b)
Subtracts the even-indexed values and adds the odd-indexed values of two 128-bit vectors of [4 x float] (the second operand is subtracted from or added to the first).
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_hsub_ps(__m128 __a, __m128 __b)
Horizontally subtracts the adjacent pairs of values contained in two 128-bit vectors of [4 x float].
static __inline__ void __DEFAULT_FN_ATTRS _mm_monitor(void const *__p, unsigned __extensions, unsigned __hints)
Establishes a linear address memory range to be monitored and puts the processor in the monitor event pending state.