#ifndef NO_WARN_X86_INTRINSICS
/* This header is distributed to simplify porting x86_64 code that
   makes explicit use of Intel intrinsics to powerpc64le.

   It is the user's responsibility to determine if the results are
   acceptable and make additional changes as necessary.

   Note that much code that uses Intel intrinsics can be rewritten in
   standard C or GNU C extensions, which are more portable and better
   optimized across multiple targets.  */
#error "Please read comment above.  Use -DNO_WARN_X86_INTRINSICS to disable this error."
#endif

#ifndef TMMINTRIN_H_
#define TMMINTRIN_H_
#if defined(__linux__) && defined(__ppc64__)

#include <altivec.h>

/* We need definitions from the SSE header files.  */
#include <pmmintrin.h>
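/* Implementation note: each SSSE3 intrinsic below is emulated with
   VMX/VSX operations from <altivec.h>.  The __m64 (_pi*) variants
   splat the 64-bit operand into both doublewords of a 128-bit vector,
   reuse the 128-bit logic, and extract a single doubleword of the
   result.  */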
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_abs_epi16 (__m128i __A)
{
  return (__m128i) vec_abs ((__v8hi) __A);
}
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_abs_epi32 (__m128i __A)
{
  return (__m128i) vec_abs ((__v4si) __A);
}
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_abs_epi8 (__m128i __A)
{
  return (__m128i) vec_abs ((__v16qi) __A);
}
extern __inline __m64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_abs_pi16 (__m64 __A)
{
  __v8hi __B = (__v8hi) (__v2du) { __A, __A };
  return (__m64) ((__v2du) vec_abs (__B))[0];
}
extern __inline __m64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_abs_pi32 (__m64 __A)
{
  __v4si __B = (__v4si) (__v2du) { __A, __A };
  return (__m64) ((__v2du) vec_abs (__B))[0];
}
extern __inline __m64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_abs_pi8 (__m64 __A)
{
  __v16qi __B = (__v16qi) (__v2du) { __A, __A };
  return (__m64) ((__v2du) vec_abs (__B))[0];
}
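/* Usage sketch with hypothetical values:
     _mm_abs_epi16 (_mm_set_epi16 (-5, 4, -3, 2, -1, 0, 7, -8))
   yields _mm_set_epi16 (5, 4, 3, 2, 1, 0, 7, 8).  As on x86, the most
   negative element (e.g. -32768 in a 16-bit lane) has no positive
   counterpart and is returned unchanged.  */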
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_alignr_epi8 (__m128i __A, __m128i __B, const unsigned int __count)
{
  if (__builtin_constant_p (__count) && __count < 16)
    {
#ifdef __LITTLE_ENDIAN__
      __A = (__m128i) vec_reve ((__v16qu) __A);
      __B = (__m128i) vec_reve ((__v16qu) __B);
#endif
      __A = (__m128i) vec_sld ((__v16qu) __B, (__v16qu) __A, __count);
#ifdef __LITTLE_ENDIAN__
      __A = (__m128i) vec_reve ((__v16qu) __A);
#endif
      return __A;
    }

  if (__count == 0)
    return __B;

  if (__count >= 16)
    {
      if (__count >= 32)
	{
	  const __v16qu zero = { 0 };
	  return (__m128i) zero;
	}
      else
	{
	  const __v16qu __shift =
	    vec_splats ((unsigned char) ((__count - 16) * 8));
#ifdef __LITTLE_ENDIAN__
	  return (__m128i) vec_sro ((__v16qu) __A, __shift);
#else
	  return (__m128i) vec_slo ((__v16qu) __A, __shift);
#endif
	}
    }
  else
    {
      const __v16qu __shiftA =
	vec_splats ((unsigned char) ((16 - __count) * 8));
      const __v16qu __shiftB =
	vec_splats ((unsigned char) (__count * 8));
#ifdef __LITTLE_ENDIAN__
      __A = (__m128i) vec_slo ((__v16qu) __A, __shiftA);
      __B = (__m128i) vec_sro ((__v16qu) __B, __shiftB);
#else
      __A = (__m128i) vec_sro ((__v16qu) __A, __shiftA);
      __B = (__m128i) vec_slo ((__v16qu) __B, __shiftB);
#endif
      return (__m128i) vec_or ((__v16qu) __A, (__v16qu) __B);
    }
}
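/* The three cases above implement PALIGNR: the 32-byte concatenation
   __A:__B is shifted right by __count bytes and truncated to 16 bytes.
   A compile-time __count below 16 maps onto a single vec_sld (with
   byte reversals on little endian, since vec_sld concatenates in
   big-endian order); a variable __count of 16..31 shifts __A alone;
   and __count >= 32 yields zero.  */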
extern __inline __m64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_alignr_pi8 (__m64 __A, __m64 __B, unsigned int __count)
{
  if (__count < 16)
    {
      __v2du __C = { __B, __A };
#ifdef __LITTLE_ENDIAN__
      const __v4su __shift = { __count << 3, 0, 0, 0 };
      __C = (__v2du) vec_sro ((__v16qu) __C, (__v16qu) __shift);
#else
      const __v4su __shift = { 0, 0, 0, __count << 3 };
      __C = (__v2du) vec_slo ((__v16qu) __C, (__v16qu) __shift);
#endif
      return (__m64) __C[0];
    }
  else
    {
      const __m64 __zero = { 0 };
      return __zero;
    }
}
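/* The 64-bit variant packs __B (low) and __A (high) into one vector
   and shifts by whole bytes with vec_sro/vec_slo.  Those operations
   take the shift amount in bits, so __count << 3 is placed in the
   element that supplies the low-order byte of the shift vector, which
   differs between the two endiannesses.  */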
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_hadd_epi16 (__m128i __A, __m128i __B)
{
  const __v16qu __P =
    { 0, 1, 4, 5, 8, 9, 12, 13, 16, 17, 20, 21, 24, 25, 28, 29 };
  const __v16qu __Q =
    { 2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31 };
  __v8hi __C = vec_perm ((__v8hi) __A, (__v8hi) __B, __P);
  __v8hi __D = vec_perm ((__v8hi) __A, (__v8hi) __B, __Q);
  return (__m128i) vec_add (__C, __D);
}
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_hadd_epi32 (__m128i __A, __m128i __B)
{
  const __v16qu __P =
    { 0, 1, 2, 3, 8, 9, 10, 11, 16, 17, 18, 19, 24, 25, 26, 27 };
  const __v16qu __Q =
    { 4, 5, 6, 7, 12, 13, 14, 15, 20, 21, 22, 23, 28, 29, 30, 31 };
  __v4si __C = vec_perm ((__v4si) __A, (__v4si) __B, __P);
  __v4si __D = vec_perm ((__v4si) __A, (__v4si) __B, __Q);
  return (__m128i) vec_add (__C, __D);
}
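/* Example: with 16-bit lanes __A = {1, 2, 3, 4, 5, 6, 7, 8} and
   __B = {10, 20, 30, 40, 50, 60, 70, 80}, _mm_hadd_epi16 returns
   {3, 7, 11, 15, 30, 70, 110, 150}: sums of adjacent pairs of __A in
   the low half and of __B in the high half.  The __P permute gathers
   one element of each adjacent pair and __Q the other, so a single
   vec_add produces all eight sums; _mm_hadd_epi32 is the 32-bit
   analogue.  */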
extern __inline __m64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_hadd_pi16 (__m64 __A, __m64 __B)
{
  __v8hi __C = (__v8hi) (__v2du) { __A, __B };
  const __v16qu __P =
    { 0, 1, 4, 5, 8, 9, 12, 13, 0, 1, 4, 5, 8, 9, 12, 13 };
  const __v16qu __Q =
    { 2, 3, 6, 7, 10, 11, 14, 15, 2, 3, 6, 7, 10, 11, 14, 15 };
  __v8hi __D = vec_perm (__C, __C, __Q);
  __C = vec_perm (__C, __C, __P);
  __C = vec_add (__C, __D);
  return (__m64) ((__v2du) __C)[1];
}
extern __inline __m64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_hadd_pi32 (__m64 __A, __m64 __B)
{
  __v4si __C = (__v4si) (__v2du) { __A, __B };
  const __v16qu __P =
    { 0, 1, 2, 3, 8, 9, 10, 11, 0, 1, 2, 3, 8, 9, 10, 11 };
  const __v16qu __Q =
    { 4, 5, 6, 7, 12, 13, 14, 15, 4, 5, 6, 7, 12, 13, 14, 15 };
  __v4si __D = vec_perm (__C, __C, __Q);
  __C = vec_perm (__C, __C, __P);
  __C = vec_add (__C, __D);
  return (__m64) ((__v2du) __C)[1];
}
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_hadds_epi16 (__m128i __A, __m128i __B)
{
  __v4si __C = { 0 }, __D = { 0 };
  __C = vec_sum4s ((__v8hi) __A, __C);
  __D = vec_sum4s ((__v8hi) __B, __D);
  __C = (__v4si) vec_packs (__C, __D);
  return (__m128i) __C;
}
extern __inline __m64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_hadds_pi16 (__m64 __A, __m64 __B)
{
  const __v4si __zero = { 0 };
  __v8hi __C = (__v8hi) (__v2du) { __A, __B };
  __v4si __D = vec_sum4s (__C, __zero);
  __C = vec_packs (__D, __D);
  return (__m64) ((__v2du) __C)[1];
}
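/* For the saturating forms, vec_sum4s adds each pair of adjacent
   16-bit elements into a 32-bit sum and vec_packs packs the sums back
   down with signed saturation, so 32767 + 1 produces 32767 rather
   than wrapping, as PHADDSW requires.  */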
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_hsub_epi16 (__m128i __A, __m128i __B)
{
  const __v16qu __P =
    { 0, 1, 4, 5, 8, 9, 12, 13, 16, 17, 20, 21, 24, 25, 28, 29 };
  const __v16qu __Q =
    { 2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31 };
  __v8hi __C = vec_perm ((__v8hi) __A, (__v8hi) __B, __P);
  __v8hi __D = vec_perm ((__v8hi) __A, (__v8hi) __B, __Q);
  return (__m128i) vec_sub (__C, __D);
}
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_hsub_epi32 (__m128i __A, __m128i __B)
{
  const __v16qu __P =
    { 0, 1, 2, 3, 8, 9, 10, 11, 16, 17, 18, 19, 24, 25, 26, 27 };
  const __v16qu __Q =
    { 4, 5, 6, 7, 12, 13, 14, 15, 20, 21, 22, 23, 28, 29, 30, 31 };
  __v4si __C = vec_perm ((__v4si) __A, (__v4si) __B, __P);
  __v4si __D = vec_perm ((__v4si) __A, (__v4si) __B, __Q);
  return (__m128i) vec_sub (__C, __D);
}
extern __inline __m64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_hsub_pi16 (__m64 __A, __m64 __B)
{
  const __v16qu __P =
    { 0, 1, 4, 5, 8, 9, 12, 13, 0, 1, 4, 5, 8, 9, 12, 13 };
  const __v16qu __Q =
    { 2, 3, 6, 7, 10, 11, 14, 15, 2, 3, 6, 7, 10, 11, 14, 15 };
  __v8hi __C = (__v8hi) (__v2du) { __A, __B };
  __v8hi __D = vec_perm (__C, __C, __Q);
  __C = vec_perm (__C, __C, __P);
  __C = vec_sub (__C, __D);
  return (__m64) ((__v2du) __C)[1];
}
extern __inline __m64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_hsub_pi32 (__m64 __A, __m64 __B)
{
  const __v16qu __P =
    { 0, 1, 2, 3, 8, 9, 10, 11, 0, 1, 2, 3, 8, 9, 10, 11 };
  const __v16qu __Q =
    { 4, 5, 6, 7, 12, 13, 14, 15, 4, 5, 6, 7, 12, 13, 14, 15 };
  __v4si __C = (__v4si) (__v2du) { __A, __B };
  __v4si __D = vec_perm (__C, __C, __Q);
  __C = vec_perm (__C, __C, __P);
  __C = vec_sub (__C, __D);
  return (__m64) ((__v2du) __C)[1];
}
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_hsubs_epi16 (__m128i __A, __m128i __B)
{
  const __v16qu __P =
    { 0, 1, 4, 5, 8, 9, 12, 13, 16, 17, 20, 21, 24, 25, 28, 29 };
  const __v16qu __Q =
    { 2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31 };
  __v8hi __C = vec_perm ((__v8hi) __A, (__v8hi) __B, __P);
  __v8hi __D = vec_perm ((__v8hi) __A, (__v8hi) __B, __Q);
  return (__m128i) vec_subs (__C, __D);
}
extern __inline __m64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_hsubs_pi16 (__m64 __A, __m64 __B)
{
  const __v16qu __P =
    { 0, 1, 4, 5, 8, 9, 12, 13, 0, 1, 4, 5, 8, 9, 12, 13 };
  const __v16qu __Q =
    { 2, 3, 6, 7, 10, 11, 14, 15, 2, 3, 6, 7, 10, 11, 14, 15 };
  __v8hi __C = (__v8hi) (__v2du) { __A, __B };
  __v8hi __D = vec_perm (__C, __C, __Q);
  __C = vec_perm (__C, __C, __P);
  __C = vec_subs (__C, __D);
  return (__m64) ((__v2du) __C)[1];
}
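/* The hsub family mirrors hadd with the final vec_add replaced by a
   subtraction: each result lane is an even-indexed element minus its
   odd-indexed neighbour (__A0 - __A1, ...), and the _mm_hsubs_* forms
   use vec_subs so the differences saturate instead of wrapping.  */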
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_shuffle_epi8 (__m128i __A, __m128i __B)
{
  const __v16qi __zero = { 0 };
  __vector __bool char __select = vec_cmplt ((__v16qi) __B, __zero);
  __v16qi __C = vec_perm ((__v16qi) __A, (__v16qi) __A, (__v16qu) __B);
  return (__m128i) vec_sel (__C, __zero, __select);
}
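/* PSHUFB semantics: each byte of __B selects a byte of __A.  vec_perm
   only honours the selector's low bits, and both of its data operands
   are __A here, so the index is effectively taken modulo 16; the
   vec_sel then zeroes every byte whose selector was negative (sign
   bit set), matching the x86 definition.  */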
extern __inline __m64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_shuffle_pi8 (__m64 __A, __m64 __B)
{
  const __v16qi __zero = { 0 };
  __v16qi __C = (__v16qi) (__v2du) { __A, __A };
  __v16qi __D = (__v16qi) (__v2du) { __B, __B };
  __vector __bool char __select = vec_cmplt ((__v16qi) __D, __zero);
  __C = vec_perm (__C, __C, (__v16qu) __D);
  __C = vec_sel (__C, __zero, __select);
  return (__m64) ((__v2du) (__C))[0];
}
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_sign_epi8 (__m128i __A, __m128i __B)
{
  const __v16qi __zero = { 0 };
  __v16qi __selectneg = (__v16qi) vec_cmplt ((__v16qi) __B, __zero);
  __v16qi __selectpos =
    (__v16qi) vec_neg ((__v16qi) vec_cmpgt ((__v16qi) __B, __zero));
  __v16qi __conv = vec_add (__selectneg, __selectpos);
  return (__m128i) vec_mul ((__v16qi) __A, (__v16qi) __conv);
}
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_sign_epi16 (__m128i __A, __m128i __B)
{
  const __v8hi __zero = { 0 };
  __v8hi __selectneg = (__v8hi) vec_cmplt ((__v8hi) __B, __zero);
  __v8hi __selectpos =
    (__v8hi) vec_neg ((__v8hi) vec_cmpgt ((__v8hi) __B, __zero));
  __v8hi __conv = vec_add (__selectneg, __selectpos);
  return (__m128i) vec_mul ((__v8hi) __A, (__v8hi) __conv);
}
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_sign_epi32 (__m128i __A, __m128i __B)
{
  const __v4si __zero = { 0 };
  __v4si __selectneg = (__v4si) vec_cmplt ((__v4si) __B, __zero);
  __v4si __selectpos =
    (__v4si) vec_neg ((__v4si) vec_cmpgt ((__v4si) __B, __zero));
  __v4si __conv = vec_add (__selectneg, __selectpos);
  return (__m128i) vec_mul ((__v4si) __A, (__v4si) __conv);
}
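/* The selector arithmetic above implements PSIGNB/PSIGNW/PSIGND:
   __selectneg is -1 where __B is negative, __selectpos is +1 where it
   is positive, so __conv is -1, 0 or +1 per element and the multiply
   returns -__A, 0 or __A.  E.g. _mm_sign_epi16 on __A = {1, 2, 3, ...}
   and __B = {-7, 0, 9, ...} gives {-1, 0, 3, ...}.  */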
extern __inline __m64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_sign_pi8 (__m64 __A, __m64 __B)
{
  const __v16qi __zero = { 0 };
  __v16qi __C = (__v16qi) (__v2du) { __A, __A };
  __v16qi __D = (__v16qi) (__v2du) { __B, __B };
  __C = (__v16qi) _mm_sign_epi8 ((__m128i) __C, (__m128i) __D);
  return (__m64) ((__v2du) (__C))[0];
}
extern __inline __m64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_sign_pi16 (__m64 __A, __m64 __B)
{
  const __v8hi __zero = { 0 };
  __v8hi __C = (__v8hi) (__v2du) { __A, __A };
  __v8hi __D = (__v8hi) (__v2du) { __B, __B };
  __C = (__v8hi) _mm_sign_epi16 ((__m128i) __C, (__m128i) __D);
  return (__m64) ((__v2du) (__C))[0];
}
extern __inline __m64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_sign_pi32 (__m64 __A, __m64 __B)
{
  const __v4si __zero = { 0 };
  __v4si __C = (__v4si) (__v2du) { __A, __A };
  __v4si __D = (__v4si) (__v2du) { __B, __B };
  __C = (__v4si) _mm_sign_epi32 ((__m128i) __C, (__m128i) __D);
  return (__m64) ((__v2du) (__C))[0];
}
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_maddubs_epi16 (__m128i __A, __m128i __B)
{
  __v8hi __unsigned = vec_splats ((signed short) 0x00ff);
  __v8hi __C = vec_and (vec_unpackh ((__v16qi) __A), __unsigned);
  __v8hi __D = vec_and (vec_unpackl ((__v16qi) __A), __unsigned);
  __v8hi __E = vec_unpackh ((__v16qi) __B);
  __v8hi __F = vec_unpackl ((__v16qi) __B);
  __C = vec_mul (__C, __E);
  __D = vec_mul (__D, __F);
  const __v16qu __odds =
    { 0, 1, 4, 5, 8, 9, 12, 13, 16, 17, 20, 21, 24, 25, 28, 29 };
  const __v16qu __evens =
    { 2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31 };
  __E = vec_perm (__C, __D, __odds);
  __F = vec_perm (__C, __D, __evens);
  return (__m128i) vec_adds (__E, __F);
}
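/* PMADDUBSW treats __A as unsigned bytes and __B as signed bytes:
   adjacent byte products are summed pairwise into signed 16-bit lanes
   with saturation.  The 0x00ff mask undoes the sign extension of the
   unpacked __A bytes; e.g. a lane built from __A bytes {255, 2} and
   __B bytes {-1, 3} yields 255 * -1 + 2 * 3 = -249.  */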
extern __inline __m64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_maddubs_pi16 (__m64 __A, __m64 __B)
{
  __v8hi __C = (__v8hi) (__v2du) { __A, __A };
  __C = vec_unpackl ((__v16qi) __C);
  const __v8hi __unsigned = vec_splats ((signed short) 0x00ff);
  __C = vec_and (__C, __unsigned);
  __v8hi __D = (__v8hi) (__v2du) { __B, __B };
  __D = vec_unpackl ((__v16qi) __D);
  __D = vec_mul (__C, __D);
  const __v16qu __odds =
    { 0, 1, 4, 5, 8, 9, 12, 13, 16, 17, 20, 21, 24, 25, 28, 29 };
  const __v16qu __evens =
    { 2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31 };
  __C = vec_perm (__D, __D, __odds);
  __D = vec_perm (__D, __D, __evens);
  __C = vec_adds (__C, __D);
  return (__m64) ((__v2du) (__C))[0];
}
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_mulhrs_epi16 (__m128i __A, __m128i __B)
{
  __v4si __C = vec_unpackh ((__v8hi) __A);
  __v4si __D = vec_unpackh ((__v8hi) __B);
  __C = vec_mul (__C, __D);
  __D = vec_unpackl ((__v8hi) __A);
  __v4si __E = vec_unpackl ((__v8hi) __B);
  __D = vec_mul (__D, __E);
  const __v4su __shift = vec_splats ((unsigned int) 14);
  __C = vec_sr (__C, __shift);
  __D = vec_sr (__D, __shift);
  const __v4si __ones = vec_splats ((signed int) 1);
  __C = vec_add (__C, __ones);
  __C = vec_sr (__C, (__v4su) __ones);
  __D = vec_add (__D, __ones);
  __D = vec_sr (__D, (__v4su) __ones);
  return (__m128i) vec_pack (__C, __D);
}
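/* The shift-by-14, add-one, shift-by-one sequence is the PMULHRSW
   rounding step: each 16-bit result equals (__A * __B + 0x4000) >> 15,
   i.e. the high bits of the doubled product rounded to nearest, with
   ties rounded up.  */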
extern __inline __m64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_mulhrs_pi16 (__m64 __A, __m64 __B)
{
  __v4si __C = (__v4si) (__v2du) { __A, __A };
  __C = vec_unpackh ((__v8hi) __C);
  __v4si __D = (__v4si) (__v2du) { __B, __B };
  __D = vec_unpackh ((__v8hi) __D);
  __C = vec_mul (__C, __D);
  const __v4su __shift = vec_splats ((unsigned int) 14);
  __C = vec_sr (__C, __shift);
  const __v4si __ones = vec_splats ((signed int) 1);
  __C = vec_add (__C, __ones);
  __C = vec_sr (__C, (__v4su) __ones);
  __v8hi __E = vec_pack (__C, __D);
  return (__m64) ((__v2du) (__E))[0];
}
#else
#include_next <tmmintrin.h>
#endif /* defined(__linux__) && defined(__ppc64__) */

#endif /* TMMINTRIN_H_ */
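/* Usage note: translation units ported with these wrappers are
   normally compiled with -DNO_WARN_X86_INTRINSICS to silence the
   #error at the top of this header, and typically target a
   VSX-capable processor (e.g. gcc -mcpu=power8).  */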