#ifndef NO_WARN_X86_INTRINSICS
/* This header is distributed to simplify porting x86_64 code that
   makes explicit use of Intel intrinsics to powerpc64le.  It is the
   user's responsibility to determine if the results are acceptable
   and make additional changes as necessary.  */
#error "Please read comment above.  Use -DNO_WARN_X86_INTRINSICS to disable this error."
#endif

#ifndef TMMINTRIN_H_
#define TMMINTRIN_H_
#if defined(__linux__) && defined(__ppc64__)

#include <altivec.h>

/* We need definitions from the SSE header files.  */
#include <pmmintrin.h>
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_abs_epi16 (__m128i __A)
{
  return (__m128i) vec_abs ((__v8hi) __A);
}
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_abs_epi32 (__m128i __A)
{
  return (__m128i) vec_abs ((__v4si) __A);
}
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_abs_epi8 (__m128i __A)
{
  return (__m128i) vec_abs ((__v16qi) __A);
}
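
/* The __m64 (MMX) variants in this header follow one pattern: splat
   the 64-bit operand into both doublewords of a 128-bit vector, run
   the full-width VMX operation, and extract a single 64-bit lane of
   the result.  */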
extern __inline __m64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_abs_pi16 (__m64 __A)
{
  __v8hi __B = (__v8hi) (__v2du) { __A, __A };
  return (__m64) ((__v2du) vec_abs (__B))[0];
}
extern __inline __m64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_abs_pi32 (__m64 __A)
{
  __v4si __B = (__v4si) (__v2du) { __A, __A };
  return (__m64) ((__v2du) vec_abs (__B))[0];
}
extern __inline __m64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_abs_pi8 (__m64 __A)
{
  __v16qi __B = (__v16qi) (__v2du) { __A, __A };
  return (__m64) ((__v2du) vec_abs (__B))[0];
}
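
/* _mm_alignr_epi8 concatenates __B:__A and shifts the pair right by
   __count bytes.  A compile-time-constant __count below 16 maps to a
   single vec_sld; the runtime paths afterwards handle __count values
   of 0, 16..31, and 32 and up (which yields zero) explicitly.  */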
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_alignr_epi8 (__m128i __A, __m128i __B, const unsigned int __count)
{
  if (__builtin_constant_p (__count) && __count < 16)
    {
#ifdef __LITTLE_ENDIAN__
      /* vec_sld is defined in big-endian element order; byte-reverse
         the inputs (and the result) so the shift matches the
         little-endian x86 semantics.  */
      __A = (__m128i) vec_reve ((__v16qu) __A);
      __B = (__m128i) vec_reve ((__v16qu) __B);
#endif
      __A = (__m128i) vec_sld ((__v16qu) __B, (__v16qu) __A, __count);
#ifdef __LITTLE_ENDIAN__
      __A = (__m128i) vec_reve ((__v16qu) __A);
#endif
      return __A;
    }

  if (__count == 0)
    return __B;

  if (__count >= 16)
    {
      if (__count >= 32)
        {
          const __v16qu zero = { 0 };
          return (__m128i) zero;
        }
      else
        {
          const __v16qu __shift =
            vec_splats ((unsigned char) ((__count - 16) * 8));
#ifdef __LITTLE_ENDIAN__
          return (__m128i) vec_sro ((__v16qu) __A, __shift);
#else
          return (__m128i) vec_slo ((__v16qu) __A, __shift);
#endif
        }
    }
  else
    {
      const __v16qu __shiftA =
        vec_splats ((unsigned char) ((16 - __count) * 8));
      const __v16qu __shiftB = vec_splats ((unsigned char) (__count * 8));
#ifdef __LITTLE_ENDIAN__
      __A = (__m128i) vec_slo ((__v16qu) __A, __shiftA);
      __B = (__m128i) vec_sro ((__v16qu) __B, __shiftB);
#else
      __A = (__m128i) vec_sro ((__v16qu) __A, __shiftA);
      __B = (__m128i) vec_slo ((__v16qu) __B, __shiftB);
#endif
      return (__m128i) vec_or ((__v16qu) __A, (__v16qu) __B);
    }
}
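
/* Note on the shift encodings above and below: vec_slo/vec_sro take
   their octet (byte) shift count from bits 121:124 of the shift
   vector, i.e. bits 1:4 of its low-order byte, which is why the byte
   counts are pre-multiplied by 8 (__count * 8, __count << 3) before
   being splatted or placed into the shift vector.  */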
extern __inline __m64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_alignr_pi8 (__m64 __A, __m64 __B, unsigned int __count)
{
  if (__count < 16)
    {
      __v2du __C = { __B, __A };
#ifdef __LITTLE_ENDIAN__
      const __v4su __shift = { __count << 3, 0, 0, 0 };
      __C = (__v2du) vec_sro ((__v16qu) __C, (__v16qu) __shift);
#else
      const __v4su __shift = { 0, 0, 0, __count << 3 };
      __C = (__v2du) vec_slo ((__v16qu) __C, (__v16qu) __shift);
#endif
      return (__m64) __C[0];
    }
  else
    {
      const __m64 __zero = { 0 };
      return __zero;
    }
}
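
/* The horizontal add/subtract operations below share one trick: __P
   gathers the even-numbered elements of the concatenation __A:__B
   while __Q gathers the odd-numbered elements, so an element-wise
   vec_add/vec_sub of the two permuted vectors produces the horizontal
   result in one step.  */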
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_hadd_epi16 (__m128i __A, __m128i __B)
{
  const __v16qu __P =
    { 0, 1, 4, 5, 8, 9, 12, 13, 16, 17, 20, 21, 24, 25, 28, 29 };
  const __v16qu __Q =
    { 2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31 };
  __v8hi __C = vec_perm ((__v8hi) __A, (__v8hi) __B, __P);
  __v8hi __D = vec_perm ((__v8hi) __A, (__v8hi) __B, __Q);
  return (__m128i) vec_add (__C, __D);
}
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_hadd_epi32 (__m128i __A, __m128i __B)
{
  const __v16qu __P =
    { 0, 1, 2, 3, 8, 9, 10, 11, 16, 17, 18, 19, 24, 25, 26, 27 };
  const __v16qu __Q =
    { 4, 5, 6, 7, 12, 13, 14, 15, 20, 21, 22, 23, 28, 29, 30, 31 };
  __v4si __C = vec_perm ((__v4si) __A, (__v4si) __B, __P);
  __v4si __D = vec_perm ((__v4si) __A, (__v4si) __B, __Q);
  return (__m128i) vec_add (__C, __D);
}
extern __inline __m64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_hadd_pi16 (__m64 __A, __m64 __B)
{
  __v8hi __C = (__v8hi) (__v2du) { __A, __B };
  const __v16qu __P =
    { 0, 1, 4, 5, 8, 9, 12, 13, 0, 1, 4, 5, 8, 9, 12, 13 };
  const __v16qu __Q =
    { 2, 3, 6, 7, 10, 11, 14, 15, 2, 3, 6, 7, 10, 11, 14, 15 };
  __v8hi __D = vec_perm (__C, __C, __Q);
  __C = vec_perm (__C, __C, __P);
  __C = vec_add (__C, __D);
  return (__m64) ((__v2du) __C)[1];
}
extern __inline __m64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_hadd_pi32 (__m64 __A, __m64 __B)
{
  __v4si __C = (__v4si) (__v2du) { __A, __B };
  const __v16qu __P =
    { 0, 1, 2, 3, 8, 9, 10, 11, 0, 1, 2, 3, 8, 9, 10, 11 };
  const __v16qu __Q =
    { 4, 5, 6, 7, 12, 13, 14, 15, 4, 5, 6, 7, 12, 13, 14, 15 };
  __v4si __D = vec_perm (__C, __C, __Q);
  __C = vec_perm (__C, __C, __P);
  __C = vec_add (__C, __D);
  return (__m64) ((__v2du) __C)[1];
}
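
/* For the saturating horizontal adds, vec_sum4s first sums adjacent
   signed halfwords into full signed words (so no precision is lost),
   and vec_packs then packs the word sums back down to halfwords with
   signed saturation, matching the phaddsw behaviour.  */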
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_hadds_epi16 (__m128i __A, __m128i __B)
{
  __v4si __C = { 0 }, __D = { 0 };
  __C = vec_sum4s ((__v8hi) __A, __C);
  __D = vec_sum4s ((__v8hi) __B, __D);
  __C = (__v4si) vec_packs (__C, __D);
  return (__m128i) __C;
}
extern __inline __m64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_hadds_pi16 (__m64 __A, __m64 __B)
{
  const __v4si __zero = { 0 };
  __v8hi __C = (__v8hi) (__v2du) { __A, __B };
  __v4si __D = vec_sum4s (__C, __zero);
  __C = vec_packs (__D, __D);
  return (__m64) ((__v2du) __C)[1];
}
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_hsub_epi16 (__m128i __A, __m128i __B)
{
  const __v16qu __P =
    { 0, 1, 4, 5, 8, 9, 12, 13, 16, 17, 20, 21, 24, 25, 28, 29 };
  const __v16qu __Q =
    { 2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31 };
  __v8hi __C = vec_perm ((__v8hi) __A, (__v8hi) __B, __P);
  __v8hi __D = vec_perm ((__v8hi) __A, (__v8hi) __B, __Q);
  return (__m128i) vec_sub (__C, __D);
}
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_hsub_epi32 (__m128i __A, __m128i __B)
{
  const __v16qu __P =
    { 0, 1, 2, 3, 8, 9, 10, 11, 16, 17, 18, 19, 24, 25, 26, 27 };
  const __v16qu __Q =
    { 4, 5, 6, 7, 12, 13, 14, 15, 20, 21, 22, 23, 28, 29, 30, 31 };
  __v4si __C = vec_perm ((__v4si) __A, (__v4si) __B, __P);
  __v4si __D = vec_perm ((__v4si) __A, (__v4si) __B, __Q);
  return (__m128i) vec_sub (__C, __D);
}
extern __inline __m64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_hsub_pi16 (__m64 __A, __m64 __B)
{
  const __v16qu __P =
    { 0, 1, 4, 5, 8, 9, 12, 13, 0, 1, 4, 5, 8, 9, 12, 13 };
  const __v16qu __Q =
    { 2, 3, 6, 7, 10, 11, 14, 15, 2, 3, 6, 7, 10, 11, 14, 15 };
  __v8hi __C = (__v8hi) (__v2du) { __A, __B };
  __v8hi __D = vec_perm (__C, __C, __Q);
  __C = vec_perm (__C, __C, __P);
  __C = vec_sub (__C, __D);
  return (__m64) ((__v2du) __C)[1];
}
extern __inline __m64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_hsub_pi32 (__m64 __A, __m64 __B)
{
  const __v16qu __P =
    { 0, 1, 2, 3, 8, 9, 10, 11, 0, 1, 2, 3, 8, 9, 10, 11 };
  const __v16qu __Q =
    { 4, 5, 6, 7, 12, 13, 14, 15, 4, 5, 6, 7, 12, 13, 14, 15 };
  __v4si __C = (__v4si) (__v2du) { __A, __B };
  __v4si __D = vec_perm (__C, __C, __Q);
  __C = vec_perm (__C, __C, __P);
  __C = vec_sub (__C, __D);
  return (__m64) ((__v2du) __C)[1];
}
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_hsubs_epi16 (__m128i __A, __m128i __B)
{
  const __v16qu __P =
    { 0, 1, 4, 5, 8, 9, 12, 13, 16, 17, 20, 21, 24, 25, 28, 29 };
  const __v16qu __Q =
    { 2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31 };
  __v8hi __C = vec_perm ((__v8hi) __A, (__v8hi) __B, __P);
  __v8hi __D = vec_perm ((__v8hi) __A, (__v8hi) __B, __Q);
  return (__m128i) vec_subs (__C, __D);
}
extern __inline __m64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_hsubs_pi16 (__m64 __A, __m64 __B)
{
  const __v16qu __P =
    { 0, 1, 4, 5, 8, 9, 12, 13, 0, 1, 4, 5, 8, 9, 12, 13 };
  const __v16qu __Q =
    { 2, 3, 6, 7, 10, 11, 14, 15, 2, 3, 6, 7, 10, 11, 14, 15 };
  __v8hi __C = (__v8hi) (__v2du) { __A, __B };
  __v8hi __D = vec_perm (__C, __C, __Q);
  __C = vec_perm (__C, __C, __P);
  __C = vec_subs (__C, __D);
  return (__m64) ((__v2du) __C)[1];
}
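
/* pshufb semantics: vec_perm reads only the low five bits of each
   selector byte, and with __A supplied as both permute operands an
   index is taken mod 16, just as pshufb uses the low four bits.  The
   vec_cmplt/vec_sel pair then clears every lane whose selector byte
   has its sign bit set, which is pshufb's zeroing rule.  */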
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_shuffle_epi8 (__m128i __A, __m128i __B)
{
  const __v16qi __zero = { 0 };
  __vector __bool char __select = vec_cmplt ((__v16qi) __B, __zero);
  __v16qi __C = vec_perm ((__v16qi) __A, (__v16qi) __A, (__v16qu) __B);
  return (__m128i) vec_sel (__C, __zero, __select);
}
extern __inline __m64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_shuffle_pi8 (__m64 __A, __m64 __B)
{
  const __v16qi __zero = { 0 };
  __v16qi __C = (__v16qi) (__v2du) { __A, __A };
  __v16qi __D = (__v16qi) (__v2du) { __B, __B };
  __vector __bool char __select = vec_cmplt ((__v16qi) __D, __zero);
  __C = vec_perm ((__v16qi) __C, (__v16qi) __C, (__v16qu) __D);
  __C = vec_sel (__C, __zero, __select);
  return (__m64) ((__v2du) (__C))[0];
}
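
/* The _mm_sign_* family builds a -1/0/+1 multiplier from __B:
   vec_cmplt gives -1 in the lanes where __B is negative, vec_neg of
   vec_cmpgt gives +1 in the lanes where it is positive, and their sum
   is the signum that is finally multiplied into __A.  */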
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_sign_epi8 (__m128i __A, __m128i __B)
{
  const __v16qi __zero = { 0 };
  __v16qi __selectneg = (__v16qi) vec_cmplt ((__v16qi) __B, __zero);
  __v16qi __selectpos =
    (__v16qi) vec_neg ((__v16qi) vec_cmpgt ((__v16qi) __B, __zero));
  __v16qi __conv = vec_add (__selectneg, __selectpos);
  return (__m128i) vec_mul ((__v16qi) __A, (__v16qi) __conv);
}
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_sign_epi16 (__m128i __A, __m128i __B)
{
  const __v8hi __zero = { 0 };
  __v8hi __selectneg = (__v8hi) vec_cmplt ((__v8hi) __B, __zero);
  __v8hi __selectpos =
    (__v8hi) vec_neg ((__v8hi) vec_cmpgt ((__v8hi) __B, __zero));
  __v8hi __conv = vec_add (__selectneg, __selectpos);
  return (__m128i) vec_mul ((__v8hi) __A, (__v8hi) __conv);
}
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_sign_epi32 (__m128i __A, __m128i __B)
{
  const __v4si __zero = { 0 };
  __v4si __selectneg = (__v4si) vec_cmplt ((__v4si) __B, __zero);
  __v4si __selectpos =
    (__v4si) vec_neg ((__v4si) vec_cmpgt ((__v4si) __B, __zero));
  __v4si __conv = vec_add (__selectneg, __selectpos);
  return (__m128i) vec_mul ((__v4si) __A, (__v4si) __conv);
}
extern __inline __m64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_sign_pi8 (__m64 __A, __m64 __B)
{
  const __v16qi __zero = { 0 };
  __v16qi __C = (__v16qi) (__v2du) { __A, __A };
  __v16qi __D = (__v16qi) (__v2du) { __B, __B };
  __C = (__v16qi) _mm_sign_epi8 ((__m128i) __C, (__m128i) __D);
  return (__m64) ((__v2du) (__C))[0];
}
extern __inline __m64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_sign_pi16 (__m64 __A, __m64 __B)
{
  const __v8hi __zero = { 0 };
  __v8hi __C = (__v8hi) (__v2du) { __A, __A };
  __v8hi __D = (__v8hi) (__v2du) { __B, __B };
  __C = (__v8hi) _mm_sign_epi16 ((__m128i) __C, (__m128i) __D);
  return (__m64) ((__v2du) (__C))[0];
}
extern __inline __m64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_sign_pi32 (__m64 __A, __m64 __B)
{
  const __v4si __zero = { 0 };
  __v4si __C = (__v4si) (__v2du) { __A, __A };
  __v4si __D = (__v4si) (__v2du) { __B, __B };
  __C = (__v4si) _mm_sign_epi32 ((__m128i) __C, (__m128i) __D);
  return (__m64) ((__v2du) (__C))[0];
}
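
/* For maddubs, __A is treated as unsigned bytes: the 0x00ff mask
   strips the sign extension left by vec_unpackh/vec_unpackl, while
   __B keeps its sign.  The 16-bit products of adjacent byte pairs are
   then gathered by the __odds/__evens permutes and added with signed
   saturation (vec_adds), as pmaddubsw requires.  */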
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_maddubs_epi16 (__m128i __A, __m128i __B)
{
  __v8hi __unsigned = vec_splats ((signed short) 0x00ff);
  __v8hi __C = vec_and (vec_unpackh ((__v16qi) __A), __unsigned);
  __v8hi __D = vec_and (vec_unpackl ((__v16qi) __A), __unsigned);
  __v8hi __E = vec_unpackh ((__v16qi) __B);
  __v8hi __F = vec_unpackl ((__v16qi) __B);
  __C = vec_mul (__C, __E);
  __D = vec_mul (__D, __F);
  const __v16qu __odds =
    { 0, 1, 4, 5, 8, 9, 12, 13, 16, 17, 20, 21, 24, 25, 28, 29 };
  const __v16qu __evens =
    { 2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31 };
  __E = vec_perm (__C, __D, __odds);
  __F = vec_perm (__C, __D, __evens);
  return (__m128i) vec_adds (__E, __F);
}
extern __inline __m64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_maddubs_pi16 (__m64 __A, __m64 __B)
{
  __v8hi __C = (__v8hi) (__v2du) { __A, __A };
  __C = vec_unpackl ((__v16qi) __C);
  const __v8hi __unsigned = vec_splats ((signed short) 0x00ff);
  __C = vec_and (__C, __unsigned);
  __v8hi __D = (__v8hi) (__v2du) { __B, __B };
  __D = vec_unpackl ((__v16qi) __D);
  __D = vec_mul (__C, __D);
  const __v16qu __odds =
    { 0, 1, 4, 5, 8, 9, 12, 13, 16, 17, 20, 21, 24, 25, 28, 29 };
  const __v16qu __evens =
    { 2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31 };
  __C = vec_perm (__D, __D, __odds);
  __D = vec_perm (__D, __D, __evens);
  __C = vec_adds (__C, __D);
  return (__m64) ((__v2du) (__C))[0];
}
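
/* The mulhrs sequence -- shift the 32-bit products right by 14, add
   1, shift right by 1, then pack -- computes ((__A * __B) + 0x4000)
   >> 15 per lane, i.e. the rounded high half defined by pmulhrsw.  */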
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_mulhrs_epi16 (__m128i __A, __m128i __B)
{
  __v4si __C = vec_unpackh ((__v8hi) __A);
  __v4si __D = vec_unpackh ((__v8hi) __B);
  __C = vec_mul (__C, __D);
  __D = vec_unpackl ((__v8hi) __A);
  __v4si __E = vec_unpackl ((__v8hi) __B);
  __D = vec_mul (__D, __E);
  const __v4su __shift = vec_splats ((unsigned int) 14);
  __C = vec_sr (__C, __shift);
  __D = vec_sr (__D, __shift);
  const __v4si __ones = vec_splats ((signed int) 1);
  __C = vec_add (__C, __ones);
  __C = vec_sr (__C, (__v4su) __ones);
  __D = vec_add (__D, __ones);
  __D = vec_sr (__D, (__v4su) __ones);
  return (__m128i) vec_pack (__C, __D);
}
extern __inline __m64
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_mulhrs_pi16 (__m64 __A, __m64 __B)
{
  __v4si __C = (__v4si) (__v2du) { __A, __A };
  __C = vec_unpackh ((__v8hi) __C);
  __v4si __D = (__v4si) (__v2du) { __B, __B };
  __D = vec_unpackh ((__v8hi) __D);
  __C = vec_mul (__C, __D);
  const __v4su __shift = vec_splats ((unsigned int) 14);
  __C = vec_sr (__C, __shift);
  const __v4si __ones = vec_splats ((signed int) 1);
  __C = vec_add (__C, __ones);
  __C = vec_sr (__C, (__v4su) __ones);
  __v8hi __E = vec_pack (__C, __D);
  return (__m64) ((__v2du) (__E))[0];
}

#else
#include_next <tmmintrin.h>
#endif /* defined(__linux__) && defined(__ppc64__) */

#endif /* TMMINTRIN_H_ */