ETISS 0.8.0
Extendable Translating Instruction Set Simulator (version 0.8.0)
emmintrin.h
Go to the documentation of this file.
1 /*===---- emmintrin.h - SSE2 intrinsics ------------------------------------===
2  *
3  * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4  * See https://llvm.org/LICENSE.txt for license information.
5  * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6  *
7  *===-----------------------------------------------------------------------===
8  */
9 
10 #ifndef __EMMINTRIN_H
11 #define __EMMINTRIN_H
12 
13 #include <xmmintrin.h>
14 
15 typedef double __m128d __attribute__((__vector_size__(16), __aligned__(16)));
16 typedef long long __m128i __attribute__((__vector_size__(16), __aligned__(16)));
17 
18 typedef double __m128d_u __attribute__((__vector_size__(16), __aligned__(1)));
19 typedef long long __m128i_u __attribute__((__vector_size__(16), __aligned__(1)));
20 
21 /* Type defines. */
22 typedef double __v2df __attribute__ ((__vector_size__ (16)));
23 typedef long long __v2di __attribute__ ((__vector_size__ (16)));
24 typedef short __v8hi __attribute__((__vector_size__(16)));
25 typedef char __v16qi __attribute__((__vector_size__(16)));
26 
27 /* Unsigned types */
28 typedef unsigned long long __v2du __attribute__ ((__vector_size__ (16)));
29 typedef unsigned short __v8hu __attribute__((__vector_size__(16)));
30 typedef unsigned char __v16qu __attribute__((__vector_size__(16)));
31 
32 /* We need an explicitly signed variant for char. Note that this shouldn't
33  * appear in the interface though. */
34 typedef signed char __v16qs __attribute__((__vector_size__(16)));
35 
36 /* Define the default attributes for the functions in this file. */
37 #define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("sse2"), __min_vector_width__(128)))
38 #define __DEFAULT_FN_ATTRS_MMX __attribute__((__always_inline__, __nodebug__, __target__("mmx,sse2"), __min_vector_width__(64)))
39 
55 static __inline__ __m128d __DEFAULT_FN_ATTRS
56 _mm_add_sd(__m128d __a, __m128d __b)
57 {
58  __a[0] += __b[0];
59  return __a;
60 }
61 
74 static __inline__ __m128d __DEFAULT_FN_ATTRS
75 _mm_add_pd(__m128d __a, __m128d __b)
76 {
77  return (__m128d)((__v2df)__a + (__v2df)__b);
78 }
79 
97 static __inline__ __m128d __DEFAULT_FN_ATTRS
98 _mm_sub_sd(__m128d __a, __m128d __b)
99 {
100  __a[0] -= __b[0];
101  return __a;
102 }
103 
116 static __inline__ __m128d __DEFAULT_FN_ATTRS
117 _mm_sub_pd(__m128d __a, __m128d __b)
118 {
119  return (__m128d)((__v2df)__a - (__v2df)__b);
120 }
121 
138 static __inline__ __m128d __DEFAULT_FN_ATTRS
139 _mm_mul_sd(__m128d __a, __m128d __b)
140 {
141  __a[0] *= __b[0];
142  return __a;
143 }
144 
157 static __inline__ __m128d __DEFAULT_FN_ATTRS
158 _mm_mul_pd(__m128d __a, __m128d __b)
159 {
160  return (__m128d)((__v2df)__a * (__v2df)__b);
161 }
162 
180 static __inline__ __m128d __DEFAULT_FN_ATTRS
181 _mm_div_sd(__m128d __a, __m128d __b)
182 {
183  __a[0] /= __b[0];
184  return __a;
185 }
186 
200 static __inline__ __m128d __DEFAULT_FN_ATTRS
201 _mm_div_pd(__m128d __a, __m128d __b)
202 {
203  return (__m128d)((__v2df)__a / (__v2df)__b);
204 }
205 
225 static __inline__ __m128d __DEFAULT_FN_ATTRS
226 _mm_sqrt_sd(__m128d __a, __m128d __b)
227 {
228  __m128d __c = __builtin_ia32_sqrtsd((__v2df)__b);
229  return __extension__ (__m128d) { __c[0], __a[1] };
230 }
231 
243 static __inline__ __m128d __DEFAULT_FN_ATTRS
244 _mm_sqrt_pd(__m128d __a)
245 {
246  return __builtin_ia32_sqrtpd((__v2df)__a);
247 }
248 
267 static __inline__ __m128d __DEFAULT_FN_ATTRS
268 _mm_min_sd(__m128d __a, __m128d __b)
269 {
270  return __builtin_ia32_minsd((__v2df)__a, (__v2df)__b);
271 }
272 
287 static __inline__ __m128d __DEFAULT_FN_ATTRS
288 _mm_min_pd(__m128d __a, __m128d __b)
289 {
290  return __builtin_ia32_minpd((__v2df)__a, (__v2df)__b);
291 }
292 
311 static __inline__ __m128d __DEFAULT_FN_ATTRS
312 _mm_max_sd(__m128d __a, __m128d __b)
313 {
314  return __builtin_ia32_maxsd((__v2df)__a, (__v2df)__b);
315 }
316 
331 static __inline__ __m128d __DEFAULT_FN_ATTRS
332 _mm_max_pd(__m128d __a, __m128d __b)
333 {
334  return __builtin_ia32_maxpd((__v2df)__a, (__v2df)__b);
335 }
336 
349 static __inline__ __m128d __DEFAULT_FN_ATTRS
350 _mm_and_pd(__m128d __a, __m128d __b)
351 {
352  return (__m128d)((__v2du)__a & (__v2du)__b);
353 }
354 
370 static __inline__ __m128d __DEFAULT_FN_ATTRS
371 _mm_andnot_pd(__m128d __a, __m128d __b)
372 {
373  return (__m128d)(~(__v2du)__a & (__v2du)__b);
374 }
375 
388 static __inline__ __m128d __DEFAULT_FN_ATTRS
389 _mm_or_pd(__m128d __a, __m128d __b)
390 {
391  return (__m128d)((__v2du)__a | (__v2du)__b);
392 }
393 
406 static __inline__ __m128d __DEFAULT_FN_ATTRS
407 _mm_xor_pd(__m128d __a, __m128d __b)
408 {
409  return (__m128d)((__v2du)__a ^ (__v2du)__b);
410 }
411 
425 static __inline__ __m128d __DEFAULT_FN_ATTRS
426 _mm_cmpeq_pd(__m128d __a, __m128d __b)
427 {
428  return (__m128d)__builtin_ia32_cmpeqpd((__v2df)__a, (__v2df)__b);
429 }
430 
445 static __inline__ __m128d __DEFAULT_FN_ATTRS
446 _mm_cmplt_pd(__m128d __a, __m128d __b)
447 {
448  return (__m128d)__builtin_ia32_cmpltpd((__v2df)__a, (__v2df)__b);
449 }
450 
466 static __inline__ __m128d __DEFAULT_FN_ATTRS
467 _mm_cmple_pd(__m128d __a, __m128d __b)
468 {
469  return (__m128d)__builtin_ia32_cmplepd((__v2df)__a, (__v2df)__b);
470 }
471 
487 static __inline__ __m128d __DEFAULT_FN_ATTRS
488 _mm_cmpgt_pd(__m128d __a, __m128d __b)
489 {
490  return (__m128d)__builtin_ia32_cmpltpd((__v2df)__b, (__v2df)__a);
491 }
492 
508 static __inline__ __m128d __DEFAULT_FN_ATTRS
509 _mm_cmpge_pd(__m128d __a, __m128d __b)
510 {
511  return (__m128d)__builtin_ia32_cmplepd((__v2df)__b, (__v2df)__a);
512 }
513 
531 static __inline__ __m128d __DEFAULT_FN_ATTRS
532 _mm_cmpord_pd(__m128d __a, __m128d __b)
533 {
534  return (__m128d)__builtin_ia32_cmpordpd((__v2df)__a, (__v2df)__b);
535 }
536 
555 static __inline__ __m128d __DEFAULT_FN_ATTRS
556 _mm_cmpunord_pd(__m128d __a, __m128d __b)
557 {
558  return (__m128d)__builtin_ia32_cmpunordpd((__v2df)__a, (__v2df)__b);
559 }
560 
576 static __inline__ __m128d __DEFAULT_FN_ATTRS
577 _mm_cmpneq_pd(__m128d __a, __m128d __b)
578 {
579  return (__m128d)__builtin_ia32_cmpneqpd((__v2df)__a, (__v2df)__b);
580 }
581 
597 static __inline__ __m128d __DEFAULT_FN_ATTRS
598 _mm_cmpnlt_pd(__m128d __a, __m128d __b)
599 {
600  return (__m128d)__builtin_ia32_cmpnltpd((__v2df)__a, (__v2df)__b);
601 }
602 
618 static __inline__ __m128d __DEFAULT_FN_ATTRS
619 _mm_cmpnle_pd(__m128d __a, __m128d __b)
620 {
621  return (__m128d)__builtin_ia32_cmpnlepd((__v2df)__a, (__v2df)__b);
622 }
623 
639 static __inline__ __m128d __DEFAULT_FN_ATTRS
640 _mm_cmpngt_pd(__m128d __a, __m128d __b)
641 {
642  return (__m128d)__builtin_ia32_cmpnltpd((__v2df)__b, (__v2df)__a);
643 }
644 
660 static __inline__ __m128d __DEFAULT_FN_ATTRS
661 _mm_cmpnge_pd(__m128d __a, __m128d __b)
662 {
663  return (__m128d)__builtin_ia32_cmpnlepd((__v2df)__b, (__v2df)__a);
664 }
665 
683 static __inline__ __m128d __DEFAULT_FN_ATTRS
684 _mm_cmpeq_sd(__m128d __a, __m128d __b)
685 {
686  return (__m128d)__builtin_ia32_cmpeqsd((__v2df)__a, (__v2df)__b);
687 }
688 
708 static __inline__ __m128d __DEFAULT_FN_ATTRS
709 _mm_cmplt_sd(__m128d __a, __m128d __b)
710 {
711  return (__m128d)__builtin_ia32_cmpltsd((__v2df)__a, (__v2df)__b);
712 }
713 
733 static __inline__ __m128d __DEFAULT_FN_ATTRS
734 _mm_cmple_sd(__m128d __a, __m128d __b)
735 {
736  return (__m128d)__builtin_ia32_cmplesd((__v2df)__a, (__v2df)__b);
737 }
738 
758 static __inline__ __m128d __DEFAULT_FN_ATTRS
759 _mm_cmpgt_sd(__m128d __a, __m128d __b)
760 {
761  __m128d __c = __builtin_ia32_cmpltsd((__v2df)__b, (__v2df)__a);
762  return __extension__ (__m128d) { __c[0], __a[1] };
763 }
764 
784 static __inline__ __m128d __DEFAULT_FN_ATTRS
785 _mm_cmpge_sd(__m128d __a, __m128d __b)
786 {
787  __m128d __c = __builtin_ia32_cmplesd((__v2df)__b, (__v2df)__a);
788  return __extension__ (__m128d) { __c[0], __a[1] };
789 }
790 
812 static __inline__ __m128d __DEFAULT_FN_ATTRS
813 _mm_cmpord_sd(__m128d __a, __m128d __b)
814 {
815  return (__m128d)__builtin_ia32_cmpordsd((__v2df)__a, (__v2df)__b);
816 }
817 
840 static __inline__ __m128d __DEFAULT_FN_ATTRS
841 _mm_cmpunord_sd(__m128d __a, __m128d __b)
842 {
843  return (__m128d)__builtin_ia32_cmpunordsd((__v2df)__a, (__v2df)__b);
844 }
845 
865 static __inline__ __m128d __DEFAULT_FN_ATTRS
866 _mm_cmpneq_sd(__m128d __a, __m128d __b)
867 {
868  return (__m128d)__builtin_ia32_cmpneqsd((__v2df)__a, (__v2df)__b);
869 }
870 
890 static __inline__ __m128d __DEFAULT_FN_ATTRS
891 _mm_cmpnlt_sd(__m128d __a, __m128d __b)
892 {
893  return (__m128d)__builtin_ia32_cmpnltsd((__v2df)__a, (__v2df)__b);
894 }
895 
915 static __inline__ __m128d __DEFAULT_FN_ATTRS
916 _mm_cmpnle_sd(__m128d __a, __m128d __b)
917 {
918  return (__m128d)__builtin_ia32_cmpnlesd((__v2df)__a, (__v2df)__b);
919 }
920 
940 static __inline__ __m128d __DEFAULT_FN_ATTRS
941 _mm_cmpngt_sd(__m128d __a, __m128d __b)
942 {
943  __m128d __c = __builtin_ia32_cmpnltsd((__v2df)__b, (__v2df)__a);
944  return __extension__ (__m128d) { __c[0], __a[1] };
945 }
946 
966 static __inline__ __m128d __DEFAULT_FN_ATTRS
967 _mm_cmpnge_sd(__m128d __a, __m128d __b)
968 {
969  __m128d __c = __builtin_ia32_cmpnlesd((__v2df)__b, (__v2df)__a);
970  return __extension__ (__m128d) { __c[0], __a[1] };
971 }
972 
991 static __inline__ int __DEFAULT_FN_ATTRS
992 _mm_comieq_sd(__m128d __a, __m128d __b)
993 {
994  return __builtin_ia32_comisdeq((__v2df)__a, (__v2df)__b);
995 }
996 
1017 static __inline__ int __DEFAULT_FN_ATTRS
1018 _mm_comilt_sd(__m128d __a, __m128d __b)
1019 {
1020  return __builtin_ia32_comisdlt((__v2df)__a, (__v2df)__b);
1021 }
1022 
1043 static __inline__ int __DEFAULT_FN_ATTRS
1044 _mm_comile_sd(__m128d __a, __m128d __b)
1045 {
1046  return __builtin_ia32_comisdle((__v2df)__a, (__v2df)__b);
1047 }
1048 
1069 static __inline__ int __DEFAULT_FN_ATTRS
1070 _mm_comigt_sd(__m128d __a, __m128d __b)
1071 {
1072  return __builtin_ia32_comisdgt((__v2df)__a, (__v2df)__b);
1073 }
1074 
1095 static __inline__ int __DEFAULT_FN_ATTRS
1096 _mm_comige_sd(__m128d __a, __m128d __b)
1097 {
1098  return __builtin_ia32_comisdge((__v2df)__a, (__v2df)__b);
1099 }
1100 
1121 static __inline__ int __DEFAULT_FN_ATTRS
1122 _mm_comineq_sd(__m128d __a, __m128d __b)
1123 {
1124  return __builtin_ia32_comisdneq((__v2df)__a, (__v2df)__b);
1125 }
1126 
1145 static __inline__ int __DEFAULT_FN_ATTRS
1146 _mm_ucomieq_sd(__m128d __a, __m128d __b)
1147 {
1148  return __builtin_ia32_ucomisdeq((__v2df)__a, (__v2df)__b);
1149 }
1150 
1171 static __inline__ int __DEFAULT_FN_ATTRS
1172 _mm_ucomilt_sd(__m128d __a, __m128d __b)
1173 {
1174  return __builtin_ia32_ucomisdlt((__v2df)__a, (__v2df)__b);
1175 }
1176 
1197 static __inline__ int __DEFAULT_FN_ATTRS
1198 _mm_ucomile_sd(__m128d __a, __m128d __b)
1199 {
1200  return __builtin_ia32_ucomisdle((__v2df)__a, (__v2df)__b);
1201 }
1202 
1223 static __inline__ int __DEFAULT_FN_ATTRS
1224 _mm_ucomigt_sd(__m128d __a, __m128d __b)
1225 {
1226  return __builtin_ia32_ucomisdgt((__v2df)__a, (__v2df)__b);
1227 }
1228 
1249 static __inline__ int __DEFAULT_FN_ATTRS
1250 _mm_ucomige_sd(__m128d __a, __m128d __b)
1251 {
1252  return __builtin_ia32_ucomisdge((__v2df)__a, (__v2df)__b);
1253 }
1254 
1275 static __inline__ int __DEFAULT_FN_ATTRS
1276 _mm_ucomineq_sd(__m128d __a, __m128d __b)
1277 {
1278  return __builtin_ia32_ucomisdneq((__v2df)__a, (__v2df)__b);
1279 }
1280 
1294 static __inline__ __m128 __DEFAULT_FN_ATTRS
1296 {
1297  return __builtin_ia32_cvtpd2ps((__v2df)__a);
1298 }
1299 
1314 static __inline__ __m128d __DEFAULT_FN_ATTRS
1316 {
1317  return (__m128d) __builtin_convertvector(
1318  __builtin_shufflevector((__v4sf)__a, (__v4sf)__a, 0, 1), __v2df);
1319 }
1320 
1337 static __inline__ __m128d __DEFAULT_FN_ATTRS
1339 {
1340  return (__m128d) __builtin_convertvector(
1341  __builtin_shufflevector((__v4si)__a, (__v4si)__a, 0, 1), __v2df);
1342 }
1343 
1357 static __inline__ __m128i __DEFAULT_FN_ATTRS
1359 {
1360  return __builtin_ia32_cvtpd2dq((__v2df)__a);
1361 }
1362 
1374 static __inline__ int __DEFAULT_FN_ATTRS
1376 {
1377  return __builtin_ia32_cvtsd2si((__v2df)__a);
1378 }
1379 
1399 static __inline__ __m128 __DEFAULT_FN_ATTRS
1400 _mm_cvtsd_ss(__m128 __a, __m128d __b)
1401 {
1402  return (__m128)__builtin_ia32_cvtsd2ss((__v4sf)__a, (__v2df)__b);
1403 }
1404 
1422 static __inline__ __m128d __DEFAULT_FN_ATTRS
1423 _mm_cvtsi32_sd(__m128d __a, int __b)
1424 {
1425  __a[0] = __b;
1426  return __a;
1427 }
1428 
1448 static __inline__ __m128d __DEFAULT_FN_ATTRS
1449 _mm_cvtss_sd(__m128d __a, __m128 __b)
1450 {
1451  __a[0] = __b[0];
1452  return __a;
1453 }
1454 
1472 static __inline__ __m128i __DEFAULT_FN_ATTRS
1474 {
1475  return (__m128i)__builtin_ia32_cvttpd2dq((__v2df)__a);
1476 }
1477 
1490 static __inline__ int __DEFAULT_FN_ATTRS
1492 {
1493  return __builtin_ia32_cvttsd2si((__v2df)__a);
1494 }
1495 
1507 static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
1509 {
1510  return (__m64)__builtin_ia32_cvtpd2pi((__v2df)__a);
1511 }
1512 
1527 static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
1529 {
1530  return (__m64)__builtin_ia32_cvttpd2pi((__v2df)__a);
1531 }
1532 
1544 static __inline__ __m128d __DEFAULT_FN_ATTRS_MMX
1546 {
1547  return __builtin_ia32_cvtpi2pd((__v2si)__a);
1548 }
1549 
1561 static __inline__ double __DEFAULT_FN_ATTRS
1563 {
1564  return __a[0];
1565 }
1566 
1578 static __inline__ __m128d __DEFAULT_FN_ATTRS
1579 _mm_load_pd(double const *__dp)
1580 {
1581  return *(const __m128d*)__dp;
1582 }
1583 
1596 static __inline__ __m128d __DEFAULT_FN_ATTRS
1597 _mm_load1_pd(double const *__dp)
1598 {
1599  struct __mm_load1_pd_struct {
1600  double __u;
1601  } __attribute__((__packed__, __may_alias__));
1602  double __u = ((const struct __mm_load1_pd_struct*)__dp)->__u;
1603  return __extension__ (__m128d){ __u, __u };
1604 }
1605 
1606 #define _mm_load_pd1(dp) _mm_load1_pd(dp)
1607 
1622 static __inline__ __m128d __DEFAULT_FN_ATTRS
1623 _mm_loadr_pd(double const *__dp)
1624 {
1625  __m128d __u = *(const __m128d*)__dp;
1626  return __builtin_shufflevector((__v2df)__u, (__v2df)__u, 1, 0);
1627 }
1628 
1640 static __inline__ __m128d __DEFAULT_FN_ATTRS
1641 _mm_loadu_pd(double const *__dp)
1642 {
1643  struct __loadu_pd {
1644  __m128d_u __v;
1645  } __attribute__((__packed__, __may_alias__));
1646  return ((const struct __loadu_pd*)__dp)->__v;
1647 }
1648 
1660 static __inline__ __m128i __DEFAULT_FN_ATTRS
1661 _mm_loadu_si64(void const *__a)
1662 {
1663  struct __loadu_si64 {
1664  long long __v;
1665  } __attribute__((__packed__, __may_alias__));
1666  long long __u = ((const struct __loadu_si64*)__a)->__v;
1667  return __extension__ (__m128i)(__v2di){__u, 0LL};
1668 }
1669 
1681 static __inline__ __m128i __DEFAULT_FN_ATTRS
1682 _mm_loadu_si32(void const *__a)
1683 {
1684  struct __loadu_si32 {
1685  int __v;
1686  } __attribute__((__packed__, __may_alias__));
1687  int __u = ((const struct __loadu_si32*)__a)->__v;
1688  return __extension__ (__m128i)(__v4si){__u, 0, 0, 0};
1689 }
1690 
1702 static __inline__ __m128i __DEFAULT_FN_ATTRS
1703 _mm_loadu_si16(void const *__a)
1704 {
1705  struct __loadu_si16 {
1706  short __v;
1707  } __attribute__((__packed__, __may_alias__));
1708  short __u = ((const struct __loadu_si16*)__a)->__v;
1709  return __extension__ (__m128i)(__v8hi){__u, 0, 0, 0, 0, 0, 0, 0};
1710 }
1711 
1723 static __inline__ __m128d __DEFAULT_FN_ATTRS
1724 _mm_load_sd(double const *__dp)
1725 {
1726  struct __mm_load_sd_struct {
1727  double __u;
1728  } __attribute__((__packed__, __may_alias__));
1729  double __u = ((const struct __mm_load_sd_struct*)__dp)->__u;
1730  return __extension__ (__m128d){ __u, 0 };
1731 }
1732 
1750 static __inline__ __m128d __DEFAULT_FN_ATTRS
1751 _mm_loadh_pd(__m128d __a, double const *__dp)
1752 {
1753  struct __mm_loadh_pd_struct {
1754  double __u;
1755  } __attribute__((__packed__, __may_alias__));
1756  double __u = ((const struct __mm_loadh_pd_struct*)__dp)->__u;
1757  return __extension__ (__m128d){ __a[0], __u };
1758 }
1759 
1777 static __inline__ __m128d __DEFAULT_FN_ATTRS
1778 _mm_loadl_pd(__m128d __a, double const *__dp)
1779 {
1780  struct __mm_loadl_pd_struct {
1781  double __u;
1782  } __attribute__((__packed__, __may_alias__));
1783  double __u = ((const struct __mm_loadl_pd_struct*)__dp)->__u;
1784  return __extension__ (__m128d){ __u, __a[1] };
1785 }
1786 
1798 static __inline__ __m128d __DEFAULT_FN_ATTRS
1800 {
1801  return (__m128d)__builtin_ia32_undef128();
1802 }
1803 
1818 static __inline__ __m128d __DEFAULT_FN_ATTRS
1819 _mm_set_sd(double __w)
1820 {
1821  return __extension__ (__m128d){ __w, 0 };
1822 }
1823 
1836 static __inline__ __m128d __DEFAULT_FN_ATTRS
1837 _mm_set1_pd(double __w)
1838 {
1839  return __extension__ (__m128d){ __w, __w };
1840 }
1841 
1854 static __inline__ __m128d __DEFAULT_FN_ATTRS
1855 _mm_set_pd1(double __w)
1856 {
1857  return _mm_set1_pd(__w);
1858 }
1859 
1874 static __inline__ __m128d __DEFAULT_FN_ATTRS
1875 _mm_set_pd(double __w, double __x)
1876 {
1877  return __extension__ (__m128d){ __x, __w };
1878 }
1879 
1895 static __inline__ __m128d __DEFAULT_FN_ATTRS
1896 _mm_setr_pd(double __w, double __x)
1897 {
1898  return __extension__ (__m128d){ __w, __x };
1899 }
1900 
1910 static __inline__ __m128d __DEFAULT_FN_ATTRS
1912 {
1913  return __extension__ (__m128d){ 0, 0 };
1914 }
1915 
1931 static __inline__ __m128d __DEFAULT_FN_ATTRS
1932 _mm_move_sd(__m128d __a, __m128d __b)
1933 {
1934  __a[0] = __b[0];
1935  return __a;
1936 }
1937 
1949 static __inline__ void __DEFAULT_FN_ATTRS
1950 _mm_store_sd(double *__dp, __m128d __a)
1951 {
1952  struct __mm_store_sd_struct {
1953  double __u;
1954  } __attribute__((__packed__, __may_alias__));
1955  ((struct __mm_store_sd_struct*)__dp)->__u = __a[0];
1956 }
1957 
1971 static __inline__ void __DEFAULT_FN_ATTRS
1972 _mm_store_pd(double *__dp, __m128d __a)
1973 {
1974  *(__m128d*)__dp = __a;
1975 }
1976 
1991 static __inline__ void __DEFAULT_FN_ATTRS
1992 _mm_store1_pd(double *__dp, __m128d __a)
1993 {
1994  __a = __builtin_shufflevector((__v2df)__a, (__v2df)__a, 0, 0);
1995  _mm_store_pd(__dp, __a);
1996 }
1997 
2012 static __inline__ void __DEFAULT_FN_ATTRS
2013 _mm_store_pd1(double *__dp, __m128d __a)
2014 {
2015  _mm_store1_pd(__dp, __a);
2016 }
2017 
2030 static __inline__ void __DEFAULT_FN_ATTRS
2031 _mm_storeu_pd(double *__dp, __m128d __a)
2032 {
2033  struct __storeu_pd {
2034  __m128d_u __v;
2035  } __attribute__((__packed__, __may_alias__));
2036  ((struct __storeu_pd*)__dp)->__v = __a;
2037 }
2038 
2053 static __inline__ void __DEFAULT_FN_ATTRS
2054 _mm_storer_pd(double *__dp, __m128d __a)
2055 {
2056  __a = __builtin_shufflevector((__v2df)__a, (__v2df)__a, 1, 0);
2057  *(__m128d *)__dp = __a;
2058 }
2059 
2071 static __inline__ void __DEFAULT_FN_ATTRS
2072 _mm_storeh_pd(double *__dp, __m128d __a)
2073 {
2074  struct __mm_storeh_pd_struct {
2075  double __u;
2076  } __attribute__((__packed__, __may_alias__));
2077  ((struct __mm_storeh_pd_struct*)__dp)->__u = __a[1];
2078 }
2079 
2091 static __inline__ void __DEFAULT_FN_ATTRS
2092 _mm_storel_pd(double *__dp, __m128d __a)
2093 {
2094  struct __mm_storeh_pd_struct {
2095  double __u;
2096  } __attribute__((__packed__, __may_alias__));
2097  ((struct __mm_storeh_pd_struct*)__dp)->__u = __a[0];
2098 }
2099 
2116 static __inline__ __m128i __DEFAULT_FN_ATTRS
2117 _mm_add_epi8(__m128i __a, __m128i __b)
2118 {
2119  return (__m128i)((__v16qu)__a + (__v16qu)__b);
2120 }
2121 
2138 static __inline__ __m128i __DEFAULT_FN_ATTRS
2139 _mm_add_epi16(__m128i __a, __m128i __b)
2140 {
2141  return (__m128i)((__v8hu)__a + (__v8hu)__b);
2142 }
2143 
2160 static __inline__ __m128i __DEFAULT_FN_ATTRS
2161 _mm_add_epi32(__m128i __a, __m128i __b)
2162 {
2163  return (__m128i)((__v4su)__a + (__v4su)__b);
2164 }
2165 
2178 static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
2179 _mm_add_si64(__m64 __a, __m64 __b)
2180 {
2181  return (__m64)__builtin_ia32_paddq((__v1di)__a, (__v1di)__b);
2182 }
2183 
2200 static __inline__ __m128i __DEFAULT_FN_ATTRS
2201 _mm_add_epi64(__m128i __a, __m128i __b)
2202 {
2203  return (__m128i)((__v2du)__a + (__v2du)__b);
2204 }
2205 
2221 static __inline__ __m128i __DEFAULT_FN_ATTRS
2222 _mm_adds_epi8(__m128i __a, __m128i __b)
2223 {
2224  return (__m128i)__builtin_ia32_paddsb128((__v16qi)__a, (__v16qi)__b);
2225 }
2226 
2243 static __inline__ __m128i __DEFAULT_FN_ATTRS
2244 _mm_adds_epi16(__m128i __a, __m128i __b)
2245 {
2246  return (__m128i)__builtin_ia32_paddsw128((__v8hi)__a, (__v8hi)__b);
2247 }
2248 
2264 static __inline__ __m128i __DEFAULT_FN_ATTRS
2265 _mm_adds_epu8(__m128i __a, __m128i __b)
2266 {
2267  return (__m128i)__builtin_ia32_paddusb128((__v16qi)__a, (__v16qi)__b);
2268 }
2269 
2285 static __inline__ __m128i __DEFAULT_FN_ATTRS
2286 _mm_adds_epu16(__m128i __a, __m128i __b)
2287 {
2288  return (__m128i)__builtin_ia32_paddusw128((__v8hi)__a, (__v8hi)__b);
2289 }
2290 
2305 static __inline__ __m128i __DEFAULT_FN_ATTRS
2306 _mm_avg_epu8(__m128i __a, __m128i __b)
2307 {
2308  return (__m128i)__builtin_ia32_pavgb128((__v16qi)__a, (__v16qi)__b);
2309 }
2310 
2325 static __inline__ __m128i __DEFAULT_FN_ATTRS
2326 _mm_avg_epu16(__m128i __a, __m128i __b)
2327 {
2328  return (__m128i)__builtin_ia32_pavgw128((__v8hi)__a, (__v8hi)__b);
2329 }
2330 
2351 static __inline__ __m128i __DEFAULT_FN_ATTRS
2352 _mm_madd_epi16(__m128i __a, __m128i __b)
2353 {
2354  return (__m128i)__builtin_ia32_pmaddwd128((__v8hi)__a, (__v8hi)__b);
2355 }
2356 
2371 static __inline__ __m128i __DEFAULT_FN_ATTRS
2372 _mm_max_epi16(__m128i __a, __m128i __b)
2373 {
2374  return (__m128i)__builtin_ia32_pmaxsw128((__v8hi)__a, (__v8hi)__b);
2375 }
2376 
2391 static __inline__ __m128i __DEFAULT_FN_ATTRS
2392 _mm_max_epu8(__m128i __a, __m128i __b)
2393 {
2394  return (__m128i)__builtin_ia32_pmaxub128((__v16qi)__a, (__v16qi)__b);
2395 }
2396 
2411 static __inline__ __m128i __DEFAULT_FN_ATTRS
2412 _mm_min_epi16(__m128i __a, __m128i __b)
2413 {
2414  return (__m128i)__builtin_ia32_pminsw128((__v8hi)__a, (__v8hi)__b);
2415 }
2416 
2431 static __inline__ __m128i __DEFAULT_FN_ATTRS
2432 _mm_min_epu8(__m128i __a, __m128i __b)
2433 {
2434  return (__m128i)__builtin_ia32_pminub128((__v16qi)__a, (__v16qi)__b);
2435 }
2436 
2451 static __inline__ __m128i __DEFAULT_FN_ATTRS
2452 _mm_mulhi_epi16(__m128i __a, __m128i __b)
2453 {
2454  return (__m128i)__builtin_ia32_pmulhw128((__v8hi)__a, (__v8hi)__b);
2455 }
2456 
2471 static __inline__ __m128i __DEFAULT_FN_ATTRS
2472 _mm_mulhi_epu16(__m128i __a, __m128i __b)
2473 {
2474  return (__m128i)__builtin_ia32_pmulhuw128((__v8hi)__a, (__v8hi)__b);
2475 }
2476 
2491 static __inline__ __m128i __DEFAULT_FN_ATTRS
2492 _mm_mullo_epi16(__m128i __a, __m128i __b)
2493 {
2494  return (__m128i)((__v8hu)__a * (__v8hu)__b);
2495 }
2496 
2510 static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
2511 _mm_mul_su32(__m64 __a, __m64 __b)
2512 {
2513  return __builtin_ia32_pmuludq((__v2si)__a, (__v2si)__b);
2514 }
2515 
2529 static __inline__ __m128i __DEFAULT_FN_ATTRS
2530 _mm_mul_epu32(__m128i __a, __m128i __b)
2531 {
2532  return __builtin_ia32_pmuludq128((__v4si)__a, (__v4si)__b);
2533 }
2534 
2551 static __inline__ __m128i __DEFAULT_FN_ATTRS
2552 _mm_sad_epu8(__m128i __a, __m128i __b)
2553 {
2554  return __builtin_ia32_psadbw128((__v16qi)__a, (__v16qi)__b);
2555 }
2556 
2569 static __inline__ __m128i __DEFAULT_FN_ATTRS
2570 _mm_sub_epi8(__m128i __a, __m128i __b)
2571 {
2572  return (__m128i)((__v16qu)__a - (__v16qu)__b);
2573 }
2574 
2587 static __inline__ __m128i __DEFAULT_FN_ATTRS
2588 _mm_sub_epi16(__m128i __a, __m128i __b)
2589 {
2590  return (__m128i)((__v8hu)__a - (__v8hu)__b);
2591 }
2592 
2605 static __inline__ __m128i __DEFAULT_FN_ATTRS
2606 _mm_sub_epi32(__m128i __a, __m128i __b)
2607 {
2608  return (__m128i)((__v4su)__a - (__v4su)__b);
2609 }
2610 
2624 static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
2625 _mm_sub_si64(__m64 __a, __m64 __b)
2626 {
2627  return (__m64)__builtin_ia32_psubq((__v1di)__a, (__v1di)__b);
2628 }
2629 
2642 static __inline__ __m128i __DEFAULT_FN_ATTRS
2643 _mm_sub_epi64(__m128i __a, __m128i __b)
2644 {
2645  return (__m128i)((__v2du)__a - (__v2du)__b);
2646 }
2647 
2663 static __inline__ __m128i __DEFAULT_FN_ATTRS
2664 _mm_subs_epi8(__m128i __a, __m128i __b)
2665 {
2666  return (__m128i)__builtin_ia32_psubsb128((__v16qi)__a, (__v16qi)__b);
2667 }
2668 
2684 static __inline__ __m128i __DEFAULT_FN_ATTRS
2685 _mm_subs_epi16(__m128i __a, __m128i __b)
2686 {
2687  return (__m128i)__builtin_ia32_psubsw128((__v8hi)__a, (__v8hi)__b);
2688 }
2689 
2704 static __inline__ __m128i __DEFAULT_FN_ATTRS
2705 _mm_subs_epu8(__m128i __a, __m128i __b)
2706 {
2707  return (__m128i)__builtin_ia32_psubusb128((__v16qi)__a, (__v16qi)__b);
2708 }
2709 
2724 static __inline__ __m128i __DEFAULT_FN_ATTRS
2725 _mm_subs_epu16(__m128i __a, __m128i __b)
2726 {
2727  return (__m128i)__builtin_ia32_psubusw128((__v8hi)__a, (__v8hi)__b);
2728 }
2729 
2742 static __inline__ __m128i __DEFAULT_FN_ATTRS
2743 _mm_and_si128(__m128i __a, __m128i __b)
2744 {
2745  return (__m128i)((__v2du)__a & (__v2du)__b);
2746 }
2747 
2762 static __inline__ __m128i __DEFAULT_FN_ATTRS
2763 _mm_andnot_si128(__m128i __a, __m128i __b)
2764 {
2765  return (__m128i)(~(__v2du)__a & (__v2du)__b);
2766 }
2779 static __inline__ __m128i __DEFAULT_FN_ATTRS
2780 _mm_or_si128(__m128i __a, __m128i __b)
2781 {
2782  return (__m128i)((__v2du)__a | (__v2du)__b);
2783 }
2784 
2797 static __inline__ __m128i __DEFAULT_FN_ATTRS
2798 _mm_xor_si128(__m128i __a, __m128i __b)
2799 {
2800  return (__m128i)((__v2du)__a ^ (__v2du)__b);
2801 }
2802 
/* Expands to a call of __builtin_ia32_pslldqi128_byteshift on `a` (viewed as
 * __v2di) with `imm` converted to int; the result is cast to __m128i.
 *
 * The whole expansion is parenthesized so that a postfix operator written
 * after the macro (for example a subscript) applies to the __m128i result
 * rather than binding to the builtin call before the cast.
 */
#define _mm_slli_si128(a, imm) \
  ((__m128i)__builtin_ia32_pslldqi128_byteshift((__v2di)(__m128i)(a), (int)(imm)))
2822 
/* Expands to a call of __builtin_ia32_pslldqi128_byteshift on `a` (viewed as
 * __v2di) with `imm` converted to int; the result is cast to __m128i. The
 * expansion is the same as that of _mm_slli_si128.
 *
 * The whole expansion is parenthesized so that a postfix operator written
 * after the macro (for example a subscript) applies to the __m128i result
 * rather than binding to the builtin call before the cast.
 */
#define _mm_bslli_si128(a, imm) \
  ((__m128i)__builtin_ia32_pslldqi128_byteshift((__v2di)(__m128i)(a), (int)(imm)))
2825 
2839 static __inline__ __m128i __DEFAULT_FN_ATTRS
2840 _mm_slli_epi16(__m128i __a, int __count)
2841 {
2842  return (__m128i)__builtin_ia32_psllwi128((__v8hi)__a, __count);
2843 }
2844 
2858 static __inline__ __m128i __DEFAULT_FN_ATTRS
2859 _mm_sll_epi16(__m128i __a, __m128i __count)
2860 {
2861  return (__m128i)__builtin_ia32_psllw128((__v8hi)__a, (__v8hi)__count);
2862 }
2863 
2877 static __inline__ __m128i __DEFAULT_FN_ATTRS
2878 _mm_slli_epi32(__m128i __a, int __count)
2879 {
2880  return (__m128i)__builtin_ia32_pslldi128((__v4si)__a, __count);
2881 }
2882 
2896 static __inline__ __m128i __DEFAULT_FN_ATTRS
2897 _mm_sll_epi32(__m128i __a, __m128i __count)
2898 {
2899  return (__m128i)__builtin_ia32_pslld128((__v4si)__a, (__v4si)__count);
2900 }
2901 
2915 static __inline__ __m128i __DEFAULT_FN_ATTRS
2916 _mm_slli_epi64(__m128i __a, int __count)
2917 {
2918  return __builtin_ia32_psllqi128((__v2di)__a, __count);
2919 }
2920 
2934 static __inline__ __m128i __DEFAULT_FN_ATTRS
2935 _mm_sll_epi64(__m128i __a, __m128i __count)
2936 {
2937  return __builtin_ia32_psllq128((__v2di)__a, (__v2di)__count);
2938 }
2939 
2954 static __inline__ __m128i __DEFAULT_FN_ATTRS
2955 _mm_srai_epi16(__m128i __a, int __count)
2956 {
2957  return (__m128i)__builtin_ia32_psrawi128((__v8hi)__a, __count);
2958 }
2959 
2974 static __inline__ __m128i __DEFAULT_FN_ATTRS
2975 _mm_sra_epi16(__m128i __a, __m128i __count)
2976 {
2977  return (__m128i)__builtin_ia32_psraw128((__v8hi)__a, (__v8hi)__count);
2978 }
2979 
2994 static __inline__ __m128i __DEFAULT_FN_ATTRS
2995 _mm_srai_epi32(__m128i __a, int __count)
2996 {
2997  return (__m128i)__builtin_ia32_psradi128((__v4si)__a, __count);
2998 }
2999 
3014 static __inline__ __m128i __DEFAULT_FN_ATTRS
3015 _mm_sra_epi32(__m128i __a, __m128i __count)
3016 {
3017  return (__m128i)__builtin_ia32_psrad128((__v4si)__a, (__v4si)__count);
3018 }
3019 
/* Expands to a call of __builtin_ia32_psrldqi128_byteshift on `a` (viewed as
 * __v2di) with `imm` converted to int; the result is cast to __m128i.
 *
 * The whole expansion is parenthesized so that a postfix operator written
 * after the macro (for example a subscript) applies to the __m128i result
 * rather than binding to the builtin call before the cast.
 */
#define _mm_srli_si128(a, imm) \
  ((__m128i)__builtin_ia32_psrldqi128_byteshift((__v2di)(__m128i)(a), (int)(imm)))
3039 
/* Expands to a call of __builtin_ia32_psrldqi128_byteshift on `a` (viewed as
 * __v2di) with `imm` converted to int; the result is cast to __m128i. The
 * expansion is the same as that of _mm_srli_si128.
 *
 * The whole expansion is parenthesized so that a postfix operator written
 * after the macro (for example a subscript) applies to the __m128i result
 * rather than binding to the builtin call before the cast.
 */
#define _mm_bsrli_si128(a, imm) \
  ((__m128i)__builtin_ia32_psrldqi128_byteshift((__v2di)(__m128i)(a), (int)(imm)))
3042 
3056 static __inline__ __m128i __DEFAULT_FN_ATTRS
3057 _mm_srli_epi16(__m128i __a, int __count)
3058 {
3059  return (__m128i)__builtin_ia32_psrlwi128((__v8hi)__a, __count);
3060 }
3061 
3075 static __inline__ __m128i __DEFAULT_FN_ATTRS
3076 _mm_srl_epi16(__m128i __a, __m128i __count)
3077 {
3078  return (__m128i)__builtin_ia32_psrlw128((__v8hi)__a, (__v8hi)__count);
3079 }
3080 
3094 static __inline__ __m128i __DEFAULT_FN_ATTRS
3095 _mm_srli_epi32(__m128i __a, int __count)
3096 {
3097  return (__m128i)__builtin_ia32_psrldi128((__v4si)__a, __count);
3098 }
3099 
3113 static __inline__ __m128i __DEFAULT_FN_ATTRS
3114 _mm_srl_epi32(__m128i __a, __m128i __count)
3115 {
3116  return (__m128i)__builtin_ia32_psrld128((__v4si)__a, (__v4si)__count);
3117 }
3118 
3132 static __inline__ __m128i __DEFAULT_FN_ATTRS
3133 _mm_srli_epi64(__m128i __a, int __count)
3134 {
3135  return __builtin_ia32_psrlqi128((__v2di)__a, __count);
3136 }
3137 
3151 static __inline__ __m128i __DEFAULT_FN_ATTRS
3152 _mm_srl_epi64(__m128i __a, __m128i __count)
3153 {
3154  return __builtin_ia32_psrlq128((__v2di)__a, (__v2di)__count);
3155 }
3156 
3170 static __inline__ __m128i __DEFAULT_FN_ATTRS
3171 _mm_cmpeq_epi8(__m128i __a, __m128i __b)
3172 {
3173  return (__m128i)((__v16qi)__a == (__v16qi)__b);
3174 }
3175 
3189 static __inline__ __m128i __DEFAULT_FN_ATTRS
3190 _mm_cmpeq_epi16(__m128i __a, __m128i __b)
3191 {
3192  return (__m128i)((__v8hi)__a == (__v8hi)__b);
3193 }
3194 
3208 static __inline__ __m128i __DEFAULT_FN_ATTRS
3209 _mm_cmpeq_epi32(__m128i __a, __m128i __b)
3210 {
3211  return (__m128i)((__v4si)__a == (__v4si)__b);
3212 }
3213 
3228 static __inline__ __m128i __DEFAULT_FN_ATTRS
3229 _mm_cmpgt_epi8(__m128i __a, __m128i __b)
3230 {
3231  /* This function always performs a signed comparison, but __v16qi is a char
3232  which may be signed or unsigned, so use __v16qs. */
3233  return (__m128i)((__v16qs)__a > (__v16qs)__b);
3234 }
3235 
3251 static __inline__ __m128i __DEFAULT_FN_ATTRS
3252 _mm_cmpgt_epi16(__m128i __a, __m128i __b)
3253 {
3254  return (__m128i)((__v8hi)__a > (__v8hi)__b);
3255 }
3256 
3272 static __inline__ __m128i __DEFAULT_FN_ATTRS
3273 _mm_cmpgt_epi32(__m128i __a, __m128i __b)
3274 {
3275  return (__m128i)((__v4si)__a > (__v4si)__b);
3276 }
3277 
3293 static __inline__ __m128i __DEFAULT_FN_ATTRS
3294 _mm_cmplt_epi8(__m128i __a, __m128i __b)
3295 {
3296  return _mm_cmpgt_epi8(__b, __a);
3297 }
3298 
3314 static __inline__ __m128i __DEFAULT_FN_ATTRS
3315 _mm_cmplt_epi16(__m128i __a, __m128i __b)
3316 {
3317  return _mm_cmpgt_epi16(__b, __a);
3318 }
3319 
3335 static __inline__ __m128i __DEFAULT_FN_ATTRS
3336 _mm_cmplt_epi32(__m128i __a, __m128i __b)
3337 {
3338  return _mm_cmpgt_epi32(__b, __a);
3339 }
3340 
3341 #ifdef __x86_64__
3359 static __inline__ __m128d __DEFAULT_FN_ATTRS
3360 _mm_cvtsi64_sd(__m128d __a, long long __b)
3361 {
3362  __a[0] = __b;
3363  return __a;
3364 }
3365 
3377 static __inline__ long long __DEFAULT_FN_ATTRS
3378 _mm_cvtsd_si64(__m128d __a)
3379 {
3380  return __builtin_ia32_cvtsd2si64((__v2df)__a);
3381 }
3382 
3395 static __inline__ long long __DEFAULT_FN_ATTRS
3396 _mm_cvttsd_si64(__m128d __a)
3397 {
3398  return __builtin_ia32_cvttsd2si64((__v2df)__a);
3399 }
3400 #endif
3401 
3411 static __inline__ __m128 __DEFAULT_FN_ATTRS
3413 {
3414  return (__m128)__builtin_convertvector((__v4si)__a, __v4sf);
3415 }
3416 
3427 static __inline__ __m128i __DEFAULT_FN_ATTRS
3429 {
3430  return (__m128i)__builtin_ia32_cvtps2dq((__v4sf)__a);
3431 }
3432 
3444 static __inline__ __m128i __DEFAULT_FN_ATTRS
3446 {
3447  return (__m128i)__builtin_ia32_cvttps2dq((__v4sf)__a);
3448 }
3449 
3460 static __inline__ __m128i __DEFAULT_FN_ATTRS
3462 {
3463  return __extension__ (__m128i)(__v4si){ __a, 0, 0, 0 };
3464 }
3465 
3466 #ifdef __x86_64__
3477 static __inline__ __m128i __DEFAULT_FN_ATTRS
3478 _mm_cvtsi64_si128(long long __a)
3479 {
3480  return __extension__ (__m128i)(__v2di){ __a, 0 };
3481 }
3482 #endif
3483 
3495 static __inline__ int __DEFAULT_FN_ATTRS
3497 {
3498  __v4si __b = (__v4si)__a;
3499  return __b[0];
3500 }
3501 
3502 #ifdef __x86_64__
3514 static __inline__ long long __DEFAULT_FN_ATTRS
3515 _mm_cvtsi128_si64(__m128i __a)
3516 {
3517  return __a[0];
3518 }
3519 #endif
3520 
3531 static __inline__ __m128i __DEFAULT_FN_ATTRS
3532 _mm_load_si128(__m128i const *__p)
3533 {
3534  return *__p;
3535 }
3536 
3547 static __inline__ __m128i __DEFAULT_FN_ATTRS
3548 _mm_loadu_si128(__m128i_u const *__p)
3549 {
3550  struct __loadu_si128 {
3551  __m128i_u __v;
3552  } __attribute__((__packed__, __may_alias__));
3553  return ((const struct __loadu_si128*)__p)->__v;
3554 }
3555 
3568 static __inline__ __m128i __DEFAULT_FN_ATTRS
3569 _mm_loadl_epi64(__m128i_u const *__p)
3570 {
3571  struct __mm_loadl_epi64_struct {
3572  long long __u;
3573  } __attribute__((__packed__, __may_alias__));
3574  return __extension__ (__m128i) { ((const struct __mm_loadl_epi64_struct*)__p)->__u, 0};
3575 }
3576 
3586 static __inline__ __m128i __DEFAULT_FN_ATTRS
3588 {
3589  return (__m128i)__builtin_ia32_undef128();
3590 }
3591 
3608 static __inline__ __m128i __DEFAULT_FN_ATTRS
3609 _mm_set_epi64x(long long __q1, long long __q0)
3610 {
3611  return __extension__ (__m128i)(__v2di){ __q0, __q1 };
3612 }
3613 
3630 static __inline__ __m128i __DEFAULT_FN_ATTRS
3631 _mm_set_epi64(__m64 __q1, __m64 __q0)
3632 {
3633  return _mm_set_epi64x((long long)__q1, (long long)__q0);
3634 }
3635 
3658 static __inline__ __m128i __DEFAULT_FN_ATTRS
3659 _mm_set_epi32(int __i3, int __i2, int __i1, int __i0)
3660 {
3661  return __extension__ (__m128i)(__v4si){ __i0, __i1, __i2, __i3};
3662 }
3663 
3698 static __inline__ __m128i __DEFAULT_FN_ATTRS
3699 _mm_set_epi16(short __w7, short __w6, short __w5, short __w4, short __w3, short __w2, short __w1, short __w0)
3700 {
3701  return __extension__ (__m128i)(__v8hi){ __w0, __w1, __w2, __w3, __w4, __w5, __w6, __w7 };
3702 }
3703 
3746 static __inline__ __m128i __DEFAULT_FN_ATTRS
3747 _mm_set_epi8(char __b15, char __b14, char __b13, char __b12, char __b11, char __b10, char __b9, char __b8, char __b7, char __b6, char __b5, char __b4, char __b3, char __b2, char __b1, char __b0)
3748 {
3749  return __extension__ (__m128i)(__v16qi){ __b0, __b1, __b2, __b3, __b4, __b5, __b6, __b7, __b8, __b9, __b10, __b11, __b12, __b13, __b14, __b15 };
3750 }
3751 
3765 static __inline__ __m128i __DEFAULT_FN_ATTRS
3766 _mm_set1_epi64x(long long __q)
3767 {
3768  return _mm_set_epi64x(__q, __q);
3769 }
3770 
3784 static __inline__ __m128i __DEFAULT_FN_ATTRS
3785 _mm_set1_epi64(__m64 __q)
3786 {
3787  return _mm_set_epi64(__q, __q);
3788 }
3789 
3803 static __inline__ __m128i __DEFAULT_FN_ATTRS
3805 {
3806  return _mm_set_epi32(__i, __i, __i, __i);
3807 }
3808 
3822 static __inline__ __m128i __DEFAULT_FN_ATTRS
3823 _mm_set1_epi16(short __w)
3824 {
3825  return _mm_set_epi16(__w, __w, __w, __w, __w, __w, __w, __w);
3826 }
3827 
3841 static __inline__ __m128i __DEFAULT_FN_ATTRS
3843 {
3844  return _mm_set_epi8(__b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b);
3845 }
3846 
3861 static __inline__ __m128i __DEFAULT_FN_ATTRS
3862 _mm_setr_epi64(__m64 __q0, __m64 __q1)
3863 {
3864  return _mm_set_epi64(__q1, __q0);
3865 }
3866 
3884 static __inline__ __m128i __DEFAULT_FN_ATTRS
3885 _mm_setr_epi32(int __i0, int __i1, int __i2, int __i3)
3886 {
3887  return _mm_set_epi32(__i3, __i2, __i1, __i0);
3888 }
3889 
3915 static __inline__ __m128i __DEFAULT_FN_ATTRS
3916 _mm_setr_epi16(short __w0, short __w1, short __w2, short __w3, short __w4, short __w5, short __w6, short __w7)
3917 {
3918  return _mm_set_epi16(__w7, __w6, __w5, __w4, __w3, __w2, __w1, __w0);
3919 }
3920 
3962 static __inline__ __m128i __DEFAULT_FN_ATTRS
3963 _mm_setr_epi8(char __b0, char __b1, char __b2, char __b3, char __b4, char __b5, char __b6, char __b7, char __b8, char __b9, char __b10, char __b11, char __b12, char __b13, char __b14, char __b15)
3964 {
3965  return _mm_set_epi8(__b15, __b14, __b13, __b12, __b11, __b10, __b9, __b8, __b7, __b6, __b5, __b4, __b3, __b2, __b1, __b0);
3966 }
3967 
3976 static __inline__ __m128i __DEFAULT_FN_ATTRS
3978 {
3979  return __extension__ (__m128i)(__v2di){ 0LL, 0LL };
3980 }
3981 
3994 static __inline__ void __DEFAULT_FN_ATTRS
3995 _mm_store_si128(__m128i *__p, __m128i __b)
3996 {
3997  *__p = __b;
3998 }
3999 
4010 static __inline__ void __DEFAULT_FN_ATTRS
4011 _mm_storeu_si128(__m128i_u *__p, __m128i __b)
4012 {
4013  struct __storeu_si128 {
4014  __m128i_u __v;
4015  } __attribute__((__packed__, __may_alias__));
4016  ((struct __storeu_si128*)__p)->__v = __b;
4017 }
4018 
4031 static __inline__ void __DEFAULT_FN_ATTRS
4032 _mm_storeu_si64(void *__p, __m128i __b)
4033 {
4034  struct __storeu_si64 {
4035  long long __v;
4036  } __attribute__((__packed__, __may_alias__));
4037  ((struct __storeu_si64*)__p)->__v = ((__v2di)__b)[0];
4038 }
4039 
4052 static __inline__ void __DEFAULT_FN_ATTRS
4053 _mm_storeu_si32(void *__p, __m128i __b)
4054 {
4055  struct __storeu_si32 {
4056  int __v;
4057  } __attribute__((__packed__, __may_alias__));
4058  ((struct __storeu_si32*)__p)->__v = ((__v4si)__b)[0];
4059 }
4060 
4073 static __inline__ void __DEFAULT_FN_ATTRS
4074 _mm_storeu_si16(void *__p, __m128i __b)
4075 {
4076  struct __storeu_si16 {
4077  short __v;
4078  } __attribute__((__packed__, __may_alias__));
4079  ((struct __storeu_si16*)__p)->__v = ((__v8hi)__b)[0];
4080 }
4081 
4103 static __inline__ void __DEFAULT_FN_ATTRS
4104 _mm_maskmoveu_si128(__m128i __d, __m128i __n, char *__p)
4105 {
4106  __builtin_ia32_maskmovdqu((__v16qi)__d, (__v16qi)__n, __p);
4107 }
4108 
4122 static __inline__ void __DEFAULT_FN_ATTRS
4123 _mm_storel_epi64(__m128i_u *__p, __m128i __a)
4124 {
4125  struct __mm_storel_epi64_struct {
4126  long long __u;
4127  } __attribute__((__packed__, __may_alias__));
4128  ((struct __mm_storel_epi64_struct*)__p)->__u = __a[0];
4129 }
4130 
4145 static __inline__ void __DEFAULT_FN_ATTRS
4146 _mm_stream_pd(double *__p, __m128d __a)
4147 {
4148  __builtin_nontemporal_store((__v2df)__a, (__v2df*)__p);
4149 }
4150 
4164 static __inline__ void __DEFAULT_FN_ATTRS
4165 _mm_stream_si128(__m128i *__p, __m128i __a)
4166 {
4167  __builtin_nontemporal_store((__v2di)__a, (__v2di*)__p);
4168 }
4169 
/* Stores a 32-bit integer to memory through the movnti builtin.
 *
 * __p: destination of the 32-bit store.
 * __a: value to store.
 *
 * Unlike most functions in this header, the attribute list is spelled out
 * here instead of using __DEFAULT_FN_ATTRS, so no minimum vector width is
 * attached. */
static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("sse2")))
_mm_stream_si32(int *__p, int __a)
{
  int *__dst = __p;
  int __val = __a;
  __builtin_ia32_movnti(__dst, __val);
}
4188 
4189 #ifdef __x86_64__
/* Stores a 64-bit integer to memory through the movnti64 builtin.
 *
 * __p: destination of the 64-bit store.
 * __a: value to store.
 *
 * Unlike most functions in this header, the attribute list is spelled out
 * here instead of using __DEFAULT_FN_ATTRS, so no minimum vector width is
 * attached. */
static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("sse2")))
_mm_stream_si64(long long *__p, long long __a)
{
  long long *__dst = __p;
  long long __val = __a;
  __builtin_ia32_movnti64(__dst, __val);
}
4208 #endif
4209 
/* The three functions below are only declared here; no body is provided in
 * this header. They are given C linkage when the header is compiled as
 * C++. */
4210 #if defined(__cplusplus)
4211 extern "C" {
4212 #endif
4213 
/* Cache-line flush for the line containing the address __p
 * (NOTE(review): semantics taken from the intrinsic's name; confirm against
 * the Intel reference). */
4224 void _mm_clflush(void const * __p);
4225 
/* Load fence (NOTE(review): semantics taken from the intrinsic's name;
 * confirm against the Intel reference). */
4235 void _mm_lfence(void);
4236 
/* Forces strong memory ordering (serialization) between load and store
 * instructions preceding this instruction and those following it. */
4246 void _mm_mfence(void);
4247 
4248 #if defined(__cplusplus)
4249 } // extern "C"
4250 #endif
4251 
4274 static __inline__ __m128i __DEFAULT_FN_ATTRS
4275 _mm_packs_epi16(__m128i __a, __m128i __b)
4276 {
4277  return (__m128i)__builtin_ia32_packsswb128((__v8hi)__a, (__v8hi)__b);
4278 }
4279 
4302 static __inline__ __m128i __DEFAULT_FN_ATTRS
4303 _mm_packs_epi32(__m128i __a, __m128i __b)
4304 {
4305  return (__m128i)__builtin_ia32_packssdw128((__v4si)__a, (__v4si)__b);
4306 }
4307 
4330 static __inline__ __m128i __DEFAULT_FN_ATTRS
4331 _mm_packus_epi16(__m128i __a, __m128i __b)
4332 {
4333  return (__m128i)__builtin_ia32_packuswb128((__v8hi)__a, (__v8hi)__b);
4334 }
4335 
/* Extracts one 16-bit element of a 128-bit integer vector, zero-extended to
 * int (the value passes through unsigned short before widening).
 *
 * a:   128-bit integer vector, interpreted as [8 x i16].
 * imm: index of the element to extract.
 *
 * The whole expansion is parenthesized so that the casts apply to the
 * builtin's result even when the macro is used inside a larger expression
 * (for example, followed by a postfix operator). */
#define _mm_extract_epi16(a, imm) \
  ((int)(unsigned short)__builtin_ia32_vec_ext_v8hi((__v8hi)(__m128i)(a), \
                                                    (int)(imm)))
4361 
/* Returns a copy of a 128-bit integer vector with one 16-bit element
 * replaced.
 *
 * a:   128-bit integer vector, interpreted as [8 x i16].
 * b:   value to insert (converted to int before the builtin call).
 * imm: index of the element to replace.
 *
 * The whole expansion is parenthesized so that the cast to __m128i applies
 * to the builtin's result even when the macro is used inside a larger
 * expression (for example, followed by a subscript). */
#define _mm_insert_epi16(a, b, imm) \
  ((__m128i)__builtin_ia32_vec_set_v8hi((__v8hi)(__m128i)(a), (int)(b), \
                                        (int)(imm)))
4385 
4398 static __inline__ int __DEFAULT_FN_ATTRS
4400 {
4401  return __builtin_ia32_pmovmskb128((__v16qi)__a);
4402 }
4403 
/* Builds a 128-bit vector of [4 x i32] by selecting elements of the source
 * vector according to an immediate control value (pshufd builtin).
 *
 * a:   128-bit integer vector, interpreted as [4 x i32].
 * imm: immediate selecting which source element goes to each destination
 *      element.
 *
 * The whole expansion is parenthesized so that the cast to __m128i applies
 * to the builtin's result even when the macro is used inside a larger
 * expression (for example, followed by a subscript). */
#define _mm_shuffle_epi32(a, imm) \
  ((__m128i)__builtin_ia32_pshufd((__v4si)(__m128i)(a), (int)(imm)))
4434 
/* Builds a 128-bit vector of [8 x i16] by shuffling 16-bit elements of the
 * source vector according to an immediate control value (pshuflw builtin,
 * which operates on the low four elements).
 *
 * a:   128-bit integer vector, interpreted as [8 x i16].
 * imm: immediate selecting the source element for each shuffled element.
 *
 * The whole expansion is parenthesized so that the cast to __m128i applies
 * to the builtin's result even when the macro is used inside a larger
 * expression (for example, followed by a subscript). */
#define _mm_shufflelo_epi16(a, imm) \
  ((__m128i)__builtin_ia32_pshuflw((__v8hi)(__m128i)(a), (int)(imm)))
4464 
/* Builds a 128-bit vector of [8 x i16] by shuffling 16-bit elements of the
 * source vector according to an immediate control value (pshufhw builtin,
 * which operates on the high four elements).
 *
 * a:   128-bit integer vector, interpreted as [8 x i16].
 * imm: immediate selecting the source element for each shuffled element.
 *
 * The whole expansion is parenthesized so that the cast to __m128i applies
 * to the builtin's result even when the macro is used inside a larger
 * expression (for example, followed by a subscript). */
#define _mm_shufflehi_epi16(a, imm) \
  ((__m128i)__builtin_ia32_pshufhw((__v8hi)(__m128i)(a), (int)(imm)))
4494 
4524 static __inline__ __m128i __DEFAULT_FN_ATTRS
4525 _mm_unpackhi_epi8(__m128i __a, __m128i __b)
4526 {
4527  return (__m128i)__builtin_shufflevector((__v16qi)__a, (__v16qi)__b, 8, 16+8, 9, 16+9, 10, 16+10, 11, 16+11, 12, 16+12, 13, 16+13, 14, 16+14, 15, 16+15);
4528 }
4529 
4551 static __inline__ __m128i __DEFAULT_FN_ATTRS
4552 _mm_unpackhi_epi16(__m128i __a, __m128i __b)
4553 {
4554  return (__m128i)__builtin_shufflevector((__v8hi)__a, (__v8hi)__b, 4, 8+4, 5, 8+5, 6, 8+6, 7, 8+7);
4555 }
4556 
4574 static __inline__ __m128i __DEFAULT_FN_ATTRS
4575 _mm_unpackhi_epi32(__m128i __a, __m128i __b)
4576 {
4577  return (__m128i)__builtin_shufflevector((__v4si)__a, (__v4si)__b, 2, 4+2, 3, 4+3);
4578 }
4579 
4595 static __inline__ __m128i __DEFAULT_FN_ATTRS
4596 _mm_unpackhi_epi64(__m128i __a, __m128i __b)
4597 {
4598  return (__m128i)__builtin_shufflevector((__v2di)__a, (__v2di)__b, 1, 2+1);
4599 }
4600 
4630 static __inline__ __m128i __DEFAULT_FN_ATTRS
4631 _mm_unpacklo_epi8(__m128i __a, __m128i __b)
4632 {
4633  return (__m128i)__builtin_shufflevector((__v16qi)__a, (__v16qi)__b, 0, 16+0, 1, 16+1, 2, 16+2, 3, 16+3, 4, 16+4, 5, 16+5, 6, 16+6, 7, 16+7);
4634 }
4635 
4658 static __inline__ __m128i __DEFAULT_FN_ATTRS
4659 _mm_unpacklo_epi16(__m128i __a, __m128i __b)
4660 {
4661  return (__m128i)__builtin_shufflevector((__v8hi)__a, (__v8hi)__b, 0, 8+0, 1, 8+1, 2, 8+2, 3, 8+3);
4662 }
4663 
4681 static __inline__ __m128i __DEFAULT_FN_ATTRS
4682 _mm_unpacklo_epi32(__m128i __a, __m128i __b)
4683 {
4684  return (__m128i)__builtin_shufflevector((__v4si)__a, (__v4si)__b, 0, 4+0, 1, 4+1);
4685 }
4686 
4702 static __inline__ __m128i __DEFAULT_FN_ATTRS
4703 _mm_unpacklo_epi64(__m128i __a, __m128i __b)
4704 {
4705  return (__m128i)__builtin_shufflevector((__v2di)__a, (__v2di)__b, 0, 2+0);
4706 }
4707 
4719 static __inline__ __m64 __DEFAULT_FN_ATTRS
4721 {
4722  return (__m64)__a[0];
4723 }
4724 
4736 static __inline__ __m128i __DEFAULT_FN_ATTRS
4738 {
4739  return __extension__ (__m128i)(__v2di){ (long long)__a, 0 };
4740 }
4741 
4754 static __inline__ __m128i __DEFAULT_FN_ATTRS
4756 {
4757  return __builtin_shufflevector((__v2di)__a, _mm_setzero_si128(), 0, 2);
4758 }
4759 
4775 static __inline__ __m128d __DEFAULT_FN_ATTRS
4776 _mm_unpackhi_pd(__m128d __a, __m128d __b)
4777 {
4778  return __builtin_shufflevector((__v2df)__a, (__v2df)__b, 1, 2+1);
4779 }
4780 
4796 static __inline__ __m128d __DEFAULT_FN_ATTRS
4797 _mm_unpacklo_pd(__m128d __a, __m128d __b)
4798 {
4799  return __builtin_shufflevector((__v2df)__a, (__v2df)__b, 0, 2+0);
4800 }
4801 
4815 static __inline__ int __DEFAULT_FN_ATTRS
4817 {
4818  return __builtin_ia32_movmskpd((__v2df)__a);
4819 }
4820 
4821 
/* Builds a 128-bit vector of [2 x double] from elements of two source
 * vectors, selected by an immediate control value (shufpd builtin).
 *
 * a: first 128-bit vector of [2 x double].
 * b: second 128-bit vector of [2 x double].
 * i: immediate selecting which element of each source is used.
 *
 * The whole expansion is parenthesized so that the cast to __m128d applies
 * to the builtin's result even when the macro is used inside a larger
 * expression (for example, followed by a subscript). */
#define _mm_shuffle_pd(a, b, i) \
  ((__m128d)__builtin_ia32_shufpd((__v2df)(__m128d)(a), (__v2df)(__m128d)(b), \
                                  (int)(i)))
4849 
4861 static __inline__ __m128 __DEFAULT_FN_ATTRS
4863 {
4864  return (__m128)__a;
4865 }
4866 
4878 static __inline__ __m128i __DEFAULT_FN_ATTRS
4880 {
4881  return (__m128i)__a;
4882 }
4883 
4895 static __inline__ __m128d __DEFAULT_FN_ATTRS
4897 {
4898  return (__m128d)__a;
4899 }
4900 
4912 static __inline__ __m128i __DEFAULT_FN_ATTRS
4914 {
4915  return (__m128i)__a;
4916 }
4917 
4929 static __inline__ __m128 __DEFAULT_FN_ATTRS
4931 {
4932  return (__m128)__a;
4933 }
4934 
4946 static __inline__ __m128d __DEFAULT_FN_ATTRS
4948 {
4949  return (__m128d)__a;
4950 }
4951 
/* _mm_pause is only declared here; no body is provided in this header. It
 * is given C linkage when the header is compiled as C++. */
4952 #if defined(__cplusplus)
4953 extern "C" {
4954 #endif
4955 
/* Takes no arguments and returns nothing.
 * NOTE(review): presumably the spin-wait hint (PAUSE instruction); confirm
 * against the Intel reference. */
4963 void _mm_pause(void);
4964 
4965 #if defined(__cplusplus)
4966 } // extern "C"
4967 #endif
/* The attribute helper macros are private to this header; remove them so
 * they do not leak into code that includes it. */
4968 #undef __DEFAULT_FN_ATTRS
4969 #undef __DEFAULT_FN_ATTRS_MMX
4970 
/* Packs two selector values into one integer: x goes to bit 1 and y to
 * bit 0. */
4971 #define _MM_SHUFFLE2(x, y) (((x) << 1) | (y))
4972 
/* Values for the denormals-are-zero mode; both fall under
 * _MM_DENORMALS_ZERO_MASK (bit 0x0040 of the value handled by
 * _mm_getcsr()/_mm_setcsr()). */
4973 #define _MM_DENORMALS_ZERO_ON (0x0040U)
4974 #define _MM_DENORMALS_ZERO_OFF (0x0000U)
4975 
4976 #define _MM_DENORMALS_ZERO_MASK (0x0040U)
4977 
/* GET reads the control value with _mm_getcsr() and isolates the mode bit.
 * SET clears the mode bit in the current control value, ORs in x and writes
 * the result back with _mm_setcsr(); x is expected to be one of the
 * _MM_DENORMALS_ZERO_* values above. */
4978 #define _MM_GET_DENORMALS_ZERO_MODE() (_mm_getcsr() & _MM_DENORMALS_ZERO_MASK)
4979 #define _MM_SET_DENORMALS_ZERO_MODE(x) (_mm_setcsr((_mm_getcsr() & ~_MM_DENORMALS_ZERO_MASK) | (x)))
4980 
4981 #endif /* __EMMINTRIN_H */
static __inline unsigned char unsigned int __x
Definition: adxintrin.h:22
static __inline unsigned char unsigned int unsigned int unsigned int * __p
Definition: adxintrin.h:24
static __inline__ vector float vector float vector float __c
Definition: altivec.h:4243
static __inline__ vector float vector float __b
Definition: altivec.h:520
static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX _mm_cvtpd_pi32(__m128d __a)
Converts the two double-precision floating-point elements of a 128-bit vector of [2 x double] into tw...
Definition: emmintrin.h:1508
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_setr_epi64(__m64 __q0, __m64 __q1)
Constructs a 128-bit integer vector, initialized in reverse order with the specified 64-bit integral ...
Definition: emmintrin.h:3862
static __inline__ int __DEFAULT_FN_ATTRS _mm_comile_sd(__m128d __a, __m128d __b)
Compares the lower double-precision floating-point values in each of the two 128-bit floating-point v...
Definition: emmintrin.h:1044
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_unpacklo_epi16(__m128i __a, __m128i __b)
Unpacks the low-order (index 0-3) values from each of the two 128-bit vectors of [8 x i16] and interl...
Definition: emmintrin.h:4659
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_movpi64_epi64(__m64 __a)
Moves the 64-bit operand to a 128-bit integer vector, zeroing the upper bits.
Definition: emmintrin.h:4737
static __inline__ void __DEFAULT_FN_ATTRS _mm_store_pd1(double *__dp, __m128d __a)
Moves the lower 64 bits of a 128-bit vector of [2 x double] twice to the upper and lower 64 bits of a...
Definition: emmintrin.h:2013
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set_epi16(short __w7, short __w6, short __w5, short __w4, short __w3, short __w2, short __w1, short __w0)
Initializes the 16-bit values in a 128-bit vector of [8 x i16] with the specified 16-bit integer valu...
Definition: emmintrin.h:3699
static __inline__ int __DEFAULT_FN_ATTRS _mm_comilt_sd(__m128d __a, __m128d __b)
Compares the lower double-precision floating-point values in each of the two 128-bit floating-point v...
Definition: emmintrin.h:1018
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_set_pd1(double __w)
Constructs a 128-bit floating-point vector of [2 x double], with each of the two double-precision flo...
Definition: emmintrin.h:1855
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_packus_epi16(__m128i __a, __m128i __b)
Converts 16-bit signed integers from both 128-bit integer vector operands into 8-bit unsigned integer...
Definition: emmintrin.h:4331
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_min_epu8(__m128i __a, __m128i __b)
Compares corresponding elements of two 128-bit unsigned [16 x i8] vectors, saving the smaller value f...
Definition: emmintrin.h:2432
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpneq_pd(__m128d __a, __m128d __b)
Compares each of the corresponding double-precision values of the 128-bit vectors of [2 x double] to ...
Definition: emmintrin.h:577
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_add_sd(__m128d __a, __m128d __b)
Adds lower double-precision values in both operands and returns the sum in the lower 64 bits of the r...
Definition: emmintrin.h:56
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_div_pd(__m128d __a, __m128d __b)
Performs an element-by-element division of two 128-bit vectors of [2 x double].
Definition: emmintrin.h:201
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_sub_pd(__m128d __a, __m128d __b)
Subtracts two 128-bit vectors of [2 x double].
Definition: emmintrin.h:117
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_castpd_si128(__m128d __a)
Casts a 128-bit floating-point vector of [2 x double] into a 128-bit integer vector.
Definition: emmintrin.h:4879
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_or_pd(__m128d __a, __m128d __b)
Performs a bitwise OR of two 128-bit vectors of [2 x double].
Definition: emmintrin.h:389
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_loadu_si32(void const *__a)
Loads a 32-bit integer value to the low element of a 128-bit integer vector and clears the upper elem...
Definition: emmintrin.h:1682
static __inline__ int __DEFAULT_FN_ATTRS _mm_movemask_epi8(__m128i __a)
Copies the values of the most significant bits from each 8-bit element in a 128-bit integer vector of...
Definition: emmintrin.h:4399
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sll_epi32(__m128i __a, __m128i __count)
Left-shifts each 32-bit value in the 128-bit integer vector operand by the specified number of bits.
Definition: emmintrin.h:2897
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_and_si128(__m128i __a, __m128i __b)
Performs a bitwise AND of two 128-bit integer vectors.
Definition: emmintrin.h:2743
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpord_sd(__m128d __a, __m128d __b)
Compares the lower double-precision floating-point values in each of the two 128-bit floating-point v...
Definition: emmintrin.h:813
static __inline__ int __DEFAULT_FN_ATTRS _mm_ucomile_sd(__m128d __a, __m128d __b)
Compares the lower double-precision floating-point values in each of the two 128-bit floating-point v...
Definition: emmintrin.h:1198
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_loadu_pd(double const *__dp)
Loads a 128-bit floating-point vector of [2 x double] from an unaligned memory location.
Definition: emmintrin.h:1641
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sub_epi64(__m128i __a, __m128i __b)
Subtracts the corresponding elements of two [2 x i64] vectors.
Definition: emmintrin.h:2643
static __inline__ void __DEFAULT_FN_ATTRS _mm_maskmoveu_si128(__m128i __d, __m128i __n, char *__p)
Moves bytes selected by the mask from the first operand to the specified unaligned memory location.
Definition: emmintrin.h:4104
static __inline__ int __DEFAULT_FN_ATTRS _mm_ucomilt_sd(__m128d __a, __m128d __b)
Compares the lower double-precision floating-point values in each of the two 128-bit floating-point v...
Definition: emmintrin.h:1172
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set_epi32(int __i3, int __i2, int __i1, int __i0)
Initializes the 32-bit values in a 128-bit vector of [4 x i32] with the specified 32-bit integer valu...
Definition: emmintrin.h:3659
static __inline__ int __DEFAULT_FN_ATTRS _mm_ucomigt_sd(__m128d __a, __m128d __b)
Compares the lower double-precision floating-point values in each of the two 128-bit floating-point v...
Definition: emmintrin.h:1224
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_adds_epi8(__m128i __a, __m128i __b)
Adds, with saturation, the corresponding elements of two 128-bit signed [16 x i8] vectors,...
Definition: emmintrin.h:2222
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_load_pd(double const *__dp)
Loads a 128-bit floating-point vector of [2 x double] from an aligned memory location.
Definition: emmintrin.h:1579
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cvtepi32_pd(__m128i __a)
Converts the lower two integer elements of a 128-bit vector of [4 x i32] into two double-precision fl...
Definition: emmintrin.h:1338
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cmpeq_epi8(__m128i __a, __m128i __b)
Compares each of the corresponding 8-bit values of the 128-bit integer vectors for equality.
Definition: emmintrin.h:3171
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_srli_epi32(__m128i __a, int __count)
Right-shifts each of 32-bit values in the 128-bit integer vector operand by the specified number of b...
Definition: emmintrin.h:3095
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_set1_pd(double __w)
Constructs a 128-bit floating-point vector of [2 x double], with each of the two double-precision flo...
Definition: emmintrin.h:1837
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cmplt_epi16(__m128i __a, __m128i __b)
Compares each of the corresponding signed 16-bit values of the 128-bit integer vectors to determine i...
Definition: emmintrin.h:3315
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_set_pd(double __w, double __x)
Constructs a 128-bit floating-point vector of [2 x double] initialized with the specified double-prec...
Definition: emmintrin.h:1875
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sub_epi16(__m128i __a, __m128i __b)
Subtracts the corresponding 16-bit integer values in the operands.
Definition: emmintrin.h:2588
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmple_sd(__m128d __a, __m128d __b)
Compares the lower double-precision floating-point values in each of the two 128-bit floating-point v...
Definition: emmintrin.h:734
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_div_sd(__m128d __a, __m128d __b)
Divides the lower double-precision value of the first operand by the lower double-precision value of ...
Definition: emmintrin.h:181
static __inline__ void __DEFAULT_FN_ATTRS _mm_storel_epi64(__m128i_u *__p, __m128i __a)
Stores the lower 64 bits of a 128-bit integer vector of [2 x i64] to a memory location.
Definition: emmintrin.h:4123
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cmplt_epi32(__m128i __a, __m128i __b)
Compares each of the corresponding signed 32-bit values of the 128-bit integer vectors to determine i...
Definition: emmintrin.h:3336
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_or_si128(__m128i __a, __m128i __b)
Performs a bitwise OR of two 128-bit integer vectors.
Definition: emmintrin.h:2780
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpge_pd(__m128d __a, __m128d __b)
Compares each of the corresponding double-precision values of the 128-bit vectors of [2 x double] to ...
Definition: emmintrin.h:509
static __inline__ void __DEFAULT_FN_ATTRS _mm_storeu_si16(void *__p, __m128i __b)
Stores a 16-bit integer value from the low element of a 128-bit integer vector.
Definition: emmintrin.h:4074
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_min_pd(__m128d __a, __m128d __b)
Performs element-by-element comparison of the two 128-bit vectors of [2 x double] and returns the vec...
Definition: emmintrin.h:288
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_load_sd(double const *__dp)
Loads a 64-bit double-precision value to the low element of a 128-bit vector of [2 x double] and clears the u...
Definition: emmintrin.h:1724
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_loadu_si16(void const *__a)
Loads a 16-bit integer value to the low element of a 128-bit integer vector and clears the upper elem...
Definition: emmintrin.h:1703
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_unpackhi_pd(__m128d __a, __m128d __b)
Unpacks the high-order 64-bit elements from two 128-bit vectors of [2 x double] and interleaves them ...
Definition: emmintrin.h:4776
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpgt_sd(__m128d __a, __m128d __b)
Compares the lower double-precision floating-point values in each of the two 128-bit floating-point v...
Definition: emmintrin.h:759
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_andnot_si128(__m128i __a, __m128i __b)
Performs a bitwise AND of two 128-bit integer vectors, using the one's complement of the values conta...
Definition: emmintrin.h:2763
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cmpgt_epi8(__m128i __a, __m128i __b)
Compares each of the corresponding signed 8-bit values of the 128-bit integer vectors to determine if...
Definition: emmintrin.h:3229
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_srl_epi32(__m128i __a, __m128i __count)
Right-shifts each of 32-bit values in the 128-bit integer vector operand by the specified number of b...
Definition: emmintrin.h:3114
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cmpeq_epi16(__m128i __a, __m128i __b)
Compares each of the corresponding 16-bit values of the 128-bit integer vectors for equality.
Definition: emmintrin.h:3190
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mullo_epi16(__m128i __a, __m128i __b)
Multiplies the corresponding elements of two signed [8 x i16] vectors, saving the lower 16 bits of ea...
Definition: emmintrin.h:2492
static __inline__ int __DEFAULT_FN_ATTRS _mm_ucomieq_sd(__m128d __a, __m128d __b)
Compares the lower double-precision floating-point values in each of the two 128-bit floating-point v...
Definition: emmintrin.h:1146
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_srai_epi16(__m128i __a, int __count)
Right-shifts each 16-bit value in the 128-bit integer vector operand by the specified number of bits.
Definition: emmintrin.h:2955
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_xor_pd(__m128d __a, __m128d __b)
Performs a bitwise XOR of two 128-bit vectors of [2 x double].
Definition: emmintrin.h:407
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_max_epu8(__m128i __a, __m128i __b)
Compares corresponding elements of two 128-bit unsigned [16 x i8] vectors, saving the greater value f...
Definition: emmintrin.h:2392
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_avg_epu16(__m128i __a, __m128i __b)
Computes the rounded averages of corresponding elements of two 128-bit unsigned [8 x i16] vectors,...
Definition: emmintrin.h:2326
static __inline__ void __DEFAULT_FN_ATTRS _mm_store1_pd(double *__dp, __m128d __a)
Moves the lower 64 bits of a 128-bit vector of [2 x double] twice to the upper and lower 64 bits of a...
Definition: emmintrin.h:1992
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_castsi128_pd(__m128i __a)
Casts a 128-bit integer vector into a 128-bit floating-point vector of [2 x double].
Definition: emmintrin.h:4947
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_srl_epi16(__m128i __a, __m128i __count)
Right-shifts each of 16-bit values in the 128-bit integer vector operand by the specified number of b...
Definition: emmintrin.h:3076
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_unpacklo_epi8(__m128i __a, __m128i __b)
Unpacks the low-order (index 0-7) values from two 128-bit vectors of [16 x i8] and interleaves them i...
Definition: emmintrin.h:4631
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpge_sd(__m128d __a, __m128d __b)
Compares the lower double-precision floating-point values in each of the two 128-bit floating-point v...
Definition: emmintrin.h:785
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cmpgt_epi16(__m128i __a, __m128i __b)
Compares each of the corresponding signed 16-bit values of the 128-bit integer vectors to determine i...
Definition: emmintrin.h:3252
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpnge_sd(__m128d __a, __m128d __b)
Compares the lower double-precision floating-point values in each of the two 128-bit floating-point v...
Definition: emmintrin.h:967
#define __DEFAULT_FN_ATTRS
Definition: emmintrin.h:37
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_unpacklo_pd(__m128d __a, __m128d __b)
Unpacks the low-order 64-bit elements from two 128-bit vectors of [2 x double] and interleaves them i...
Definition: emmintrin.h:4797
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_subs_epi8(__m128i __a, __m128i __b)
Subtracts corresponding 8-bit signed integer values in the input and returns the differences in the c...
Definition: emmintrin.h:2664
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmplt_sd(__m128d __a, __m128d __b)
Compares the lower double-precision floating-point values in each of the two 128-bit floating-point v...
Definition: emmintrin.h:709
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpnge_pd(__m128d __a, __m128d __b)
Compares each of the corresponding double-precision values of the 128-bit vectors of [2 x double] to ...
Definition: emmintrin.h:661
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_unpacklo_epi64(__m128i __a, __m128i __b)
Unpacks the low-order 64-bit elements from two 128-bit vectors of [2 x i64] and interleaves them into...
Definition: emmintrin.h:4703
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_srai_epi32(__m128i __a, int __count)
Right-shifts each 32-bit value in the 128-bit integer vector operand by the specified number of bits.
Definition: emmintrin.h:2995
static __inline__ int __DEFAULT_FN_ATTRS _mm_movemask_pd(__m128d __a)
Extracts the sign bits of the double-precision values in the 128-bit vector of [2 x double],...
Definition: emmintrin.h:4816
static __inline__ void __DEFAULT_FN_ATTRS _mm_stream_si128(__m128i *__p, __m128i __a)
Stores a 128-bit integer vector to a 128-bit aligned memory location.
Definition: emmintrin.h:4165
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cmpeq_epi32(__m128i __a, __m128i __b)
Compares each of the corresponding 32-bit values of the 128-bit integer vectors for equality.
Definition: emmintrin.h:3209
static __inline__ void int __a
Definition: emmintrin.h:4185
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_mul_sd(__m128d __a, __m128d __b)
Multiplies lower double-precision values in both operands and returns the product in the lower 64 bit...
Definition: emmintrin.h:139
void _mm_mfence(void)
Forces strong memory ordering (serialization) between load and store instructions preceding this inst...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_subs_epu16(__m128i __a, __m128i __b)
Subtracts corresponding 16-bit unsigned integer values in the input and returns the differences in th...
Definition: emmintrin.h:2725
static __inline__ void __DEFAULT_FN_ATTRS _mm_storeu_si32(void *__p, __m128i __b)
Stores a 32-bit integer value from the low element of a 128-bit integer vector.
Definition: emmintrin.h:4053
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_move_epi64(__m128i __a)
Moves the lower 64 bits of a 128-bit integer vector to a 128-bit integer vector, zeroing the upper bi...
Definition: emmintrin.h:4755
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmple_pd(__m128d __a, __m128d __b)
Compares each of the corresponding double-precision values of the 128-bit vectors of [2 x double] to ...
Definition: emmintrin.h:467
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_unpackhi_epi64(__m128i __a, __m128i __b)
Unpacks the high-order 64-bit elements from two 128-bit vectors of [2 x i64] and interleaves them int...
Definition: emmintrin.h:4596
static __inline__ int __DEFAULT_FN_ATTRS _mm_cvttsd_si32(__m128d __a)
Converts the low-order element of a [2 x double] vector into a 32-bit signed integer value,...
Definition: emmintrin.h:1491
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_cvtepi32_ps(__m128i __a)
Converts a vector of [4 x i32] into a vector of [4 x float].
Definition: emmintrin.h:3412
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_unpacklo_epi32(__m128i __a, __m128i __b)
Unpacks the low-order (index 0,1) values from two 128-bit vectors of [4 x i32] and interleaves them i...
Definition: emmintrin.h:4682
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cmpgt_epi32(__m128i __a, __m128i __b)
Compares each of the corresponding signed 32-bit values of the 128-bit integer vectors to determine i...
Definition: emmintrin.h:3273
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cvtss_sd(__m128d __a, __m128 __b)
Converts the lower single-precision floating-point element of a 128-bit vector of [4 x float],...
Definition: emmintrin.h:1449
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_sqrt_pd(__m128d __a)
Calculates the square root of each of the two values stored in a 128-bit vector of [2 x double].
Definition: emmintrin.h:244
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_move_sd(__m128d __a, __m128d __b)
Constructs a 128-bit floating-point vector of [2 x double].
Definition: emmintrin.h:1932
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_unpackhi_epi16(__m128i __a, __m128i __b)
Unpacks the high-order (index 4-7) values from two 128-bit vectors of [8 x i16] and interleaves them ...
Definition: emmintrin.h:4552
static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX _mm_cvttpd_pi32(__m128d __a)
Converts the two double-precision floating-point elements of a 128-bit vector of [2 x double] into tw...
Definition: emmintrin.h:1528
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtpd_epi32(__m128d __a)
Converts the two double-precision floating-point elements of a 128-bit vector of [2 x double] into tw...
Definition: emmintrin.h:1358
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_undefined_si128(void)
Generates a 128-bit vector of [4 x i32] with unspecified content.
Definition: emmintrin.h:3587
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_castps_si128(__m128 __a)
Casts a 128-bit floating-point vector of [4 x float] into a 128-bit integer vector.
Definition: emmintrin.h:4913
static __inline__ int __DEFAULT_FN_ATTRS _mm_ucomige_sd(__m128d __a, __m128d __b)
Compares the lower double-precision floating-point values in each of the two 128-bit floating-point v...
Definition: emmintrin.h:1250
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_madd_epi16(__m128i __a, __m128i __b)
Multiplies the corresponding elements of two 128-bit signed [8 x i16] vectors, producing eight interm...
Definition: emmintrin.h:2352
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_adds_epu16(__m128i __a, __m128i __b)
Adds, with saturation, the corresponding elements of two 128-bit unsigned [8 x i16] vectors,...
Definition: emmintrin.h:2286
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtsi32_si128(int __a)
Returns a vector of [4 x i32] where the lowest element is the input operand and the remaining element...
Definition: emmintrin.h:3461
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_loadu_si64(void const *__a)
Loads a 64-bit integer value to the low element of a 128-bit integer vector and clears the upper elem...
Definition: emmintrin.h:1661
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_srli_epi64(__m128i __a, int __count)
Right-shifts each of the 64-bit values in the 128-bit integer vector operand by the specified number of b...
Definition: emmintrin.h:3133
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_slli_epi16(__m128i __a, int __count)
Left-shifts each 16-bit value in the 128-bit integer vector operand by the specified number of bits.
Definition: emmintrin.h:2840
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpnlt_pd(__m128d __a, __m128d __b)
Compares each of the corresponding double-precision values of the 128-bit vectors of [2 x double] to ...
Definition: emmintrin.h:598
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_cvtsd_ss(__m128 __a, __m128d __b)
Converts the lower double-precision floating-point element of a 128-bit vector of [2 x double],...
Definition: emmintrin.h:1400
static __inline__ int __DEFAULT_FN_ATTRS _mm_cvtsd_si32(__m128d __a)
Converts the low-order element of a 128-bit vector of [2 x double] into a 32-bit signed integer value...
Definition: emmintrin.h:1375
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_sub_sd(__m128d __a, __m128d __b)
Subtracts the lower double-precision value of the second operand from the lower double-precision valu...
Definition: emmintrin.h:98
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_loadl_pd(__m128d __a, double const *__dp)
Loads a double-precision value into the low-order bits of a 128-bit vector of [2 x double].
Definition: emmintrin.h:1778
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_adds_epu8(__m128i __a, __m128i __b)
Adds, with saturation, the corresponding elements of two 128-bit unsigned [16 x i8] vectors,...
Definition: emmintrin.h:2265
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_adds_epi16(__m128i __a, __m128i __b)
Adds, with saturation, the corresponding elements of two 128-bit signed [8 x i16] vectors,...
Definition: emmintrin.h:2244
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_subs_epi16(__m128i __a, __m128i __b)
Subtracts corresponding 16-bit signed integer values in the input and returns the differences in the ...
Definition: emmintrin.h:2685
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sub_epi8(__m128i __a, __m128i __b)
Subtracts the corresponding 8-bit integer values in the operands.
Definition: emmintrin.h:2570
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_undefined_pd(void)
Constructs a 128-bit floating-point vector of [2 x double] with unspecified content.
Definition: emmintrin.h:1799
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_unpackhi_epi32(__m128i __a, __m128i __b)
Unpacks the high-order (index 2,3) values from two 128-bit vectors of [4 x i32] and interleaves them ...
Definition: emmintrin.h:4575
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_cvtpd_ps(__m128d __a)
Converts the two double-precision floating-point elements of a 128-bit vector of [2 x double] into tw...
Definition: emmintrin.h:1295
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_setr_epi8(char __b0, char __b1, char __b2, char __b3, char __b4, char __b5, char __b6, char __b7, char __b8, char __b9, char __b10, char __b11, char __b12, char __b13, char __b14, char __b15)
Constructs a 128-bit integer vector, initialized in reverse order with the specified 8-bit integral v...
Definition: emmintrin.h:3963
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mulhi_epi16(__m128i __a, __m128i __b)
Multiplies the corresponding elements of two signed [8 x i16] vectors, saving the upper 16 bits of ea...
Definition: emmintrin.h:2452
static __inline__ int __DEFAULT_FN_ATTRS _mm_comige_sd(__m128d __a, __m128d __b)
Compares the lower double-precision floating-point values in each of the two 128-bit floating-point v...
Definition: emmintrin.h:1096
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpunord_pd(__m128d __a, __m128d __b)
Compares each of the corresponding double-precision values of the 128-bit vectors of [2 x double] to ...
Definition: emmintrin.h:556
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_add_epi64(__m128i __a, __m128i __b)
Adds the corresponding elements of two 128-bit vectors of [2 x i64], saving the lower 64 bits of each...
Definition: emmintrin.h:2201
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mulhi_epu16(__m128i __a, __m128i __b)
Multiplies the corresponding elements of two unsigned [8 x i16] vectors, saving the upper 16 bits of ...
Definition: emmintrin.h:2472
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_andnot_pd(__m128d __a, __m128d __b)
Performs a bitwise AND of two 128-bit vectors of [2 x double], using the one's complement of the valu...
Definition: emmintrin.h:371
static __inline__ int __DEFAULT_FN_ATTRS _mm_comieq_sd(__m128d __a, __m128d __b)
Compares the lower double-precision floating-point values in each of the two 128-bit floating-point v...
Definition: emmintrin.h:992
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_max_epi16(__m128i __a, __m128i __b)
Compares corresponding elements of two 128-bit signed [8 x i16] vectors, saving the greater value fro...
Definition: emmintrin.h:2372
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_castpd_ps(__m128d __a)
Casts a 128-bit floating-point vector of [2 x double] into a 128-bit floating-point vector of [4 x fl...
Definition: emmintrin.h:4862
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sll_epi64(__m128i __a, __m128i __count)
Left-shifts each 64-bit value in the 128-bit integer vector operand by the specified number of bits.
Definition: emmintrin.h:2935
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_packs_epi16(__m128i __a, __m128i __b)
Converts 16-bit signed integers from both 128-bit integer vector operands into 8-bit signed integers,...
Definition: emmintrin.h:4275
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set1_epi32(int __i)
Initializes all values in a 128-bit vector of [4 x i32] with the specified 32-bit value.
Definition: emmintrin.h:3804
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sra_epi16(__m128i __a, __m128i __count)
Right-shifts each 16-bit value in the 128-bit integer vector operand by the specified number of bits.
Definition: emmintrin.h:2975
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_loadr_pd(double const *__dp)
Loads two double-precision values, in reverse order, from an aligned memory location into a 128-bit v...
Definition: emmintrin.h:1623
static __inline__ int __DEFAULT_FN_ATTRS _mm_comigt_sd(__m128d __a, __m128d __b)
Compares the lower double-precision floating-point values in each of the two 128-bit floating-point v...
Definition: emmintrin.h:1070
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpngt_pd(__m128d __a, __m128d __b)
Compares each of the corresponding double-precision values of the 128-bit vectors of [2 x double] to ...
Definition: emmintrin.h:640
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_castps_pd(__m128 __a)
Casts a 128-bit floating-point vector of [4 x float] into a 128-bit floating-point vector of [2 x dou...
Definition: emmintrin.h:4896
static __inline__ void __DEFAULT_FN_ATTRS _mm_storeh_pd(double *__dp, __m128d __a)
Stores the upper 64 bits of a 128-bit vector of [2 x double] to a memory location.
Definition: emmintrin.h:2072
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_slli_epi32(__m128i __a, int __count)
Left-shifts each 32-bit value in the 128-bit integer vector operand by the specified number of bits.
Definition: emmintrin.h:2878
void _mm_lfence(void)
Forces strong memory ordering (serialization) between load instructions preceding this instruction an...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sad_epu8(__m128i __a, __m128i __b)
Computes the absolute differences of corresponding 8-bit integer values in two 128-bit vectors.
Definition: emmintrin.h:2552
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpngt_sd(__m128d __a, __m128d __b)
Compares the lower double-precision floating-point values in each of the two 128-bit floating-point v...
Definition: emmintrin.h:941
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpunord_sd(__m128d __a, __m128d __b)
Compares the lower double-precision floating-point values in each of the two 128-bit floating-point v...
Definition: emmintrin.h:841
static __inline__ void __DEFAULT_FN_ATTRS _mm_storel_pd(double *__dp, __m128d __a)
Stores the lower 64 bits of a 128-bit vector of [2 x double] to a memory location.
Definition: emmintrin.h:2092
static __inline__ void __DEFAULT_FN_ATTRS _mm_store_pd(double *__dp, __m128d __a)
Moves packed double-precision values from a 128-bit vector of [2 x double] to a memory location.
Definition: emmintrin.h:1972
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_add_epi32(__m128i __a, __m128i __b)
Adds the corresponding elements of two 128-bit vectors of [4 x i32], saving the lower 32 bits of each...
Definition: emmintrin.h:2161
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_loadu_si128(__m128i_u const *__p)
Moves packed integer values from an unaligned 128-bit memory location to elements in a 128-bit intege...
Definition: emmintrin.h:3548
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set1_epi8(char __b)
Initializes all values in a 128-bit vector of [16 x i8] with the specified 8-bit value.
Definition: emmintrin.h:3842
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_packs_epi32(__m128i __a, __m128i __b)
Converts 32-bit signed integers from both 128-bit integer vector operands into 16-bit signed integers...
Definition: emmintrin.h:4303
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sll_epi16(__m128i __a, __m128i __count)
Left-shifts each 16-bit value in the 128-bit integer vector operand by the specified number of bits.
Definition: emmintrin.h:2859
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_add_epi8(__m128i __a, __m128i __b)
Adds the corresponding elements of two 128-bit vectors of [16 x i8], saving the lower 8 bits of each ...
Definition: emmintrin.h:2117
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpnle_sd(__m128d __a, __m128d __b)
Compares the lower double-precision floating-point values in each of the two 128-bit floating-point v...
Definition: emmintrin.h:916
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpgt_pd(__m128d __a, __m128d __b)
Compares each of the corresponding double-precision values of the 128-bit vectors of [2 x double] to ...
Definition: emmintrin.h:488
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_load_si128(__m128i const *__p)
Moves packed integer values from an aligned 128-bit memory location to elements in a 128-bit integer ...
Definition: emmintrin.h:3532
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpeq_sd(__m128d __a, __m128d __b)
Compares the lower double-precision floating-point values in each of the two 128-bit floating-point v...
Definition: emmintrin.h:684
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cmplt_epi8(__m128i __a, __m128i __b)
Compares each of the corresponding signed 8-bit values of the 128-bit integer vectors to determine if...
Definition: emmintrin.h:3294
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_castsi128_ps(__m128i __a)
Casts a 128-bit integer vector into a 128-bit floating-point vector of [4 x float].
Definition: emmintrin.h:4930
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_setzero_pd(void)
Constructs a 128-bit floating-point vector of [2 x double] initialized to zero.
Definition: emmintrin.h:1911
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_and_pd(__m128d __a, __m128d __b)
Performs a bitwise AND of two 128-bit vectors of [2 x double].
Definition: emmintrin.h:350
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set_epi64x(long long __q1, long long __q0)
Initializes both 64-bit values in a 128-bit vector of [2 x i64] with the specified 64-bit integer val...
Definition: emmintrin.h:3609
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cvtsi32_sd(__m128d __a, int __b)
Converts a 32-bit signed integer value, in the second parameter, into a double-precision floating-poi...
Definition: emmintrin.h:1423
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mul_epu32(__m128i __a, __m128i __b)
Multiplies 32-bit unsigned integer values contained in the lower bits of the corresponding elements o...
Definition: emmintrin.h:2530
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpnlt_sd(__m128d __a, __m128d __b)
Compares the lower double-precision floating-point values in each of the two 128-bit floating-point v...
Definition: emmintrin.h:891
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_srl_epi64(__m128i __a, __m128i __count)
Right-shifts each of the 64-bit values in the 128-bit integer vector operand by the specified number of b...
Definition: emmintrin.h:3152
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpneq_sd(__m128d __a, __m128d __b)
Compares the lower double-precision floating-point values in each of the two 128-bit floating-point v...
Definition: emmintrin.h:866
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvttps_epi32(__m128 __a)
Converts a vector of [4 x float] into a vector of [4 x i32], truncating the result when it is inexact...
Definition: emmintrin.h:3445
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_setr_epi16(short __w0, short __w1, short __w2, short __w3, short __w4, short __w5, short __w6, short __w7)
Constructs a 128-bit integer vector, initialized in reverse order with the specified 16-bit integral ...
Definition: emmintrin.h:3916
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_loadh_pd(__m128d __a, double const *__dp)
Loads a double-precision value into the high-order bits of a 128-bit vector of [2 x double].
Definition: emmintrin.h:1751
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set_epi8(char __b15, char __b14, char __b13, char __b12, char __b11, char __b10, char __b9, char __b8, char __b7, char __b6, char __b5, char __b4, char __b3, char __b2, char __b1, char __b0)
Initializes the 8-bit values in a 128-bit vector of [16 x i8] with the specified 8-bit integer values...
Definition: emmintrin.h:3747
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set1_epi64(__m64 __q)
Initializes both values in a 128-bit vector of [2 x i64] with the specified 64-bit value.
Definition: emmintrin.h:3785
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_sqrt_sd(__m128d __a, __m128d __b)
Calculates the square root of the lower double-precision value of the second operand and returns it i...
Definition: emmintrin.h:226
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_setr_pd(double __w, double __x)
Constructs a 128-bit floating-point vector of [2 x double], initialized in reverse order with the spe...
Definition: emmintrin.h:1896
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set_epi64(__m64 __q1, __m64 __q0)
Initializes both 64-bit values in a 128-bit vector of [2 x i64] with the specified 64-bit integer val...
Definition: emmintrin.h:3631
static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX _mm_sub_si64(__m64 __a, __m64 __b)
Subtracts signed or unsigned 64-bit integer values and writes the difference to the corresponding bit...
Definition: emmintrin.h:2625
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_min_sd(__m128d __a, __m128d __b)
Compares lower 64-bit double-precision values of both operands, and returns the lesser of the pair of...
Definition: emmintrin.h:268
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_slli_epi64(__m128i __a, int __count)
Left-shifts each 64-bit value in the 128-bit integer vector operand by the specified number of bits.
Definition: emmintrin.h:2916
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_add_pd(__m128d __a, __m128d __b)
Adds two 128-bit vectors of [2 x double].
Definition: emmintrin.h:75
static __inline__ void __DEFAULT_FN_ATTRS _mm_stream_pd(double *__p, __m128d __a)
Stores a 128-bit floating-point vector of [2 x double] to a 128-bit aligned memory location.
Definition: emmintrin.h:4146
static __inline__ void __DEFAULT_FN_ATTRS _mm_storeu_si64(void *__p, __m128i __b)
Stores a 64-bit integer value from the low element of a 128-bit integer vector.
Definition: emmintrin.h:4032
static __inline__ void __DEFAULT_FN_ATTRS _mm_store_sd(double *__dp, __m128d __a)
Stores the lower 64 bits of a 128-bit vector of [2 x double] to a memory location.
Definition: emmintrin.h:1950
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_load1_pd(double const *__dp)
Loads a double-precision floating-point value from a specified memory location and duplicates it to b...
Definition: emmintrin.h:1597
#define __DEFAULT_FN_ATTRS_MMX
Definition: emmintrin.h:38
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_max_pd(__m128d __a, __m128d __b)
Performs element-by-element comparison of the two 128-bit vectors of [2 x double] and returns the vec...
Definition: emmintrin.h:332
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_min_epi16(__m128i __a, __m128i __b)
Compares corresponding elements of two 128-bit signed [8 x i16] vectors, saving the smaller value fro...
Definition: emmintrin.h:2412
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_movepi64_pi64(__m128i __a)
Returns the lower 64 bits of a 128-bit integer vector as a 64-bit integer.
Definition: emmintrin.h:4720
static __inline__ void __DEFAULT_FN_ATTRS _mm_store_si128(__m128i *__p, __m128i __b)
Stores a 128-bit integer vector to a memory location aligned on a 128-bit boundary.
Definition: emmintrin.h:3995
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_mul_pd(__m128d __a, __m128d __b)
Multiplies two 128-bit vectors of [2 x double].
Definition: emmintrin.h:158
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpeq_pd(__m128d __a, __m128d __b)
Compares each of the corresponding double-precision values of the 128-bit vectors of [2 x double] for...
Definition: emmintrin.h:426
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_set_sd(double __w)
Constructs a 128-bit floating-point vector of [2 x double].
Definition: emmintrin.h:1819
static __inline__ __m128d __DEFAULT_FN_ATTRS_MMX _mm_cvtpi32_pd(__m64 __a)
Converts the two signed 32-bit integer elements of a 64-bit vector of [2 x i32] into two double-preci...
Definition: emmintrin.h:1545
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_max_sd(__m128d __a, __m128d __b)
Compares lower 64-bit double-precision values of both operands, and returns the greater of the pair o...
Definition: emmintrin.h:312
static __inline__ double __DEFAULT_FN_ATTRS _mm_cvtsd_f64(__m128d __a)
Returns the low-order element of a 128-bit vector of [2 x double] as a double-precision floating-poin...
Definition: emmintrin.h:1562
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_loadl_epi64(__m128i_u const *__p)
Returns a vector of [2 x i64] where the lower element is taken from the lower element of the operand,...
Definition: emmintrin.h:3569
void _mm_pause(void)
Indicates that a spin loop is being executed for the purposes of optimizing power consumption during ...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set1_epi16(short __w)
Initializes all values in a 128-bit vector of [8 x i16] with the specified 16-bit value.
Definition: emmintrin.h:3823
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set1_epi64x(long long __q)
Initializes both values in a 128-bit integer vector with the specified 64-bit integer value.
Definition: emmintrin.h:3766
static __inline__ int __DEFAULT_FN_ATTRS _mm_cvtsi128_si32(__m128i __a)
Moves the least significant 32 bits of a vector of [4 x i32] to a 32-bit signed integer value.
Definition: emmintrin.h:3496
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sub_epi32(__m128i __a, __m128i __b)
Subtracts the corresponding 32-bit integer values in the operands.
Definition: emmintrin.h:2606
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_avg_epu8(__m128i __a, __m128i __b)
Computes the rounded averages of corresponding elements of two 128-bit unsigned [16 x i8] vectors,...
Definition: emmintrin.h:2306
static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX _mm_mul_su32(__m64 __a, __m64 __b)
Multiplies 32-bit unsigned integer values contained in the lower bits of the two 64-bit integer vecto...
Definition: emmintrin.h:2511
static __inline__ void __DEFAULT_FN_ATTRS _mm_storer_pd(double *__dp, __m128d __a)
Stores two double-precision values, in reverse order, from a 128-bit vector of [2 x double] to a 16-b...
Definition: emmintrin.h:2054
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_unpackhi_epi8(__m128i __a, __m128i __b)
Unpacks the high-order (index 8-15) values from two 128-bit vectors of [16 x i8] and interleaves them...
Definition: emmintrin.h:4525
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_srli_epi16(__m128i __a, int __count)
Right-shifts each of the 16-bit values in the 128-bit integer vector operand by the specified number of b...
Definition: emmintrin.h:3057
static __inline__ void __DEFAULT_FN_ATTRS _mm_storeu_pd(double *__dp, __m128d __a)
Stores a 128-bit vector of [2 x double] into an unaligned memory location.
Definition: emmintrin.h:2031
static __inline__ int __DEFAULT_FN_ATTRS _mm_ucomineq_sd(__m128d __a, __m128d __b)
Compares the lower double-precision floating-point values in each of the two 128-bit floating-point v...
Definition: emmintrin.h:1276
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_add_epi16(__m128i __a, __m128i __b)
Adds the corresponding elements of two 128-bit vectors of [8 x i16], saving the lower 16 bits of each...
Definition: emmintrin.h:2139
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_subs_epu8(__m128i __a, __m128i __b)
Subtracts corresponding 8-bit unsigned integer values in the input and returns the differences in the...
Definition: emmintrin.h:2705
static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX _mm_add_si64(__m64 __a, __m64 __b)
Adds two signed or unsigned 64-bit integer values, returning the lower 64 bits of the sum.
Definition: emmintrin.h:2179
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cvtps_pd(__m128 __a)
Converts the lower two single-precision floating-point elements of a 128-bit vector of [4 x float] in...
Definition: emmintrin.h:1315
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_setzero_si128(void)
Creates a 128-bit integer vector initialized to zero.
Definition: emmintrin.h:3977
static __inline__ int __DEFAULT_FN_ATTRS _mm_comineq_sd(__m128d __a, __m128d __b)
Compares the lower double-precision floating-point values in each of the two 128-bit floating-point v...
Definition: emmintrin.h:1122
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpord_pd(__m128d __a, __m128d __b)
Compares each of the corresponding double-precision values of the 128-bit vectors of [2 x double] to ...
Definition: emmintrin.h:532
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmplt_pd(__m128d __a, __m128d __b)
Compares each of the corresponding double-precision values of the 128-bit vectors of [2 x double] to ...
Definition: emmintrin.h:446
static __inline__ void __DEFAULT_FN_ATTRS _mm_storeu_si128(__m128i_u *__p, __m128i __b)
Stores a 128-bit integer vector to an unaligned memory location.
Definition: emmintrin.h:4011
double __m128d __attribute__((__vector_size__(16), __aligned__(16)))
Definition: emmintrin.h:15
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sra_epi32(__m128i __a, __m128i __count)
Right-shifts each 32-bit value in the 128-bit integer vector operand by the specified number of bits.
Definition: emmintrin.h:3015
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_setr_epi32(int __i0, int __i1, int __i2, int __i3)
Constructs a 128-bit integer vector, initialized in reverse order with the specified 32-bit integral ...
Definition: emmintrin.h:3885
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvttpd_epi32(__m128d __a)
Converts the two double-precision floating-point elements of a 128-bit vector of [2 x double] into tw...
Definition: emmintrin.h:1473
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtps_epi32(__m128 __a)
Converts a vector of [4 x float] into a vector of [4 x i32].
Definition: emmintrin.h:3428
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_xor_si128(__m128i __a, __m128i __b)
Performs a bitwise exclusive OR of two 128-bit integer vectors.
Definition: emmintrin.h:2798
void _mm_clflush(void const *__p)
The cache line containing __p is flushed and invalidated from all caches in the coherency domain.
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpnle_pd(__m128d __a, __m128d __b)
Compares each of the corresponding double-precision values of the 128-bit vectors of [2 x double] to ...
Definition: emmintrin.h:619
struct __storeu_i16 *__P __v
Definition: immintrin.h:348