ETISS 0.8.0
Extendable Translating Instruction Set Simulator (version 0.8.0)
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
emmintrin.h
Go to the documentation of this file.
1/*===---- emmintrin.h - SSE2 intrinsics ------------------------------------===
2 *
3 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 * See https://llvm.org/LICENSE.txt for license information.
5 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 *
7 *===-----------------------------------------------------------------------===
8 */
9
10#ifndef __EMMINTRIN_H
11#define __EMMINTRIN_H
12
13#include <xmmintrin.h>
14
/* Public SSE2 vector types: 16-byte vectors declared with 16-byte alignment. */
15typedef double __m128d __attribute__((__vector_size__(16), __aligned__(16)));
16typedef long long __m128i __attribute__((__vector_size__(16), __aligned__(16)));
17
/* Same element types and size, but declared with 1-byte alignment; these are
 * the member types of the packed structs used by the unaligned load/store
 * helpers in this file. */
18typedef double __m128d_u __attribute__((__vector_size__(16), __aligned__(1)));
19typedef long long __m128i_u __attribute__((__vector_size__(16), __aligned__(1)));
20
21/* Type defines. */
/* Internal 16-byte views. The intrinsics cast their arguments to one of these
 * so that vector operators and builtins see the intended element type. */
22typedef double __v2df __attribute__ ((__vector_size__ (16)));
23typedef long long __v2di __attribute__ ((__vector_size__ (16)));
24typedef short __v8hi __attribute__((__vector_size__(16)));
25typedef char __v16qi __attribute__((__vector_size__(16)));
26
27/* Unsigned types */
/* Used where the code performs arithmetic or bitwise operators on unsigned
 * elements (e.g. the add/sub/and/or/xor helpers). */
28typedef unsigned long long __v2du __attribute__ ((__vector_size__ (16)));
29typedef unsigned short __v8hu __attribute__((__vector_size__(16)));
30typedef unsigned char __v16qu __attribute__((__vector_size__(16)));
31
32/* We need an explicitly signed variant for char. Note that this shouldn't
33 * appear in the interface though. */
34typedef signed char __v16qs __attribute__((__vector_size__(16)));
35
36/* Define the default attributes for the functions in this file. */
/* __DEFAULT_FN_ATTRS: always-inline, no debug info, "sse2" target feature,
 * minimum vector width of 128 bits.
 * __DEFAULT_FN_ATTRS_MMX: same, but with the "mmx,sse2" target features and a
 * minimum vector width of 64 bits; applied to the intrinsics in this file
 * that take or return __m64. */
37#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("sse2"), __min_vector_width__(128)))
38#define __DEFAULT_FN_ATTRS_MMX __attribute__((__always_inline__, __nodebug__, __target__("mmx,sse2"), __min_vector_width__(64)))
39
55static __inline__ __m128d __DEFAULT_FN_ATTRS
56_mm_add_sd(__m128d __a, __m128d __b)
57{
58 __a[0] += __b[0];
59 return __a;
60}
61
74static __inline__ __m128d __DEFAULT_FN_ATTRS
75_mm_add_pd(__m128d __a, __m128d __b)
76{
77 return (__m128d)((__v2df)__a + (__v2df)__b);
78}
79
97static __inline__ __m128d __DEFAULT_FN_ATTRS
98_mm_sub_sd(__m128d __a, __m128d __b)
99{
100 __a[0] -= __b[0];
101 return __a;
102}
103
116static __inline__ __m128d __DEFAULT_FN_ATTRS
117_mm_sub_pd(__m128d __a, __m128d __b)
118{
119 return (__m128d)((__v2df)__a - (__v2df)__b);
120}
121
138static __inline__ __m128d __DEFAULT_FN_ATTRS
139_mm_mul_sd(__m128d __a, __m128d __b)
140{
141 __a[0] *= __b[0];
142 return __a;
143}
144
157static __inline__ __m128d __DEFAULT_FN_ATTRS
158_mm_mul_pd(__m128d __a, __m128d __b)
159{
160 return (__m128d)((__v2df)__a * (__v2df)__b);
161}
162
180static __inline__ __m128d __DEFAULT_FN_ATTRS
181_mm_div_sd(__m128d __a, __m128d __b)
182{
183 __a[0] /= __b[0];
184 return __a;
185}
186
200static __inline__ __m128d __DEFAULT_FN_ATTRS
201_mm_div_pd(__m128d __a, __m128d __b)
202{
203 return (__m128d)((__v2df)__a / (__v2df)__b);
204}
205
225static __inline__ __m128d __DEFAULT_FN_ATTRS
226_mm_sqrt_sd(__m128d __a, __m128d __b)
227{
228 __m128d __c = __builtin_ia32_sqrtsd((__v2df)__b);
229 return __extension__ (__m128d) { __c[0], __a[1] };
230}
231
243static __inline__ __m128d __DEFAULT_FN_ATTRS
245{
246 return __builtin_ia32_sqrtpd((__v2df)__a);
247}
248
267static __inline__ __m128d __DEFAULT_FN_ATTRS
268_mm_min_sd(__m128d __a, __m128d __b)
269{
270 return __builtin_ia32_minsd((__v2df)__a, (__v2df)__b);
271}
272
287static __inline__ __m128d __DEFAULT_FN_ATTRS
288_mm_min_pd(__m128d __a, __m128d __b)
289{
290 return __builtin_ia32_minpd((__v2df)__a, (__v2df)__b);
291}
292
311static __inline__ __m128d __DEFAULT_FN_ATTRS
312_mm_max_sd(__m128d __a, __m128d __b)
313{
314 return __builtin_ia32_maxsd((__v2df)__a, (__v2df)__b);
315}
316
331static __inline__ __m128d __DEFAULT_FN_ATTRS
332_mm_max_pd(__m128d __a, __m128d __b)
333{
334 return __builtin_ia32_maxpd((__v2df)__a, (__v2df)__b);
335}
336
349static __inline__ __m128d __DEFAULT_FN_ATTRS
350_mm_and_pd(__m128d __a, __m128d __b)
351{
352 return (__m128d)((__v2du)__a & (__v2du)__b);
353}
354
370static __inline__ __m128d __DEFAULT_FN_ATTRS
371_mm_andnot_pd(__m128d __a, __m128d __b)
372{
373 return (__m128d)(~(__v2du)__a & (__v2du)__b);
374}
375
388static __inline__ __m128d __DEFAULT_FN_ATTRS
389_mm_or_pd(__m128d __a, __m128d __b)
390{
391 return (__m128d)((__v2du)__a | (__v2du)__b);
392}
393
406static __inline__ __m128d __DEFAULT_FN_ATTRS
407_mm_xor_pd(__m128d __a, __m128d __b)
408{
409 return (__m128d)((__v2du)__a ^ (__v2du)__b);
410}
411
425static __inline__ __m128d __DEFAULT_FN_ATTRS
426_mm_cmpeq_pd(__m128d __a, __m128d __b)
427{
428 return (__m128d)__builtin_ia32_cmpeqpd((__v2df)__a, (__v2df)__b);
429}
430
445static __inline__ __m128d __DEFAULT_FN_ATTRS
446_mm_cmplt_pd(__m128d __a, __m128d __b)
447{
448 return (__m128d)__builtin_ia32_cmpltpd((__v2df)__a, (__v2df)__b);
449}
450
466static __inline__ __m128d __DEFAULT_FN_ATTRS
467_mm_cmple_pd(__m128d __a, __m128d __b)
468{
469 return (__m128d)__builtin_ia32_cmplepd((__v2df)__a, (__v2df)__b);
470}
471
487static __inline__ __m128d __DEFAULT_FN_ATTRS
488_mm_cmpgt_pd(__m128d __a, __m128d __b)
489{
490 return (__m128d)__builtin_ia32_cmpltpd((__v2df)__b, (__v2df)__a);
491}
492
508static __inline__ __m128d __DEFAULT_FN_ATTRS
509_mm_cmpge_pd(__m128d __a, __m128d __b)
510{
511 return (__m128d)__builtin_ia32_cmplepd((__v2df)__b, (__v2df)__a);
512}
513
531static __inline__ __m128d __DEFAULT_FN_ATTRS
532_mm_cmpord_pd(__m128d __a, __m128d __b)
533{
534 return (__m128d)__builtin_ia32_cmpordpd((__v2df)__a, (__v2df)__b);
535}
536
555static __inline__ __m128d __DEFAULT_FN_ATTRS
556_mm_cmpunord_pd(__m128d __a, __m128d __b)
557{
558 return (__m128d)__builtin_ia32_cmpunordpd((__v2df)__a, (__v2df)__b);
559}
560
576static __inline__ __m128d __DEFAULT_FN_ATTRS
577_mm_cmpneq_pd(__m128d __a, __m128d __b)
578{
579 return (__m128d)__builtin_ia32_cmpneqpd((__v2df)__a, (__v2df)__b);
580}
581
597static __inline__ __m128d __DEFAULT_FN_ATTRS
598_mm_cmpnlt_pd(__m128d __a, __m128d __b)
599{
600 return (__m128d)__builtin_ia32_cmpnltpd((__v2df)__a, (__v2df)__b);
601}
602
618static __inline__ __m128d __DEFAULT_FN_ATTRS
619_mm_cmpnle_pd(__m128d __a, __m128d __b)
620{
621 return (__m128d)__builtin_ia32_cmpnlepd((__v2df)__a, (__v2df)__b);
622}
623
639static __inline__ __m128d __DEFAULT_FN_ATTRS
640_mm_cmpngt_pd(__m128d __a, __m128d __b)
641{
642 return (__m128d)__builtin_ia32_cmpnltpd((__v2df)__b, (__v2df)__a);
643}
644
660static __inline__ __m128d __DEFAULT_FN_ATTRS
661_mm_cmpnge_pd(__m128d __a, __m128d __b)
662{
663 return (__m128d)__builtin_ia32_cmpnlepd((__v2df)__b, (__v2df)__a);
664}
665
683static __inline__ __m128d __DEFAULT_FN_ATTRS
684_mm_cmpeq_sd(__m128d __a, __m128d __b)
685{
686 return (__m128d)__builtin_ia32_cmpeqsd((__v2df)__a, (__v2df)__b);
687}
688
708static __inline__ __m128d __DEFAULT_FN_ATTRS
709_mm_cmplt_sd(__m128d __a, __m128d __b)
710{
711 return (__m128d)__builtin_ia32_cmpltsd((__v2df)__a, (__v2df)__b);
712}
713
733static __inline__ __m128d __DEFAULT_FN_ATTRS
734_mm_cmple_sd(__m128d __a, __m128d __b)
735{
736 return (__m128d)__builtin_ia32_cmplesd((__v2df)__a, (__v2df)__b);
737}
738
758static __inline__ __m128d __DEFAULT_FN_ATTRS
759_mm_cmpgt_sd(__m128d __a, __m128d __b)
760{
761 __m128d __c = __builtin_ia32_cmpltsd((__v2df)__b, (__v2df)__a);
762 return __extension__ (__m128d) { __c[0], __a[1] };
763}
764
784static __inline__ __m128d __DEFAULT_FN_ATTRS
785_mm_cmpge_sd(__m128d __a, __m128d __b)
786{
787 __m128d __c = __builtin_ia32_cmplesd((__v2df)__b, (__v2df)__a);
788 return __extension__ (__m128d) { __c[0], __a[1] };
789}
790
812static __inline__ __m128d __DEFAULT_FN_ATTRS
813_mm_cmpord_sd(__m128d __a, __m128d __b)
814{
815 return (__m128d)__builtin_ia32_cmpordsd((__v2df)__a, (__v2df)__b);
816}
817
840static __inline__ __m128d __DEFAULT_FN_ATTRS
841_mm_cmpunord_sd(__m128d __a, __m128d __b)
842{
843 return (__m128d)__builtin_ia32_cmpunordsd((__v2df)__a, (__v2df)__b);
844}
845
865static __inline__ __m128d __DEFAULT_FN_ATTRS
866_mm_cmpneq_sd(__m128d __a, __m128d __b)
867{
868 return (__m128d)__builtin_ia32_cmpneqsd((__v2df)__a, (__v2df)__b);
869}
870
890static __inline__ __m128d __DEFAULT_FN_ATTRS
891_mm_cmpnlt_sd(__m128d __a, __m128d __b)
892{
893 return (__m128d)__builtin_ia32_cmpnltsd((__v2df)__a, (__v2df)__b);
894}
895
915static __inline__ __m128d __DEFAULT_FN_ATTRS
916_mm_cmpnle_sd(__m128d __a, __m128d __b)
917{
918 return (__m128d)__builtin_ia32_cmpnlesd((__v2df)__a, (__v2df)__b);
919}
920
940static __inline__ __m128d __DEFAULT_FN_ATTRS
941_mm_cmpngt_sd(__m128d __a, __m128d __b)
942{
943 __m128d __c = __builtin_ia32_cmpnltsd((__v2df)__b, (__v2df)__a);
944 return __extension__ (__m128d) { __c[0], __a[1] };
945}
946
966static __inline__ __m128d __DEFAULT_FN_ATTRS
967_mm_cmpnge_sd(__m128d __a, __m128d __b)
968{
969 __m128d __c = __builtin_ia32_cmpnlesd((__v2df)__b, (__v2df)__a);
970 return __extension__ (__m128d) { __c[0], __a[1] };
971}
972
991static __inline__ int __DEFAULT_FN_ATTRS
992_mm_comieq_sd(__m128d __a, __m128d __b)
993{
994 return __builtin_ia32_comisdeq((__v2df)__a, (__v2df)__b);
995}
996
1017static __inline__ int __DEFAULT_FN_ATTRS
1018_mm_comilt_sd(__m128d __a, __m128d __b)
1019{
1020 return __builtin_ia32_comisdlt((__v2df)__a, (__v2df)__b);
1021}
1022
1043static __inline__ int __DEFAULT_FN_ATTRS
1044_mm_comile_sd(__m128d __a, __m128d __b)
1045{
1046 return __builtin_ia32_comisdle((__v2df)__a, (__v2df)__b);
1047}
1048
1069static __inline__ int __DEFAULT_FN_ATTRS
1070_mm_comigt_sd(__m128d __a, __m128d __b)
1071{
1072 return __builtin_ia32_comisdgt((__v2df)__a, (__v2df)__b);
1073}
1074
1095static __inline__ int __DEFAULT_FN_ATTRS
1096_mm_comige_sd(__m128d __a, __m128d __b)
1097{
1098 return __builtin_ia32_comisdge((__v2df)__a, (__v2df)__b);
1099}
1100
1121static __inline__ int __DEFAULT_FN_ATTRS
1122_mm_comineq_sd(__m128d __a, __m128d __b)
1123{
1124 return __builtin_ia32_comisdneq((__v2df)__a, (__v2df)__b);
1125}
1126
1145static __inline__ int __DEFAULT_FN_ATTRS
1146_mm_ucomieq_sd(__m128d __a, __m128d __b)
1147{
1148 return __builtin_ia32_ucomisdeq((__v2df)__a, (__v2df)__b);
1149}
1150
1171static __inline__ int __DEFAULT_FN_ATTRS
1172_mm_ucomilt_sd(__m128d __a, __m128d __b)
1173{
1174 return __builtin_ia32_ucomisdlt((__v2df)__a, (__v2df)__b);
1175}
1176
1197static __inline__ int __DEFAULT_FN_ATTRS
1198_mm_ucomile_sd(__m128d __a, __m128d __b)
1199{
1200 return __builtin_ia32_ucomisdle((__v2df)__a, (__v2df)__b);
1201}
1202
1223static __inline__ int __DEFAULT_FN_ATTRS
1224_mm_ucomigt_sd(__m128d __a, __m128d __b)
1225{
1226 return __builtin_ia32_ucomisdgt((__v2df)__a, (__v2df)__b);
1227}
1228
1249static __inline__ int __DEFAULT_FN_ATTRS
1250_mm_ucomige_sd(__m128d __a, __m128d __b)
1251{
1252 return __builtin_ia32_ucomisdge((__v2df)__a, (__v2df)__b);
1253}
1254
1275static __inline__ int __DEFAULT_FN_ATTRS
1276_mm_ucomineq_sd(__m128d __a, __m128d __b)
1277{
1278 return __builtin_ia32_ucomisdneq((__v2df)__a, (__v2df)__b);
1279}
1280
1294static __inline__ __m128 __DEFAULT_FN_ATTRS
1296{
1297 return __builtin_ia32_cvtpd2ps((__v2df)__a);
1298}
1299
1314static __inline__ __m128d __DEFAULT_FN_ATTRS
1316{
1317 return (__m128d) __builtin_convertvector(
1318 __builtin_shufflevector((__v4sf)__a, (__v4sf)__a, 0, 1), __v2df);
1319}
1320
1337static __inline__ __m128d __DEFAULT_FN_ATTRS
1339{
1340 return (__m128d) __builtin_convertvector(
1341 __builtin_shufflevector((__v4si)__a, (__v4si)__a, 0, 1), __v2df);
1342}
1343
1357static __inline__ __m128i __DEFAULT_FN_ATTRS
1359{
1360 return __builtin_ia32_cvtpd2dq((__v2df)__a);
1361}
1362
1374static __inline__ int __DEFAULT_FN_ATTRS
1376{
1377 return __builtin_ia32_cvtsd2si((__v2df)__a);
1378}
1379
1399static __inline__ __m128 __DEFAULT_FN_ATTRS
1400_mm_cvtsd_ss(__m128 __a, __m128d __b)
1401{
1402 return (__m128)__builtin_ia32_cvtsd2ss((__v4sf)__a, (__v2df)__b);
1403}
1404
1422static __inline__ __m128d __DEFAULT_FN_ATTRS
1424{
1425 __a[0] = __b;
1426 return __a;
1427}
1428
1448static __inline__ __m128d __DEFAULT_FN_ATTRS
1449_mm_cvtss_sd(__m128d __a, __m128 __b)
1450{
1451 __a[0] = __b[0];
1452 return __a;
1453}
1454
1472static __inline__ __m128i __DEFAULT_FN_ATTRS
1474{
1475 return (__m128i)__builtin_ia32_cvttpd2dq((__v2df)__a);
1476}
1477
1490static __inline__ int __DEFAULT_FN_ATTRS
1492{
1493 return __builtin_ia32_cvttsd2si((__v2df)__a);
1494}
1495
1507static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
1509{
1510 return (__m64)__builtin_ia32_cvtpd2pi((__v2df)__a);
1511}
1512
1527static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
1529{
1530 return (__m64)__builtin_ia32_cvttpd2pi((__v2df)__a);
1531}
1532
1544static __inline__ __m128d __DEFAULT_FN_ATTRS_MMX
1546{
1547 return __builtin_ia32_cvtpi2pd((__v2si)__a);
1548}
1549
1561static __inline__ double __DEFAULT_FN_ATTRS
1563{
1564 return __a[0];
1565}
1566
1578static __inline__ __m128d __DEFAULT_FN_ATTRS
1579_mm_load_pd(double const *__dp)
1580{
1581 return *(const __m128d*)__dp;
1582}
1583
1596static __inline__ __m128d __DEFAULT_FN_ATTRS
1597_mm_load1_pd(double const *__dp)
1598{
1599 struct __mm_load1_pd_struct {
1600 double __u;
1601 } __attribute__((__packed__, __may_alias__));
1602 double __u = ((const struct __mm_load1_pd_struct*)__dp)->__u;
1603 return __extension__ (__m128d){ __u, __u };
1604}
1605
/* Alternate spelling: expands to a call of _mm_load1_pd with the same
 * argument. */
1606#define _mm_load_pd1(dp) _mm_load1_pd(dp)
1607
1622static __inline__ __m128d __DEFAULT_FN_ATTRS
1623_mm_loadr_pd(double const *__dp)
1624{
1625 __m128d __u = *(const __m128d*)__dp;
1626 return __builtin_shufflevector((__v2df)__u, (__v2df)__u, 1, 0);
1627}
1628
1640static __inline__ __m128d __DEFAULT_FN_ATTRS
1641_mm_loadu_pd(double const *__dp)
1642{
1643 struct __loadu_pd {
1644 __m128d_u __v;
1645 } __attribute__((__packed__, __may_alias__));
1646 return ((const struct __loadu_pd*)__dp)->__v;
1647}
1648
1660static __inline__ __m128i __DEFAULT_FN_ATTRS
1662{
1663 struct __loadu_si64 {
1664 long long __v;
1665 } __attribute__((__packed__, __may_alias__));
1666 long long __u = ((const struct __loadu_si64*)__a)->__v;
1667 return __extension__ (__m128i)(__v2di){__u, 0LL};
1668}
1669
1681static __inline__ __m128i __DEFAULT_FN_ATTRS
1683{
1684 struct __loadu_si32 {
1685 int __v;
1686 } __attribute__((__packed__, __may_alias__));
1687 int __u = ((const struct __loadu_si32*)__a)->__v;
1688 return __extension__ (__m128i)(__v4si){__u, 0, 0, 0};
1689}
1690
1702static __inline__ __m128i __DEFAULT_FN_ATTRS
1704{
1705 struct __loadu_si16 {
1706 short __v;
1707 } __attribute__((__packed__, __may_alias__));
1708 short __u = ((const struct __loadu_si16*)__a)->__v;
1709 return __extension__ (__m128i)(__v8hi){__u, 0, 0, 0, 0, 0, 0, 0};
1710}
1711
1723static __inline__ __m128d __DEFAULT_FN_ATTRS
1724_mm_load_sd(double const *__dp)
1725{
1726 struct __mm_load_sd_struct {
1727 double __u;
1728 } __attribute__((__packed__, __may_alias__));
1729 double __u = ((const struct __mm_load_sd_struct*)__dp)->__u;
1730 return __extension__ (__m128d){ __u, 0 };
1731}
1732
1750static __inline__ __m128d __DEFAULT_FN_ATTRS
1751_mm_loadh_pd(__m128d __a, double const *__dp)
1752{
1753 struct __mm_loadh_pd_struct {
1754 double __u;
1755 } __attribute__((__packed__, __may_alias__));
1756 double __u = ((const struct __mm_loadh_pd_struct*)__dp)->__u;
1757 return __extension__ (__m128d){ __a[0], __u };
1758}
1759
1777static __inline__ __m128d __DEFAULT_FN_ATTRS
1778_mm_loadl_pd(__m128d __a, double const *__dp)
1779{
1780 struct __mm_loadl_pd_struct {
1781 double __u;
1782 } __attribute__((__packed__, __may_alias__));
1783 double __u = ((const struct __mm_loadl_pd_struct*)__dp)->__u;
1784 return __extension__ (__m128d){ __u, __a[1] };
1785}
1786
1798static __inline__ __m128d __DEFAULT_FN_ATTRS
1800{
1801 return (__m128d)__builtin_ia32_undef128();
1802}
1803
1818static __inline__ __m128d __DEFAULT_FN_ATTRS
1819_mm_set_sd(double __w)
1820{
1821 return __extension__ (__m128d){ __w, 0 };
1822}
1823
1836static __inline__ __m128d __DEFAULT_FN_ATTRS
1837_mm_set1_pd(double __w)
1838{
1839 return __extension__ (__m128d){ __w, __w };
1840}
1841
1854static __inline__ __m128d __DEFAULT_FN_ATTRS
1855_mm_set_pd1(double __w)
1856{
1857 return _mm_set1_pd(__w);
1858}
1859
1874static __inline__ __m128d __DEFAULT_FN_ATTRS
1875_mm_set_pd(double __w, double __x)
1876{
1877 return __extension__ (__m128d){ __x, __w };
1878}
1879
1895static __inline__ __m128d __DEFAULT_FN_ATTRS
1896_mm_setr_pd(double __w, double __x)
1897{
1898 return __extension__ (__m128d){ __w, __x };
1899}
1900
1910static __inline__ __m128d __DEFAULT_FN_ATTRS
1912{
1913 return __extension__ (__m128d){ 0, 0 };
1914}
1915
1931static __inline__ __m128d __DEFAULT_FN_ATTRS
1932_mm_move_sd(__m128d __a, __m128d __b)
1933{
1934 __a[0] = __b[0];
1935 return __a;
1936}
1937
1949static __inline__ void __DEFAULT_FN_ATTRS
1950_mm_store_sd(double *__dp, __m128d __a)
1951{
1952 struct __mm_store_sd_struct {
1953 double __u;
1954 } __attribute__((__packed__, __may_alias__));
1955 ((struct __mm_store_sd_struct*)__dp)->__u = __a[0];
1956}
1957
1971static __inline__ void __DEFAULT_FN_ATTRS
1972_mm_store_pd(double *__dp, __m128d __a)
1973{
1974 *(__m128d*)__dp = __a;
1975}
1976
1991static __inline__ void __DEFAULT_FN_ATTRS
1992_mm_store1_pd(double *__dp, __m128d __a)
1993{
1994 __a = __builtin_shufflevector((__v2df)__a, (__v2df)__a, 0, 0);
1995 _mm_store_pd(__dp, __a);
1996}
1997
2012static __inline__ void __DEFAULT_FN_ATTRS
2013_mm_store_pd1(double *__dp, __m128d __a)
2014{
2015 _mm_store1_pd(__dp, __a);
2016}
2017
2030static __inline__ void __DEFAULT_FN_ATTRS
2031_mm_storeu_pd(double *__dp, __m128d __a)
2032{
2033 struct __storeu_pd {
2034 __m128d_u __v;
2035 } __attribute__((__packed__, __may_alias__));
2036 ((struct __storeu_pd*)__dp)->__v = __a;
2037}
2038
2053static __inline__ void __DEFAULT_FN_ATTRS
2054_mm_storer_pd(double *__dp, __m128d __a)
2055{
2056 __a = __builtin_shufflevector((__v2df)__a, (__v2df)__a, 1, 0);
2057 *(__m128d *)__dp = __a;
2058}
2059
2071static __inline__ void __DEFAULT_FN_ATTRS
2072_mm_storeh_pd(double *__dp, __m128d __a)
2073{
2074 struct __mm_storeh_pd_struct {
2075 double __u;
2076 } __attribute__((__packed__, __may_alias__));
2077 ((struct __mm_storeh_pd_struct*)__dp)->__u = __a[1];
2078}
2079
2091static __inline__ void __DEFAULT_FN_ATTRS
2092_mm_storel_pd(double *__dp, __m128d __a)
2093{
2094 struct __mm_storeh_pd_struct {
2095 double __u;
2096 } __attribute__((__packed__, __may_alias__));
2097 ((struct __mm_storeh_pd_struct*)__dp)->__u = __a[0];
2098}
2099
2116static __inline__ __m128i __DEFAULT_FN_ATTRS
2117_mm_add_epi8(__m128i __a, __m128i __b)
2118{
2119 return (__m128i)((__v16qu)__a + (__v16qu)__b);
2120}
2121
2138static __inline__ __m128i __DEFAULT_FN_ATTRS
2139_mm_add_epi16(__m128i __a, __m128i __b)
2140{
2141 return (__m128i)((__v8hu)__a + (__v8hu)__b);
2142}
2143
2160static __inline__ __m128i __DEFAULT_FN_ATTRS
2161_mm_add_epi32(__m128i __a, __m128i __b)
2162{
2163 return (__m128i)((__v4su)__a + (__v4su)__b);
2164}
2165
2178static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
2179_mm_add_si64(__m64 __a, __m64 __b)
2180{
2181 return (__m64)__builtin_ia32_paddq((__v1di)__a, (__v1di)__b);
2182}
2183
2200static __inline__ __m128i __DEFAULT_FN_ATTRS
2201_mm_add_epi64(__m128i __a, __m128i __b)
2202{
2203 return (__m128i)((__v2du)__a + (__v2du)__b);
2204}
2205
2221static __inline__ __m128i __DEFAULT_FN_ATTRS
2222_mm_adds_epi8(__m128i __a, __m128i __b)
2223{
2224 return (__m128i)__builtin_ia32_paddsb128((__v16qi)__a, (__v16qi)__b);
2225}
2226
2243static __inline__ __m128i __DEFAULT_FN_ATTRS
2244_mm_adds_epi16(__m128i __a, __m128i __b)
2245{
2246 return (__m128i)__builtin_ia32_paddsw128((__v8hi)__a, (__v8hi)__b);
2247}
2248
2264static __inline__ __m128i __DEFAULT_FN_ATTRS
2265_mm_adds_epu8(__m128i __a, __m128i __b)
2266{
2267 return (__m128i)__builtin_ia32_paddusb128((__v16qi)__a, (__v16qi)__b);
2268}
2269
2285static __inline__ __m128i __DEFAULT_FN_ATTRS
2286_mm_adds_epu16(__m128i __a, __m128i __b)
2287{
2288 return (__m128i)__builtin_ia32_paddusw128((__v8hi)__a, (__v8hi)__b);
2289}
2290
2305static __inline__ __m128i __DEFAULT_FN_ATTRS
2306_mm_avg_epu8(__m128i __a, __m128i __b)
2307{
2308 return (__m128i)__builtin_ia32_pavgb128((__v16qi)__a, (__v16qi)__b);
2309}
2310
2325static __inline__ __m128i __DEFAULT_FN_ATTRS
2326_mm_avg_epu16(__m128i __a, __m128i __b)
2327{
2328 return (__m128i)__builtin_ia32_pavgw128((__v8hi)__a, (__v8hi)__b);
2329}
2330
2351static __inline__ __m128i __DEFAULT_FN_ATTRS
2352_mm_madd_epi16(__m128i __a, __m128i __b)
2353{
2354 return (__m128i)__builtin_ia32_pmaddwd128((__v8hi)__a, (__v8hi)__b);
2355}
2356
2371static __inline__ __m128i __DEFAULT_FN_ATTRS
2372_mm_max_epi16(__m128i __a, __m128i __b)
2373{
2374 return (__m128i)__builtin_ia32_pmaxsw128((__v8hi)__a, (__v8hi)__b);
2375}
2376
2391static __inline__ __m128i __DEFAULT_FN_ATTRS
2392_mm_max_epu8(__m128i __a, __m128i __b)
2393{
2394 return (__m128i)__builtin_ia32_pmaxub128((__v16qi)__a, (__v16qi)__b);
2395}
2396
2411static __inline__ __m128i __DEFAULT_FN_ATTRS
2412_mm_min_epi16(__m128i __a, __m128i __b)
2413{
2414 return (__m128i)__builtin_ia32_pminsw128((__v8hi)__a, (__v8hi)__b);
2415}
2416
2431static __inline__ __m128i __DEFAULT_FN_ATTRS
2432_mm_min_epu8(__m128i __a, __m128i __b)
2433{
2434 return (__m128i)__builtin_ia32_pminub128((__v16qi)__a, (__v16qi)__b);
2435}
2436
2451static __inline__ __m128i __DEFAULT_FN_ATTRS
2452_mm_mulhi_epi16(__m128i __a, __m128i __b)
2453{
2454 return (__m128i)__builtin_ia32_pmulhw128((__v8hi)__a, (__v8hi)__b);
2455}
2456
2471static __inline__ __m128i __DEFAULT_FN_ATTRS
2472_mm_mulhi_epu16(__m128i __a, __m128i __b)
2473{
2474 return (__m128i)__builtin_ia32_pmulhuw128((__v8hi)__a, (__v8hi)__b);
2475}
2476
2491static __inline__ __m128i __DEFAULT_FN_ATTRS
2492_mm_mullo_epi16(__m128i __a, __m128i __b)
2493{
2494 return (__m128i)((__v8hu)__a * (__v8hu)__b);
2495}
2496
2510static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
2511_mm_mul_su32(__m64 __a, __m64 __b)
2512{
2513 return __builtin_ia32_pmuludq((__v2si)__a, (__v2si)__b);
2514}
2515
2529static __inline__ __m128i __DEFAULT_FN_ATTRS
2530_mm_mul_epu32(__m128i __a, __m128i __b)
2531{
2532 return __builtin_ia32_pmuludq128((__v4si)__a, (__v4si)__b);
2533}
2534
2551static __inline__ __m128i __DEFAULT_FN_ATTRS
2552_mm_sad_epu8(__m128i __a, __m128i __b)
2553{
2554 return __builtin_ia32_psadbw128((__v16qi)__a, (__v16qi)__b);
2555}
2556
2569static __inline__ __m128i __DEFAULT_FN_ATTRS
2570_mm_sub_epi8(__m128i __a, __m128i __b)
2571{
2572 return (__m128i)((__v16qu)__a - (__v16qu)__b);
2573}
2574
2587static __inline__ __m128i __DEFAULT_FN_ATTRS
2588_mm_sub_epi16(__m128i __a, __m128i __b)
2589{
2590 return (__m128i)((__v8hu)__a - (__v8hu)__b);
2591}
2592
2605static __inline__ __m128i __DEFAULT_FN_ATTRS
2606_mm_sub_epi32(__m128i __a, __m128i __b)
2607{
2608 return (__m128i)((__v4su)__a - (__v4su)__b);
2609}
2610
2624static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
2625_mm_sub_si64(__m64 __a, __m64 __b)
2626{
2627 return (__m64)__builtin_ia32_psubq((__v1di)__a, (__v1di)__b);
2628}
2629
2642static __inline__ __m128i __DEFAULT_FN_ATTRS
2643_mm_sub_epi64(__m128i __a, __m128i __b)
2644{
2645 return (__m128i)((__v2du)__a - (__v2du)__b);
2646}
2647
2663static __inline__ __m128i __DEFAULT_FN_ATTRS
2664_mm_subs_epi8(__m128i __a, __m128i __b)
2665{
2666 return (__m128i)__builtin_ia32_psubsb128((__v16qi)__a, (__v16qi)__b);
2667}
2668
2684static __inline__ __m128i __DEFAULT_FN_ATTRS
2685_mm_subs_epi16(__m128i __a, __m128i __b)
2686{
2687 return (__m128i)__builtin_ia32_psubsw128((__v8hi)__a, (__v8hi)__b);
2688}
2689
2704static __inline__ __m128i __DEFAULT_FN_ATTRS
2705_mm_subs_epu8(__m128i __a, __m128i __b)
2706{
2707 return (__m128i)__builtin_ia32_psubusb128((__v16qi)__a, (__v16qi)__b);
2708}
2709
2724static __inline__ __m128i __DEFAULT_FN_ATTRS
2725_mm_subs_epu16(__m128i __a, __m128i __b)
2726{
2727 return (__m128i)__builtin_ia32_psubusw128((__v8hi)__a, (__v8hi)__b);
2728}
2729
2742static __inline__ __m128i __DEFAULT_FN_ATTRS
2743_mm_and_si128(__m128i __a, __m128i __b)
2744{
2745 return (__m128i)((__v2du)__a & (__v2du)__b);
2746}
2747
2762static __inline__ __m128i __DEFAULT_FN_ATTRS
2763_mm_andnot_si128(__m128i __a, __m128i __b)
2764{
2765 return (__m128i)(~(__v2du)__a & (__v2du)__b);
2766}
2779static __inline__ __m128i __DEFAULT_FN_ATTRS
2780_mm_or_si128(__m128i __a, __m128i __b)
2781{
2782 return (__m128i)((__v2du)__a | (__v2du)__b);
2783}
2784
2797static __inline__ __m128i __DEFAULT_FN_ATTRS
2798_mm_xor_si128(__m128i __a, __m128i __b)
2799{
2800 return (__m128i)((__v2du)__a ^ (__v2du)__b);
2801}
2802
/* Shifts the 128-bit value a left by imm bytes via the PSLLDQ byte-shift
 * builtin. The whole expansion is parenthesized so that the cast is applied
 * before any postfix operator (e.g. a subscript) written after the macro. */
#define _mm_slli_si128(a, imm) \
  ((__m128i)__builtin_ia32_pslldqi128_byteshift((__v2di)(__m128i)(a), (int)(imm)))
2822
/* Alternate spelling of the 128-bit left byte shift: shifts a left by imm
 * bytes via the PSLLDQ byte-shift builtin. The whole expansion is
 * parenthesized so that the cast is applied before any postfix operator
 * written after the macro. */
#define _mm_bslli_si128(a, imm) \
  ((__m128i)__builtin_ia32_pslldqi128_byteshift((__v2di)(__m128i)(a), (int)(imm)))
2825
2839static __inline__ __m128i __DEFAULT_FN_ATTRS
2840_mm_slli_epi16(__m128i __a, int __count)
2841{
2842 return (__m128i)__builtin_ia32_psllwi128((__v8hi)__a, __count);
2843}
2844
2858static __inline__ __m128i __DEFAULT_FN_ATTRS
2859_mm_sll_epi16(__m128i __a, __m128i __count)
2860{
2861 return (__m128i)__builtin_ia32_psllw128((__v8hi)__a, (__v8hi)__count);
2862}
2863
2877static __inline__ __m128i __DEFAULT_FN_ATTRS
2878_mm_slli_epi32(__m128i __a, int __count)
2879{
2880 return (__m128i)__builtin_ia32_pslldi128((__v4si)__a, __count);
2881}
2882
2896static __inline__ __m128i __DEFAULT_FN_ATTRS
2897_mm_sll_epi32(__m128i __a, __m128i __count)
2898{
2899 return (__m128i)__builtin_ia32_pslld128((__v4si)__a, (__v4si)__count);
2900}
2901
2915static __inline__ __m128i __DEFAULT_FN_ATTRS
2916_mm_slli_epi64(__m128i __a, int __count)
2917{
2918 return __builtin_ia32_psllqi128((__v2di)__a, __count);
2919}
2920
2934static __inline__ __m128i __DEFAULT_FN_ATTRS
2935_mm_sll_epi64(__m128i __a, __m128i __count)
2936{
2937 return __builtin_ia32_psllq128((__v2di)__a, (__v2di)__count);
2938}
2939
2954static __inline__ __m128i __DEFAULT_FN_ATTRS
2955_mm_srai_epi16(__m128i __a, int __count)
2956{
2957 return (__m128i)__builtin_ia32_psrawi128((__v8hi)__a, __count);
2958}
2959
2974static __inline__ __m128i __DEFAULT_FN_ATTRS
2975_mm_sra_epi16(__m128i __a, __m128i __count)
2976{
2977 return (__m128i)__builtin_ia32_psraw128((__v8hi)__a, (__v8hi)__count);
2978}
2979
2994static __inline__ __m128i __DEFAULT_FN_ATTRS
2995_mm_srai_epi32(__m128i __a, int __count)
2996{
2997 return (__m128i)__builtin_ia32_psradi128((__v4si)__a, __count);
2998}
2999
3014static __inline__ __m128i __DEFAULT_FN_ATTRS
3015_mm_sra_epi32(__m128i __a, __m128i __count)
3016{
3017 return (__m128i)__builtin_ia32_psrad128((__v4si)__a, (__v4si)__count);
3018}
3019
/* Shifts the 128-bit value a right by imm bytes via the PSRLDQ byte-shift
 * builtin. The whole expansion is parenthesized so that the cast is applied
 * before any postfix operator (e.g. a subscript) written after the macro. */
#define _mm_srli_si128(a, imm) \
  ((__m128i)__builtin_ia32_psrldqi128_byteshift((__v2di)(__m128i)(a), (int)(imm)))
3039
/* Alternate spelling of the 128-bit right byte shift: shifts a right by imm
 * bytes via the PSRLDQ byte-shift builtin. The whole expansion is
 * parenthesized so that the cast is applied before any postfix operator
 * written after the macro. */
#define _mm_bsrli_si128(a, imm) \
  ((__m128i)__builtin_ia32_psrldqi128_byteshift((__v2di)(__m128i)(a), (int)(imm)))
3042
3056static __inline__ __m128i __DEFAULT_FN_ATTRS
3057_mm_srli_epi16(__m128i __a, int __count)
3058{
3059 return (__m128i)__builtin_ia32_psrlwi128((__v8hi)__a, __count);
3060}
3061
3075static __inline__ __m128i __DEFAULT_FN_ATTRS
3076_mm_srl_epi16(__m128i __a, __m128i __count)
3077{
3078 return (__m128i)__builtin_ia32_psrlw128((__v8hi)__a, (__v8hi)__count);
3079}
3080
3094static __inline__ __m128i __DEFAULT_FN_ATTRS
3095_mm_srli_epi32(__m128i __a, int __count)
3096{
3097 return (__m128i)__builtin_ia32_psrldi128((__v4si)__a, __count);
3098}
3099
3113static __inline__ __m128i __DEFAULT_FN_ATTRS
3114_mm_srl_epi32(__m128i __a, __m128i __count)
3115{
3116 return (__m128i)__builtin_ia32_psrld128((__v4si)__a, (__v4si)__count);
3117}
3118
3132static __inline__ __m128i __DEFAULT_FN_ATTRS
3133_mm_srli_epi64(__m128i __a, int __count)
3134{
3135 return __builtin_ia32_psrlqi128((__v2di)__a, __count);
3136}
3137
3151static __inline__ __m128i __DEFAULT_FN_ATTRS
3152_mm_srl_epi64(__m128i __a, __m128i __count)
3153{
3154 return __builtin_ia32_psrlq128((__v2di)__a, (__v2di)__count);
3155}
3156
3170static __inline__ __m128i __DEFAULT_FN_ATTRS
3171_mm_cmpeq_epi8(__m128i __a, __m128i __b)
3172{
3173 return (__m128i)((__v16qi)__a == (__v16qi)__b);
3174}
3175
3189static __inline__ __m128i __DEFAULT_FN_ATTRS
3190_mm_cmpeq_epi16(__m128i __a, __m128i __b)
3191{
3192 return (__m128i)((__v8hi)__a == (__v8hi)__b);
3193}
3194
3208static __inline__ __m128i __DEFAULT_FN_ATTRS
3209_mm_cmpeq_epi32(__m128i __a, __m128i __b)
3210{
3211 return (__m128i)((__v4si)__a == (__v4si)__b);
3212}
3213
3228static __inline__ __m128i __DEFAULT_FN_ATTRS
3229_mm_cmpgt_epi8(__m128i __a, __m128i __b)
3230{
3231 /* This function always performs a signed comparison, but __v16qi is a char
3232 which may be signed or unsigned, so use __v16qs. */
3233 return (__m128i)((__v16qs)__a > (__v16qs)__b);
3234}
3235
3251static __inline__ __m128i __DEFAULT_FN_ATTRS
3252_mm_cmpgt_epi16(__m128i __a, __m128i __b)
3253{
3254 return (__m128i)((__v8hi)__a > (__v8hi)__b);
3255}
3256
3272static __inline__ __m128i __DEFAULT_FN_ATTRS
3273_mm_cmpgt_epi32(__m128i __a, __m128i __b)
3274{
3275 return (__m128i)((__v4si)__a > (__v4si)__b);
3276}
3277
3293static __inline__ __m128i __DEFAULT_FN_ATTRS
3294_mm_cmplt_epi8(__m128i __a, __m128i __b)
3295{
3296 return _mm_cmpgt_epi8(__b, __a);
3297}
3298
3314static __inline__ __m128i __DEFAULT_FN_ATTRS
3315_mm_cmplt_epi16(__m128i __a, __m128i __b)
3316{
3317 return _mm_cmpgt_epi16(__b, __a);
3318}
3319
3335static __inline__ __m128i __DEFAULT_FN_ATTRS
3336_mm_cmplt_epi32(__m128i __a, __m128i __b)
3337{
3338 return _mm_cmpgt_epi32(__b, __a);
3339}
3340
3341#ifdef __x86_64__
3359static __inline__ __m128d __DEFAULT_FN_ATTRS
3360_mm_cvtsi64_sd(__m128d __a, long long __b)
3361{
3362 __a[0] = __b;
3363 return __a;
3364}
3365
3377static __inline__ long long __DEFAULT_FN_ATTRS
3378_mm_cvtsd_si64(__m128d __a)
3379{
3380 return __builtin_ia32_cvtsd2si64((__v2df)__a);
3381}
3382
3395static __inline__ long long __DEFAULT_FN_ATTRS
3396_mm_cvttsd_si64(__m128d __a)
3397{
3398 return __builtin_ia32_cvttsd2si64((__v2df)__a);
3399}
3400#endif
3401
3411static __inline__ __m128 __DEFAULT_FN_ATTRS
3413{
3414 return (__m128)__builtin_convertvector((__v4si)__a, __v4sf);
3415}
3416
3427static __inline__ __m128i __DEFAULT_FN_ATTRS
3429{
3430 return (__m128i)__builtin_ia32_cvtps2dq((__v4sf)__a);
3431}
3432
3444static __inline__ __m128i __DEFAULT_FN_ATTRS
3446{
3447 return (__m128i)__builtin_ia32_cvttps2dq((__v4sf)__a);
3448}
3449
3460static __inline__ __m128i __DEFAULT_FN_ATTRS
3462{
3463 return __extension__ (__m128i)(__v4si){ __a, 0, 0, 0 };
3464}
3465
3466#ifdef __x86_64__
3477static __inline__ __m128i __DEFAULT_FN_ATTRS
3478_mm_cvtsi64_si128(long long __a)
3479{
3480 return __extension__ (__m128i)(__v2di){ __a, 0 };
3481}
3482#endif
3483
3495static __inline__ int __DEFAULT_FN_ATTRS
3497{
3498 __v4si __b = (__v4si)__a;
3499 return __b[0];
3500}
3501
3502#ifdef __x86_64__
3514static __inline__ long long __DEFAULT_FN_ATTRS
3515_mm_cvtsi128_si64(__m128i __a)
3516{
3517 return __a[0];
3518}
3519#endif
3520
3531static __inline__ __m128i __DEFAULT_FN_ATTRS
3532_mm_load_si128(__m128i const *__p)
3533{
3534 return *__p;
3535}
3536
3547static __inline__ __m128i __DEFAULT_FN_ATTRS
3548_mm_loadu_si128(__m128i_u const *__p)
3549{
3550 struct __loadu_si128 {
3551 __m128i_u __v;
3552 } __attribute__((__packed__, __may_alias__));
3553 return ((const struct __loadu_si128*)__p)->__v;
3554}
3555
3568static __inline__ __m128i __DEFAULT_FN_ATTRS
3569_mm_loadl_epi64(__m128i_u const *__p)
3570{
3571 struct __mm_loadl_epi64_struct {
3572 long long __u;
3573 } __attribute__((__packed__, __may_alias__));
3574 return __extension__ (__m128i) { ((const struct __mm_loadl_epi64_struct*)__p)->__u, 0};
3575}
3576
3586static __inline__ __m128i __DEFAULT_FN_ATTRS
3588{
3589 return (__m128i)__builtin_ia32_undef128();
3590}
3591
3608static __inline__ __m128i __DEFAULT_FN_ATTRS
3609_mm_set_epi64x(long long __q1, long long __q0)
3610{
3611 return __extension__ (__m128i)(__v2di){ __q0, __q1 };
3612}
3613
3630static __inline__ __m128i __DEFAULT_FN_ATTRS
3631_mm_set_epi64(__m64 __q1, __m64 __q0)
3632{
3633 return _mm_set_epi64x((long long)__q1, (long long)__q0);
3634}
3635
3658static __inline__ __m128i __DEFAULT_FN_ATTRS
3659_mm_set_epi32(int __i3, int __i2, int __i1, int __i0)
3660{
3661 return __extension__ (__m128i)(__v4si){ __i0, __i1, __i2, __i3};
3662}
3663
3698static __inline__ __m128i __DEFAULT_FN_ATTRS
3699_mm_set_epi16(short __w7, short __w6, short __w5, short __w4, short __w3, short __w2, short __w1, short __w0)
3700{
3701 return __extension__ (__m128i)(__v8hi){ __w0, __w1, __w2, __w3, __w4, __w5, __w6, __w7 };
3702}
3703
3746static __inline__ __m128i __DEFAULT_FN_ATTRS
3747_mm_set_epi8(char __b15, char __b14, char __b13, char __b12, char __b11, char __b10, char __b9, char __b8, char __b7, char __b6, char __b5, char __b4, char __b3, char __b2, char __b1, char __b0)
3748{
3749 return __extension__ (__m128i)(__v16qi){ __b0, __b1, __b2, __b3, __b4, __b5, __b6, __b7, __b8, __b9, __b10, __b11, __b12, __b13, __b14, __b15 };
3750}
3751
3765static __inline__ __m128i __DEFAULT_FN_ATTRS
3766_mm_set1_epi64x(long long __q)
3767{
3768 return _mm_set_epi64x(__q, __q);
3769}
3770
3784static __inline__ __m128i __DEFAULT_FN_ATTRS
3786{
3787 return _mm_set_epi64(__q, __q);
3788}
3789
3803static __inline__ __m128i __DEFAULT_FN_ATTRS
3805{
3806 return _mm_set_epi32(__i, __i, __i, __i);
3807}
3808
3822static __inline__ __m128i __DEFAULT_FN_ATTRS
3824{
3825 return _mm_set_epi16(__w, __w, __w, __w, __w, __w, __w, __w);
3826}
3827
3841static __inline__ __m128i __DEFAULT_FN_ATTRS
3843{
3844 return _mm_set_epi8(__b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b);
3845}
3846
3861static __inline__ __m128i __DEFAULT_FN_ATTRS
3862_mm_setr_epi64(__m64 __q0, __m64 __q1)
3863{
3864 return _mm_set_epi64(__q1, __q0);
3865}
3866
3884static __inline__ __m128i __DEFAULT_FN_ATTRS
3885_mm_setr_epi32(int __i0, int __i1, int __i2, int __i3)
3886{
3887 return _mm_set_epi32(__i3, __i2, __i1, __i0);
3888}
3889
3915static __inline__ __m128i __DEFAULT_FN_ATTRS
3916_mm_setr_epi16(short __w0, short __w1, short __w2, short __w3, short __w4, short __w5, short __w6, short __w7)
3917{
3918 return _mm_set_epi16(__w7, __w6, __w5, __w4, __w3, __w2, __w1, __w0);
3919}
3920
3962static __inline__ __m128i __DEFAULT_FN_ATTRS
3963_mm_setr_epi8(char __b0, char __b1, char __b2, char __b3, char __b4, char __b5, char __b6, char __b7, char __b8, char __b9, char __b10, char __b11, char __b12, char __b13, char __b14, char __b15)
3964{
3965 return _mm_set_epi8(__b15, __b14, __b13, __b12, __b11, __b10, __b9, __b8, __b7, __b6, __b5, __b4, __b3, __b2, __b1, __b0);
3966}
3967
3976static __inline__ __m128i __DEFAULT_FN_ATTRS
3978{
3979 return __extension__ (__m128i)(__v2di){ 0LL, 0LL };
3980}
3981
3994static __inline__ void __DEFAULT_FN_ATTRS
3995_mm_store_si128(__m128i *__p, __m128i __b)
3996{
3997 *__p = __b;
3998}
3999
4010static __inline__ void __DEFAULT_FN_ATTRS
4011_mm_storeu_si128(__m128i_u *__p, __m128i __b)
4012{
4013 struct __storeu_si128 {
4014 __m128i_u __v;
4015 } __attribute__((__packed__, __may_alias__));
4016 ((struct __storeu_si128*)__p)->__v = __b;
4017}
4018
4031static __inline__ void __DEFAULT_FN_ATTRS
4032_mm_storeu_si64(void *__p, __m128i __b)
4033{
4034 struct __storeu_si64 {
4035 long long __v;
4036 } __attribute__((__packed__, __may_alias__));
4037 ((struct __storeu_si64*)__p)->__v = ((__v2di)__b)[0];
4038}
4039
4052static __inline__ void __DEFAULT_FN_ATTRS
4053_mm_storeu_si32(void *__p, __m128i __b)
4054{
4055 struct __storeu_si32 {
4056 int __v;
4057 } __attribute__((__packed__, __may_alias__));
4058 ((struct __storeu_si32*)__p)->__v = ((__v4si)__b)[0];
4059}
4060
4073static __inline__ void __DEFAULT_FN_ATTRS
4074_mm_storeu_si16(void *__p, __m128i __b)
4075{
4076 struct __storeu_si16 {
4077 short __v;
4078 } __attribute__((__packed__, __may_alias__));
4079 ((struct __storeu_si16*)__p)->__v = ((__v8hi)__b)[0];
4080}
4081
4103static __inline__ void __DEFAULT_FN_ATTRS
4104_mm_maskmoveu_si128(__m128i __d, __m128i __n, char *__p)
4105{
4106 __builtin_ia32_maskmovdqu((__v16qi)__d, (__v16qi)__n, __p);
4107}
4108
4122static __inline__ void __DEFAULT_FN_ATTRS
4123_mm_storel_epi64(__m128i_u *__p, __m128i __a)
4124{
4125 struct __mm_storel_epi64_struct {
4126 long long __u;
4127 } __attribute__((__packed__, __may_alias__));
4128 ((struct __mm_storel_epi64_struct*)__p)->__u = __a[0];
4129}
4130
4145static __inline__ void __DEFAULT_FN_ATTRS
4146_mm_stream_pd(double *__p, __m128d __a)
4147{
4148 __builtin_nontemporal_store((__v2df)__a, (__v2df*)__p);
4149}
4150
4164static __inline__ void __DEFAULT_FN_ATTRS
4165_mm_stream_si128(__m128i *__p, __m128i __a)
4166{
4167 __builtin_nontemporal_store((__v2di)__a, (__v2di*)__p);
4168}
4169
/// Stores a 32-bit integer to memory via the \c __builtin_ia32_movnti
///    builtin.
///
/// The attributes are spelled out here instead of using
/// \c __DEFAULT_FN_ATTRS, so no \c __min_vector_width__ is attached to
/// this scalar operation.
///
/// \param __p
///    Destination pointer.
/// \param __a
///    The 32-bit integer value to store.
static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("sse2")))
_mm_stream_si32(int *__p, int __a)
{
  __builtin_ia32_movnti(__p, __a);
}
4188
#ifdef __x86_64__
/// Stores a 64-bit integer to memory via the \c __builtin_ia32_movnti64
///    builtin.
///
/// Only available when compiling for x86-64. As with _mm_stream_si32(),
/// the attributes are spelled out without \c __min_vector_width__.
///
/// \param __p
///    Destination pointer.
/// \param __a
///    The 64-bit integer value to store.
static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("sse2")))
_mm_stream_si64(long long *__p, long long __a)
{
  __builtin_ia32_movnti64(__p, __a);
}
#endif
4209
#if defined(__cplusplus)
extern "C" {
#endif

/* The three functions below are only declared in this header (with C
   linkage when compiled as C++); their definitions are not part of it. */

/// Cache-line flush (CLFLUSH) for the line containing the given address.
///
/// \param __p
///    Pointer to the memory location whose cache line is flushed.
void _mm_clflush(void const * __p);

/// Load fence (LFENCE).
void _mm_lfence(void);

/// Forces strong memory ordering (serialization) between load and store
///    instructions preceding this instruction and those following it
///    (MFENCE).
void _mm_mfence(void);

#if defined(__cplusplus)
} // extern "C"
#endif
4251
4274static __inline__ __m128i __DEFAULT_FN_ATTRS
4275_mm_packs_epi16(__m128i __a, __m128i __b)
4276{
4277 return (__m128i)__builtin_ia32_packsswb128((__v8hi)__a, (__v8hi)__b);
4278}
4279
4302static __inline__ __m128i __DEFAULT_FN_ATTRS
4303_mm_packs_epi32(__m128i __a, __m128i __b)
4304{
4305 return (__m128i)__builtin_ia32_packssdw128((__v4si)__a, (__v4si)__b);
4306}
4307
4330static __inline__ __m128i __DEFAULT_FN_ATTRS
4331_mm_packus_epi16(__m128i __a, __m128i __b)
4332{
4333 return (__m128i)__builtin_ia32_packuswb128((__v8hi)__a, (__v8hi)__b);
4334}
4335
/// Extracts 16 bits from a 128-bit integer vector of [8 x i16], using the
///    immediate-value parameter as a selector.
///
/// Implemented as a macro because the builtin takes the selector as an
/// immediate. The extracted element is cast through \c unsigned \c short,
/// so the \c int result is zero-extended rather than sign-extended.
///
/// \param a
///    A 128-bit integer vector.
/// \param imm
///    The index of the 16-bit element to extract.
/// \returns The selected element, zero-extended to \c int.
#define _mm_extract_epi16(a, imm) \
  (int)(unsigned short)__builtin_ia32_vec_ext_v8hi((__v8hi)(__m128i)(a), \
                                                   (int)(imm))
4361
/// Constructs a 128-bit integer vector by copying \a a and replacing the
///    16-bit element selected by the immediate \a imm with \a b.
///
/// Implemented as a macro because the builtin takes the selector as an
/// immediate.
///
/// \param a
///    A 128-bit integer vector of [8 x i16] supplying the other elements.
/// \param b
///    An integer whose value is inserted into the selected element.
/// \param imm
///    The index of the 16-bit element to replace.
/// \returns A 128-bit integer vector containing the constructed values.
#define _mm_insert_epi16(a, b, imm) \
  (__m128i)__builtin_ia32_vec_set_v8hi((__v8hi)(__m128i)(a), (int)(b), \
                                       (int)(imm))
4385
4398static __inline__ int __DEFAULT_FN_ATTRS
4400{
4401 return __builtin_ia32_pmovmskb128((__v16qi)__a);
4402}
4403
/// Constructs a 128-bit integer vector by shuffling the four 32-bit
///    elements of a 128-bit integer vector, as selected by an immediate
///    (\c __builtin_ia32_pshufd).
///
/// \param a
///    A 128-bit integer vector, interpreted as [4 x i32].
/// \param imm
///    An immediate value encoding which source element goes to each
///    destination element.
/// \returns A 128-bit integer vector containing the shuffled values.
#define _mm_shuffle_epi32(a, imm) \
  (__m128i)__builtin_ia32_pshufd((__v4si)(__m128i)(a), (int)(imm))
4434
/// Constructs a 128-bit integer vector by shuffling 16-bit elements of a
///    128-bit integer vector of [8 x i16], as selected by an immediate
///    (\c __builtin_ia32_pshuflw, the low-word shuffle).
///
/// \param a
///    A 128-bit integer vector, interpreted as [8 x i16].
/// \param imm
///    An immediate value encoding the element selection.
/// \returns A 128-bit integer vector containing the shuffled values.
#define _mm_shufflelo_epi16(a, imm) \
  (__m128i)__builtin_ia32_pshuflw((__v8hi)(__m128i)(a), (int)(imm))
4464
/// Constructs a 128-bit integer vector by shuffling 16-bit elements of a
///    128-bit integer vector of [8 x i16], as selected by an immediate
///    (\c __builtin_ia32_pshufhw, the high-word shuffle).
///
/// \param a
///    A 128-bit integer vector, interpreted as [8 x i16].
/// \param imm
///    An immediate value encoding the element selection.
/// \returns A 128-bit integer vector containing the shuffled values.
#define _mm_shufflehi_epi16(a, imm) \
  (__m128i)__builtin_ia32_pshufhw((__v8hi)(__m128i)(a), (int)(imm))
4494
4524static __inline__ __m128i __DEFAULT_FN_ATTRS
4525_mm_unpackhi_epi8(__m128i __a, __m128i __b)
4526{
4527 return (__m128i)__builtin_shufflevector((__v16qi)__a, (__v16qi)__b, 8, 16+8, 9, 16+9, 10, 16+10, 11, 16+11, 12, 16+12, 13, 16+13, 14, 16+14, 15, 16+15);
4528}
4529
4551static __inline__ __m128i __DEFAULT_FN_ATTRS
4552_mm_unpackhi_epi16(__m128i __a, __m128i __b)
4553{
4554 return (__m128i)__builtin_shufflevector((__v8hi)__a, (__v8hi)__b, 4, 8+4, 5, 8+5, 6, 8+6, 7, 8+7);
4555}
4556
4574static __inline__ __m128i __DEFAULT_FN_ATTRS
4575_mm_unpackhi_epi32(__m128i __a, __m128i __b)
4576{
4577 return (__m128i)__builtin_shufflevector((__v4si)__a, (__v4si)__b, 2, 4+2, 3, 4+3);
4578}
4579
4595static __inline__ __m128i __DEFAULT_FN_ATTRS
4596_mm_unpackhi_epi64(__m128i __a, __m128i __b)
4597{
4598 return (__m128i)__builtin_shufflevector((__v2di)__a, (__v2di)__b, 1, 2+1);
4599}
4600
4630static __inline__ __m128i __DEFAULT_FN_ATTRS
4631_mm_unpacklo_epi8(__m128i __a, __m128i __b)
4632{
4633 return (__m128i)__builtin_shufflevector((__v16qi)__a, (__v16qi)__b, 0, 16+0, 1, 16+1, 2, 16+2, 3, 16+3, 4, 16+4, 5, 16+5, 6, 16+6, 7, 16+7);
4634}
4635
4658static __inline__ __m128i __DEFAULT_FN_ATTRS
4659_mm_unpacklo_epi16(__m128i __a, __m128i __b)
4660{
4661 return (__m128i)__builtin_shufflevector((__v8hi)__a, (__v8hi)__b, 0, 8+0, 1, 8+1, 2, 8+2, 3, 8+3);
4662}
4663
4681static __inline__ __m128i __DEFAULT_FN_ATTRS
4682_mm_unpacklo_epi32(__m128i __a, __m128i __b)
4683{
4684 return (__m128i)__builtin_shufflevector((__v4si)__a, (__v4si)__b, 0, 4+0, 1, 4+1);
4685}
4686
4702static __inline__ __m128i __DEFAULT_FN_ATTRS
4703_mm_unpacklo_epi64(__m128i __a, __m128i __b)
4704{
4705 return (__m128i)__builtin_shufflevector((__v2di)__a, (__v2di)__b, 0, 2+0);
4706}
4707
4719static __inline__ __m64 __DEFAULT_FN_ATTRS
4721{
4722 return (__m64)__a[0];
4723}
4724
4736static __inline__ __m128i __DEFAULT_FN_ATTRS
4738{
4739 return __extension__ (__m128i)(__v2di){ (long long)__a, 0 };
4740}
4741
4754static __inline__ __m128i __DEFAULT_FN_ATTRS
4756{
4757 return __builtin_shufflevector((__v2di)__a, _mm_setzero_si128(), 0, 2);
4758}
4759
4775static __inline__ __m128d __DEFAULT_FN_ATTRS
4776_mm_unpackhi_pd(__m128d __a, __m128d __b)
4777{
4778 return __builtin_shufflevector((__v2df)__a, (__v2df)__b, 1, 2+1);
4779}
4780
4796static __inline__ __m128d __DEFAULT_FN_ATTRS
4797_mm_unpacklo_pd(__m128d __a, __m128d __b)
4798{
4799 return __builtin_shufflevector((__v2df)__a, (__v2df)__b, 0, 2+0);
4800}
4801
4815static __inline__ int __DEFAULT_FN_ATTRS
4817{
4818 return __builtin_ia32_movmskpd((__v2df)__a);
4819}
4820
4821
/// Constructs a 128-bit floating-point vector of [2 x double] from two
///    128-bit vector parameters of [2 x double], using the immediate-value
///    parameter as a selector (\c __builtin_ia32_shufpd).
///
/// The selector can be built with the _MM_SHUFFLE2() helper macro.
///
/// \param a
///    A 128-bit vector of [2 x double]; first builtin operand.
/// \param b
///    A 128-bit vector of [2 x double]; second builtin operand.
/// \param i
///    An immediate value selecting which element of each operand is used.
/// \returns A 128-bit vector of [2 x double] with the selected values.
#define _mm_shuffle_pd(a, b, i) \
  (__m128d)__builtin_ia32_shufpd((__v2df)(__m128d)(a), (__v2df)(__m128d)(b), \
                                 (int)(i))
4849
4861static __inline__ __m128 __DEFAULT_FN_ATTRS
4863{
4864 return (__m128)__a;
4865}
4866
4878static __inline__ __m128i __DEFAULT_FN_ATTRS
4880{
4881 return (__m128i)__a;
4882}
4883
4895static __inline__ __m128d __DEFAULT_FN_ATTRS
4897{
4898 return (__m128d)__a;
4899}
4900
4912static __inline__ __m128i __DEFAULT_FN_ATTRS
4914{
4915 return (__m128i)__a;
4916}
4917
4929static __inline__ __m128 __DEFAULT_FN_ATTRS
4931{
4932 return (__m128)__a;
4933}
4934
4946static __inline__ __m128d __DEFAULT_FN_ATTRS
4948{
4949 return (__m128d)__a;
4950}
4951
#if defined(__cplusplus)
extern "C" {
#endif

/// Spin-wait hint (PAUSE), intended for use inside busy-wait loops.
///
/// Only declared in this header (with C linkage when compiled as C++);
/// its definition is not part of it.
void _mm_pause(void);

#if defined(__cplusplus)
} // extern "C"
#endif
/* The attribute helper macros are private to this header; drop them so
   they do not leak into code that includes it. */
#undef __DEFAULT_FN_ATTRS
#undef __DEFAULT_FN_ATTRS_MMX
4970
/* Builds the 2-bit selector immediate for _mm_shuffle_pd(): x lands in
   bit 1 and y in bit 0. */
#define _MM_SHUFFLE2(x, y) (((x) << 1) | (y))
4972
/* Denormals-are-zero control: bit 6 (0x0040) of the value exchanged with
   _mm_getcsr()/_mm_setcsr(). ON sets the bit, OFF clears it. */
#define _MM_DENORMALS_ZERO_ON   (0x0040U)
#define _MM_DENORMALS_ZERO_OFF  (0x0000U)

/* Mask isolating the denormals-are-zero bit. */
#define _MM_DENORMALS_ZERO_MASK (0x0040U)
4977
/* Reads the current denormals-are-zero setting: the value from
   _mm_getcsr() masked down to the DAZ bit, so the result is either
   _MM_DENORMALS_ZERO_ON or _MM_DENORMALS_ZERO_OFF. */
#define _MM_GET_DENORMALS_ZERO_MODE() (_mm_getcsr() & _MM_DENORMALS_ZERO_MASK)
/* Sets the denormals-are-zero mode: clears the DAZ bit in the value from
   _mm_getcsr(), ORs in x (expected to be _MM_DENORMALS_ZERO_ON or
   _MM_DENORMALS_ZERO_OFF) and writes the result back via _mm_setcsr(). */
#define _MM_SET_DENORMALS_ZERO_MODE(x) (_mm_setcsr((_mm_getcsr() & ~_MM_DENORMALS_ZERO_MASK) | (x)))
4980
4981#endif /* __EMMINTRIN_H */
static __inline unsigned char unsigned int __x
Definition adxintrin.h:22
static __inline unsigned char unsigned int unsigned int unsigned int * __p
Definition adxintrin.h:24
static __inline__ vector float vector float vector float __c
Definition altivec.h:4243
static __inline__ vector float vector float __b
Definition altivec.h:520
static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX _mm_cvtpd_pi32(__m128d __a)
Converts the two double-precision floating-point elements of a 128-bit vector of [2 x double] into tw...
Definition emmintrin.h:1508
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_setr_epi64(__m64 __q0, __m64 __q1)
Constructs a 128-bit integer vector, initialized in reverse order with the specified 64-bit integral ...
Definition emmintrin.h:3862
static __inline__ int __DEFAULT_FN_ATTRS _mm_comile_sd(__m128d __a, __m128d __b)
Compares the lower double-precision floating-point values in each of the two 128-bit floating-point v...
Definition emmintrin.h:1044
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_unpacklo_epi16(__m128i __a, __m128i __b)
Unpacks the low-order (index 0-3) values from each of the two 128-bit vectors of [8 x i16] and interl...
Definition emmintrin.h:4659
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_movpi64_epi64(__m64 __a)
Moves the 64-bit operand to a 128-bit integer vector, zeroing the upper bits.
Definition emmintrin.h:4737
static __inline__ void __DEFAULT_FN_ATTRS _mm_store_pd1(double *__dp, __m128d __a)
Moves the lower 64 bits of a 128-bit vector of [2 x double] twice to the upper and lower 64 bits of a...
Definition emmintrin.h:2013
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set_epi16(short __w7, short __w6, short __w5, short __w4, short __w3, short __w2, short __w1, short __w0)
Initializes the 16-bit values in a 128-bit vector of [8 x i16] with the specified 16-bit integer valu...
Definition emmintrin.h:3699
static __inline__ int __DEFAULT_FN_ATTRS _mm_comilt_sd(__m128d __a, __m128d __b)
Compares the lower double-precision floating-point values in each of the two 128-bit floating-point v...
Definition emmintrin.h:1018
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_set_pd1(double __w)
Constructs a 128-bit floating-point vector of [2 x double], with each of the two double-precision flo...
Definition emmintrin.h:1855
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_packus_epi16(__m128i __a, __m128i __b)
Converts 16-bit signed integers from both 128-bit integer vector operands into 8-bit unsigned integer...
Definition emmintrin.h:4331
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_min_epu8(__m128i __a, __m128i __b)
Compares corresponding elements of two 128-bit unsigned [16 x i8] vectors, saving the smaller value f...
Definition emmintrin.h:2432
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpneq_pd(__m128d __a, __m128d __b)
Compares each of the corresponding double-precision values of the 128-bit vectors of [2 x double] to ...
Definition emmintrin.h:577
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_add_sd(__m128d __a, __m128d __b)
Adds lower double-precision values in both operands and returns the sum in the lower 64 bits of the r...
Definition emmintrin.h:56
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_div_pd(__m128d __a, __m128d __b)
Performs an element-by-element division of two 128-bit vectors of [2 x double].
Definition emmintrin.h:201
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_sub_pd(__m128d __a, __m128d __b)
Subtracts two 128-bit vectors of [2 x double].
Definition emmintrin.h:117
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_castpd_si128(__m128d __a)
Casts a 128-bit floating-point vector of [2 x double] into a 128-bit integer vector.
Definition emmintrin.h:4879
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_or_pd(__m128d __a, __m128d __b)
Performs a bitwise OR of two 128-bit vectors of [2 x double].
Definition emmintrin.h:389
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_loadu_si32(void const *__a)
Loads a 32-bit integer value to the low element of a 128-bit integer vector and clears the upper elem...
Definition emmintrin.h:1682
static __inline__ int __DEFAULT_FN_ATTRS _mm_movemask_epi8(__m128i __a)
Copies the values of the most significant bits from each 8-bit element in a 128-bit integer vector of...
Definition emmintrin.h:4399
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sll_epi32(__m128i __a, __m128i __count)
Left-shifts each 32-bit value in the 128-bit integer vector operand by the specified number of bits.
Definition emmintrin.h:2897
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_and_si128(__m128i __a, __m128i __b)
Performs a bitwise AND of two 128-bit integer vectors.
Definition emmintrin.h:2743
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpord_sd(__m128d __a, __m128d __b)
Compares the lower double-precision floating-point values in each of the two 128-bit floating-point v...
Definition emmintrin.h:813
static __inline__ int __DEFAULT_FN_ATTRS _mm_ucomile_sd(__m128d __a, __m128d __b)
Compares the lower double-precision floating-point values in each of the two 128-bit floating-point v...
Definition emmintrin.h:1198
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_loadu_pd(double const *__dp)
Loads a 128-bit floating-point vector of [2 x double] from an unaligned memory location.
Definition emmintrin.h:1641
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sub_epi64(__m128i __a, __m128i __b)
Subtracts the corresponding elements of two [2 x i64] vectors.
Definition emmintrin.h:2643
static __inline__ void __DEFAULT_FN_ATTRS _mm_maskmoveu_si128(__m128i __d, __m128i __n, char *__p)
Moves bytes selected by the mask from the first operand to the specified unaligned memory location.
Definition emmintrin.h:4104
static __inline__ int __DEFAULT_FN_ATTRS _mm_ucomilt_sd(__m128d __a, __m128d __b)
Compares the lower double-precision floating-point values in each of the two 128-bit floating-point v...
Definition emmintrin.h:1172
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set_epi32(int __i3, int __i2, int __i1, int __i0)
Initializes the 32-bit values in a 128-bit vector of [4 x i32] with the specified 32-bit integer valu...
Definition emmintrin.h:3659
static __inline__ int __DEFAULT_FN_ATTRS _mm_ucomigt_sd(__m128d __a, __m128d __b)
Compares the lower double-precision floating-point values in each of the two 128-bit floating-point v...
Definition emmintrin.h:1224
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_adds_epi8(__m128i __a, __m128i __b)
Adds, with saturation, the corresponding elements of two 128-bit signed [16 x i8] vectors,...
Definition emmintrin.h:2222
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_load_pd(double const *__dp)
Loads a 128-bit floating-point vector of [2 x double] from an aligned memory location.
Definition emmintrin.h:1579
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cvtepi32_pd(__m128i __a)
Converts the lower two integer elements of a 128-bit vector of [4 x i32] into two double-precision fl...
Definition emmintrin.h:1338
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cmpeq_epi8(__m128i __a, __m128i __b)
Compares each of the corresponding 8-bit values of the 128-bit integer vectors for equality.
Definition emmintrin.h:3171
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_srli_epi32(__m128i __a, int __count)
Right-shifts each of 32-bit values in the 128-bit integer vector operand by the specified number of b...
Definition emmintrin.h:3095
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_set1_pd(double __w)
Constructs a 128-bit floating-point vector of [2 x double], with each of the two double-precision flo...
Definition emmintrin.h:1837
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cmplt_epi16(__m128i __a, __m128i __b)
Compares each of the corresponding signed 16-bit values of the 128-bit integer vectors to determine i...
Definition emmintrin.h:3315
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_set_pd(double __w, double __x)
Constructs a 128-bit floating-point vector of [2 x double] initialized with the specified double-prec...
Definition emmintrin.h:1875
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sub_epi16(__m128i __a, __m128i __b)
Subtracts the corresponding 16-bit integer values in the operands.
Definition emmintrin.h:2588
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmple_sd(__m128d __a, __m128d __b)
Compares the lower double-precision floating-point values in each of the two 128-bit floating-point v...
Definition emmintrin.h:734
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_div_sd(__m128d __a, __m128d __b)
Divides the lower double-precision value of the first operand by the lower double-precision value of ...
Definition emmintrin.h:181
static __inline__ void __DEFAULT_FN_ATTRS _mm_storel_epi64(__m128i_u *__p, __m128i __a)
Stores the lower 64 bits of a 128-bit integer vector of [2 x i64] to a memory location.
Definition emmintrin.h:4123
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cmplt_epi32(__m128i __a, __m128i __b)
Compares each of the corresponding signed 32-bit values of the 128-bit integer vectors to determine i...
Definition emmintrin.h:3336
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_or_si128(__m128i __a, __m128i __b)
Performs a bitwise OR of two 128-bit integer vectors.
Definition emmintrin.h:2780
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpge_pd(__m128d __a, __m128d __b)
Compares each of the corresponding double-precision values of the 128-bit vectors of [2 x double] to ...
Definition emmintrin.h:509
static __inline__ void __DEFAULT_FN_ATTRS _mm_storeu_si16(void *__p, __m128i __b)
Stores a 16-bit integer value from the low element of a 128-bit integer vector.
Definition emmintrin.h:4074
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_min_pd(__m128d __a, __m128d __b)
Performs element-by-element comparison of the two 128-bit vectors of [2 x double] and returns the vec...
Definition emmintrin.h:288
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_load_sd(double const *__dp)
Loads a 64-bit double-precision value to the low element of a 128-bit vector of [2 x double] and clears the u...
Definition emmintrin.h:1724
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_loadu_si16(void const *__a)
Loads a 16-bit integer value to the low element of a 128-bit integer vector and clears the upper elem...
Definition emmintrin.h:1703
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_unpackhi_pd(__m128d __a, __m128d __b)
Unpacks the high-order 64-bit elements from two 128-bit vectors of [2 x double] and interleaves them ...
Definition emmintrin.h:4776
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpgt_sd(__m128d __a, __m128d __b)
Compares the lower double-precision floating-point values in each of the two 128-bit floating-point v...
Definition emmintrin.h:759
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_andnot_si128(__m128i __a, __m128i __b)
Performs a bitwise AND of two 128-bit integer vectors, using the one's complement of the values conta...
Definition emmintrin.h:2763
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cmpgt_epi8(__m128i __a, __m128i __b)
Compares each of the corresponding signed 8-bit values of the 128-bit integer vectors to determine if...
Definition emmintrin.h:3229
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_srl_epi32(__m128i __a, __m128i __count)
Right-shifts each of 32-bit values in the 128-bit integer vector operand by the specified number of b...
Definition emmintrin.h:3114
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cmpeq_epi16(__m128i __a, __m128i __b)
Compares each of the corresponding 16-bit values of the 128-bit integer vectors for equality.
Definition emmintrin.h:3190
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mullo_epi16(__m128i __a, __m128i __b)
Multiplies the corresponding elements of two signed [8 x i16] vectors, saving the lower 16 bits of ea...
Definition emmintrin.h:2492
static __inline__ int __DEFAULT_FN_ATTRS _mm_ucomieq_sd(__m128d __a, __m128d __b)
Compares the lower double-precision floating-point values in each of the two 128-bit floating-point v...
Definition emmintrin.h:1146
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_srai_epi16(__m128i __a, int __count)
Right-shifts each 16-bit value in the 128-bit integer vector operand by the specified number of bits.
Definition emmintrin.h:2955
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_xor_pd(__m128d __a, __m128d __b)
Performs a bitwise XOR of two 128-bit vectors of [2 x double].
Definition emmintrin.h:407
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_max_epu8(__m128i __a, __m128i __b)
Compares corresponding elements of two 128-bit unsigned [16 x i8] vectors, saving the greater value f...
Definition emmintrin.h:2392
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_avg_epu16(__m128i __a, __m128i __b)
Computes the rounded averages of corresponding elements of two 128-bit unsigned [8 x i16] vectors,...
Definition emmintrin.h:2326
static __inline__ void __DEFAULT_FN_ATTRS _mm_store1_pd(double *__dp, __m128d __a)
Moves the lower 64 bits of a 128-bit vector of [2 x double] twice to the upper and lower 64 bits of a...
Definition emmintrin.h:1992
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_castsi128_pd(__m128i __a)
Casts a 128-bit integer vector into a 128-bit floating-point vector of [2 x double].
Definition emmintrin.h:4947
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_srl_epi16(__m128i __a, __m128i __count)
Right-shifts each of 16-bit values in the 128-bit integer vector operand by the specified number of b...
Definition emmintrin.h:3076
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_unpacklo_epi8(__m128i __a, __m128i __b)
Unpacks the low-order (index 0-7) values from two 128-bit vectors of [16 x i8] and interleaves them i...
Definition emmintrin.h:4631
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpge_sd(__m128d __a, __m128d __b)
Compares the lower double-precision floating-point values in each of the two 128-bit floating-point v...
Definition emmintrin.h:785
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cmpgt_epi16(__m128i __a, __m128i __b)
Compares each of the corresponding signed 16-bit values of the 128-bit integer vectors to determine i...
Definition emmintrin.h:3252
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpnge_sd(__m128d __a, __m128d __b)
Compares the lower double-precision floating-point values in each of the two 128-bit floating-point v...
Definition emmintrin.h:967
#define __DEFAULT_FN_ATTRS
Definition emmintrin.h:37
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_unpacklo_pd(__m128d __a, __m128d __b)
Unpacks the low-order 64-bit elements from two 128-bit vectors of [2 x double] and interleaves them i...
Definition emmintrin.h:4797
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_subs_epi8(__m128i __a, __m128i __b)
Subtracts corresponding 8-bit signed integer values in the input and returns the differences in the c...
Definition emmintrin.h:2664
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmplt_sd(__m128d __a, __m128d __b)
Compares the lower double-precision floating-point values in each of the two 128-bit floating-point v...
Definition emmintrin.h:709
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpnge_pd(__m128d __a, __m128d __b)
Compares each of the corresponding double-precision values of the 128-bit vectors of [2 x double] to ...
Definition emmintrin.h:661
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_unpacklo_epi64(__m128i __a, __m128i __b)
Unpacks the low-order 64-bit elements from two 128-bit vectors of [2 x i64] and interleaves them into...
Definition emmintrin.h:4703
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_srai_epi32(__m128i __a, int __count)
Right-shifts each 32-bit value in the 128-bit integer vector operand by the specified number of bits.
Definition emmintrin.h:2995
static __inline__ int __DEFAULT_FN_ATTRS _mm_movemask_pd(__m128d __a)
Extracts the sign bits of the double-precision values in the 128-bit vector of [2 x double],...
Definition emmintrin.h:4816
static __inline__ void __DEFAULT_FN_ATTRS _mm_stream_si128(__m128i *__p, __m128i __a)
Stores a 128-bit integer vector to a 128-bit aligned memory location.
Definition emmintrin.h:4165
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cmpeq_epi32(__m128i __a, __m128i __b)
Compares each of the corresponding 32-bit values of the 128-bit integer vectors for equality.
Definition emmintrin.h:3209
static __inline__ void int __a
Definition emmintrin.h:4185
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_mul_sd(__m128d __a, __m128d __b)
Multiplies lower double-precision values in both operands and returns the product in the lower 64 bit...
Definition emmintrin.h:139
void _mm_mfence(void)
Forces strong memory ordering (serialization) between load and store instructions preceding this inst...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_subs_epu16(__m128i __a, __m128i __b)
Subtracts corresponding 16-bit unsigned integer values in the input and returns the differences in th...
Definition emmintrin.h:2725
static __inline__ void __DEFAULT_FN_ATTRS _mm_storeu_si32(void *__p, __m128i __b)
Stores a 32-bit integer value from the low element of a 128-bit integer vector.
Definition emmintrin.h:4053
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_move_epi64(__m128i __a)
Moves the lower 64 bits of a 128-bit integer vector to a 128-bit integer vector, zeroing the upper bi...
Definition emmintrin.h:4755
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmple_pd(__m128d __a, __m128d __b)
Compares each of the corresponding double-precision values of the 128-bit vectors of [2 x double] to ...
Definition emmintrin.h:467
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_unpackhi_epi64(__m128i __a, __m128i __b)
Unpacks the high-order 64-bit elements from two 128-bit vectors of [2 x i64] and interleaves them int...
Definition emmintrin.h:4596
static __inline__ int __DEFAULT_FN_ATTRS _mm_cvttsd_si32(__m128d __a)
Converts the low-order element of a [2 x double] vector into a 32-bit signed integer value,...
Definition emmintrin.h:1491
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_cvtepi32_ps(__m128i __a)
Converts a vector of [4 x i32] into a vector of [4 x float].
Definition emmintrin.h:3412
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_unpacklo_epi32(__m128i __a, __m128i __b)
Unpacks the low-order (index 0,1) values from two 128-bit vectors of [4 x i32] and interleaves them i...
Definition emmintrin.h:4682
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cmpgt_epi32(__m128i __a, __m128i __b)
Compares each of the corresponding signed 32-bit values of the 128-bit integer vectors to determine i...
Definition emmintrin.h:3273
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cvtss_sd(__m128d __a, __m128 __b)
Converts the lower single-precision floating-point element of a 128-bit vector of [4 x float],...
Definition emmintrin.h:1449
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_sqrt_pd(__m128d __a)
Calculates the square root of each of the two values stored in a 128-bit vector of [2 x double].
Definition emmintrin.h:244
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_move_sd(__m128d __a, __m128d __b)
Constructs a 128-bit floating-point vector of [2 x double].
Definition emmintrin.h:1932
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_unpackhi_epi16(__m128i __a, __m128i __b)
Unpacks the high-order (index 4-7) values from two 128-bit vectors of [8 x i16] and interleaves them ...
Definition emmintrin.h:4552
static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX _mm_cvttpd_pi32(__m128d __a)
Converts the two double-precision floating-point elements of a 128-bit vector of [2 x double] into tw...
Definition emmintrin.h:1528
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtpd_epi32(__m128d __a)
Converts the two double-precision floating-point elements of a 128-bit vector of [2 x double] into tw...
Definition emmintrin.h:1358
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_undefined_si128(void)
Generates a 128-bit vector of [4 x i32] with unspecified content.
Definition emmintrin.h:3587
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_castps_si128(__m128 __a)
Casts a 128-bit floating-point vector of [4 x float] into a 128-bit integer vector.
Definition emmintrin.h:4913
static __inline__ int __DEFAULT_FN_ATTRS _mm_ucomige_sd(__m128d __a, __m128d __b)
Compares the lower double-precision floating-point values in each of the two 128-bit floating-point v...
Definition emmintrin.h:1250
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_madd_epi16(__m128i __a, __m128i __b)
Multiplies the corresponding elements of two 128-bit signed [8 x i16] vectors, producing eight interm...
Definition emmintrin.h:2352
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_adds_epu16(__m128i __a, __m128i __b)
Adds, with saturation, the corresponding elements of two 128-bit unsigned [8 x i16] vectors,...
Definition emmintrin.h:2286
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtsi32_si128(int __a)
Returns a vector of [4 x i32] where the lowest element is the input operand and the remaining element...
Definition emmintrin.h:3461
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_loadu_si64(void const *__a)
Loads a 64-bit integer value to the low element of a 128-bit integer vector and clears the upper elem...
Definition emmintrin.h:1661
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_srli_epi64(__m128i __a, int __count)
Right-shifts each of the 64-bit values in the 128-bit integer vector operand by the specified number of b...
Definition emmintrin.h:3133
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_slli_epi16(__m128i __a, int __count)
Left-shifts each 16-bit value in the 128-bit integer vector operand by the specified number of bits.
Definition emmintrin.h:2840
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpnlt_pd(__m128d __a, __m128d __b)
Compares each of the corresponding double-precision values of the 128-bit vectors of [2 x double] to ...
Definition emmintrin.h:598
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_cvtsd_ss(__m128 __a, __m128d __b)
Converts the lower double-precision floating-point element of a 128-bit vector of [2 x double],...
Definition emmintrin.h:1400
static __inline__ int __DEFAULT_FN_ATTRS _mm_cvtsd_si32(__m128d __a)
Converts the low-order element of a 128-bit vector of [2 x double] into a 32-bit signed integer value...
Definition emmintrin.h:1375
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_sub_sd(__m128d __a, __m128d __b)
Subtracts the lower double-precision value of the second operand from the lower double-precision valu...
Definition emmintrin.h:98
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_loadl_pd(__m128d __a, double const *__dp)
Loads a double-precision value into the low-order bits of a 128-bit vector of [2 x double].
Definition emmintrin.h:1778
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_adds_epu8(__m128i __a, __m128i __b)
Adds, with saturation, the corresponding elements of two 128-bit unsigned [16 x i8] vectors,...
Definition emmintrin.h:2265
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_adds_epi16(__m128i __a, __m128i __b)
Adds, with saturation, the corresponding elements of two 128-bit signed [8 x i16] vectors,...
Definition emmintrin.h:2244
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_subs_epi16(__m128i __a, __m128i __b)
Subtracts corresponding 16-bit signed integer values in the input and returns the differences in the ...
Definition emmintrin.h:2685
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sub_epi8(__m128i __a, __m128i __b)
Subtracts the corresponding 8-bit integer values in the operands.
Definition emmintrin.h:2570
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_undefined_pd(void)
Constructs a 128-bit floating-point vector of [2 x double] with unspecified content.
Definition emmintrin.h:1799
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_unpackhi_epi32(__m128i __a, __m128i __b)
Unpacks the high-order (index 2,3) values from two 128-bit vectors of [4 x i32] and interleaves them ...
Definition emmintrin.h:4575
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_cvtpd_ps(__m128d __a)
Converts the two double-precision floating-point elements of a 128-bit vector of [2 x double] into tw...
Definition emmintrin.h:1295
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_setr_epi8(char __b0, char __b1, char __b2, char __b3, char __b4, char __b5, char __b6, char __b7, char __b8, char __b9, char __b10, char __b11, char __b12, char __b13, char __b14, char __b15)
Constructs a 128-bit integer vector, initialized in reverse order with the specified 8-bit integral v...
Definition emmintrin.h:3963
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mulhi_epi16(__m128i __a, __m128i __b)
Multiplies the corresponding elements of two signed [8 x i16] vectors, saving the upper 16 bits of ea...
Definition emmintrin.h:2452
static __inline__ int __DEFAULT_FN_ATTRS _mm_comige_sd(__m128d __a, __m128d __b)
Compares the lower double-precision floating-point values in each of the two 128-bit floating-point v...
Definition emmintrin.h:1096
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpunord_pd(__m128d __a, __m128d __b)
Compares each of the corresponding double-precision values of the 128-bit vectors of [2 x double] to ...
Definition emmintrin.h:556
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_add_epi64(__m128i __a, __m128i __b)
Adds the corresponding elements of two 128-bit vectors of [2 x i64], saving the lower 64 bits of each...
Definition emmintrin.h:2201
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mulhi_epu16(__m128i __a, __m128i __b)
Multiplies the corresponding elements of two unsigned [8 x i16] vectors, saving the upper 16 bits of ...
Definition emmintrin.h:2472
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_andnot_pd(__m128d __a, __m128d __b)
Performs a bitwise AND of two 128-bit vectors of [2 x double], using the one's complement of the valu...
Definition emmintrin.h:371
static __inline__ int __DEFAULT_FN_ATTRS _mm_comieq_sd(__m128d __a, __m128d __b)
Compares the lower double-precision floating-point values in each of the two 128-bit floating-point v...
Definition emmintrin.h:992
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_max_epi16(__m128i __a, __m128i __b)
Compares corresponding elements of two 128-bit signed [8 x i16] vectors, saving the greater value fro...
Definition emmintrin.h:2372
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_castpd_ps(__m128d __a)
Casts a 128-bit floating-point vector of [2 x double] into a 128-bit floating-point vector of [4 x fl...
Definition emmintrin.h:4862
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sll_epi64(__m128i __a, __m128i __count)
Left-shifts each 64-bit value in the 128-bit integer vector operand by the specified number of bits.
Definition emmintrin.h:2935
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_packs_epi16(__m128i __a, __m128i __b)
Converts 16-bit signed integers from both 128-bit integer vector operands into 8-bit signed integers,...
Definition emmintrin.h:4275
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set1_epi32(int __i)
Initializes all values in a 128-bit vector of [4 x i32] with the specified 32-bit value.
Definition emmintrin.h:3804
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sra_epi16(__m128i __a, __m128i __count)
Right-shifts each 16-bit value in the 128-bit integer vector operand by the specified number of bits.
Definition emmintrin.h:2975
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_loadr_pd(double const *__dp)
Loads two double-precision values, in reverse order, from an aligned memory location into a 128-bit v...
Definition emmintrin.h:1623
static __inline__ int __DEFAULT_FN_ATTRS _mm_comigt_sd(__m128d __a, __m128d __b)
Compares the lower double-precision floating-point values in each of the two 128-bit floating-point v...
Definition emmintrin.h:1070
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpngt_pd(__m128d __a, __m128d __b)
Compares each of the corresponding double-precision values of the 128-bit vectors of [2 x double] to ...
Definition emmintrin.h:640
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_castps_pd(__m128 __a)
Casts a 128-bit floating-point vector of [4 x float] into a 128-bit floating-point vector of [2 x dou...
Definition emmintrin.h:4896
static __inline__ void __DEFAULT_FN_ATTRS _mm_storeh_pd(double *__dp, __m128d __a)
Stores the upper 64 bits of a 128-bit vector of [2 x double] to a memory location.
Definition emmintrin.h:2072
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_slli_epi32(__m128i __a, int __count)
Left-shifts each 32-bit value in the 128-bit integer vector operand by the specified number of bits.
Definition emmintrin.h:2878
void _mm_lfence(void)
Forces strong memory ordering (serialization) between load instructions preceding this instruction an...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sad_epu8(__m128i __a, __m128i __b)
Computes the absolute differences of corresponding 8-bit integer values in two 128-bit vectors.
Definition emmintrin.h:2552
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpngt_sd(__m128d __a, __m128d __b)
Compares the lower double-precision floating-point values in each of the two 128-bit floating-point v...
Definition emmintrin.h:941
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpunord_sd(__m128d __a, __m128d __b)
Compares the lower double-precision floating-point values in each of the two 128-bit floating-point v...
Definition emmintrin.h:841
static __inline__ void __DEFAULT_FN_ATTRS _mm_storel_pd(double *__dp, __m128d __a)
Stores the lower 64 bits of a 128-bit vector of [2 x double] to a memory location.
Definition emmintrin.h:2092
static __inline__ void __DEFAULT_FN_ATTRS _mm_store_pd(double *__dp, __m128d __a)
Moves packed double-precision values from a 128-bit vector of [2 x double] to a memory location.
Definition emmintrin.h:1972
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_add_epi32(__m128i __a, __m128i __b)
Adds the corresponding elements of two 128-bit vectors of [4 x i32], saving the lower 32 bits of each...
Definition emmintrin.h:2161
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_loadu_si128(__m128i_u const *__p)
Moves packed integer values from an unaligned 128-bit memory location to elements in a 128-bit intege...
Definition emmintrin.h:3548
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set1_epi8(char __b)
Initializes all values in a 128-bit vector of [16 x i8] with the specified 8-bit value.
Definition emmintrin.h:3842
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_packs_epi32(__m128i __a, __m128i __b)
Converts 32-bit signed integers from both 128-bit integer vector operands into 16-bit signed integers...
Definition emmintrin.h:4303
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sll_epi16(__m128i __a, __m128i __count)
Left-shifts each 16-bit value in the 128-bit integer vector operand by the specified number of bits.
Definition emmintrin.h:2859
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_add_epi8(__m128i __a, __m128i __b)
Adds the corresponding elements of two 128-bit vectors of [16 x i8], saving the lower 8 bits of each ...
Definition emmintrin.h:2117
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpnle_sd(__m128d __a, __m128d __b)
Compares the lower double-precision floating-point values in each of the two 128-bit floating-point v...
Definition emmintrin.h:916
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpgt_pd(__m128d __a, __m128d __b)
Compares each of the corresponding double-precision values of the 128-bit vectors of [2 x double] to ...
Definition emmintrin.h:488
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_load_si128(__m128i const *__p)
Moves packed integer values from an aligned 128-bit memory location to elements in a 128-bit integer ...
Definition emmintrin.h:3532
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpeq_sd(__m128d __a, __m128d __b)
Compares the lower double-precision floating-point values in each of the two 128-bit floating-point v...
Definition emmintrin.h:684
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cmplt_epi8(__m128i __a, __m128i __b)
Compares each of the corresponding signed 8-bit values of the 128-bit integer vectors to determine if...
Definition emmintrin.h:3294
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_castsi128_ps(__m128i __a)
Casts a 128-bit integer vector into a 128-bit floating-point vector of [4 x float].
Definition emmintrin.h:4930
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_setzero_pd(void)
Constructs a 128-bit floating-point vector of [2 x double] initialized to zero.
Definition emmintrin.h:1911
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_and_pd(__m128d __a, __m128d __b)
Performs a bitwise AND of two 128-bit vectors of [2 x double].
Definition emmintrin.h:350
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set_epi64x(long long __q1, long long __q0)
Initializes both 64-bit values in a 128-bit vector of [2 x i64] with the specified 64-bit integer val...
Definition emmintrin.h:3609
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cvtsi32_sd(__m128d __a, int __b)
Converts a 32-bit signed integer value, in the second parameter, into a double-precision floating-poi...
Definition emmintrin.h:1423
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mul_epu32(__m128i __a, __m128i __b)
Multiplies 32-bit unsigned integer values contained in the lower bits of the corresponding elements o...
Definition emmintrin.h:2530
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpnlt_sd(__m128d __a, __m128d __b)
Compares the lower double-precision floating-point values in each of the two 128-bit floating-point v...
Definition emmintrin.h:891
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_srl_epi64(__m128i __a, __m128i __count)
Right-shifts each of the 64-bit values in the 128-bit integer vector operand by the specified number of b...
Definition emmintrin.h:3152
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpneq_sd(__m128d __a, __m128d __b)
Compares the lower double-precision floating-point values in each of the two 128-bit floating-point v...
Definition emmintrin.h:866
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvttps_epi32(__m128 __a)
Converts a vector of [4 x float] into a vector of [4 x i32], truncating the result when it is inexact...
Definition emmintrin.h:3445
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_setr_epi16(short __w0, short __w1, short __w2, short __w3, short __w4, short __w5, short __w6, short __w7)
Constructs a 128-bit integer vector, initialized in reverse order with the specified 16-bit integral ...
Definition emmintrin.h:3916
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_loadh_pd(__m128d __a, double const *__dp)
Loads a double-precision value into the high-order bits of a 128-bit vector of [2 x double].
Definition emmintrin.h:1751
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set_epi8(char __b15, char __b14, char __b13, char __b12, char __b11, char __b10, char __b9, char __b8, char __b7, char __b6, char __b5, char __b4, char __b3, char __b2, char __b1, char __b0)
Initializes the 8-bit values in a 128-bit vector of [16 x i8] with the specified 8-bit integer values...
Definition emmintrin.h:3747
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set1_epi64(__m64 __q)
Initializes both values in a 128-bit vector of [2 x i64] with the specified 64-bit value.
Definition emmintrin.h:3785
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_sqrt_sd(__m128d __a, __m128d __b)
Calculates the square root of the lower double-precision value of the second operand and returns it i...
Definition emmintrin.h:226
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_setr_pd(double __w, double __x)
Constructs a 128-bit floating-point vector of [2 x double], initialized in reverse order with the spe...
Definition emmintrin.h:1896
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set_epi64(__m64 __q1, __m64 __q0)
Initializes both 64-bit values in a 128-bit vector of [2 x i64] with the specified 64-bit integer val...
Definition emmintrin.h:3631
static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX _mm_sub_si64(__m64 __a, __m64 __b)
Subtracts signed or unsigned 64-bit integer values and writes the difference to the corresponding bit...
Definition emmintrin.h:2625
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_min_sd(__m128d __a, __m128d __b)
Compares lower 64-bit double-precision values of both operands, and returns the lesser of the pair of...
Definition emmintrin.h:268
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_slli_epi64(__m128i __a, int __count)
Left-shifts each 64-bit value in the 128-bit integer vector operand by the specified number of bits.
Definition emmintrin.h:2916
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_add_pd(__m128d __a, __m128d __b)
Adds two 128-bit vectors of [2 x double].
Definition emmintrin.h:75
static __inline__ void __DEFAULT_FN_ATTRS _mm_stream_pd(double *__p, __m128d __a)
Stores a 128-bit floating point vector of [2 x double] to a 128-bit aligned memory location.
Definition emmintrin.h:4146
static __inline__ void __DEFAULT_FN_ATTRS _mm_storeu_si64(void *__p, __m128i __b)
Stores a 64-bit integer value from the low element of a 128-bit integer vector.
Definition emmintrin.h:4032
static __inline__ void __DEFAULT_FN_ATTRS _mm_store_sd(double *__dp, __m128d __a)
Stores the lower 64 bits of a 128-bit vector of [2 x double] to a memory location.
Definition emmintrin.h:1950
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_load1_pd(double const *__dp)
Loads a double-precision floating-point value from a specified memory location and duplicates it to b...
Definition emmintrin.h:1597
#define __DEFAULT_FN_ATTRS_MMX
Definition emmintrin.h:38
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_max_pd(__m128d __a, __m128d __b)
Performs element-by-element comparison of the two 128-bit vectors of [2 x double] and returns the vec...
Definition emmintrin.h:332
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_min_epi16(__m128i __a, __m128i __b)
Compares corresponding elements of two 128-bit signed [8 x i16] vectors, saving the smaller value fro...
Definition emmintrin.h:2412
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_movepi64_pi64(__m128i __a)
Returns the lower 64 bits of a 128-bit integer vector as a 64-bit integer.
Definition emmintrin.h:4720
static __inline__ void __DEFAULT_FN_ATTRS _mm_store_si128(__m128i *__p, __m128i __b)
Stores a 128-bit integer vector to a memory location aligned on a 128-bit boundary.
Definition emmintrin.h:3995
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_mul_pd(__m128d __a, __m128d __b)
Multiplies two 128-bit vectors of [2 x double].
Definition emmintrin.h:158
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpeq_pd(__m128d __a, __m128d __b)
Compares each of the corresponding double-precision values of the 128-bit vectors of [2 x double] for...
Definition emmintrin.h:426
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_set_sd(double __w)
Constructs a 128-bit floating-point vector of [2 x double].
Definition emmintrin.h:1819
static __inline__ __m128d __DEFAULT_FN_ATTRS_MMX _mm_cvtpi32_pd(__m64 __a)
Converts the two signed 32-bit integer elements of a 64-bit vector of [2 x i32] into two double-preci...
Definition emmintrin.h:1545
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_max_sd(__m128d __a, __m128d __b)
Compares lower 64-bit double-precision values of both operands, and returns the greater of the pair o...
Definition emmintrin.h:312
static __inline__ double __DEFAULT_FN_ATTRS _mm_cvtsd_f64(__m128d __a)
Returns the low-order element of a 128-bit vector of [2 x double] as a double-precision floating-poin...
Definition emmintrin.h:1562
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_loadl_epi64(__m128i_u const *__p)
Returns a vector of [2 x i64] where the lower element is taken from the lower element of the operand,...
Definition emmintrin.h:3569
void _mm_pause(void)
Indicates that a spin loop is being executed for the purposes of optimizing power consumption during ...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set1_epi16(short __w)
Initializes all values in a 128-bit vector of [8 x i16] with the specified 16-bit value.
Definition emmintrin.h:3823
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set1_epi64x(long long __q)
Initializes both values in a 128-bit integer vector with the specified 64-bit integer value.
Definition emmintrin.h:3766
static __inline__ int __DEFAULT_FN_ATTRS _mm_cvtsi128_si32(__m128i __a)
Moves the least significant 32 bits of a vector of [4 x i32] to a 32-bit signed integer value.
Definition emmintrin.h:3496
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sub_epi32(__m128i __a, __m128i __b)
Subtracts the corresponding 32-bit integer values in the operands.
Definition emmintrin.h:2606
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_avg_epu8(__m128i __a, __m128i __b)
Computes the rounded averages of corresponding elements of two 128-bit unsigned [16 x i8] vectors,...
Definition emmintrin.h:2306
static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX _mm_mul_su32(__m64 __a, __m64 __b)
Multiplies 32-bit unsigned integer values contained in the lower bits of the two 64-bit integer vecto...
Definition emmintrin.h:2511
static __inline__ void __DEFAULT_FN_ATTRS _mm_storer_pd(double *__dp, __m128d __a)
Stores two double-precision values, in reverse order, from a 128-bit vector of [2 x double] to a 16-b...
Definition emmintrin.h:2054
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_unpackhi_epi8(__m128i __a, __m128i __b)
Unpacks the high-order (index 8-15) values from two 128-bit vectors of [16 x i8] and interleaves them...
Definition emmintrin.h:4525
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_srli_epi16(__m128i __a, int __count)
Right-shifts each of the 16-bit values in the 128-bit integer vector operand by the specified number of b...
Definition emmintrin.h:3057
static __inline__ void __DEFAULT_FN_ATTRS _mm_storeu_pd(double *__dp, __m128d __a)
Stores a 128-bit vector of [2 x double] into an unaligned memory location.
Definition emmintrin.h:2031
static __inline__ int __DEFAULT_FN_ATTRS _mm_ucomineq_sd(__m128d __a, __m128d __b)
Compares the lower double-precision floating-point values in each of the two 128-bit floating-point v...
Definition emmintrin.h:1276
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_add_epi16(__m128i __a, __m128i __b)
Adds the corresponding elements of two 128-bit vectors of [8 x i16], saving the lower 16 bits of each...
Definition emmintrin.h:2139
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_subs_epu8(__m128i __a, __m128i __b)
Subtracts corresponding 8-bit unsigned integer values in the input and returns the differences in the...
Definition emmintrin.h:2705
static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX _mm_add_si64(__m64 __a, __m64 __b)
Adds two signed or unsigned 64-bit integer values, returning the lower 64 bits of the sum.
Definition emmintrin.h:2179
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cvtps_pd(__m128 __a)
Converts the lower two single-precision floating-point elements of a 128-bit vector of [4 x float] in...
Definition emmintrin.h:1315
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_setzero_si128(void)
Creates a 128-bit integer vector initialized to zero.
Definition emmintrin.h:3977
static __inline__ int __DEFAULT_FN_ATTRS _mm_comineq_sd(__m128d __a, __m128d __b)
Compares the lower double-precision floating-point values in each of the two 128-bit floating-point v...
Definition emmintrin.h:1122
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpord_pd(__m128d __a, __m128d __b)
Compares each of the corresponding double-precision values of the 128-bit vectors of [2 x double] to ...
Definition emmintrin.h:532
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmplt_pd(__m128d __a, __m128d __b)
Compares each of the corresponding double-precision values of the 128-bit vectors of [2 x double] to ...
Definition emmintrin.h:446
static __inline__ void __DEFAULT_FN_ATTRS _mm_storeu_si128(__m128i_u *__p, __m128i __b)
Stores a 128-bit integer vector to an unaligned memory location.
Definition emmintrin.h:4011
typedef double __m128d __attribute__((__vector_size__(16), __aligned__(16)))
Definition emmintrin.h:15
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sra_epi32(__m128i __a, __m128i __count)
Right-shifts each 32-bit value in the 128-bit integer vector operand by the specified number of bits.
Definition emmintrin.h:3015
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_setr_epi32(int __i0, int __i1, int __i2, int __i3)
Constructs a 128-bit integer vector, initialized in reverse order with the specified 32-bit integral ...
Definition emmintrin.h:3885
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvttpd_epi32(__m128d __a)
Converts the two double-precision floating-point elements of a 128-bit vector of [2 x double] into tw...
Definition emmintrin.h:1473
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtps_epi32(__m128 __a)
Converts a vector of [4 x float] into a vector of [4 x i32].
Definition emmintrin.h:3428
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_xor_si128(__m128i __a, __m128i __b)
Performs a bitwise exclusive OR of two 128-bit integer vectors.
Definition emmintrin.h:2798
void _mm_clflush(void const *__p)
The cache line containing __p is flushed and invalidated from all caches in the coherency domain.
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cmpnle_pd(__m128d __a, __m128d __b)
Compares each of the corresponding double-precision values of the 128-bit vectors of [2 x double] to ...
Definition emmintrin.h:619
struct __storeu_i16 *__P __v
Definition immintrin.h:348