ETISS 0.8.0
Extendable Translating Instruction Set Simulator (version 0.8.0)
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
xmmintrin.h
Go to the documentation of this file.
1/*===---- xmmintrin.h - SSE intrinsics -------------------------------------===
2 *
3 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 * See https://llvm.org/LICENSE.txt for license information.
5 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 *
7 *===-----------------------------------------------------------------------===
8 */
9
10#ifndef __XMMINTRIN_H
11#define __XMMINTRIN_H
12
13#include <mmintrin.h>
14
/* Internal 128-bit element views used for casts inside this header. */
typedef int __v4si __attribute__((__vector_size__(16)));
typedef unsigned int __v4su __attribute__((__vector_size__(16)));
typedef float __v4sf __attribute__((__vector_size__(16)));

/* Public 128-bit vector of four floats, 16-byte aligned. */
typedef float __m128 __attribute__((__vector_size__(16), __aligned__(16)));

/* Same layout as __m128 but with 1-byte alignment, for unaligned access. */
typedef float __m128_u __attribute__((__vector_size__(16), __aligned__(1)));
23
24/* This header should only be included in a hosted environment as it depends on
25 * a standard library to provide allocation routines. */
26#if __STDC_HOSTED__
27#include <mm_malloc.h>
28#endif
29
/* Attribute sets applied to every inline function in this file.
 * __DEFAULT_FN_ATTRS targets "sse" with a 128-bit minimum vector width;
 * __DEFAULT_FN_ATTRS_MMX targets "mmx,sse" with a 64-bit minimum width. */
#define __DEFAULT_FN_ATTRS                                                     \
  __attribute__((__always_inline__, __nodebug__, __target__("sse"),            \
                 __min_vector_width__(128)))
#define __DEFAULT_FN_ATTRS_MMX                                                 \
  __attribute__((__always_inline__, __nodebug__, __target__("mmx,sse"),        \
                 __min_vector_width__(64)))
33
49static __inline__ __m128 __DEFAULT_FN_ATTRS
50_mm_add_ss(__m128 __a, __m128 __b)
51{
52 __a[0] += __b[0];
53 return __a;
54}
55
69static __inline__ __m128 __DEFAULT_FN_ATTRS
70_mm_add_ps(__m128 __a, __m128 __b)
71{
72 return (__m128)((__v4sf)__a + (__v4sf)__b);
73}
74
91static __inline__ __m128 __DEFAULT_FN_ATTRS
92_mm_sub_ss(__m128 __a, __m128 __b)
93{
94 __a[0] -= __b[0];
95 return __a;
96}
97
112static __inline__ __m128 __DEFAULT_FN_ATTRS
113_mm_sub_ps(__m128 __a, __m128 __b)
114{
115 return (__m128)((__v4sf)__a - (__v4sf)__b);
116}
117
134static __inline__ __m128 __DEFAULT_FN_ATTRS
135_mm_mul_ss(__m128 __a, __m128 __b)
136{
137 __a[0] *= __b[0];
138 return __a;
139}
140
154static __inline__ __m128 __DEFAULT_FN_ATTRS
155_mm_mul_ps(__m128 __a, __m128 __b)
156{
157 return (__m128)((__v4sf)__a * (__v4sf)__b);
158}
159
176static __inline__ __m128 __DEFAULT_FN_ATTRS
177_mm_div_ss(__m128 __a, __m128 __b)
178{
179 __a[0] /= __b[0];
180 return __a;
181}
182
195static __inline__ __m128 __DEFAULT_FN_ATTRS
196_mm_div_ps(__m128 __a, __m128 __b)
197{
198 return (__m128)((__v4sf)__a / (__v4sf)__b);
199}
200
213static __inline__ __m128 __DEFAULT_FN_ATTRS
215{
216 return (__m128)__builtin_ia32_sqrtss((__v4sf)__a);
217}
218
230static __inline__ __m128 __DEFAULT_FN_ATTRS
232{
233 return __builtin_ia32_sqrtps((__v4sf)__a);
234}
235
248static __inline__ __m128 __DEFAULT_FN_ATTRS
250{
251 return (__m128)__builtin_ia32_rcpss((__v4sf)__a);
252}
253
265static __inline__ __m128 __DEFAULT_FN_ATTRS
267{
268 return (__m128)__builtin_ia32_rcpps((__v4sf)__a);
269}
270
284static __inline__ __m128 __DEFAULT_FN_ATTRS
286{
287 return __builtin_ia32_rsqrtss((__v4sf)__a);
288}
289
301static __inline__ __m128 __DEFAULT_FN_ATTRS
303{
304 return __builtin_ia32_rsqrtps((__v4sf)__a);
305}
306
324static __inline__ __m128 __DEFAULT_FN_ATTRS
325_mm_min_ss(__m128 __a, __m128 __b)
326{
327 return __builtin_ia32_minss((__v4sf)__a, (__v4sf)__b);
328}
329
343static __inline__ __m128 __DEFAULT_FN_ATTRS
344_mm_min_ps(__m128 __a, __m128 __b)
345{
346 return __builtin_ia32_minps((__v4sf)__a, (__v4sf)__b);
347}
348
366static __inline__ __m128 __DEFAULT_FN_ATTRS
367_mm_max_ss(__m128 __a, __m128 __b)
368{
369 return __builtin_ia32_maxss((__v4sf)__a, (__v4sf)__b);
370}
371
385static __inline__ __m128 __DEFAULT_FN_ATTRS
386_mm_max_ps(__m128 __a, __m128 __b)
387{
388 return __builtin_ia32_maxps((__v4sf)__a, (__v4sf)__b);
389}
390
403static __inline__ __m128 __DEFAULT_FN_ATTRS
404_mm_and_ps(__m128 __a, __m128 __b)
405{
406 return (__m128)((__v4su)__a & (__v4su)__b);
407}
408
425static __inline__ __m128 __DEFAULT_FN_ATTRS
426_mm_andnot_ps(__m128 __a, __m128 __b)
427{
428 return (__m128)(~(__v4su)__a & (__v4su)__b);
429}
430
443static __inline__ __m128 __DEFAULT_FN_ATTRS
444_mm_or_ps(__m128 __a, __m128 __b)
445{
446 return (__m128)((__v4su)__a | (__v4su)__b);
447}
448
462static __inline__ __m128 __DEFAULT_FN_ATTRS
463_mm_xor_ps(__m128 __a, __m128 __b)
464{
465 return (__m128)((__v4su)__a ^ (__v4su)__b);
466}
467
484static __inline__ __m128 __DEFAULT_FN_ATTRS
485_mm_cmpeq_ss(__m128 __a, __m128 __b)
486{
487 return (__m128)__builtin_ia32_cmpeqss((__v4sf)__a, (__v4sf)__b);
488}
489
502static __inline__ __m128 __DEFAULT_FN_ATTRS
503_mm_cmpeq_ps(__m128 __a, __m128 __b)
504{
505 return (__m128)__builtin_ia32_cmpeqps((__v4sf)__a, (__v4sf)__b);
506}
507
525static __inline__ __m128 __DEFAULT_FN_ATTRS
526_mm_cmplt_ss(__m128 __a, __m128 __b)
527{
528 return (__m128)__builtin_ia32_cmpltss((__v4sf)__a, (__v4sf)__b);
529}
530
544static __inline__ __m128 __DEFAULT_FN_ATTRS
545_mm_cmplt_ps(__m128 __a, __m128 __b)
546{
547 return (__m128)__builtin_ia32_cmpltps((__v4sf)__a, (__v4sf)__b);
548}
549
568static __inline__ __m128 __DEFAULT_FN_ATTRS
569_mm_cmple_ss(__m128 __a, __m128 __b)
570{
571 return (__m128)__builtin_ia32_cmpless((__v4sf)__a, (__v4sf)__b);
572}
573
587static __inline__ __m128 __DEFAULT_FN_ATTRS
588_mm_cmple_ps(__m128 __a, __m128 __b)
589{
590 return (__m128)__builtin_ia32_cmpleps((__v4sf)__a, (__v4sf)__b);
591}
592
610static __inline__ __m128 __DEFAULT_FN_ATTRS
611_mm_cmpgt_ss(__m128 __a, __m128 __b)
612{
613 return (__m128)__builtin_shufflevector((__v4sf)__a,
614 (__v4sf)__builtin_ia32_cmpltss((__v4sf)__b, (__v4sf)__a),
615 4, 1, 2, 3);
616}
617
631static __inline__ __m128 __DEFAULT_FN_ATTRS
632_mm_cmpgt_ps(__m128 __a, __m128 __b)
633{
634 return (__m128)__builtin_ia32_cmpltps((__v4sf)__b, (__v4sf)__a);
635}
636
655static __inline__ __m128 __DEFAULT_FN_ATTRS
656_mm_cmpge_ss(__m128 __a, __m128 __b)
657{
658 return (__m128)__builtin_shufflevector((__v4sf)__a,
659 (__v4sf)__builtin_ia32_cmpless((__v4sf)__b, (__v4sf)__a),
660 4, 1, 2, 3);
661}
662
676static __inline__ __m128 __DEFAULT_FN_ATTRS
677_mm_cmpge_ps(__m128 __a, __m128 __b)
678{
679 return (__m128)__builtin_ia32_cmpleps((__v4sf)__b, (__v4sf)__a);
680}
681
699static __inline__ __m128 __DEFAULT_FN_ATTRS
700_mm_cmpneq_ss(__m128 __a, __m128 __b)
701{
702 return (__m128)__builtin_ia32_cmpneqss((__v4sf)__a, (__v4sf)__b);
703}
704
718static __inline__ __m128 __DEFAULT_FN_ATTRS
719_mm_cmpneq_ps(__m128 __a, __m128 __b)
720{
721 return (__m128)__builtin_ia32_cmpneqps((__v4sf)__a, (__v4sf)__b);
722}
723
742static __inline__ __m128 __DEFAULT_FN_ATTRS
743_mm_cmpnlt_ss(__m128 __a, __m128 __b)
744{
745 return (__m128)__builtin_ia32_cmpnltss((__v4sf)__a, (__v4sf)__b);
746}
747
762static __inline__ __m128 __DEFAULT_FN_ATTRS
763_mm_cmpnlt_ps(__m128 __a, __m128 __b)
764{
765 return (__m128)__builtin_ia32_cmpnltps((__v4sf)__a, (__v4sf)__b);
766}
767
787static __inline__ __m128 __DEFAULT_FN_ATTRS
788_mm_cmpnle_ss(__m128 __a, __m128 __b)
789{
790 return (__m128)__builtin_ia32_cmpnless((__v4sf)__a, (__v4sf)__b);
791}
792
807static __inline__ __m128 __DEFAULT_FN_ATTRS
808_mm_cmpnle_ps(__m128 __a, __m128 __b)
809{
810 return (__m128)__builtin_ia32_cmpnleps((__v4sf)__a, (__v4sf)__b);
811}
812
832static __inline__ __m128 __DEFAULT_FN_ATTRS
833_mm_cmpngt_ss(__m128 __a, __m128 __b)
834{
835 return (__m128)__builtin_shufflevector((__v4sf)__a,
836 (__v4sf)__builtin_ia32_cmpnltss((__v4sf)__b, (__v4sf)__a),
837 4, 1, 2, 3);
838}
839
854static __inline__ __m128 __DEFAULT_FN_ATTRS
855_mm_cmpngt_ps(__m128 __a, __m128 __b)
856{
857 return (__m128)__builtin_ia32_cmpnltps((__v4sf)__b, (__v4sf)__a);
858}
859
879static __inline__ __m128 __DEFAULT_FN_ATTRS
880_mm_cmpnge_ss(__m128 __a, __m128 __b)
881{
882 return (__m128)__builtin_shufflevector((__v4sf)__a,
883 (__v4sf)__builtin_ia32_cmpnless((__v4sf)__b, (__v4sf)__a),
884 4, 1, 2, 3);
885}
886
901static __inline__ __m128 __DEFAULT_FN_ATTRS
902_mm_cmpnge_ps(__m128 __a, __m128 __b)
903{
904 return (__m128)__builtin_ia32_cmpnleps((__v4sf)__b, (__v4sf)__a);
905}
906
926static __inline__ __m128 __DEFAULT_FN_ATTRS
927_mm_cmpord_ss(__m128 __a, __m128 __b)
928{
929 return (__m128)__builtin_ia32_cmpordss((__v4sf)__a, (__v4sf)__b);
930}
931
946static __inline__ __m128 __DEFAULT_FN_ATTRS
947_mm_cmpord_ps(__m128 __a, __m128 __b)
948{
949 return (__m128)__builtin_ia32_cmpordps((__v4sf)__a, (__v4sf)__b);
950}
951
971static __inline__ __m128 __DEFAULT_FN_ATTRS
972_mm_cmpunord_ss(__m128 __a, __m128 __b)
973{
974 return (__m128)__builtin_ia32_cmpunordss((__v4sf)__a, (__v4sf)__b);
975}
976
991static __inline__ __m128 __DEFAULT_FN_ATTRS
992_mm_cmpunord_ps(__m128 __a, __m128 __b)
993{
994 return (__m128)__builtin_ia32_cmpunordps((__v4sf)__a, (__v4sf)__b);
995}
996
1015static __inline__ int __DEFAULT_FN_ATTRS
1016_mm_comieq_ss(__m128 __a, __m128 __b)
1017{
1018 return __builtin_ia32_comieq((__v4sf)__a, (__v4sf)__b);
1019}
1020
1040static __inline__ int __DEFAULT_FN_ATTRS
1041_mm_comilt_ss(__m128 __a, __m128 __b)
1042{
1043 return __builtin_ia32_comilt((__v4sf)__a, (__v4sf)__b);
1044}
1045
1064static __inline__ int __DEFAULT_FN_ATTRS
1065_mm_comile_ss(__m128 __a, __m128 __b)
1066{
1067 return __builtin_ia32_comile((__v4sf)__a, (__v4sf)__b);
1068}
1069
1088static __inline__ int __DEFAULT_FN_ATTRS
1089_mm_comigt_ss(__m128 __a, __m128 __b)
1090{
1091 return __builtin_ia32_comigt((__v4sf)__a, (__v4sf)__b);
1092}
1093
1112static __inline__ int __DEFAULT_FN_ATTRS
1113_mm_comige_ss(__m128 __a, __m128 __b)
1114{
1115 return __builtin_ia32_comige((__v4sf)__a, (__v4sf)__b);
1116}
1117
1136static __inline__ int __DEFAULT_FN_ATTRS
1137_mm_comineq_ss(__m128 __a, __m128 __b)
1138{
1139 return __builtin_ia32_comineq((__v4sf)__a, (__v4sf)__b);
1140}
1141
1160static __inline__ int __DEFAULT_FN_ATTRS
1161_mm_ucomieq_ss(__m128 __a, __m128 __b)
1162{
1163 return __builtin_ia32_ucomieq((__v4sf)__a, (__v4sf)__b);
1164}
1165
1184static __inline__ int __DEFAULT_FN_ATTRS
1185_mm_ucomilt_ss(__m128 __a, __m128 __b)
1186{
1187 return __builtin_ia32_ucomilt((__v4sf)__a, (__v4sf)__b);
1188}
1189
1209static __inline__ int __DEFAULT_FN_ATTRS
1210_mm_ucomile_ss(__m128 __a, __m128 __b)
1211{
1212 return __builtin_ia32_ucomile((__v4sf)__a, (__v4sf)__b);
1213}
1214
1234static __inline__ int __DEFAULT_FN_ATTRS
1235_mm_ucomigt_ss(__m128 __a, __m128 __b)
1236{
1237 return __builtin_ia32_ucomigt((__v4sf)__a, (__v4sf)__b);
1238}
1239
1259static __inline__ int __DEFAULT_FN_ATTRS
1260_mm_ucomige_ss(__m128 __a, __m128 __b)
1261{
1262 return __builtin_ia32_ucomige((__v4sf)__a, (__v4sf)__b);
1263}
1264
1283static __inline__ int __DEFAULT_FN_ATTRS
1284_mm_ucomineq_ss(__m128 __a, __m128 __b)
1285{
1286 return __builtin_ia32_ucomineq((__v4sf)__a, (__v4sf)__b);
1287}
1288
1301static __inline__ int __DEFAULT_FN_ATTRS
1303{
1304 return __builtin_ia32_cvtss2si((__v4sf)__a);
1305}
1306
1319static __inline__ int __DEFAULT_FN_ATTRS
1321{
1322 return _mm_cvtss_si32(__a);
1323}
1324
1325#ifdef __x86_64__
1326
1339static __inline__ long long __DEFAULT_FN_ATTRS
1340_mm_cvtss_si64(__m128 __a)
1341{
1342 return __builtin_ia32_cvtss2si64((__v4sf)__a);
1343}
1344
1345#endif
1346
1357static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
1359{
1360 return (__m64)__builtin_ia32_cvtps2pi((__v4sf)__a);
1361}
1362
1373static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
1375{
1376 return _mm_cvtps_pi32(__a);
1377}
1378
1392static __inline__ int __DEFAULT_FN_ATTRS
1394{
1395 return __builtin_ia32_cvttss2si((__v4sf)__a);
1396}
1397
1411static __inline__ int __DEFAULT_FN_ATTRS
1413{
1414 return _mm_cvttss_si32(__a);
1415}
1416
1417#ifdef __x86_64__
1431static __inline__ long long __DEFAULT_FN_ATTRS
1432_mm_cvttss_si64(__m128 __a)
1433{
1434 return __builtin_ia32_cvttss2si64((__v4sf)__a);
1435}
1436#endif
1437
1450static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
1452{
1453 return (__m64)__builtin_ia32_cvttps2pi((__v4sf)__a);
1454}
1455
1467static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
1469{
1470 return _mm_cvttps_pi32(__a);
1471}
1472
1489static __inline__ __m128 __DEFAULT_FN_ATTRS
1491{
1492 __a[0] = __b;
1493 return __a;
1494}
1495
1512static __inline__ __m128 __DEFAULT_FN_ATTRS
1514{
1515 return _mm_cvtsi32_ss(__a, __b);
1516}
1517
1518#ifdef __x86_64__
1519
1536static __inline__ __m128 __DEFAULT_FN_ATTRS
1537_mm_cvtsi64_ss(__m128 __a, long long __b)
1538{
1539 __a[0] = __b;
1540 return __a;
1541}
1542
1543#endif
1544
1562static __inline__ __m128 __DEFAULT_FN_ATTRS_MMX
1563_mm_cvtpi32_ps(__m128 __a, __m64 __b)
1564{
1565 return __builtin_ia32_cvtpi2ps((__v4sf)__a, (__v2si)__b);
1566}
1567
1585static __inline__ __m128 __DEFAULT_FN_ATTRS_MMX
1586_mm_cvt_pi2ps(__m128 __a, __m64 __b)
1587{
1588 return _mm_cvtpi32_ps(__a, __b);
1589}
1590
1602static __inline__ float __DEFAULT_FN_ATTRS
1604{
1605 return __a[0];
1606}
1607
1623static __inline__ __m128 __DEFAULT_FN_ATTRS
1624_mm_loadh_pi(__m128 __a, const __m64 *__p)
1625{
1626 typedef float __mm_loadh_pi_v2f32 __attribute__((__vector_size__(8)));
1627 struct __mm_loadh_pi_struct {
1628 __mm_loadh_pi_v2f32 __u;
1629 } __attribute__((__packed__, __may_alias__));
1630 __mm_loadh_pi_v2f32 __b = ((const struct __mm_loadh_pi_struct*)__p)->__u;
1631 __m128 __bb = __builtin_shufflevector(__b, __b, 0, 1, 0, 1);
1632 return __builtin_shufflevector(__a, __bb, 0, 1, 4, 5);
1633}
1634
1650static __inline__ __m128 __DEFAULT_FN_ATTRS
1651_mm_loadl_pi(__m128 __a, const __m64 *__p)
1652{
1653 typedef float __mm_loadl_pi_v2f32 __attribute__((__vector_size__(8)));
1654 struct __mm_loadl_pi_struct {
1655 __mm_loadl_pi_v2f32 __u;
1656 } __attribute__((__packed__, __may_alias__));
1657 __mm_loadl_pi_v2f32 __b = ((const struct __mm_loadl_pi_struct*)__p)->__u;
1658 __m128 __bb = __builtin_shufflevector(__b, __b, 0, 1, 0, 1);
1659 return __builtin_shufflevector(__a, __bb, 4, 5, 2, 3);
1660}
1661
1677static __inline__ __m128 __DEFAULT_FN_ATTRS
1678_mm_load_ss(const float *__p)
1679{
1680 struct __mm_load_ss_struct {
1681 float __u;
1682 } __attribute__((__packed__, __may_alias__));
1683 float __u = ((const struct __mm_load_ss_struct*)__p)->__u;
1684 return __extension__ (__m128){ __u, 0, 0, 0 };
1685}
1686
1699static __inline__ __m128 __DEFAULT_FN_ATTRS
1700_mm_load1_ps(const float *__p)
1701{
1702 struct __mm_load1_ps_struct {
1703 float __u;
1704 } __attribute__((__packed__, __may_alias__));
1705 float __u = ((const struct __mm_load1_ps_struct*)__p)->__u;
1706 return __extension__ (__m128){ __u, __u, __u, __u };
1707}
1708
1709#define _mm_load_ps1(p) _mm_load1_ps(p)
1710
1722static __inline__ __m128 __DEFAULT_FN_ATTRS
1723_mm_load_ps(const float *__p)
1724{
1725 return *(const __m128*)__p;
1726}
1727
1739static __inline__ __m128 __DEFAULT_FN_ATTRS
1740_mm_loadu_ps(const float *__p)
1741{
1742 struct __loadu_ps {
1743 __m128_u __v;
1744 } __attribute__((__packed__, __may_alias__));
1745 return ((const struct __loadu_ps*)__p)->__v;
1746}
1747
1761static __inline__ __m128 __DEFAULT_FN_ATTRS
1762_mm_loadr_ps(const float *__p)
1763{
1764 __m128 __a = _mm_load_ps(__p);
1765 return __builtin_shufflevector((__v4sf)__a, (__v4sf)__a, 3, 2, 1, 0);
1766}
1767
1775static __inline__ __m128 __DEFAULT_FN_ATTRS
1777{
1778 return (__m128)__builtin_ia32_undef128();
1779}
1780
1795static __inline__ __m128 __DEFAULT_FN_ATTRS
1796_mm_set_ss(float __w)
1797{
1798 return __extension__ (__m128){ __w, 0, 0, 0 };
1799}
1800
1813static __inline__ __m128 __DEFAULT_FN_ATTRS
1814_mm_set1_ps(float __w)
1815{
1816 return __extension__ (__m128){ __w, __w, __w, __w };
1817}
1818
1819/* Microsoft specific. */
1832static __inline__ __m128 __DEFAULT_FN_ATTRS
1833_mm_set_ps1(float __w)
1834{
1835 return _mm_set1_ps(__w);
1836}
1837
1859static __inline__ __m128 __DEFAULT_FN_ATTRS
1860_mm_set_ps(float __z, float __y, float __x, float __w)
1861{
1862 return __extension__ (__m128){ __w, __x, __y, __z };
1863}
1864
1887static __inline__ __m128 __DEFAULT_FN_ATTRS
1888_mm_setr_ps(float __z, float __y, float __x, float __w)
1889{
1890 return __extension__ (__m128){ __z, __y, __x, __w };
1891}
1892
1902static __inline__ __m128 __DEFAULT_FN_ATTRS
1904{
1905 return __extension__ (__m128){ 0, 0, 0, 0 };
1906}
1907
1919static __inline__ void __DEFAULT_FN_ATTRS
1920_mm_storeh_pi(__m64 *__p, __m128 __a)
1921{
1922 typedef float __mm_storeh_pi_v2f32 __attribute__((__vector_size__(8)));
1923 struct __mm_storeh_pi_struct {
1924 __mm_storeh_pi_v2f32 __u;
1925 } __attribute__((__packed__, __may_alias__));
1926 ((struct __mm_storeh_pi_struct*)__p)->__u = __builtin_shufflevector(__a, __a, 2, 3);
1927}
1928
1940static __inline__ void __DEFAULT_FN_ATTRS
1941_mm_storel_pi(__m64 *__p, __m128 __a)
1942{
1943 typedef float __mm_storeh_pi_v2f32 __attribute__((__vector_size__(8)));
1944 struct __mm_storeh_pi_struct {
1945 __mm_storeh_pi_v2f32 __u;
1946 } __attribute__((__packed__, __may_alias__));
1947 ((struct __mm_storeh_pi_struct*)__p)->__u = __builtin_shufflevector(__a, __a, 0, 1);
1948}
1949
1961static __inline__ void __DEFAULT_FN_ATTRS
1962_mm_store_ss(float *__p, __m128 __a)
1963{
1964 struct __mm_store_ss_struct {
1965 float __u;
1966 } __attribute__((__packed__, __may_alias__));
1967 ((struct __mm_store_ss_struct*)__p)->__u = __a[0];
1968}
1969
1982static __inline__ void __DEFAULT_FN_ATTRS
1983_mm_storeu_ps(float *__p, __m128 __a)
1984{
1985 struct __storeu_ps {
1986 __m128_u __v;
1987 } __attribute__((__packed__, __may_alias__));
1988 ((struct __storeu_ps*)__p)->__v = __a;
1989}
1990
2003static __inline__ void __DEFAULT_FN_ATTRS
2004_mm_store_ps(float *__p, __m128 __a)
2005{
2006 *(__m128*)__p = __a;
2007}
2008
2022static __inline__ void __DEFAULT_FN_ATTRS
2023_mm_store1_ps(float *__p, __m128 __a)
2024{
2025 __a = __builtin_shufflevector((__v4sf)__a, (__v4sf)__a, 0, 0, 0, 0);
2027}
2028
2042static __inline__ void __DEFAULT_FN_ATTRS
2043_mm_store_ps1(float *__p, __m128 __a)
2044{
2046}
2047
2061static __inline__ void __DEFAULT_FN_ATTRS
2062_mm_storer_ps(float *__p, __m128 __a)
2063{
2064 __a = __builtin_shufflevector((__v4sf)__a, (__v4sf)__a, 3, 2, 1, 0);
2066}
2067
/* Selector values for _mm_prefetch. As decoded by the macro below, bit 2
 * becomes the second argument of __builtin_prefetch and bits 1:0 become
 * the third. */
#define _MM_HINT_ET0 7
#define _MM_HINT_ET1 6
#define _MM_HINT_T0  3
#define _MM_HINT_T1  2
#define _MM_HINT_T2  1
#define _MM_HINT_NTA 0

#ifndef _MSC_VER
/* FIXME: We have to #define this because "sel" must be a constant integer, and
   Sema doesn't do any form of constant propagation yet. */

/* Issues __builtin_prefetch on address `a`, splitting `sel` into the two
 * trailing builtin arguments. */
#define _mm_prefetch(a, sel)                                                   \
  (__builtin_prefetch((const void *)(a), ((sel) >> 2) & 1, (sel) & 0x3))
#endif
2106
2119static __inline__ void __DEFAULT_FN_ATTRS_MMX
2120_mm_stream_pi(__m64 *__p, __m64 __a)
2121{
2122 __builtin_ia32_movntq(__p, __a);
2123}
2124
2138static __inline__ void __DEFAULT_FN_ATTRS
2139_mm_stream_ps(float *__p, __m128 __a)
2140{
2141 __builtin_nontemporal_store((__v4sf)__a, (__v4sf*)__p);
2142}
2143
#ifdef __cplusplus
extern "C" {
#endif

/* Declaration only; no definition is visible in this header. Given
 * C linkage when compiled as C++. */
void _mm_sfence(void);

#ifdef __cplusplus
} // extern "C"
#endif
2162
/* Extracts 16-bit element `n` from the 64-bit vector `a` (viewed as
 * [4 x i16]) and yields it as an int. `n` must be an integer constant.
 * Arguments and the whole expansion are parenthesized so that compound
 * argument expressions and surrounding operators parse as intended. */
#define _mm_extract_pi16(a, n) \
  ((int)__builtin_ia32_vec_ext_v4hi((__v4hi)(__m64)(a), (int)(n)))
2185
/* Returns a copy of the 64-bit vector `a` (viewed as [4 x i16]) with
 * 16-bit element `n` replaced by `d`. `n` must be an integer constant.
 * Arguments and the whole expansion are parenthesized so that compound
 * argument expressions and surrounding operators parse as intended. */
#define _mm_insert_pi16(a, d, n) \
  ((__m64)__builtin_ia32_vec_set_v4hi((__v4hi)(__m64)(a), (int)(d), (int)(n)))
2216
2230static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
2231_mm_max_pi16(__m64 __a, __m64 __b)
2232{
2233 return (__m64)__builtin_ia32_pmaxsw((__v4hi)__a, (__v4hi)__b);
2234}
2235
2249static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
2250_mm_max_pu8(__m64 __a, __m64 __b)
2251{
2252 return (__m64)__builtin_ia32_pmaxub((__v8qi)__a, (__v8qi)__b);
2253}
2254
2268static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
2269_mm_min_pi16(__m64 __a, __m64 __b)
2270{
2271 return (__m64)__builtin_ia32_pminsw((__v4hi)__a, (__v4hi)__b);
2272}
2273
2287static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
2288_mm_min_pu8(__m64 __a, __m64 __b)
2289{
2290 return (__m64)__builtin_ia32_pminub((__v8qi)__a, (__v8qi)__b);
2291}
2292
2305static __inline__ int __DEFAULT_FN_ATTRS_MMX
2307{
2308 return __builtin_ia32_pmovmskb((__v8qi)__a);
2309}
2310
2324static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
2326{
2327 return (__m64)__builtin_ia32_pmulhuw((__v4hi)__a, (__v4hi)__b);
2328}
2329
/* Shuffles the four 16-bit elements of `a` according to the immediate
 * `n`, via __builtin_ia32_pshufw. The expansion is fully parenthesized so
 * that a postfix or binary operator applied to the macro result binds to
 * the cast value rather than to the builtin call. */
#define _mm_shuffle_pi16(a, n) \
  ((__m64)__builtin_ia32_pshufw((__v4hi)(__m64)(a), (n)))
2363
2387static __inline__ void __DEFAULT_FN_ATTRS_MMX
2388_mm_maskmove_si64(__m64 __d, __m64 __n, char *__p)
2389{
2390 __builtin_ia32_maskmovq((__v8qi)__d, (__v8qi)__n, __p);
2391}
2392
2406static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
2407_mm_avg_pu8(__m64 __a, __m64 __b)
2408{
2409 return (__m64)__builtin_ia32_pavgb((__v8qi)__a, (__v8qi)__b);
2410}
2411
2425static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
2426_mm_avg_pu16(__m64 __a, __m64 __b)
2427{
2428 return (__m64)__builtin_ia32_pavgw((__v4hi)__a, (__v4hi)__b);
2429}
2430
2447static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
2448_mm_sad_pu8(__m64 __a, __m64 __b)
2449{
2450 return (__m64)__builtin_ia32_psadbw((__v8qi)__a, (__v8qi)__b);
2451}
2452
#ifdef __cplusplus
extern "C" {
#endif

/* Declaration only; returns an unsigned int that the _MM_GET_* macros in
 * this header mask with the _MM_*_MASK constants. */
unsigned int _mm_getcsr(void);

/* Declaration only; takes the unsigned int value that the _MM_SET_* macros
 * in this header compose from _mm_getcsr() and a new field value. */
void _mm_setcsr(unsigned int __i);

#ifdef __cplusplus
} // extern "C"
#endif
2566
/* Selects four floats from `a` and `b` according to the immediate `mask`
 * (see _MM_SHUFFLE), via __builtin_ia32_shufps. The expansion is fully
 * parenthesized so that a postfix or binary operator applied to the macro
 * result binds to the cast value rather than to the builtin call. */
#define _mm_shuffle_ps(a, b, mask) \
  ((__m128)__builtin_ia32_shufps((__v4sf)(__m128)(a), (__v4sf)(__m128)(b), \
                                 (int)(mask)))
2606
2623static __inline__ __m128 __DEFAULT_FN_ATTRS
2624_mm_unpackhi_ps(__m128 __a, __m128 __b)
2625{
2626 return __builtin_shufflevector((__v4sf)__a, (__v4sf)__b, 2, 6, 3, 7);
2627}
2628
2645static __inline__ __m128 __DEFAULT_FN_ATTRS
2646_mm_unpacklo_ps(__m128 __a, __m128 __b)
2647{
2648 return __builtin_shufflevector((__v4sf)__a, (__v4sf)__b, 0, 4, 1, 5);
2649}
2650
2667static __inline__ __m128 __DEFAULT_FN_ATTRS
2668_mm_move_ss(__m128 __a, __m128 __b)
2669{
2670 __a[0] = __b[0];
2671 return __a;
2672}
2673
2689static __inline__ __m128 __DEFAULT_FN_ATTRS
2690_mm_movehl_ps(__m128 __a, __m128 __b)
2691{
2692 return __builtin_shufflevector((__v4sf)__a, (__v4sf)__b, 6, 7, 2, 3);
2693}
2694
2710static __inline__ __m128 __DEFAULT_FN_ATTRS
2711_mm_movelh_ps(__m128 __a, __m128 __b)
2712{
2713 return __builtin_shufflevector((__v4sf)__a, (__v4sf)__b, 0, 1, 4, 5);
2714}
2715
2728static __inline__ __m128 __DEFAULT_FN_ATTRS_MMX
2730{
2731 __m64 __b, __c;
2732 __m128 __r;
2733
2737 __r = _mm_setzero_ps();
2738 __r = _mm_cvtpi32_ps(__r, __c);
2739 __r = _mm_movelh_ps(__r, __r);
2741 __r = _mm_cvtpi32_ps(__r, __c);
2742
2743 return __r;
2744}
2745
2758static __inline__ __m128 __DEFAULT_FN_ATTRS_MMX
2760{
2761 __m64 __b, __c;
2762 __m128 __r;
2763
2766 __r = _mm_setzero_ps();
2767 __r = _mm_cvtpi32_ps(__r, __c);
2768 __r = _mm_movelh_ps(__r, __r);
2770 __r = _mm_cvtpi32_ps(__r, __c);
2771
2772 return __r;
2773}
2774
2787static __inline__ __m128 __DEFAULT_FN_ATTRS_MMX
2789{
2790 __m64 __b;
2791
2795
2796 return _mm_cvtpi16_ps(__b);
2797}
2798
2812static __inline__ __m128 __DEFAULT_FN_ATTRS_MMX
2814{
2815 __m64 __b;
2816
2819
2820 return _mm_cvtpi16_ps(__b);
2821}
2822
2839static __inline__ __m128 __DEFAULT_FN_ATTRS_MMX
2841{
2842 __m128 __c;
2843
2844 __c = _mm_setzero_ps();
2847
2848 return _mm_cvtpi32_ps(__c, __a);
2849}
2850
2868static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
2870{
2871 __m64 __b, __c;
2872
2876
2877 return _mm_packs_pi32(__b, __c);
2878}
2879
2898static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
2900{
2901 __m64 __b, __c;
2902
2905
2906 return _mm_packs_pi16(__b, __c);
2907}
2908
2923static __inline__ int __DEFAULT_FN_ATTRS
2925{
2926 return __builtin_ia32_movmskps((__v4sf)__a);
2927}
2928
2929
/* Declares 16-byte alignment for a variable or type. */
#define _MM_ALIGN16 __attribute__((aligned(16)))

/* Packs four 2-bit selectors into one 8-bit immediate (z in bits 7:6,
 * y in 5:4, x in 3:2, w in 1:0). */
#define _MM_SHUFFLE(z, y, x, w) (((z) << 6) | ((y) << 4) | ((x) << 2) | (w))

/* Exception state bits of the value returned by _mm_getcsr(). */
#define _MM_EXCEPT_INVALID    (0x0001U)
#define _MM_EXCEPT_DENORM     (0x0002U)
#define _MM_EXCEPT_DIV_ZERO   (0x0004U)
#define _MM_EXCEPT_OVERFLOW   (0x0008U)
#define _MM_EXCEPT_UNDERFLOW  (0x0010U)
#define _MM_EXCEPT_INEXACT    (0x0020U)
#define _MM_EXCEPT_MASK       (0x003fU)

/* Exception mask bits. */
#define _MM_MASK_INVALID      (0x0080U)
#define _MM_MASK_DENORM       (0x0100U)
#define _MM_MASK_DIV_ZERO     (0x0200U)
#define _MM_MASK_OVERFLOW     (0x0400U)
#define _MM_MASK_UNDERFLOW    (0x0800U)
#define _MM_MASK_INEXACT      (0x1000U)
#define _MM_MASK_MASK         (0x1f80U)

/* Rounding mode field. */
#define _MM_ROUND_NEAREST     (0x0000U)
#define _MM_ROUND_DOWN        (0x2000U)
#define _MM_ROUND_UP          (0x4000U)
#define _MM_ROUND_TOWARD_ZERO (0x6000U)
#define _MM_ROUND_MASK        (0x6000U)

/* Flush-to-zero field. */
#define _MM_FLUSH_ZERO_MASK   (0x8000U)
#define _MM_FLUSH_ZERO_ON     (0x8000U)
#define _MM_FLUSH_ZERO_OFF    (0x0000U)

/* Read one field: _mm_getcsr() masked with the field's mask. */
#define _MM_GET_EXCEPTION_MASK() (_mm_getcsr() & _MM_MASK_MASK)
#define _MM_GET_EXCEPTION_STATE() (_mm_getcsr() & _MM_EXCEPT_MASK)
#define _MM_GET_FLUSH_ZERO_MODE() (_mm_getcsr() & _MM_FLUSH_ZERO_MASK)
#define _MM_GET_ROUNDING_MODE() (_mm_getcsr() & _MM_ROUND_MASK)

/* Write one field: clear it in the current value, OR in (x), and pass the
 * result to _mm_setcsr(). (x) is not masked, so it must only contain bits
 * of the field being set. */
#define _MM_SET_EXCEPTION_MASK(x) (_mm_setcsr((_mm_getcsr() & ~_MM_MASK_MASK) | (x)))
#define _MM_SET_EXCEPTION_STATE(x) (_mm_setcsr((_mm_getcsr() & ~_MM_EXCEPT_MASK) | (x)))
#define _MM_SET_FLUSH_ZERO_MODE(x) (_mm_setcsr((_mm_getcsr() & ~_MM_FLUSH_ZERO_MASK) | (x)))
#define _MM_SET_ROUNDING_MODE(x) (_mm_setcsr((_mm_getcsr() & ~_MM_ROUND_MASK) | (x)))
2969
/* Transposes, in place, the 4x4 float matrix whose rows are the four
 * __m128 lvalues row0..row3. Each row argument is evaluated more than
 * once. The temporaries use reserved (double-underscore) names so that
 * caller variables such as `tmp0` passed as rows are not captured by the
 * macro's own locals. */
#define _MM_TRANSPOSE4_PS(row0, row1, row2, row3) \
do { \
  __m128 __tmp3, __tmp2, __tmp1, __tmp0; \
  __tmp0 = _mm_unpacklo_ps((row0), (row1)); \
  __tmp2 = _mm_unpacklo_ps((row2), (row3)); \
  __tmp1 = _mm_unpackhi_ps((row0), (row1)); \
  __tmp3 = _mm_unpackhi_ps((row2), (row3)); \
  (row0) = _mm_movelh_ps(__tmp0, __tmp2); \
  (row1) = _mm_movehl_ps(__tmp2, __tmp0); \
  (row2) = _mm_movelh_ps(__tmp1, __tmp3); \
  (row3) = _mm_movehl_ps(__tmp3, __tmp1); \
} while (0)
2982
/* Aliases for compatibility: instruction-style names (_m_*) mapped onto
 * the _mm_* intrinsics defined in this header. */
#define _m_pextrw   _mm_extract_pi16
#define _m_pinsrw   _mm_insert_pi16
#define _m_pmaxsw   _mm_max_pi16
#define _m_pmaxub   _mm_max_pu8
#define _m_pminsw   _mm_min_pi16
#define _m_pminub   _mm_min_pu8
#define _m_pmovmskb _mm_movemask_pi8
#define _m_pmulhuw  _mm_mulhi_pu16
#define _m_pshufw   _mm_shuffle_pi16
#define _m_maskmovq _mm_maskmove_si64
#define _m_pavgb    _mm_avg_pu8
#define _m_pavgw    _mm_avg_pu16
#define _m_psadbw   _mm_sad_pu8
#define _m_ _mm_
#define _m_ _mm_
2999
3000#undef __DEFAULT_FN_ATTRS
3001#undef __DEFAULT_FN_ATTRS_MMX
3002
3003/* Ugly hack for backwards-compatibility (compatible with gcc) */
3004#if defined(__SSE2__) && !__building_module(_Builtin_intrinsics)
3005#include <emmintrin.h>
3006#endif
3007
3008#endif /* __XMMINTRIN_H */
_Float16 __2f16 __attribute__((ext_vector_type(2)))
Zeroes the upper 128 bits (bits 255:128) of all YMM registers.
static __inline unsigned char unsigned int __x
Definition adxintrin.h:22
static __inline unsigned char unsigned int unsigned int __y
Definition adxintrin.h:22
static __inline unsigned char unsigned int unsigned int unsigned int * __p
Definition adxintrin.h:24
static __inline__ vector float vector float vector float __c
Definition altivec.h:4243
static __inline__ vector float vector float __b
Definition altivec.h:520
static __inline__ void int __a
Definition emmintrin.h:4185
struct __storeu_i16 *__P __v
Definition immintrin.h:348
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_unpackhi_pi16(__m64 __m1, __m64 __m2)
Unpacks the upper 32 bits from two 64-bit integer vectors of [4 x i16] and interleaves them into a 64...
Definition mmintrin.h:237
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_unpacklo_pi8(__m64 __m1, __m64 __m2)
Unpacks the lower 32 bits from two 64-bit integer vectors of [8 x i8] and interleaves them into a 64-...
Definition mmintrin.h:285
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_cmpgt_pi16(__m64 __m1, __m64 __m2)
Compares the 16-bit integer elements of two 64-bit integer vectors of [4 x i16] to determine if the e...
Definition mmintrin.h:1247
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_cmpgt_pi8(__m64 __m1, __m64 __m2)
Compares the 8-bit integer elements of two 64-bit integer vectors of [8 x i8] to determine if the ele...
Definition mmintrin.h:1225
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_packs_pi32(__m64 __m1, __m64 __m2)
Converts 32-bit signed integers from both 64-bit integer vector parameters of [2 x i32] into 16-bit s...
Definition mmintrin.h:157
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_unpacklo_pi16(__m64 __m1, __m64 __m2)
Unpacks the lower 32 bits from two 64-bit integer vectors of [4 x i16] and interleaves them into a 64...
Definition mmintrin.h:308
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_setzero_si64(void)
Constructs a 64-bit integer vector initialized to zero.
Definition mmintrin.h:1282
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_packs_pi16(__m64 __m1, __m64 __m2)
Converts 16-bit signed integers from both 64-bit integer vector parameters of [4 x i16] into 8-bit si...
Definition mmintrin.h:127
static __inline__ int __DEFAULT_FN_ATTRS _mm_comigt_ss(__m128 __a, __m128 __b)
Compares two 32-bit float values in the low-order bits of both operands to determine if the first ope...
Definition xmmintrin.h:1089
static __inline__ int __DEFAULT_FN_ATTRS _mm_cvttss_si32(__m128 __a)
Converts a float value contained in the lower 32 bits of a vector of [4 x float] into a 32-bit intege...
Definition xmmintrin.h:1393
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_rcp_ss(__m128 __a)
Calculates the approximate reciprocal of the value stored in the low-order bits of a 128-bit vector o...
Definition xmmintrin.h:249
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_cmplt_ps(__m128 __a, __m128 __b)
Compares each of the corresponding 32-bit float values of the 128-bit vectors of [4 x float] to deter...
Definition xmmintrin.h:545
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_sqrt_ss(__m128 __a)
Calculates the square root of the value stored in the low-order bits of a 128-bit vector of [4 x floa...
Definition xmmintrin.h:214
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_div_ps(__m128 __a, __m128 __b)
Divides two 128-bit vectors of [4 x float].
Definition xmmintrin.h:196
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_cmpnge_ss(__m128 __a, __m128 __b)
Compares two 32-bit float values in the low-order bits of both operands to determine if the value in ...
Definition xmmintrin.h:880
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_cmpeq_ps(__m128 __a, __m128 __b)
Compares each of the corresponding 32-bit float values of the 128-bit vectors of [4 x float] for equa...
Definition xmmintrin.h:503
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_setzero_ps(void)
Constructs a 128-bit floating-point vector of [4 x float] initialized to zero.
Definition xmmintrin.h:1903
static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX _mm_cvtps_pi32(__m128 __a)
Converts two low-order float values in a 128-bit vector of [4 x float] into a 64-bit vector of [2 x i...
Definition xmmintrin.h:1358
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_set_ss(float __w)
Constructs a 128-bit floating-point vector of [4 x float].
Definition xmmintrin.h:1796
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_and_ps(__m128 __a, __m128 __b)
Performs a bitwise AND of two 128-bit vectors of [4 x float].
Definition xmmintrin.h:404
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_cmplt_ss(__m128 __a, __m128 __b)
Compares two 32-bit float values in the low-order bits of both operands to determine if the value in ...
Definition xmmintrin.h:526
static __inline__ int __DEFAULT_FN_ATTRS _mm_cvt_ss2si(__m128 __a)
Converts a float value contained in the lower 32 bits of a vector of [4 x float] into a 32-bit intege...
Definition xmmintrin.h:1320
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_cmpeq_ss(__m128 __a, __m128 __b)
Compares two 32-bit float values in the low-order bits of both operands for equality and returns the ...
Definition xmmintrin.h:485
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_add_ps(__m128 __a, __m128 __b)
Adds two 128-bit vectors of [4 x float], and returns the results of the addition.
Definition xmmintrin.h:70
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_load_ps(const float *__p)
Loads a 128-bit floating-point vector of [4 x float] from an aligned memory location.
Definition xmmintrin.h:1723
static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX _mm_mulhi_pu16(__m64 __a, __m64 __b)
Multiplies packed 16-bit unsigned integer values and writes the high-order 16 bits of each 32-bit pro...
Definition xmmintrin.h:2325
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_cmpneq_ps(__m128 __a, __m128 __b)
Compares each of the corresponding 32-bit float values of the 128-bit vectors of [4 x float] for ineq...
Definition xmmintrin.h:719
static __inline__ int __DEFAULT_FN_ATTRS _mm_comile_ss(__m128 __a, __m128 __b)
Compares two 32-bit float values in the low-order bits of both operands to determine if the first ope...
Definition xmmintrin.h:1065
static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX _mm_cvt_ps2pi(__m128 __a)
Converts two low-order float values in a 128-bit vector of [4 x float] into a 64-bit vector of [2 x i...
Definition xmmintrin.h:1374
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_andnot_ps(__m128 __a, __m128 __b)
Performs a bitwise AND of two 128-bit vectors of [4 x float], using the one's complement of the value...
Definition xmmintrin.h:426
static __inline__ void __DEFAULT_FN_ATTRS _mm_storer_ps(float *__p, __m128 __a)
Stores float values from a 128-bit vector of [4 x float] to an aligned memory location in reverse ord...
Definition xmmintrin.h:2062
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_undefined_ps(void)
Create a 128-bit vector of [4 x float] with undefined values.
Definition xmmintrin.h:1776
static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX _mm_cvttps_pi32(__m128 __a)
Converts two low-order float values in a 128-bit vector of [4 x float] into a 64-bit vector of [2 x i...
Definition xmmintrin.h:1451
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_cmpnle_ss(__m128 __a, __m128 __b)
Compares two 32-bit float values in the low-order bits of both operands to determine if the value in ...
Definition xmmintrin.h:788
static __inline__ int __DEFAULT_FN_ATTRS _mm_ucomilt_ss(__m128 __a, __m128 __b)
Performs an unordered comparison of two 32-bit float values using the low-order bits of both operands...
Definition xmmintrin.h:1185
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_cmple_ss(__m128 __a, __m128 __b)
Compares two 32-bit float values in the low-order bits of both operands to determine if the value in ...
Definition xmmintrin.h:569
static __inline__ void __DEFAULT_FN_ATTRS _mm_stream_ps(float *__p, __m128 __a)
Moves packed float values from a 128-bit vector of [4 x float] to a 128-bit aligned memory location.
Definition xmmintrin.h:2139
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_cvt_si2ss(__m128 __a, int __b)
Converts a 32-bit signed integer value into a floating point value and writes it to the lower 32 bits...
Definition xmmintrin.h:1513
static __inline__ __m128 __DEFAULT_FN_ATTRS_MMX _mm_cvtpi16_ps(__m64 __a)
Converts a 64-bit vector of [4 x i16] into a 128-bit vector of [4 x float].
Definition xmmintrin.h:2729
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_rsqrt_ps(__m128 __a)
Calculates the approximate reciprocals of the square roots of the values stored in a 128-bit vector o...
Definition xmmintrin.h:302
static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX _mm_cvtps_pi8(__m128 __a)
Converts each single-precision floating-point element of a 128-bit floating-point vector of [4 x floa...
Definition xmmintrin.h:2899
static __inline__ void __DEFAULT_FN_ATTRS _mm_storel_pi(__m64 *__p, __m128 __a)
Stores the lower 64 bits of a 128-bit vector of [4 x float] to a memory location.
Definition xmmintrin.h:1941
static __inline__ int __DEFAULT_FN_ATTRS _mm_ucomile_ss(__m128 __a, __m128 __b)
Performs an unordered comparison of two 32-bit float values using the low-order bits of both operands...
Definition xmmintrin.h:1210
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_cmpge_ps(__m128 __a, __m128 __b)
Compares each of the corresponding 32-bit float values of the 128-bit vectors of [4 x float] to deter...
Definition xmmintrin.h:677
static __inline__ int __DEFAULT_FN_ATTRS _mm_comieq_ss(__m128 __a, __m128 __b)
Compares two 32-bit float values in the low-order bits of both operands for equality and returns the ...
Definition xmmintrin.h:1016
static __inline__ void __DEFAULT_FN_ATTRS _mm_store1_ps(float *__p, __m128 __a)
Stores the lower 32 bits of a 128-bit vector of [4 x float] into four contiguous elements in an align...
Definition xmmintrin.h:2023
static __inline__ __m128 __DEFAULT_FN_ATTRS_MMX _mm_cvtpu16_ps(__m64 __a)
Converts a 64-bit vector of 16-bit unsigned integer values into a 128-bit vector of [4 x float].
Definition xmmintrin.h:2759
void _mm_sfence(void)
Forces strong memory ordering (serialization) between store instructions preceding this instruction a...
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_set_ps1(float __w)
Constructs a 128-bit floating-point vector of [4 x float], with each of the four single-precision flo...
Definition xmmintrin.h:1833
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_mul_ps(__m128 __a, __m128 __b)
Multiplies two 128-bit vectors of [4 x float] and returns the results of the multiplication.
Definition xmmintrin.h:155
#define __DEFAULT_FN_ATTRS
Definition xmmintrin.h:31
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_max_ps(__m128 __a, __m128 __b)
Compares two 128-bit vectors of [4 x float] and returns the greater of each pair of values.
Definition xmmintrin.h:386
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_rsqrt_ss(__m128 __a)
Calculates the approximate reciprocal of the square root of the value stored in the low-order bits of...
Definition xmmintrin.h:285
static __inline__ int __DEFAULT_FN_ATTRS _mm_ucomige_ss(__m128 __a, __m128 __b)
Performs an unordered comparison of two 32-bit float values using the low-order bits of both operands...
Definition xmmintrin.h:1260
static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX _mm_avg_pu16(__m64 __a, __m64 __b)
Computes the rounded averages of the packed unsigned 16-bit integer values and writes the averages to...
Definition xmmintrin.h:2426
static __inline__ int __DEFAULT_FN_ATTRS _mm_comilt_ss(__m128 __a, __m128 __b)
Compares two 32-bit float values in the low-order bits of both operands to determine if the first ope...
Definition xmmintrin.h:1041
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_loadl_pi(__m128 __a, const __m64 *__p)
Loads two packed float values from the address __p into the low-order bits of a 128-bit vector of [4 ...
Definition xmmintrin.h:1651
static __inline__ void __DEFAULT_FN_ATTRS _mm_storeu_ps(float *__p, __m128 __a)
Stores a 128-bit vector of [4 x float] to an unaligned memory location.
Definition xmmintrin.h:1983
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_sub_ss(__m128 __a, __m128 __b)
Subtracts the 32-bit float value in the low-order bits of the second operand from the corresponding v...
Definition xmmintrin.h:92
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_sub_ps(__m128 __a, __m128 __b)
Subtracts each of the values of the second operand from the first operand, both of which are 128-bit ...
Definition xmmintrin.h:113
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_load1_ps(const float *__p)
Loads a 32-bit float value and duplicates it to all four vector elements of a 128-bit vector of [4 x ...
Definition xmmintrin.h:1700
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_movelh_ps(__m128 __a, __m128 __b)
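Constructs a 128-bit floating-point vector of [4 x float] whose lower 64 bits are the lower 64 bits of the first operand and whose upper 64 bits are the lower 64 bits of the second operand.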
Definition xmmintrin.h:2711
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_min_ps(__m128 __a, __m128 __b)
Compares two 128-bit vectors of [4 x float] and returns the lesser of each pair of values.
Definition xmmintrin.h:344
static __inline__ int __DEFAULT_FN_ATTRS _mm_comige_ss(__m128 __a, __m128 __b)
Compares two 32-bit float values in the low-order bits of both operands to determine if the first ope...
Definition xmmintrin.h:1113
static __inline__ int __DEFAULT_FN_ATTRS _mm_cvtss_si32(__m128 __a)
Converts a float value contained in the lower 32 bits of a vector of [4 x float] into a 32-bit intege...
Definition xmmintrin.h:1302
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_cmpgt_ps(__m128 __a, __m128 __b)
Compares each of the corresponding 32-bit float values of the 128-bit vectors of [4 x float] to deter...
Definition xmmintrin.h:632
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_setr_ps(float __z, float __y, float __x, float __w)
Constructs a 128-bit floating-point vector of [4 x float], initialized in reverse order with the spec...
Definition xmmintrin.h:1888
static __inline__ int __DEFAULT_FN_ATTRS _mm_ucomigt_ss(__m128 __a, __m128 __b)
Performs an unordered comparison of two 32-bit float values using the low-order bits of both operands...
Definition xmmintrin.h:1235
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_unpackhi_ps(__m128 __a, __m128 __b)
Unpacks the high-order (index 2,3) values from two 128-bit vectors of [4 x float] and interleaves the...
Definition xmmintrin.h:2624
static __inline__ void __DEFAULT_FN_ATTRS _mm_store_ss(float *__p, __m128 __a)
Stores the lower 32 bits of a 128-bit vector of [4 x float] to a memory location.
Definition xmmintrin.h:1962
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_cmpngt_ss(__m128 __a, __m128 __b)
Compares two 32-bit float values in the low-order bits of both operands to determine if the value in ...
Definition xmmintrin.h:833
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_loadh_pi(__m128 __a, const __m64 *__p)
Loads two packed float values from the address __p into the high-order bits of a 128-bit vector of [4...
Definition xmmintrin.h:1624
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_xor_ps(__m128 __a, __m128 __b)
Performs a bitwise exclusive OR of two 128-bit vectors of [4 x float].
Definition xmmintrin.h:463
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_rcp_ps(__m128 __a)
Calculates the approximate reciprocals of the values stored in a 128-bit vector of [4 x float].
Definition xmmintrin.h:266
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_move_ss(__m128 __a, __m128 __b)
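Constructs a 128-bit floating-point vector of [4 x float] whose lower 32 bits are the lower 32 bits of the second operand and whose upper 96 bits are the upper 96 bits of the first operand.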
Definition xmmintrin.h:2668
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_set1_ps(float __w)
Constructs a 128-bit floating-point vector of [4 x float], with each of the four single-precision flo...
Definition xmmintrin.h:1814
static __inline__ void __DEFAULT_FN_ATTRS_MMX _mm_stream_pi(__m64 *__p, __m64 __a)
Stores a 64-bit integer in the specified aligned memory location.
Definition xmmintrin.h:2120
static __inline__ void __DEFAULT_FN_ATTRS _mm_store_ps(float *__p, __m128 __a)
Stores a 128-bit vector of [4 x float] into an aligned memory location.
Definition xmmintrin.h:2004
void _mm_setcsr(unsigned int __i)
Sets the MXCSR register with the 32-bit unsigned integer value.
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_or_ps(__m128 __a, __m128 __b)
Performs a bitwise OR of two 128-bit vectors of [4 x float].
Definition xmmintrin.h:444
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_sqrt_ps(__m128 __a)
Calculates the square roots of the values stored in a 128-bit vector of [4 x float].
Definition xmmintrin.h:231
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_cmpneq_ss(__m128 __a, __m128 __b)
Compares two 32-bit float values in the low-order bits of both operands for inequality and returns th...
Definition xmmintrin.h:700
static __inline__ float __DEFAULT_FN_ATTRS _mm_cvtss_f32(__m128 __a)
Extracts a float value contained in the lower 32 bits of a vector of [4 x float].
Definition xmmintrin.h:1603
static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX _mm_max_pu8(__m64 __a, __m64 __b)
Compares each of the corresponding packed 8-bit unsigned integer values of the 64-bit integer vectors...
Definition xmmintrin.h:2250
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_mul_ss(__m128 __a, __m128 __b)
Multiplies two 32-bit float values in the low-order bits of the operands.
Definition xmmintrin.h:135
static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX _mm_min_pi16(__m64 __a, __m64 __b)
Compares each of the corresponding packed 16-bit integer values of the 64-bit integer vectors,...
Definition xmmintrin.h:2269
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_cvtsi32_ss(__m128 __a, int __b)
Converts a 32-bit signed integer value into a floating point value and writes it to the lower 32 bits...
Definition xmmintrin.h:1490
static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX _mm_cvtt_ps2pi(__m128 __a)
Converts two low-order float values in a 128-bit vector of [4 x float] into a 64-bit vector of [2 x i...
Definition xmmintrin.h:1468
static __inline__ int __DEFAULT_FN_ATTRS _mm_cvtt_ss2si(__m128 __a)
Converts a float value contained in the lower 32 bits of a vector of [4 x float] into a 32-bit intege...
Definition xmmintrin.h:1412
static __inline__ int __DEFAULT_FN_ATTRS _mm_movemask_ps(__m128 __a)
Extracts the sign bits from each single-precision floating-point element of a 128-bit floating-point ...
Definition xmmintrin.h:2924
static __inline__ __m128 __DEFAULT_FN_ATTRS_MMX _mm_cvtpi32x2_ps(__m64 __a, __m64 __b)
Converts the two 32-bit signed integer values from each 64-bit vector operand of [2 x i32] into a 128...
Definition xmmintrin.h:2840
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_movehl_ps(__m128 __a, __m128 __b)
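Constructs a 128-bit floating-point vector of [4 x float] whose lower 64 bits are the upper 64 bits of the second operand and whose upper 64 bits are the upper 64 bits of the first operand.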
Definition xmmintrin.h:2690
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_loadr_ps(const float *__p)
Loads four packed float values, in reverse order, from an aligned memory location to 32-bit elements ...
Definition xmmintrin.h:1762
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_cmpord_ss(__m128 __a, __m128 __b)
Compares two 32-bit float values in the low-order bits of both operands to determine if the value in ...
Definition xmmintrin.h:927
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_cmpnlt_ps(__m128 __a, __m128 __b)
Compares each of the corresponding 32-bit float values of the 128-bit vectors of [4 x float] to deter...
Definition xmmintrin.h:763
static __inline__ void __DEFAULT_FN_ATTRS _mm_storeh_pi(__m64 *__p, __m128 __a)
Stores the upper 64 bits of a 128-bit vector of [4 x float] to a memory location.
Definition xmmintrin.h:1920
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_cmpngt_ps(__m128 __a, __m128 __b)
Compares each of the corresponding 32-bit float values of the 128-bit vectors of [4 x float] to deter...
Definition xmmintrin.h:855
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_cmpnge_ps(__m128 __a, __m128 __b)
Compares each of the corresponding 32-bit float values of the 128-bit vectors of [4 x float] to deter...
Definition xmmintrin.h:902
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_cmpord_ps(__m128 __a, __m128 __b)
Compares each of the corresponding 32-bit float values of the 128-bit vectors of [4 x float] to deter...
Definition xmmintrin.h:947
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_cmpgt_ss(__m128 __a, __m128 __b)
Compares two 32-bit float values in the low-order bits of both operands to determine if the value in ...
Definition xmmintrin.h:611
static __inline__ __m128 __DEFAULT_FN_ATTRS_MMX _mm_cvt_pi2ps(__m128 __a, __m64 __b)
Converts two elements of a 64-bit vector of [2 x i32] into two floating point values and writes them ...
Definition xmmintrin.h:1586
static __inline__ int __DEFAULT_FN_ATTRS _mm_ucomieq_ss(__m128 __a, __m128 __b)
Performs an unordered comparison of two 32-bit float values using the low-order bits of both operands...
Definition xmmintrin.h:1161
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_add_ss(__m128 __a, __m128 __b)
Adds the 32-bit float values in the low-order bits of the operands.
Definition xmmintrin.h:50
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_set_ps(float __z, float __y, float __x, float __w)
Constructs a 128-bit floating-point vector of [4 x float] initialized with the specified single-preci...
Definition xmmintrin.h:1860
static __inline__ int __DEFAULT_FN_ATTRS_MMX _mm_movemask_pi8(__m64 __a)
Takes the most significant bit from each 8-bit element in a 64-bit integer vector to create an 8-bit ...
Definition xmmintrin.h:2306
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_cmpnlt_ss(__m128 __a, __m128 __b)
Compares two 32-bit float values in the low-order bits of both operands to determine if the value in ...
Definition xmmintrin.h:743
static __inline__ void __DEFAULT_FN_ATTRS _mm_store_ps1(float *__p, __m128 __a)
Stores the lower 32 bits of a 128-bit vector of [4 x float] into four contiguous elements in an align...
Definition xmmintrin.h:2043
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_min_ss(__m128 __a, __m128 __b)
Compares two 32-bit float values in the low-order bits of both operands and returns the lesser value ...
Definition xmmintrin.h:325
static __inline__ __m128 __DEFAULT_FN_ATTRS_MMX _mm_cvtpu8_ps(__m64 __a)
Converts the lower four unsigned 8-bit integer values from a 64-bit vector of [8 x u8] into a 128-bit...
Definition xmmintrin.h:2813
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_cmple_ps(__m128 __a, __m128 __b)
Compares each of the corresponding 32-bit float values of the 128-bit vectors of [4 x float] to deter...
Definition xmmintrin.h:588
unsigned int _mm_getcsr(void)
Returns the contents of the MXCSR register as a 32-bit unsigned integer value.
#define __DEFAULT_FN_ATTRS_MMX
Definition xmmintrin.h:32
static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX _mm_avg_pu8(__m64 __a, __m64 __b)
Computes the rounded averages of the packed unsigned 8-bit integer values and writes the averages to ...
Definition xmmintrin.h:2407
static __inline__ __m128 __DEFAULT_FN_ATTRS_MMX _mm_cvtpi8_ps(__m64 __a)
Converts the lower four 8-bit values from a 64-bit vector of [8 x i8] into a 128-bit vector of [4 x f...
Definition xmmintrin.h:2788
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_unpacklo_ps(__m128 __a, __m128 __b)
Unpacks the low-order (index 0,1) values from two 128-bit vectors of [4 x float] and interleaves them...
Definition xmmintrin.h:2646
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_max_ss(__m128 __a, __m128 __b)
Compares two 32-bit float values in the low-order bits of both operands and returns the greater value...
Definition xmmintrin.h:367
static __inline__ void __DEFAULT_FN_ATTRS_MMX _mm_maskmove_si64(__m64 __d, __m64 __n, char *__p)
Conditionally copies the values from each 8-bit element in the first 64-bit integer vector operand to...
Definition xmmintrin.h:2388
static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX _mm_max_pi16(__m64 __a, __m64 __b)
Compares each of the corresponding packed 16-bit integer values of the 64-bit integer vectors,...
Definition xmmintrin.h:2231
static __inline__ int __DEFAULT_FN_ATTRS _mm_ucomineq_ss(__m128 __a, __m128 __b)
Performs an unordered comparison of two 32-bit float values using the low-order bits of both operands...
Definition xmmintrin.h:1284
static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX _mm_cvtps_pi16(__m128 __a)
Converts each single-precision floating-point element of a 128-bit floating-point vector of [4 x floa...
Definition xmmintrin.h:2869
static __inline__ int __DEFAULT_FN_ATTRS _mm_comineq_ss(__m128 __a, __m128 __b)
Compares two 32-bit float values in the low-order bits of both operands to determine if the first ope...
Definition xmmintrin.h:1137
static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX _mm_min_pu8(__m64 __a, __m64 __b)
Compares each of the corresponding packed 8-bit unsigned integer values of the 64-bit integer vectors...
Definition xmmintrin.h:2288
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_cmpnle_ps(__m128 __a, __m128 __b)
Compares each of the corresponding 32-bit float values of the 128-bit vectors of [4 x float] to deter...
Definition xmmintrin.h:808
static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX _mm_sad_pu8(__m64 __a, __m64 __b)
Subtracts the corresponding 8-bit unsigned integer values of the two 64-bit vector operands and compu...
Definition xmmintrin.h:2448
static __inline__ __m128 __DEFAULT_FN_ATTRS_MMX _mm_cvtpi32_ps(__m128 __a, __m64 __b)
Converts two elements of a 64-bit vector of [2 x i32] into two floating point values and writes them ...
Definition xmmintrin.h:1563
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_cmpunord_ss(__m128 __a, __m128 __b)
Compares two 32-bit float values in the low-order bits of both operands to determine if the value in ...
Definition xmmintrin.h:972
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_cmpge_ss(__m128 __a, __m128 __b)
Compares two 32-bit float values in the low-order bits of both operands to determine if the value in ...
Definition xmmintrin.h:656
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_cmpunord_ps(__m128 __a, __m128 __b)
Compares each of the corresponding 32-bit float values of the 128-bit vectors of [4 x float] to deter...
Definition xmmintrin.h:992
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_loadu_ps(const float *__p)
Loads a 128-bit floating-point vector of [4 x float] from an unaligned memory location.
Definition xmmintrin.h:1740
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_load_ss(const float *__p)
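Constructs a 128-bit floating-point vector of [4 x float] whose lower 32 bits hold the float value loaded from the specified memory location and whose upper 96 bits are set to zero.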
Definition xmmintrin.h:1678
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_div_ss(__m128 __a, __m128 __b)
Divides the value in the low-order 32 bits of the first operand by the corresponding value in the sec...
Definition xmmintrin.h:177