10 #error "Never use <avx512fintrin.h> directly; include <immintrin.h> instead."
13 #ifndef __AVX512FINTRIN_H
14 #define __AVX512FINTRIN_H
/* Internal 512-bit vector types used by the intrinsic implementations.
   (Extraction had fused original line numbers into the text; removed.) */
typedef long long __v8di __attribute__((__vector_size__(64)));
typedef unsigned char __v64qu __attribute__((__vector_size__(64)));
typedef unsigned short __v32hu __attribute__((__vector_size__(64)));
typedef unsigned long long __v8du __attribute__((__vector_size__(64)));
typedef unsigned int __v16su __attribute__((__vector_size__(64)));

/* Public 512-bit vector types; 64-byte aligned for aligned load/store. */
typedef float __m512 __attribute__((__vector_size__(64), __aligned__(64)));
typedef double __m512d __attribute__((__vector_size__(64), __aligned__(64)));
typedef long long __m512i __attribute__((__vector_size__(64), __aligned__(64)));

/* Unaligned variants, used by the loadu/storeu intrinsics. */
typedef float __m512_u __attribute__((__vector_size__(64), __aligned__(1)));
typedef double __m512d_u __attribute__((__vector_size__(64), __aligned__(1)));
typedef long long __m512i_u __attribute__((__vector_size__(64), __aligned__(1)));
/* Rounding-control immediates accepted by the *_round_* intrinsics. */
#define _MM_FROUND_TO_NEAREST_INT 0x00
#define _MM_FROUND_TO_NEG_INF 0x01
#define _MM_FROUND_TO_POS_INF 0x02
#define _MM_FROUND_TO_ZERO 0x03
#define _MM_FROUND_CUR_DIRECTION 0x04

/* GE/GT comparison predicates are expressed in terms of their negated
   counterparts (not-less-than / not-less-or-equal). */
#define _MM_CMPINT_GE _MM_CMPINT_NLT
#define _MM_CMPINT_GT _MM_CMPINT_NLE
/* Attributes applied to every intrinsic defined in this header: forced
   inlining, no debug info, and the avx512f target feature. The 512/128
   variants also record the minimum vector width used for ABI warnings. */
#define __DEFAULT_FN_ATTRS512 __attribute__((__always_inline__, __nodebug__, __target__("avx512f"), __min_vector_width__(512)))
#define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__("avx512f"), __min_vector_width__(128)))
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512f")))
175 return __extension__ (__m512i)(__v8di){ 0, 0, 0, 0, 0, 0, 0, 0 };
/* Legacy alias kept for source compatibility. */
#define _mm512_setzero_epi32 _mm512_setzero_si512
183 return (__m512d)__builtin_ia32_undef512();
189 return (__m512)__builtin_ia32_undef512();
195 return (__m512)__builtin_ia32_undef512();
201 return (__m512i)__builtin_ia32_undef512();
207 return (__m512i)__builtin_shufflevector((__v4si) __A, (__v4si) __A,
208 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
214 return (__m512i)__builtin_ia32_selectd_512(__M,
222 return (__m512i)__builtin_ia32_selectd_512(__M,
230 return (__m512i)__builtin_shufflevector((__v2di) __A, (__v2di) __A,
231 0, 0, 0, 0, 0, 0, 0, 0);
237 return (__m512i)__builtin_ia32_selectq_512(__M,
246 return (__m512i)__builtin_ia32_selectq_512(__M,
255 return __extension__ (__m512){ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
256 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 };
/* Legacy alias kept for source compatibility. */
#define _mm512_setzero _mm512_setzero_ps
264 return __extension__ (__m512d){ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 };
270 return __extension__ (__m512){ __w, __w, __w, __w, __w, __w, __w, __w,
271 __w, __w, __w, __w, __w, __w, __w, __w };
277 return __extension__ (__m512d){ __w, __w, __w, __w, __w, __w, __w, __w };
283 return __extension__ (__m512i)(__v64qi){
284 __w, __w, __w, __w, __w, __w, __w, __w,
285 __w, __w, __w, __w, __w, __w, __w, __w,
286 __w, __w, __w, __w, __w, __w, __w, __w,
287 __w, __w, __w, __w, __w, __w, __w, __w,
288 __w, __w, __w, __w, __w, __w, __w, __w,
289 __w, __w, __w, __w, __w, __w, __w, __w,
290 __w, __w, __w, __w, __w, __w, __w, __w,
291 __w, __w, __w, __w, __w, __w, __w, __w };
297 return __extension__ (__m512i)(__v32hi){
298 __w, __w, __w, __w, __w, __w, __w, __w,
299 __w, __w, __w, __w, __w, __w, __w, __w,
300 __w, __w, __w, __w, __w, __w, __w, __w,
301 __w, __w, __w, __w, __w, __w, __w, __w };
307 return __extension__ (__m512i)(__v16si){
308 __s, __s, __s, __s, __s, __s, __s, __s,
309 __s, __s, __s, __s, __s, __s, __s, __s };
315 return (__m512i)__builtin_ia32_selectd_512(__M,
323 return __extension__(__m512i)(__v8di){ __d, __d, __d, __d, __d, __d, __d, __d };
329 return (__m512i)__builtin_ia32_selectq_512(__M,
337 return (__m512)__builtin_shufflevector((__v4sf) __A, (__v4sf) __A,
338 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
344 return __extension__ (__m512i)(__v16si)
353 return __extension__ (__m512i) (__v8di)
360 return __extension__ (__m512d)
367 return __extension__ (__m512)
/* The setr4 ("reversed") forms simply forward to the corresponding set4
   intrinsic with the four arguments in the opposite order. */
#define _mm512_setr4_epi32(e0,e1,e2,e3) \
  _mm512_set4_epi32((e3),(e2),(e1),(e0))

#define _mm512_setr4_epi64(e0,e1,e2,e3) \
  _mm512_set4_epi64((e3),(e2),(e1),(e0))

#define _mm512_setr4_pd(e0,e1,e2,e3) \
  _mm512_set4_pd((e3),(e2),(e1),(e0))

#define _mm512_setr4_ps(e0,e1,e2,e3) \
  _mm512_set4_ps((e3),(e2),(e1),(e0))
387 return (__m512d)__builtin_shufflevector((__v2df) __A, (__v2df) __A,
388 0, 0, 0, 0, 0, 0, 0, 0);
396 return __builtin_shufflevector(
__a,
__a, 0, 1, 2, 3, -1, -1, -1, -1);
402 return __builtin_shufflevector(
__a,
__a, 0, 1, 2, 3, 4, 5, 6, 7,
403 -1, -1, -1, -1, -1, -1, -1, -1);
409 return __builtin_shufflevector(
__a,
__a, 0, 1);
415 return __builtin_shufflevector(__A, __A, 0, 1, 2, 3);
421 return __builtin_shufflevector(
__a,
__a, 0, 1, 2, 3);
427 return __builtin_shufflevector(__A, __A, 0, 1, 2, 3, 4, 5, 6, 7);
433 return (__m512) (__A);
439 return (__m512i) (__A);
445 return __builtin_shufflevector( __A, __A, 0, 1, -1, -1, -1, -1, -1, -1);
451 return (__m512d) (__A);
457 return (__m512i) (__A);
463 return __builtin_shufflevector( __A, __A, 0, 1, 2, 3, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1);
469 return __builtin_shufflevector( __A, __A, 0, 1, -1, -1, -1, -1, -1, -1);
475 return __builtin_shufflevector( __A, __A, 0, 1, 2, 3, -1, -1, -1, -1);
481 return (__m512) (__A);
487 return (__m512d) (__A);
493 return (__m128i)__builtin_shufflevector(__A, __A , 0, 1);
499 return (__m256i)__builtin_shufflevector(__A, __A , 0, 1, 2, 3);
530 return __builtin_shufflevector((__v2df)
__a, (__v2df)
_mm_setzero_pd(), 0, 1, 2, 3, 2, 3, 2, 3);
549 return __builtin_shufflevector((__v4df)
__a, (__v4df)
_mm256_setzero_pd(), 0, 1, 2, 3, 4, 5, 6, 7);
567 return __builtin_shufflevector((__v4sf)
__a, (__v4sf)
_mm_setzero_ps(), 0, 1, 2, 3, 4, 5, 6, 7, 4, 5, 6, 7, 4, 5, 6, 7);
585 return __builtin_shufflevector((__v8sf)
__a, (__v8sf)
_mm256_setzero_ps(), 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
603 return __builtin_shufflevector((__v2di)
__a, (__v2di)
_mm_setzero_si128(), 0, 1, 2, 3, 2, 3, 2, 3);
628 return (__m512i)((__v16su)
__a & (__v16su)
__b);
634 return (__m512i)__builtin_ia32_selectd_512((
__mmask16)__k,
649 return (__m512i)((__v8du)
__a & (__v8du)
__b);
655 return (__m512i) __builtin_ia32_selectq_512 ((
__mmask8) __k,
670 return (__m512i)(~(__v8du)__A & (__v8du)__B);
676 return (__m512i)(~(__v16su)__A & (__v16su)__B);
682 return (__m512i)__builtin_ia32_selectd_512((
__mmask16)__U,
697 return (__m512i)(~(__v8du)__A & (__v8du)__B);
703 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__U,
718 return (__m512i)((__v16su)
__a | (__v16su)
__b);
724 return (__m512i)__builtin_ia32_selectd_512((
__mmask16)__k,
738 return (__m512i)((__v8du)
__a | (__v8du)
__b);
744 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__k,
758 return (__m512i)((__v16su)
__a ^ (__v16su)
__b);
764 return (__m512i)__builtin_ia32_selectd_512((
__mmask16)__k,
778 return (__m512i)((__v8du)
__a ^ (__v8du)
__b);
784 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__k,
798 return (__m512i)((__v8du)
__a & (__v8du)
__b);
804 return (__m512i)((__v8du)
__a | (__v8du)
__b);
810 return (__m512i)((__v8du)
__a ^ (__v8du)
__b);
818 return (__m512d)((__v8df)
__a + (__v8df)
__b);
824 return (__m512)((__v16sf)
__a + (__v16sf)
__b);
830 return (__m512d)((__v8df)
__a * (__v8df)
__b);
836 return (__m512)((__v16sf)
__a * (__v16sf)
__b);
842 return (__m512d)((__v8df)
__a - (__v8df)
__b);
848 return (__m512)((__v16sf)
__a - (__v16sf)
__b);
854 return (__m512i) ((__v8du) __A + (__v8du) __B);
860 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__U,
868 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__U,
876 return (__m512i) ((__v8du) __A - (__v8du) __B);
882 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__U,
890 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__U,
898 return (__m512i) ((__v16su) __A + (__v16su) __B);
904 return (__m512i)__builtin_ia32_selectd_512((
__mmask16)__U,
912 return (__m512i)__builtin_ia32_selectd_512((
__mmask16)__U,
920 return (__m512i) ((__v16su) __A - (__v16su) __B);
926 return (__m512i)__builtin_ia32_selectd_512((
__mmask16)__U,
934 return (__m512i)__builtin_ia32_selectd_512((
__mmask16)__U,
/* packed double max with explicit rounding/SAE control R.
   NOTE(review): the mask variant was truncated in this copy — restored the
   final write-through operand `(W)` to match the maskz pattern below. */
#define _mm512_max_round_pd(A, B, R) \
  (__m512d)__builtin_ia32_maxpd512((__v8df)(__m512d)(A), \
                                   (__v8df)(__m512d)(B), (int)(R))

#define _mm512_mask_max_round_pd(W, U, A, B, R) \
  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                       (__v8df)_mm512_max_round_pd((A), (B), (R)), \
                                       (__v8df)(__m512d)(W))

#define _mm512_maskz_max_round_pd(U, A, B, R) \
  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                       (__v8df)_mm512_max_round_pd((A), (B), (R)), \
                                       (__v8df)_mm512_setzero_pd())
956 return (__m512d) __builtin_ia32_maxpd512((__v8df) __A, (__v8df) __B,
963 return (__m512d)__builtin_ia32_selectpd_512(__U,
971 return (__m512d)__builtin_ia32_selectpd_512(__U,
/* packed float max with explicit rounding/SAE control R.
   NOTE(review): the mask variant was truncated in this copy — restored the
   final write-through operand `(W)` to match the maskz pattern below. */
#define _mm512_max_round_ps(A, B, R) \
  (__m512)__builtin_ia32_maxps512((__v16sf)(__m512)(A), \
                                  (__v16sf)(__m512)(B), (int)(R))

#define _mm512_mask_max_round_ps(W, U, A, B, R) \
  (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                      (__v16sf)_mm512_max_round_ps((A), (B), (R)), \
                                      (__v16sf)(__m512)(W))

#define _mm512_maskz_max_round_ps(U, A, B, R) \
  (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                      (__v16sf)_mm512_max_round_ps((A), (B), (R)), \
                                      (__v16sf)_mm512_setzero_ps())
993 return (__m512) __builtin_ia32_maxps512((__v16sf) __A, (__v16sf) __B,
1000 return (__m512)__builtin_ia32_selectps_512(__U,
1008 return (__m512)__builtin_ia32_selectps_512(__U,
1015 return (__m128) __builtin_ia32_maxss_round_mask ((__v4sf) __A,
1024 return (__m128) __builtin_ia32_maxss_round_mask ((__v4sf) __A,
/* scalar float max (low lane), explicit rounding control R.
   NOTE(review): the mask variant was truncated — restored the trailing
   `(int)(R))` operand, matching the maskz form below. */
#define _mm_max_round_ss(A, B, R) \
  (__m128)__builtin_ia32_maxss_round_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4sf)_mm_setzero_ps(), \
                                          (__mmask8)-1, (int)(R))

#define _mm_mask_max_round_ss(W, U, A, B, R) \
  (__m128)__builtin_ia32_maxss_round_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4sf)(__m128)(W), (__mmask8)(U), \
                                          (int)(R))

#define _mm_maskz_max_round_ss(U, A, B, R) \
  (__m128)__builtin_ia32_maxss_round_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4sf)_mm_setzero_ps(), \
                                          (__mmask8)(U), (int)(R))
1051 return (__m128d) __builtin_ia32_maxsd_round_mask ((__v2df) __A,
1060 return (__m128d) __builtin_ia32_maxsd_round_mask ((__v2df) __A,
/* scalar double max (low lane), explicit rounding control R. */
#define _mm_max_round_sd(A, B, R) \
  (__m128d)__builtin_ia32_maxsd_round_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2df)_mm_setzero_pd(), \
                                           (__mmask8)-1, (int)(R))

#define _mm_mask_max_round_sd(W, U, A, B, R) \
  (__m128d)__builtin_ia32_maxsd_round_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2df)(__m128d)(W), \
                                           (__mmask8)(U), (int)(R))

#define _mm_maskz_max_round_sd(U, A, B, R) \
  (__m128d)__builtin_ia32_maxsd_round_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2df)_mm_setzero_pd(), \
                                           (__mmask8)(U), (int)(R))
1085 static __inline __m512i
1089 return (__m512i)__builtin_ia32_pmaxsd512((__v16si)__A, (__v16si)__B);
1095 return (__m512i)__builtin_ia32_selectd_512((
__mmask16)__M,
1103 return (__m512i)__builtin_ia32_selectd_512((
__mmask16)__M,
1111 return (__m512i)__builtin_ia32_pmaxud512((__v16si)__A, (__v16si)__B);
1117 return (__m512i)__builtin_ia32_selectd_512((
__mmask16)__M,
1125 return (__m512i)__builtin_ia32_selectd_512((
__mmask16)__M,
1133 return (__m512i)__builtin_ia32_pmaxsq512((__v8di)__A, (__v8di)__B);
1139 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__M,
1147 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__M,
1155 return (__m512i)__builtin_ia32_pmaxuq512((__v8di)__A, (__v8di)__B);
1161 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__M,
1169 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__M,
/* packed double min with explicit rounding/SAE control R.
   NOTE(review): mask variant was truncated — restored the final `(W)`
   write-through operand, matching the maskz pattern below. */
#define _mm512_min_round_pd(A, B, R) \
  (__m512d)__builtin_ia32_minpd512((__v8df)(__m512d)(A), \
                                   (__v8df)(__m512d)(B), (int)(R))

#define _mm512_mask_min_round_pd(W, U, A, B, R) \
  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                       (__v8df)_mm512_min_round_pd((A), (B), (R)), \
                                       (__v8df)(__m512d)(W))

#define _mm512_maskz_min_round_pd(U, A, B, R) \
  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                       (__v8df)_mm512_min_round_pd((A), (B), (R)), \
                                       (__v8df)_mm512_setzero_pd())
1191 return (__m512d) __builtin_ia32_minpd512((__v8df) __A, (__v8df) __B,
1198 return (__m512d)__builtin_ia32_selectpd_512(__U,
1206 return (__m512d)__builtin_ia32_selectpd_512(__U,
/* packed float min with explicit rounding/SAE control R.
   NOTE(review): mask variant was truncated — restored the final `(W)`
   write-through operand, matching the maskz pattern below. */
#define _mm512_min_round_ps(A, B, R) \
  (__m512)__builtin_ia32_minps512((__v16sf)(__m512)(A), \
                                  (__v16sf)(__m512)(B), (int)(R))

#define _mm512_mask_min_round_ps(W, U, A, B, R) \
  (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                      (__v16sf)_mm512_min_round_ps((A), (B), (R)), \
                                      (__v16sf)(__m512)(W))

#define _mm512_maskz_min_round_ps(U, A, B, R) \
  (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                      (__v16sf)_mm512_min_round_ps((A), (B), (R)), \
                                      (__v16sf)_mm512_setzero_ps())
1228 return (__m512) __builtin_ia32_minps512((__v16sf) __A, (__v16sf) __B,
1235 return (__m512)__builtin_ia32_selectps_512(__U,
1243 return (__m512)__builtin_ia32_selectps_512(__U,
1250 return (__m128) __builtin_ia32_minss_round_mask ((__v4sf) __A,
1259 return (__m128) __builtin_ia32_minss_round_mask ((__v4sf) __A,
/* scalar float min (low lane), explicit rounding control R.
   NOTE(review): mask variant was truncated — restored the trailing
   `(int)(R))`, matching the maskz form below. */
#define _mm_min_round_ss(A, B, R) \
  (__m128)__builtin_ia32_minss_round_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4sf)_mm_setzero_ps(), \
                                          (__mmask8)-1, (int)(R))

#define _mm_mask_min_round_ss(W, U, A, B, R) \
  (__m128)__builtin_ia32_minss_round_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4sf)(__m128)(W), (__mmask8)(U), \
                                          (int)(R))

#define _mm_maskz_min_round_ss(U, A, B, R) \
  (__m128)__builtin_ia32_minss_round_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4sf)_mm_setzero_ps(), \
                                          (__mmask8)(U), (int)(R))
1286 return (__m128d) __builtin_ia32_minsd_round_mask ((__v2df) __A,
1295 return (__m128d) __builtin_ia32_minsd_round_mask ((__v2df) __A,
/* scalar double min (low lane), explicit rounding control R. */
#define _mm_min_round_sd(A, B, R) \
  (__m128d)__builtin_ia32_minsd_round_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2df)_mm_setzero_pd(), \
                                           (__mmask8)-1, (int)(R))

#define _mm_mask_min_round_sd(W, U, A, B, R) \
  (__m128d)__builtin_ia32_minsd_round_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2df)(__m128d)(W), \
                                           (__mmask8)(U), (int)(R))

#define _mm_maskz_min_round_sd(U, A, B, R) \
  (__m128d)__builtin_ia32_minsd_round_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2df)_mm_setzero_pd(), \
                                           (__mmask8)(U), (int)(R))
1320 static __inline __m512i
1324 return (__m512i)__builtin_ia32_pminsd512((__v16si)__A, (__v16si)__B);
1330 return (__m512i)__builtin_ia32_selectd_512((
__mmask16)__M,
1338 return (__m512i)__builtin_ia32_selectd_512((
__mmask16)__M,
1346 return (__m512i)__builtin_ia32_pminud512((__v16si)__A, (__v16si)__B);
1352 return (__m512i)__builtin_ia32_selectd_512((
__mmask16)__M,
1360 return (__m512i)__builtin_ia32_selectd_512((
__mmask16)__M,
1368 return (__m512i)__builtin_ia32_pminsq512((__v8di)__A, (__v8di)__B);
1374 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__M,
1382 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__M,
1390 return (__m512i)__builtin_ia32_pminuq512((__v8di)__A, (__v8di)__B);
1396 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__M,
1404 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__M,
1412 return (__m512i)__builtin_ia32_pmuldq512((__v16si)__X, (__v16si) __Y);
1418 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__M,
1426 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__M,
1434 return (__m512i)__builtin_ia32_pmuludq512((__v16si)__X, (__v16si)__Y);
1440 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__M,
1448 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__M,
1456 return (__m512i) ((__v16su) __A * (__v16su) __B);
1462 return (__m512i)__builtin_ia32_selectd_512((
__mmask16)__M,
1470 return (__m512i)__builtin_ia32_selectd_512((
__mmask16)__M,
1477 return (__m512i) ((__v8du) __A * (__v8du) __B);
1482 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__U,
/* packed double sqrt with explicit rounding control R. */
#define _mm512_sqrt_round_pd(A, R) \
  (__m512d)__builtin_ia32_sqrtpd512((__v8df)(__m512d)(A), (int)(R))

#define _mm512_mask_sqrt_round_pd(W, U, A, R) \
  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                       (__v8df)_mm512_sqrt_round_pd((A), (R)), \
                                       (__v8df)(__m512d)(W))

#define _mm512_maskz_sqrt_round_pd(U, A, R) \
  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                       (__v8df)_mm512_sqrt_round_pd((A), (R)), \
                                       (__v8df)_mm512_setzero_pd())
1503 return (__m512d)__builtin_ia32_sqrtpd512((__v8df)__A,
1510 return (__m512d)__builtin_ia32_selectpd_512(__U,
1518 return (__m512d)__builtin_ia32_selectpd_512(__U,
/* packed float sqrt with explicit rounding control R. */
#define _mm512_sqrt_round_ps(A, R) \
  (__m512)__builtin_ia32_sqrtps512((__v16sf)(__m512)(A), (int)(R))

#define _mm512_mask_sqrt_round_ps(W, U, A, R) \
  (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                      (__v16sf)_mm512_sqrt_round_ps((A), (R)), \
                                      (__v16sf)(__m512)(W))

#define _mm512_maskz_sqrt_round_ps(U, A, R) \
  (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                      (__v16sf)_mm512_sqrt_round_ps((A), (R)), \
                                      (__v16sf)_mm512_setzero_ps())
1539 return (__m512)__builtin_ia32_sqrtps512((__v16sf)__A,
1546 return (__m512)__builtin_ia32_selectps_512(__U,
1554 return (__m512)__builtin_ia32_selectps_512(__U,
1562 return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
1570 return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
1578 return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
1587 return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
1596 return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
1604 return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
1613 return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __A,
1623 return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __A,
1632 return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __A,
1641 return (__m128d) __builtin_ia32_rsqrt14sd_mask ((__v2df) __A,
1651 return (__m128d) __builtin_ia32_rsqrt14sd_mask ( (__v2df) __A,
1660 return (__m128d) __builtin_ia32_rsqrt14sd_mask ( (__v2df) __A,
1669 return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
1678 return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
1686 return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
1695 return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
1704 return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
1712 return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
1721 return (__m128) __builtin_ia32_rcp14ss_mask ((__v4sf) __A,
1731 return (__m128) __builtin_ia32_rcp14ss_mask ((__v4sf) __A,
1740 return (__m128) __builtin_ia32_rcp14ss_mask ((__v4sf) __A,
1749 return (__m128d) __builtin_ia32_rcp14sd_mask ((__v2df) __A,
1759 return (__m128d) __builtin_ia32_rcp14sd_mask ( (__v2df) __A,
1768 return (__m128d) __builtin_ia32_rcp14sd_mask ( (__v2df) __A,
1777 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
1786 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
1795 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
1804 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
1813 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
1822 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
1831 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
1840 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
1849 return (__m512i)__builtin_ia32_pabsq512((__v8di)__A);
1855 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__U,
1863 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__U,
1871 return (__m512i)__builtin_ia32_pabsd512((__v16si) __A);
1877 return (__m512i)__builtin_ia32_selectd_512(__U,
1885 return (__m512i)__builtin_ia32_selectd_512(__U,
1893 return __builtin_ia32_selectss_128(__U, __A, __W);
/* scalar float add (low lane), explicit rounding control R.
   NOTE(review): mask variant was truncated — restored the trailing
   `(int)(R))`, matching the maskz form below. */
#define _mm_add_round_ss(A, B, R) \
  (__m128)__builtin_ia32_addss_round_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4sf)_mm_setzero_ps(), \
                                          (__mmask8)-1, (int)(R))

#define _mm_mask_add_round_ss(W, U, A, B, R) \
  (__m128)__builtin_ia32_addss_round_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4sf)(__m128)(W), (__mmask8)(U), \
                                          (int)(R))

#define _mm_maskz_add_round_ss(U, A, B, R) \
  (__m128)__builtin_ia32_addss_round_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4sf)_mm_setzero_ps(), \
                                          (__mmask8)(U), (int)(R))
1923 return __builtin_ia32_selectsd_128(__U, __A, __W);
/* scalar double add (low lane), explicit rounding control R. */
#define _mm_add_round_sd(A, B, R) \
  (__m128d)__builtin_ia32_addsd_round_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2df)_mm_setzero_pd(), \
                                           (__mmask8)-1, (int)(R))

#define _mm_mask_add_round_sd(W, U, A, B, R) \
  (__m128d)__builtin_ia32_addsd_round_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2df)(__m128d)(W), \
                                           (__mmask8)(U), (int)(R))

#define _mm_maskz_add_round_sd(U, A, B, R) \
  (__m128d)__builtin_ia32_addsd_round_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2df)_mm_setzero_pd(), \
                                           (__mmask8)(U), (int)(R))
1951 return (__m512d)__builtin_ia32_selectpd_512((
__mmask8)__U,
1958 return (__m512d)__builtin_ia32_selectpd_512((
__mmask8)__U,
1965 return (__m512)__builtin_ia32_selectps_512((
__mmask16)__U,
1972 return (__m512)__builtin_ia32_selectps_512((
__mmask16)__U,
/* packed double/float add with explicit rounding control R. */
#define _mm512_add_round_pd(A, B, R) \
  (__m512d)__builtin_ia32_addpd512((__v8df)(__m512d)(A), \
                                   (__v8df)(__m512d)(B), (int)(R))

#define _mm512_mask_add_round_pd(W, U, A, B, R) \
  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                       (__v8df)_mm512_add_round_pd((A), (B), (R)), \
                                       (__v8df)(__m512d)(W))

#define _mm512_maskz_add_round_pd(U, A, B, R) \
  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                       (__v8df)_mm512_add_round_pd((A), (B), (R)), \
                                       (__v8df)_mm512_setzero_pd())

#define _mm512_add_round_ps(A, B, R) \
  (__m512)__builtin_ia32_addps512((__v16sf)(__m512)(A), \
                                  (__v16sf)(__m512)(B), (int)(R))

#define _mm512_mask_add_round_ps(W, U, A, B, R) \
  (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                      (__v16sf)_mm512_add_round_ps((A), (B), (R)), \
                                      (__v16sf)(__m512)(W))

#define _mm512_maskz_add_round_ps(U, A, B, R) \
  (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                      (__v16sf)_mm512_add_round_ps((A), (B), (R)), \
                                      (__v16sf)_mm512_setzero_ps())
2008 return __builtin_ia32_selectss_128(__U, __A, __W);
/* scalar float subtract (low lane), explicit rounding control R.
   NOTE(review): mask variant was truncated — restored the trailing
   `(int)(R))`, matching the maskz form below. */
#define _mm_sub_round_ss(A, B, R) \
  (__m128)__builtin_ia32_subss_round_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4sf)_mm_setzero_ps(), \
                                          (__mmask8)-1, (int)(R))

#define _mm_mask_sub_round_ss(W, U, A, B, R) \
  (__m128)__builtin_ia32_subss_round_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4sf)(__m128)(W), (__mmask8)(U), \
                                          (int)(R))

#define _mm_maskz_sub_round_ss(U, A, B, R) \
  (__m128)__builtin_ia32_subss_round_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4sf)_mm_setzero_ps(), \
                                          (__mmask8)(U), (int)(R))
2037 return __builtin_ia32_selectsd_128(__U, __A, __W);
/* scalar double subtract (low lane), explicit rounding control R. */
#define _mm_sub_round_sd(A, B, R) \
  (__m128d)__builtin_ia32_subsd_round_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2df)_mm_setzero_pd(), \
                                           (__mmask8)-1, (int)(R))

#define _mm_mask_sub_round_sd(W, U, A, B, R) \
  (__m128d)__builtin_ia32_subsd_round_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2df)(__m128d)(W), \
                                           (__mmask8)(U), (int)(R))

#define _mm_maskz_sub_round_sd(U, A, B, R) \
  (__m128d)__builtin_ia32_subsd_round_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2df)_mm_setzero_pd(), \
                                           (__mmask8)(U), (int)(R))
2066 return (__m512d)__builtin_ia32_selectpd_512((
__mmask8)__U,
2073 return (__m512d)__builtin_ia32_selectpd_512((
__mmask8)__U,
2080 return (__m512)__builtin_ia32_selectps_512((
__mmask16)__U,
2087 return (__m512)__builtin_ia32_selectps_512((
__mmask16)__U,
/* packed double/float subtract with explicit rounding control R. */
#define _mm512_sub_round_pd(A, B, R) \
  (__m512d)__builtin_ia32_subpd512((__v8df)(__m512d)(A), \
                                   (__v8df)(__m512d)(B), (int)(R))

#define _mm512_mask_sub_round_pd(W, U, A, B, R) \
  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                       (__v8df)_mm512_sub_round_pd((A), (B), (R)), \
                                       (__v8df)(__m512d)(W))

#define _mm512_maskz_sub_round_pd(U, A, B, R) \
  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                       (__v8df)_mm512_sub_round_pd((A), (B), (R)), \
                                       (__v8df)_mm512_setzero_pd())

#define _mm512_sub_round_ps(A, B, R) \
  (__m512)__builtin_ia32_subps512((__v16sf)(__m512)(A), \
                                  (__v16sf)(__m512)(B), (int)(R))

#define _mm512_mask_sub_round_ps(W, U, A, B, R) \
  (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                      (__v16sf)_mm512_sub_round_ps((A), (B), (R)), \
                                      (__v16sf)(__m512)(W))

#define _mm512_maskz_sub_round_ps(U, A, B, R) \
  (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                      (__v16sf)_mm512_sub_round_ps((A), (B), (R)), \
                                      (__v16sf)_mm512_setzero_ps())
2123 return __builtin_ia32_selectss_128(__U, __A, __W);
/* scalar float multiply (low lane), explicit rounding control R.
   NOTE(review): mask variant was truncated — restored the trailing
   `(int)(R))`, matching the maskz form below. */
#define _mm_mul_round_ss(A, B, R) \
  (__m128)__builtin_ia32_mulss_round_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4sf)_mm_setzero_ps(), \
                                          (__mmask8)-1, (int)(R))

#define _mm_mask_mul_round_ss(W, U, A, B, R) \
  (__m128)__builtin_ia32_mulss_round_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4sf)(__m128)(W), (__mmask8)(U), \
                                          (int)(R))

#define _mm_maskz_mul_round_ss(U, A, B, R) \
  (__m128)__builtin_ia32_mulss_round_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4sf)_mm_setzero_ps(), \
                                          (__mmask8)(U), (int)(R))
2152 return __builtin_ia32_selectsd_128(__U, __A, __W);
/* scalar double multiply (low lane), explicit rounding control R. */
#define _mm_mul_round_sd(A, B, R) \
  (__m128d)__builtin_ia32_mulsd_round_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2df)_mm_setzero_pd(), \
                                           (__mmask8)-1, (int)(R))

#define _mm_mask_mul_round_sd(W, U, A, B, R) \
  (__m128d)__builtin_ia32_mulsd_round_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2df)(__m128d)(W), \
                                           (__mmask8)(U), (int)(R))

#define _mm_maskz_mul_round_sd(U, A, B, R) \
  (__m128d)__builtin_ia32_mulsd_round_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2df)_mm_setzero_pd(), \
                                           (__mmask8)(U), (int)(R))
2181 return (__m512d)__builtin_ia32_selectpd_512((
__mmask8)__U,
2188 return (__m512d)__builtin_ia32_selectpd_512((
__mmask8)__U,
2195 return (__m512)__builtin_ia32_selectps_512((
__mmask16)__U,
2202 return (__m512)__builtin_ia32_selectps_512((
__mmask16)__U,
/* packed double/float multiply with explicit rounding control R. */
#define _mm512_mul_round_pd(A, B, R) \
  (__m512d)__builtin_ia32_mulpd512((__v8df)(__m512d)(A), \
                                   (__v8df)(__m512d)(B), (int)(R))

#define _mm512_mask_mul_round_pd(W, U, A, B, R) \
  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                       (__v8df)_mm512_mul_round_pd((A), (B), (R)), \
                                       (__v8df)(__m512d)(W))

#define _mm512_maskz_mul_round_pd(U, A, B, R) \
  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                       (__v8df)_mm512_mul_round_pd((A), (B), (R)), \
                                       (__v8df)_mm512_setzero_pd())

#define _mm512_mul_round_ps(A, B, R) \
  (__m512)__builtin_ia32_mulps512((__v16sf)(__m512)(A), \
                                  (__v16sf)(__m512)(B), (int)(R))

#define _mm512_mask_mul_round_ps(W, U, A, B, R) \
  (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                      (__v16sf)_mm512_mul_round_ps((A), (B), (R)), \
                                      (__v16sf)(__m512)(W))

#define _mm512_maskz_mul_round_ps(U, A, B, R) \
  (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                      (__v16sf)_mm512_mul_round_ps((A), (B), (R)), \
                                      (__v16sf)_mm512_setzero_ps())
2238 return __builtin_ia32_selectss_128(__U, __A, __W);
/* scalar float divide (low lane), explicit rounding control R.
   NOTE(review): mask variant was truncated — restored the trailing
   `(int)(R))`, matching the maskz form below. */
#define _mm_div_round_ss(A, B, R) \
  (__m128)__builtin_ia32_divss_round_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4sf)_mm_setzero_ps(), \
                                          (__mmask8)-1, (int)(R))

#define _mm_mask_div_round_ss(W, U, A, B, R) \
  (__m128)__builtin_ia32_divss_round_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4sf)(__m128)(W), (__mmask8)(U), \
                                          (int)(R))

#define _mm_maskz_div_round_ss(U, A, B, R) \
  (__m128)__builtin_ia32_divss_round_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4sf)_mm_setzero_ps(), \
                                          (__mmask8)(U), (int)(R))
2268 return __builtin_ia32_selectsd_128(__U, __A, __W);
/* scalar double divide (low lane), explicit rounding control R. */
#define _mm_div_round_sd(A, B, R) \
  (__m128d)__builtin_ia32_divsd_round_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2df)_mm_setzero_pd(), \
                                           (__mmask8)-1, (int)(R))

#define _mm_mask_div_round_sd(W, U, A, B, R) \
  (__m128d)__builtin_ia32_divsd_round_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2df)(__m128d)(W), \
                                           (__mmask8)(U), (int)(R))

#define _mm_maskz_div_round_sd(U, A, B, R) \
  (__m128d)__builtin_ia32_divsd_round_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2df)_mm_setzero_pd(), \
                                           (__mmask8)(U), (int)(R))
2298 return (__m512d)((__v8df)
__a/(__v8df)
__b);
2303 return (__m512d)__builtin_ia32_selectpd_512((
__mmask8)__U,
2310 return (__m512d)__builtin_ia32_selectpd_512((
__mmask8)__U,
2318 return (__m512)((__v16sf)
__a/(__v16sf)
__b);
2323 return (__m512)__builtin_ia32_selectps_512((
__mmask16)__U,
2330 return (__m512)__builtin_ia32_selectps_512((
__mmask16)__U,
/* packed double/float divide with explicit rounding control R. */
#define _mm512_div_round_pd(A, B, R) \
  (__m512d)__builtin_ia32_divpd512((__v8df)(__m512d)(A), \
                                   (__v8df)(__m512d)(B), (int)(R))

#define _mm512_mask_div_round_pd(W, U, A, B, R) \
  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                       (__v8df)_mm512_div_round_pd((A), (B), (R)), \
                                       (__v8df)(__m512d)(W))

#define _mm512_maskz_div_round_pd(U, A, B, R) \
  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                       (__v8df)_mm512_div_round_pd((A), (B), (R)), \
                                       (__v8df)_mm512_setzero_pd())

#define _mm512_div_round_ps(A, B, R) \
  (__m512)__builtin_ia32_divps512((__v16sf)(__m512)(A), \
                                  (__v16sf)(__m512)(B), (int)(R))

#define _mm512_mask_div_round_ps(W, U, A, B, R) \
  (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                      (__v16sf)_mm512_div_round_ps((A), (B), (R)), \
                                      (__v16sf)(__m512)(W))

#define _mm512_maskz_div_round_ps(U, A, B, R) \
  (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                      (__v16sf)_mm512_div_round_ps((A), (B), (R)), \
                                      (__v16sf)_mm512_setzero_ps())
/* VRNDSCALEPS: round packed floats to the scaled number of fraction bits
   encoded in imm.  The non-_round variants use the current MXCSR rounding
   direction; the _round variants take an explicit R.
   (Restored the mask argument / (int)(R) lines that were dropped from the
   _mm512_roundscale_ps, _mm512_maskz_roundscale_ps and
   _mm512_mask_roundscale_round_ps expansions.) */
#define _mm512_roundscale_ps(A, B) \
  (__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(A), (int)(B), \
                                         (__v16sf)_mm512_undefined_ps(), \
                                         (__mmask16)-1, \
                                         _MM_FROUND_CUR_DIRECTION)

#define _mm512_mask_roundscale_ps(A, B, C, imm) \
  (__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(C), (int)(imm), \
                                         (__v16sf)(__m512)(A), (__mmask16)(B), \
                                         _MM_FROUND_CUR_DIRECTION)

#define _mm512_maskz_roundscale_ps(A, B, imm) \
  (__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(B), (int)(imm), \
                                         (__v16sf)_mm512_setzero_ps(), \
                                         (__mmask16)(A), \
                                         _MM_FROUND_CUR_DIRECTION)

#define _mm512_mask_roundscale_round_ps(A, B, C, imm, R) \
  (__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(C), (int)(imm), \
                                         (__v16sf)(__m512)(A), (__mmask16)(B), \
                                         (int)(R))

#define _mm512_maskz_roundscale_round_ps(A, B, imm, R) \
  (__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(B), (int)(imm), \
                                         (__v16sf)_mm512_setzero_ps(), \
                                         (__mmask16)(A), (int)(R))

#define _mm512_roundscale_round_ps(A, imm, R) \
  (__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(A), (int)(imm), \
                                         (__v16sf)_mm512_undefined_ps(), \
                                         (__mmask16)-1, (int)(R))
/* VRNDSCALEPD: double-precision counterpart of the roundscale family above;
   masks are 8-bit since a 512-bit vector holds 8 doubles.
   (Restored the mask argument / (int)(R) lines that were dropped from the
   _mm512_roundscale_pd, _mm512_maskz_roundscale_pd and
   _mm512_mask_roundscale_round_pd expansions; the intact
   _mm512_maskz_roundscale_round_pd below shows the expected shape.) */
#define _mm512_roundscale_pd(A, B) \
  (__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(A), (int)(B), \
                                          (__v8df)_mm512_undefined_pd(), \
                                          (__mmask8)-1, \
                                          _MM_FROUND_CUR_DIRECTION)

#define _mm512_mask_roundscale_pd(A, B, C, imm) \
  (__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(C), (int)(imm), \
                                          (__v8df)(__m512d)(A), (__mmask8)(B), \
                                          _MM_FROUND_CUR_DIRECTION)

#define _mm512_maskz_roundscale_pd(A, B, imm) \
  (__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(B), (int)(imm), \
                                          (__v8df)_mm512_setzero_pd(), \
                                          (__mmask8)(A), \
                                          _MM_FROUND_CUR_DIRECTION)

#define _mm512_mask_roundscale_round_pd(A, B, C, imm, R) \
  (__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(C), (int)(imm), \
                                          (__v8df)(__m512d)(A), (__mmask8)(B), \
                                          (int)(R))

#define _mm512_maskz_roundscale_round_pd(A, B, imm, R) \
  (__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(B), (int)(imm), \
                                          (__v8df)_mm512_setzero_pd(), \
                                          (__mmask8)(A), (int)(R))

#define _mm512_roundscale_round_pd(A, imm, R) \
  (__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(A), (int)(imm), \
                                          (__v8df)_mm512_undefined_pd(), \
                                          (__mmask8)-1, (int)(R))
2427 #define _mm512_fmadd_round_pd(A, B, C, R) \
2428 (__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \
2429 (__v8df)(__m512d)(B), \
2430 (__v8df)(__m512d)(C), \
2431 (__mmask8)-1, (int)(R))
2434 #define _mm512_mask_fmadd_round_pd(A, U, B, C, R) \
2435 (__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \
2436 (__v8df)(__m512d)(B), \
2437 (__v8df)(__m512d)(C), \
2438 (__mmask8)(U), (int)(R))
2441 #define _mm512_mask3_fmadd_round_pd(A, B, C, U, R) \
2442 (__m512d)__builtin_ia32_vfmaddpd512_mask3((__v8df)(__m512d)(A), \
2443 (__v8df)(__m512d)(B), \
2444 (__v8df)(__m512d)(C), \
2445 (__mmask8)(U), (int)(R))
2448 #define _mm512_maskz_fmadd_round_pd(U, A, B, C, R) \
2449 (__m512d)__builtin_ia32_vfmaddpd512_maskz((__v8df)(__m512d)(A), \
2450 (__v8df)(__m512d)(B), \
2451 (__v8df)(__m512d)(C), \
2452 (__mmask8)(U), (int)(R))
2455 #define _mm512_fmsub_round_pd(A, B, C, R) \
2456 (__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \
2457 (__v8df)(__m512d)(B), \
2458 -(__v8df)(__m512d)(C), \
2459 (__mmask8)-1, (int)(R))
2462 #define _mm512_mask_fmsub_round_pd(A, U, B, C, R) \
2463 (__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \
2464 (__v8df)(__m512d)(B), \
2465 -(__v8df)(__m512d)(C), \
2466 (__mmask8)(U), (int)(R))
2469 #define _mm512_maskz_fmsub_round_pd(U, A, B, C, R) \
2470 (__m512d)__builtin_ia32_vfmaddpd512_maskz((__v8df)(__m512d)(A), \
2471 (__v8df)(__m512d)(B), \
2472 -(__v8df)(__m512d)(C), \
2473 (__mmask8)(U), (int)(R))
2476 #define _mm512_fnmadd_round_pd(A, B, C, R) \
2477 (__m512d)__builtin_ia32_vfmaddpd512_mask(-(__v8df)(__m512d)(A), \
2478 (__v8df)(__m512d)(B), \
2479 (__v8df)(__m512d)(C), \
2480 (__mmask8)-1, (int)(R))
2483 #define _mm512_mask3_fnmadd_round_pd(A, B, C, U, R) \
2484 (__m512d)__builtin_ia32_vfmaddpd512_mask3(-(__v8df)(__m512d)(A), \
2485 (__v8df)(__m512d)(B), \
2486 (__v8df)(__m512d)(C), \
2487 (__mmask8)(U), (int)(R))
2490 #define _mm512_maskz_fnmadd_round_pd(U, A, B, C, R) \
2491 (__m512d)__builtin_ia32_vfmaddpd512_maskz(-(__v8df)(__m512d)(A), \
2492 (__v8df)(__m512d)(B), \
2493 (__v8df)(__m512d)(C), \
2494 (__mmask8)(U), (int)(R))
2497 #define _mm512_fnmsub_round_pd(A, B, C, R) \
2498 (__m512d)__builtin_ia32_vfmaddpd512_mask(-(__v8df)(__m512d)(A), \
2499 (__v8df)(__m512d)(B), \
2500 -(__v8df)(__m512d)(C), \
2501 (__mmask8)-1, (int)(R))
2504 #define _mm512_maskz_fnmsub_round_pd(U, A, B, C, R) \
2505 (__m512d)__builtin_ia32_vfmaddpd512_maskz(-(__v8df)(__m512d)(A), \
2506 (__v8df)(__m512d)(B), \
2507 -(__v8df)(__m512d)(C), \
2508 (__mmask8)(U), (int)(R))
2514 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
2524 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
2534 return (__m512d) __builtin_ia32_vfmaddpd512_mask3 ((__v8df) __A,
2544 return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A,
2554 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
2564 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
2574 return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A,
2584 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
2594 return (__m512d) __builtin_ia32_vfmaddpd512_mask3 (-(__v8df) __A,
2604 return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A,
2614 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
2624 return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A,
2631 #define _mm512_fmadd_round_ps(A, B, C, R) \
2632 (__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
2633 (__v16sf)(__m512)(B), \
2634 (__v16sf)(__m512)(C), \
2635 (__mmask16)-1, (int)(R))
2638 #define _mm512_mask_fmadd_round_ps(A, U, B, C, R) \
2639 (__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
2640 (__v16sf)(__m512)(B), \
2641 (__v16sf)(__m512)(C), \
2642 (__mmask16)(U), (int)(R))
2645 #define _mm512_mask3_fmadd_round_ps(A, B, C, U, R) \
2646 (__m512)__builtin_ia32_vfmaddps512_mask3((__v16sf)(__m512)(A), \
2647 (__v16sf)(__m512)(B), \
2648 (__v16sf)(__m512)(C), \
2649 (__mmask16)(U), (int)(R))
2652 #define _mm512_maskz_fmadd_round_ps(U, A, B, C, R) \
2653 (__m512)__builtin_ia32_vfmaddps512_maskz((__v16sf)(__m512)(A), \
2654 (__v16sf)(__m512)(B), \
2655 (__v16sf)(__m512)(C), \
2656 (__mmask16)(U), (int)(R))
2659 #define _mm512_fmsub_round_ps(A, B, C, R) \
2660 (__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
2661 (__v16sf)(__m512)(B), \
2662 -(__v16sf)(__m512)(C), \
2663 (__mmask16)-1, (int)(R))
2666 #define _mm512_mask_fmsub_round_ps(A, U, B, C, R) \
2667 (__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
2668 (__v16sf)(__m512)(B), \
2669 -(__v16sf)(__m512)(C), \
2670 (__mmask16)(U), (int)(R))
2673 #define _mm512_maskz_fmsub_round_ps(U, A, B, C, R) \
2674 (__m512)__builtin_ia32_vfmaddps512_maskz((__v16sf)(__m512)(A), \
2675 (__v16sf)(__m512)(B), \
2676 -(__v16sf)(__m512)(C), \
2677 (__mmask16)(U), (int)(R))
2680 #define _mm512_fnmadd_round_ps(A, B, C, R) \
2681 (__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
2682 -(__v16sf)(__m512)(B), \
2683 (__v16sf)(__m512)(C), \
2684 (__mmask16)-1, (int)(R))
2687 #define _mm512_mask3_fnmadd_round_ps(A, B, C, U, R) \
2688 (__m512)__builtin_ia32_vfmaddps512_mask3(-(__v16sf)(__m512)(A), \
2689 (__v16sf)(__m512)(B), \
2690 (__v16sf)(__m512)(C), \
2691 (__mmask16)(U), (int)(R))
2694 #define _mm512_maskz_fnmadd_round_ps(U, A, B, C, R) \
2695 (__m512)__builtin_ia32_vfmaddps512_maskz(-(__v16sf)(__m512)(A), \
2696 (__v16sf)(__m512)(B), \
2697 (__v16sf)(__m512)(C), \
2698 (__mmask16)(U), (int)(R))
2701 #define _mm512_fnmsub_round_ps(A, B, C, R) \
2702 (__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
2703 -(__v16sf)(__m512)(B), \
2704 -(__v16sf)(__m512)(C), \
2705 (__mmask16)-1, (int)(R))
2708 #define _mm512_maskz_fnmsub_round_ps(U, A, B, C, R) \
2709 (__m512)__builtin_ia32_vfmaddps512_maskz(-(__v16sf)(__m512)(A), \
2710 (__v16sf)(__m512)(B), \
2711 -(__v16sf)(__m512)(C), \
2712 (__mmask16)(U), (int)(R))
2718 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
2728 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
2738 return (__m512) __builtin_ia32_vfmaddps512_mask3 ((__v16sf) __A,
2748 return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A,
2758 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
2768 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
2778 return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A,
2788 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
2798 return (__m512) __builtin_ia32_vfmaddps512_mask3 (-(__v16sf) __A,
2808 return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A,
2818 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
2828 return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A,
2835 #define _mm512_fmaddsub_round_pd(A, B, C, R) \
2836 (__m512d)__builtin_ia32_vfmaddsubpd512_mask((__v8df)(__m512d)(A), \
2837 (__v8df)(__m512d)(B), \
2838 (__v8df)(__m512d)(C), \
2839 (__mmask8)-1, (int)(R))
2842 #define _mm512_mask_fmaddsub_round_pd(A, U, B, C, R) \
2843 (__m512d)__builtin_ia32_vfmaddsubpd512_mask((__v8df)(__m512d)(A), \
2844 (__v8df)(__m512d)(B), \
2845 (__v8df)(__m512d)(C), \
2846 (__mmask8)(U), (int)(R))
2849 #define _mm512_mask3_fmaddsub_round_pd(A, B, C, U, R) \
2850 (__m512d)__builtin_ia32_vfmaddsubpd512_mask3((__v8df)(__m512d)(A), \
2851 (__v8df)(__m512d)(B), \
2852 (__v8df)(__m512d)(C), \
2853 (__mmask8)(U), (int)(R))
2856 #define _mm512_maskz_fmaddsub_round_pd(U, A, B, C, R) \
2857 (__m512d)__builtin_ia32_vfmaddsubpd512_maskz((__v8df)(__m512d)(A), \
2858 (__v8df)(__m512d)(B), \
2859 (__v8df)(__m512d)(C), \
2860 (__mmask8)(U), (int)(R))
2863 #define _mm512_fmsubadd_round_pd(A, B, C, R) \
2864 (__m512d)__builtin_ia32_vfmaddsubpd512_mask((__v8df)(__m512d)(A), \
2865 (__v8df)(__m512d)(B), \
2866 -(__v8df)(__m512d)(C), \
2867 (__mmask8)-1, (int)(R))
2870 #define _mm512_mask_fmsubadd_round_pd(A, U, B, C, R) \
2871 (__m512d)__builtin_ia32_vfmaddsubpd512_mask((__v8df)(__m512d)(A), \
2872 (__v8df)(__m512d)(B), \
2873 -(__v8df)(__m512d)(C), \
2874 (__mmask8)(U), (int)(R))
2877 #define _mm512_maskz_fmsubadd_round_pd(U, A, B, C, R) \
2878 (__m512d)__builtin_ia32_vfmaddsubpd512_maskz((__v8df)(__m512d)(A), \
2879 (__v8df)(__m512d)(B), \
2880 -(__v8df)(__m512d)(C), \
2881 (__mmask8)(U), (int)(R))
2887 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
2897 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
2907 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask3 ((__v8df) __A,
2917 return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
2927 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
2937 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
2947 return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
2954 #define _mm512_fmaddsub_round_ps(A, B, C, R) \
2955 (__m512)__builtin_ia32_vfmaddsubps512_mask((__v16sf)(__m512)(A), \
2956 (__v16sf)(__m512)(B), \
2957 (__v16sf)(__m512)(C), \
2958 (__mmask16)-1, (int)(R))
2961 #define _mm512_mask_fmaddsub_round_ps(A, U, B, C, R) \
2962 (__m512)__builtin_ia32_vfmaddsubps512_mask((__v16sf)(__m512)(A), \
2963 (__v16sf)(__m512)(B), \
2964 (__v16sf)(__m512)(C), \
2965 (__mmask16)(U), (int)(R))
2968 #define _mm512_mask3_fmaddsub_round_ps(A, B, C, U, R) \
2969 (__m512)__builtin_ia32_vfmaddsubps512_mask3((__v16sf)(__m512)(A), \
2970 (__v16sf)(__m512)(B), \
2971 (__v16sf)(__m512)(C), \
2972 (__mmask16)(U), (int)(R))
2975 #define _mm512_maskz_fmaddsub_round_ps(U, A, B, C, R) \
2976 (__m512)__builtin_ia32_vfmaddsubps512_maskz((__v16sf)(__m512)(A), \
2977 (__v16sf)(__m512)(B), \
2978 (__v16sf)(__m512)(C), \
2979 (__mmask16)(U), (int)(R))
2982 #define _mm512_fmsubadd_round_ps(A, B, C, R) \
2983 (__m512)__builtin_ia32_vfmaddsubps512_mask((__v16sf)(__m512)(A), \
2984 (__v16sf)(__m512)(B), \
2985 -(__v16sf)(__m512)(C), \
2986 (__mmask16)-1, (int)(R))
2989 #define _mm512_mask_fmsubadd_round_ps(A, U, B, C, R) \
2990 (__m512)__builtin_ia32_vfmaddsubps512_mask((__v16sf)(__m512)(A), \
2991 (__v16sf)(__m512)(B), \
2992 -(__v16sf)(__m512)(C), \
2993 (__mmask16)(U), (int)(R))
2996 #define _mm512_maskz_fmsubadd_round_ps(U, A, B, C, R) \
2997 (__m512)__builtin_ia32_vfmaddsubps512_maskz((__v16sf)(__m512)(A), \
2998 (__v16sf)(__m512)(B), \
2999 -(__v16sf)(__m512)(C), \
3000 (__mmask16)(U), (int)(R))
3006 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
3016 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
3026 return (__m512) __builtin_ia32_vfmaddsubps512_mask3 ((__v16sf) __A,
3036 return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
3046 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
3056 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
3066 return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
3073 #define _mm512_mask3_fmsub_round_pd(A, B, C, U, R) \
3074 (__m512d)__builtin_ia32_vfmsubpd512_mask3((__v8df)(__m512d)(A), \
3075 (__v8df)(__m512d)(B), \
3076 (__v8df)(__m512d)(C), \
3077 (__mmask8)(U), (int)(R))
3083 return (__m512d)__builtin_ia32_vfmsubpd512_mask3 ((__v8df) __A,
3090 #define _mm512_mask3_fmsub_round_ps(A, B, C, U, R) \
3091 (__m512)__builtin_ia32_vfmsubps512_mask3((__v16sf)(__m512)(A), \
3092 (__v16sf)(__m512)(B), \
3093 (__v16sf)(__m512)(C), \
3094 (__mmask16)(U), (int)(R))
3099 return (__m512)__builtin_ia32_vfmsubps512_mask3 ((__v16sf) __A,
3106 #define _mm512_mask3_fmsubadd_round_pd(A, B, C, U, R) \
3107 (__m512d)__builtin_ia32_vfmsubaddpd512_mask3((__v8df)(__m512d)(A), \
3108 (__v8df)(__m512d)(B), \
3109 (__v8df)(__m512d)(C), \
3110 (__mmask8)(U), (int)(R))
3116 return (__m512d)__builtin_ia32_vfmsubaddpd512_mask3 ((__v8df) __A,
3123 #define _mm512_mask3_fmsubadd_round_ps(A, B, C, U, R) \
3124 (__m512)__builtin_ia32_vfmsubaddps512_mask3((__v16sf)(__m512)(A), \
3125 (__v16sf)(__m512)(B), \
3126 (__v16sf)(__m512)(C), \
3127 (__mmask16)(U), (int)(R))
3133 return (__m512)__builtin_ia32_vfmsubaddps512_mask3 ((__v16sf) __A,
3140 #define _mm512_mask_fnmadd_round_pd(A, U, B, C, R) \
3141 (__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \
3142 -(__v8df)(__m512d)(B), \
3143 (__v8df)(__m512d)(C), \
3144 (__mmask8)(U), (int)(R))
3150 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
3157 #define _mm512_mask_fnmadd_round_ps(A, U, B, C, R) \
3158 (__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
3159 -(__v16sf)(__m512)(B), \
3160 (__v16sf)(__m512)(C), \
3161 (__mmask16)(U), (int)(R))
3167 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
3174 #define _mm512_mask_fnmsub_round_pd(A, U, B, C, R) \
3175 (__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \
3176 -(__v8df)(__m512d)(B), \
3177 -(__v8df)(__m512d)(C), \
3178 (__mmask8)(U), (int)(R))
3181 #define _mm512_mask3_fnmsub_round_pd(A, B, C, U, R) \
3182 (__m512d)__builtin_ia32_vfmsubpd512_mask3(-(__v8df)(__m512d)(A), \
3183 (__v8df)(__m512d)(B), \
3184 (__v8df)(__m512d)(C), \
3185 (__mmask8)(U), (int)(R))
3191 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
3201 return (__m512d) __builtin_ia32_vfmsubpd512_mask3 (-(__v8df) __A,
3208 #define _mm512_mask_fnmsub_round_ps(A, U, B, C, R) \
3209 (__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
3210 -(__v16sf)(__m512)(B), \
3211 -(__v16sf)(__m512)(C), \
3212 (__mmask16)(U), (int)(R))
3215 #define _mm512_mask3_fnmsub_round_ps(A, B, C, U, R) \
3216 (__m512)__builtin_ia32_vfmsubps512_mask3(-(__v16sf)(__m512)(A), \
3217 (__v16sf)(__m512)(B), \
3218 (__v16sf)(__m512)(C), \
3219 (__mmask16)(U), (int)(R))
3225 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
3235 return (__m512) __builtin_ia32_vfmsubps512_mask3 (-(__v16sf) __A,
3249 return (__m512i)__builtin_ia32_vpermi2vard512((__v16si)__A, (__v16si) __I,
3257 return (__m512i)__builtin_ia32_selectd_512(__U,
3266 return (__m512i)__builtin_ia32_selectd_512(__U,
3275 return (__m512i)__builtin_ia32_selectd_512(__U,
3283 return (__m512i)__builtin_ia32_vpermi2varq512((__v8di)__A, (__v8di) __I,
3291 return (__m512i)__builtin_ia32_selectq_512(__U,
3300 return (__m512i)__builtin_ia32_selectq_512(__U,
3309 return (__m512i)__builtin_ia32_selectq_512(__U,
3314 #define _mm512_alignr_epi64(A, B, I) \
3315 (__m512i)__builtin_ia32_alignq512((__v8di)(__m512i)(A), \
3316 (__v8di)(__m512i)(B), (int)(I))
3318 #define _mm512_mask_alignr_epi64(W, U, A, B, imm) \
3319 (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
3320 (__v8di)_mm512_alignr_epi64((A), (B), (imm)), \
3321 (__v8di)(__m512i)(W))
3323 #define _mm512_maskz_alignr_epi64(U, A, B, imm) \
3324 (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
3325 (__v8di)_mm512_alignr_epi64((A), (B), (imm)), \
3326 (__v8di)_mm512_setzero_si512())
3328 #define _mm512_alignr_epi32(A, B, I) \
3329 (__m512i)__builtin_ia32_alignd512((__v16si)(__m512i)(A), \
3330 (__v16si)(__m512i)(B), (int)(I))
3332 #define _mm512_mask_alignr_epi32(W, U, A, B, imm) \
3333 (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
3334 (__v16si)_mm512_alignr_epi32((A), (B), (imm)), \
3335 (__v16si)(__m512i)(W))
3337 #define _mm512_maskz_alignr_epi32(U, A, B, imm) \
3338 (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
3339 (__v16si)_mm512_alignr_epi32((A), (B), (imm)), \
3340 (__v16si)_mm512_setzero_si512())
/* VEXTRACTF64X4 / VEXTRACTF32X4: extract a 256-bit (4 doubles) or 128-bit
   (4 floats) lane selected by the immediate.  All six expansions had their
   final mask-argument line dropped; restored below ((__mmask8)-1 for the
   unmasked forms, (__mmask8)(U) for the masked ones). */
#define _mm512_extractf64x4_pd(A, I) \
  (__m256d)__builtin_ia32_extractf64x4_mask((__v8df)(__m512d)(A), (int)(I), \
                                            (__v4df)_mm256_undefined_pd(), \
                                            (__mmask8)-1)

#define _mm512_mask_extractf64x4_pd(W, U, A, imm) \
  (__m256d)__builtin_ia32_extractf64x4_mask((__v8df)(__m512d)(A), (int)(imm), \
                                            (__v4df)(__m256d)(W), \
                                            (__mmask8)(U))

#define _mm512_maskz_extractf64x4_pd(U, A, imm) \
  (__m256d)__builtin_ia32_extractf64x4_mask((__v8df)(__m512d)(A), (int)(imm), \
                                            (__v4df)_mm256_setzero_pd(), \
                                            (__mmask8)(U))

#define _mm512_extractf32x4_ps(A, I) \
  (__m128)__builtin_ia32_extractf32x4_mask((__v16sf)(__m512)(A), (int)(I), \
                                           (__v4sf)_mm_undefined_ps(), \
                                           (__mmask8)-1)

#define _mm512_mask_extractf32x4_ps(W, U, A, imm) \
  (__m128)__builtin_ia32_extractf32x4_mask((__v16sf)(__m512)(A), (int)(imm), \
                                           (__v4sf)(__m128)(W), \
                                           (__mmask8)(U))

#define _mm512_maskz_extractf32x4_ps(U, A, imm) \
  (__m128)__builtin_ia32_extractf32x4_mask((__v16sf)(__m512)(A), (int)(imm), \
                                           (__v4sf)_mm_setzero_ps(), \
                                           (__mmask8)(U))
3378 return (__m512d) __builtin_ia32_selectpd_512 ((
__mmask8) __U,
3386 return (__m512) __builtin_ia32_selectps_512 ((
__mmask16) __U,
3394 return (__m512i) __builtin_ia32_selectq_512 ((
__mmask8) __U,
3402 return (__m512i) __builtin_ia32_selectd_512 ((
__mmask16) __U,
/* VCMPPS: compare packed floats with predicate P, producing a 16-bit mask.
   The _round variants take an explicit rounding/SAE control R; the plain
   variants forward _MM_FROUND_CUR_DIRECTION. */
#define _mm512_cmp_round_ps_mask(A, B, P, R) \
  (__mmask16)__builtin_ia32_cmpps512_mask((__v16sf)(__m512)(A), \
                                          (__v16sf)(__m512)(B), (int)(P), \
                                          (__mmask16)-1, (int)(R))

#define _mm512_mask_cmp_round_ps_mask(U, A, B, P, R) \
  (__mmask16)__builtin_ia32_cmpps512_mask((__v16sf)(__m512)(A), \
                                          (__v16sf)(__m512)(B), (int)(P), \
                                          (__mmask16)(U), (int)(R))

#define _mm512_cmp_ps_mask(A, B, P) \
  _mm512_cmp_round_ps_mask((A), (B), (P), _MM_FROUND_CUR_DIRECTION)
#define _mm512_mask_cmp_ps_mask(U, A, B, P) \
  _mm512_mask_cmp_round_ps_mask((U), (A), (B), (P), _MM_FROUND_CUR_DIRECTION)
3424 #define _mm512_cmpeq_ps_mask(A, B) \
3425 _mm512_cmp_ps_mask((A), (B), _CMP_EQ_OQ)
3426 #define _mm512_mask_cmpeq_ps_mask(k, A, B) \
3427 _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_EQ_OQ)
3429 #define _mm512_cmplt_ps_mask(A, B) \
3430 _mm512_cmp_ps_mask((A), (B), _CMP_LT_OS)
3431 #define _mm512_mask_cmplt_ps_mask(k, A, B) \
3432 _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_LT_OS)
3434 #define _mm512_cmple_ps_mask(A, B) \
3435 _mm512_cmp_ps_mask((A), (B), _CMP_LE_OS)
3436 #define _mm512_mask_cmple_ps_mask(k, A, B) \
3437 _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_LE_OS)
3439 #define _mm512_cmpunord_ps_mask(A, B) \
3440 _mm512_cmp_ps_mask((A), (B), _CMP_UNORD_Q)
3441 #define _mm512_mask_cmpunord_ps_mask(k, A, B) \
3442 _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_UNORD_Q)
3444 #define _mm512_cmpneq_ps_mask(A, B) \
3445 _mm512_cmp_ps_mask((A), (B), _CMP_NEQ_UQ)
3446 #define _mm512_mask_cmpneq_ps_mask(k, A, B) \
3447 _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_NEQ_UQ)
3449 #define _mm512_cmpnlt_ps_mask(A, B) \
3450 _mm512_cmp_ps_mask((A), (B), _CMP_NLT_US)
3451 #define _mm512_mask_cmpnlt_ps_mask(k, A, B) \
3452 _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_NLT_US)
3454 #define _mm512_cmpnle_ps_mask(A, B) \
3455 _mm512_cmp_ps_mask((A), (B), _CMP_NLE_US)
3456 #define _mm512_mask_cmpnle_ps_mask(k, A, B) \
3457 _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_NLE_US)
3459 #define _mm512_cmpord_ps_mask(A, B) \
3460 _mm512_cmp_ps_mask((A), (B), _CMP_ORD_Q)
3461 #define _mm512_mask_cmpord_ps_mask(k, A, B) \
3462 _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_ORD_Q)
3464 #define _mm512_cmp_round_pd_mask(A, B, P, R) \
3465 (__mmask8)__builtin_ia32_cmppd512_mask((__v8df)(__m512d)(A), \
3466 (__v8df)(__m512d)(B), (int)(P), \
3467 (__mmask8)-1, (int)(R))
3469 #define _mm512_mask_cmp_round_pd_mask(U, A, B, P, R) \
3470 (__mmask8)__builtin_ia32_cmppd512_mask((__v8df)(__m512d)(A), \
3471 (__v8df)(__m512d)(B), (int)(P), \
3472 (__mmask8)(U), (int)(R))
3474 #define _mm512_cmp_pd_mask(A, B, P) \
3475 _mm512_cmp_round_pd_mask((A), (B), (P), _MM_FROUND_CUR_DIRECTION)
3476 #define _mm512_mask_cmp_pd_mask(U, A, B, P) \
3477 _mm512_mask_cmp_round_pd_mask((U), (A), (B), (P), _MM_FROUND_CUR_DIRECTION)
3479 #define _mm512_cmpeq_pd_mask(A, B) \
3480 _mm512_cmp_pd_mask((A), (B), _CMP_EQ_OQ)
3481 #define _mm512_mask_cmpeq_pd_mask(k, A, B) \
3482 _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_EQ_OQ)
3484 #define _mm512_cmplt_pd_mask(A, B) \
3485 _mm512_cmp_pd_mask((A), (B), _CMP_LT_OS)
3486 #define _mm512_mask_cmplt_pd_mask(k, A, B) \
3487 _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_LT_OS)
3489 #define _mm512_cmple_pd_mask(A, B) \
3490 _mm512_cmp_pd_mask((A), (B), _CMP_LE_OS)
3491 #define _mm512_mask_cmple_pd_mask(k, A, B) \
3492 _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_LE_OS)
3494 #define _mm512_cmpunord_pd_mask(A, B) \
3495 _mm512_cmp_pd_mask((A), (B), _CMP_UNORD_Q)
3496 #define _mm512_mask_cmpunord_pd_mask(k, A, B) \
3497 _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_UNORD_Q)
3499 #define _mm512_cmpneq_pd_mask(A, B) \
3500 _mm512_cmp_pd_mask((A), (B), _CMP_NEQ_UQ)
3501 #define _mm512_mask_cmpneq_pd_mask(k, A, B) \
3502 _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_NEQ_UQ)
3504 #define _mm512_cmpnlt_pd_mask(A, B) \
3505 _mm512_cmp_pd_mask((A), (B), _CMP_NLT_US)
3506 #define _mm512_mask_cmpnlt_pd_mask(k, A, B) \
3507 _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_NLT_US)
3509 #define _mm512_cmpnle_pd_mask(A, B) \
3510 _mm512_cmp_pd_mask((A), (B), _CMP_NLE_US)
3511 #define _mm512_mask_cmpnle_pd_mask(k, A, B) \
3512 _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_NLE_US)
3514 #define _mm512_cmpord_pd_mask(A, B) \
3515 _mm512_cmp_pd_mask((A), (B), _CMP_ORD_Q)
3516 #define _mm512_mask_cmpord_pd_mask(k, A, B) \
3517 _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_ORD_Q)
3521 #define _mm512_cvtt_roundps_epu32(A, R) \
3522 (__m512i)__builtin_ia32_cvttps2udq512_mask((__v16sf)(__m512)(A), \
3523 (__v16si)_mm512_undefined_epi32(), \
3524 (__mmask16)-1, (int)(R))
3526 #define _mm512_mask_cvtt_roundps_epu32(W, U, A, R) \
3527 (__m512i)__builtin_ia32_cvttps2udq512_mask((__v16sf)(__m512)(A), \
3528 (__v16si)(__m512i)(W), \
3529 (__mmask16)(U), (int)(R))
3531 #define _mm512_maskz_cvtt_roundps_epu32(U, A, R) \
3532 (__m512i)__builtin_ia32_cvttps2udq512_mask((__v16sf)(__m512)(A), \
3533 (__v16si)_mm512_setzero_si512(), \
3534 (__mmask16)(U), (int)(R))
3540 return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
3550 return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
3559 return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
3565 #define _mm512_cvt_roundepi32_ps(A, R) \
3566 (__m512)__builtin_ia32_cvtdq2ps512_mask((__v16si)(__m512i)(A), \
3567 (__v16sf)_mm512_setzero_ps(), \
3568 (__mmask16)-1, (int)(R))
3570 #define _mm512_mask_cvt_roundepi32_ps(W, U, A, R) \
3571 (__m512)__builtin_ia32_cvtdq2ps512_mask((__v16si)(__m512i)(A), \
3572 (__v16sf)(__m512)(W), \
3573 (__mmask16)(U), (int)(R))
3575 #define _mm512_maskz_cvt_roundepi32_ps(U, A, R) \
3576 (__m512)__builtin_ia32_cvtdq2ps512_mask((__v16si)(__m512i)(A), \
3577 (__v16sf)_mm512_setzero_ps(), \
3578 (__mmask16)(U), (int)(R))
3580 #define _mm512_cvt_roundepu32_ps(A, R) \
3581 (__m512)__builtin_ia32_cvtudq2ps512_mask((__v16si)(__m512i)(A), \
3582 (__v16sf)_mm512_setzero_ps(), \
3583 (__mmask16)-1, (int)(R))
3585 #define _mm512_mask_cvt_roundepu32_ps(W, U, A, R) \
3586 (__m512)__builtin_ia32_cvtudq2ps512_mask((__v16si)(__m512i)(A), \
3587 (__v16sf)(__m512)(W), \
3588 (__mmask16)(U), (int)(R))
3590 #define _mm512_maskz_cvt_roundepu32_ps(U, A, R) \
3591 (__m512)__builtin_ia32_cvtudq2ps512_mask((__v16si)(__m512i)(A), \
3592 (__v16sf)_mm512_setzero_ps(), \
3593 (__mmask16)(U), (int)(R))
3598 return (__m512)__builtin_convertvector((__v16su)__A, __v16sf);
3604 return (__m512)__builtin_ia32_selectps_512((
__mmask16)__U,
3612 return (__m512)__builtin_ia32_selectps_512((
__mmask16)__U,
3620 return (__m512d)__builtin_convertvector((__v8si)__A, __v8df);
3626 return (__m512d)__builtin_ia32_selectpd_512((
__mmask8) __U,
3634 return (__m512d)__builtin_ia32_selectpd_512((
__mmask8) __U,
3654 return (__m512)__builtin_convertvector((__v16si)__A, __v16sf);
3660 return (__m512)__builtin_ia32_selectps_512((
__mmask16)__U,
3668 return (__m512)__builtin_ia32_selectps_512((
__mmask16)__U,
3676 return (__m512d)__builtin_convertvector((__v8su)__A, __v8df);
3682 return (__m512d)__builtin_ia32_selectpd_512((
__mmask8) __U,
3690 return (__m512d)__builtin_ia32_selectpd_512((
__mmask8) __U,
/* VCVTPD2PS: convert 8 doubles to 8 floats with explicit rounding mode R.
   (Restored the trailing (int)(R)) line dropped from the mask variant.) */
#define _mm512_cvt_roundpd_ps(A, R) \
  (__m256)__builtin_ia32_cvtpd2ps512_mask((__v8df)(__m512d)(A), \
                                          (__v8sf)_mm256_setzero_ps(), \
                                          (__mmask8)-1, (int)(R))

#define _mm512_mask_cvt_roundpd_ps(W, U, A, R) \
  (__m256)__builtin_ia32_cvtpd2ps512_mask((__v8df)(__m512d)(A), \
                                          (__v8sf)(__m256)(W), (__mmask8)(U), \
                                          (int)(R))

#define _mm512_maskz_cvt_roundpd_ps(U, A, R) \
  (__m256)__builtin_ia32_cvtpd2ps512_mask((__v8df)(__m512d)(A), \
                                          (__v8sf)_mm256_setzero_ps(), \
                                          (__mmask8)(U), (int)(R))
3725 return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
3734 return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
3743 return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
3752 return (__m512) __builtin_shufflevector((__v8sf)
_mm512_cvtpd_ps(__A),
3754 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
3760 return (__m512) __builtin_shufflevector (
3764 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
/* VCVTPS2PH: convert 16 floats to 16 half-precision values; I encodes the
   rounding control immediate.  NOTE the historical parameter naming here:
   in the mask/maskz variants the mask arrives as W (and the passthrough
   vector as U) — kept as-is for source compatibility.
   (Restored the final mask-argument line dropped from all three expansions.) */
#define _mm512_cvt_roundps_ph(A, I) \
  (__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \
                                            (__v16hi)_mm256_undefined_si256(), \
                                            (__mmask16)-1)

#define _mm512_mask_cvt_roundps_ph(U, W, A, I) \
  (__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \
                                            (__v16hi)(__m256i)(U), \
                                            (__mmask16)(W))

#define _mm512_maskz_cvt_roundps_ph(W, A, I) \
  (__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \
                                            (__v16hi)_mm256_setzero_si256(), \
                                            (__mmask16)(W))

/* The plain cvtps_ph names are aliases for the _round forms (the rounding
   control is always carried in the immediate). */
#define _mm512_cvtps_ph _mm512_cvt_roundps_ph
#define _mm512_mask_cvtps_ph _mm512_mask_cvt_roundps_ph
#define _mm512_maskz_cvtps_ph _mm512_maskz_cvt_roundps_ph
3786 #define _mm512_cvt_roundph_ps(A, R) \
3787 (__m512)__builtin_ia32_vcvtph2ps512_mask((__v16hi)(__m256i)(A), \
3788 (__v16sf)_mm512_undefined_ps(), \
3789 (__mmask16)-1, (int)(R))
3791 #define _mm512_mask_cvt_roundph_ps(W, U, A, R) \
3792 (__m512)__builtin_ia32_vcvtph2ps512_mask((__v16hi)(__m256i)(A), \
3793 (__v16sf)(__m512)(W), \
3794 (__mmask16)(U), (int)(R))
3796 #define _mm512_maskz_cvt_roundph_ps(U, A, R) \
3797 (__m512)__builtin_ia32_vcvtph2ps512_mask((__v16hi)(__m256i)(A), \
3798 (__v16sf)_mm512_setzero_ps(), \
3799 (__mmask16)(U), (int)(R))
3805 return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
3815 return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
3824 return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
3830 #define _mm512_cvtt_roundpd_epi32(A, R) \
3831 (__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df)(__m512d)(A), \
3832 (__v8si)_mm256_setzero_si256(), \
3833 (__mmask8)-1, (int)(R))
3835 #define _mm512_mask_cvtt_roundpd_epi32(W, U, A, R) \
3836 (__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df)(__m512d)(A), \
3837 (__v8si)(__m256i)(W), \
3838 (__mmask8)(U), (int)(R))
3840 #define _mm512_maskz_cvtt_roundpd_epi32(U, A, R) \
3841 (__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df)(__m512d)(A), \
3842 (__v8si)_mm256_setzero_si256(), \
3843 (__mmask8)(U), (int)(R))
3848 return (__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df)
__a,
3857 return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
3866 return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
3872 #define _mm512_cvtt_roundps_epi32(A, R) \
3873 (__m512i)__builtin_ia32_cvttps2dq512_mask((__v16sf)(__m512)(A), \
3874 (__v16si)_mm512_setzero_si512(), \
3875 (__mmask16)-1, (int)(R))
3877 #define _mm512_mask_cvtt_roundps_epi32(W, U, A, R) \
3878 (__m512i)__builtin_ia32_cvttps2dq512_mask((__v16sf)(__m512)(A), \
3879 (__v16si)(__m512i)(W), \
3880 (__mmask16)(U), (int)(R))
3882 #define _mm512_maskz_cvtt_roundps_epi32(U, A, R) \
3883 (__m512i)__builtin_ia32_cvttps2dq512_mask((__v16sf)(__m512)(A), \
3884 (__v16si)_mm512_setzero_si512(), \
3885 (__mmask16)(U), (int)(R))
3891 __builtin_ia32_cvttps2dq512_mask((__v16sf)
__a,
3899 return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
3908 return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
3914 #define _mm512_cvt_roundps_epi32(A, R) \
3915 (__m512i)__builtin_ia32_cvtps2dq512_mask((__v16sf)(__m512)(A), \
3916 (__v16si)_mm512_setzero_si512(), \
3917 (__mmask16)-1, (int)(R))
3919 #define _mm512_mask_cvt_roundps_epi32(W, U, A, R) \
3920 (__m512i)__builtin_ia32_cvtps2dq512_mask((__v16sf)(__m512)(A), \
3921 (__v16si)(__m512i)(W), \
3922 (__mmask16)(U), (int)(R))
3924 #define _mm512_maskz_cvt_roundps_epi32(U, A, R) \
3925 (__m512i)__builtin_ia32_cvtps2dq512_mask((__v16sf)(__m512)(A), \
3926 (__v16si)_mm512_setzero_si512(), \
3927 (__mmask16)(U), (int)(R))
3932 return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
3941 return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
3950 return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
3957 #define _mm512_cvt_roundpd_epi32(A, R) \
3958 (__m256i)__builtin_ia32_cvtpd2dq512_mask((__v8df)(__m512d)(A), \
3959 (__v8si)_mm256_setzero_si256(), \
3960 (__mmask8)-1, (int)(R))
3962 #define _mm512_mask_cvt_roundpd_epi32(W, U, A, R) \
3963 (__m256i)__builtin_ia32_cvtpd2dq512_mask((__v8df)(__m512d)(A), \
3964 (__v8si)(__m256i)(W), \
3965 (__mmask8)(U), (int)(R))
3967 #define _mm512_maskz_cvt_roundpd_epi32(U, A, R) \
3968 (__m256i)__builtin_ia32_cvtpd2dq512_mask((__v8df)(__m512d)(A), \
3969 (__v8si)_mm256_setzero_si256(), \
3970 (__mmask8)(U), (int)(R))
3975 return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
3985 return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
3994 return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
4001 #define _mm512_cvt_roundps_epu32(A, R) \
4002 (__m512i)__builtin_ia32_cvtps2udq512_mask((__v16sf)(__m512)(A), \
4003 (__v16si)_mm512_setzero_si512(), \
4004 (__mmask16)-1, (int)(R))
4006 #define _mm512_mask_cvt_roundps_epu32(W, U, A, R) \
4007 (__m512i)__builtin_ia32_cvtps2udq512_mask((__v16sf)(__m512)(A), \
4008 (__v16si)(__m512i)(W), \
4009 (__mmask16)(U), (int)(R))
4011 #define _mm512_maskz_cvt_roundps_epu32(U, A, R) \
4012 (__m512i)__builtin_ia32_cvtps2udq512_mask((__v16sf)(__m512)(A), \
4013 (__v16si)_mm512_setzero_si512(), \
4014 (__mmask16)(U), (int)(R))
4019 return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,\
4029 return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
4038 return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
4045 #define _mm512_cvt_roundpd_epu32(A, R) \
4046 (__m256i)__builtin_ia32_cvtpd2udq512_mask((__v8df)(__m512d)(A), \
4047 (__v8si)_mm256_setzero_si256(), \
4048 (__mmask8)-1, (int)(R))
4050 #define _mm512_mask_cvt_roundpd_epu32(W, U, A, R) \
4051 (__m256i)__builtin_ia32_cvtpd2udq512_mask((__v8df)(__m512d)(A), \
4052 (__v8si)(__m256i)(W), \
4053 (__mmask8)(U), (int)(R))
4055 #define _mm512_maskz_cvt_roundpd_epu32(U, A, R) \
4056 (__m256i)__builtin_ia32_cvtpd2udq512_mask((__v8df)(__m512d)(A), \
4057 (__v8si)_mm256_setzero_si256(), \
4058 (__mmask8)(U), (int)(R))
4063 return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
4073 return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
4082 return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
4106 return (__m512d)__builtin_shufflevector((__v8df)
__a, (__v8df)
__b,
4107 1, 9, 1+2, 9+2, 1+4, 9+4, 1+6, 9+6);
4113 return (__m512d)__builtin_ia32_selectpd_512((
__mmask8) __U,
4121 return (__m512d)__builtin_ia32_selectpd_512((
__mmask8) __U,
4129 return (__m512d)__builtin_shufflevector((__v8df)
__a, (__v8df)
__b,
4130 0, 8, 0+2, 8+2, 0+4, 8+4, 0+6, 8+6);
4136 return (__m512d)__builtin_ia32_selectpd_512((
__mmask8) __U,
4144 return (__m512d)__builtin_ia32_selectpd_512((
__mmask8) __U,
4152 return (__m512)__builtin_shufflevector((__v16sf)
__a, (__v16sf)
__b,
4154 2+4, 18+4, 3+4, 19+4,
4155 2+8, 18+8, 3+8, 19+8,
4156 2+12, 18+12, 3+12, 19+12);
4162 return (__m512)__builtin_ia32_selectps_512((
__mmask16) __U,
4170 return (__m512)__builtin_ia32_selectps_512((
__mmask16) __U,
4178 return (__m512)__builtin_shufflevector((__v16sf)
__a, (__v16sf)
__b,
4180 0+4, 16+4, 1+4, 17+4,
4181 0+8, 16+8, 1+8, 17+8,
4182 0+12, 16+12, 1+12, 17+12);
4188 return (__m512)__builtin_ia32_selectps_512((
__mmask16) __U,
4196 return (__m512)__builtin_ia32_selectps_512((
__mmask16) __U,
4204 return (__m512i)__builtin_shufflevector((__v16si)__A, (__v16si)__B,
4206 2+4, 18+4, 3+4, 19+4,
4207 2+8, 18+8, 3+8, 19+8,
4208 2+12, 18+12, 3+12, 19+12);
4214 return (__m512i)__builtin_ia32_selectd_512((
__mmask16) __U,
4222 return (__m512i)__builtin_ia32_selectd_512((
__mmask16) __U,
4230 return (__m512i)__builtin_shufflevector((__v16si)__A, (__v16si)__B,
4232 0+4, 16+4, 1+4, 17+4,
4233 0+8, 16+8, 1+8, 17+8,
4234 0+12, 16+12, 1+12, 17+12);
4240 return (__m512i)__builtin_ia32_selectd_512((
__mmask16) __U,
4248 return (__m512i)__builtin_ia32_selectd_512((
__mmask16) __U,
4256 return (__m512i)__builtin_shufflevector((__v8di)__A, (__v8di)__B,
4257 1, 9, 1+2, 9+2, 1+4, 9+4, 1+6, 9+6);
4263 return (__m512i)__builtin_ia32_selectq_512((
__mmask8) __U,
4271 return (__m512i)__builtin_ia32_selectq_512((
__mmask8) __U,
4279 return (__m512i)__builtin_shufflevector((__v8di)__A, (__v8di)__B,
4280 0, 8, 0+2, 8+2, 0+4, 8+4, 0+6, 8+6);
4286 return (__m512i)__builtin_ia32_selectq_512((
__mmask8) __U,
4294 return (__m512i)__builtin_ia32_selectq_512((
__mmask8) __U,
4305 struct __loadu_si512 {
4308 return ((
const struct __loadu_si512*)__P)->__v;
4314 struct __loadu_epi32 {
4317 return ((
const struct __loadu_epi32*)__P)->__v;
4323 return (__m512i) __builtin_ia32_loaddqusi512_mask ((
const int *) __P,
4332 return (__m512i) __builtin_ia32_loaddqusi512_mask ((
const int *)__P,
4341 struct __loadu_epi64 {
4344 return ((
const struct __loadu_epi64*)__P)->__v;
4350 return (__m512i) __builtin_ia32_loaddqudi512_mask ((
const long long *) __P,
4358 return (__m512i) __builtin_ia32_loaddqudi512_mask ((
const long long *)__P,
4367 return (__m512) __builtin_ia32_loadups512_mask ((
const float *) __P,
4375 return (__m512) __builtin_ia32_loadups512_mask ((
const float *)__P,
4384 return (__m512d) __builtin_ia32_loadupd512_mask ((
const double *) __P,
4392 return (__m512d) __builtin_ia32_loadupd512_mask ((
const double *)__P,
4404 return ((
const struct __loadu_pd*)
__p)->__v;
4413 return ((
const struct __loadu_ps*)
__p)->__v;
4419 return *(
const __m512*)
__p;
4425 return (__m512) __builtin_ia32_loadaps512_mask ((
const __v16sf *) __P,
4433 return (__m512) __builtin_ia32_loadaps512_mask ((
const __v16sf *)__P,
4442 return *(
const __m512d*)
__p;
4448 return (__m512d) __builtin_ia32_loadapd512_mask ((
const __v8df *) __P,
4456 return (__m512d) __builtin_ia32_loadapd512_mask ((
const __v8df *)__P,
4465 return *(
const __m512i *) __P;
4471 return *(
const __m512i *) __P;
4477 return *(
const __m512i *) __P;
4485 struct __storeu_epi64 {
4488 ((
struct __storeu_epi64*)__P)->__v = __A;
4494 __builtin_ia32_storedqudi512_mask ((
long long *)__P, (__v8di) __A,
4501 struct __storeu_si512 {
4504 ((
struct __storeu_si512*)__P)->__v = __A;
4510 struct __storeu_epi32 {
4513 ((
struct __storeu_epi32*)__P)->__v = __A;
4519 __builtin_ia32_storedqusi512_mask ((
int *)__P, (__v16si) __A,
4526 __builtin_ia32_storeupd512_mask ((
double *)__P, (__v8df) __A, (
__mmask8) __U);
4532 struct __storeu_pd {
4535 ((
struct __storeu_pd*)__P)->__v = __A;
4541 __builtin_ia32_storeups512_mask ((
float *)__P, (__v16sf) __A,
4548 struct __storeu_ps {
4551 ((
struct __storeu_ps*)__P)->__v = __A;
4557 __builtin_ia32_storeapd512_mask ((__v8df *)__P, (__v8df) __A, (
__mmask8) __U);
4563 *(__m512d*)__P = __A;
4569 __builtin_ia32_storeaps512_mask ((__v16sf *)__P, (__v16sf) __A,
4576 *(__m512*)__P = __A;
4582 *(__m512i *) __P = __A;
4588 *(__m512i *) __P = __A;
4594 *(__m512i *) __P = __A;
4602 return __builtin_ia32_knothi(__M);
/* Convenience predicates for signed 32-bit element comparisons: each
 * forwards to _mm512_cmp_epi32_mask / _mm512_mask_cmp_epi32_mask with the
 * matching _MM_CMPINT_* code, producing one mask bit per element. */
#define _mm512_cmpeq_epi32_mask(a, b) \
  _mm512_cmp_epi32_mask((a), (b), _MM_CMPINT_EQ)
#define _mm512_mask_cmpeq_epi32_mask(m, a, b) \
  _mm512_mask_cmp_epi32_mask((m), (a), (b), _MM_CMPINT_EQ)
#define _mm512_cmpge_epi32_mask(a, b) \
  _mm512_cmp_epi32_mask((a), (b), _MM_CMPINT_GE)
#define _mm512_mask_cmpge_epi32_mask(m, a, b) \
  _mm512_mask_cmp_epi32_mask((m), (a), (b), _MM_CMPINT_GE)
#define _mm512_cmpgt_epi32_mask(a, b) \
  _mm512_cmp_epi32_mask((a), (b), _MM_CMPINT_GT)
#define _mm512_mask_cmpgt_epi32_mask(m, a, b) \
  _mm512_mask_cmp_epi32_mask((m), (a), (b), _MM_CMPINT_GT)
#define _mm512_cmple_epi32_mask(a, b) \
  _mm512_cmp_epi32_mask((a), (b), _MM_CMPINT_LE)
#define _mm512_mask_cmple_epi32_mask(m, a, b) \
  _mm512_mask_cmp_epi32_mask((m), (a), (b), _MM_CMPINT_LE)
#define _mm512_cmplt_epi32_mask(a, b) \
  _mm512_cmp_epi32_mask((a), (b), _MM_CMPINT_LT)
#define _mm512_mask_cmplt_epi32_mask(m, a, b) \
  _mm512_mask_cmp_epi32_mask((m), (a), (b), _MM_CMPINT_LT)
#define _mm512_cmpneq_epi32_mask(a, b) \
  _mm512_cmp_epi32_mask((a), (b), _MM_CMPINT_NE)
#define _mm512_mask_cmpneq_epi32_mask(m, a, b) \
  _mm512_mask_cmp_epi32_mask((m), (a), (b), _MM_CMPINT_NE)
/* Convenience predicates for unsigned 32-bit element comparisons; each
 * forwards to the general _mm512_[mask_]cmp_epu32_mask macro with the
 * matching _MM_CMPINT_* code. */
#define _mm512_cmpeq_epu32_mask(a, b) \
  _mm512_cmp_epu32_mask((a), (b), _MM_CMPINT_EQ)
#define _mm512_mask_cmpeq_epu32_mask(m, a, b) \
  _mm512_mask_cmp_epu32_mask((m), (a), (b), _MM_CMPINT_EQ)
#define _mm512_cmpge_epu32_mask(a, b) \
  _mm512_cmp_epu32_mask((a), (b), _MM_CMPINT_GE)
#define _mm512_mask_cmpge_epu32_mask(m, a, b) \
  _mm512_mask_cmp_epu32_mask((m), (a), (b), _MM_CMPINT_GE)
#define _mm512_cmpgt_epu32_mask(a, b) \
  _mm512_cmp_epu32_mask((a), (b), _MM_CMPINT_GT)
#define _mm512_mask_cmpgt_epu32_mask(m, a, b) \
  _mm512_mask_cmp_epu32_mask((m), (a), (b), _MM_CMPINT_GT)
#define _mm512_cmple_epu32_mask(a, b) \
  _mm512_cmp_epu32_mask((a), (b), _MM_CMPINT_LE)
#define _mm512_mask_cmple_epu32_mask(m, a, b) \
  _mm512_mask_cmp_epu32_mask((m), (a), (b), _MM_CMPINT_LE)
#define _mm512_cmplt_epu32_mask(a, b) \
  _mm512_cmp_epu32_mask((a), (b), _MM_CMPINT_LT)
#define _mm512_mask_cmplt_epu32_mask(m, a, b) \
  _mm512_mask_cmp_epu32_mask((m), (a), (b), _MM_CMPINT_LT)
#define _mm512_cmpneq_epu32_mask(a, b) \
  _mm512_cmp_epu32_mask((a), (b), _MM_CMPINT_NE)
#define _mm512_mask_cmpneq_epu32_mask(m, a, b) \
  _mm512_mask_cmp_epu32_mask((m), (a), (b), _MM_CMPINT_NE)
/* Convenience predicates for signed 64-bit element comparisons; each
 * forwards to the general _mm512_[mask_]cmp_epi64_mask macro with the
 * matching _MM_CMPINT_* code. */
#define _mm512_cmpeq_epi64_mask(a, b) \
  _mm512_cmp_epi64_mask((a), (b), _MM_CMPINT_EQ)
#define _mm512_mask_cmpeq_epi64_mask(m, a, b) \
  _mm512_mask_cmp_epi64_mask((m), (a), (b), _MM_CMPINT_EQ)
#define _mm512_cmpge_epi64_mask(a, b) \
  _mm512_cmp_epi64_mask((a), (b), _MM_CMPINT_GE)
#define _mm512_mask_cmpge_epi64_mask(m, a, b) \
  _mm512_mask_cmp_epi64_mask((m), (a), (b), _MM_CMPINT_GE)
#define _mm512_cmpgt_epi64_mask(a, b) \
  _mm512_cmp_epi64_mask((a), (b), _MM_CMPINT_GT)
#define _mm512_mask_cmpgt_epi64_mask(m, a, b) \
  _mm512_mask_cmp_epi64_mask((m), (a), (b), _MM_CMPINT_GT)
#define _mm512_cmple_epi64_mask(a, b) \
  _mm512_cmp_epi64_mask((a), (b), _MM_CMPINT_LE)
#define _mm512_mask_cmple_epi64_mask(m, a, b) \
  _mm512_mask_cmp_epi64_mask((m), (a), (b), _MM_CMPINT_LE)
#define _mm512_cmplt_epi64_mask(a, b) \
  _mm512_cmp_epi64_mask((a), (b), _MM_CMPINT_LT)
#define _mm512_mask_cmplt_epi64_mask(m, a, b) \
  _mm512_mask_cmp_epi64_mask((m), (a), (b), _MM_CMPINT_LT)
#define _mm512_cmpneq_epi64_mask(a, b) \
  _mm512_cmp_epi64_mask((a), (b), _MM_CMPINT_NE)
#define _mm512_mask_cmpneq_epi64_mask(m, a, b) \
  _mm512_mask_cmp_epi64_mask((m), (a), (b), _MM_CMPINT_NE)
/* Convenience predicates for unsigned 64-bit element comparisons; each
 * forwards to the general _mm512_[mask_]cmp_epu64_mask macro with the
 * matching _MM_CMPINT_* code. */
#define _mm512_cmpeq_epu64_mask(a, b) \
  _mm512_cmp_epu64_mask((a), (b), _MM_CMPINT_EQ)
#define _mm512_mask_cmpeq_epu64_mask(m, a, b) \
  _mm512_mask_cmp_epu64_mask((m), (a), (b), _MM_CMPINT_EQ)
#define _mm512_cmpge_epu64_mask(a, b) \
  _mm512_cmp_epu64_mask((a), (b), _MM_CMPINT_GE)
#define _mm512_mask_cmpge_epu64_mask(m, a, b) \
  _mm512_mask_cmp_epu64_mask((m), (a), (b), _MM_CMPINT_GE)
#define _mm512_cmpgt_epu64_mask(a, b) \
  _mm512_cmp_epu64_mask((a), (b), _MM_CMPINT_GT)
#define _mm512_mask_cmpgt_epu64_mask(m, a, b) \
  _mm512_mask_cmp_epu64_mask((m), (a), (b), _MM_CMPINT_GT)
#define _mm512_cmple_epu64_mask(a, b) \
  _mm512_cmp_epu64_mask((a), (b), _MM_CMPINT_LE)
#define _mm512_mask_cmple_epu64_mask(m, a, b) \
  _mm512_mask_cmp_epu64_mask((m), (a), (b), _MM_CMPINT_LE)
#define _mm512_cmplt_epu64_mask(a, b) \
  _mm512_cmp_epu64_mask((a), (b), _MM_CMPINT_LT)
#define _mm512_mask_cmplt_epu64_mask(m, a, b) \
  _mm512_mask_cmp_epu64_mask((m), (a), (b), _MM_CMPINT_LT)
#define _mm512_cmpneq_epu64_mask(a, b) \
  _mm512_cmp_epu64_mask((a), (b), _MM_CMPINT_NE)
#define _mm512_mask_cmpneq_epu64_mask(m, a, b) \
  _mm512_mask_cmp_epu64_mask((m), (a), (b), _MM_CMPINT_NE)
4712 return (__m512i)__builtin_convertvector((__v16qs)__A, __v16si);
4718 return (__m512i)__builtin_ia32_selectd_512((
__mmask16)__U,
4726 return (__m512i)__builtin_ia32_selectd_512((
__mmask16)__U,
4736 return (__m512i)__builtin_convertvector(__builtin_shufflevector((__v16qs)__A, (__v16qs)__A, 0, 1, 2, 3, 4, 5, 6, 7), __v8di);
4742 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__U,
4750 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__U,
4758 return (__m512i)__builtin_convertvector((__v8si)__X, __v8di);
4764 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__U,
4772 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__U,
4780 return (__m512i)__builtin_convertvector((__v16hi)__A, __v16si);
4786 return (__m512i)__builtin_ia32_selectd_512((
__mmask16)__U,
4794 return (__m512i)__builtin_ia32_selectd_512((
__mmask16)__U,
4802 return (__m512i)__builtin_convertvector((__v8hi)__A, __v8di);
4808 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__U,
4816 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__U,
4824 return (__m512i)__builtin_convertvector((__v16qu)__A, __v16si);
4830 return (__m512i)__builtin_ia32_selectd_512((
__mmask16)__U,
4838 return (__m512i)__builtin_ia32_selectd_512((
__mmask16)__U,
4846 return (__m512i)__builtin_convertvector(__builtin_shufflevector((__v16qu)__A, (__v16qu)__A, 0, 1, 2, 3, 4, 5, 6, 7), __v8di);
4852 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__U,
4860 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__U,
4868 return (__m512i)__builtin_convertvector((__v8su)__X, __v8di);
4874 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__U,
4882 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__U,
4890 return (__m512i)__builtin_convertvector((__v16hu)__A, __v16si);
4896 return (__m512i)__builtin_ia32_selectd_512((
__mmask16)__U,
4904 return (__m512i)__builtin_ia32_selectd_512((
__mmask16)__U,
4912 return (__m512i)__builtin_convertvector((__v8hu)__A, __v8di);
4918 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__U,
4926 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__U,
4934 return (__m512i)__builtin_ia32_prorvd512((__v16si)__A, (__v16si)__B);
4940 return (__m512i)__builtin_ia32_selectd_512(__U,
4948 return (__m512i)__builtin_ia32_selectd_512(__U,
4956 return (__m512i)__builtin_ia32_prorvq512((__v8di)__A, (__v8di)__B);
4962 return (__m512i)__builtin_ia32_selectq_512(__U,
4970 return (__m512i)__builtin_ia32_selectq_512(__U,
/* General element-wise integer comparisons.  (p) is one of the
 * _MM_CMPINT_* predicate codes; the result has one mask bit per element.
 * The masked variants AND the result with the incoming mask (m).
 * NOTE(review): the extracted source was missing the trailing mask
 * argument of each builtin call ((__mmask16)-1 / (__mmask16)(m), and the
 * 8-bit equivalents); restored to match the builtins' documented
 * four-argument signature — confirm against the upstream header. */
#define _mm512_cmp_epi32_mask(a, b, p) \
  (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)(__m512i)(a), \
                                         (__v16si)(__m512i)(b), (int)(p), \
                                         (__mmask16)-1)

#define _mm512_cmp_epu32_mask(a, b, p) \
  (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)(__m512i)(a), \
                                          (__v16si)(__m512i)(b), (int)(p), \
                                          (__mmask16)-1)

#define _mm512_cmp_epi64_mask(a, b, p) \
  (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)(__m512i)(a), \
                                        (__v8di)(__m512i)(b), (int)(p), \
                                        (__mmask8)-1)

#define _mm512_cmp_epu64_mask(a, b, p) \
  (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)(__m512i)(a), \
                                         (__v8di)(__m512i)(b), (int)(p), \
                                         (__mmask8)-1)

#define _mm512_mask_cmp_epi32_mask(m, a, b, p) \
  (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)(__m512i)(a), \
                                         (__v16si)(__m512i)(b), (int)(p), \
                                         (__mmask16)(m))

#define _mm512_mask_cmp_epu32_mask(m, a, b, p) \
  (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)(__m512i)(a), \
                                          (__v16si)(__m512i)(b), (int)(p), \
                                          (__mmask16)(m))

#define _mm512_mask_cmp_epi64_mask(m, a, b, p) \
  (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)(__m512i)(a), \
                                        (__v8di)(__m512i)(b), (int)(p), \
                                        (__mmask8)(m))

#define _mm512_mask_cmp_epu64_mask(m, a, b, p) \
  (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)(__m512i)(a), \
                                         (__v8di)(__m512i)(b), (int)(p), \
                                         (__mmask8)(m))
/* Rotate each 32-bit lane of (A) left by immediate (B). */
#define _mm512_rol_epi32(A, B) \
  (__m512i)__builtin_ia32_prold512((__v16si)(__m512i)(A), (int)(B))

/* Merge-masked rotate: lanes whose bit in (U) is clear come from (W). */
#define _mm512_mask_rol_epi32(W, U, A, B) \
  (__m512i)__builtin_ia32_selectd_512( \
      (__mmask16)(U), (__v16si)_mm512_rol_epi32((A), (B)), \
      (__v16si)(__m512i)(W))

/* Zero-masked rotate: lanes whose bit in (U) is clear become zero. */
#define _mm512_maskz_rol_epi32(U, A, B) \
  (__m512i)__builtin_ia32_selectd_512( \
      (__mmask16)(U), (__v16si)_mm512_rol_epi32((A), (B)), \
      (__v16si)_mm512_setzero_si512())

/* Rotate each 64-bit lane of (A) left by immediate (B). */
#define _mm512_rol_epi64(A, B) \
  (__m512i)__builtin_ia32_prolq512((__v8di)(__m512i)(A), (int)(B))

#define _mm512_mask_rol_epi64(W, U, A, B) \
  (__m512i)__builtin_ia32_selectq_512( \
      (__mmask8)(U), (__v8di)_mm512_rol_epi64((A), (B)), \
      (__v8di)(__m512i)(W))

#define _mm512_maskz_rol_epi64(U, A, B) \
  (__m512i)__builtin_ia32_selectq_512( \
      (__mmask8)(U), (__v8di)_mm512_rol_epi64((A), (B)), \
      (__v8di)_mm512_setzero_si512())
5046 return (__m512i)__builtin_ia32_prolvd512((__v16si)__A, (__v16si)__B);
5052 return (__m512i)__builtin_ia32_selectd_512(__U,
5060 return (__m512i)__builtin_ia32_selectd_512(__U,
5068 return (__m512i)__builtin_ia32_prolvq512((__v8di)__A, (__v8di)__B);
5074 return (__m512i)__builtin_ia32_selectq_512(__U,
5082 return (__m512i)__builtin_ia32_selectq_512(__U,
/* Rotate each 32-bit lane of (A) right by immediate (B). */
#define _mm512_ror_epi32(A, B) \
  (__m512i)__builtin_ia32_prord512((__v16si)(__m512i)(A), (int)(B))

/* Merge-masked rotate: lanes whose bit in (U) is clear come from (W). */
#define _mm512_mask_ror_epi32(W, U, A, B) \
  (__m512i)__builtin_ia32_selectd_512( \
      (__mmask16)(U), (__v16si)_mm512_ror_epi32((A), (B)), \
      (__v16si)(__m512i)(W))

/* Zero-masked rotate: lanes whose bit in (U) is clear become zero. */
#define _mm512_maskz_ror_epi32(U, A, B) \
  (__m512i)__builtin_ia32_selectd_512( \
      (__mmask16)(U), (__v16si)_mm512_ror_epi32((A), (B)), \
      (__v16si)_mm512_setzero_si512())

/* Rotate each 64-bit lane of (A) right by immediate (B). */
#define _mm512_ror_epi64(A, B) \
  (__m512i)__builtin_ia32_prorq512((__v8di)(__m512i)(A), (int)(B))

#define _mm512_mask_ror_epi64(W, U, A, B) \
  (__m512i)__builtin_ia32_selectq_512( \
      (__mmask8)(U), (__v8di)_mm512_ror_epi64((A), (B)), \
      (__v8di)(__m512i)(W))

#define _mm512_maskz_ror_epi64(U, A, B) \
  (__m512i)__builtin_ia32_selectq_512( \
      (__mmask8)(U), (__v8di)_mm512_ror_epi64((A), (B)), \
      (__v8di)_mm512_setzero_si512())
5116 return (__m512i)__builtin_ia32_pslldi512((__v16si)__A, __B);
5123 return (__m512i)__builtin_ia32_selectd_512((
__mmask16)__U,
5130 return (__m512i)__builtin_ia32_selectd_512((
__mmask16)__U,
5138 return (__m512i)__builtin_ia32_psllqi512((__v8di)__A, __B);
5144 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__U,
5152 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__U,
5160 return (__m512i)__builtin_ia32_psrldi512((__v16si)__A, __B);
5167 return (__m512i)__builtin_ia32_selectd_512((
__mmask16)__U,
5174 return (__m512i)__builtin_ia32_selectd_512((
__mmask16)__U,
5182 return (__m512i)__builtin_ia32_psrlqi512((__v8di)__A, __B);
5189 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__U,
5198 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__U,
5206 return (__m512i) __builtin_ia32_movdqa32load512_mask ((
const __v16si *) __P,
5214 return (__m512i) __builtin_ia32_movdqa32load512_mask ((
const __v16si *) __P,
5223 __builtin_ia32_movdqa32store512_mask ((__v16si *) __P, (__v16si) __A,
5230 return (__m512i) __builtin_ia32_selectd_512 ((
__mmask16) __U,
5238 return (__m512i) __builtin_ia32_selectd_512 ((
__mmask16) __U,
5246 return (__m512i) __builtin_ia32_selectq_512 ((
__mmask8) __U,
5254 return (__m512i) __builtin_ia32_selectq_512 ((
__mmask8) __U,
5262 return (__m512i) __builtin_ia32_movdqa64load512_mask ((
const __v8di *) __P,
5270 return (__m512i) __builtin_ia32_movdqa64load512_mask ((
const __v8di *) __P,
5279 __builtin_ia32_movdqa64store512_mask ((__v8di *) __P, (__v8di) __A,
5286 return (__m512d)__builtin_shufflevector((__v8df)__A, (__v8df)__A,
5287 0, 0, 2, 2, 4, 4, 6, 6);
5293 return (__m512d)__builtin_ia32_selectpd_512((
__mmask8)__U,
5301 return (__m512d)__builtin_ia32_selectpd_512((
__mmask8)__U,
/* Fix up special double values in (A)/(B) using lookup table (C) and
 * immediate (imm), with explicit rounding control (R). */
#define _mm512_fixupimm_round_pd(A, B, C, imm, R) \
  (__m512d)__builtin_ia32_fixupimmpd512_mask( \
      (__v8df)(__m512d)(A), (__v8df)(__m512d)(B), \
      (__v8di)(__m512i)(C), (int)(imm), (__mmask8)-1, (int)(R))

/* Merge-masked variant: only lanes selected by (U) are updated. */
#define _mm512_mask_fixupimm_round_pd(A, U, B, C, imm, R) \
  (__m512d)__builtin_ia32_fixupimmpd512_mask( \
      (__v8df)(__m512d)(A), (__v8df)(__m512d)(B), \
      (__v8di)(__m512i)(C), (int)(imm), (__mmask8)(U), (int)(R))
/* Fix up special double values using table (C) and immediate (imm), with
 * the current rounding direction.
 * NOTE(review): the mask-argument lines ((__mmask8)-1 / (__mmask8)(U))
 * were lost in extraction; restored to match the adjacent *_round_pd
 * forms — confirm against the upstream header. */
#define _mm512_fixupimm_pd(A, B, C, imm) \
  (__m512d)__builtin_ia32_fixupimmpd512_mask((__v8df)(__m512d)(A), \
                                             (__v8df)(__m512d)(B), \
                                             (__v8di)(__m512i)(C), (int)(imm), \
                                             (__mmask8)-1, \
                                             _MM_FROUND_CUR_DIRECTION)

#define _mm512_mask_fixupimm_pd(A, U, B, C, imm) \
  (__m512d)__builtin_ia32_fixupimmpd512_mask((__v8df)(__m512d)(A), \
                                             (__v8df)(__m512d)(B), \
                                             (__v8di)(__m512i)(C), (int)(imm), \
                                             (__mmask8)(U), \
                                             _MM_FROUND_CUR_DIRECTION)
5332 #define _mm512_maskz_fixupimm_round_pd(U, A, B, C, imm, R) \
5333 (__m512d)__builtin_ia32_fixupimmpd512_maskz((__v8df)(__m512d)(A), \
5334 (__v8df)(__m512d)(B), \
5335 (__v8di)(__m512i)(C), \
5336 (int)(imm), (__mmask8)(U), \
5339 #define _mm512_maskz_fixupimm_pd(U, A, B, C, imm) \
5340 (__m512d)__builtin_ia32_fixupimmpd512_maskz((__v8df)(__m512d)(A), \
5341 (__v8df)(__m512d)(B), \
5342 (__v8di)(__m512i)(C), \
5343 (int)(imm), (__mmask8)(U), \
5344 _MM_FROUND_CUR_DIRECTION)
5346 #define _mm512_fixupimm_round_ps(A, B, C, imm, R) \
5347 (__m512)__builtin_ia32_fixupimmps512_mask((__v16sf)(__m512)(A), \
5348 (__v16sf)(__m512)(B), \
5349 (__v16si)(__m512i)(C), (int)(imm), \
5350 (__mmask16)-1, (int)(R))
5352 #define _mm512_mask_fixupimm_round_ps(A, U, B, C, imm, R) \
5353 (__m512)__builtin_ia32_fixupimmps512_mask((__v16sf)(__m512)(A), \
5354 (__v16sf)(__m512)(B), \
5355 (__v16si)(__m512i)(C), (int)(imm), \
5356 (__mmask16)(U), (int)(R))
5358 #define _mm512_fixupimm_ps(A, B, C, imm) \
5359 (__m512)__builtin_ia32_fixupimmps512_mask((__v16sf)(__m512)(A), \
5360 (__v16sf)(__m512)(B), \
5361 (__v16si)(__m512i)(C), (int)(imm), \
5363 _MM_FROUND_CUR_DIRECTION)
5365 #define _mm512_mask_fixupimm_ps(A, U, B, C, imm) \
5366 (__m512)__builtin_ia32_fixupimmps512_mask((__v16sf)(__m512)(A), \
5367 (__v16sf)(__m512)(B), \
5368 (__v16si)(__m512i)(C), (int)(imm), \
5370 _MM_FROUND_CUR_DIRECTION)
5372 #define _mm512_maskz_fixupimm_round_ps(U, A, B, C, imm, R) \
5373 (__m512)__builtin_ia32_fixupimmps512_maskz((__v16sf)(__m512)(A), \
5374 (__v16sf)(__m512)(B), \
5375 (__v16si)(__m512i)(C), \
5376 (int)(imm), (__mmask16)(U), \
5379 #define _mm512_maskz_fixupimm_ps(U, A, B, C, imm) \
5380 (__m512)__builtin_ia32_fixupimmps512_maskz((__v16sf)(__m512)(A), \
5381 (__v16sf)(__m512)(B), \
5382 (__v16si)(__m512i)(C), \
5383 (int)(imm), (__mmask16)(U), \
5384 _MM_FROUND_CUR_DIRECTION)
5386 #define _mm_fixupimm_round_sd(A, B, C, imm, R) \
5387 (__m128d)__builtin_ia32_fixupimmsd_mask((__v2df)(__m128d)(A), \
5388 (__v2df)(__m128d)(B), \
5389 (__v2di)(__m128i)(C), (int)(imm), \
5390 (__mmask8)-1, (int)(R))
5392 #define _mm_mask_fixupimm_round_sd(A, U, B, C, imm, R) \
5393 (__m128d)__builtin_ia32_fixupimmsd_mask((__v2df)(__m128d)(A), \
5394 (__v2df)(__m128d)(B), \
5395 (__v2di)(__m128i)(C), (int)(imm), \
5396 (__mmask8)(U), (int)(R))
5398 #define _mm_fixupimm_sd(A, B, C, imm) \
5399 (__m128d)__builtin_ia32_fixupimmsd_mask((__v2df)(__m128d)(A), \
5400 (__v2df)(__m128d)(B), \
5401 (__v2di)(__m128i)(C), (int)(imm), \
5403 _MM_FROUND_CUR_DIRECTION)
5405 #define _mm_mask_fixupimm_sd(A, U, B, C, imm) \
5406 (__m128d)__builtin_ia32_fixupimmsd_mask((__v2df)(__m128d)(A), \
5407 (__v2df)(__m128d)(B), \
5408 (__v2di)(__m128i)(C), (int)(imm), \
5410 _MM_FROUND_CUR_DIRECTION)
5412 #define _mm_maskz_fixupimm_round_sd(U, A, B, C, imm, R) \
5413 (__m128d)__builtin_ia32_fixupimmsd_maskz((__v2df)(__m128d)(A), \
5414 (__v2df)(__m128d)(B), \
5415 (__v2di)(__m128i)(C), (int)(imm), \
5416 (__mmask8)(U), (int)(R))
5418 #define _mm_maskz_fixupimm_sd(U, A, B, C, imm) \
5419 (__m128d)__builtin_ia32_fixupimmsd_maskz((__v2df)(__m128d)(A), \
5420 (__v2df)(__m128d)(B), \
5421 (__v2di)(__m128i)(C), (int)(imm), \
5423 _MM_FROUND_CUR_DIRECTION)
5425 #define _mm_fixupimm_round_ss(A, B, C, imm, R) \
5426 (__m128)__builtin_ia32_fixupimmss_mask((__v4sf)(__m128)(A), \
5427 (__v4sf)(__m128)(B), \
5428 (__v4si)(__m128i)(C), (int)(imm), \
5429 (__mmask8)-1, (int)(R))
5431 #define _mm_mask_fixupimm_round_ss(A, U, B, C, imm, R) \
5432 (__m128)__builtin_ia32_fixupimmss_mask((__v4sf)(__m128)(A), \
5433 (__v4sf)(__m128)(B), \
5434 (__v4si)(__m128i)(C), (int)(imm), \
5435 (__mmask8)(U), (int)(R))
5437 #define _mm_fixupimm_ss(A, B, C, imm) \
5438 (__m128)__builtin_ia32_fixupimmss_mask((__v4sf)(__m128)(A), \
5439 (__v4sf)(__m128)(B), \
5440 (__v4si)(__m128i)(C), (int)(imm), \
5442 _MM_FROUND_CUR_DIRECTION)
5444 #define _mm_mask_fixupimm_ss(A, U, B, C, imm) \
5445 (__m128)__builtin_ia32_fixupimmss_mask((__v4sf)(__m128)(A), \
5446 (__v4sf)(__m128)(B), \
5447 (__v4si)(__m128i)(C), (int)(imm), \
5449 _MM_FROUND_CUR_DIRECTION)
5451 #define _mm_maskz_fixupimm_round_ss(U, A, B, C, imm, R) \
5452 (__m128)__builtin_ia32_fixupimmss_maskz((__v4sf)(__m128)(A), \
5453 (__v4sf)(__m128)(B), \
5454 (__v4si)(__m128i)(C), (int)(imm), \
5455 (__mmask8)(U), (int)(R))
5457 #define _mm_maskz_fixupimm_ss(U, A, B, C, imm) \
5458 (__m128)__builtin_ia32_fixupimmss_maskz((__v4sf)(__m128)(A), \
5459 (__v4sf)(__m128)(B), \
5460 (__v4si)(__m128i)(C), (int)(imm), \
5462 _MM_FROUND_CUR_DIRECTION)
/* Scalar-double getexp with explicit rounding/SAE control (R); all mask
 * bits set, destination lanes seeded with zero. */
#define _mm_getexp_round_sd(A, B, R) \
  (__m128d)__builtin_ia32_getexpsd128_round_mask( \
      (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), \
      (__v2df)_mm_setzero_pd(), (__mmask8)-1, (int)(R))
5474 return (__m128d) __builtin_ia32_getexpsd128_round_mask ((__v2df) __A,
5481 return (__m128d) __builtin_ia32_getexpsd128_round_mask ( (__v2df) __A,
/* Merge-masked scalar-double getexp: pass-through source is (W),
 * selection controlled by (U), rounding control (R). */
#define _mm_mask_getexp_round_sd(W, U, A, B, R) \
  (__m128d)__builtin_ia32_getexpsd128_round_mask( \
      (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), \
      (__v2df)(__m128d)(W), (__mmask8)(U), (int)(R))
5497 return (__m128d) __builtin_ia32_getexpsd128_round_mask ( (__v2df) __A,
/* Zero-masked scalar-double getexp with rounding control (R). */
#define _mm_maskz_getexp_round_sd(U, A, B, R) \
  (__m128d)__builtin_ia32_getexpsd128_round_mask( \
      (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), \
      (__v2df)_mm_setzero_pd(), (__mmask8)(U), (int)(R))

/* Scalar-float getexp with explicit rounding/SAE control (R). */
#define _mm_getexp_round_ss(A, B, R) \
  (__m128)__builtin_ia32_getexpss128_round_mask( \
      (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), \
      (__v4sf)_mm_setzero_ps(), (__mmask8)-1, (int)(R))
5519 return (__m128) __builtin_ia32_getexpss128_round_mask ((__v4sf) __A,
5526 return (__m128) __builtin_ia32_getexpss128_round_mask ((__v4sf) __A,
/* Merge-masked scalar-float getexp: pass-through source is (W),
 * selection controlled by (U), rounding control (R). */
#define _mm_mask_getexp_round_ss(W, U, A, B, R) \
  (__m128)__builtin_ia32_getexpss128_round_mask( \
      (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), \
      (__v4sf)(__m128)(W), (__mmask8)(U), (int)(R))
5542 return (__m128) __builtin_ia32_getexpss128_round_mask ((__v4sf) __A,
/* Zero-masked scalar-float getexp with rounding control (R). */
#define _mm_maskz_getexp_round_ss(U, A, B, R) \
  (__m128)__builtin_ia32_getexpss128_round_mask( \
      (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), \
      (__v4sf)_mm_setzero_ps(), (__mmask8)(U), (int)(R))
/* Scalar-double getmant: (C) selects the normalization interval, (D) the
 * sign control; the two are packed into one immediate as (D<<2)|C. */
#define _mm_getmant_round_sd(A, B, C, D, R) \
  (__m128d)__builtin_ia32_getmantsd_round_mask( \
      (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), \
      (int)(((D)<<2) | (C)), (__v2df)_mm_setzero_pd(), \
      (__mmask8)-1, (int)(R))
5562 #define _mm_getmant_sd(A, B, C, D) \
5563 (__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \
5564 (__v2df)(__m128d)(B), \
5565 (int)(((D)<<2) | (C)), \
5566 (__v2df)_mm_setzero_pd(), \
5568 _MM_FROUND_CUR_DIRECTION)
5570 #define _mm_mask_getmant_sd(W, U, A, B, C, D) \
5571 (__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \
5572 (__v2df)(__m128d)(B), \
5573 (int)(((D)<<2) | (C)), \
5574 (__v2df)(__m128d)(W), \
5576 _MM_FROUND_CUR_DIRECTION)
/* Merge-masked scalar-double getmant; interval/sign controls packed as
 * (D<<2)|C, pass-through source (W), rounding control (R). */
#define _mm_mask_getmant_round_sd(W, U, A, B, C, D, R) \
  (__m128d)__builtin_ia32_getmantsd_round_mask( \
      (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), \
      (int)(((D)<<2) | (C)), (__v2df)(__m128d)(W), \
      (__mmask8)(U), (int)(R))
5585 #define _mm_maskz_getmant_sd(U, A, B, C, D) \
5586 (__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \
5587 (__v2df)(__m128d)(B), \
5588 (int)(((D)<<2) | (C)), \
5589 (__v2df)_mm_setzero_pd(), \
5591 _MM_FROUND_CUR_DIRECTION)
/* Zero-masked scalar-double getmant; controls packed as (D<<2)|C. */
#define _mm_maskz_getmant_round_sd(U, A, B, C, D, R) \
  (__m128d)__builtin_ia32_getmantsd_round_mask( \
      (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), \
      (int)(((D)<<2) | (C)), (__v2df)_mm_setzero_pd(), \
      (__mmask8)(U), (int)(R))

/* Scalar-float getmant with explicit rounding control (R); interval and
 * sign controls packed as (D<<2)|C. */
#define _mm_getmant_round_ss(A, B, C, D, R) \
  (__m128)__builtin_ia32_getmantss_round_mask( \
      (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), \
      (int)(((D)<<2) | (C)), (__v4sf)_mm_setzero_ps(), \
      (__mmask8)-1, (int)(R))
5607 #define _mm_getmant_ss(A, B, C, D) \
5608 (__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \
5609 (__v4sf)(__m128)(B), \
5610 (int)(((D)<<2) | (C)), \
5611 (__v4sf)_mm_setzero_ps(), \
5613 _MM_FROUND_CUR_DIRECTION)
5615 #define _mm_mask_getmant_ss(W, U, A, B, C, D) \
5616 (__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \
5617 (__v4sf)(__m128)(B), \
5618 (int)(((D)<<2) | (C)), \
5619 (__v4sf)(__m128)(W), \
5621 _MM_FROUND_CUR_DIRECTION)
/* Merge-masked scalar-float getmant; controls packed as (D<<2)|C,
 * pass-through source (W), rounding control (R). */
#define _mm_mask_getmant_round_ss(W, U, A, B, C, D, R) \
  (__m128)__builtin_ia32_getmantss_round_mask( \
      (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), \
      (int)(((D)<<2) | (C)), (__v4sf)(__m128)(W), \
      (__mmask8)(U), (int)(R))
5630 #define _mm_maskz_getmant_ss(U, A, B, C, D) \
5631 (__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \
5632 (__v4sf)(__m128)(B), \
5633 (int)(((D)<<2) | (C)), \
5634 (__v4sf)_mm_setzero_ps(), \
5636 _MM_FROUND_CUR_DIRECTION)
/* Zero-masked scalar-float getmant; controls packed as (D<<2)|C. */
#define _mm_maskz_getmant_round_ss(U, A, B, C, D, R) \
  (__m128)__builtin_ia32_getmantss_round_mask( \
      (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), \
      (int)(((D)<<2) | (C)), (__v4sf)_mm_setzero_ps(), \
      (__mmask8)(U), (int)(R))
/* Scalar compare returning an int: predicate (P), SAE/rounding control (R).
 * NOTE(review): the trailing "(int)(P), (int)(R))" argument lines were
 * lost in extraction; restored to match the builtins' documented
 * four-argument signature — confirm against the upstream header. */
#define _mm_comi_round_sd(A, B, P, R) \
  (int)__builtin_ia32_vcomisd((__v2df)(__m128d)(A), (__v2df)(__m128d)(B), \
                              (int)(P), (int)(R))

#define _mm_comi_round_ss(A, B, P, R) \
  (int)__builtin_ia32_vcomiss((__v4sf)(__m128)(A), (__v4sf)(__m128)(B), \
                              (int)(P), (int)(R))
/* Convert the low double of (A) to a 64-bit signed integer using rounding
 * control (R). */
#define _mm_cvt_roundsd_si64(A, R) (long long)__builtin_ia32_vcvtsd2si64((__v2df)(__m128d)(A), (int)(R))
5667 return (__m512i)__builtin_ia32_pslld512((__v16si) __A, (__v4si)__B);
5673 return (__m512i)__builtin_ia32_selectd_512((
__mmask16)__U,
5681 return (__m512i)__builtin_ia32_selectd_512((
__mmask16)__U,
5689 return (__m512i)__builtin_ia32_psllq512((__v8di)__A, (__v2di)__B);
5695 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__U,
5703 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__U,
5711 return (__m512i)__builtin_ia32_psllv16si((__v16si)__X, (__v16si)__Y);
5717 return (__m512i)__builtin_ia32_selectd_512((
__mmask16)__U,
5725 return (__m512i)__builtin_ia32_selectd_512((
__mmask16)__U,
5733 return (__m512i)__builtin_ia32_psllv8di((__v8di)__X, (__v8di)__Y);
5739 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__U,
5747 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__U,
5755 return (__m512i)__builtin_ia32_psrad512((__v16si) __A, (__v4si)__B);
5761 return (__m512i)__builtin_ia32_selectd_512((
__mmask16)__U,
5769 return (__m512i)__builtin_ia32_selectd_512((
__mmask16)__U,
5777 return (__m512i)__builtin_ia32_psraq512((__v8di)__A, (__v2di)__B);
5783 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__U,
5791 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__U,
5799 return (__m512i)__builtin_ia32_psrav16si((__v16si)__X, (__v16si)__Y);
5805 return (__m512i)__builtin_ia32_selectd_512((
__mmask16)__U,
5813 return (__m512i)__builtin_ia32_selectd_512((
__mmask16)__U,
5821 return (__m512i)__builtin_ia32_psrav8di((__v8di)__X, (__v8di)__Y);
5827 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__U,
5835 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__U,
5843 return (__m512i)__builtin_ia32_psrld512((__v16si) __A, (__v4si)__B);
5849 return (__m512i)__builtin_ia32_selectd_512((
__mmask16)__U,
5857 return (__m512i)__builtin_ia32_selectd_512((
__mmask16)__U,
5865 return (__m512i)__builtin_ia32_psrlq512((__v8di)__A, (__v2di)__B);
5871 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__U,
5879 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__U,
5887 return (__m512i)__builtin_ia32_psrlv16si((__v16si)__X, (__v16si)__Y);
5893 return (__m512i)__builtin_ia32_selectd_512((
__mmask16)__U,
5901 return (__m512i)__builtin_ia32_selectd_512((
__mmask16)__U,
5909 return (__m512i)__builtin_ia32_psrlv8di((__v8di)__X, (__v8di)__Y);
5915 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__U,
5923 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__U,
/* Bitwise ternary logic on three vectors: (imm) is the 8-bit truth table
 * applied bit-by-bit to (A), (B), (C).
 * NOTE(review): the final mask-argument line of four of these macros was
 * lost in extraction ((__mmask16)-1 / (__mmask16)(U) and 8-bit forms);
 * restored per the builtins' signature — confirm against the upstream
 * header. */
#define _mm512_ternarylogic_epi32(A, B, C, imm) \
  (__m512i)__builtin_ia32_pternlogd512_mask((__v16si)(__m512i)(A), \
                                            (__v16si)(__m512i)(B), \
                                            (__v16si)(__m512i)(C), (int)(imm), \
                                            (__mmask16)-1)

#define _mm512_mask_ternarylogic_epi32(A, U, B, C, imm) \
  (__m512i)__builtin_ia32_pternlogd512_mask((__v16si)(__m512i)(A), \
                                            (__v16si)(__m512i)(B), \
                                            (__v16si)(__m512i)(C), (int)(imm), \
                                            (__mmask16)(U))

#define _mm512_maskz_ternarylogic_epi32(U, A, B, C, imm) \
  (__m512i)__builtin_ia32_pternlogd512_maskz((__v16si)(__m512i)(A), \
                                             (__v16si)(__m512i)(B), \
                                             (__v16si)(__m512i)(C), \
                                             (int)(imm), (__mmask16)(U))

#define _mm512_ternarylogic_epi64(A, B, C, imm) \
  (__m512i)__builtin_ia32_pternlogq512_mask((__v8di)(__m512i)(A), \
                                            (__v8di)(__m512i)(B), \
                                            (__v8di)(__m512i)(C), (int)(imm), \
                                            (__mmask8)-1)

#define _mm512_mask_ternarylogic_epi64(A, U, B, C, imm) \
  (__m512i)__builtin_ia32_pternlogq512_mask((__v8di)(__m512i)(A), \
                                            (__v8di)(__m512i)(B), \
                                            (__v8di)(__m512i)(C), (int)(imm), \
                                            (__mmask8)(U))

#define _mm512_maskz_ternarylogic_epi64(U, A, B, C, imm) \
  (__m512i)__builtin_ia32_pternlogq512_maskz((__v8di)(__m512i)(A), \
                                             (__v8di)(__m512i)(B), \
                                             (__v8di)(__m512i)(C), (int)(imm), \
                                             (__mmask8)(U))
/* Convert the low double of (A) to an integer with rounding control (R). */
#define _mm_cvt_roundsd_i64(A, R)  (long long)__builtin_ia32_vcvtsd2si64((__v2df)(__m128d)(A), (int)(R))
#define _mm_cvt_roundsd_si32(A, R) (int)__builtin_ia32_vcvtsd2si32((__v2df)(__m128d)(A), (int)(R))
#define _mm_cvt_roundsd_i32(A, R)  (int)__builtin_ia32_vcvtsd2si32((__v2df)(__m128d)(A), (int)(R))
#define _mm_cvt_roundsd_u32(A, R)  (unsigned int)__builtin_ia32_vcvtsd2usi32((__v2df)(__m128d)(A), (int)(R))
5981 return (
unsigned) __builtin_ia32_vcvtsd2usi32 ((__v2df) __A,
5986 #define _mm_cvt_roundsd_u64(A, R) \
5987 (unsigned long long)__builtin_ia32_vcvtsd2usi64((__v2df)(__m128d)(A), \
5991 _mm_cvtsd_u64 (__m128d __A)
5993 return (
unsigned long long) __builtin_ia32_vcvtsd2usi64 ((__v2df)
/* Convert the low float of (A) to an integer with rounding control (R). */
#define _mm_cvt_roundss_si32(A, R) (int)__builtin_ia32_vcvtss2si32((__v4sf)(__m128)(A), (int)(R))
#define _mm_cvt_roundss_i32(A, R)  (int)__builtin_ia32_vcvtss2si32((__v4sf)(__m128)(A), (int)(R))
#define _mm_cvt_roundss_si64(A, R) (long long)__builtin_ia32_vcvtss2si64((__v4sf)(__m128)(A), (int)(R))
#define _mm_cvt_roundss_i64(A, R)  (long long)__builtin_ia32_vcvtss2si64((__v4sf)(__m128)(A), (int)(R))
#define _mm_cvt_roundss_u32(A, R)  (unsigned int)__builtin_ia32_vcvtss2usi32((__v4sf)(__m128)(A), (int)(R))
6019 return (
unsigned) __builtin_ia32_vcvtss2usi32 ((__v4sf) __A,
6024 #define _mm_cvt_roundss_u64(A, R) \
6025 (unsigned long long)__builtin_ia32_vcvtss2usi64((__v4sf)(__m128)(A), \
6029 _mm_cvtss_u64 (__m128 __A)
6031 return (
unsigned long long) __builtin_ia32_vcvtss2usi64 ((__v4sf)
/* Truncating conversion of the low double of (A) to a 32-bit signed int;
 * (R) is passed to the builtin as the rounding/SAE operand. */
#define _mm_cvtt_roundsd_i32(A, R)  (int)__builtin_ia32_vcvttsd2si32((__v2df)(__m128d)(A), (int)(R))
#define _mm_cvtt_roundsd_si32(A, R) (int)__builtin_ia32_vcvttsd2si32((__v2df)(__m128d)(A), (int)(R))
6046 return (
int) __builtin_ia32_vcvttsd2si32 ((__v2df) __A,
/* Truncating conversion of the low double of (A) to a 64-bit signed int. */
#define _mm_cvtt_roundsd_si64(A, R) (long long)__builtin_ia32_vcvttsd2si64((__v2df)(__m128d)(A), (int)(R))
#define _mm_cvtt_roundsd_i64(A, R)  (long long)__builtin_ia32_vcvttsd2si64((__v2df)(__m128d)(A), (int)(R))
6058 _mm_cvttsd_i64 (__m128d __A)
6060 return (
long long) __builtin_ia32_vcvttsd2si64 ((__v2df) __A,
/* Truncating conversion of the low double of (A) to a 32-bit unsigned int. */
#define _mm_cvtt_roundsd_u32(A, R) (unsigned int)__builtin_ia32_vcvttsd2usi32((__v2df)(__m128d)(A), (int)(R))
6071 return (
unsigned) __builtin_ia32_vcvttsd2usi32 ((__v2df) __A,
6076 #define _mm_cvtt_roundsd_u64(A, R) \
6077 (unsigned long long)__builtin_ia32_vcvttsd2usi64((__v2df)(__m128d)(A), \
6081 _mm_cvttsd_u64 (__m128d __A)
6083 return (
unsigned long long) __builtin_ia32_vcvttsd2usi64 ((__v2df)
/* Truncating (vcvtt*) conversions of the low float of A to signed
   32/64-bit integers; R selects the SAE/rounding behaviour. */
6089 #define _mm_cvtt_roundss_i32(A, R) \
6090 (int)__builtin_ia32_vcvttss2si32((__v4sf)(__m128)(A), (int)(R))
6092 #define _mm_cvtt_roundss_si32(A, R) \
6093 (int)__builtin_ia32_vcvttss2si32((__v4sf)(__m128)(A), (int)(R))
/* NOTE(review): truncated inline-function body below (declaration and
   closing lines missing from this copy); left as-is. */
6098 return (
int) __builtin_ia32_vcvttss2si32 ((__v4sf) __A,
6103 #define _mm_cvtt_roundss_i64(A, R) \
6104 (long long)__builtin_ia32_vcvttss2si64((__v4sf)(__m128)(A), (int)(R))
6106 #define _mm_cvtt_roundss_si64(A, R) \
6107 (long long)__builtin_ia32_vcvttss2si64((__v4sf)(__m128)(A), (int)(R))
/* NOTE(review): another truncated function body (_mm_cvttss_i64). */
6110 _mm_cvttss_i64 (__m128 __A)
6112 return (
long long) __builtin_ia32_vcvttss2si64 ((__v4sf) __A,
6117 #define _mm_cvtt_roundss_u32(A, R) \
6118 (unsigned int)__builtin_ia32_vcvttss2usi32((__v4sf)(__m128)(A), (int)(R))
6123 return (
unsigned) __builtin_ia32_vcvttss2usi32 ((__v4sf) __A,
/* Truncating conversion of the low float of A to unsigned 64-bit.
   FIX(review): restored the missing final `(int)(R))` continuation
   line (the visible text ended with a dangling `\`). */
6128 #define _mm_cvtt_roundss_u64(A, R) \
6129 (unsigned long long)__builtin_ia32_vcvttss2usi64((__v4sf)(__m128)(A), \
6130 (int)(R))
6133 _mm_cvttss_u64 (__m128 __A)
6135 return (
unsigned long long) __builtin_ia32_vcvttss2usi64 ((__v4sf)
/* Immediate-controlled element permute (vpermilpd/vpermilps builtins).
   mask/maskz variants blend the permuted result with W, or with zero,
   under write-mask U via the select builtins. */
6141 #define _mm512_permute_pd(X, C) \
6142 (__m512d)__builtin_ia32_vpermilpd512((__v8df)(__m512d)(X), (int)(C))
6144 #define _mm512_mask_permute_pd(W, U, X, C) \
6145 (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
6146 (__v8df)_mm512_permute_pd((X), (C)), \
6147 (__v8df)(__m512d)(W))
6149 #define _mm512_maskz_permute_pd(U, X, C) \
6150 (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
6151 (__v8df)_mm512_permute_pd((X), (C)), \
6152 (__v8df)_mm512_setzero_pd())
6154 #define _mm512_permute_ps(X, C) \
6155 (__m512)__builtin_ia32_vpermilps512((__v16sf)(__m512)(X), (int)(C))
6157 #define _mm512_mask_permute_ps(W, U, X, C) \
6158 (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
6159 (__v16sf)_mm512_permute_ps((X), (C)), \
6160 (__v16sf)(__m512)(W))
6162 #define _mm512_maskz_permute_ps(U, X, C) \
6163 (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
6164 (__v16sf)_mm512_permute_ps((X), (C)), \
6165 (__v16sf)_mm512_setzero_ps())
6170 return (__m512d)__builtin_ia32_vpermilvarpd512((__v8df)__A, (__v8di)
__C);
6176 return (__m512d)__builtin_ia32_selectpd_512((
__mmask8)__U,
6184 return (__m512d)__builtin_ia32_selectpd_512((
__mmask8)__U,
6192 return (__m512)__builtin_ia32_vpermilvarps512((__v16sf)__A, (__v16si)
__C);
6198 return (__m512)__builtin_ia32_selectps_512((
__mmask16)__U,
6206 return (__m512)__builtin_ia32_selectps_512((
__mmask16)__U,
6214 return (__m512d)__builtin_ia32_vpermi2varpd512((__v8df)__A, (__v8di)__I,
6221 return (__m512d)__builtin_ia32_selectpd_512(__U,
6230 return (__m512d)__builtin_ia32_selectpd_512(__U,
6232 (__v8df)(__m512d)__I);
6239 return (__m512d)__builtin_ia32_selectpd_512(__U,
6247 return (__m512)__builtin_ia32_vpermi2varps512((__v16sf)__A, (__v16si)__I,
6254 return (__m512)__builtin_ia32_selectps_512(__U,
6262 return (__m512)__builtin_ia32_selectps_512(__U,
6264 (__v16sf)(__m512)__I);
6270 return (__m512)__builtin_ia32_selectps_512(__U,
/* Truncating convert of 8 doubles to 8 unsigned 32-bit ints with an
   explicit rounding/SAE argument R.  The masked-out element source is
   undefined, W, or zero for the plain/mask/maskz forms respectively. */
6276 #define _mm512_cvtt_roundpd_epu32(A, R) \
6277 (__m256i)__builtin_ia32_cvttpd2udq512_mask((__v8df)(__m512d)(A), \
6278 (__v8si)_mm256_undefined_si256(), \
6279 (__mmask8)-1, (int)(R))
6281 #define _mm512_mask_cvtt_roundpd_epu32(W, U, A, R) \
6282 (__m256i)__builtin_ia32_cvttpd2udq512_mask((__v8df)(__m512d)(A), \
6283 (__v8si)(__m256i)(W), \
6284 (__mmask8)(U), (int)(R))
6286 #define _mm512_maskz_cvtt_roundpd_epu32(U, A, R) \
6287 (__m256i)__builtin_ia32_cvttpd2udq512_mask((__v8df)(__m512d)(A), \
6288 (__v8si)_mm256_setzero_si256(), \
6289 (__mmask8)(U), (int)(R))
6294 return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
6304 return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
6313 return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
/* VRNDSCALESD/SS: round the low double/float element to the precision
   selected by the immediate, upper elements copied from A.  *_round_*
   forms take an explicit rounding/SAE argument R; the others pass
   _MM_FROUND_CUR_DIRECTION.  mask/maskz variants supply pass-through W
   or zero.
   FIX(review): each *_round_* macro in this copy was missing its final
   `(int)(R))` continuation line (the dangling `\` would have spliced
   the next #define into the macro); restored below. */
6320 #define _mm_roundscale_round_sd(A, B, imm, R) \
6321 (__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \
6322 (__v2df)(__m128d)(B), \
6323 (__v2df)_mm_setzero_pd(), \
6324 (__mmask8)-1, (int)(imm), \
6325 (int)(R))
6327 #define _mm_roundscale_sd(A, B, imm) \
6328 (__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \
6329 (__v2df)(__m128d)(B), \
6330 (__v2df)_mm_setzero_pd(), \
6331 (__mmask8)-1, (int)(imm), \
6332 _MM_FROUND_CUR_DIRECTION)
6334 #define _mm_mask_roundscale_sd(W, U, A, B, imm) \
6335 (__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \
6336 (__v2df)(__m128d)(B), \
6337 (__v2df)(__m128d)(W), \
6338 (__mmask8)(U), (int)(imm), \
6339 _MM_FROUND_CUR_DIRECTION)
6341 #define _mm_mask_roundscale_round_sd(W, U, A, B, I, R) \
6342 (__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \
6343 (__v2df)(__m128d)(B), \
6344 (__v2df)(__m128d)(W), \
6345 (__mmask8)(U), (int)(I), \
6346 (int)(R))
6348 #define _mm_maskz_roundscale_sd(U, A, B, I) \
6349 (__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \
6350 (__v2df)(__m128d)(B), \
6351 (__v2df)_mm_setzero_pd(), \
6352 (__mmask8)(U), (int)(I), \
6353 _MM_FROUND_CUR_DIRECTION)
6355 #define _mm_maskz_roundscale_round_sd(U, A, B, I, R) \
6356 (__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \
6357 (__v2df)(__m128d)(B), \
6358 (__v2df)_mm_setzero_pd(), \
6359 (__mmask8)(U), (int)(I), \
6360 (int)(R))
6362 #define _mm_roundscale_round_ss(A, B, imm, R) \
6363 (__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \
6364 (__v4sf)(__m128)(B), \
6365 (__v4sf)_mm_setzero_ps(), \
6366 (__mmask8)-1, (int)(imm), \
6367 (int)(R))
6369 #define _mm_roundscale_ss(A, B, imm) \
6370 (__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \
6371 (__v4sf)(__m128)(B), \
6372 (__v4sf)_mm_setzero_ps(), \
6373 (__mmask8)-1, (int)(imm), \
6374 _MM_FROUND_CUR_DIRECTION)
6376 #define _mm_mask_roundscale_ss(W, U, A, B, I) \
6377 (__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \
6378 (__v4sf)(__m128)(B), \
6379 (__v4sf)(__m128)(W), \
6380 (__mmask8)(U), (int)(I), \
6381 _MM_FROUND_CUR_DIRECTION)
6383 #define _mm_mask_roundscale_round_ss(W, U, A, B, I, R) \
6384 (__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \
6385 (__v4sf)(__m128)(B), \
6386 (__v4sf)(__m128)(W), \
6387 (__mmask8)(U), (int)(I), \
6388 (int)(R))
6390 #define _mm_maskz_roundscale_ss(U, A, B, I) \
6391 (__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \
6392 (__v4sf)(__m128)(B), \
6393 (__v4sf)_mm_setzero_ps(), \
6394 (__mmask8)(U), (int)(I), \
6395 _MM_FROUND_CUR_DIRECTION)
6397 #define _mm_maskz_roundscale_round_ss(U, A, B, I, R) \
6398 (__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \
6399 (__v4sf)(__m128)(B), \
6400 (__v4sf)_mm_setzero_ps(), \
6401 (__mmask8)(U), (int)(I), \
6402 (int)(R))
/* VSCALEFPD/PS (scalefpd512/scalefps512 builtins) with explicit
   rounding argument R; mask/maskz variants take pass-through W or
   zero, plain forms use an undefined pass-through. */
6404 #define _mm512_scalef_round_pd(A, B, R) \
6405 (__m512d)__builtin_ia32_scalefpd512_mask((__v8df)(__m512d)(A), \
6406 (__v8df)(__m512d)(B), \
6407 (__v8df)_mm512_undefined_pd(), \
6408 (__mmask8)-1, (int)(R))
6410 #define _mm512_mask_scalef_round_pd(W, U, A, B, R) \
6411 (__m512d)__builtin_ia32_scalefpd512_mask((__v8df)(__m512d)(A), \
6412 (__v8df)(__m512d)(B), \
6413 (__v8df)(__m512d)(W), \
6414 (__mmask8)(U), (int)(R))
6416 #define _mm512_maskz_scalef_round_pd(U, A, B, R) \
6417 (__m512d)__builtin_ia32_scalefpd512_mask((__v8df)(__m512d)(A), \
6418 (__v8df)(__m512d)(B), \
6419 (__v8df)_mm512_setzero_pd(), \
6420 (__mmask8)(U), (int)(R))
/* NOTE(review): truncated inline-function bodies below (declarations
   and closing lines missing from this copy); left as-is. */
6425 return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
6436 return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
6446 return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
6454 #define _mm512_scalef_round_ps(A, B, R) \
6455 (__m512)__builtin_ia32_scalefps512_mask((__v16sf)(__m512)(A), \
6456 (__v16sf)(__m512)(B), \
6457 (__v16sf)_mm512_undefined_ps(), \
6458 (__mmask16)-1, (int)(R))
6460 #define _mm512_mask_scalef_round_ps(W, U, A, B, R) \
6461 (__m512)__builtin_ia32_scalefps512_mask((__v16sf)(__m512)(A), \
6462 (__v16sf)(__m512)(B), \
6463 (__v16sf)(__m512)(W), \
6464 (__mmask16)(U), (int)(R))
6466 #define _mm512_maskz_scalef_round_ps(U, A, B, R) \
6467 (__m512)__builtin_ia32_scalefps512_mask((__v16sf)(__m512)(A), \
6468 (__v16sf)(__m512)(B), \
6469 (__v16sf)_mm512_setzero_ps(), \
6470 (__mmask16)(U), (int)(R))
6475 return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
6486 return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
6496 return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
/* Scalar VSCALEFSD/SS with explicit rounding argument R; mask/maskz
   variants take pass-through W or zero.
   FIX(review): _mm_maskz_scalef_round_ss below was missing its final
   `(__mmask8)(U), (int)(R))` continuation line in this copy; restored
   to match the sd variant. */
6504 #define _mm_scalef_round_sd(A, B, R) \
6505 (__m128d)__builtin_ia32_scalefsd_round_mask((__v2df)(__m128d)(A), \
6506 (__v2df)(__m128d)(B), \
6507 (__v2df)_mm_setzero_pd(), \
6508 (__mmask8)-1, (int)(R))
/* NOTE(review): truncated inline-function bodies below (declarations
   and closing lines missing from this copy); left as-is. */
6513 return (__m128d) __builtin_ia32_scalefsd_round_mask ((__v2df) __A,
6522 return (__m128d) __builtin_ia32_scalefsd_round_mask ( (__v2df) __A,
6529 #define _mm_mask_scalef_round_sd(W, U, A, B, R) \
6530 (__m128d)__builtin_ia32_scalefsd_round_mask((__v2df)(__m128d)(A), \
6531 (__v2df)(__m128d)(B), \
6532 (__v2df)(__m128d)(W), \
6533 (__mmask8)(U), (int)(R))
6538 return (__m128d) __builtin_ia32_scalefsd_round_mask ( (__v2df) __A,
6545 #define _mm_maskz_scalef_round_sd(U, A, B, R) \
6546 (__m128d)__builtin_ia32_scalefsd_round_mask((__v2df)(__m128d)(A), \
6547 (__v2df)(__m128d)(B), \
6548 (__v2df)_mm_setzero_pd(), \
6549 (__mmask8)(U), (int)(R))
6551 #define _mm_scalef_round_ss(A, B, R) \
6552 (__m128)__builtin_ia32_scalefss_round_mask((__v4sf)(__m128)(A), \
6553 (__v4sf)(__m128)(B), \
6554 (__v4sf)_mm_setzero_ps(), \
6555 (__mmask8)-1, (int)(R))
6560 return (__m128) __builtin_ia32_scalefss_round_mask ((__v4sf) __A,
6569 return (__m128) __builtin_ia32_scalefss_round_mask ( (__v4sf) __A,
6576 #define _mm_mask_scalef_round_ss(W, U, A, B, R) \
6577 (__m128)__builtin_ia32_scalefss_round_mask((__v4sf)(__m128)(A), \
6578 (__v4sf)(__m128)(B), \
6579 (__v4sf)(__m128)(W), \
6580 (__mmask8)(U), (int)(R))
6585 return (__m128) __builtin_ia32_scalefss_round_mask ( (__v4sf) __A,
6592 #define _mm_maskz_scalef_round_ss(U, A, B, R) \
6593 (__m128)__builtin_ia32_scalefss_round_mask((__v4sf)(__m128)(A), \
6594 (__v4sf)(__m128)(B), \
6595 (__v4sf)_mm_setzero_ps(), \
6596 (__mmask8)(U), (int)(R))
6602 return (__m512i)__builtin_ia32_psradi512((__v16si)__A, __B);
6609 return (__m512i)__builtin_ia32_selectd_512((
__mmask16)__U,
6617 return (__m512i)__builtin_ia32_selectd_512((
__mmask16)__U,
6625 return (__m512i)__builtin_ia32_psraqi512((__v8di)__A, __B);
6631 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__U,
6639 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__U,
/* 128-bit-lane shuffles (shuf_f32x4 / shuf_f64x2 / shuf_i32x4 /
   shuf_i64x2) and classic shufpd/shufps, each controlled by an 8-bit
   immediate; mask/maskz variants blend with W or zero under mask U. */
6644 #define _mm512_shuffle_f32x4(A, B, imm) \
6645 (__m512)__builtin_ia32_shuf_f32x4((__v16sf)(__m512)(A), \
6646 (__v16sf)(__m512)(B), (int)(imm))
6648 #define _mm512_mask_shuffle_f32x4(W, U, A, B, imm) \
6649 (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
6650 (__v16sf)_mm512_shuffle_f32x4((A), (B), (imm)), \
6651 (__v16sf)(__m512)(W))
6653 #define _mm512_maskz_shuffle_f32x4(U, A, B, imm) \
6654 (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
6655 (__v16sf)_mm512_shuffle_f32x4((A), (B), (imm)), \
6656 (__v16sf)_mm512_setzero_ps())
6658 #define _mm512_shuffle_f64x2(A, B, imm) \
6659 (__m512d)__builtin_ia32_shuf_f64x2((__v8df)(__m512d)(A), \
6660 (__v8df)(__m512d)(B), (int)(imm))
6662 #define _mm512_mask_shuffle_f64x2(W, U, A, B, imm) \
6663 (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
6664 (__v8df)_mm512_shuffle_f64x2((A), (B), (imm)), \
6665 (__v8df)(__m512d)(W))
6667 #define _mm512_maskz_shuffle_f64x2(U, A, B, imm) \
6668 (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
6669 (__v8df)_mm512_shuffle_f64x2((A), (B), (imm)), \
6670 (__v8df)_mm512_setzero_pd())
6672 #define _mm512_shuffle_i32x4(A, B, imm) \
6673 (__m512i)__builtin_ia32_shuf_i32x4((__v16si)(__m512i)(A), \
6674 (__v16si)(__m512i)(B), (int)(imm))
6676 #define _mm512_mask_shuffle_i32x4(W, U, A, B, imm) \
6677 (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
6678 (__v16si)_mm512_shuffle_i32x4((A), (B), (imm)), \
6679 (__v16si)(__m512i)(W))
6681 #define _mm512_maskz_shuffle_i32x4(U, A, B, imm) \
6682 (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
6683 (__v16si)_mm512_shuffle_i32x4((A), (B), (imm)), \
6684 (__v16si)_mm512_setzero_si512())
6686 #define _mm512_shuffle_i64x2(A, B, imm) \
6687 (__m512i)__builtin_ia32_shuf_i64x2((__v8di)(__m512i)(A), \
6688 (__v8di)(__m512i)(B), (int)(imm))
6690 #define _mm512_mask_shuffle_i64x2(W, U, A, B, imm) \
6691 (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
6692 (__v8di)_mm512_shuffle_i64x2((A), (B), (imm)), \
6693 (__v8di)(__m512i)(W))
6695 #define _mm512_maskz_shuffle_i64x2(U, A, B, imm) \
6696 (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
6697 (__v8di)_mm512_shuffle_i64x2((A), (B), (imm)), \
6698 (__v8di)_mm512_setzero_si512())
6700 #define _mm512_shuffle_pd(A, B, M) \
6701 (__m512d)__builtin_ia32_shufpd512((__v8df)(__m512d)(A), \
6702 (__v8df)(__m512d)(B), (int)(M))
6704 #define _mm512_mask_shuffle_pd(W, U, A, B, M) \
6705 (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
6706 (__v8df)_mm512_shuffle_pd((A), (B), (M)), \
6707 (__v8df)(__m512d)(W))
6709 #define _mm512_maskz_shuffle_pd(U, A, B, M) \
6710 (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
6711 (__v8df)_mm512_shuffle_pd((A), (B), (M)), \
6712 (__v8df)_mm512_setzero_pd())
6714 #define _mm512_shuffle_ps(A, B, M) \
6715 (__m512)__builtin_ia32_shufps512((__v16sf)(__m512)(A), \
6716 (__v16sf)(__m512)(B), (int)(M))
6718 #define _mm512_mask_shuffle_ps(W, U, A, B, M) \
6719 (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
6720 (__v16sf)_mm512_shuffle_ps((A), (B), (M)), \
6721 (__v16sf)(__m512)(W))
6723 #define _mm512_maskz_shuffle_ps(U, A, B, M) \
6724 (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
6725 (__v16sf)_mm512_shuffle_ps((A), (B), (M)), \
6726 (__v16sf)_mm512_setzero_ps())
/* Scalar square root of the low element of B (upper elements from A)
   with explicit rounding argument R; mask/maskz variants take
   pass-through W or zero.
   FIX(review): _mm_mask_sqrt_round_ss below was missing its final
   `(int)(R))` continuation line in this copy; restored to match the
   sd variant. */
6728 #define _mm_sqrt_round_sd(A, B, R) \
6729 (__m128d)__builtin_ia32_sqrtsd_round_mask((__v2df)(__m128d)(A), \
6730 (__v2df)(__m128d)(B), \
6731 (__v2df)_mm_setzero_pd(), \
6732 (__mmask8)-1, (int)(R))
/* NOTE(review): truncated inline-function bodies interleaved below
   (declarations and closing lines missing from this copy); as-is. */
6737 return (__m128d) __builtin_ia32_sqrtsd_round_mask ( (__v2df) __A,
6744 #define _mm_mask_sqrt_round_sd(W, U, A, B, R) \
6745 (__m128d)__builtin_ia32_sqrtsd_round_mask((__v2df)(__m128d)(A), \
6746 (__v2df)(__m128d)(B), \
6747 (__v2df)(__m128d)(W), \
6748 (__mmask8)(U), (int)(R))
6753 return (__m128d) __builtin_ia32_sqrtsd_round_mask ( (__v2df) __A,
6760 #define _mm_maskz_sqrt_round_sd(U, A, B, R) \
6761 (__m128d)__builtin_ia32_sqrtsd_round_mask((__v2df)(__m128d)(A), \
6762 (__v2df)(__m128d)(B), \
6763 (__v2df)_mm_setzero_pd(), \
6764 (__mmask8)(U), (int)(R))
6766 #define _mm_sqrt_round_ss(A, B, R) \
6767 (__m128)__builtin_ia32_sqrtss_round_mask((__v4sf)(__m128)(A), \
6768 (__v4sf)(__m128)(B), \
6769 (__v4sf)_mm_setzero_ps(), \
6770 (__mmask8)-1, (int)(R))
6775 return (__m128) __builtin_ia32_sqrtss_round_mask ( (__v4sf) __A,
6782 #define _mm_mask_sqrt_round_ss(W, U, A, B, R) \
6783 (__m128)__builtin_ia32_sqrtss_round_mask((__v4sf)(__m128)(A), \
6784 (__v4sf)(__m128)(B), \
6785 (__v4sf)(__m128)(W), (__mmask8)(U), \
6786 (int)(R))
6791 return (__m128) __builtin_ia32_sqrtss_round_mask ( (__v4sf) __A,
6798 #define _mm_maskz_sqrt_round_ss(U, A, B, R) \
6799 (__m128)__builtin_ia32_sqrtss_round_mask((__v4sf)(__m128)(A), \
6800 (__v4sf)(__m128)(B), \
6801 (__v4sf)_mm_setzero_ps(), \
6802 (__mmask8)(U), (int)(R))
6807 return (__m512)__builtin_shufflevector((__v4sf)__A, (__v4sf)__A,
6808 0, 1, 2, 3, 0, 1, 2, 3,
6809 0, 1, 2, 3, 0, 1, 2, 3);
6815 return (__m512)__builtin_ia32_selectps_512((
__mmask16)__M,
6823 return (__m512)__builtin_ia32_selectps_512((
__mmask16)__M,
6831 return (__m512d)__builtin_shufflevector((__v4df)__A, (__v4df)__A,
6832 0, 1, 2, 3, 0, 1, 2, 3);
6838 return (__m512d)__builtin_ia32_selectpd_512((
__mmask8)__M,
6846 return (__m512d)__builtin_ia32_selectpd_512((
__mmask8)__M,
6854 return (__m512i)__builtin_shufflevector((__v4si)__A, (__v4si)__A,
6855 0, 1, 2, 3, 0, 1, 2, 3,
6856 0, 1, 2, 3, 0, 1, 2, 3);
6862 return (__m512i)__builtin_ia32_selectd_512((
__mmask16)__M,
6870 return (__m512i)__builtin_ia32_selectd_512((
__mmask16)__M,
6878 return (__m512i)__builtin_shufflevector((__v4di)__A, (__v4di)__A,
6879 0, 1, 2, 3, 0, 1, 2, 3);
6885 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__M,
6893 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__M,
6901 return (__m512d)__builtin_ia32_selectpd_512(__M,
6909 return (__m512d)__builtin_ia32_selectpd_512(__M,
6917 return (__m512)__builtin_ia32_selectps_512(__M,
6925 return (__m512)__builtin_ia32_selectps_512(__M,
6933 return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A,
6941 return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A,
6942 (__v16qi) __O, __M);
6948 return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A,
6956 __builtin_ia32_pmovsdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M);
6962 return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A,
6970 return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A,
6971 (__v16hi) __O, __M);
6977 return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A,
6985 __builtin_ia32_pmovsdw512mem_mask ((__v16hi*) __P, (__v16si) __A, __M);
6991 return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A,
6999 return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A,
7000 (__v16qi) __O, __M);
7006 return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A,
7014 __builtin_ia32_pmovsqb512mem_mask ((__v16qi *) __P, (__v8di) __A, __M);
7020 return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A,
7028 return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A,
7035 return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A,
7043 __builtin_ia32_pmovsqd512mem_mask ((__v8si *) __P, (__v8di) __A, __M);
7049 return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A,
7057 return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A,
7064 return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A,
7072 __builtin_ia32_pmovsqw512mem_mask ((__v8hi *) __P, (__v8di) __A, __M);
7078 return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A,
7086 return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A,
7094 return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A,
7102 __builtin_ia32_pmovusdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M);
7108 return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A,
7116 return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A,
7124 return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A,
7132 __builtin_ia32_pmovusdw512mem_mask ((__v16hi*) __P, (__v16si) __A, __M);
7138 return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A,
7146 return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A,
7154 return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A,
7162 __builtin_ia32_pmovusqb512mem_mask ((__v16qi *) __P, (__v8di) __A, __M);
7168 return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A,
7176 return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A,
7183 return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A,
7191 __builtin_ia32_pmovusqd512mem_mask ((__v8si*) __P, (__v8di) __A, __M);
7197 return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A,
7205 return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A,
7212 return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A,
7220 __builtin_ia32_pmovusqw512mem_mask ((__v8hi*) __P, (__v8di) __A, __M);
7226 return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A,
7234 return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A,
7235 (__v16qi) __O, __M);
7241 return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A,
7249 __builtin_ia32_pmovdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M);
7255 return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A,
7263 return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A,
7264 (__v16hi) __O, __M);
7270 return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A,
7278 __builtin_ia32_pmovdw512mem_mask ((__v16hi *) __P, (__v16si) __A, __M);
7284 return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A,
7292 return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A,
7293 (__v16qi) __O, __M);
7299 return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A,
7307 __builtin_ia32_pmovqb512mem_mask ((__v16qi *) __P, (__v8di) __A, __M);
7313 return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A,
7321 return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A,
7328 return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A,
7336 __builtin_ia32_pmovqd512mem_mask ((__v8si *) __P, (__v8di) __A, __M);
7342 return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A,
7350 return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A,
7357 return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A,
7365 __builtin_ia32_pmovqw512mem_mask ((__v8hi *) __P, (__v8di) __A, __M);
/* Extract a 128-bit (i32x4) or 256-bit (i64x4) lane selected by imm;
   mask/maskz variants supply pass-through W or zero.
   FIX(review): every macro in this group was missing its final
   write-mask continuation line in this copy (each visible body ended
   with a dangling `\`); restored `(__mmask8)-1)` / `(__mmask8)(U))`
   to match the builtin's argument pattern used elsewhere in the file. */
7368 #define _mm512_extracti32x4_epi32(A, imm) \
7369 (__m128i)__builtin_ia32_extracti32x4_mask((__v16si)(__m512i)(A), (int)(imm), \
7370 (__v4si)_mm_undefined_si128(), \
7371 (__mmask8)-1)
7373 #define _mm512_mask_extracti32x4_epi32(W, U, A, imm) \
7374 (__m128i)__builtin_ia32_extracti32x4_mask((__v16si)(__m512i)(A), (int)(imm), \
7375 (__v4si)(__m128i)(W), \
7376 (__mmask8)(U))
7378 #define _mm512_maskz_extracti32x4_epi32(U, A, imm) \
7379 (__m128i)__builtin_ia32_extracti32x4_mask((__v16si)(__m512i)(A), (int)(imm), \
7380 (__v4si)_mm_setzero_si128(), \
7381 (__mmask8)(U))
7383 #define _mm512_extracti64x4_epi64(A, imm) \
7384 (__m256i)__builtin_ia32_extracti64x4_mask((__v8di)(__m512i)(A), (int)(imm), \
7385 (__v4di)_mm256_undefined_si256(), \
7386 (__mmask8)-1)
7388 #define _mm512_mask_extracti64x4_epi64(W, U, A, imm) \
7389 (__m256i)__builtin_ia32_extracti64x4_mask((__v8di)(__m512i)(A), (int)(imm), \
7390 (__v4di)(__m256i)(W), \
7391 (__mmask8)(U))
7393 #define _mm512_maskz_extracti64x4_epi64(U, A, imm) \
7394 (__m256i)__builtin_ia32_extracti64x4_mask((__v8di)(__m512i)(A), (int)(imm), \
7395 (__v4di)_mm256_setzero_si256(), \
7396 (__mmask8)(U))
/* Insert a 256-bit (f64x4/i64x4) or 128-bit (f32x4/i32x4) lane at the
   position selected by imm; mask/maskz variants blend the inserted
   result with W or zero under mask U via the select builtins. */
7398 #define _mm512_insertf64x4(A, B, imm) \
7399 (__m512d)__builtin_ia32_insertf64x4((__v8df)(__m512d)(A), \
7400 (__v4df)(__m256d)(B), (int)(imm))
7402 #define _mm512_mask_insertf64x4(W, U, A, B, imm) \
7403 (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
7404 (__v8df)_mm512_insertf64x4((A), (B), (imm)), \
7405 (__v8df)(__m512d)(W))
7407 #define _mm512_maskz_insertf64x4(U, A, B, imm) \
7408 (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
7409 (__v8df)_mm512_insertf64x4((A), (B), (imm)), \
7410 (__v8df)_mm512_setzero_pd())
7412 #define _mm512_inserti64x4(A, B, imm) \
7413 (__m512i)__builtin_ia32_inserti64x4((__v8di)(__m512i)(A), \
7414 (__v4di)(__m256i)(B), (int)(imm))
7416 #define _mm512_mask_inserti64x4(W, U, A, B, imm) \
7417 (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
7418 (__v8di)_mm512_inserti64x4((A), (B), (imm)), \
7419 (__v8di)(__m512i)(W))
7421 #define _mm512_maskz_inserti64x4(U, A, B, imm) \
7422 (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
7423 (__v8di)_mm512_inserti64x4((A), (B), (imm)), \
7424 (__v8di)_mm512_setzero_si512())
7426 #define _mm512_insertf32x4(A, B, imm) \
7427 (__m512)__builtin_ia32_insertf32x4((__v16sf)(__m512)(A), \
7428 (__v4sf)(__m128)(B), (int)(imm))
7430 #define _mm512_mask_insertf32x4(W, U, A, B, imm) \
7431 (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
7432 (__v16sf)_mm512_insertf32x4((A), (B), (imm)), \
7433 (__v16sf)(__m512)(W))
7435 #define _mm512_maskz_insertf32x4(U, A, B, imm) \
7436 (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
7437 (__v16sf)_mm512_insertf32x4((A), (B), (imm)), \
7438 (__v16sf)_mm512_setzero_ps())
7440 #define _mm512_inserti32x4(A, B, imm) \
7441 (__m512i)__builtin_ia32_inserti32x4((__v16si)(__m512i)(A), \
7442 (__v4si)(__m128i)(B), (int)(imm))
7444 #define _mm512_mask_inserti32x4(W, U, A, B, imm) \
7445 (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
7446 (__v16si)_mm512_inserti32x4((A), (B), (imm)), \
7447 (__v16si)(__m512i)(W))
7449 #define _mm512_maskz_inserti32x4(U, A, B, imm) \
7450 (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
7451 (__v16si)_mm512_inserti32x4((A), (B), (imm)), \
7452 (__v16si)_mm512_setzero_si512())
/* VGETMANTPD/PS: extract the mantissa with interval/sign controls
   packed as ((C)<<2)|(B) into the builtin's immediate.  *_round_*
   forms take an explicit rounding/SAE argument R; the others pass
   _MM_FROUND_CUR_DIRECTION.
   FIX(review): the six non-round macros in this copy were missing
   their `(__mmask…)` continuation line — the builtin would have been
   invoked with four arguments where the round variants show five;
   restored the mask lines below. */
7454 #define _mm512_getmant_round_pd(A, B, C, R) \
7455 (__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \
7456 (int)(((C)<<2) | (B)), \
7457 (__v8df)_mm512_undefined_pd(), \
7458 (__mmask8)-1, (int)(R))
7460 #define _mm512_mask_getmant_round_pd(W, U, A, B, C, R) \
7461 (__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \
7462 (int)(((C)<<2) | (B)), \
7463 (__v8df)(__m512d)(W), \
7464 (__mmask8)(U), (int)(R))
7466 #define _mm512_maskz_getmant_round_pd(U, A, B, C, R) \
7467 (__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \
7468 (int)(((C)<<2) | (B)), \
7469 (__v8df)_mm512_setzero_pd(), \
7470 (__mmask8)(U), (int)(R))
7472 #define _mm512_getmant_pd(A, B, C) \
7473 (__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \
7474 (int)(((C)<<2) | (B)), \
7475 (__v8df)_mm512_setzero_pd(), \
7476 (__mmask8)-1, \
7477 _MM_FROUND_CUR_DIRECTION)
7479 #define _mm512_mask_getmant_pd(W, U, A, B, C) \
7480 (__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \
7481 (int)(((C)<<2) | (B)), \
7482 (__v8df)(__m512d)(W), \
7483 (__mmask8)(U), \
7484 _MM_FROUND_CUR_DIRECTION)
7486 #define _mm512_maskz_getmant_pd(U, A, B, C) \
7487 (__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \
7488 (int)(((C)<<2) | (B)), \
7489 (__v8df)_mm512_setzero_pd(), \
7490 (__mmask8)(U), \
7491 _MM_FROUND_CUR_DIRECTION)
7493 #define _mm512_getmant_round_ps(A, B, C, R) \
7494 (__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \
7495 (int)(((C)<<2) | (B)), \
7496 (__v16sf)_mm512_undefined_ps(), \
7497 (__mmask16)-1, (int)(R))
7499 #define _mm512_mask_getmant_round_ps(W, U, A, B, C, R) \
7500 (__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \
7501 (int)(((C)<<2) | (B)), \
7502 (__v16sf)(__m512)(W), \
7503 (__mmask16)(U), (int)(R))
7505 #define _mm512_maskz_getmant_round_ps(U, A, B, C, R) \
7506 (__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \
7507 (int)(((C)<<2) | (B)), \
7508 (__v16sf)_mm512_setzero_ps(), \
7509 (__mmask16)(U), (int)(R))
7511 #define _mm512_getmant_ps(A, B, C) \
7512 (__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \
7513 (int)(((C)<<2)|(B)), \
7514 (__v16sf)_mm512_undefined_ps(), \
7515 (__mmask16)-1, \
7516 _MM_FROUND_CUR_DIRECTION)
7518 #define _mm512_mask_getmant_ps(W, U, A, B, C) \
7519 (__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \
7520 (int)(((C)<<2)|(B)), \
7521 (__v16sf)(__m512)(W), \
7522 (__mmask16)(U), \
7523 _MM_FROUND_CUR_DIRECTION)
7525 #define _mm512_maskz_getmant_ps(U, A, B, C) \
7526 (__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \
7527 (int)(((C)<<2)|(B)), \
7528 (__v16sf)_mm512_setzero_ps(), \
7529 (__mmask16)(U), \
7530 _MM_FROUND_CUR_DIRECTION)
/* VGETEXPPD/PS with explicit rounding/SAE argument R; mask/maskz
   variants take pass-through W or zero, plain forms use an undefined
   pass-through. */
7532 #define _mm512_getexp_round_pd(A, R) \
7533 (__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \
7534 (__v8df)_mm512_undefined_pd(), \
7535 (__mmask8)-1, (int)(R))
7537 #define _mm512_mask_getexp_round_pd(W, U, A, R) \
7538 (__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \
7539 (__v8df)(__m512d)(W), \
7540 (__mmask8)(U), (int)(R))
7542 #define _mm512_maskz_getexp_round_pd(U, A, R) \
7543 (__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \
7544 (__v8df)_mm512_setzero_pd(), \
7545 (__mmask8)(U), (int)(R))
/* NOTE(review): truncated inline-function bodies below (declarations
   and closing lines missing from this copy); left as-is. */
7550 return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
7559 return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
7568 return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
7574 #define _mm512_getexp_round_ps(A, R) \
7575 (__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \
7576 (__v16sf)_mm512_undefined_ps(), \
7577 (__mmask16)-1, (int)(R))
7579 #define _mm512_mask_getexp_round_ps(W, U, A, R) \
7580 (__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \
7581 (__v16sf)(__m512)(W), \
7582 (__mmask16)(U), (int)(R))
7584 #define _mm512_maskz_getexp_round_ps(U, A, R) \
7585 (__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \
7586 (__v16sf)_mm512_setzero_ps(), \
7587 (__mmask16)(U), (int)(R))
7592 return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
7601 return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
7610 return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
/* 64-bit- and 32-bit-index gathers.  The unmasked forms pass an
   undefined destination and an all-ones mask; the mask forms merge
   into v1_old.  scale is the byte multiplier applied to each index.
   FIX(review): five macros in this copy were missing their final
   `(int)(scale))` continuation line (each visible body ended with a
   dangling `\`); restored to match their mask counterparts. */
7616 #define _mm512_i64gather_ps(index, addr, scale) \
7617 (__m256)__builtin_ia32_gatherdiv16sf((__v8sf)_mm256_undefined_ps(), \
7618 (void const *)(addr), \
7619 (__v8di)(__m512i)(index), (__mmask8)-1, \
7620 (int)(scale))
7622 #define _mm512_mask_i64gather_ps(v1_old, mask, index, addr, scale) \
7623 (__m256)__builtin_ia32_gatherdiv16sf((__v8sf)(__m256)(v1_old),\
7624 (void const *)(addr), \
7625 (__v8di)(__m512i)(index), \
7626 (__mmask8)(mask), (int)(scale))
7628 #define _mm512_i64gather_epi32(index, addr, scale) \
7629 (__m256i)__builtin_ia32_gatherdiv16si((__v8si)_mm256_undefined_si256(), \
7630 (void const *)(addr), \
7631 (__v8di)(__m512i)(index), \
7632 (__mmask8)-1, (int)(scale))
7634 #define _mm512_mask_i64gather_epi32(v1_old, mask, index, addr, scale) \
7635 (__m256i)__builtin_ia32_gatherdiv16si((__v8si)(__m256i)(v1_old), \
7636 (void const *)(addr), \
7637 (__v8di)(__m512i)(index), \
7638 (__mmask8)(mask), (int)(scale))
7640 #define _mm512_i64gather_pd(index, addr, scale) \
7641 (__m512d)__builtin_ia32_gatherdiv8df((__v8df)_mm512_undefined_pd(), \
7642 (void const *)(addr), \
7643 (__v8di)(__m512i)(index), (__mmask8)-1, \
7644 (int)(scale))
7646 #define _mm512_mask_i64gather_pd(v1_old, mask, index, addr, scale) \
7647 (__m512d)__builtin_ia32_gatherdiv8df((__v8df)(__m512d)(v1_old), \
7648 (void const *)(addr), \
7649 (__v8di)(__m512i)(index), \
7650 (__mmask8)(mask), (int)(scale))
7652 #define _mm512_i64gather_epi64(index, addr, scale) \
7653 (__m512i)__builtin_ia32_gatherdiv8di((__v8di)_mm512_undefined_epi32(), \
7654 (void const *)(addr), \
7655 (__v8di)(__m512i)(index), (__mmask8)-1, \
7656 (int)(scale))
7658 #define _mm512_mask_i64gather_epi64(v1_old, mask, index, addr, scale) \
7659 (__m512i)__builtin_ia32_gatherdiv8di((__v8di)(__m512i)(v1_old), \
7660 (void const *)(addr), \
7661 (__v8di)(__m512i)(index), \
7662 (__mmask8)(mask), (int)(scale))
7664 #define _mm512_i32gather_ps(index, addr, scale) \
7665 (__m512)__builtin_ia32_gathersiv16sf((__v16sf)_mm512_undefined_ps(), \
7666 (void const *)(addr), \
7667 (__v16si)(__m512)(index), \
7668 (__mmask16)-1, (int)(scale))
7670 #define _mm512_mask_i32gather_ps(v1_old, mask, index, addr, scale) \
7671 (__m512)__builtin_ia32_gathersiv16sf((__v16sf)(__m512)(v1_old), \
7672 (void const *)(addr), \
7673 (__v16si)(__m512)(index), \
7674 (__mmask16)(mask), (int)(scale))
7676 #define _mm512_i32gather_epi32(index, addr, scale) \
7677 (__m512i)__builtin_ia32_gathersiv16si((__v16si)_mm512_undefined_epi32(), \
7678 (void const *)(addr), \
7679 (__v16si)(__m512i)(index), \
7680 (__mmask16)-1, (int)(scale))
7682 #define _mm512_mask_i32gather_epi32(v1_old, mask, index, addr, scale) \
7683 (__m512i)__builtin_ia32_gathersiv16si((__v16si)(__m512i)(v1_old), \
7684 (void const *)(addr), \
7685 (__v16si)(__m512i)(index), \
7686 (__mmask16)(mask), (int)(scale))
7688 #define _mm512_i32gather_pd(index, addr, scale) \
7689 (__m512d)__builtin_ia32_gathersiv8df((__v8df)_mm512_undefined_pd(), \
7690 (void const *)(addr), \
7691 (__v8si)(__m256i)(index), (__mmask8)-1, \
7692 (int)(scale))
7694 #define _mm512_mask_i32gather_pd(v1_old, mask, index, addr, scale) \
7695 (__m512d)__builtin_ia32_gathersiv8df((__v8df)(__m512d)(v1_old), \
7696 (void const *)(addr), \
7697 (__v8si)(__m256i)(index), \
7698 (__mmask8)(mask), (int)(scale))
7700 #define _mm512_i32gather_epi64(index, addr, scale) \
7701 (__m512i)__builtin_ia32_gathersiv8di((__v8di)_mm512_undefined_epi32(), \
7702 (void const *)(addr), \
7703 (__v8si)(__m256i)(index), (__mmask8)-1, \
7704 (int)(scale))
7706 #define _mm512_mask_i32gather_epi64(v1_old, mask, index, addr, scale) \
7707 (__m512i)__builtin_ia32_gathersiv8di((__v8di)(__m512i)(v1_old), \
7708 (void const *)(addr), \
7709 (__v8si)(__m256i)(index), \
7710 (__mmask8)(mask), (int)(scale))
/* 64-bit-index scatters: store each mask-selected element of v1 to
 * addr + index[i]*scale.  With qword indices a 512-bit index register
 * holds 8 lanes, so the source is 8 floats/dwords (256-bit) or 8
 * doubles/qwords (512-bit) and the mask is __mmask8. */
#define _mm512_i64scatter_ps(addr, index, v1, scale) \
  __builtin_ia32_scatterdiv16sf((void *)(addr), (__mmask8)-1, \
                                (__v8di)(__m512i)(index), \
                                (__v8sf)(__m256)(v1), (int)(scale))

#define _mm512_mask_i64scatter_ps(addr, mask, index, v1, scale) \
  __builtin_ia32_scatterdiv16sf((void *)(addr), (__mmask8)(mask), \
                                (__v8di)(__m512i)(index), \
                                (__v8sf)(__m256)(v1), (int)(scale))

#define _mm512_i64scatter_epi32(addr, index, v1, scale) \
  __builtin_ia32_scatterdiv16si((void *)(addr), (__mmask8)-1, \
                                (__v8di)(__m512i)(index), \
                                (__v8si)(__m256i)(v1), (int)(scale))

#define _mm512_mask_i64scatter_epi32(addr, mask, index, v1, scale) \
  __builtin_ia32_scatterdiv16si((void *)(addr), (__mmask8)(mask), \
                                (__v8di)(__m512i)(index), \
                                (__v8si)(__m256i)(v1), (int)(scale))

#define _mm512_i64scatter_pd(addr, index, v1, scale) \
  __builtin_ia32_scatterdiv8df((void *)(addr), (__mmask8)-1, \
                               (__v8di)(__m512i)(index), \
                               (__v8df)(__m512d)(v1), (int)(scale))

#define _mm512_mask_i64scatter_pd(addr, mask, index, v1, scale) \
  __builtin_ia32_scatterdiv8df((void *)(addr), (__mmask8)(mask), \
                               (__v8di)(__m512i)(index), \
                               (__v8df)(__m512d)(v1), (int)(scale))

#define _mm512_i64scatter_epi64(addr, index, v1, scale) \
  __builtin_ia32_scatterdiv8di((void *)(addr), (__mmask8)-1, \
                               (__v8di)(__m512i)(index), \
                               (__v8di)(__m512i)(v1), (int)(scale))

#define _mm512_mask_i64scatter_epi64(addr, mask, index, v1, scale) \
  __builtin_ia32_scatterdiv8di((void *)(addr), (__mmask8)(mask), \
                               (__v8di)(__m512i)(index), \
                               (__v8di)(__m512i)(v1), (int)(scale))
/* 32-bit-index scatters: store each mask-selected element of v1 to
 * addr + index[i]*scale.  Dword indices allow 16 lanes for 32-bit data
 * (__mmask16); 64-bit data still has 8 lanes and takes a __m256i index. */
#define _mm512_i32scatter_ps(addr, index, v1, scale) \
  __builtin_ia32_scattersiv16sf((void *)(addr), (__mmask16)-1, \
                                (__v16si)(__m512i)(index), \
                                (__v16sf)(__m512)(v1), (int)(scale))

#define _mm512_mask_i32scatter_ps(addr, mask, index, v1, scale) \
  __builtin_ia32_scattersiv16sf((void *)(addr), (__mmask16)(mask), \
                                (__v16si)(__m512i)(index), \
                                (__v16sf)(__m512)(v1), (int)(scale))

#define _mm512_i32scatter_epi32(addr, index, v1, scale) \
  __builtin_ia32_scattersiv16si((void *)(addr), (__mmask16)-1, \
                                (__v16si)(__m512i)(index), \
                                (__v16si)(__m512i)(v1), (int)(scale))

#define _mm512_mask_i32scatter_epi32(addr, mask, index, v1, scale) \
  __builtin_ia32_scattersiv16si((void *)(addr), (__mmask16)(mask), \
                                (__v16si)(__m512i)(index), \
                                (__v16si)(__m512i)(v1), (int)(scale))

#define _mm512_i32scatter_pd(addr, index, v1, scale) \
  __builtin_ia32_scattersiv8df((void *)(addr), (__mmask8)-1, \
                               (__v8si)(__m256i)(index), \
                               (__v8df)(__m512d)(v1), (int)(scale))

#define _mm512_mask_i32scatter_pd(addr, mask, index, v1, scale) \
  __builtin_ia32_scattersiv8df((void *)(addr), (__mmask8)(mask), \
                               (__v8si)(__m256i)(index), \
                               (__v8df)(__m512d)(v1), (int)(scale))

#define _mm512_i32scatter_epi64(addr, index, v1, scale) \
  __builtin_ia32_scattersiv8di((void *)(addr), (__mmask8)-1, \
                               (__v8si)(__m256i)(index), \
                               (__v8di)(__m512i)(v1), (int)(scale))

#define _mm512_mask_i32scatter_epi64(addr, mask, index, v1, scale) \
  __builtin_ia32_scattersiv8di((void *)(addr), (__mmask8)(mask), \
                               (__v8si)(__m256i)(index), \
                               (__v8di)(__m512i)(v1), (int)(scale))
7795 return __builtin_ia32_vfmaddss3_mask((__v4sf)__W,
/* Scalar float FMA (A*B)+C with explicit rounding R; mask form merges from W. */
#define _mm_fmadd_round_ss(A, B, C, R) \
  (__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(A), \
                                        (__v4sf)(__m128)(B), \
                                        (__v4sf)(__m128)(C), (__mmask8)-1, \
                                        (int)(R))

#define _mm_mask_fmadd_round_ss(W, U, A, B, R) \
  (__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(W), \
                                        (__v4sf)(__m128)(A), \
                                        (__v4sf)(__m128)(B), (__mmask8)(U), \
                                        (int)(R))
7817 return __builtin_ia32_vfmaddss3_maskz((__v4sf)__A,
/* Zero-masked scalar float FMA (A*B)+C with rounding R. */
#define _mm_maskz_fmadd_round_ss(U, A, B, C, R) \
  (__m128)__builtin_ia32_vfmaddss3_maskz((__v4sf)(__m128)(A), \
                                         (__v4sf)(__m128)(B), \
                                         (__v4sf)(__m128)(C), (__mmask8)(U), \
                                         (int)(R))
7833 return __builtin_ia32_vfmaddss3_mask3((__v4sf)__W,
/* mask3 scalar float FMA: result merges from Y (the addend operand). */
#define _mm_mask3_fmadd_round_ss(W, X, Y, U, R) \
  (__m128)__builtin_ia32_vfmaddss3_mask3((__v4sf)(__m128)(W), \
                                         (__v4sf)(__m128)(X), \
                                         (__v4sf)(__m128)(Y), (__mmask8)(U), \
                                         (int)(R))
7849 return __builtin_ia32_vfmaddss3_mask((__v4sf)__W,
/* Scalar float FMSUB (A*B)-C, implemented as FMA with negated addend. */
#define _mm_fmsub_round_ss(A, B, C, R) \
  (__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(A), \
                                        (__v4sf)(__m128)(B), \
                                        -(__v4sf)(__m128)(C), (__mmask8)-1, \
                                        (int)(R))

#define _mm_mask_fmsub_round_ss(W, U, A, B, R) \
  (__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(W), \
                                        (__v4sf)(__m128)(A), \
                                        -(__v4sf)(__m128)(B), (__mmask8)(U), \
                                        (int)(R))
7871 return __builtin_ia32_vfmaddss3_maskz((__v4sf)__A,
7878 #define _mm_maskz_fmsub_round_ss(U, A, B, C, R) \
7879 (__m128)__builtin_ia32_vfmaddss3_maskz((__v4sf)(__m128)(A), \
7880 (__v4sf)(__m128)(B), \
7881 -(__v4sf)(__m128)(C), (__mmask8)(U), \
7887 return __builtin_ia32_vfmsubss3_mask3((__v4sf)__W,
/* mask3 scalar float FMSUB; dedicated vfmsub builtin so the merge source
 * (Y) keeps its un-negated value in masked-off lanes. */
#define _mm_mask3_fmsub_round_ss(W, X, Y, U, R) \
  (__m128)__builtin_ia32_vfmsubss3_mask3((__v4sf)(__m128)(W), \
                                         (__v4sf)(__m128)(X), \
                                         (__v4sf)(__m128)(Y), (__mmask8)(U), \
                                         (int)(R))
7903 return __builtin_ia32_vfmaddss3_mask((__v4sf)__W,
/* Scalar float FNMADD -(A*B)+C, via FMA with one negated multiplicand. */
#define _mm_fnmadd_round_ss(A, B, C, R) \
  (__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(A), \
                                        -(__v4sf)(__m128)(B), \
                                        (__v4sf)(__m128)(C), (__mmask8)-1, \
                                        (int)(R))

#define _mm_mask_fnmadd_round_ss(W, U, A, B, R) \
  (__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(W), \
                                        -(__v4sf)(__m128)(A), \
                                        (__v4sf)(__m128)(B), (__mmask8)(U), \
                                        (int)(R))
7925 return __builtin_ia32_vfmaddss3_maskz((__v4sf)__A,
/* Zero-masked scalar float FNMADD -(A*B)+C with rounding R. */
#define _mm_maskz_fnmadd_round_ss(U, A, B, C, R) \
  (__m128)__builtin_ia32_vfmaddss3_maskz((__v4sf)(__m128)(A), \
                                         -(__v4sf)(__m128)(B), \
                                         (__v4sf)(__m128)(C), (__mmask8)(U), \
                                         (int)(R))
7941 return __builtin_ia32_vfmaddss3_mask3((__v4sf)__W,
/* mask3 scalar float FNMADD -(W*X)+Y; merge source is Y. */
#define _mm_mask3_fnmadd_round_ss(W, X, Y, U, R) \
  (__m128)__builtin_ia32_vfmaddss3_mask3((__v4sf)(__m128)(W), \
                                         -(__v4sf)(__m128)(X), \
                                         (__v4sf)(__m128)(Y), (__mmask8)(U), \
                                         (int)(R))
7957 return __builtin_ia32_vfmaddss3_mask((__v4sf)__W,
/* Scalar float FNMSUB -(A*B)-C: both multiplicand and addend negated. */
#define _mm_fnmsub_round_ss(A, B, C, R) \
  (__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(A), \
                                        -(__v4sf)(__m128)(B), \
                                        -(__v4sf)(__m128)(C), (__mmask8)-1, \
                                        (int)(R))

#define _mm_mask_fnmsub_round_ss(W, U, A, B, R) \
  (__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(W), \
                                        -(__v4sf)(__m128)(A), \
                                        -(__v4sf)(__m128)(B), (__mmask8)(U), \
                                        (int)(R))
7979 return __builtin_ia32_vfmaddss3_maskz((__v4sf)__A,
/* Zero-masked scalar float FNMSUB -(A*B)-C with rounding R. */
#define _mm_maskz_fnmsub_round_ss(U, A, B, C, R) \
  (__m128)__builtin_ia32_vfmaddss3_maskz((__v4sf)(__m128)(A), \
                                         -(__v4sf)(__m128)(B), \
                                         -(__v4sf)(__m128)(C), (__mmask8)(U), \
                                         (int)(R))
7995 return __builtin_ia32_vfmsubss3_mask3((__v4sf)__W,
8002 #define _mm_mask3_fnmsub_round_ss(W, X, Y, U, R) \
8003 (__m128)__builtin_ia32_vfmsubss3_mask3((__v4sf)(__m128)(W), \
8004 -(__v4sf)(__m128)(X), \
8005 (__v4sf)(__m128)(Y), (__mmask8)(U), \
8011 return __builtin_ia32_vfmaddsd3_mask((__v2df)__W,
/* Scalar double FMA (A*B)+C with explicit rounding R; mask form merges from W. */
#define _mm_fmadd_round_sd(A, B, C, R) \
  (__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(A), \
                                         (__v2df)(__m128d)(B), \
                                         (__v2df)(__m128d)(C), (__mmask8)-1, \
                                         (int)(R))

#define _mm_mask_fmadd_round_sd(W, U, A, B, R) \
  (__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(W), \
                                         (__v2df)(__m128d)(A), \
                                         (__v2df)(__m128d)(B), (__mmask8)(U), \
                                         (int)(R))
8033 return __builtin_ia32_vfmaddsd3_maskz((__v2df)__A,
/* Zero-masked scalar double FMA (A*B)+C with rounding R. */
#define _mm_maskz_fmadd_round_sd(U, A, B, C, R) \
  (__m128d)__builtin_ia32_vfmaddsd3_maskz((__v2df)(__m128d)(A), \
                                          (__v2df)(__m128d)(B), \
                                          (__v2df)(__m128d)(C), (__mmask8)(U), \
                                          (int)(R))
8049 return __builtin_ia32_vfmaddsd3_mask3((__v2df)__W,
/* mask3 scalar double FMA: result merges from Y (the addend operand). */
#define _mm_mask3_fmadd_round_sd(W, X, Y, U, R) \
  (__m128d)__builtin_ia32_vfmaddsd3_mask3((__v2df)(__m128d)(W), \
                                          (__v2df)(__m128d)(X), \
                                          (__v2df)(__m128d)(Y), (__mmask8)(U), \
                                          (int)(R))
8065 return __builtin_ia32_vfmaddsd3_mask((__v2df)__W,
/* Scalar double FMSUB (A*B)-C, implemented as FMA with negated addend. */
#define _mm_fmsub_round_sd(A, B, C, R) \
  (__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(A), \
                                         (__v2df)(__m128d)(B), \
                                         -(__v2df)(__m128d)(C), (__mmask8)-1, \
                                         (int)(R))

#define _mm_mask_fmsub_round_sd(W, U, A, B, R) \
  (__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(W), \
                                         (__v2df)(__m128d)(A), \
                                         -(__v2df)(__m128d)(B), (__mmask8)(U), \
                                         (int)(R))
8087 return __builtin_ia32_vfmaddsd3_maskz((__v2df)__A,
/* Zero-masked scalar double FMSUB (A*B)-C with rounding R. */
#define _mm_maskz_fmsub_round_sd(U, A, B, C, R) \
  (__m128d)__builtin_ia32_vfmaddsd3_maskz((__v2df)(__m128d)(A), \
                                          (__v2df)(__m128d)(B), \
                                          -(__v2df)(__m128d)(C), \
                                          (__mmask8)(U), (int)(R))
8103 return __builtin_ia32_vfmsubsd3_mask3((__v2df)__W,
/* mask3 scalar double FMSUB; dedicated vfmsub builtin so the merge source
 * (Y) keeps its un-negated value in masked-off lanes. */
#define _mm_mask3_fmsub_round_sd(W, X, Y, U, R) \
  (__m128d)__builtin_ia32_vfmsubsd3_mask3((__v2df)(__m128d)(W), \
                                          (__v2df)(__m128d)(X), \
                                          (__v2df)(__m128d)(Y), \
                                          (__mmask8)(U), (int)(R))
8119 return __builtin_ia32_vfmaddsd3_mask((__v2df)__W,
/* Scalar double FNMADD -(A*B)+C, via FMA with one negated multiplicand. */
#define _mm_fnmadd_round_sd(A, B, C, R) \
  (__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(A), \
                                         -(__v2df)(__m128d)(B), \
                                         (__v2df)(__m128d)(C), (__mmask8)-1, \
                                         (int)(R))

#define _mm_mask_fnmadd_round_sd(W, U, A, B, R) \
  (__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(W), \
                                         -(__v2df)(__m128d)(A), \
                                         (__v2df)(__m128d)(B), (__mmask8)(U), \
                                         (int)(R))
8141 return __builtin_ia32_vfmaddsd3_maskz((__v2df)__A,
/* Zero-masked scalar double FNMADD -(A*B)+C with rounding R. */
#define _mm_maskz_fnmadd_round_sd(U, A, B, C, R) \
  (__m128d)__builtin_ia32_vfmaddsd3_maskz((__v2df)(__m128d)(A), \
                                          -(__v2df)(__m128d)(B), \
                                          (__v2df)(__m128d)(C), (__mmask8)(U), \
                                          (int)(R))
8157 return __builtin_ia32_vfmaddsd3_mask3((__v2df)__W,
/* mask3 scalar double FNMADD -(W*X)+Y; merge source is Y. */
#define _mm_mask3_fnmadd_round_sd(W, X, Y, U, R) \
  (__m128d)__builtin_ia32_vfmaddsd3_mask3((__v2df)(__m128d)(W), \
                                          -(__v2df)(__m128d)(X), \
                                          (__v2df)(__m128d)(Y), (__mmask8)(U), \
                                          (int)(R))
8173 return __builtin_ia32_vfmaddsd3_mask((__v2df)__W,
/* Scalar double FNMSUB -(A*B)-C: both multiplicand and addend negated. */
#define _mm_fnmsub_round_sd(A, B, C, R) \
  (__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(A), \
                                         -(__v2df)(__m128d)(B), \
                                         -(__v2df)(__m128d)(C), (__mmask8)-1, \
                                         (int)(R))

#define _mm_mask_fnmsub_round_sd(W, U, A, B, R) \
  (__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(W), \
                                         -(__v2df)(__m128d)(A), \
                                         -(__v2df)(__m128d)(B), (__mmask8)(U), \
                                         (int)(R))
8195 return __builtin_ia32_vfmaddsd3_maskz((__v2df)__A,
/* Zero-masked scalar double FNMSUB -(A*B)-C with rounding R. */
#define _mm_maskz_fnmsub_round_sd(U, A, B, C, R) \
  (__m128d)__builtin_ia32_vfmaddsd3_maskz((__v2df)(__m128d)(A), \
                                          -(__v2df)(__m128d)(B), \
                                          -(__v2df)(__m128d)(C), \
                                          (__mmask8)(U), (int)(R))
8212 return __builtin_ia32_vfmsubsd3_mask3((__v2df)__W,
/* mask3 scalar double FNMSUB -(W*X)-Y via the vfmsub builtin with -X. */
#define _mm_mask3_fnmsub_round_sd(W, X, Y, U, R) \
  (__m128d)__builtin_ia32_vfmsubsd3_mask3((__v2df)(__m128d)(W), \
                                          -(__v2df)(__m128d)(X), \
                                          (__v2df)(__m128d)(Y), \
                                          (__mmask8)(U), (int)(R))
/* Permute 64-bit elements within each 256-bit half by immediate C;
 * masked/zero-masked forms are built from the plain permute plus a select. */
#define _mm512_permutex_pd(X, C) \
  (__m512d)__builtin_ia32_permdf512((__v8df)(__m512d)(X), (int)(C))

#define _mm512_mask_permutex_pd(W, U, X, C) \
  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                       (__v8df)_mm512_permutex_pd((X), (C)), \
                                       (__v8df)(__m512d)(W))

#define _mm512_maskz_permutex_pd(U, X, C) \
  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                       (__v8df)_mm512_permutex_pd((X), (C)), \
                                       (__v8df)_mm512_setzero_pd())

#define _mm512_permutex_epi64(X, C) \
  (__m512i)__builtin_ia32_permdi512((__v8di)(__m512i)(X), (int)(C))

#define _mm512_mask_permutex_epi64(W, U, X, C) \
  (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                      (__v8di)_mm512_permutex_epi64((X), (C)), \
                                      (__v8di)(__m512i)(W))

#define _mm512_maskz_permutex_epi64(U, X, C) \
  (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                      (__v8di)_mm512_permutex_epi64((X), (C)), \
                                      (__v8di)_mm512_setzero_si512())
8254 return (__m512d)__builtin_ia32_permvardf512((__v8df) __Y, (__v8di) __X);
8260 return (__m512d)__builtin_ia32_selectpd_512((
__mmask8)__U,
8268 return (__m512d)__builtin_ia32_selectpd_512((
__mmask8)__U,
8276 return (__m512i)__builtin_ia32_permvardi512((__v8di)__Y, (__v8di)__X);
8282 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__M,
8291 return (__m512i)__builtin_ia32_selectq_512((
__mmask8)__M,
8299 return (__m512)__builtin_ia32_permvarsf512((__v16sf)__Y, (__v16si)__X);
8305 return (__m512)__builtin_ia32_selectps_512((
__mmask16)__U,
8313 return (__m512)__builtin_ia32_selectps_512((
__mmask16)__U,
8321 return (__m512i)__builtin_ia32_permvarsi512((__v16si)__Y, (__v16si)__X);
/* Legacy alias kept for source compatibility. */
#define _mm512_permutevar_epi32 _mm512_permutexvar_epi32
8329 return (__m512i)__builtin_ia32_selectd_512((
__mmask16)__M,
8338 return (__m512i)__builtin_ia32_selectd_512((
__mmask16)__M,
/* Legacy alias kept for source compatibility. */
#define _mm512_mask_permutevar_epi32 _mm512_mask_permutexvar_epi32
8378 return (
unsigned char)__builtin_ia32_kortestchi(__A, __B);
8384 return (
unsigned char)__builtin_ia32_kortestzhi(__A, __B);
8389 *
__C = (
unsigned char)__builtin_ia32_kortestchi(__A, __B);
8390 return (
unsigned char)__builtin_ia32_kortestzhi(__A, __B);
/* Standard _*_mask16 spellings map onto the older _mm512_k* operations. */
#define _kand_mask16 _mm512_kand
#define _kandn_mask16 _mm512_kandn
#define _knot_mask16 _mm512_knot
#define _kor_mask16 _mm512_kor
#define _kxnor_mask16 _mm512_kxnor
#define _kxor_mask16 _mm512_kxor

/* Shift a 16-bit mask left/right by immediate I (bits shifted out are lost). */
#define _kshiftli_mask16(A, I) \
  (__mmask16)__builtin_ia32_kshiftlihi((__mmask16)(A), (unsigned int)(I))

#define _kshiftri_mask16(A, I) \
  (__mmask16)__builtin_ia32_kshiftrihi((__mmask16)(A), (unsigned int)(I))
8426 return (
unsigned int)__builtin_ia32_kmovw((
__mmask16)__A);
8448 __builtin_nontemporal_store((__v8di_aligned)__A, (__v8di_aligned*)__P);
8455 return (__m512i) __builtin_nontemporal_load((
const __v8di_aligned *)__P);
8462 __builtin_nontemporal_store((__v8df_aligned)__A, (__v8df_aligned*)__P);
8468 typedef __v16sf __v16sf_aligned
__attribute__((aligned(64)));
8469 __builtin_nontemporal_store((__v16sf_aligned)__A, (__v16sf_aligned*)__P);
8475 return (__m512d) __builtin_ia32_compressdf512_mask ((__v8df) __A,
8483 return (__m512d) __builtin_ia32_compressdf512_mask ((__v8df) __A,
8492 return (__m512i) __builtin_ia32_compressdi512_mask ((__v8di) __A,
8500 return (__m512i) __builtin_ia32_compressdi512_mask ((__v8di) __A,
8509 return (__m512) __builtin_ia32_compresssf512_mask ((__v16sf) __A,
8517 return (__m512) __builtin_ia32_compresssf512_mask ((__v16sf) __A,
8526 return (__m512i) __builtin_ia32_compresssi512_mask ((__v16si) __A,
8534 return (__m512i) __builtin_ia32_compresssi512_mask ((__v16si) __A,
/* Scalar float compares producing a 1-bit mask; predicate P selects the
 * comparison, R the SAE/rounding behavior (non-round forms use
 * _MM_FROUND_CUR_DIRECTION). */
#define _mm_cmp_round_ss_mask(X, Y, P, R) \
  (__mmask8)__builtin_ia32_cmpss_mask((__v4sf)(__m128)(X), \
                                      (__v4sf)(__m128)(Y), (int)(P), \
                                      (__mmask8)-1, (int)(R))

#define _mm_mask_cmp_round_ss_mask(M, X, Y, P, R) \
  (__mmask8)__builtin_ia32_cmpss_mask((__v4sf)(__m128)(X), \
                                      (__v4sf)(__m128)(Y), (int)(P), \
                                      (__mmask8)(M), (int)(R))

#define _mm_cmp_ss_mask(X, Y, P) \
  (__mmask8)__builtin_ia32_cmpss_mask((__v4sf)(__m128)(X), \
                                      (__v4sf)(__m128)(Y), (int)(P), \
                                      (__mmask8)-1, \
                                      _MM_FROUND_CUR_DIRECTION)

#define _mm_mask_cmp_ss_mask(M, X, Y, P) \
  (__mmask8)__builtin_ia32_cmpss_mask((__v4sf)(__m128)(X), \
                                      (__v4sf)(__m128)(Y), (int)(P), \
                                      (__mmask8)(M), \
                                      _MM_FROUND_CUR_DIRECTION)
/* Scalar double compares producing a 1-bit mask (see ss forms above). */
#define _mm_cmp_round_sd_mask(X, Y, P, R) \
  (__mmask8)__builtin_ia32_cmpsd_mask((__v2df)(__m128d)(X), \
                                      (__v2df)(__m128d)(Y), (int)(P), \
                                      (__mmask8)-1, (int)(R))

#define _mm_mask_cmp_round_sd_mask(M, X, Y, P, R) \
  (__mmask8)__builtin_ia32_cmpsd_mask((__v2df)(__m128d)(X), \
                                      (__v2df)(__m128d)(Y), (int)(P), \
                                      (__mmask8)(M), (int)(R))

#define _mm_cmp_sd_mask(X, Y, P) \
  (__mmask8)__builtin_ia32_cmpsd_mask((__v2df)(__m128d)(X), \
                                      (__v2df)(__m128d)(Y), (int)(P), \
                                      (__mmask8)-1, \
                                      _MM_FROUND_CUR_DIRECTION)

#define _mm_mask_cmp_sd_mask(M, X, Y, P) \
  (__mmask8)__builtin_ia32_cmpsd_mask((__v2df)(__m128d)(X), \
                                      (__v2df)(__m128d)(Y), (int)(P), \
                                      (__mmask8)(M), \
                                      _MM_FROUND_CUR_DIRECTION)
8645 return (__m512)__builtin_shufflevector((__v16sf)__A, (__v16sf)__A,
8646 1, 1, 3, 3, 5, 5, 7, 7, 9, 9, 11, 11, 13, 13, 15, 15);
8652 return (__m512)__builtin_ia32_selectps_512((
__mmask16)__U,
8660 return (__m512)__builtin_ia32_selectps_512((
__mmask16)__U,
8668 return (__m512)__builtin_shufflevector((__v16sf)__A, (__v16sf)__A,
8669 0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14, 14);
8675 return (__m512)__builtin_ia32_selectps_512((
__mmask16)__U,
8683 return (__m512)__builtin_ia32_selectps_512((
__mmask16)__U,
8691 return __builtin_ia32_selectss_128(__U,
_mm_move_ss(__A, __B), __W);
8697 return __builtin_ia32_selectss_128(__U,
_mm_move_ss(__A, __B),
8704 return __builtin_ia32_selectsd_128(__U,
_mm_move_sd(__A, __B), __W);
8710 return __builtin_ia32_selectsd_128(__U,
_mm_move_sd(__A, __B),
8717 __builtin_ia32_storess128_mask ((__v4sf *)__W, __A, __U & 1);
8723 __builtin_ia32_storesd128_mask ((__v2df *)__W, __A, __U & 1);
8729 __m128 src = (__v4sf) __builtin_shufflevector((__v4sf) __W,
8733 return (__m128) __builtin_ia32_loadss128_mask ((
const __v4sf *) __A, src, __U & 1);
8739 return (__m128)__builtin_ia32_loadss128_mask ((
const __v4sf *) __A,
8747 __m128d src = (__v2df) __builtin_shufflevector((__v2df) __W,
8751 return (__m128d) __builtin_ia32_loadsd128_mask ((
const __v2df *) __A, src, __U & 1);
8757 return (__m128d) __builtin_ia32_loadsd128_mask ((
const __v2df *) __A,
/* Shuffle 32-bit elements within each 128-bit lane by immediate I. */
#define _mm512_shuffle_epi32(A, I) \
  (__m512i)__builtin_ia32_pshufd512((__v16si)(__m512i)(A), (int)(I))

#define _mm512_mask_shuffle_epi32(W, U, A, I) \
  (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                      (__v16si)_mm512_shuffle_epi32((A), (I)), \
                                      (__v16si)(__m512i)(W))

#define _mm512_maskz_shuffle_epi32(U, A, I) \
  (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                      (__v16si)_mm512_shuffle_epi32((A), (I)), \
                                      (__v16si)_mm512_setzero_si512())
8778 return (__m512d) __builtin_ia32_expanddf512_mask ((__v8df) __A,
8786 return (__m512d) __builtin_ia32_expanddf512_mask ((__v8df) __A,
8794 return (__m512i) __builtin_ia32_expanddi512_mask ((__v8di) __A,
8802 return (__m512i) __builtin_ia32_expanddi512_mask ((__v8di) __A,
8810 return (__m512d) __builtin_ia32_expandloaddf512_mask ((
const __v8df *)__P,
8818 return (__m512d) __builtin_ia32_expandloaddf512_mask ((
const __v8df *)__P,
8826 return (__m512i) __builtin_ia32_expandloaddi512_mask ((
const __v8di *)__P,
8834 return (__m512i) __builtin_ia32_expandloaddi512_mask ((
const __v8di *)__P,
8842 return (__m512) __builtin_ia32_expandloadsf512_mask ((
const __v16sf *)__P,
8850 return (__m512) __builtin_ia32_expandloadsf512_mask ((
const __v16sf *)__P,
8858 return (__m512i) __builtin_ia32_expandloadsi512_mask ((
const __v16si *)__P,
8866 return (__m512i) __builtin_ia32_expandloadsi512_mask ((
const __v16si *)__P,
8874 return (__m512) __builtin_ia32_expandsf512_mask ((__v16sf) __A,
8882 return (__m512) __builtin_ia32_expandsf512_mask ((__v16sf) __A,
8890 return (__m512i) __builtin_ia32_expandsi512_mask ((__v16si) __A,
8898 return (__m512i) __builtin_ia32_expandsi512_mask ((__v16si) __A,
/* Convert 8 floats to 8 doubles with explicit rounding/SAE control R. */
#define _mm512_cvt_roundps_pd(A, R) \
  (__m512d)__builtin_ia32_cvtps2pd512_mask((__v8sf)(__m256)(A), \
                                           (__v8df)_mm512_undefined_pd(), \
                                           (__mmask8)-1, (int)(R))

#define _mm512_mask_cvt_roundps_pd(W, U, A, R) \
  (__m512d)__builtin_ia32_cvtps2pd512_mask((__v8sf)(__m256)(A), \
                                           (__v8df)(__m512d)(W), \
                                           (__mmask8)(U), (int)(R))

#define _mm512_maskz_cvt_roundps_pd(U, A, R) \
  (__m512d)__builtin_ia32_cvtps2pd512_mask((__v8sf)(__m256)(A), \
                                           (__v8df)_mm512_setzero_pd(), \
                                           (__mmask8)(U), (int)(R))
8921 return (__m512d) __builtin_convertvector((__v8sf)__A, __v8df);
8927 return (__m512d)__builtin_ia32_selectpd_512((
__mmask8)__U,
8935 return (__m512d)__builtin_ia32_selectpd_512((
__mmask8)__U,
8955 return (__m512d) __builtin_ia32_selectpd_512 ((
__mmask8) __U,
8963 return (__m512d) __builtin_ia32_selectpd_512 ((
__mmask8) __U,
8971 return (__m512) __builtin_ia32_selectps_512 ((
__mmask16) __U,
8979 return (__m512) __builtin_ia32_selectps_512 ((
__mmask16) __U,
8987 __builtin_ia32_compressstoredf512_mask ((__v8df *) __P, (__v8df) __A,
8994 __builtin_ia32_compressstoredi512_mask ((__v8di *) __P, (__v8di) __A,
9001 __builtin_ia32_compressstoresf512_mask ((__v16sf *) __P, (__v16sf) __A,
9008 __builtin_ia32_compressstoresi512_mask ((__v16si *) __P, (__v16si) __A,
/* Convert low double of B to float with rounding R, upper lanes from A. */
#define _mm_cvt_roundsd_ss(A, B, R) \
  (__m128)__builtin_ia32_cvtsd2ss_round_mask((__v4sf)(__m128)(A), \
                                             (__v2df)(__m128d)(B), \
                                             (__v4sf)_mm_undefined_ps(), \
                                             (__mmask8)-1, (int)(R))

#define _mm_mask_cvt_roundsd_ss(W, U, A, B, R) \
  (__m128)__builtin_ia32_cvtsd2ss_round_mask((__v4sf)(__m128)(A), \
                                             (__v2df)(__m128d)(B), \
                                             (__v4sf)(__m128)(W), \
                                             (__mmask8)(U), (int)(R))

#define _mm_maskz_cvt_roundsd_ss(U, A, B, R) \
  (__m128)__builtin_ia32_cvtsd2ss_round_mask((__v4sf)(__m128)(A), \
                                             (__v2df)(__m128d)(B), \
                                             (__v4sf)_mm_setzero_ps(), \
                                             (__mmask8)(U), (int)(R))
9033 return __builtin_ia32_cvtsd2ss_round_mask ((__v4sf)__A,
9042 return __builtin_ia32_cvtsd2ss_round_mask ((__v4sf)__A,
/* AVX-512 i32/i64 spellings of the classic scalar convert intrinsics.
 * NOTE(review): the i64 aliases are 64-bit-only upstream (guarded by
 * __x86_64__ outside this excerpt) — confirm against the full header. */
#define _mm_cvtss_i32 _mm_cvtss_si32
#define _mm_cvtsd_i32 _mm_cvtsd_si32
#define _mm_cvti32_sd _mm_cvtsi32_sd
#define _mm_cvti32_ss _mm_cvtsi32_ss
#define _mm_cvtss_i64 _mm_cvtss_si64
#define _mm_cvtsd_i64 _mm_cvtsd_si64
#define _mm_cvti64_sd _mm_cvtsi64_sd
#define _mm_cvti64_ss _mm_cvtsi64_ss
/* Signed-integer-to-scalar-float conversions with explicit rounding R.
 * NOTE(review): the 64-bit forms are under __x86_64__ upstream (guard lines
 * lost in extraction) — confirm against the full header. */
#define _mm_cvt_roundi64_sd(A, B, R) \
  (__m128d)__builtin_ia32_cvtsi2sd64((__v2df)(__m128d)(A), (long long)(B), \
                                     (int)(R))

#define _mm_cvt_roundsi64_sd(A, B, R) \
  (__m128d)__builtin_ia32_cvtsi2sd64((__v2df)(__m128d)(A), (long long)(B), \
                                     (int)(R))

#define _mm_cvt_roundsi32_ss(A, B, R) \
  (__m128)__builtin_ia32_cvtsi2ss32((__v4sf)(__m128)(A), (int)(B), (int)(R))

#define _mm_cvt_roundi32_ss(A, B, R) \
  (__m128)__builtin_ia32_cvtsi2ss32((__v4sf)(__m128)(A), (int)(B), (int)(R))

#define _mm_cvt_roundsi64_ss(A, B, R) \
  (__m128)__builtin_ia32_cvtsi2ss64((__v4sf)(__m128)(A), (long long)(B), \
                                    (int)(R))

#define _mm_cvt_roundi64_ss(A, B, R) \
  (__m128)__builtin_ia32_cvtsi2ss64((__v4sf)(__m128)(A), (long long)(B), \
                                    (int)(R))
/* Convert low float of B to double with SAE control R, upper lane from A. */
#define _mm_cvt_roundss_sd(A, B, R) \
  (__m128d)__builtin_ia32_cvtss2sd_round_mask((__v2df)(__m128d)(A), \
                                              (__v4sf)(__m128)(B), \
                                              (__v2df)_mm_undefined_pd(), \
                                              (__mmask8)-1, (int)(R))

#define _mm_mask_cvt_roundss_sd(W, U, A, B, R) \
  (__m128d)__builtin_ia32_cvtss2sd_round_mask((__v2df)(__m128d)(A), \
                                              (__v4sf)(__m128)(B), \
                                              (__v2df)(__m128d)(W), \
                                              (__mmask8)(U), (int)(R))

#define _mm_maskz_cvt_roundss_sd(U, A, B, R) \
  (__m128d)__builtin_ia32_cvtss2sd_round_mask((__v2df)(__m128d)(A), \
                                              (__v4sf)(__m128)(B), \
                                              (__v2df)_mm_setzero_pd(), \
                                              (__mmask8)(U), (int)(R))
9106 return __builtin_ia32_cvtss2sd_round_mask((__v2df)__A,
9115 return __builtin_ia32_cvtss2sd_round_mask((__v2df)__A,
/* Unsigned 64-bit integer to scalar double with explicit rounding R. */
#define _mm_cvt_roundu64_sd(A, B, R) \
  (__m128d)__builtin_ia32_cvtusi2sd64((__v2df)(__m128d)(A), \
                                      (unsigned long long)(B), (int)(R))
9134 _mm_cvtu64_sd (__m128d __A,
unsigned long long __B)
/* Unsigned integer to scalar float with explicit rounding R. */
#define _mm_cvt_roundu32_ss(A, B, R) \
  (__m128)__builtin_ia32_cvtusi2ss32((__v4sf)(__m128)(A), (unsigned int)(B), \
                                     (int)(R))

#define _mm_cvt_roundu64_ss(A, B, R) \
  (__m128)__builtin_ia32_cvtusi2ss64((__v4sf)(__m128)(A), \
                                     (unsigned long long)(B), (int)(R))
9158 _mm_cvtu64_ss (__m128 __A,
unsigned long long __B)
9168 return (__m512i) __builtin_ia32_selectd_512(__M,
9176 return (__m512i) __builtin_ia32_selectq_512(__M,
9183 char __e58,
char __e57,
char __e56,
char __e55,
char __e54,
char __e53,
9184 char __e52,
char __e51,
char __e50,
char __e49,
char __e48,
char __e47,
9185 char __e46,
char __e45,
char __e44,
char __e43,
char __e42,
char __e41,
9186 char __e40,
char __e39,
char __e38,
char __e37,
char __e36,
char __e35,
9187 char __e34,
char __e33,
char __e32,
char __e31,
char __e30,
char __e29,
9188 char __e28,
char __e27,
char __e26,
char __e25,
char __e24,
char __e23,
9189 char __e22,
char __e21,
char __e20,
char __e19,
char __e18,
char __e17,
9190 char __e16,
char __e15,
char __e14,
char __e13,
char __e12,
char __e11,
9191 char __e10,
char __e9,
char __e8,
char __e7,
char __e6,
char __e5,
9192 char __e4,
char __e3,
char __e2,
char __e1,
char __e0) {
9194 return __extension__ (__m512i)(__v64qi)
9195 {__e0, __e1, __e2, __e3, __e4, __e5, __e6, __e7,
9196 __e8, __e9, __e10, __e11, __e12, __e13, __e14, __e15,
9197 __e16, __e17, __e18, __e19, __e20, __e21, __e22, __e23,
9198 __e24, __e25, __e26, __e27, __e28, __e29, __e30, __e31,
9199 __e32, __e33, __e34, __e35, __e36, __e37, __e38, __e39,
9200 __e40, __e41, __e42, __e43, __e44, __e45, __e46, __e47,
9201 __e48, __e49, __e50, __e51, __e52, __e53, __e54, __e55,
9202 __e56, __e57, __e58, __e59, __e60, __e61, __e62, __e63};
9207 short __e27,
short __e26,
short __e25,
short __e24,
short __e23,
9208 short __e22,
short __e21,
short __e20,
short __e19,
short __e18,
9209 short __e17,
short __e16,
short __e15,
short __e14,
short __e13,
9210 short __e12,
short __e11,
short __e10,
short __e9,
short __e8,
9211 short __e7,
short __e6,
short __e5,
short __e4,
short __e3,
9212 short __e2,
short __e1,
short __e0) {
9213 return __extension__ (__m512i)(__v32hi)
9214 {__e0, __e1, __e2, __e3, __e4, __e5, __e6, __e7,
9215 __e8, __e9, __e10, __e11, __e12, __e13, __e14, __e15,
9216 __e16, __e17, __e18, __e19, __e20, __e21, __e22, __e23,
9217 __e24, __e25, __e26, __e27, __e28, __e29, __e30, __e31 };
9222 int __E,
int __F,
int __G,
int __H,
9223 int __I,
int __J,
int __K,
int __L,
9224 int __M,
int __N,
int __O,
int __P)
9226 return __extension__ (__m512i)(__v16si)
9227 { __P, __O, __N, __M, __L, __K, __J, __I,
9228 __H, __G, __F, __E,
__D,
__C, __B, __A };
/* "Reversed" (memory-order) set: forwards to _mm512_set_epi32 with the
 * argument order flipped. */
#define _mm512_setr_epi32(e0,e1,e2,e3,e4,e5,e6,e7, \
                          e8,e9,e10,e11,e12,e13,e14,e15) \
  _mm512_set_epi32((e15),(e14),(e13),(e12),(e11),(e10),(e9),(e8),(e7),(e6), \
                   (e5),(e4),(e3),(e2),(e1),(e0))
9238 long long __D,
long long __E,
long long __F,
9239 long long __G,
long long __H)
9241 return __extension__ (__m512i) (__v8di)
9242 { __H, __G, __F, __E,
__D,
__C, __B, __A };
/* Memory-order set of 8 qwords: reverse args into _mm512_set_epi64. */
#define _mm512_setr_epi64(e0,e1,e2,e3,e4,e5,e6,e7) \
  _mm512_set_epi64((e7),(e6),(e5),(e4),(e3),(e2),(e1),(e0))
9250 double __E,
double __F,
double __G,
double __H)
9252 return __extension__ (__m512d)
9253 { __H, __G, __F, __E,
__D,
__C, __B, __A };
/* Memory-order set of 8 doubles: reverse args into _mm512_set_pd. */
#define _mm512_setr_pd(e0,e1,e2,e3,e4,e5,e6,e7) \
  _mm512_set_pd((e7),(e6),(e5),(e4),(e3),(e2),(e1),(e0))
9261 float __E,
float __F,
float __G,
float __H,
9262 float __I,
float __J,
float __K,
float __L,
9263 float __M,
float __N,
float __O,
float __P)
9265 return __extension__ (__m512)
9266 { __P, __O, __N, __M, __L, __K, __J, __I,
9267 __H, __G, __F, __E,
__D,
__C, __B, __A };
/* Memory-order set of 16 floats: reverse args into _mm512_set_ps. */
#define _mm512_setr_ps(e0,e1,e2,e3,e4,e5,e6,e7,e8,e9,e10,e11,e12,e13,e14,e15) \
  _mm512_set_ps((e15),(e14),(e13),(e12),(e11),(e10),(e9),(e8),(e7),(e6),(e5), \
                (e4),(e3),(e2),(e1),(e0))
/* Log2 tree reduction of 8 qwords with binary operator `op` (add/mul/and/or);
 * expanded into the bodies of the _mm512_[mask_]reduce_*_epi64 functions
 * that follow, then #undef'd. */
#define _mm512_mask_reduce_operator(op) \
  __v4du __t1 = (__v4du)_mm512_extracti64x4_epi64(__W, 0); \
  __v4du __t2 = (__v4du)_mm512_extracti64x4_epi64(__W, 1); \
  __m256i __t3 = (__m256i)(__t1 op __t2); \
  __v2du __t4 = (__v2du)_mm256_extracti128_si256(__t3, 0); \
  __v2du __t5 = (__v2du)_mm256_extracti128_si256(__t3, 1); \
  __v2du __t6 = __t4 op __t5; \
  __v2du __t7 = __builtin_shufflevector(__t6, __t6, 1, 0); \
  __v2du __t8 = __t6 op __t7; \
  return __t8[0]
9358 #undef _mm512_mask_reduce_operator
/* Log2 tree reduction of 8 doubles with binary operator `op` (add/mul). */
#define _mm512_mask_reduce_operator(op) \
  __m256d __t1 = _mm512_extractf64x4_pd(__W, 0); \
  __m256d __t2 = _mm512_extractf64x4_pd(__W, 1); \
  __m256d __t3 = __t1 op __t2; \
  __m128d __t4 = _mm256_extractf128_pd(__t3, 0); \
  __m128d __t5 = _mm256_extractf128_pd(__t3, 1); \
  __m128d __t6 = __t4 op __t5; \
  __m128d __t7 = __builtin_shufflevector(__t6, __t6, 1, 0); \
  __m128d __t8 = __t6 op __t7; \
  return __t8[0]
9390 #undef _mm512_mask_reduce_operator
/* Log2 tree reduction of 16 dwords with binary operator `op`. */
#define _mm512_mask_reduce_operator(op) \
  __v8su __t1 = (__v8su)_mm512_extracti64x4_epi64(__W, 0); \
  __v8su __t2 = (__v8su)_mm512_extracti64x4_epi64(__W, 1); \
  __m256i __t3 = (__m256i)(__t1 op __t2); \
  __v4su __t4 = (__v4su)_mm256_extracti128_si256(__t3, 0); \
  __v4su __t5 = (__v4su)_mm256_extracti128_si256(__t3, 1); \
  __v4su __t6 = __t4 op __t5; \
  __v4su __t7 = __builtin_shufflevector(__t6, __t6, 2, 3, 0, 1); \
  __v4su __t8 = __t6 op __t7; \
  __v4su __t9 = __builtin_shufflevector(__t8, __t8, 1, 0, 3, 2); \
  __v4su __t10 = __t8 op __t9; \
  return __t10[0]
9448 #undef _mm512_mask_reduce_operator
/* Log2 tree reduction of 16 floats with binary operator `op`; the 256-bit
 * halves are extracted via extractf64x4 and bit-cast back to __m256. */
#define _mm512_mask_reduce_operator(op) \
  __m256 __t1 = (__m256)_mm512_extractf64x4_pd((__m512d)__W, 0); \
  __m256 __t2 = (__m256)_mm512_extractf64x4_pd((__m512d)__W, 1); \
  __m256 __t3 = __t1 op __t2; \
  __m128 __t4 = _mm256_extractf128_ps(__t3, 0); \
  __m128 __t5 = _mm256_extractf128_ps(__t3, 1); \
  __m128 __t6 = __t4 op __t5; \
  __m128 __t7 = __builtin_shufflevector(__t6, __t6, 2, 3, 0, 1); \
  __m128 __t8 = __t6 op __t7; \
  __m128 __t9 = __builtin_shufflevector(__t8, __t8, 1, 0, 3, 2); \
  __m128 __t10 = __t8 op __t9; \
  return __t10[0]
9484 #undef _mm512_mask_reduce_operator
/* Min/max tree reduction of 8 qwords: `op` names an _mm512_{min,max}_* intrinsic
 * suffix, applied across swapped-lane shuffles of the full 512-bit vector. */
#define _mm512_mask_reduce_operator(op) \
  __m512i __t1 = (__m512i)__builtin_shufflevector((__v8di)__V, (__v8di)__V, 4, 5, 6, 7, 0, 1, 2, 3); \
  __m512i __t2 = _mm512_##op(__V, __t1); \
  __m512i __t3 = (__m512i)__builtin_shufflevector((__v8di)__t2, (__v8di)__t2, 2, 3, 0, 1, 6, 7, 4, 5); \
  __m512i __t4 = _mm512_##op(__t2, __t3); \
  __m512i __t5 = (__m512i)__builtin_shufflevector((__v8di)__t4, (__v8di)__t4, 1, 0, 3, 2, 5, 4, 7, 6); \
  __v8di __t6 = (__v8di)_mm512_##op(__t4, __t5); \
  return __t6[0]
9538 #undef _mm512_mask_reduce_operator
/* Min/max tree reduction of 16 dwords via 256/128-bit `op` intrinsics. */
#define _mm512_mask_reduce_operator(op) \
  __m256i __t1 = _mm512_extracti64x4_epi64(__V, 0); \
  __m256i __t2 = _mm512_extracti64x4_epi64(__V, 1); \
  __m256i __t3 = _mm256_##op(__t1, __t2); \
  __m128i __t4 = _mm256_extracti128_si256(__t3, 0); \
  __m128i __t5 = _mm256_extracti128_si256(__t3, 1); \
  __m128i __t6 = _mm_##op(__t4, __t5); \
  __m128i __t7 = (__m128i)__builtin_shufflevector((__v4si)__t6, (__v4si)__t6, 2, 3, 0, 1); \
  __m128i __t8 = _mm_##op(__t6, __t7); \
  __m128i __t9 = (__m128i)__builtin_shufflevector((__v4si)__t8, (__v4si)__t8, 1, 0, 3, 2); \
  __v4si __t10 = (__v4si)_mm_##op(__t8, __t9); \
  return __t10[0]
9596 #undef _mm512_mask_reduce_operator
/* Min/max tree reduction of 8 doubles via 256/128-bit `op` intrinsics. */
#define _mm512_mask_reduce_operator(op) \
  __m256d __t1 = _mm512_extractf64x4_pd(__V, 0); \
  __m256d __t2 = _mm512_extractf64x4_pd(__V, 1); \
  __m256d __t3 = _mm256_##op(__t1, __t2); \
  __m128d __t4 = _mm256_extractf128_pd(__t3, 0); \
  __m128d __t5 = _mm256_extractf128_pd(__t3, 1); \
  __m128d __t6 = _mm_##op(__t4, __t5); \
  __m128d __t7 = __builtin_shufflevector(__t6, __t6, 1, 0); \
  __m128d __t8 = _mm_##op(__t6, __t7); \
  return __t8[0]
9630 #undef _mm512_mask_reduce_operator
/* Min/max tree reduction of 16 floats via 256/128-bit `op` intrinsics. */
#define _mm512_mask_reduce_operator(op) \
  __m256 __t1 = (__m256)_mm512_extractf64x4_pd((__m512d)__V, 0); \
  __m256 __t2 = (__m256)_mm512_extractf64x4_pd((__m512d)__V, 1); \
  __m256 __t3 = _mm256_##op(__t1, __t2); \
  __m128 __t4 = _mm256_extractf128_ps(__t3, 0); \
  __m128 __t5 = _mm256_extractf128_ps(__t3, 1); \
  __m128 __t6 = _mm_##op(__t4, __t5); \
  __m128 __t7 = __builtin_shufflevector(__t6, __t6, 2, 3, 0, 1); \
  __m128 __t8 = _mm_##op(__t6, __t7); \
  __m128 __t9 = __builtin_shufflevector(__t8, __t8, 1, 0, 3, 2); \
  __m128 __t10 = _mm_##op(__t8, __t9); \
  return __t10[0]
9666 #undef _mm512_mask_reduce_operator
9681 __v16si
__b = (__v16si)__A;
9685 #undef __DEFAULT_FN_ATTRS512
9686 #undef __DEFAULT_FN_ATTRS128
9687 #undef __DEFAULT_FN_ATTRS
static __inline unsigned char unsigned int unsigned int unsigned int * __p
static __inline__ vector float vector float __b
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask3_fmadd_sd(__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_sll_epi64(__mmask8 __U, __m512i __A, __m128i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_slli_epi32(__mmask16 __U, __m512i __A, unsigned int __B)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_blend_epi32(__mmask16 __U, __m512i __A, __m512i __W)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_or_si512(__m512i __a, __m512i __b)
static __inline__ unsigned __DEFAULT_FN_ATTRS128 _mm_cvttsd_u32(__m128d __A)
static __inline__ unsigned __DEFAULT_FN_ATTRS128 _mm_cvtsd_u32(__m128d __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_sub_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_min_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_setzero_pd(void)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_undefined(void)
#define _mm512_mask_reduce_operator(op)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_broadcast_i32x4(__m512i __O, __mmask16 __M, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtpd_epu32(__mmask8 __U, __m512d __A)
static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_mul_epi64(__mmask8 __M, __m512i __W)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask3_fmsub_ss(__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_mov_epi32(__mmask16 __U, __m512i __A)
static __inline__ double __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_mul_pd(__mmask8 __M, __m512d __W)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_add_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_reduce_mul_epi64(__m512i __W)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_load_epi32(__m512i __W, __mmask16 __U, void const *__P)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_cvtepi32_epi16(__m512i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi64_epi8(__m128i __O, __mmask8 __M, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_min_epu64(__mmask8 __M, __m512i __A, __m512i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_sub_pd(__mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_permutexvar_epi64(__m512i __X, __m512i __Y)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvttps_epu32(__m512i __W, __mmask16 __U, __m512 __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_fmsubadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_load_ps(void const *__p)
#define _mm512_mask_cmpeq_epi64_mask(k, A, B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_sll_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_add_ps(__mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_expandloadu_ps(__m512 __W, __mmask16 __U, void const *__P)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_max_epu32(__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_getexp_ss(__m128 __A, __m128 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_add_epi32(__mmask16 __U, __m512i __A, __m512i __B)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_min_epu64(__m512i __A, __m512i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_fnmadd_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ float __DEFAULT_FN_ATTRS512 _mm512_reduce_mul_ps(__m512 __W)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_castpd_ps(__m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_andnot_epi64(__mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_slli_epi64(__mmask8 __U, __m512i __A, unsigned int __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_sll_epi32(__m512i __A, __m128i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_andnot_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
static __inline__ int __DEFAULT_FN_ATTRS128 _mm_cvttss_i32(__m128 __A)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_loadu_si512(void const *__P)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_loadu_epi64(__m512i __W, __mmask8 __U, void const *__P)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_fmsubadd_pd(__m512d __A, __m512d __B, __m512d __C)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_set1_epi32(int __s)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_sub_epi32(__m512i __A, __m512i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_scalef_ps(__mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_andnot_si512(__m512i __A, __m512i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_rcp14_pd(__m512d __A)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_cvttps_epi32(__m512 __a)
static __inline__ unsigned long long __DEFAULT_FN_ATTRS512 _mm512_reduce_max_epu64(__m512i __V)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_max_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_permutex2var_ps(__mmask16 __U, __m512 __A, __m512i __I, __m512 __B)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_castps_si512(__m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_xor_epi32(__m512i __a, __m512i __b)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_broadcastd_epi32(__m128i __A)
static __inline__ unsigned int __DEFAULT_FN_ATTRS512 _mm512_reduce_min_epu32(__m512i __V)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_mullox_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_xor_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepu16_epi32(__m512i __W, __mmask16 __U, __m256i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_andnot_epi32(__mmask16 __U, __m512i __A, __m512i __B)
static __inline__ float __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_max_ps(__mmask16 __M, __m512 __V)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_sllv_epi32(__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_moveldup_ps(__mmask16 __U, __m512 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_cvtsepi64_epi8(__m512i __A)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_set1_epi16(short __w)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_cvtss_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_sub_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_loadu_pd(__m512d __W, __mmask8 __U, void const *__P)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_permutex2var_epi64(__m512i __A, __m512i __I, __m512i __B)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_compressstoreu_epi32(void *__P, __mmask16 __U, __m512i __A)
static __inline__ unsigned char __DEFAULT_FN_ATTRS _kortest_mask16_u8(__mmask16 __A, __mmask16 __B, unsigned char *__C)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_fnmadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_broadcast_i64x4(__m256i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_srli_epi32(__m512i __W, __mmask16 __U, __m512i __A, unsigned int __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_cvttpd_epu32(__m512d __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_getexp_ss(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_min_ss(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask3_fmadd_ss(__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepu32_ps(__mmask16 __U, __m512i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_fnmadd_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ unsigned int __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_max_epu32(__mmask16 __M, __m512i __V)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_movedup_pd(__m512d __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_min_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_rsqrt14_pd(__m512d __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_rsqrt14_sd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi64_storeu_epi8(void *__P, __mmask8 __M, __m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_fmsub_ps(__m512 __A, __m512 __B, __m512 __C)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_scalef_pd(__m512d __A, __m512d __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask3_fnmsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_setzero_ps(void)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_mul_ss(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtps_epi32(__m512i __W, __mmask16 __U, __m512 __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_mul_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_unpacklo_epi64(__m512i __A, __m512i __B)
static __inline__ unsigned __DEFAULT_FN_ATTRS128 _mm_cvttss_u32(__m128 __A)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS _mm512_kmov(__mmask16 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtusepi32_epi16(__m256i __O, __mmask16 __M, __m512i __A)
static __inline__ double __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_min_pd(__mmask8 __M, __m512d __V)
static __inline__ int __DEFAULT_FN_ATTRS512 _mm512_cvtsi512_si32(__m512i __A)
Moves the least significant 32 bits of a vector of [16 x i32] to a 32-bit signed integer value.
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_movedup_pd(__mmask8 __U, __m512d __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_add_ss(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_compressstoreu_ps(void *__P, __mmask16 __U, __m512 __A)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_cvttps_epu32(__m512 __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_fmaddsub_pd(__m512d __A, __m512d __B, __m512d __C)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_fnmadd_pd(__m512d __A, __m512d __B, __m512d __C)
#define _mm512_cmpeq_epi32_mask(A, B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_div_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_broadcastd_epi32(__m512i __O, __mmask16 __M, __m128i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_abs_epi64(__m512i __W, __mmask8 __U, __m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_add_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_sll_epi32(__mmask16 __U, __m512i __A, __m128i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_max_epu32(__mmask16 __M, __m512i __A, __m512i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_max_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_abs_epi32(__m512i __W, __mmask16 __U, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi8_epi32(__m512i __W, __mmask16 __U, __m128i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_fnmadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtps_epu32(__m512i __W, __mmask16 __U, __m512 __A)
static __inline__ int __DEFAULT_FN_ATTRS512 _mm512_reduce_add_epi32(__m512i __W)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_scalef_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
static __inline __mmask8 __DEFAULT_FN_ATTRS512 _mm512_test_epi64_mask(__m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_cvtepu32_epi64(__m256i __X)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_min_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_expandloadu_epi32(__m512i __W, __mmask16 __U, void const *__P)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_min_sd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepu32_epi64(__mmask8 __U, __m256i __X)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_set1_epi32(__mmask16 __M, int __A)
static __inline__ double __DEFAULT_FN_ATTRS512 _mm512_cvtsd_f64(__m512d __a)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi64_storeu_epi16(void *__P, __mmask8 __M, __m512i __A)
static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_add_epi64(__mmask8 __M, __m512i __W)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepu8_epi32(__mmask16 __U, __m128i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_rolv_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS _mm512_kor(__mmask16 __A, __mmask16 __B)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_floor_ps(__m512 __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_castps128_ps512(__m128 __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_getexp_ps(__m512 __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_rsqrt14_ss(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_fnmsub_pd(__m512d __A, __m512d __B, __m512d __C)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_rcp14_ss(__m128 __A, __m128 __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvttpd_epi32(__mmask8 __U, __m512d __A)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_mul_epu32(__m512i __X, __m512i __Y)
static __inline __m256i __DEFAULT_FN_ATTRS512 _mm512_cvttpd_epi32(__m512d __a)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_cvtpd_pslo(__m512 __W, __mmask8 __U, __m512d __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_abs_pd(__m512d __W, __mmask8 __K, __m512d __A)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_load_ps(__m512 __W, __mmask16 __U, void const *__P)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_rolv_epi64(__mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_fmadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
static __inline__ int __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_max_epi32(__mmask16 __M, __m512i __V)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_srli_epi32(__m512i __A, unsigned int __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask3_fmsubadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepi32_epi8(__mmask16 __M, __m512i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_fnmsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_min_epi64(__mmask8 __M, __m512i __A, __m512i __B)
static __inline__ unsigned long long __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_min_epu64(__mmask8 __M, __m512i __V)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_cvtepu32_ps(__m512i __A)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_div_pd(__m512d __a, __m512d __b)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_castsi128_si512(__m128i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_rsqrt14_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ void __DEFAULT_FN_ATTRS _store_mask16(__mmask16 *__A, __mmask16 __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_abs_ps(__m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_set1_epi32(__m512i __O, __mmask16 __M, int __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_or_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_sqrt_ss(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_unpackhi_epi32(__mmask16 __U, __m512i __A, __m512i __B)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS _mm512_kand(__mmask16 __A, __mmask16 __B)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_stream_pd(void *__P, __m512d __A)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS512 _mm512_testn_epi64_mask(__m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_xor_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_movehdup_ps(__m512 __A)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_set1_epi8(char __w)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_load_ss(__mmask8 __U, const float *__A)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_cvtsepi32_epi8(__m512i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask3_fmaddsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_fmadd_ss(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask3_fmsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_abs_epi32(__m512i __A)
static __inline__ double __DEFAULT_FN_ATTRS512 _mm512_reduce_mul_pd(__m512d __W)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_fnmadd_ps(__m512 __A, __m512 __B, __m512 __C)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask2_permutex2var_pd(__m512d __A, __m512i __I, __mmask8 __U, __m512d __B)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_set_epi32(int __A, int __B, int __C, int __D, int __E, int __F, int __G, int __H, int __I, int __J, int __K, int __L, int __M, int __N, int __O, int __P)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_max_epi64(__m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_cvtps_epu32(__m512 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepi32_epi16(__mmask16 __M, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_unpackhi_epi64(__mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_expand_ps(__m512 __W, __mmask16 __U, __m512 __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_rcp14_pd(__m512d __W, __mmask8 __U, __m512d __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_set_pd(double __A, double __B, double __C, double __D, double __E, double __F, double __G, double __H)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_min_epu32(__mmask16 __M, __m512i __A, __m512i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_fnmsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtusepi64_epi32(__m256i __O, __mmask8 __M, __m512i __A)
static __inline __m256d __DEFAULT_FN_ATTRS512 _mm512_castpd512_pd256(__m512d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtusepi64_epi16(__m128i __O, __mmask8 __M, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_cvtepi8_epi32(__m128i __A)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_set_epi8(char __e63, char __e62, char __e61, char __e60, char __e59, char __e58, char __e57, char __e56, char __e55, char __e54, char __e53, char __e52, char __e51, char __e50, char __e49, char __e48, char __e47, char __e46, char __e45, char __e44, char __e43, char __e42, char __e41, char __e40, char __e39, char __e38, char __e37, char __e36, char __e35, char __e34, char __e33, char __e32, char __e31, char __e30, char __e29, char __e28, char __e27, char __e26, char __e25, char __e24, char __e23, char __e22, char __e21, char __e20, char __e19, char __e18, char __e17, char __e16, char __e15, char __e14, char __e13, char __e12, char __e11, char __e10, char __e9, char __e8, char __e7, char __e6, char __e5, char __e4, char __e3, char __e2, char __e1, char __e0)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_srai_epi64(__m512i __W, __mmask8 __U, __m512i __A, unsigned int __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_srai_epi32(__mmask16 __U, __m512i __A, unsigned int __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_unpacklo_pd(__mmask8 __U, __m512d __A, __m512d __B)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_mullo_epi32(__m512i __A, __m512i __B)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_store_epi64(void *__P, __m512i __A)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_cvtph_ps(__m256i __A)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_load_si512(void const *__P)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_set_epi64(long long __A, long long __B, long long __C, long long __D, long long __E, long long __F, long long __G, long long __H)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_min_epi32(__mmask16 __M, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi16_epi32(__m512i __W, __mmask16 __U, __m256i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_fmaddsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtsepi32_epi8(__mmask16 __M, __m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_fmaddsub_ps(__m512 __A, __m512 __B, __m512 __C)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_rcp14_sd(__m128d __A, __m128d __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_sqrt_pd(__mmask8 __U, __m512d __A)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_compressstoreu_pd(void *__P, __mmask8 __U, __m512d __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_fnmadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_getexp_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
#define __DEFAULT_FN_ATTRS512
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_zextpd256_pd512(__m256d __a)
Constructs a 512-bit floating-point vector of [8 x double] from a 256-bit floating-point vector of [4...
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_expand_epi64(__m512i __W, __mmask8 __U, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_srli_epi64(__m512i __A, unsigned int __B)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_mask_store_pd(void *__P, __mmask8 __U, __m512d __A)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_load_ps(__mmask16 __U, void const *__P)
static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_store_sd(double *__W, __mmask8 __U, __m128d __A)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS512 _mm512_mask_testn_epi32_mask(__mmask16 __U, __m512i __A, __m512i __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_fnmsub_sd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepu32_ps(__m512 __W, __mmask16 __U, __m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_set_ps(float __A, float __B, float __C, float __D, float __E, float __F, float __G, float __H, float __I, float __J, float __K, float __L, float __M, float __N, float __O, float __P)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_expand_epi64(__mmask8 __U, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_broadcastq_epi64(__m128i __A)
static __inline__ int __DEFAULT_FN_ATTRS _mm512_kortestz(__mmask16 __A, __mmask16 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_srlv_epi32(__m512i __X, __m512i __Y)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_andnot_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_mul_sd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ int __DEFAULT_FN_ATTRS _mm512_mask2int(__mmask16 __a)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_fmadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_zextsi256_si512(__m256i __a)
Constructs a 512-bit integer vector from a 256-bit integer vector.
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_broadcastq_epi64(__mmask8 __M, __m128i __A)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtusepi32_storeu_epi8(void *__P, __mmask16 __M, __m512i __A)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_load_pd(__m512d __W, __mmask8 __U, void const *__P)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_cvtepi16_epi64(__m128i __A)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_unpacklo_pd(__m512d __a, __m512d __b)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_add_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_move_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_cvtps_epi32(__m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepu16_epi64(__mmask8 __U, __m128i __A)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_set1_pd(double __w)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_permutevar_ps(__m512 __A, __m512i __C)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_stream_si512(void *__P, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_srl_epi64(__mmask8 __U, __m512i __A, __m128i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_sqrt_pd(__m512d __A)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_loadu_epi32(__mmask16 __U, void const *__P)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask3_fmsubadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_sub_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_add_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask3_fnmsub_ss(__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_sllv_epi64(__mmask8 __U, __m512i __X, __m512i __Y)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_fmadd_sd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
#define __DEFAULT_FN_ATTRS
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_broadcastss_ps(__mmask16 __M, __m128 __A)
#define _mm512_cmpneq_epi64_mask(A, B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_permutexvar_pd(__m512i __X, __m512d __Y)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_scalef_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_fmadd_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_mul_epu32(__mmask8 __M, __m512i __X, __m512i __Y)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_cvtpd_epu32(__m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask2_permutex2var_epi32(__m512i __A, __m512i __I, __mmask16 __U, __m512i __B)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_unpacklo_ps(__m512 __a, __m512 __b)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_mul_ps(__m512 __a, __m512 __b)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtsepi32_epi16(__mmask16 __M, __m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_broadcast_f32x4(__mmask16 __M, __m128 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtsepi64_epi32(__mmask8 __M, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_sra_epi64(__mmask8 __U, __m512i __A, __m128i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_abs_epi64(__mmask8 __U, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_unpackhi_epi32(__m512i __A, __m512i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_div_ps(__mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_slli_epi64(__m512i __A, unsigned int __B)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_loadu_ps(void const *__p)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_unpacklo_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_permutexvar_epi64(__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
static __inline__ unsigned char __DEFAULT_FN_ATTRS _kortestz_mask16_u8(__mmask16 __A, __mmask16 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepi8_epi64(__mmask8 __U, __m128i __A)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_mask_storeu_ps(void *__P, __mmask16 __U, __m512 __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_broadcastss_ps(__m128 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_sub_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_cvtpslo_pd(__m512d __W, __mmask8 __U, __m512 __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_sqrt_sd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_srli_epi64(__mmask8 __U, __m512i __A, unsigned int __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_getexp_pd(__mmask8 __U, __m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_min_epu32(__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_broadcast_i32x4(__m128i __A)
static __inline__ int __DEFAULT_FN_ATTRS512 _mm512_reduce_mul_epi32(__m512i __W)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_cvtsd_ss(__mmask8 __U, __m128 __A, __m128d __B)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_mul_epu32(__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepi64_epi8(__mmask8 __M, __m512i __A)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_permutex2var_epi32(__m512i __A, __m512i __I, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_unpackhi_epi64(__m512i __A, __m512i __B)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_mask_storeu_epi32(void *__P, __mmask16 __U, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_rolv_epi32(__m512i __A, __m512i __B)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_set1_epi64(long long __d)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS512 _mm512_mask_testn_epi64_mask(__mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_scalef_ss(__m128 __A, __m128 __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_fmaddsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_cvtusepi64_epi16(__m512i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_div_ss(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepu32_epi64(__m512i __W, __mmask8 __U, __m256i __X)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_max_epu64(__m512i __A, __m512i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_mask_cvttpd_epi32(__m256i __W, __mmask8 __U, __m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_expandloadu_epi32(__mmask16 __U, void const *__P)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_load_epi64(__mmask8 __U, void const *__P)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi64_epi16(__m128i __O, __mmask8 __M, __m512i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_rsqrt14_pd(__m512d __W, __mmask8 __U, __m512d __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_fmaddsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_cvtpslo_pd(__m512 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_cvtusepi32_epi8(__m512i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_fnmsub_ss(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_max_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_castps_pd(__m512 __A)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_mask_storeu_pd(void *__P, __mmask8 __U, __m512d __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask3_fnmsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_max_epu32(__m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_sllv_epi64(__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi32_ps(__m512 __W, __mmask16 __U, __m512i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_permutexvar_pd(__m512d __W, __mmask8 __U, __m512i __X, __m512d __Y)
static __inline__ double __DEFAULT_FN_ATTRS512 _mm512_reduce_min_pd(__m512d __V)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepu32_pd(__m512d __W, __mmask8 __U, __m256i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_fmadd_ps(__m512 __A, __m512 __B, __m512 __C)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_compress_pd(__m512d __W, __mmask8 __U, __m512d __A)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_abs_epi64(__m512i __A)
static __inline__ unsigned long long __DEFAULT_FN_ATTRS512 _mm512_reduce_min_epu64(__m512i __V)
static __inline__ unsigned long long __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_max_epu64(__mmask8 __M, __m512i __V)
#define _mm512_mask_cmpeq_epi32_mask(k, A, B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_permutevar_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512i __C)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_cvtusepi64_epi32(__m512i __A)
static __inline __mmask16 __DEFAULT_FN_ATTRS512 _mm512_test_epi32_mask(__m512i __A, __m512i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_broadcastsd_pd(__m512d __O, __mmask8 __M, __m128d __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_scalef_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_max_epi32(__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS512 _mm512_mask_cvtpd_ps(__m256 __W, __mmask8 __U, __m512d __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_sqrt_ps(__mmask16 __U, __m512 __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_min_pd(__mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_getexp_sd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS512 _mm512_mask_test_epi32_mask(__mmask16 __U, __m512i __A, __m512i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_mov_pd(__mmask8 __U, __m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask2_permutex2var_epi64(__m512i __A, __m512i __I, __mmask8 __U, __m512i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_permutevar_pd(__m512d __A, __m512i __C)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_cvtpd_pslo(__m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_expand_epi32(__m512i __W, __mmask16 __U, __m512i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_sqrt_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_undefined_pd(void)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_srl_epi32(__mmask16 __U, __m512i __A, __m128i __B)
#define _mm512_cmpneq_epi32_mask(A, B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_cvtph_ps(__m512 __W, __mmask16 __U, __m256i __A)
static __inline__ int __DEFAULT_FN_ATTRS512 _mm512_reduce_or_epi32(__m512i __W)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_cvtu32_sd(__m128d __A, unsigned __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_mov_ps(__mmask16 __U, __m512 __A)
static __inline __m128i __DEFAULT_FN_ATTRS512 _mm512_castsi512_si128(__m512i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_rsqrt14_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_floor_pd(__m512d __W, __mmask8 __U, __m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_sll_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_storeu_si512(void *__P, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_add_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_rsqrt14_ps(__mmask16 __U, __m512 __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_mul_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_unpackhi_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_cvtepu8_epi64(__m128i __A)
static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_reduce_add_epi64(__m512i __W)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_permutexvar_ps(__mmask16 __U, __m512i __X, __m512 __Y)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_broadcastss_ps(__m512 __O, __mmask16 __M, __m128 __A)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_mullo_epi32(__mmask16 __M, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_xor_epi64(__m512i __a, __m512i __b)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_mul_ps(__mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_unpacklo_epi64(__mmask8 __U, __m512i __A, __m512i __B)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_ceil_ps(__m512 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtpd_epu32(__m256i __W, __mmask8 __U, __m512d __A)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtusepi64_storeu_epi32(void *__P, __mmask8 __M, __m512i __A)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_mul_epi32(__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_div_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask3_fnmadd_ss(__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_mullo_epi32(__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_srav_epi32(__m512i __X, __m512i __Y)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_sra_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_cvtepi64_epi8(__m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_movehdup_ps(__mmask16 __U, __m512 __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_expandloadu_ps(__mmask16 __U, void const *__P)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_cvtepu32lo_pd(__m512i __A)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_storeu_pd(void *__P, __m512d __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_sqrt_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_cvtusepi64_epi8(__m512i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_scalef_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
#define _mm512_mask_cmpneq_epi32_mask(k, A, B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_fmadd_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_loadu_epi64(__mmask8 __U, void const *__P)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_store_epi64(void *__P, __mmask8 __U, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_compress_epi64(__mmask8 __U, __m512i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtusepi32_epi16(__mmask16 __M, __m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_min_ps(__m512 __A, __m512 __B)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_div_ps(__m512 __a, __m512 __b)
static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_and_epi64(__mmask8 __M, __m512i __W)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_min_epu64(__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_fmsub_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvttpd_epu32(__mmask8 __U, __m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_srlv_epi64(__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_fmsub_sd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
static __inline__ int __DEFAULT_FN_ATTRS512 _mm512_reduce_max_epi32(__m512i __V)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_blend_ps(__mmask16 __U, __m512 __A, __m512 __W)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_and_epi64(__m512i __a, __m512i __b)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_fmsubadd_ps(__m512 __A, __m512 __B, __m512 __C)
char __v64qi __attribute__((__vector_size__(64)))
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi16_epi64(__m512i __W, __mmask8 __U, __m128i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepu8_epi64(__mmask8 __U, __m128i __A)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_set4_ps(float __A, float __B, float __C, float __D)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_cvtepi32_epi8(__m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_srav_epi64(__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_min_epi64(__mmask8 __M, __m512i __V)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_zextps256_ps512(__m256 __a)
Constructs a 512-bit floating-point vector of [16 x float] from a 256-bit floating-point vector of [8...
static __inline void __DEFAULT_FN_ATTRS512 _mm512_store_si512(void *__P, __m512i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtusepi64_epi32(__mmask8 __M, __m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_broadcast_f32x4(__m128 __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_broadcastsd_pd(__m128d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_broadcastq_epi64(__m512i __O, __mmask8 __M, __m128i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_unpacklo_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask2_permutex2var_ps(__m512 __A, __m512i __I, __mmask16 __U, __m512 __B)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_min_epu32(__m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_cvtepu16_epi32(__m256i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_fmsub_pd(__m512d __A, __m512d __B, __m512d __C)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_permutex2var_pd(__mmask8 __U, __m512d __A, __m512i __I, __m512d __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_expandloadu_pd(__m512d __W, __mmask8 __U, void const *__P)
static __inline__ unsigned int __DEFAULT_FN_ATTRS512 _mm512_reduce_max_epu32(__m512i __V)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_xor_epi64(__mmask8 __k, __m512i __a, __m512i __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtusepi64_epi16(__mmask8 __M, __m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_max_ps(__m512 __A, __m512 __B)
static __inline __m256 __DEFAULT_FN_ATTRS512 _mm512_castps512_ps256(__m512 __A)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_store_pd(void *__P, __m512d __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_min_pd(__m512d __A, __m512d __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask3_fmadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_ceil_ps(__m512 __W, __mmask16 __U, __m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepu16_epi32(__mmask16 __U, __m256i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_srav_epi64(__m512i __X, __m512i __Y)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_cvtpd_epi32(__m512d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtusepi64_epi8(__m128i __O, __mmask8 __M, __m512i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_fnmsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
static __inline__ float __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_add_ps(__mmask16 __M, __m512 __W)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_expand_ps(__mmask16 __U, __m512 __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_fmadd_pd(__m512d __A, __m512d __B, __m512d __C)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_rsqrt14_ss(__m128 __A, __m128 __B)
#define _mm512_mask_cmpneq_epi64_mask(k, A, B)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_max_epi32(__m512i __A, __m512i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_min_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi32_pd(__m512d __W, __mmask8 __U, __m256i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_sllv_epi64(__m512i __X, __m512i __Y)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_min_epi64(__m512i __A, __m512i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_cvtps_pd(__m256 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_rolv_epi32(__mmask16 __U, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_srav_epi32(__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_add_epi32(__m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_min_epi64(__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_cvtepu8_epi32(__m128i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_sqrt_pd(__m512d __W, __mmask8 __U, __m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_max_epi64(__mmask8 __M, __m512i __A, __m512i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_mul_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_compressstoreu_epi64(void *__P, __mmask8 __U, __m512i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_fnmsub_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtusepi32_storeu_epi16(void *__P, __mmask16 __M, __m512i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_getexp_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_xor_epi32(__mmask16 __k, __m512i __a, __m512i __b)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_sqrt_ps(__m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepi16_epi32(__mmask16 __U, __m256i __A)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_storeu_epi64(void *__P, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_cvtepi32_epi64(__m256i __X)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtsepi64_epi16(__mmask8 __M, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_max_epu64(__mmask8 __M, __m512i __A, __m512i __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtpd_ps(__mmask8 __U, __m512d __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_min_ps(__mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_cvtusepi32_epi16(__m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_set1_epi64(__m512i __O, __mmask8 __M, long long __A)
static __inline__ float __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_min_ps(__mmask16 __M, __m512 __V)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_fmsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS _load_mask16(__mmask16 *__A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_unpackhi_ps(__mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_cvtsd_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128d __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_movehdup_ps(__m512 __W, __mmask16 __U, __m512 __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi32lo_pd(__m512d __W, __mmask8 __U, __m512i __A)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_store_epi32(void *__P, __mmask16 __U, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_srl_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_permutexvar_ps(__m512 __W, __mmask16 __U, __m512i __X, __m512 __Y)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvttps_epu32(__mmask16 __U, __m512 __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepi32_ps(__mmask16 __U, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_and_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_max_ps(__mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_fmsub_ss(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtpd_epi32(__m256i __W, __mmask8 __U, __m512d __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_ceil_pd(__m512d __W, __mmask8 __U, __m512d __A)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_permutex2var_ps(__m512 __A, __m512i __I, __m512 __B)
static __inline__ float __DEFAULT_FN_ATTRS512 _mm512_cvtss_f32(__m512 __a)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtusepi32_epi8(__mmask16 __M, __m512i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtsepi32_epi8(__m128i __O, __mmask16 __M, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_srlv_epi32(__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
static __inline__ float __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_mul_ps(__mmask16 __M, __m512 __W)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS _mm512_kxor(__mmask16 __A, __mmask16 __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_fmsub_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_zextsi128_si512(__m128i __a)
Constructs a 512-bit integer vector from a 128-bit integer vector.
static __inline__ int __DEFAULT_FN_ATTRS128 _mm_cvttsd_i32(__m128d __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_max_pd(__mmask8 __U, __m512d __A, __m512d __B)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_set4_pd(double __A, double __B, double __C, double __D)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_mask_store_ps(void *__P, __mmask16 __U, __m512 __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_sub_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_sra_epi32(__mmask16 __U, __m512i __A, __m128i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_broadcast_i64x4(__mmask8 __M, __m256i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_fmsubadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_mov_epi64(__m512i __W, __mmask8 __U, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_or_epi64(__mmask8 __k, __m512i __a, __m512i __b)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_and_epi32(__m512i __a, __m512i __b)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS _mm512_kandn(__mmask16 __A, __mmask16 __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_div_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_rsqrt14_pd(__mmask8 __U, __m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_unpacklo_epi32(__mmask16 __U, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_compress_epi64(__m512i __W, __mmask8 __U, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_max_epi64(__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_undefined_ps(void)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_permutexvar_epi32(__mmask16 __M, __m512i __X, __m512i __Y)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_compress_epi32(__mmask16 __U, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_unpacklo_epi32(__m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepi8_epi32(__mmask16 __U, __m128i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_broadcast_i32x4(__mmask16 __M, __m128i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_abs_epi32(__mmask16 __U, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_srlv_epi32(__mmask16 __U, __m512i __X, __m512i __Y)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_andnot_epi64(__m512i __A, __m512i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_broadcast_f32x4(__m512 __O, __mmask16 __M, __m128 __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_unpacklo_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_stream_ps(void *__P, __m512 __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_getexp_pd(__m512d __W, __mmask8 __U, __m512d __A)
static __inline__ double __DEFAULT_FN_ATTRS512 _mm512_reduce_max_pd(__m512d __V)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_loadu_pd(__mmask8 __U, void const *__P)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtps_epi32(__mmask16 __U, __m512 __A)
static __inline__ unsigned char __DEFAULT_FN_ATTRS _kortestc_mask16_u8(__mmask16 __A, __mmask16 __B)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_loadu_ps(__m512 __W, __mmask16 __U, void const *__P)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_expand_pd(__m512d __W, __mmask8 __U, __m512d __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_unpackhi_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask3_fnmadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_srlv_epi64(__mmask8 __U, __m512i __X, __m512i __Y)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_compress_ps(__mmask16 __U, __m512 __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_max_sd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_abs_ps(__m512 __W, __mmask16 __K, __m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_permutex2var_epi32(__m512i __A, __mmask16 __U, __m512i __I, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_load_epi32(__mmask16 __U, void const *__P)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_mov_ps(__m512 __W, __mmask16 __U, __m512 __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_broadcast_f64x4(__mmask8 __M, __m256d __A)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS _mm512_int2mask(int __a)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtps_epu32(__mmask16 __U, __m512 __A)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_store_ps(void *__P, __m512 __A)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi64_storeu_epi32(void *__P, __mmask8 __M, __m512i __A)
#define __DEFAULT_FN_ATTRS128
static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_reduce_min_epi64(__m512i __V)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_movedup_pd(__m512d __W, __mmask8 __U, __m512d __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_scalef_ps(__m512 __A, __m512 __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask3_fmaddsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_load_ss(__m128 __W, __mmask8 __U, const float *__A)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_floor_pd(__m512d __A)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_loadu_pd(void const *__p)
static __inline__ int __DEFAULT_FN_ATTRS512 _mm512_reduce_and_epi32(__m512i __W)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtsepi64_storeu_epi32(void *__P, __mmask8 __M, __m512i __A)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_min_epi32(__m512i __A, __m512i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtsepi32_epi16(__m256i __O, __mmask16 __M, __m512i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_compress_pd(__mmask8 __U, __m512d __A)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_castpd_si512(__m512d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtsepi64_epi8(__mmask8 __M, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_cvtepu16_epi64(__m128i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_fmaddsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_cvtsepi64_epi16(__m512i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi32_epi16(__m256i __O, __mmask16 __M, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepi16_epi64(__mmask8 __U, __m128i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask3_fnmadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_stream_load_si512(void const *__P)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_mul_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
static __inline__ float __DEFAULT_FN_ATTRS512 _mm512_reduce_min_ps(__m512 __V)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_sub_epi64(__mmask8 __U, __m512i __A, __m512i __B)
static __inline__ float __DEFAULT_FN_ATTRS512 _mm512_reduce_max_ps(__m512 __V)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtsepi64_epi16(__m128i __O, __mmask8 __M, __m512i __A)
static __inline__ int __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_min_epi32(__mmask16 __M, __m512i __V)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_fmsubadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_srai_epi64(__m512i __A, unsigned int __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_rcp14_ps(__m512 __W, __mmask16 __U, __m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_srli_epi32(__mmask16 __U, __m512i __A, unsigned int __B)
static __inline__ int __DEFAULT_FN_ATTRS _mm512_kortestc(__mmask16 __A, __mmask16 __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_move_sd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_cvtepi32lo_pd(__m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_undefined_epi32(void)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepu32_pd(__mmask8 __U, __m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_cvtepi64_epi32(__m512i __A)
static __inline__ double __DEFAULT_FN_ATTRS512 _mm512_reduce_add_pd(__m512d __W)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_loadu_epi32(void const *__P)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_castpd128_pd512(__m128d __A)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_mul_pd(__m512d __a, __m512d __b)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask3_fnmadd_sd(__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_permutevar_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512i __C)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_srl_epi64(__m512i __A, __m128i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_permutex2var_epi32(__mmask16 __U, __m512i __A, __m512i __I, __m512i __B)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_set1_ps(float __w)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_or_epi32(__mmask16 __k, __m512i __a, __m512i __b)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_sub_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_castsi512_ps(__m512i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtpd_epi32(__mmask8 __U, __m512d __A)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi32_storeu_epi16(void *__P, __mmask16 __M, __m512i __A)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_loadu_ps(__mmask16 __U, void const *__P)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_add_epi64(__m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_max_epi32(__mmask16 __M, __m512i __A, __m512i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepi32_pd(__mmask8 __U, __m256i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_expand_epi32(__mmask16 __U, __m512i __A)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_castps256_ps512(__m256 __a)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_add_ps(__m512 __a, __m512 __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtusepi32_epi8(__m128i __O, __mmask16 __M, __m512i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi64_epi32(__m256i __O, __mmask8 __M, __m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_getexp_ps(__mmask16 __U, __m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_rorv_epi32(__m512i __A, __m512i __B)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_load_epi32(void const *__P)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_rcp14_ss(__mmask8 __U, __m128 __A, __m128 __B)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_set4_epi64(long long __A, long long __B, long long __C, long long __D)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_add_pd(__m512d __a, __m512d __b)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_and_epi32(__mmask16 __k, __m512i __a, __m512i __b)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_sllv_epi32(__m512i __X, __m512i __Y)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_or_epi64(__m512i __a, __m512i __b)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_cvtepi8_epi64(__m128i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_load_sd(__mmask8 __U, const double *__A)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepi64_epi16(__mmask8 __M, __m512i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_scalef_sd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_mov_epi32(__m512i __W, __mmask16 __U, __m512i __A)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_storeu_ps(void *__P, __m512 __A)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_mul_epi32(__m512i __X, __m512i __Y)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask3_fmadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepu16_epi64(__m512i __W, __mmask8 __U, __m128i __A)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtsepi64_storeu_epi8(void *__P, __mmask8 __M, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_rorv_epi32(__mmask16 __U, __m512i __A, __m512i __B)
static __inline__ float __DEFAULT_FN_ATTRS512 _mm512_reduce_add_ps(__m512 __W)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_broadcastsd_pd(__mmask8 __M, __m128d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvttps_epi32(__m512i __W, __mmask16 __U, __m512 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_cvtsepi32_epi16(__m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_unpacklo_ps(__mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepu8_epi32(__m512i __W, __mmask16 __U, __m128i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_permutexvar_epi32(__m512i __X, __m512i __Y)
static __inline__ int __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_add_epi32(__mmask16 __M, __m512i __W)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_add_pd(__mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_rcp14_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_add_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_cvtss_sd(__mmask8 __U, __m128d __A, __m128 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_permutex2var_epi64(__m512i __A, __mmask8 __U, __m512i __I, __m512i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_scalef_pd(__mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_and_epi64(__mmask8 __k, __m512i __a, __m512i __b)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_srli_epi64(__m512i __W, __mmask8 __U, __m512i __A, unsigned int __B)
static __inline__ double __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_add_pd(__mmask8 __M, __m512d __W)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_srai_epi64(__mmask8 __U, __m512i __A, unsigned int __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_srl_epi32(__m512i __A, __m128i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_andnot_epi32(__m512i __A, __m512i __B)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_load_pd(void const *__p)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_getexp_pd(__m512d __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_abs_pd(__m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_rorv_epi64(__m512i __A, __m512i __B)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_load_epi64(void const *__P)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_ceil_pd(__m512d __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_div_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_reduce_and_epi64(__m512i __W)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi8_epi64(__m512i __W, __mmask8 __U, __m128i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_fnmsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_store_epi32(void *__P, __m512i __A)
static __inline__ int __DEFAULT_FN_ATTRS512 _mm512_reduce_min_epi32(__m512i __V)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_unpackhi_pd(__m512d __a, __m512d __b)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_sub_ps(__mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_permutevar_ps(__mmask16 __U, __m512 __A, __m512i __C)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_sub_epi32(__mmask16 __U, __m512i __A, __m512i __B)
static __inline__ int __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_and_epi32(__mmask16 __M, __m512i __W)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_cvtepi32_ps(__m512i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_move_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_slli_epi32(__m512i __W, __mmask16 __U, __m512i __A, unsigned int __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_unpacklo_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS512 _mm512_testn_epi32_mask(__m512i __A, __m512i __B)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtsepi32_storeu_epi8(void *__P, __mmask16 __M, __m512i __A)
static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_reduce_max_epi64(__m512i __V)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_rcp14_ps(__m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_sra_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_rorv_epi64(__mmask8 __U, __m512i __A, __m512i __B)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_cvtepi32_pd(__m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtsepi64_epi8(__m128i __O, __mmask8 __M, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_rolv_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask3_fnmsub_sd(__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_expand_pd(__mmask8 __U, __m512d __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_permutexvar_ps(__m512i __X, __m512 __Y)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_max_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS _cvtu32_mask16(unsigned int __A)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS _mm512_kunpackb(__mmask16 __A, __mmask16 __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_compress_ps(__m512 __W, __mmask16 __U, __m512 __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_load_sd(__m128d __W, __mmask8 __U, const double *__A)
static __inline__ __m256 __DEFAULT_FN_ATTRS512 _mm512_cvtpd_ps(__m512d __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_fnmadd_ss(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_max_ss(__mmask8 __U, __m128 __A, __m128 __B)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_castpd256_pd512(__m256d __a)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_mask_storeu_epi64(void *__P, __mmask8 __U, __m512i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_getexp_sd(__m128d __A, __m128d __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_scalef_ss(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_sra_epi64(__m512i __A, __m128i __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_rcp14_sd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_set_epi16(short __e31, short __e30, short __e29, short __e28, short __e27, short __e26, short __e25, short __e24, short __e23, short __e22, short __e21, short __e20, short __e19, short __e18, short __e17, short __e16, short __e15, short __e14, short __e13, short __e12, short __e11, short __e10, short __e9, short __e8, short __e7, short __e6, short __e5, short __e4, short __e3, short __e2, short __e1, short __e0)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_expandloadu_pd(__mmask8 __U, void const *__P)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_broadcast_f64x4(__m256d __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_fmsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_cvtepi64_epi16(__m512i __A)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_sub_ps(__m512 __a, __m512 __b)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_mul_pd(__mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_fmadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_mask_cvttpd_epu32(__m256i __W, __mmask8 __U, __m512d __A)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtsepi32_storeu_epi16(void *__P, __mmask16 __M, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_min_epi32(__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_store_ss(float *__W, __mmask8 __U, __m128 __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_fnmsub_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtps_pd(__mmask8 __U, __m256 __A)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_blend_epi64(__mmask8 __U, __m512i __A, __m512i __W)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_permutex2var_epi64(__mmask8 __U, __m512i __A, __m512i __I, __m512i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_permutex2var_pd(__m512d __A, __mmask8 __U, __m512i __I, __m512d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_expandloadu_epi64(__mmask8 __U, void const *__P)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_fnmadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_max_epi64(__mmask8 __M, __m512i __V)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_getexp_ps(__m512 __W, __mmask16 __U, __m512 __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_moveldup_ps(__m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_load_epi64(__m512i __W, __mmask8 __U, void const *__P)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_sra_epi32(__m512i __A, __m128i __B)
static __inline__ unsigned __DEFAULT_FN_ATTRS128 _mm_cvtss_u32(__m128 __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_add_sd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_storeu_epi32(void *__P, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_and_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepi64_epi32(__mmask8 __M, __m512i __A)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_zextps128_ps512(__m128 __a)
Constructs a 512-bit floating-point vector of [16 x float] from a 128-bit floating-point vector of [4 x float]. The lower 128 bits contain the value of the source vector. The upper 384 bits are set to zero.
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_mov_pd(__m512d __W, __mmask8 __U, __m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_srlv_epi64(__m512i __X, __m512i __Y)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtusepi64_storeu_epi8(void *__P, __mmask8 __M, __m512i __A)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_cvtepu32_pd(__m256i __A)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_load_pd(__mmask8 __U, void const *__P)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepi32_epi64(__mmask8 __U, __m256i __X)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_rcp14_ps(__mmask16 __U, __m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_unpackhi_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_fmadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_sub_epi64(__m512i __A, __m512i __B)
static __inline __m256i __DEFAULT_FN_ATTRS512 _mm512_castsi512_si256(__m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepu8_epi64(__m512i __W, __mmask8 __U, __m128i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_xor_si512(__m512i __a, __m512i __b)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_and_si512(__m512i __a, __m512i __b)
static __inline__ int __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_or_epi32(__mmask16 __M, __m512i __W)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_permutevar_pd(__mmask8 __U, __m512d __A, __m512i __C)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtph_ps(__mmask16 __U, __m256i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_srav_epi64(__mmask8 __U, __m512i __X, __m512i __Y)
#define _MM_FROUND_CUR_DIRECTION 0x04
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtusepi64_storeu_epi16(void *__P, __mmask8 __M, __m512i __A)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_mul_epi32(__mmask8 __M, __m512i __X, __m512i __Y)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_max_pd(__m512d __A, __m512d __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_floor_ps(__m512 __W, __mmask16 __U, __m512 __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_div_pd(__mmask8 __U, __m512d __A, __m512d __B)
static __inline__ double __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_max_pd(__mmask8 __M, __m512d __V)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_fnmadd_sd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_unpackhi_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtsepi64_storeu_epi16(void *__P, __mmask8 __M, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_permutexvar_epi32(__m512i __W, __mmask16 __M, __m512i __X, __m512i __Y)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_loadu_epi32(__m512i __W, __mmask16 __U, void const *__P)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_fmsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_fnmsub_ps(__m512 __A, __m512 __B, __m512 __C)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_sub_pd(__m512d __a, __m512d __b)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_rorv_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_broadcast_i64x4(__m512i __O, __mmask8 __M, __m256i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_broadcastd_epi32(__mmask16 __M, __m128i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_permutexvar_epi64(__mmask8 __M, __m512i __X, __m512i __Y)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_rcp14_pd(__mmask8 __U, __m512d __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_fmsubadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
static __inline__ int __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_mul_epi32(__mmask16 __M, __m512i __W)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi32_epi8(__m128i __O, __mmask16 __M, __m512i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_rsqrt14_sd(__m128d __A, __m128d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_srai_epi32(__m512i __W, __mmask16 __U, __m512i __A, unsigned int __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_div_sd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS _mm512_kxnor(__mmask16 __A, __mmask16 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_slli_epi64(__m512i __W, __mmask8 __U, __m512i __A, unsigned int __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepu32lo_pd(__m512d __W, __mmask8 __U, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_srav_epi32(__mmask16 __U, __m512i __X, __m512i __Y)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_set4_epi32(int __A, int __B, int __C, int __D)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_fmsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_max_epu64(__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_slli_epi32(__m512i __A, unsigned int __B)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_set1_epi64(__mmask8 __M, long long __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_srl_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_permutex2var_pd(__m512d __A, __m512i __I, __m512d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_add_epi64(__mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_or_epi32(__m512i __a, __m512i __b)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_rsqrt14_ps(__m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi32_epi64(__m512i __W, __mmask8 __U, __m256i __X)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_cvtepi16_epi32(__m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_cvtsepi64_epi32(__m512i __A)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_zextpd128_pd512(__m128d __a)
Constructs a 512-bit floating-point vector of [8 x double] from a 128-bit floating-point vector of [2 x double]. The lower 128 bits contain the value of the source vector. The upper 384 bits are set to zero.
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_broadcast_f64x4(__m512d __O, __mmask8 __M, __m256d __A)
#define _mm512_cmpeq_epi64_mask(A, B) \
    _mm512_cmp_epi64_mask((A), (B), _MM_CMPINT_EQ)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_moveldup_ps(__m512 __W, __mmask16 __U, __m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_sllv_epi32(__mmask16 __U, __m512i __X, __m512i __Y)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_sub_ss(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_or_epi64(__mmask8 __M, __m512i __W)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_unpackhi_pd(__mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtsepi64_epi32(__m256i __O, __mmask8 __M, __m512i __A)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_loadu_epi64(void const *__P)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_rorv_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_rolv_epi64(__m512i __A, __m512i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_cvtps_pd(__m512d __W, __mmask8 __U, __m256 __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_move_ss(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvttps_epi32(__mmask16 __U, __m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_or_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_castsi256_si512(__m256i __A)
static __inline __mmask16 __DEFAULT_FN_ATTRS _mm512_knot(__mmask16 __M)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mullox_epi64(__m512i __A, __m512i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_permutexvar_pd(__mmask8 __U, __m512i __X, __m512d __Y)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_rsqrt14_ps(__m512 __W, __mmask16 __U, __m512 __A)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_setzero_si512(void)
static __inline __m128d __DEFAULT_FN_ATTRS512 _mm512_castpd512_pd128(__m512d __a)
static __inline__ unsigned int __DEFAULT_FN_ATTRS _cvtmask16_u32(__mmask16 __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_permutex2var_ps(__m512 __A, __mmask16 __U, __m512i __I, __m512 __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_sqrt_ps(__m512 __W, __mmask16 __U, __m512 __A)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_unpackhi_ps(__m512 __a, __m512 __b)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_expandloadu_epi64(__m512i __W, __mmask8 __U, void const *__P)
static __inline __m128 __DEFAULT_FN_ATTRS512 _mm512_castps512_ps128(__m512 __a)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_cvtu32_ss(__m128 __A, unsigned __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask3_fmsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_sub_sd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_srai_epi32(__m512i __A, unsigned int __B)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS512 _mm512_mask_test_epi64_mask(__mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_mov_epi64(__mmask8 __U, __m512i __A)
static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_reduce_or_epi64(__m512i __W)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtusepi64_epi8(__mmask8 __M, __m512i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask3_fmsub_sd(__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_castsi512_pd(__m512i __A)
static __inline__ unsigned int __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_min_epu32(__mmask16 __M, __m512i __V)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi32_storeu_epi8(void *__P, __mmask16 __M, __m512i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_scalef_sd(__m128d __A, __m128d __B)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_blend_pd(__mmask8 __U, __m512d __A, __m512d __W)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_rcp14_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_compress_epi32(__m512i __W, __mmask16 __U, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_sll_epi64(__m512i __A, __m128i __B)
static __inline __m256 __DEFAULT_FN_ATTRS _mm256_setzero_ps(void)
Constructs a 256-bit floating-point vector of [8 x float] with all vector elements initialized to zero.
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_undefined_ps(void)
Create a 256-bit vector of [8 x float] with undefined values.
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_undefined_si256(void)
Create a 256-bit integer vector with undefined values.
static __inline __m256d __DEFAULT_FN_ATTRS _mm256_setzero_pd(void)
Constructs a 256-bit floating-point vector of [4 x double] with all vector elements initialized to zero.
static __inline __m256i __DEFAULT_FN_ATTRS _mm256_setzero_si256(void)
Constructs a 256-bit integer vector initialized to zero.
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_add_sd(__m128d __a, __m128d __b)
Adds lower double-precision values in both operands and returns the sum in the lower 64 bits of the result. The upper 64 bits of the result are copied from the upper double-precision value of the first operand.
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_div_sd(__m128d __a, __m128d __b)
Divides the lower double-precision value of the first operand by the lower double-precision value of the second operand and returns the quotient in the lower 64 bits of the result. The upper 64 bits of the result are copied from the upper double-precision value of the first operand.
static __inline__ void int __a
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_mul_sd(__m128d __a, __m128d __b)
Multiplies lower double-precision values in both operands and returns the product in the lower 64 bits of the result. The upper 64 bits of the result are copied from the upper double-precision value of the first operand.
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_move_sd(__m128d __a, __m128d __b)
Constructs a 128-bit floating-point vector of [2 x double].
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_undefined_si128(void)
Generates a 128-bit vector of [4 x i32] with unspecified content.
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_sub_sd(__m128d __a, __m128d __b)
Subtracts the lower double-precision value of the second operand from the lower double-precision value of the first operand and returns the difference in the lower 64 bits of the result. The upper 64 bits of the result are copied from the upper double-precision value of the first operand.
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_setzero_pd(void)
Constructs a 128-bit floating-point vector of [2 x double] initialized to zero.
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_setzero_si128(void)
Creates a 128-bit integer vector initialized to zero.
static __inline__ unsigned int unsigned char __D
static __inline__ unsigned char int __C
struct __storeu_i16 *__P __v
static __inline__ void const void * __src
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_setzero_ps(void)
Constructs a 128-bit floating-point vector of [4 x float] initialized to zero.
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_sub_ss(__m128 __a, __m128 __b)
Subtracts the 32-bit float value in the low-order bits of the second operand from the corresponding value in the first operand and returns the difference in the low-order 32 bits of the result. The upper 96 bits of the result are copied from the upper 96 bits of the first operand.
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_move_ss(__m128 __a, __m128 __b)
Constructs a 128-bit floating-point vector of [4 x float].
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_mul_ss(__m128 __a, __m128 __b)
Multiplies two 32-bit float values in the low-order bits of the operands.
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_add_ss(__m128 __a, __m128 __b)
Adds the 32-bit float values in the low-order bits of the operands.
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_div_ss(__m128 __a, __m128 __b)
Divides the value in the low-order 32 bits of the first operand by the corresponding value in the second operand and returns the quotient in the low-order 32 bits of the result. The upper 96 bits of the result are copied from the upper 96 bits of the first operand.