ETISS 0.8.0
Extendable Translating Instruction Set Simulator (version 0.8.0)
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
avx512fintrin.h
Go to the documentation of this file.
/*===---- avx512fintrin.h - AVX512F intrinsics -----------------------------===
 *
 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 * See https://llvm.org/LICENSE.txt for license information.
 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 *
 *===-----------------------------------------------------------------------===
 */
9#ifndef __IMMINTRIN_H
10#error "Never use <avx512fintrin.h> directly; include <immintrin.h> instead."
11#endif
12
13#ifndef __AVX512FINTRIN_H
14#define __AVX512FINTRIN_H
15
/* 512-bit signed element vector types (internal). */
typedef char __v64qi __attribute__((__vector_size__(64)));
typedef short __v32hi __attribute__((__vector_size__(64)));
typedef double __v8df __attribute__((__vector_size__(64)));
typedef float __v16sf __attribute__((__vector_size__(64)));
typedef long long __v8di __attribute__((__vector_size__(64)));
typedef int __v16si __attribute__((__vector_size__(64)));

/* Unsigned types */
typedef unsigned char __v64qu __attribute__((__vector_size__(64)));
typedef unsigned short __v32hu __attribute__((__vector_size__(64)));
typedef unsigned long long __v8du __attribute__((__vector_size__(64)));
typedef unsigned int __v16su __attribute__((__vector_size__(64)));

/* Public 512-bit vector types, naturally (64-byte) aligned. */
typedef float __m512 __attribute__((__vector_size__(64), __aligned__(64)));
typedef double __m512d __attribute__((__vector_size__(64), __aligned__(64)));
typedef long long __m512i __attribute__((__vector_size__(64), __aligned__(64)));

/* Unaligned (1-byte aligned) variants for loadu/storeu-style access. */
typedef float __m512_u __attribute__((__vector_size__(64), __aligned__(1)));
typedef double __m512d_u __attribute__((__vector_size__(64), __aligned__(1)));
typedef long long __m512i_u __attribute__((__vector_size__(64), __aligned__(1)));

/* Write-mask types: one bit per vector lane. */
typedef unsigned char __mmask8;
typedef unsigned short __mmask16;
/* Rounding mode macros. */
#define _MM_FROUND_TO_NEAREST_INT   0x00
#define _MM_FROUND_TO_NEG_INF       0x01
#define _MM_FROUND_TO_POS_INF       0x02
#define _MM_FROUND_TO_ZERO          0x03
#define _MM_FROUND_CUR_DIRECTION    0x04
/* Constants for integer comparison predicates.
 * NE is encoded as 4, so a placeholder occupies value 3 (kept to match the
 * hardware immediate encoding). GE/GT are aliases of NLT/NLE. */
typedef enum {
    _MM_CMPINT_EQ,      /* Equal */
    _MM_CMPINT_LT,      /* Less than */
    _MM_CMPINT_LE,      /* Less than or Equal */
    _MM_CMPINT_UNUSED,
    _MM_CMPINT_NE,      /* Not Equal */
    _MM_CMPINT_NLT,     /* Not Less than */
#define _MM_CMPINT_GE   _MM_CMPINT_NLT  /* Greater than or Equal */
    _MM_CMPINT_NLE      /* Not Less than or Equal */
#define _MM_CMPINT_GT   _MM_CMPINT_NLE  /* Greater than */
} _MM_CMPINT_ENUM;

/* NOTE(review): this enum is truncated in this extraction — the original
 * lines 62-146 (presumably the full _MM_PERM_AAAA = 0x00 ... _MM_PERM_DDDC
 * enumerator table, three per line) and the closing "} _MM_PERM_ENUM;" on
 * line 148 are missing. Only the final enumerator survived below. Restore
 * the complete definition from the upstream clang avx512fintrin.h —
 * TODO confirm against the matching clang release. */
60typedef enum
61{
147 _MM_PERM_DDDD = 0xFF
149
/* Normalization intervals for the getmant (extract mantissa) intrinsics. */
typedef enum
{
  _MM_MANT_NORM_1_2,      /* interval [1, 2)      */
  _MM_MANT_NORM_p5_2,     /* interval [0.5, 2)    */
  _MM_MANT_NORM_p5_1,     /* interval [0.5, 1)    */
  _MM_MANT_NORM_p75_1p5   /* interval [0.75, 1.5) */
} _MM_MANTISSA_NORM_ENUM;

/* Sign control for the getmant (extract mantissa) intrinsics. */
typedef enum
{
  _MM_MANT_SIGN_src,      /* sign = sign(SRC)            */
  _MM_MANT_SIGN_zero,     /* sign = 0                    */
  _MM_MANT_SIGN_nan       /* DEST = NaN if sign(SRC) = 1 */
} _MM_MANTISSA_SIGN_ENUM;

/* Define the default attributes for the functions in this file. */
#define __DEFAULT_FN_ATTRS512 __attribute__((__always_inline__, __nodebug__, __target__("avx512f"), __min_vector_width__(512)))
#define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__("avx512f"), __min_vector_width__(128)))
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512f")))

170/* Create vectors with repeated elements */
171
172static __inline __m512i __DEFAULT_FN_ATTRS512
174{
175 return __extension__ (__m512i)(__v8di){ 0, 0, 0, 0, 0, 0, 0, 0 };
176}
177
178#define _mm512_setzero_epi32 _mm512_setzero_si512
179
180static __inline__ __m512d __DEFAULT_FN_ATTRS512
182{
183 return (__m512d)__builtin_ia32_undef512();
184}
185
186static __inline__ __m512 __DEFAULT_FN_ATTRS512
188{
189 return (__m512)__builtin_ia32_undef512();
190}
191
192static __inline__ __m512 __DEFAULT_FN_ATTRS512
194{
195 return (__m512)__builtin_ia32_undef512();
196}
197
198static __inline__ __m512i __DEFAULT_FN_ATTRS512
200{
201 return (__m512i)__builtin_ia32_undef512();
202}
203
204static __inline__ __m512i __DEFAULT_FN_ATTRS512
206{
207 return (__m512i)__builtin_shufflevector((__v4si) __A, (__v4si) __A,
208 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
209}
210
211static __inline__ __m512i __DEFAULT_FN_ATTRS512
212_mm512_mask_broadcastd_epi32 (__m512i __O, __mmask16 __M, __m128i __A)
213{
214 return (__m512i)__builtin_ia32_selectd_512(__M,
215 (__v16si) _mm512_broadcastd_epi32(__A),
216 (__v16si) __O);
217}
218
219static __inline__ __m512i __DEFAULT_FN_ATTRS512
221{
222 return (__m512i)__builtin_ia32_selectd_512(__M,
223 (__v16si) _mm512_broadcastd_epi32(__A),
224 (__v16si) _mm512_setzero_si512());
225}
226
227static __inline__ __m512i __DEFAULT_FN_ATTRS512
229{
230 return (__m512i)__builtin_shufflevector((__v2di) __A, (__v2di) __A,
231 0, 0, 0, 0, 0, 0, 0, 0);
232}
233
234static __inline__ __m512i __DEFAULT_FN_ATTRS512
235_mm512_mask_broadcastq_epi64 (__m512i __O, __mmask8 __M, __m128i __A)
236{
237 return (__m512i)__builtin_ia32_selectq_512(__M,
238 (__v8di) _mm512_broadcastq_epi64(__A),
239 (__v8di) __O);
240
241}
242
243static __inline__ __m512i __DEFAULT_FN_ATTRS512
245{
246 return (__m512i)__builtin_ia32_selectq_512(__M,
247 (__v8di) _mm512_broadcastq_epi64(__A),
248 (__v8di) _mm512_setzero_si512());
249}
250
251
252static __inline __m512 __DEFAULT_FN_ATTRS512
254{
255 return __extension__ (__m512){ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
256 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 };
257}
258
259#define _mm512_setzero _mm512_setzero_ps
260
261static __inline __m512d __DEFAULT_FN_ATTRS512
263{
264 return __extension__ (__m512d){ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 };
265}
266
267static __inline __m512 __DEFAULT_FN_ATTRS512
269{
270 return __extension__ (__m512){ __w, __w, __w, __w, __w, __w, __w, __w,
271 __w, __w, __w, __w, __w, __w, __w, __w };
272}
273
274static __inline __m512d __DEFAULT_FN_ATTRS512
275_mm512_set1_pd(double __w)
276{
277 return __extension__ (__m512d){ __w, __w, __w, __w, __w, __w, __w, __w };
278}
279
280static __inline __m512i __DEFAULT_FN_ATTRS512
282{
283 return __extension__ (__m512i)(__v64qi){
284 __w, __w, __w, __w, __w, __w, __w, __w,
285 __w, __w, __w, __w, __w, __w, __w, __w,
286 __w, __w, __w, __w, __w, __w, __w, __w,
287 __w, __w, __w, __w, __w, __w, __w, __w,
288 __w, __w, __w, __w, __w, __w, __w, __w,
289 __w, __w, __w, __w, __w, __w, __w, __w,
290 __w, __w, __w, __w, __w, __w, __w, __w,
291 __w, __w, __w, __w, __w, __w, __w, __w };
292}
293
294static __inline __m512i __DEFAULT_FN_ATTRS512
296{
297 return __extension__ (__m512i)(__v32hi){
298 __w, __w, __w, __w, __w, __w, __w, __w,
299 __w, __w, __w, __w, __w, __w, __w, __w,
300 __w, __w, __w, __w, __w, __w, __w, __w,
301 __w, __w, __w, __w, __w, __w, __w, __w };
302}
303
304static __inline __m512i __DEFAULT_FN_ATTRS512
306{
307 return __extension__ (__m512i)(__v16si){
308 __s, __s, __s, __s, __s, __s, __s, __s,
309 __s, __s, __s, __s, __s, __s, __s, __s };
310}
311
312static __inline __m512i __DEFAULT_FN_ATTRS512
314{
315 return (__m512i)__builtin_ia32_selectd_512(__M,
316 (__v16si)_mm512_set1_epi32(__A),
317 (__v16si)_mm512_setzero_si512());
318}
319
320static __inline __m512i __DEFAULT_FN_ATTRS512
321_mm512_set1_epi64(long long __d)
322{
323 return __extension__(__m512i)(__v8di){ __d, __d, __d, __d, __d, __d, __d, __d };
324}
325
326static __inline __m512i __DEFAULT_FN_ATTRS512
328{
329 return (__m512i)__builtin_ia32_selectq_512(__M,
330 (__v8di)_mm512_set1_epi64(__A),
331 (__v8di)_mm512_setzero_si512());
332}
333
334static __inline__ __m512 __DEFAULT_FN_ATTRS512
336{
337 return (__m512)__builtin_shufflevector((__v4sf) __A, (__v4sf) __A,
338 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
339}
340
341static __inline __m512i __DEFAULT_FN_ATTRS512
342_mm512_set4_epi32 (int __A, int __B, int __C, int __D)
343{
344 return __extension__ (__m512i)(__v16si)
345 { __D, __C, __B, __A, __D, __C, __B, __A,
346 __D, __C, __B, __A, __D, __C, __B, __A };
347}
348
349static __inline __m512i __DEFAULT_FN_ATTRS512
350_mm512_set4_epi64 (long long __A, long long __B, long long __C,
351 long long __D)
352{
353 return __extension__ (__m512i) (__v8di)
354 { __D, __C, __B, __A, __D, __C, __B, __A };
355}
356
357static __inline __m512d __DEFAULT_FN_ATTRS512
358_mm512_set4_pd (double __A, double __B, double __C, double __D)
359{
360 return __extension__ (__m512d)
361 { __D, __C, __B, __A, __D, __C, __B, __A };
362}
363
364static __inline __m512 __DEFAULT_FN_ATTRS512
365_mm512_set4_ps (float __A, float __B, float __C, float __D)
366{
367 return __extension__ (__m512)
368 { __D, __C, __B, __A, __D, __C, __B, __A,
369 __D, __C, __B, __A, __D, __C, __B, __A };
370}
371
372#define _mm512_setr4_epi32(e0,e1,e2,e3) \
373 _mm512_set4_epi32((e3),(e2),(e1),(e0))
374
375#define _mm512_setr4_epi64(e0,e1,e2,e3) \
376 _mm512_set4_epi64((e3),(e2),(e1),(e0))
377
378#define _mm512_setr4_pd(e0,e1,e2,e3) \
379 _mm512_set4_pd((e3),(e2),(e1),(e0))
380
381#define _mm512_setr4_ps(e0,e1,e2,e3) \
382 _mm512_set4_ps((e3),(e2),(e1),(e0))
383
384static __inline__ __m512d __DEFAULT_FN_ATTRS512
386{
387 return (__m512d)__builtin_shufflevector((__v2df) __A, (__v2df) __A,
388 0, 0, 0, 0, 0, 0, 0, 0);
389}
390
391/* Cast between vector types */
392
393static __inline __m512d __DEFAULT_FN_ATTRS512
395{
396 return __builtin_shufflevector(__a, __a, 0, 1, 2, 3, -1, -1, -1, -1);
397}
398
399static __inline __m512 __DEFAULT_FN_ATTRS512
401{
402 return __builtin_shufflevector(__a, __a, 0, 1, 2, 3, 4, 5, 6, 7,
403 -1, -1, -1, -1, -1, -1, -1, -1);
404}
405
406static __inline __m128d __DEFAULT_FN_ATTRS512
408{
409 return __builtin_shufflevector(__a, __a, 0, 1);
410}
411
412static __inline __m256d __DEFAULT_FN_ATTRS512
414{
415 return __builtin_shufflevector(__A, __A, 0, 1, 2, 3);
416}
417
418static __inline __m128 __DEFAULT_FN_ATTRS512
420{
421 return __builtin_shufflevector(__a, __a, 0, 1, 2, 3);
422}
423
424static __inline __m256 __DEFAULT_FN_ATTRS512
426{
427 return __builtin_shufflevector(__A, __A, 0, 1, 2, 3, 4, 5, 6, 7);
428}
429
430static __inline __m512 __DEFAULT_FN_ATTRS512
431_mm512_castpd_ps (__m512d __A)
432{
433 return (__m512) (__A);
434}
435
436static __inline __m512i __DEFAULT_FN_ATTRS512
438{
439 return (__m512i) (__A);
440}
441
442static __inline__ __m512d __DEFAULT_FN_ATTRS512
444{
445 return __builtin_shufflevector( __A, __A, 0, 1, -1, -1, -1, -1, -1, -1);
446}
447
448static __inline __m512d __DEFAULT_FN_ATTRS512
450{
451 return (__m512d) (__A);
452}
453
454static __inline __m512i __DEFAULT_FN_ATTRS512
456{
457 return (__m512i) (__A);
458}
459
460static __inline__ __m512 __DEFAULT_FN_ATTRS512
462{
463 return __builtin_shufflevector( __A, __A, 0, 1, 2, 3, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1);
464}
465
466static __inline__ __m512i __DEFAULT_FN_ATTRS512
468{
469 return __builtin_shufflevector( __A, __A, 0, 1, -1, -1, -1, -1, -1, -1);
470}
471
472static __inline__ __m512i __DEFAULT_FN_ATTRS512
474{
475 return __builtin_shufflevector( __A, __A, 0, 1, 2, 3, -1, -1, -1, -1);
476}
477
478static __inline __m512 __DEFAULT_FN_ATTRS512
480{
481 return (__m512) (__A);
482}
483
484static __inline __m512d __DEFAULT_FN_ATTRS512
486{
487 return (__m512d) (__A);
488}
489
490static __inline __m128i __DEFAULT_FN_ATTRS512
492{
493 return (__m128i)__builtin_shufflevector(__A, __A , 0, 1);
494}
495
496static __inline __m256i __DEFAULT_FN_ATTRS512
498{
499 return (__m256i)__builtin_shufflevector(__A, __A , 0, 1, 2, 3);
500}
501
502static __inline__ __mmask16 __DEFAULT_FN_ATTRS
504{
505 return (__mmask16)__a;
506}
507
508static __inline__ int __DEFAULT_FN_ATTRS
510{
511 return (int)__a;
512}
513
527static __inline __m512d __DEFAULT_FN_ATTRS512
529{
530 return __builtin_shufflevector((__v2df)__a, (__v2df)_mm_setzero_pd(), 0, 1, 2, 3, 2, 3, 2, 3);
531}
532
546static __inline __m512d __DEFAULT_FN_ATTRS512
548{
549 return __builtin_shufflevector((__v4df)__a, (__v4df)_mm256_setzero_pd(), 0, 1, 2, 3, 4, 5, 6, 7);
550}
551
564static __inline __m512 __DEFAULT_FN_ATTRS512
566{
567 return __builtin_shufflevector((__v4sf)__a, (__v4sf)_mm_setzero_ps(), 0, 1, 2, 3, 4, 5, 6, 7, 4, 5, 6, 7, 4, 5, 6, 7);
568}
569
582static __inline __m512 __DEFAULT_FN_ATTRS512
584{
585 return __builtin_shufflevector((__v8sf)__a, (__v8sf)_mm256_setzero_ps(), 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
586}
587
600static __inline __m512i __DEFAULT_FN_ATTRS512
602{
603 return __builtin_shufflevector((__v2di)__a, (__v2di)_mm_setzero_si128(), 0, 1, 2, 3, 2, 3, 2, 3);
604}
605
618static __inline __m512i __DEFAULT_FN_ATTRS512
620{
621 return __builtin_shufflevector((__v4di)__a, (__v4di)_mm256_setzero_si256(), 0, 1, 2, 3, 4, 5, 6, 7);
622}
623
624/* Bitwise operators */
625static __inline__ __m512i __DEFAULT_FN_ATTRS512
626_mm512_and_epi32(__m512i __a, __m512i __b)
627{
628 return (__m512i)((__v16su)__a & (__v16su)__b);
629}
630
631static __inline__ __m512i __DEFAULT_FN_ATTRS512
632_mm512_mask_and_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b)
633{
634 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__k,
635 (__v16si) _mm512_and_epi32(__a, __b),
636 (__v16si) __src);
637}
638
639static __inline__ __m512i __DEFAULT_FN_ATTRS512
641{
643 __k, __a, __b);
644}
645
646static __inline__ __m512i __DEFAULT_FN_ATTRS512
647_mm512_and_epi64(__m512i __a, __m512i __b)
648{
649 return (__m512i)((__v8du)__a & (__v8du)__b);
650}
651
652static __inline__ __m512i __DEFAULT_FN_ATTRS512
653_mm512_mask_and_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b)
654{
655 return (__m512i) __builtin_ia32_selectq_512 ((__mmask8) __k,
656 (__v8di) _mm512_and_epi64(__a, __b),
657 (__v8di) __src);
658}
659
660static __inline__ __m512i __DEFAULT_FN_ATTRS512
662{
664 __k, __a, __b);
665}
666
667static __inline__ __m512i __DEFAULT_FN_ATTRS512
668_mm512_andnot_si512 (__m512i __A, __m512i __B)
669{
670 return (__m512i)(~(__v8du)__A & (__v8du)__B);
671}
672
673static __inline__ __m512i __DEFAULT_FN_ATTRS512
674_mm512_andnot_epi32 (__m512i __A, __m512i __B)
675{
676 return (__m512i)(~(__v16su)__A & (__v16su)__B);
677}
678
679static __inline__ __m512i __DEFAULT_FN_ATTRS512
680_mm512_mask_andnot_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
681{
682 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
683 (__v16si)_mm512_andnot_epi32(__A, __B),
684 (__v16si)__W);
685}
686
687static __inline__ __m512i __DEFAULT_FN_ATTRS512
688_mm512_maskz_andnot_epi32(__mmask16 __U, __m512i __A, __m512i __B)
689{
691 __U, __A, __B);
692}
693
694static __inline__ __m512i __DEFAULT_FN_ATTRS512
695_mm512_andnot_epi64(__m512i __A, __m512i __B)
696{
697 return (__m512i)(~(__v8du)__A & (__v8du)__B);
698}
699
700static __inline__ __m512i __DEFAULT_FN_ATTRS512
701_mm512_mask_andnot_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
702{
703 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
704 (__v8di)_mm512_andnot_epi64(__A, __B),
705 (__v8di)__W);
706}
707
708static __inline__ __m512i __DEFAULT_FN_ATTRS512
709_mm512_maskz_andnot_epi64(__mmask8 __U, __m512i __A, __m512i __B)
710{
712 __U, __A, __B);
713}
714
715static __inline__ __m512i __DEFAULT_FN_ATTRS512
716_mm512_or_epi32(__m512i __a, __m512i __b)
717{
718 return (__m512i)((__v16su)__a | (__v16su)__b);
719}
720
721static __inline__ __m512i __DEFAULT_FN_ATTRS512
722_mm512_mask_or_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b)
723{
724 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__k,
725 (__v16si)_mm512_or_epi32(__a, __b),
726 (__v16si)__src);
727}
728
729static __inline__ __m512i __DEFAULT_FN_ATTRS512
731{
732 return (__m512i)_mm512_mask_or_epi32(_mm512_setzero_si512(), __k, __a, __b);
733}
734
735static __inline__ __m512i __DEFAULT_FN_ATTRS512
736_mm512_or_epi64(__m512i __a, __m512i __b)
737{
738 return (__m512i)((__v8du)__a | (__v8du)__b);
739}
740
741static __inline__ __m512i __DEFAULT_FN_ATTRS512
742_mm512_mask_or_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b)
743{
744 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__k,
745 (__v8di)_mm512_or_epi64(__a, __b),
746 (__v8di)__src);
747}
748
749static __inline__ __m512i __DEFAULT_FN_ATTRS512
750_mm512_maskz_or_epi64(__mmask8 __k, __m512i __a, __m512i __b)
751{
752 return (__m512i)_mm512_mask_or_epi64(_mm512_setzero_si512(), __k, __a, __b);
753}
754
755static __inline__ __m512i __DEFAULT_FN_ATTRS512
756_mm512_xor_epi32(__m512i __a, __m512i __b)
757{
758 return (__m512i)((__v16su)__a ^ (__v16su)__b);
759}
760
761static __inline__ __m512i __DEFAULT_FN_ATTRS512
762_mm512_mask_xor_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b)
763{
764 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__k,
765 (__v16si)_mm512_xor_epi32(__a, __b),
766 (__v16si)__src);
767}
768
769static __inline__ __m512i __DEFAULT_FN_ATTRS512
771{
772 return (__m512i)_mm512_mask_xor_epi32(_mm512_setzero_si512(), __k, __a, __b);
773}
774
775static __inline__ __m512i __DEFAULT_FN_ATTRS512
776_mm512_xor_epi64(__m512i __a, __m512i __b)
777{
778 return (__m512i)((__v8du)__a ^ (__v8du)__b);
779}
780
781static __inline__ __m512i __DEFAULT_FN_ATTRS512
782_mm512_mask_xor_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b)
783{
784 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__k,
785 (__v8di)_mm512_xor_epi64(__a, __b),
786 (__v8di)__src);
787}
788
789static __inline__ __m512i __DEFAULT_FN_ATTRS512
791{
792 return (__m512i)_mm512_mask_xor_epi64(_mm512_setzero_si512(), __k, __a, __b);
793}
794
795static __inline__ __m512i __DEFAULT_FN_ATTRS512
796_mm512_and_si512(__m512i __a, __m512i __b)
797{
798 return (__m512i)((__v8du)__a & (__v8du)__b);
799}
800
801static __inline__ __m512i __DEFAULT_FN_ATTRS512
802_mm512_or_si512(__m512i __a, __m512i __b)
803{
804 return (__m512i)((__v8du)__a | (__v8du)__b);
805}
806
807static __inline__ __m512i __DEFAULT_FN_ATTRS512
808_mm512_xor_si512(__m512i __a, __m512i __b)
809{
810 return (__m512i)((__v8du)__a ^ (__v8du)__b);
811}
812
813/* Arithmetic */
814
815static __inline __m512d __DEFAULT_FN_ATTRS512
816_mm512_add_pd(__m512d __a, __m512d __b)
817{
818 return (__m512d)((__v8df)__a + (__v8df)__b);
819}
820
821static __inline __m512 __DEFAULT_FN_ATTRS512
822_mm512_add_ps(__m512 __a, __m512 __b)
823{
824 return (__m512)((__v16sf)__a + (__v16sf)__b);
825}
826
827static __inline __m512d __DEFAULT_FN_ATTRS512
828_mm512_mul_pd(__m512d __a, __m512d __b)
829{
830 return (__m512d)((__v8df)__a * (__v8df)__b);
831}
832
833static __inline __m512 __DEFAULT_FN_ATTRS512
834_mm512_mul_ps(__m512 __a, __m512 __b)
835{
836 return (__m512)((__v16sf)__a * (__v16sf)__b);
837}
838
839static __inline __m512d __DEFAULT_FN_ATTRS512
840_mm512_sub_pd(__m512d __a, __m512d __b)
841{
842 return (__m512d)((__v8df)__a - (__v8df)__b);
843}
844
845static __inline __m512 __DEFAULT_FN_ATTRS512
846_mm512_sub_ps(__m512 __a, __m512 __b)
847{
848 return (__m512)((__v16sf)__a - (__v16sf)__b);
849}
850
851static __inline__ __m512i __DEFAULT_FN_ATTRS512
852_mm512_add_epi64 (__m512i __A, __m512i __B)
853{
854 return (__m512i) ((__v8du) __A + (__v8du) __B);
855}
856
857static __inline__ __m512i __DEFAULT_FN_ATTRS512
858_mm512_mask_add_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
859{
860 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
861 (__v8di)_mm512_add_epi64(__A, __B),
862 (__v8di)__W);
863}
864
865static __inline__ __m512i __DEFAULT_FN_ATTRS512
866_mm512_maskz_add_epi64(__mmask8 __U, __m512i __A, __m512i __B)
867{
868 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
869 (__v8di)_mm512_add_epi64(__A, __B),
870 (__v8di)_mm512_setzero_si512());
871}
872
873static __inline__ __m512i __DEFAULT_FN_ATTRS512
874_mm512_sub_epi64 (__m512i __A, __m512i __B)
875{
876 return (__m512i) ((__v8du) __A - (__v8du) __B);
877}
878
879static __inline__ __m512i __DEFAULT_FN_ATTRS512
880_mm512_mask_sub_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
881{
882 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
883 (__v8di)_mm512_sub_epi64(__A, __B),
884 (__v8di)__W);
885}
886
887static __inline__ __m512i __DEFAULT_FN_ATTRS512
888_mm512_maskz_sub_epi64(__mmask8 __U, __m512i __A, __m512i __B)
889{
890 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
891 (__v8di)_mm512_sub_epi64(__A, __B),
892 (__v8di)_mm512_setzero_si512());
893}
894
895static __inline__ __m512i __DEFAULT_FN_ATTRS512
896_mm512_add_epi32 (__m512i __A, __m512i __B)
897{
898 return (__m512i) ((__v16su) __A + (__v16su) __B);
899}
900
901static __inline__ __m512i __DEFAULT_FN_ATTRS512
902_mm512_mask_add_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
903{
904 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
905 (__v16si)_mm512_add_epi32(__A, __B),
906 (__v16si)__W);
907}
908
909static __inline__ __m512i __DEFAULT_FN_ATTRS512
910_mm512_maskz_add_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
911{
912 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
913 (__v16si)_mm512_add_epi32(__A, __B),
914 (__v16si)_mm512_setzero_si512());
915}
916
917static __inline__ __m512i __DEFAULT_FN_ATTRS512
918_mm512_sub_epi32 (__m512i __A, __m512i __B)
919{
920 return (__m512i) ((__v16su) __A - (__v16su) __B);
921}
922
923static __inline__ __m512i __DEFAULT_FN_ATTRS512
924_mm512_mask_sub_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
925{
926 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
927 (__v16si)_mm512_sub_epi32(__A, __B),
928 (__v16si)__W);
929}
930
931static __inline__ __m512i __DEFAULT_FN_ATTRS512
932_mm512_maskz_sub_epi32(__mmask16 __U, __m512i __A, __m512i __B)
933{
934 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
935 (__v16si)_mm512_sub_epi32(__A, __B),
936 (__v16si)_mm512_setzero_si512());
937}
938
939#define _mm512_max_round_pd(A, B, R) \
940 (__m512d)__builtin_ia32_maxpd512((__v8df)(__m512d)(A), \
941 (__v8df)(__m512d)(B), (int)(R))
942
943#define _mm512_mask_max_round_pd(W, U, A, B, R) \
944 (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
945 (__v8df)_mm512_max_round_pd((A), (B), (R)), \
946 (__v8df)(W))
947
948#define _mm512_maskz_max_round_pd(U, A, B, R) \
949 (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
950 (__v8df)_mm512_max_round_pd((A), (B), (R)), \
951 (__v8df)_mm512_setzero_pd())
952
953static __inline__ __m512d __DEFAULT_FN_ATTRS512
954_mm512_max_pd(__m512d __A, __m512d __B)
955{
956 return (__m512d) __builtin_ia32_maxpd512((__v8df) __A, (__v8df) __B,
958}
959
960static __inline__ __m512d __DEFAULT_FN_ATTRS512
961_mm512_mask_max_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
962{
963 return (__m512d)__builtin_ia32_selectpd_512(__U,
964 (__v8df)_mm512_max_pd(__A, __B),
965 (__v8df)__W);
966}
967
968static __inline__ __m512d __DEFAULT_FN_ATTRS512
969_mm512_maskz_max_pd (__mmask8 __U, __m512d __A, __m512d __B)
970{
971 return (__m512d)__builtin_ia32_selectpd_512(__U,
972 (__v8df)_mm512_max_pd(__A, __B),
973 (__v8df)_mm512_setzero_pd());
974}
975
976#define _mm512_max_round_ps(A, B, R) \
977 (__m512)__builtin_ia32_maxps512((__v16sf)(__m512)(A), \
978 (__v16sf)(__m512)(B), (int)(R))
979
980#define _mm512_mask_max_round_ps(W, U, A, B, R) \
981 (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
982 (__v16sf)_mm512_max_round_ps((A), (B), (R)), \
983 (__v16sf)(W))
984
985#define _mm512_maskz_max_round_ps(U, A, B, R) \
986 (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
987 (__v16sf)_mm512_max_round_ps((A), (B), (R)), \
988 (__v16sf)_mm512_setzero_ps())
989
990static __inline__ __m512 __DEFAULT_FN_ATTRS512
991_mm512_max_ps(__m512 __A, __m512 __B)
992{
993 return (__m512) __builtin_ia32_maxps512((__v16sf) __A, (__v16sf) __B,
995}
996
997static __inline__ __m512 __DEFAULT_FN_ATTRS512
998_mm512_mask_max_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
999{
1000 return (__m512)__builtin_ia32_selectps_512(__U,
1001 (__v16sf)_mm512_max_ps(__A, __B),
1002 (__v16sf)__W);
1003}
1004
1005static __inline__ __m512 __DEFAULT_FN_ATTRS512
1006_mm512_maskz_max_ps (__mmask16 __U, __m512 __A, __m512 __B)
1007{
1008 return (__m512)__builtin_ia32_selectps_512(__U,
1009 (__v16sf)_mm512_max_ps(__A, __B),
1010 (__v16sf)_mm512_setzero_ps());
1011}
1012
1013static __inline__ __m128 __DEFAULT_FN_ATTRS128
1014_mm_mask_max_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
1015 return (__m128) __builtin_ia32_maxss_round_mask ((__v4sf) __A,
1016 (__v4sf) __B,
1017 (__v4sf) __W,
1018 (__mmask8) __U,
1020}
1021
1022static __inline__ __m128 __DEFAULT_FN_ATTRS128
1023_mm_maskz_max_ss(__mmask8 __U,__m128 __A, __m128 __B) {
1024 return (__m128) __builtin_ia32_maxss_round_mask ((__v4sf) __A,
1025 (__v4sf) __B,
1026 (__v4sf) _mm_setzero_ps (),
1027 (__mmask8) __U,
1029}
1030
1031#define _mm_max_round_ss(A, B, R) \
1032 (__m128)__builtin_ia32_maxss_round_mask((__v4sf)(__m128)(A), \
1033 (__v4sf)(__m128)(B), \
1034 (__v4sf)_mm_setzero_ps(), \
1035 (__mmask8)-1, (int)(R))
1036
1037#define _mm_mask_max_round_ss(W, U, A, B, R) \
1038 (__m128)__builtin_ia32_maxss_round_mask((__v4sf)(__m128)(A), \
1039 (__v4sf)(__m128)(B), \
1040 (__v4sf)(__m128)(W), (__mmask8)(U), \
1041 (int)(R))
1042
1043#define _mm_maskz_max_round_ss(U, A, B, R) \
1044 (__m128)__builtin_ia32_maxss_round_mask((__v4sf)(__m128)(A), \
1045 (__v4sf)(__m128)(B), \
1046 (__v4sf)_mm_setzero_ps(), \
1047 (__mmask8)(U), (int)(R))
1048
1049static __inline__ __m128d __DEFAULT_FN_ATTRS128
1050_mm_mask_max_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
1051 return (__m128d) __builtin_ia32_maxsd_round_mask ((__v2df) __A,
1052 (__v2df) __B,
1053 (__v2df) __W,
1054 (__mmask8) __U,
1056}
1057
1058static __inline__ __m128d __DEFAULT_FN_ATTRS128
1059_mm_maskz_max_sd(__mmask8 __U,__m128d __A, __m128d __B) {
1060 return (__m128d) __builtin_ia32_maxsd_round_mask ((__v2df) __A,
1061 (__v2df) __B,
1062 (__v2df) _mm_setzero_pd (),
1063 (__mmask8) __U,
1065}
1066
1067#define _mm_max_round_sd(A, B, R) \
1068 (__m128d)__builtin_ia32_maxsd_round_mask((__v2df)(__m128d)(A), \
1069 (__v2df)(__m128d)(B), \
1070 (__v2df)_mm_setzero_pd(), \
1071 (__mmask8)-1, (int)(R))
1072
1073#define _mm_mask_max_round_sd(W, U, A, B, R) \
1074 (__m128d)__builtin_ia32_maxsd_round_mask((__v2df)(__m128d)(A), \
1075 (__v2df)(__m128d)(B), \
1076 (__v2df)(__m128d)(W), \
1077 (__mmask8)(U), (int)(R))
1078
1079#define _mm_maskz_max_round_sd(U, A, B, R) \
1080 (__m128d)__builtin_ia32_maxsd_round_mask((__v2df)(__m128d)(A), \
1081 (__v2df)(__m128d)(B), \
1082 (__v2df)_mm_setzero_pd(), \
1083 (__mmask8)(U), (int)(R))
1084
1085static __inline __m512i
1087_mm512_max_epi32(__m512i __A, __m512i __B)
1088{
1089 return (__m512i)__builtin_ia32_pmaxsd512((__v16si)__A, (__v16si)__B);
1090}
1091
1092static __inline__ __m512i __DEFAULT_FN_ATTRS512
1093_mm512_mask_max_epi32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
1094{
1095 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
1096 (__v16si)_mm512_max_epi32(__A, __B),
1097 (__v16si)__W);
1098}
1099
1100static __inline__ __m512i __DEFAULT_FN_ATTRS512
1101_mm512_maskz_max_epi32 (__mmask16 __M, __m512i __A, __m512i __B)
1102{
1103 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
1104 (__v16si)_mm512_max_epi32(__A, __B),
1105 (__v16si)_mm512_setzero_si512());
1106}
1107
1108static __inline __m512i __DEFAULT_FN_ATTRS512
1109_mm512_max_epu32(__m512i __A, __m512i __B)
1110{
1111 return (__m512i)__builtin_ia32_pmaxud512((__v16si)__A, (__v16si)__B);
1112}
1113
1114static __inline__ __m512i __DEFAULT_FN_ATTRS512
1115_mm512_mask_max_epu32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
1116{
1117 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
1118 (__v16si)_mm512_max_epu32(__A, __B),
1119 (__v16si)__W);
1120}
1121
1122static __inline__ __m512i __DEFAULT_FN_ATTRS512
1123_mm512_maskz_max_epu32 (__mmask16 __M, __m512i __A, __m512i __B)
1124{
1125 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
1126 (__v16si)_mm512_max_epu32(__A, __B),
1127 (__v16si)_mm512_setzero_si512());
1128}
1129
1130static __inline __m512i __DEFAULT_FN_ATTRS512
1131_mm512_max_epi64(__m512i __A, __m512i __B)
1132{
1133 return (__m512i)__builtin_ia32_pmaxsq512((__v8di)__A, (__v8di)__B);
1134}
1135
1136static __inline__ __m512i __DEFAULT_FN_ATTRS512
1137_mm512_mask_max_epi64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
1138{
1139 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1140 (__v8di)_mm512_max_epi64(__A, __B),
1141 (__v8di)__W);
1142}
1143
1144static __inline__ __m512i __DEFAULT_FN_ATTRS512
1145_mm512_maskz_max_epi64 (__mmask8 __M, __m512i __A, __m512i __B)
1146{
1147 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1148 (__v8di)_mm512_max_epi64(__A, __B),
1149 (__v8di)_mm512_setzero_si512());
1150}
1151
1152static __inline __m512i __DEFAULT_FN_ATTRS512
1153_mm512_max_epu64(__m512i __A, __m512i __B)
1154{
1155 return (__m512i)__builtin_ia32_pmaxuq512((__v8di)__A, (__v8di)__B);
1156}
1157
1158static __inline__ __m512i __DEFAULT_FN_ATTRS512
1159_mm512_mask_max_epu64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
1160{
1161 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1162 (__v8di)_mm512_max_epu64(__A, __B),
1163 (__v8di)__W);
1164}
1165
1166static __inline__ __m512i __DEFAULT_FN_ATTRS512
1167_mm512_maskz_max_epu64 (__mmask8 __M, __m512i __A, __m512i __B)
1168{
1169 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1170 (__v8di)_mm512_max_epu64(__A, __B),
1171 (__v8di)_mm512_setzero_si512());
1172}
1173
1174#define _mm512_min_round_pd(A, B, R) \
1175 (__m512d)__builtin_ia32_minpd512((__v8df)(__m512d)(A), \
1176 (__v8df)(__m512d)(B), (int)(R))
1177
1178#define _mm512_mask_min_round_pd(W, U, A, B, R) \
1179 (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
1180 (__v8df)_mm512_min_round_pd((A), (B), (R)), \
1181 (__v8df)(W))
1182
1183#define _mm512_maskz_min_round_pd(U, A, B, R) \
1184 (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
1185 (__v8df)_mm512_min_round_pd((A), (B), (R)), \
1186 (__v8df)_mm512_setzero_pd())
1187
1188static __inline__ __m512d __DEFAULT_FN_ATTRS512
1189_mm512_min_pd(__m512d __A, __m512d __B)
1190{
1191 return (__m512d) __builtin_ia32_minpd512((__v8df) __A, (__v8df) __B,
1193}
1194
1195static __inline__ __m512d __DEFAULT_FN_ATTRS512
1196_mm512_mask_min_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
1197{
1198 return (__m512d)__builtin_ia32_selectpd_512(__U,
1199 (__v8df)_mm512_min_pd(__A, __B),
1200 (__v8df)__W);
1201}
1202
1203static __inline__ __m512d __DEFAULT_FN_ATTRS512
1204_mm512_maskz_min_pd (__mmask8 __U, __m512d __A, __m512d __B)
1205{
1206 return (__m512d)__builtin_ia32_selectpd_512(__U,
1207 (__v8df)_mm512_min_pd(__A, __B),
1208 (__v8df)_mm512_setzero_pd());
1209}
1210
1211#define _mm512_min_round_ps(A, B, R) \
1212 (__m512)__builtin_ia32_minps512((__v16sf)(__m512)(A), \
1213 (__v16sf)(__m512)(B), (int)(R))
1214
1215#define _mm512_mask_min_round_ps(W, U, A, B, R) \
1216 (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
1217 (__v16sf)_mm512_min_round_ps((A), (B), (R)), \
1218 (__v16sf)(W))
1219
1220#define _mm512_maskz_min_round_ps(U, A, B, R) \
1221 (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
1222 (__v16sf)_mm512_min_round_ps((A), (B), (R)), \
1223 (__v16sf)_mm512_setzero_ps())
1224
1225static __inline__ __m512 __DEFAULT_FN_ATTRS512
1226_mm512_min_ps(__m512 __A, __m512 __B)
1227{
1228 return (__m512) __builtin_ia32_minps512((__v16sf) __A, (__v16sf) __B,
1230}
1231
1232static __inline__ __m512 __DEFAULT_FN_ATTRS512
1233_mm512_mask_min_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
1234{
1235 return (__m512)__builtin_ia32_selectps_512(__U,
1236 (__v16sf)_mm512_min_ps(__A, __B),
1237 (__v16sf)__W);
1238}
1239
1240static __inline__ __m512 __DEFAULT_FN_ATTRS512
1241_mm512_maskz_min_ps (__mmask16 __U, __m512 __A, __m512 __B)
1242{
1243 return (__m512)__builtin_ia32_selectps_512(__U,
1244 (__v16sf)_mm512_min_ps(__A, __B),
1245 (__v16sf)_mm512_setzero_ps());
1246}
1247
1248static __inline__ __m128 __DEFAULT_FN_ATTRS128
1249_mm_mask_min_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
1250 return (__m128) __builtin_ia32_minss_round_mask ((__v4sf) __A,
1251 (__v4sf) __B,
1252 (__v4sf) __W,
1253 (__mmask8) __U,
1255}
1256
1257static __inline__ __m128 __DEFAULT_FN_ATTRS128
1258_mm_maskz_min_ss(__mmask8 __U,__m128 __A, __m128 __B) {
1259 return (__m128) __builtin_ia32_minss_round_mask ((__v4sf) __A,
1260 (__v4sf) __B,
1261 (__v4sf) _mm_setzero_ps (),
1262 (__mmask8) __U,
1264}
1265
1266#define _mm_min_round_ss(A, B, R) \
1267 (__m128)__builtin_ia32_minss_round_mask((__v4sf)(__m128)(A), \
1268 (__v4sf)(__m128)(B), \
1269 (__v4sf)_mm_setzero_ps(), \
1270 (__mmask8)-1, (int)(R))
1271
1272#define _mm_mask_min_round_ss(W, U, A, B, R) \
1273 (__m128)__builtin_ia32_minss_round_mask((__v4sf)(__m128)(A), \
1274 (__v4sf)(__m128)(B), \
1275 (__v4sf)(__m128)(W), (__mmask8)(U), \
1276 (int)(R))
1277
1278#define _mm_maskz_min_round_ss(U, A, B, R) \
1279 (__m128)__builtin_ia32_minss_round_mask((__v4sf)(__m128)(A), \
1280 (__v4sf)(__m128)(B), \
1281 (__v4sf)_mm_setzero_ps(), \
1282 (__mmask8)(U), (int)(R))
1283
1284static __inline__ __m128d __DEFAULT_FN_ATTRS128
1285_mm_mask_min_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
1286 return (__m128d) __builtin_ia32_minsd_round_mask ((__v2df) __A,
1287 (__v2df) __B,
1288 (__v2df) __W,
1289 (__mmask8) __U,
1291}
1292
1293static __inline__ __m128d __DEFAULT_FN_ATTRS128
1294_mm_maskz_min_sd(__mmask8 __U,__m128d __A, __m128d __B) {
1295 return (__m128d) __builtin_ia32_minsd_round_mask ((__v2df) __A,
1296 (__v2df) __B,
1297 (__v2df) _mm_setzero_pd (),
1298 (__mmask8) __U,
1300}
1301
1302#define _mm_min_round_sd(A, B, R) \
1303 (__m128d)__builtin_ia32_minsd_round_mask((__v2df)(__m128d)(A), \
1304 (__v2df)(__m128d)(B), \
1305 (__v2df)_mm_setzero_pd(), \
1306 (__mmask8)-1, (int)(R))
1307
1308#define _mm_mask_min_round_sd(W, U, A, B, R) \
1309 (__m128d)__builtin_ia32_minsd_round_mask((__v2df)(__m128d)(A), \
1310 (__v2df)(__m128d)(B), \
1311 (__v2df)(__m128d)(W), \
1312 (__mmask8)(U), (int)(R))
1313
1314#define _mm_maskz_min_round_sd(U, A, B, R) \
1315 (__m128d)__builtin_ia32_minsd_round_mask((__v2df)(__m128d)(A), \
1316 (__v2df)(__m128d)(B), \
1317 (__v2df)_mm_setzero_pd(), \
1318 (__mmask8)(U), (int)(R))
1319
1320static __inline __m512i
1322_mm512_min_epi32(__m512i __A, __m512i __B)
1323{
1324 return (__m512i)__builtin_ia32_pminsd512((__v16si)__A, (__v16si)__B);
1325}
1326
1327static __inline__ __m512i __DEFAULT_FN_ATTRS512
1328_mm512_mask_min_epi32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
1329{
1330 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
1331 (__v16si)_mm512_min_epi32(__A, __B),
1332 (__v16si)__W);
1333}
1334
1335static __inline__ __m512i __DEFAULT_FN_ATTRS512
1336_mm512_maskz_min_epi32 (__mmask16 __M, __m512i __A, __m512i __B)
1337{
1338 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
1339 (__v16si)_mm512_min_epi32(__A, __B),
1340 (__v16si)_mm512_setzero_si512());
1341}
1342
1343static __inline __m512i __DEFAULT_FN_ATTRS512
1344_mm512_min_epu32(__m512i __A, __m512i __B)
1345{
1346 return (__m512i)__builtin_ia32_pminud512((__v16si)__A, (__v16si)__B);
1347}
1348
1349static __inline__ __m512i __DEFAULT_FN_ATTRS512
1350_mm512_mask_min_epu32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
1351{
1352 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
1353 (__v16si)_mm512_min_epu32(__A, __B),
1354 (__v16si)__W);
1355}
1356
1357static __inline__ __m512i __DEFAULT_FN_ATTRS512
1358_mm512_maskz_min_epu32 (__mmask16 __M, __m512i __A, __m512i __B)
1359{
1360 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
1361 (__v16si)_mm512_min_epu32(__A, __B),
1362 (__v16si)_mm512_setzero_si512());
1363}
1364
1365static __inline __m512i __DEFAULT_FN_ATTRS512
1366_mm512_min_epi64(__m512i __A, __m512i __B)
1367{
1368 return (__m512i)__builtin_ia32_pminsq512((__v8di)__A, (__v8di)__B);
1369}
1370
1371static __inline__ __m512i __DEFAULT_FN_ATTRS512
1372_mm512_mask_min_epi64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
1373{
1374 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1375 (__v8di)_mm512_min_epi64(__A, __B),
1376 (__v8di)__W);
1377}
1378
1379static __inline__ __m512i __DEFAULT_FN_ATTRS512
1380_mm512_maskz_min_epi64 (__mmask8 __M, __m512i __A, __m512i __B)
1381{
1382 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1383 (__v8di)_mm512_min_epi64(__A, __B),
1384 (__v8di)_mm512_setzero_si512());
1385}
1386
1387static __inline __m512i __DEFAULT_FN_ATTRS512
1388_mm512_min_epu64(__m512i __A, __m512i __B)
1389{
1390 return (__m512i)__builtin_ia32_pminuq512((__v8di)__A, (__v8di)__B);
1391}
1392
1393static __inline__ __m512i __DEFAULT_FN_ATTRS512
1394_mm512_mask_min_epu64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
1395{
1396 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1397 (__v8di)_mm512_min_epu64(__A, __B),
1398 (__v8di)__W);
1399}
1400
1401static __inline__ __m512i __DEFAULT_FN_ATTRS512
1402_mm512_maskz_min_epu64 (__mmask8 __M, __m512i __A, __m512i __B)
1403{
1404 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1405 (__v8di)_mm512_min_epu64(__A, __B),
1406 (__v8di)_mm512_setzero_si512());
1407}
1408
1409static __inline __m512i __DEFAULT_FN_ATTRS512
1410_mm512_mul_epi32(__m512i __X, __m512i __Y)
1411{
1412 return (__m512i)__builtin_ia32_pmuldq512((__v16si)__X, (__v16si) __Y);
1413}
1414
1415static __inline __m512i __DEFAULT_FN_ATTRS512
1416_mm512_mask_mul_epi32(__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
1417{
1418 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1419 (__v8di)_mm512_mul_epi32(__X, __Y),
1420 (__v8di)__W);
1421}
1422
1423static __inline __m512i __DEFAULT_FN_ATTRS512
1424_mm512_maskz_mul_epi32(__mmask8 __M, __m512i __X, __m512i __Y)
1425{
1426 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1427 (__v8di)_mm512_mul_epi32(__X, __Y),
1428 (__v8di)_mm512_setzero_si512 ());
1429}
1430
1431static __inline __m512i __DEFAULT_FN_ATTRS512
1432_mm512_mul_epu32(__m512i __X, __m512i __Y)
1433{
1434 return (__m512i)__builtin_ia32_pmuludq512((__v16si)__X, (__v16si)__Y);
1435}
1436
1437static __inline __m512i __DEFAULT_FN_ATTRS512
1438_mm512_mask_mul_epu32(__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
1439{
1440 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1441 (__v8di)_mm512_mul_epu32(__X, __Y),
1442 (__v8di)__W);
1443}
1444
1445static __inline __m512i __DEFAULT_FN_ATTRS512
1446_mm512_maskz_mul_epu32(__mmask8 __M, __m512i __X, __m512i __Y)
1447{
1448 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1449 (__v8di)_mm512_mul_epu32(__X, __Y),
1450 (__v8di)_mm512_setzero_si512 ());
1451}
1452
1453static __inline __m512i __DEFAULT_FN_ATTRS512
1454_mm512_mullo_epi32 (__m512i __A, __m512i __B)
1455{
1456 return (__m512i) ((__v16su) __A * (__v16su) __B);
1457}
1458
1459static __inline __m512i __DEFAULT_FN_ATTRS512
1460_mm512_maskz_mullo_epi32(__mmask16 __M, __m512i __A, __m512i __B)
1461{
1462 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
1463 (__v16si)_mm512_mullo_epi32(__A, __B),
1464 (__v16si)_mm512_setzero_si512());
1465}
1466
1467static __inline __m512i __DEFAULT_FN_ATTRS512
1468_mm512_mask_mullo_epi32(__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
1469{
1470 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
1471 (__v16si)_mm512_mullo_epi32(__A, __B),
1472 (__v16si)__W);
1473}
1474
1475static __inline__ __m512i __DEFAULT_FN_ATTRS512
1476_mm512_mullox_epi64 (__m512i __A, __m512i __B) {
1477 return (__m512i) ((__v8du) __A * (__v8du) __B);
1478}
1479
1480static __inline__ __m512i __DEFAULT_FN_ATTRS512
1481_mm512_mask_mullox_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) {
1482 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
1483 (__v8di)_mm512_mullox_epi64(__A, __B),
1484 (__v8di)__W);
1485}
1486
1487#define _mm512_sqrt_round_pd(A, R) \
1488 (__m512d)__builtin_ia32_sqrtpd512((__v8df)(__m512d)(A), (int)(R))
1489
1490#define _mm512_mask_sqrt_round_pd(W, U, A, R) \
1491 (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
1492 (__v8df)_mm512_sqrt_round_pd((A), (R)), \
1493 (__v8df)(__m512d)(W))
1494
1495#define _mm512_maskz_sqrt_round_pd(U, A, R) \
1496 (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
1497 (__v8df)_mm512_sqrt_round_pd((A), (R)), \
1498 (__v8df)_mm512_setzero_pd())
1499
1500static __inline__ __m512d __DEFAULT_FN_ATTRS512
1501_mm512_sqrt_pd(__m512d __A)
1502{
1503 return (__m512d)__builtin_ia32_sqrtpd512((__v8df)__A,
1505}
1506
1507static __inline__ __m512d __DEFAULT_FN_ATTRS512
1508_mm512_mask_sqrt_pd (__m512d __W, __mmask8 __U, __m512d __A)
1509{
1510 return (__m512d)__builtin_ia32_selectpd_512(__U,
1511 (__v8df)_mm512_sqrt_pd(__A),
1512 (__v8df)__W);
1513}
1514
1515static __inline__ __m512d __DEFAULT_FN_ATTRS512
1517{
1518 return (__m512d)__builtin_ia32_selectpd_512(__U,
1519 (__v8df)_mm512_sqrt_pd(__A),
1520 (__v8df)_mm512_setzero_pd());
1521}
1522
1523#define _mm512_sqrt_round_ps(A, R) \
1524 (__m512)__builtin_ia32_sqrtps512((__v16sf)(__m512)(A), (int)(R))
1525
1526#define _mm512_mask_sqrt_round_ps(W, U, A, R) \
1527 (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
1528 (__v16sf)_mm512_sqrt_round_ps((A), (R)), \
1529 (__v16sf)(__m512)(W))
1530
1531#define _mm512_maskz_sqrt_round_ps(U, A, R) \
1532 (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
1533 (__v16sf)_mm512_sqrt_round_ps((A), (R)), \
1534 (__v16sf)_mm512_setzero_ps())
1535
1536static __inline__ __m512 __DEFAULT_FN_ATTRS512
1538{
1539 return (__m512)__builtin_ia32_sqrtps512((__v16sf)__A,
1541}
1542
1543static __inline__ __m512 __DEFAULT_FN_ATTRS512
1544_mm512_mask_sqrt_ps(__m512 __W, __mmask16 __U, __m512 __A)
1545{
1546 return (__m512)__builtin_ia32_selectps_512(__U,
1547 (__v16sf)_mm512_sqrt_ps(__A),
1548 (__v16sf)__W);
1549}
1550
1551static __inline__ __m512 __DEFAULT_FN_ATTRS512
1553{
1554 return (__m512)__builtin_ia32_selectps_512(__U,
1555 (__v16sf)_mm512_sqrt_ps(__A),
1556 (__v16sf)_mm512_setzero_ps());
1557}
1558
1559static __inline__ __m512d __DEFAULT_FN_ATTRS512
1561{
1562 return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
1563 (__v8df)
1565 (__mmask8) -1);}
1566
1567static __inline__ __m512d __DEFAULT_FN_ATTRS512
1568_mm512_mask_rsqrt14_pd (__m512d __W, __mmask8 __U, __m512d __A)
1569{
1570 return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
1571 (__v8df) __W,
1572 (__mmask8) __U);
1573}
1574
1575static __inline__ __m512d __DEFAULT_FN_ATTRS512
1577{
1578 return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
1579 (__v8df)
1581 (__mmask8) __U);
1582}
1583
1584static __inline__ __m512 __DEFAULT_FN_ATTRS512
1586{
1587 return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
1588 (__v16sf)
1590 (__mmask16) -1);
1591}
1592
1593static __inline__ __m512 __DEFAULT_FN_ATTRS512
1594_mm512_mask_rsqrt14_ps (__m512 __W, __mmask16 __U, __m512 __A)
1595{
1596 return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
1597 (__v16sf) __W,
1598 (__mmask16) __U);
1599}
1600
1601static __inline__ __m512 __DEFAULT_FN_ATTRS512
1603{
1604 return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
1605 (__v16sf)
1607 (__mmask16) __U);
1608}
1609
1610static __inline__ __m128 __DEFAULT_FN_ATTRS128
1611_mm_rsqrt14_ss(__m128 __A, __m128 __B)
1612{
1613 return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __A,
1614 (__v4sf) __B,
1615 (__v4sf)
1616 _mm_setzero_ps (),
1617 (__mmask8) -1);
1618}
1619
1620static __inline__ __m128 __DEFAULT_FN_ATTRS128
1621_mm_mask_rsqrt14_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
1622{
1623 return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __A,
1624 (__v4sf) __B,
1625 (__v4sf) __W,
1626 (__mmask8) __U);
1627}
1628
1629static __inline__ __m128 __DEFAULT_FN_ATTRS128
1630_mm_maskz_rsqrt14_ss (__mmask8 __U, __m128 __A, __m128 __B)
1631{
1632 return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __A,
1633 (__v4sf) __B,
1634 (__v4sf) _mm_setzero_ps (),
1635 (__mmask8) __U);
1636}
1637
1638static __inline__ __m128d __DEFAULT_FN_ATTRS128
1639_mm_rsqrt14_sd(__m128d __A, __m128d __B)
1640{
1641 return (__m128d) __builtin_ia32_rsqrt14sd_mask ((__v2df) __A,
1642 (__v2df) __B,
1643 (__v2df)
1644 _mm_setzero_pd (),
1645 (__mmask8) -1);
1646}
1647
1648static __inline__ __m128d __DEFAULT_FN_ATTRS128
1649_mm_mask_rsqrt14_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
1650{
1651 return (__m128d) __builtin_ia32_rsqrt14sd_mask ( (__v2df) __A,
1652 (__v2df) __B,
1653 (__v2df) __W,
1654 (__mmask8) __U);
1655}
1656
1657static __inline__ __m128d __DEFAULT_FN_ATTRS128
1658_mm_maskz_rsqrt14_sd (__mmask8 __U, __m128d __A, __m128d __B)
1659{
1660 return (__m128d) __builtin_ia32_rsqrt14sd_mask ( (__v2df) __A,
1661 (__v2df) __B,
1662 (__v2df) _mm_setzero_pd (),
1663 (__mmask8) __U);
1664}
1665
1666static __inline__ __m512d __DEFAULT_FN_ATTRS512
1668{
1669 return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
1670 (__v8df)
1672 (__mmask8) -1);
1673}
1674
1675static __inline__ __m512d __DEFAULT_FN_ATTRS512
1676_mm512_mask_rcp14_pd (__m512d __W, __mmask8 __U, __m512d __A)
1677{
1678 return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
1679 (__v8df) __W,
1680 (__mmask8) __U);
1681}
1682
1683static __inline__ __m512d __DEFAULT_FN_ATTRS512
1685{
1686 return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
1687 (__v8df)
1689 (__mmask8) __U);
1690}
1691
1692static __inline__ __m512 __DEFAULT_FN_ATTRS512
1694{
1695 return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
1696 (__v16sf)
1698 (__mmask16) -1);
1699}
1700
1701static __inline__ __m512 __DEFAULT_FN_ATTRS512
1702_mm512_mask_rcp14_ps (__m512 __W, __mmask16 __U, __m512 __A)
1703{
1704 return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
1705 (__v16sf) __W,
1706 (__mmask16) __U);
1707}
1708
1709static __inline__ __m512 __DEFAULT_FN_ATTRS512
1711{
1712 return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
1713 (__v16sf)
1715 (__mmask16) __U);
1716}
1717
1718static __inline__ __m128 __DEFAULT_FN_ATTRS128
1719_mm_rcp14_ss(__m128 __A, __m128 __B)
1720{
1721 return (__m128) __builtin_ia32_rcp14ss_mask ((__v4sf) __A,
1722 (__v4sf) __B,
1723 (__v4sf)
1724 _mm_setzero_ps (),
1725 (__mmask8) -1);
1726}
1727
1728static __inline__ __m128 __DEFAULT_FN_ATTRS128
1729_mm_mask_rcp14_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
1730{
1731 return (__m128) __builtin_ia32_rcp14ss_mask ((__v4sf) __A,
1732 (__v4sf) __B,
1733 (__v4sf) __W,
1734 (__mmask8) __U);
1735}
1736
1737static __inline__ __m128 __DEFAULT_FN_ATTRS128
1738_mm_maskz_rcp14_ss (__mmask8 __U, __m128 __A, __m128 __B)
1739{
1740 return (__m128) __builtin_ia32_rcp14ss_mask ((__v4sf) __A,
1741 (__v4sf) __B,
1742 (__v4sf) _mm_setzero_ps (),
1743 (__mmask8) __U);
1744}
1745
1746static __inline__ __m128d __DEFAULT_FN_ATTRS128
1747_mm_rcp14_sd(__m128d __A, __m128d __B)
1748{
1749 return (__m128d) __builtin_ia32_rcp14sd_mask ((__v2df) __A,
1750 (__v2df) __B,
1751 (__v2df)
1752 _mm_setzero_pd (),
1753 (__mmask8) -1);
1754}
1755
1756static __inline__ __m128d __DEFAULT_FN_ATTRS128
1757_mm_mask_rcp14_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
1758{
1759 return (__m128d) __builtin_ia32_rcp14sd_mask ( (__v2df) __A,
1760 (__v2df) __B,
1761 (__v2df) __W,
1762 (__mmask8) __U);
1763}
1764
1765static __inline__ __m128d __DEFAULT_FN_ATTRS128
1766_mm_maskz_rcp14_sd (__mmask8 __U, __m128d __A, __m128d __B)
1767{
1768 return (__m128d) __builtin_ia32_rcp14sd_mask ( (__v2df) __A,
1769 (__v2df) __B,
1770 (__v2df) _mm_setzero_pd (),
1771 (__mmask8) __U);
1772}
1773
1774static __inline __m512 __DEFAULT_FN_ATTRS512
1776{
1777 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
1779 (__v16sf) __A, -1,
1781}
1782
1783static __inline__ __m512 __DEFAULT_FN_ATTRS512
1784_mm512_mask_floor_ps (__m512 __W, __mmask16 __U, __m512 __A)
1785{
1786 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
1788 (__v16sf) __W, __U,
1790}
1791
1792static __inline __m512d __DEFAULT_FN_ATTRS512
1794{
1795 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
1797 (__v8df) __A, -1,
1799}
1800
1801static __inline__ __m512d __DEFAULT_FN_ATTRS512
1802_mm512_mask_floor_pd (__m512d __W, __mmask8 __U, __m512d __A)
1803{
1804 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
1806 (__v8df) __W, __U,
1808}
1809
1810static __inline__ __m512 __DEFAULT_FN_ATTRS512
1811_mm512_mask_ceil_ps (__m512 __W, __mmask16 __U, __m512 __A)
1812{
1813 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
1815 (__v16sf) __W, __U,
1817}
1818
1819static __inline __m512 __DEFAULT_FN_ATTRS512
1821{
1822 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
1824 (__v16sf) __A, -1,
1826}
1827
1828static __inline __m512d __DEFAULT_FN_ATTRS512
1829_mm512_ceil_pd(__m512d __A)
1830{
1831 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
1833 (__v8df) __A, -1,
1835}
1836
1837static __inline__ __m512d __DEFAULT_FN_ATTRS512
1838_mm512_mask_ceil_pd (__m512d __W, __mmask8 __U, __m512d __A)
1839{
1840 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
1842 (__v8df) __W, __U,
1844}
1845
1846static __inline __m512i __DEFAULT_FN_ATTRS512
1848{
1849 return (__m512i)__builtin_ia32_pabsq512((__v8di)__A);
1850}
1851
1852static __inline__ __m512i __DEFAULT_FN_ATTRS512
1853_mm512_mask_abs_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
1854{
1855 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
1856 (__v8di)_mm512_abs_epi64(__A),
1857 (__v8di)__W);
1858}
1859
1860static __inline__ __m512i __DEFAULT_FN_ATTRS512
1862{
1863 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
1864 (__v8di)_mm512_abs_epi64(__A),
1865 (__v8di)_mm512_setzero_si512());
1866}
1867
1868static __inline __m512i __DEFAULT_FN_ATTRS512
1870{
1871 return (__m512i)__builtin_ia32_pabsd512((__v16si) __A);
1872}
1873
1874static __inline__ __m512i __DEFAULT_FN_ATTRS512
1875_mm512_mask_abs_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
1876{
1877 return (__m512i)__builtin_ia32_selectd_512(__U,
1878 (__v16si)_mm512_abs_epi32(__A),
1879 (__v16si)__W);
1880}
1881
1882static __inline__ __m512i __DEFAULT_FN_ATTRS512
1884{
1885 return (__m512i)__builtin_ia32_selectd_512(__U,
1886 (__v16si)_mm512_abs_epi32(__A),
1887 (__v16si)_mm512_setzero_si512());
1888}
1889
1890static __inline__ __m128 __DEFAULT_FN_ATTRS128
1891_mm_mask_add_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
1892 __A = _mm_add_ss(__A, __B);
1893 return __builtin_ia32_selectss_128(__U, __A, __W);
1894}
1895
1896static __inline__ __m128 __DEFAULT_FN_ATTRS128
1897_mm_maskz_add_ss(__mmask8 __U,__m128 __A, __m128 __B) {
1898 __A = _mm_add_ss(__A, __B);
1899 return __builtin_ia32_selectss_128(__U, __A, _mm_setzero_ps());
1900}
1901
1902#define _mm_add_round_ss(A, B, R) \
1903 (__m128)__builtin_ia32_addss_round_mask((__v4sf)(__m128)(A), \
1904 (__v4sf)(__m128)(B), \
1905 (__v4sf)_mm_setzero_ps(), \
1906 (__mmask8)-1, (int)(R))
1907
1908#define _mm_mask_add_round_ss(W, U, A, B, R) \
1909 (__m128)__builtin_ia32_addss_round_mask((__v4sf)(__m128)(A), \
1910 (__v4sf)(__m128)(B), \
1911 (__v4sf)(__m128)(W), (__mmask8)(U), \
1912 (int)(R))
1913
1914#define _mm_maskz_add_round_ss(U, A, B, R) \
1915 (__m128)__builtin_ia32_addss_round_mask((__v4sf)(__m128)(A), \
1916 (__v4sf)(__m128)(B), \
1917 (__v4sf)_mm_setzero_ps(), \
1918 (__mmask8)(U), (int)(R))
1919
1920static __inline__ __m128d __DEFAULT_FN_ATTRS128
1921_mm_mask_add_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
1922 __A = _mm_add_sd(__A, __B);
1923 return __builtin_ia32_selectsd_128(__U, __A, __W);
1924}
1925
1926static __inline__ __m128d __DEFAULT_FN_ATTRS128
1927_mm_maskz_add_sd(__mmask8 __U,__m128d __A, __m128d __B) {
1928 __A = _mm_add_sd(__A, __B);
1929 return __builtin_ia32_selectsd_128(__U, __A, _mm_setzero_pd());
1930}
1931#define _mm_add_round_sd(A, B, R) \
1932 (__m128d)__builtin_ia32_addsd_round_mask((__v2df)(__m128d)(A), \
1933 (__v2df)(__m128d)(B), \
1934 (__v2df)_mm_setzero_pd(), \
1935 (__mmask8)-1, (int)(R))
1936
1937#define _mm_mask_add_round_sd(W, U, A, B, R) \
1938 (__m128d)__builtin_ia32_addsd_round_mask((__v2df)(__m128d)(A), \
1939 (__v2df)(__m128d)(B), \
1940 (__v2df)(__m128d)(W), \
1941 (__mmask8)(U), (int)(R))
1942
1943#define _mm_maskz_add_round_sd(U, A, B, R) \
1944 (__m128d)__builtin_ia32_addsd_round_mask((__v2df)(__m128d)(A), \
1945 (__v2df)(__m128d)(B), \
1946 (__v2df)_mm_setzero_pd(), \
1947 (__mmask8)(U), (int)(R))
1948
1949static __inline__ __m512d __DEFAULT_FN_ATTRS512
1950_mm512_mask_add_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
1951 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
1952 (__v8df)_mm512_add_pd(__A, __B),
1953 (__v8df)__W);
1954}
1955
1956static __inline__ __m512d __DEFAULT_FN_ATTRS512
1957_mm512_maskz_add_pd(__mmask8 __U, __m512d __A, __m512d __B) {
1958 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
1959 (__v8df)_mm512_add_pd(__A, __B),
1960 (__v8df)_mm512_setzero_pd());
1961}
1962
1963static __inline__ __m512 __DEFAULT_FN_ATTRS512
1964_mm512_mask_add_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
1965 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
1966 (__v16sf)_mm512_add_ps(__A, __B),
1967 (__v16sf)__W);
1968}
1969
1970static __inline__ __m512 __DEFAULT_FN_ATTRS512
1971_mm512_maskz_add_ps(__mmask16 __U, __m512 __A, __m512 __B) {
1972 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
1973 (__v16sf)_mm512_add_ps(__A, __B),
1974 (__v16sf)_mm512_setzero_ps());
1975}
1976
1977#define _mm512_add_round_pd(A, B, R) \
1978 (__m512d)__builtin_ia32_addpd512((__v8df)(__m512d)(A), \
1979 (__v8df)(__m512d)(B), (int)(R))
1980
1981#define _mm512_mask_add_round_pd(W, U, A, B, R) \
1982 (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
1983 (__v8df)_mm512_add_round_pd((A), (B), (R)), \
1984 (__v8df)(__m512d)(W))
1985
1986#define _mm512_maskz_add_round_pd(U, A, B, R) \
1987 (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
1988 (__v8df)_mm512_add_round_pd((A), (B), (R)), \
1989 (__v8df)_mm512_setzero_pd())
1990
1991#define _mm512_add_round_ps(A, B, R) \
1992 (__m512)__builtin_ia32_addps512((__v16sf)(__m512)(A), \
1993 (__v16sf)(__m512)(B), (int)(R))
1994
1995#define _mm512_mask_add_round_ps(W, U, A, B, R) \
1996 (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
1997 (__v16sf)_mm512_add_round_ps((A), (B), (R)), \
1998 (__v16sf)(__m512)(W))
1999
2000#define _mm512_maskz_add_round_ps(U, A, B, R) \
2001 (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
2002 (__v16sf)_mm512_add_round_ps((A), (B), (R)), \
2003 (__v16sf)_mm512_setzero_ps())
2004
2005static __inline__ __m128 __DEFAULT_FN_ATTRS128
2006_mm_mask_sub_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
2007 __A = _mm_sub_ss(__A, __B);
2008 return __builtin_ia32_selectss_128(__U, __A, __W);
2009}
2010
2011static __inline__ __m128 __DEFAULT_FN_ATTRS128
2012_mm_maskz_sub_ss(__mmask8 __U,__m128 __A, __m128 __B) {
2013 __A = _mm_sub_ss(__A, __B);
2014 return __builtin_ia32_selectss_128(__U, __A, _mm_setzero_ps());
2015}
2016#define _mm_sub_round_ss(A, B, R) \
2017 (__m128)__builtin_ia32_subss_round_mask((__v4sf)(__m128)(A), \
2018 (__v4sf)(__m128)(B), \
2019 (__v4sf)_mm_setzero_ps(), \
2020 (__mmask8)-1, (int)(R))
2021
2022#define _mm_mask_sub_round_ss(W, U, A, B, R) \
2023 (__m128)__builtin_ia32_subss_round_mask((__v4sf)(__m128)(A), \
2024 (__v4sf)(__m128)(B), \
2025 (__v4sf)(__m128)(W), (__mmask8)(U), \
2026 (int)(R))
2027
2028#define _mm_maskz_sub_round_ss(U, A, B, R) \
2029 (__m128)__builtin_ia32_subss_round_mask((__v4sf)(__m128)(A), \
2030 (__v4sf)(__m128)(B), \
2031 (__v4sf)_mm_setzero_ps(), \
2032 (__mmask8)(U), (int)(R))
2033
2034static __inline__ __m128d __DEFAULT_FN_ATTRS128
2035_mm_mask_sub_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
2036 __A = _mm_sub_sd(__A, __B);
2037 return __builtin_ia32_selectsd_128(__U, __A, __W);
2038}
2039
2040static __inline__ __m128d __DEFAULT_FN_ATTRS128
2041_mm_maskz_sub_sd(__mmask8 __U,__m128d __A, __m128d __B) {
2042 __A = _mm_sub_sd(__A, __B);
2043 return __builtin_ia32_selectsd_128(__U, __A, _mm_setzero_pd());
2044}
2045
2046#define _mm_sub_round_sd(A, B, R) \
2047 (__m128d)__builtin_ia32_subsd_round_mask((__v2df)(__m128d)(A), \
2048 (__v2df)(__m128d)(B), \
2049 (__v2df)_mm_setzero_pd(), \
2050 (__mmask8)-1, (int)(R))
2051
2052#define _mm_mask_sub_round_sd(W, U, A, B, R) \
2053 (__m128d)__builtin_ia32_subsd_round_mask((__v2df)(__m128d)(A), \
2054 (__v2df)(__m128d)(B), \
2055 (__v2df)(__m128d)(W), \
2056 (__mmask8)(U), (int)(R))
2057
2058#define _mm_maskz_sub_round_sd(U, A, B, R) \
2059 (__m128d)__builtin_ia32_subsd_round_mask((__v2df)(__m128d)(A), \
2060 (__v2df)(__m128d)(B), \
2061 (__v2df)_mm_setzero_pd(), \
2062 (__mmask8)(U), (int)(R))
2063
2064static __inline__ __m512d __DEFAULT_FN_ATTRS512
2065_mm512_mask_sub_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
2066 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
2067 (__v8df)_mm512_sub_pd(__A, __B),
2068 (__v8df)__W);
2069}
2070
2071static __inline__ __m512d __DEFAULT_FN_ATTRS512
2072_mm512_maskz_sub_pd(__mmask8 __U, __m512d __A, __m512d __B) {
2073 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
2074 (__v8df)_mm512_sub_pd(__A, __B),
2075 (__v8df)_mm512_setzero_pd());
2076}
2077
2078static __inline__ __m512 __DEFAULT_FN_ATTRS512
2079_mm512_mask_sub_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
2080 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
2081 (__v16sf)_mm512_sub_ps(__A, __B),
2082 (__v16sf)__W);
2083}
2084
2085static __inline__ __m512 __DEFAULT_FN_ATTRS512
2086_mm512_maskz_sub_ps(__mmask16 __U, __m512 __A, __m512 __B) {
2087 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
2088 (__v16sf)_mm512_sub_ps(__A, __B),
2089 (__v16sf)_mm512_setzero_ps());
2090}
2091
2092#define _mm512_sub_round_pd(A, B, R) \
2093 (__m512d)__builtin_ia32_subpd512((__v8df)(__m512d)(A), \
2094 (__v8df)(__m512d)(B), (int)(R))
2095
2096#define _mm512_mask_sub_round_pd(W, U, A, B, R) \
2097 (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
2098 (__v8df)_mm512_sub_round_pd((A), (B), (R)), \
2099 (__v8df)(__m512d)(W))
2100
2101#define _mm512_maskz_sub_round_pd(U, A, B, R) \
2102 (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
2103 (__v8df)_mm512_sub_round_pd((A), (B), (R)), \
2104 (__v8df)_mm512_setzero_pd())
2105
2106#define _mm512_sub_round_ps(A, B, R) \
2107 (__m512)__builtin_ia32_subps512((__v16sf)(__m512)(A), \
2108 (__v16sf)(__m512)(B), (int)(R))
2109
2110#define _mm512_mask_sub_round_ps(W, U, A, B, R) \
2111 (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
2112 (__v16sf)_mm512_sub_round_ps((A), (B), (R)), \
2113 (__v16sf)(__m512)(W))
2114
2115#define _mm512_maskz_sub_round_ps(U, A, B, R) \
2116 (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
2117 (__v16sf)_mm512_sub_round_ps((A), (B), (R)), \
2118 (__v16sf)_mm512_setzero_ps())
2119
2120static __inline__ __m128 __DEFAULT_FN_ATTRS128
2121_mm_mask_mul_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
2122 __A = _mm_mul_ss(__A, __B);
2123 return __builtin_ia32_selectss_128(__U, __A, __W);
2124}
2125
2126static __inline__ __m128 __DEFAULT_FN_ATTRS128
2127_mm_maskz_mul_ss(__mmask8 __U,__m128 __A, __m128 __B) {
2128 __A = _mm_mul_ss(__A, __B);
2129 return __builtin_ia32_selectss_128(__U, __A, _mm_setzero_ps());
2130}
2131#define _mm_mul_round_ss(A, B, R) \
2132 (__m128)__builtin_ia32_mulss_round_mask((__v4sf)(__m128)(A), \
2133 (__v4sf)(__m128)(B), \
2134 (__v4sf)_mm_setzero_ps(), \
2135 (__mmask8)-1, (int)(R))
2136
2137#define _mm_mask_mul_round_ss(W, U, A, B, R) \
2138 (__m128)__builtin_ia32_mulss_round_mask((__v4sf)(__m128)(A), \
2139 (__v4sf)(__m128)(B), \
2140 (__v4sf)(__m128)(W), (__mmask8)(U), \
2141 (int)(R))
2142
2143#define _mm_maskz_mul_round_ss(U, A, B, R) \
2144 (__m128)__builtin_ia32_mulss_round_mask((__v4sf)(__m128)(A), \
2145 (__v4sf)(__m128)(B), \
2146 (__v4sf)_mm_setzero_ps(), \
2147 (__mmask8)(U), (int)(R))
2148
2149static __inline__ __m128d __DEFAULT_FN_ATTRS128
2150_mm_mask_mul_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
2151 __A = _mm_mul_sd(__A, __B);
2152 return __builtin_ia32_selectsd_128(__U, __A, __W);
2153}
2154
2155static __inline__ __m128d __DEFAULT_FN_ATTRS128
2156_mm_maskz_mul_sd(__mmask8 __U,__m128d __A, __m128d __B) {
2157 __A = _mm_mul_sd(__A, __B);
2158 return __builtin_ia32_selectsd_128(__U, __A, _mm_setzero_pd());
2159}
2160
2161#define _mm_mul_round_sd(A, B, R) \
2162 (__m128d)__builtin_ia32_mulsd_round_mask((__v2df)(__m128d)(A), \
2163 (__v2df)(__m128d)(B), \
2164 (__v2df)_mm_setzero_pd(), \
2165 (__mmask8)-1, (int)(R))
2166
2167#define _mm_mask_mul_round_sd(W, U, A, B, R) \
2168 (__m128d)__builtin_ia32_mulsd_round_mask((__v2df)(__m128d)(A), \
2169 (__v2df)(__m128d)(B), \
2170 (__v2df)(__m128d)(W), \
2171 (__mmask8)(U), (int)(R))
2172
2173#define _mm_maskz_mul_round_sd(U, A, B, R) \
2174 (__m128d)__builtin_ia32_mulsd_round_mask((__v2df)(__m128d)(A), \
2175 (__v2df)(__m128d)(B), \
2176 (__v2df)_mm_setzero_pd(), \
2177 (__mmask8)(U), (int)(R))
2178
2179static __inline__ __m512d __DEFAULT_FN_ATTRS512
2180_mm512_mask_mul_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
2181 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
2182 (__v8df)_mm512_mul_pd(__A, __B),
2183 (__v8df)__W);
2184}
2185
2186static __inline__ __m512d __DEFAULT_FN_ATTRS512
2187_mm512_maskz_mul_pd(__mmask8 __U, __m512d __A, __m512d __B) {
2188 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
2189 (__v8df)_mm512_mul_pd(__A, __B),
2190 (__v8df)_mm512_setzero_pd());
2191}
2192
2193static __inline__ __m512 __DEFAULT_FN_ATTRS512
2194_mm512_mask_mul_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
2195 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
2196 (__v16sf)_mm512_mul_ps(__A, __B),
2197 (__v16sf)__W);
2198}
2199
2200static __inline__ __m512 __DEFAULT_FN_ATTRS512
2201_mm512_maskz_mul_ps(__mmask16 __U, __m512 __A, __m512 __B) {
2202 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
2203 (__v16sf)_mm512_mul_ps(__A, __B),
2204 (__v16sf)_mm512_setzero_ps());
2205}
2206
2207#define _mm512_mul_round_pd(A, B, R) \
2208 (__m512d)__builtin_ia32_mulpd512((__v8df)(__m512d)(A), \
2209 (__v8df)(__m512d)(B), (int)(R))
2210
2211#define _mm512_mask_mul_round_pd(W, U, A, B, R) \
2212 (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
2213 (__v8df)_mm512_mul_round_pd((A), (B), (R)), \
2214 (__v8df)(__m512d)(W))
2215
2216#define _mm512_maskz_mul_round_pd(U, A, B, R) \
2217 (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
2218 (__v8df)_mm512_mul_round_pd((A), (B), (R)), \
2219 (__v8df)_mm512_setzero_pd())
2220
2221#define _mm512_mul_round_ps(A, B, R) \
2222 (__m512)__builtin_ia32_mulps512((__v16sf)(__m512)(A), \
2223 (__v16sf)(__m512)(B), (int)(R))
2224
2225#define _mm512_mask_mul_round_ps(W, U, A, B, R) \
2226 (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
2227 (__v16sf)_mm512_mul_round_ps((A), (B), (R)), \
2228 (__v16sf)(__m512)(W))
2229
2230#define _mm512_maskz_mul_round_ps(U, A, B, R) \
2231 (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
2232 (__v16sf)_mm512_mul_round_ps((A), (B), (R)), \
2233 (__v16sf)_mm512_setzero_ps())
2234
2235static __inline__ __m128 __DEFAULT_FN_ATTRS128
2236_mm_mask_div_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
2237 __A = _mm_div_ss(__A, __B);
2238 return __builtin_ia32_selectss_128(__U, __A, __W);
2239}
2240
2241static __inline__ __m128 __DEFAULT_FN_ATTRS128
2242_mm_maskz_div_ss(__mmask8 __U,__m128 __A, __m128 __B) {
2243 __A = _mm_div_ss(__A, __B);
2244 return __builtin_ia32_selectss_128(__U, __A, _mm_setzero_ps());
2245}
2246
/* Scalar single-precision divide with explicit rounding control. */
#define _mm_div_round_ss(A, B, R) \
  (__m128)__builtin_ia32_divss_round_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4sf)_mm_setzero_ps(), \
                                          (__mmask8)-1, (int)(R))

#define _mm_mask_div_round_ss(W, U, A, B, R) \
  (__m128)__builtin_ia32_divss_round_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4sf)(__m128)(W), (__mmask8)(U), \
                                          (int)(R))

#define _mm_maskz_div_round_ss(U, A, B, R) \
  (__m128)__builtin_ia32_divss_round_mask((__v4sf)(__m128)(A), \
                                          (__v4sf)(__m128)(B), \
                                          (__v4sf)_mm_setzero_ps(), \
                                          (__mmask8)(U), (int)(R))
2264
2265static __inline__ __m128d __DEFAULT_FN_ATTRS128
2266_mm_mask_div_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
2267 __A = _mm_div_sd(__A, __B);
2268 return __builtin_ia32_selectsd_128(__U, __A, __W);
2269}
2270
2271static __inline__ __m128d __DEFAULT_FN_ATTRS128
2272_mm_maskz_div_sd(__mmask8 __U,__m128d __A, __m128d __B) {
2273 __A = _mm_div_sd(__A, __B);
2274 return __builtin_ia32_selectsd_128(__U, __A, _mm_setzero_pd());
2275}
2276
/* Scalar double-precision divide with explicit rounding control. */
#define _mm_div_round_sd(A, B, R) \
  (__m128d)__builtin_ia32_divsd_round_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2df)_mm_setzero_pd(), \
                                           (__mmask8)-1, (int)(R))

#define _mm_mask_div_round_sd(W, U, A, B, R) \
  (__m128d)__builtin_ia32_divsd_round_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2df)(__m128d)(W), \
                                           (__mmask8)(U), (int)(R))

#define _mm_maskz_div_round_sd(U, A, B, R) \
  (__m128d)__builtin_ia32_divsd_round_mask((__v2df)(__m128d)(A), \
                                           (__v2df)(__m128d)(B), \
                                           (__v2df)_mm_setzero_pd(), \
                                           (__mmask8)(U), (int)(R))
2294
2295static __inline __m512d __DEFAULT_FN_ATTRS512
2296_mm512_div_pd(__m512d __a, __m512d __b)
2297{
2298 return (__m512d)((__v8df)__a/(__v8df)__b);
2299}
2300
2301static __inline__ __m512d __DEFAULT_FN_ATTRS512
2302_mm512_mask_div_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
2303 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
2304 (__v8df)_mm512_div_pd(__A, __B),
2305 (__v8df)__W);
2306}
2307
2308static __inline__ __m512d __DEFAULT_FN_ATTRS512
2309_mm512_maskz_div_pd(__mmask8 __U, __m512d __A, __m512d __B) {
2310 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
2311 (__v8df)_mm512_div_pd(__A, __B),
2312 (__v8df)_mm512_setzero_pd());
2313}
2314
2315static __inline __m512 __DEFAULT_FN_ATTRS512
2316_mm512_div_ps(__m512 __a, __m512 __b)
2317{
2318 return (__m512)((__v16sf)__a/(__v16sf)__b);
2319}
2320
2321static __inline__ __m512 __DEFAULT_FN_ATTRS512
2322_mm512_mask_div_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
2323 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
2324 (__v16sf)_mm512_div_ps(__A, __B),
2325 (__v16sf)__W);
2326}
2327
2328static __inline__ __m512 __DEFAULT_FN_ATTRS512
2329_mm512_maskz_div_ps(__mmask16 __U, __m512 __A, __m512 __B) {
2330 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
2331 (__v16sf)_mm512_div_ps(__A, __B),
2332 (__v16sf)_mm512_setzero_ps());
2333}
2334
/* 512-bit packed divide with explicit rounding control, plus masked forms. */
#define _mm512_div_round_pd(A, B, R) \
  (__m512d)__builtin_ia32_divpd512((__v8df)(__m512d)(A), \
                                   (__v8df)(__m512d)(B), (int)(R))

#define _mm512_mask_div_round_pd(W, U, A, B, R) \
  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                   (__v8df)_mm512_div_round_pd((A), (B), (R)), \
                                   (__v8df)(__m512d)(W))

#define _mm512_maskz_div_round_pd(U, A, B, R) \
  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                   (__v8df)_mm512_div_round_pd((A), (B), (R)), \
                                   (__v8df)_mm512_setzero_pd())

#define _mm512_div_round_ps(A, B, R) \
  (__m512)__builtin_ia32_divps512((__v16sf)(__m512)(A), \
                                  (__v16sf)(__m512)(B), (int)(R))

#define _mm512_mask_div_round_ps(W, U, A, B, R) \
  (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                  (__v16sf)_mm512_div_round_ps((A), (B), (R)), \
                                  (__v16sf)(__m512)(W))

#define _mm512_maskz_div_round_ps(U, A, B, R) \
  (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                  (__v16sf)_mm512_div_round_ps((A), (B), (R)), \
                                  (__v16sf)_mm512_setzero_ps())
2362
/* Round packed elements to a specified number of fraction bits (imm), with
   unmasked / merge-masked / zero-masked and explicit-rounding variants. */
#define _mm512_roundscale_ps(A, B) \
  (__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(A), (int)(B), \
                                         (__v16sf)_mm512_undefined_ps(), \
                                         (__mmask16)-1, \
                                         _MM_FROUND_CUR_DIRECTION)

#define _mm512_mask_roundscale_ps(A, B, C, imm) \
  (__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(C), (int)(imm), \
                                         (__v16sf)(__m512)(A), (__mmask16)(B), \
                                         _MM_FROUND_CUR_DIRECTION)

#define _mm512_maskz_roundscale_ps(A, B, imm) \
  (__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(B), (int)(imm), \
                                         (__v16sf)_mm512_setzero_ps(), \
                                         (__mmask16)(A), \
                                         _MM_FROUND_CUR_DIRECTION)

#define _mm512_mask_roundscale_round_ps(A, B, C, imm, R) \
  (__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(C), (int)(imm), \
                                         (__v16sf)(__m512)(A), (__mmask16)(B), \
                                         (int)(R))

#define _mm512_maskz_roundscale_round_ps(A, B, imm, R) \
  (__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(B), (int)(imm), \
                                         (__v16sf)_mm512_setzero_ps(), \
                                         (__mmask16)(A), (int)(R))

#define _mm512_roundscale_round_ps(A, imm, R) \
  (__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(A), (int)(imm), \
                                         (__v16sf)_mm512_undefined_ps(), \
                                         (__mmask16)-1, (int)(R))

#define _mm512_roundscale_pd(A, B) \
  (__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(A), (int)(B), \
                                          (__v8df)_mm512_undefined_pd(), \
                                          (__mmask8)-1, \
                                          _MM_FROUND_CUR_DIRECTION)

#define _mm512_mask_roundscale_pd(A, B, C, imm) \
  (__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(C), (int)(imm), \
                                          (__v8df)(__m512d)(A), (__mmask8)(B), \
                                          _MM_FROUND_CUR_DIRECTION)

#define _mm512_maskz_roundscale_pd(A, B, imm) \
  (__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(B), (int)(imm), \
                                          (__v8df)_mm512_setzero_pd(), \
                                          (__mmask8)(A), \
                                          _MM_FROUND_CUR_DIRECTION)

#define _mm512_mask_roundscale_round_pd(A, B, C, imm, R) \
  (__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(C), (int)(imm), \
                                          (__v8df)(__m512d)(A), (__mmask8)(B), \
                                          (int)(R))

#define _mm512_maskz_roundscale_round_pd(A, B, imm, R) \
  (__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(B), (int)(imm), \
                                          (__v8df)_mm512_setzero_pd(), \
                                          (__mmask8)(A), (int)(R))

#define _mm512_roundscale_round_pd(A, imm, R) \
  (__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(A), (int)(imm), \
                                          (__v8df)_mm512_undefined_pd(), \
                                          (__mmask8)-1, (int)(R))
2426
/* Fused multiply-add family for packed doubles with explicit rounding (R).
   Negated C gives fmsub; negated A gives fnmadd; both gives fnmsub. */
#define _mm512_fmadd_round_pd(A, B, C, R) \
  (__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \
                                           (__v8df)(__m512d)(B), \
                                           (__v8df)(__m512d)(C), \
                                           (__mmask8)-1, (int)(R))


#define _mm512_mask_fmadd_round_pd(A, U, B, C, R) \
  (__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \
                                           (__v8df)(__m512d)(B), \
                                           (__v8df)(__m512d)(C), \
                                           (__mmask8)(U), (int)(R))


#define _mm512_mask3_fmadd_round_pd(A, B, C, U, R) \
  (__m512d)__builtin_ia32_vfmaddpd512_mask3((__v8df)(__m512d)(A), \
                                            (__v8df)(__m512d)(B), \
                                            (__v8df)(__m512d)(C), \
                                            (__mmask8)(U), (int)(R))


#define _mm512_maskz_fmadd_round_pd(U, A, B, C, R) \
  (__m512d)__builtin_ia32_vfmaddpd512_maskz((__v8df)(__m512d)(A), \
                                            (__v8df)(__m512d)(B), \
                                            (__v8df)(__m512d)(C), \
                                            (__mmask8)(U), (int)(R))


#define _mm512_fmsub_round_pd(A, B, C, R) \
  (__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \
                                           (__v8df)(__m512d)(B), \
                                           -(__v8df)(__m512d)(C), \
                                           (__mmask8)-1, (int)(R))


#define _mm512_mask_fmsub_round_pd(A, U, B, C, R) \
  (__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \
                                           (__v8df)(__m512d)(B), \
                                           -(__v8df)(__m512d)(C), \
                                           (__mmask8)(U), (int)(R))


#define _mm512_maskz_fmsub_round_pd(U, A, B, C, R) \
  (__m512d)__builtin_ia32_vfmaddpd512_maskz((__v8df)(__m512d)(A), \
                                            (__v8df)(__m512d)(B), \
                                            -(__v8df)(__m512d)(C), \
                                            (__mmask8)(U), (int)(R))


#define _mm512_fnmadd_round_pd(A, B, C, R) \
  (__m512d)__builtin_ia32_vfmaddpd512_mask(-(__v8df)(__m512d)(A), \
                                           (__v8df)(__m512d)(B), \
                                           (__v8df)(__m512d)(C), \
                                           (__mmask8)-1, (int)(R))


#define _mm512_mask3_fnmadd_round_pd(A, B, C, U, R) \
  (__m512d)__builtin_ia32_vfmaddpd512_mask3(-(__v8df)(__m512d)(A), \
                                            (__v8df)(__m512d)(B), \
                                            (__v8df)(__m512d)(C), \
                                            (__mmask8)(U), (int)(R))


#define _mm512_maskz_fnmadd_round_pd(U, A, B, C, R) \
  (__m512d)__builtin_ia32_vfmaddpd512_maskz(-(__v8df)(__m512d)(A), \
                                            (__v8df)(__m512d)(B), \
                                            (__v8df)(__m512d)(C), \
                                            (__mmask8)(U), (int)(R))


#define _mm512_fnmsub_round_pd(A, B, C, R) \
  (__m512d)__builtin_ia32_vfmaddpd512_mask(-(__v8df)(__m512d)(A), \
                                           (__v8df)(__m512d)(B), \
                                           -(__v8df)(__m512d)(C), \
                                           (__mmask8)-1, (int)(R))


#define _mm512_maskz_fnmsub_round_pd(U, A, B, C, R) \
  (__m512d)__builtin_ia32_vfmaddpd512_maskz(-(__v8df)(__m512d)(A), \
                                            (__v8df)(__m512d)(B), \
                                            -(__v8df)(__m512d)(C), \
                                            (__mmask8)(U), (int)(R))
2510
2511static __inline__ __m512d __DEFAULT_FN_ATTRS512
2512_mm512_fmadd_pd(__m512d __A, __m512d __B, __m512d __C)
2513{
2514 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
2515 (__v8df) __B,
2516 (__v8df) __C,
2517 (__mmask8) -1,
2519}
2520
2521static __inline__ __m512d __DEFAULT_FN_ATTRS512
2522_mm512_mask_fmadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
2523{
2524 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
2525 (__v8df) __B,
2526 (__v8df) __C,
2527 (__mmask8) __U,
2529}
2530
2531static __inline__ __m512d __DEFAULT_FN_ATTRS512
2532_mm512_mask3_fmadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
2533{
2534 return (__m512d) __builtin_ia32_vfmaddpd512_mask3 ((__v8df) __A,
2535 (__v8df) __B,
2536 (__v8df) __C,
2537 (__mmask8) __U,
2539}
2540
2541static __inline__ __m512d __DEFAULT_FN_ATTRS512
2542_mm512_maskz_fmadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
2543{
2544 return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A,
2545 (__v8df) __B,
2546 (__v8df) __C,
2547 (__mmask8) __U,
2549}
2550
2551static __inline__ __m512d __DEFAULT_FN_ATTRS512
2552_mm512_fmsub_pd(__m512d __A, __m512d __B, __m512d __C)
2553{
2554 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
2555 (__v8df) __B,
2556 -(__v8df) __C,
2557 (__mmask8) -1,
2559}
2560
2561static __inline__ __m512d __DEFAULT_FN_ATTRS512
2562_mm512_mask_fmsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
2563{
2564 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
2565 (__v8df) __B,
2566 -(__v8df) __C,
2567 (__mmask8) __U,
2569}
2570
2571static __inline__ __m512d __DEFAULT_FN_ATTRS512
2572_mm512_maskz_fmsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
2573{
2574 return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A,
2575 (__v8df) __B,
2576 -(__v8df) __C,
2577 (__mmask8) __U,
2579}
2580
2581static __inline__ __m512d __DEFAULT_FN_ATTRS512
2582_mm512_fnmadd_pd(__m512d __A, __m512d __B, __m512d __C)
2583{
2584 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
2585 -(__v8df) __B,
2586 (__v8df) __C,
2587 (__mmask8) -1,
2589}
2590
2591static __inline__ __m512d __DEFAULT_FN_ATTRS512
2592_mm512_mask3_fnmadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
2593{
2594 return (__m512d) __builtin_ia32_vfmaddpd512_mask3 (-(__v8df) __A,
2595 (__v8df) __B,
2596 (__v8df) __C,
2597 (__mmask8) __U,
2599}
2600
2601static __inline__ __m512d __DEFAULT_FN_ATTRS512
2602_mm512_maskz_fnmadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
2603{
2604 return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A,
2605 (__v8df) __B,
2606 (__v8df) __C,
2607 (__mmask8) __U,
2609}
2610
2611static __inline__ __m512d __DEFAULT_FN_ATTRS512
2612_mm512_fnmsub_pd(__m512d __A, __m512d __B, __m512d __C)
2613{
2614 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
2615 -(__v8df) __B,
2616 -(__v8df) __C,
2617 (__mmask8) -1,
2619}
2620
2621static __inline__ __m512d __DEFAULT_FN_ATTRS512
2622_mm512_maskz_fnmsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
2623{
2624 return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A,
2625 (__v8df) __B,
2626 -(__v8df) __C,
2627 (__mmask8) __U,
2629}
2630
/* Fused multiply-add family for packed floats with explicit rounding (R). */
#define _mm512_fmadd_round_ps(A, B, C, R) \
  (__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
                                          (__v16sf)(__m512)(B), \
                                          (__v16sf)(__m512)(C), \
                                          (__mmask16)-1, (int)(R))


#define _mm512_mask_fmadd_round_ps(A, U, B, C, R) \
  (__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
                                          (__v16sf)(__m512)(B), \
                                          (__v16sf)(__m512)(C), \
                                          (__mmask16)(U), (int)(R))


#define _mm512_mask3_fmadd_round_ps(A, B, C, U, R) \
  (__m512)__builtin_ia32_vfmaddps512_mask3((__v16sf)(__m512)(A), \
                                           (__v16sf)(__m512)(B), \
                                           (__v16sf)(__m512)(C), \
                                           (__mmask16)(U), (int)(R))


#define _mm512_maskz_fmadd_round_ps(U, A, B, C, R) \
  (__m512)__builtin_ia32_vfmaddps512_maskz((__v16sf)(__m512)(A), \
                                           (__v16sf)(__m512)(B), \
                                           (__v16sf)(__m512)(C), \
                                           (__mmask16)(U), (int)(R))


#define _mm512_fmsub_round_ps(A, B, C, R) \
  (__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
                                          (__v16sf)(__m512)(B), \
                                          -(__v16sf)(__m512)(C), \
                                          (__mmask16)-1, (int)(R))


#define _mm512_mask_fmsub_round_ps(A, U, B, C, R) \
  (__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
                                          (__v16sf)(__m512)(B), \
                                          -(__v16sf)(__m512)(C), \
                                          (__mmask16)(U), (int)(R))


#define _mm512_maskz_fmsub_round_ps(U, A, B, C, R) \
  (__m512)__builtin_ia32_vfmaddps512_maskz((__v16sf)(__m512)(A), \
                                           (__v16sf)(__m512)(B), \
                                           -(__v16sf)(__m512)(C), \
                                           (__mmask16)(U), (int)(R))


#define _mm512_fnmadd_round_ps(A, B, C, R) \
  (__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
                                          -(__v16sf)(__m512)(B), \
                                          (__v16sf)(__m512)(C), \
                                          (__mmask16)-1, (int)(R))


#define _mm512_mask3_fnmadd_round_ps(A, B, C, U, R) \
  (__m512)__builtin_ia32_vfmaddps512_mask3(-(__v16sf)(__m512)(A), \
                                           (__v16sf)(__m512)(B), \
                                           (__v16sf)(__m512)(C), \
                                           (__mmask16)(U), (int)(R))


#define _mm512_maskz_fnmadd_round_ps(U, A, B, C, R) \
  (__m512)__builtin_ia32_vfmaddps512_maskz(-(__v16sf)(__m512)(A), \
                                           (__v16sf)(__m512)(B), \
                                           (__v16sf)(__m512)(C), \
                                           (__mmask16)(U), (int)(R))


#define _mm512_fnmsub_round_ps(A, B, C, R) \
  (__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
                                          -(__v16sf)(__m512)(B), \
                                          -(__v16sf)(__m512)(C), \
                                          (__mmask16)-1, (int)(R))


#define _mm512_maskz_fnmsub_round_ps(U, A, B, C, R) \
  (__m512)__builtin_ia32_vfmaddps512_maskz(-(__v16sf)(__m512)(A), \
                                           (__v16sf)(__m512)(B), \
                                           -(__v16sf)(__m512)(C), \
                                           (__mmask16)(U), (int)(R))
2714
2715static __inline__ __m512 __DEFAULT_FN_ATTRS512
2716_mm512_fmadd_ps(__m512 __A, __m512 __B, __m512 __C)
2717{
2718 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
2719 (__v16sf) __B,
2720 (__v16sf) __C,
2721 (__mmask16) -1,
2723}
2724
2725static __inline__ __m512 __DEFAULT_FN_ATTRS512
2726_mm512_mask_fmadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
2727{
2728 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
2729 (__v16sf) __B,
2730 (__v16sf) __C,
2731 (__mmask16) __U,
2733}
2734
2735static __inline__ __m512 __DEFAULT_FN_ATTRS512
2736_mm512_mask3_fmadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
2737{
2738 return (__m512) __builtin_ia32_vfmaddps512_mask3 ((__v16sf) __A,
2739 (__v16sf) __B,
2740 (__v16sf) __C,
2741 (__mmask16) __U,
2743}
2744
2745static __inline__ __m512 __DEFAULT_FN_ATTRS512
2746_mm512_maskz_fmadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
2747{
2748 return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A,
2749 (__v16sf) __B,
2750 (__v16sf) __C,
2751 (__mmask16) __U,
2753}
2754
2755static __inline__ __m512 __DEFAULT_FN_ATTRS512
2756_mm512_fmsub_ps(__m512 __A, __m512 __B, __m512 __C)
2757{
2758 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
2759 (__v16sf) __B,
2760 -(__v16sf) __C,
2761 (__mmask16) -1,
2763}
2764
2765static __inline__ __m512 __DEFAULT_FN_ATTRS512
2766_mm512_mask_fmsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
2767{
2768 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
2769 (__v16sf) __B,
2770 -(__v16sf) __C,
2771 (__mmask16) __U,
2773}
2774
2775static __inline__ __m512 __DEFAULT_FN_ATTRS512
2776_mm512_maskz_fmsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
2777{
2778 return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A,
2779 (__v16sf) __B,
2780 -(__v16sf) __C,
2781 (__mmask16) __U,
2783}
2784
2785static __inline__ __m512 __DEFAULT_FN_ATTRS512
2786_mm512_fnmadd_ps(__m512 __A, __m512 __B, __m512 __C)
2787{
2788 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
2789 -(__v16sf) __B,
2790 (__v16sf) __C,
2791 (__mmask16) -1,
2793}
2794
2795static __inline__ __m512 __DEFAULT_FN_ATTRS512
2796_mm512_mask3_fnmadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
2797{
2798 return (__m512) __builtin_ia32_vfmaddps512_mask3 (-(__v16sf) __A,
2799 (__v16sf) __B,
2800 (__v16sf) __C,
2801 (__mmask16) __U,
2803}
2804
2805static __inline__ __m512 __DEFAULT_FN_ATTRS512
2806_mm512_maskz_fnmadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
2807{
2808 return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A,
2809 (__v16sf) __B,
2810 (__v16sf) __C,
2811 (__mmask16) __U,
2813}
2814
2815static __inline__ __m512 __DEFAULT_FN_ATTRS512
2816_mm512_fnmsub_ps(__m512 __A, __m512 __B, __m512 __C)
2817{
2818 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
2819 -(__v16sf) __B,
2820 -(__v16sf) __C,
2821 (__mmask16) -1,
2823}
2824
2825static __inline__ __m512 __DEFAULT_FN_ATTRS512
2826_mm512_maskz_fnmsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
2827{
2828 return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A,
2829 (__v16sf) __B,
2830 -(__v16sf) __C,
2831 (__mmask16) __U,
2833}
2834
/* Fused multiply with alternating add/sub per double lane, explicit rounding.
   fmaddsub: odd lanes add C, even lanes subtract; fmsubadd negates C. */
#define _mm512_fmaddsub_round_pd(A, B, C, R) \
  (__m512d)__builtin_ia32_vfmaddsubpd512_mask((__v8df)(__m512d)(A), \
                                              (__v8df)(__m512d)(B), \
                                              (__v8df)(__m512d)(C), \
                                              (__mmask8)-1, (int)(R))


#define _mm512_mask_fmaddsub_round_pd(A, U, B, C, R) \
  (__m512d)__builtin_ia32_vfmaddsubpd512_mask((__v8df)(__m512d)(A), \
                                              (__v8df)(__m512d)(B), \
                                              (__v8df)(__m512d)(C), \
                                              (__mmask8)(U), (int)(R))


#define _mm512_mask3_fmaddsub_round_pd(A, B, C, U, R) \
  (__m512d)__builtin_ia32_vfmaddsubpd512_mask3((__v8df)(__m512d)(A), \
                                               (__v8df)(__m512d)(B), \
                                               (__v8df)(__m512d)(C), \
                                               (__mmask8)(U), (int)(R))


#define _mm512_maskz_fmaddsub_round_pd(U, A, B, C, R) \
  (__m512d)__builtin_ia32_vfmaddsubpd512_maskz((__v8df)(__m512d)(A), \
                                               (__v8df)(__m512d)(B), \
                                               (__v8df)(__m512d)(C), \
                                               (__mmask8)(U), (int)(R))


#define _mm512_fmsubadd_round_pd(A, B, C, R) \
  (__m512d)__builtin_ia32_vfmaddsubpd512_mask((__v8df)(__m512d)(A), \
                                              (__v8df)(__m512d)(B), \
                                              -(__v8df)(__m512d)(C), \
                                              (__mmask8)-1, (int)(R))


#define _mm512_mask_fmsubadd_round_pd(A, U, B, C, R) \
  (__m512d)__builtin_ia32_vfmaddsubpd512_mask((__v8df)(__m512d)(A), \
                                              (__v8df)(__m512d)(B), \
                                              -(__v8df)(__m512d)(C), \
                                              (__mmask8)(U), (int)(R))


#define _mm512_maskz_fmsubadd_round_pd(U, A, B, C, R) \
  (__m512d)__builtin_ia32_vfmaddsubpd512_maskz((__v8df)(__m512d)(A), \
                                               (__v8df)(__m512d)(B), \
                                               -(__v8df)(__m512d)(C), \
                                               (__mmask8)(U), (int)(R))
2882
2883
2884static __inline__ __m512d __DEFAULT_FN_ATTRS512
2885_mm512_fmaddsub_pd(__m512d __A, __m512d __B, __m512d __C)
2886{
2887 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
2888 (__v8df) __B,
2889 (__v8df) __C,
2890 (__mmask8) -1,
2892}
2893
2894static __inline__ __m512d __DEFAULT_FN_ATTRS512
2895_mm512_mask_fmaddsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
2896{
2897 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
2898 (__v8df) __B,
2899 (__v8df) __C,
2900 (__mmask8) __U,
2902}
2903
2904static __inline__ __m512d __DEFAULT_FN_ATTRS512
2905_mm512_mask3_fmaddsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
2906{
2907 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask3 ((__v8df) __A,
2908 (__v8df) __B,
2909 (__v8df) __C,
2910 (__mmask8) __U,
2912}
2913
2914static __inline__ __m512d __DEFAULT_FN_ATTRS512
2915_mm512_maskz_fmaddsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
2916{
2917 return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
2918 (__v8df) __B,
2919 (__v8df) __C,
2920 (__mmask8) __U,
2922}
2923
2924static __inline__ __m512d __DEFAULT_FN_ATTRS512
2925_mm512_fmsubadd_pd(__m512d __A, __m512d __B, __m512d __C)
2926{
2927 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
2928 (__v8df) __B,
2929 -(__v8df) __C,
2930 (__mmask8) -1,
2932}
2933
2934static __inline__ __m512d __DEFAULT_FN_ATTRS512
2935_mm512_mask_fmsubadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
2936{
2937 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
2938 (__v8df) __B,
2939 -(__v8df) __C,
2940 (__mmask8) __U,
2942}
2943
2944static __inline__ __m512d __DEFAULT_FN_ATTRS512
2945_mm512_maskz_fmsubadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
2946{
2947 return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
2948 (__v8df) __B,
2949 -(__v8df) __C,
2950 (__mmask8) __U,
2952}
2953
/* Fused multiply with alternating add/sub per float lane, explicit rounding. */
#define _mm512_fmaddsub_round_ps(A, B, C, R) \
  (__m512)__builtin_ia32_vfmaddsubps512_mask((__v16sf)(__m512)(A), \
                                             (__v16sf)(__m512)(B), \
                                             (__v16sf)(__m512)(C), \
                                             (__mmask16)-1, (int)(R))


#define _mm512_mask_fmaddsub_round_ps(A, U, B, C, R) \
  (__m512)__builtin_ia32_vfmaddsubps512_mask((__v16sf)(__m512)(A), \
                                             (__v16sf)(__m512)(B), \
                                             (__v16sf)(__m512)(C), \
                                             (__mmask16)(U), (int)(R))


#define _mm512_mask3_fmaddsub_round_ps(A, B, C, U, R) \
  (__m512)__builtin_ia32_vfmaddsubps512_mask3((__v16sf)(__m512)(A), \
                                              (__v16sf)(__m512)(B), \
                                              (__v16sf)(__m512)(C), \
                                              (__mmask16)(U), (int)(R))


#define _mm512_maskz_fmaddsub_round_ps(U, A, B, C, R) \
  (__m512)__builtin_ia32_vfmaddsubps512_maskz((__v16sf)(__m512)(A), \
                                              (__v16sf)(__m512)(B), \
                                              (__v16sf)(__m512)(C), \
                                              (__mmask16)(U), (int)(R))


#define _mm512_fmsubadd_round_ps(A, B, C, R) \
  (__m512)__builtin_ia32_vfmaddsubps512_mask((__v16sf)(__m512)(A), \
                                             (__v16sf)(__m512)(B), \
                                             -(__v16sf)(__m512)(C), \
                                             (__mmask16)-1, (int)(R))


#define _mm512_mask_fmsubadd_round_ps(A, U, B, C, R) \
  (__m512)__builtin_ia32_vfmaddsubps512_mask((__v16sf)(__m512)(A), \
                                             (__v16sf)(__m512)(B), \
                                             -(__v16sf)(__m512)(C), \
                                             (__mmask16)(U), (int)(R))


#define _mm512_maskz_fmsubadd_round_ps(U, A, B, C, R) \
  (__m512)__builtin_ia32_vfmaddsubps512_maskz((__v16sf)(__m512)(A), \
                                              (__v16sf)(__m512)(B), \
                                              -(__v16sf)(__m512)(C), \
                                              (__mmask16)(U), (int)(R))
3001
3002
3003static __inline__ __m512 __DEFAULT_FN_ATTRS512
3004_mm512_fmaddsub_ps(__m512 __A, __m512 __B, __m512 __C)
3005{
3006 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
3007 (__v16sf) __B,
3008 (__v16sf) __C,
3009 (__mmask16) -1,
3011}
3012
3013static __inline__ __m512 __DEFAULT_FN_ATTRS512
3014_mm512_mask_fmaddsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
3015{
3016 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
3017 (__v16sf) __B,
3018 (__v16sf) __C,
3019 (__mmask16) __U,
3021}
3022
3023static __inline__ __m512 __DEFAULT_FN_ATTRS512
3024_mm512_mask3_fmaddsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
3025{
3026 return (__m512) __builtin_ia32_vfmaddsubps512_mask3 ((__v16sf) __A,
3027 (__v16sf) __B,
3028 (__v16sf) __C,
3029 (__mmask16) __U,
3031}
3032
3033static __inline__ __m512 __DEFAULT_FN_ATTRS512
3034_mm512_maskz_fmaddsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
3035{
3036 return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
3037 (__v16sf) __B,
3038 (__v16sf) __C,
3039 (__mmask16) __U,
3041}
3042
3043static __inline__ __m512 __DEFAULT_FN_ATTRS512
3044_mm512_fmsubadd_ps(__m512 __A, __m512 __B, __m512 __C)
3045{
3046 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
3047 (__v16sf) __B,
3048 -(__v16sf) __C,
3049 (__mmask16) -1,
3051}
3052
3053static __inline__ __m512 __DEFAULT_FN_ATTRS512
3054_mm512_mask_fmsubadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
3055{
3056 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
3057 (__v16sf) __B,
3058 -(__v16sf) __C,
3059 (__mmask16) __U,
3061}
3062
3063static __inline__ __m512 __DEFAULT_FN_ATTRS512
3064_mm512_maskz_fmsubadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
3065{
3066 return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
3067 (__v16sf) __B,
3068 -(__v16sf) __C,
3069 (__mmask16) __U,
3071}
3072
/* mask3 fmsub for packed doubles with explicit rounding; clear lanes keep C. */
#define _mm512_mask3_fmsub_round_pd(A, B, C, U, R) \
  (__m512d)__builtin_ia32_vfmsubpd512_mask3((__v8df)(__m512d)(A), \
                                            (__v8df)(__m512d)(B), \
                                            (__v8df)(__m512d)(C), \
                                            (__mmask8)(U), (int)(R))
3078
3079
3080static __inline__ __m512d __DEFAULT_FN_ATTRS512
3081_mm512_mask3_fmsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
3082{
3083 return (__m512d)__builtin_ia32_vfmsubpd512_mask3 ((__v8df) __A,
3084 (__v8df) __B,
3085 (__v8df) __C,
3086 (__mmask8) __U,
3088}
3089
/* mask3 fmsub for packed floats with explicit rounding; clear lanes keep C. */
#define _mm512_mask3_fmsub_round_ps(A, B, C, U, R) \
  (__m512)__builtin_ia32_vfmsubps512_mask3((__v16sf)(__m512)(A), \
                                           (__v16sf)(__m512)(B), \
                                           (__v16sf)(__m512)(C), \
                                           (__mmask16)(U), (int)(R))
3095
3096static __inline__ __m512 __DEFAULT_FN_ATTRS512
3097_mm512_mask3_fmsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
3098{
3099 return (__m512)__builtin_ia32_vfmsubps512_mask3 ((__v16sf) __A,
3100 (__v16sf) __B,
3101 (__v16sf) __C,
3102 (__mmask16) __U,
3104}
3105
/* mask3 fmsubadd for packed doubles with explicit rounding. */
#define _mm512_mask3_fmsubadd_round_pd(A, B, C, U, R) \
  (__m512d)__builtin_ia32_vfmsubaddpd512_mask3((__v8df)(__m512d)(A), \
                                               (__v8df)(__m512d)(B), \
                                               (__v8df)(__m512d)(C), \
                                               (__mmask8)(U), (int)(R))
3111
3112
3113static __inline__ __m512d __DEFAULT_FN_ATTRS512
3114_mm512_mask3_fmsubadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
3115{
3116 return (__m512d)__builtin_ia32_vfmsubaddpd512_mask3 ((__v8df) __A,
3117 (__v8df) __B,
3118 (__v8df) __C,
3119 (__mmask8) __U,
3121}
3122
/* mask3 fmsubadd for packed floats with explicit rounding. */
#define _mm512_mask3_fmsubadd_round_ps(A, B, C, U, R) \
  (__m512)__builtin_ia32_vfmsubaddps512_mask3((__v16sf)(__m512)(A), \
                                              (__v16sf)(__m512)(B), \
                                              (__v16sf)(__m512)(C), \
                                              (__mmask16)(U), (int)(R))
3128
3129
3130static __inline__ __m512 __DEFAULT_FN_ATTRS512
3131_mm512_mask3_fmsubadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
3132{
3133 return (__m512)__builtin_ia32_vfmsubaddps512_mask3 ((__v16sf) __A,
3134 (__v16sf) __B,
3135 (__v16sf) __C,
3136 (__mmask16) __U,
3138}
3139
/* Merge-masked -(A*B)+C for packed doubles with explicit rounding. */
#define _mm512_mask_fnmadd_round_pd(A, U, B, C, R) \
  (__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \
                                           -(__v8df)(__m512d)(B), \
                                           (__v8df)(__m512d)(C), \
                                           (__mmask8)(U), (int)(R))
3145
3146
3147static __inline__ __m512d __DEFAULT_FN_ATTRS512
3148_mm512_mask_fnmadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
3149{
3150 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
3151 -(__v8df) __B,
3152 (__v8df) __C,
3153 (__mmask8) __U,
3155}
3156
/* Merge-masked -(A*B)+C for packed floats with explicit rounding. */
#define _mm512_mask_fnmadd_round_ps(A, U, B, C, R) \
  (__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
                                          -(__v16sf)(__m512)(B), \
                                          (__v16sf)(__m512)(C), \
                                          (__mmask16)(U), (int)(R))
3162
3163
3164static __inline__ __m512 __DEFAULT_FN_ATTRS512
3165_mm512_mask_fnmadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
3166{
3167 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
3168 -(__v16sf) __B,
3169 (__v16sf) __C,
3170 (__mmask16) __U,
3172}
3173
/* Masked -(A*B)-C for packed doubles with explicit rounding. */
#define _mm512_mask_fnmsub_round_pd(A, U, B, C, R) \
  (__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \
                                           -(__v8df)(__m512d)(B), \
                                           -(__v8df)(__m512d)(C), \
                                           (__mmask8)(U), (int)(R))


#define _mm512_mask3_fnmsub_round_pd(A, B, C, U, R) \
  (__m512d)__builtin_ia32_vfmsubpd512_mask3(-(__v8df)(__m512d)(A), \
                                            (__v8df)(__m512d)(B), \
                                            (__v8df)(__m512d)(C), \
                                            (__mmask8)(U), (int)(R))
3186
3187
3188static __inline__ __m512d __DEFAULT_FN_ATTRS512
3189_mm512_mask_fnmsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
3190{
3191 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
3192 -(__v8df) __B,
3193 -(__v8df) __C,
3194 (__mmask8) __U,
3196}
3197
3198static __inline__ __m512d __DEFAULT_FN_ATTRS512
3199_mm512_mask3_fnmsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
3200{
3201 return (__m512d) __builtin_ia32_vfmsubpd512_mask3 (-(__v8df) __A,
3202 (__v8df) __B,
3203 (__v8df) __C,
3204 (__mmask8) __U,
3206}
3207
/* Masked -(A*B)-C for packed floats with explicit rounding. */
#define _mm512_mask_fnmsub_round_ps(A, U, B, C, R) \
  (__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
                                          -(__v16sf)(__m512)(B), \
                                          -(__v16sf)(__m512)(C), \
                                          (__mmask16)(U), (int)(R))


#define _mm512_mask3_fnmsub_round_ps(A, B, C, U, R) \
  (__m512)__builtin_ia32_vfmsubps512_mask3(-(__v16sf)(__m512)(A), \
                                           (__v16sf)(__m512)(B), \
                                           (__v16sf)(__m512)(C), \
                                           (__mmask16)(U), (int)(R))
3220
3221
3222static __inline__ __m512 __DEFAULT_FN_ATTRS512
3223_mm512_mask_fnmsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
3224{
3225 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
3226 -(__v16sf) __B,
3227 -(__v16sf) __C,
3228 (__mmask16) __U,
3230}
3231
3232static __inline__ __m512 __DEFAULT_FN_ATTRS512
3233_mm512_mask3_fnmsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
3234{
3235 return (__m512) __builtin_ia32_vfmsubps512_mask3 (-(__v16sf) __A,
3236 (__v16sf) __B,
3237 (__v16sf) __C,
3238 (__mmask16) __U,
3240}
3241
3242
3243
/* Vector permutations */
3245
3246static __inline __m512i __DEFAULT_FN_ATTRS512
3247_mm512_permutex2var_epi32(__m512i __A, __m512i __I, __m512i __B)
3248{
3249 return (__m512i)__builtin_ia32_vpermi2vard512((__v16si)__A, (__v16si) __I,
3250 (__v16si) __B);
3251}
3252
3253static __inline__ __m512i __DEFAULT_FN_ATTRS512
3254_mm512_mask_permutex2var_epi32(__m512i __A, __mmask16 __U, __m512i __I,
3255 __m512i __B)
3256{
3257 return (__m512i)__builtin_ia32_selectd_512(__U,
3258 (__v16si)_mm512_permutex2var_epi32(__A, __I, __B),
3259 (__v16si)__A);
3260}
3261
3262static __inline__ __m512i __DEFAULT_FN_ATTRS512
3263_mm512_mask2_permutex2var_epi32(__m512i __A, __m512i __I, __mmask16 __U,
3264 __m512i __B)
3265{
3266 return (__m512i)__builtin_ia32_selectd_512(__U,
3267 (__v16si)_mm512_permutex2var_epi32(__A, __I, __B),
3268 (__v16si)__I);
3269}
3270
3271static __inline__ __m512i __DEFAULT_FN_ATTRS512
3272_mm512_maskz_permutex2var_epi32(__mmask16 __U, __m512i __A, __m512i __I,
3273 __m512i __B)
3274{
3275 return (__m512i)__builtin_ia32_selectd_512(__U,
3276 (__v16si)_mm512_permutex2var_epi32(__A, __I, __B),
3277 (__v16si)_mm512_setzero_si512());
3278}
3279
3280static __inline __m512i __DEFAULT_FN_ATTRS512
3281_mm512_permutex2var_epi64(__m512i __A, __m512i __I, __m512i __B)
3282{
3283 return (__m512i)__builtin_ia32_vpermi2varq512((__v8di)__A, (__v8di) __I,
3284 (__v8di) __B);
3285}
3286
3287static __inline__ __m512i __DEFAULT_FN_ATTRS512
3288_mm512_mask_permutex2var_epi64(__m512i __A, __mmask8 __U, __m512i __I,
3289 __m512i __B)
3290{
3291 return (__m512i)__builtin_ia32_selectq_512(__U,
3292 (__v8di)_mm512_permutex2var_epi64(__A, __I, __B),
3293 (__v8di)__A);
3294}
3295
3296static __inline__ __m512i __DEFAULT_FN_ATTRS512
3297_mm512_mask2_permutex2var_epi64(__m512i __A, __m512i __I, __mmask8 __U,
3298 __m512i __B)
3299{
3300 return (__m512i)__builtin_ia32_selectq_512(__U,
3301 (__v8di)_mm512_permutex2var_epi64(__A, __I, __B),
3302 (__v8di)__I);
3303}
3304
3305static __inline__ __m512i __DEFAULT_FN_ATTRS512
3306_mm512_maskz_permutex2var_epi64(__mmask8 __U, __m512i __A, __m512i __I,
3307 __m512i __B)
3308{
3309 return (__m512i)__builtin_ia32_selectq_512(__U,
3310 (__v8di)_mm512_permutex2var_epi64(__A, __I, __B),
3311 (__v8di)_mm512_setzero_si512());
3312}
3313
/* Concatenate A:B and shift right by I whole elements; macros because the
   shift count must be an immediate. */
#define _mm512_alignr_epi64(A, B, I) \
  (__m512i)__builtin_ia32_alignq512((__v8di)(__m512i)(A), \
                                    (__v8di)(__m512i)(B), (int)(I))

#define _mm512_mask_alignr_epi64(W, U, A, B, imm) \
  (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                 (__v8di)_mm512_alignr_epi64((A), (B), (imm)), \
                                 (__v8di)(__m512i)(W))

#define _mm512_maskz_alignr_epi64(U, A, B, imm) \
  (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                 (__v8di)_mm512_alignr_epi64((A), (B), (imm)), \
                                 (__v8di)_mm512_setzero_si512())

#define _mm512_alignr_epi32(A, B, I) \
  (__m512i)__builtin_ia32_alignd512((__v16si)(__m512i)(A), \
                                    (__v16si)(__m512i)(B), (int)(I))

#define _mm512_mask_alignr_epi32(W, U, A, B, imm) \
  (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                (__v16si)_mm512_alignr_epi32((A), (B), (imm)), \
                                (__v16si)(__m512i)(W))

#define _mm512_maskz_alignr_epi32(U, A, B, imm) \
  (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                (__v16si)_mm512_alignr_epi32((A), (B), (imm)), \
                                (__v16si)_mm512_setzero_si512())
/* Vector Extract */

/* Extract a selected 256-bit (f64x4) or 128-bit (f32x4) sub-vector; the
   selector I must be an immediate, hence macros. */
#define _mm512_extractf64x4_pd(A, I) \
  (__m256d)__builtin_ia32_extractf64x4_mask((__v8df)(__m512d)(A), (int)(I), \
                                            (__v4df)_mm256_undefined_pd(), \
                                            (__mmask8)-1)

#define _mm512_mask_extractf64x4_pd(W, U, A, imm) \
  (__m256d)__builtin_ia32_extractf64x4_mask((__v8df)(__m512d)(A), (int)(imm), \
                                            (__v4df)(__m256d)(W), \
                                            (__mmask8)(U))

#define _mm512_maskz_extractf64x4_pd(U, A, imm) \
  (__m256d)__builtin_ia32_extractf64x4_mask((__v8df)(__m512d)(A), (int)(imm), \
                                            (__v4df)_mm256_setzero_pd(), \
                                            (__mmask8)(U))

#define _mm512_extractf32x4_ps(A, I) \
  (__m128)__builtin_ia32_extractf32x4_mask((__v16sf)(__m512)(A), (int)(I), \
                                           (__v4sf)_mm_undefined_ps(), \
                                           (__mmask8)-1)

#define _mm512_mask_extractf32x4_ps(W, U, A, imm) \
  (__m128)__builtin_ia32_extractf32x4_mask((__v16sf)(__m512)(A), (int)(imm), \
                                           (__v4sf)(__m128)(W), \
                                           (__mmask8)(U))

#define _mm512_maskz_extractf32x4_ps(U, A, imm) \
  (__m128)__builtin_ia32_extractf32x4_mask((__v16sf)(__m512)(A), (int)(imm), \
                                           (__v4sf)_mm_setzero_ps(), \
                                           (__mmask8)(U))
3372
3373/* Vector Blend */
3374
3375static __inline __m512d __DEFAULT_FN_ATTRS512
3376_mm512_mask_blend_pd(__mmask8 __U, __m512d __A, __m512d __W)
3377{
3378 return (__m512d) __builtin_ia32_selectpd_512 ((__mmask8) __U,
3379 (__v8df) __W,
3380 (__v8df) __A);
3381}
3382
3383static __inline __m512 __DEFAULT_FN_ATTRS512
3384_mm512_mask_blend_ps(__mmask16 __U, __m512 __A, __m512 __W)
3385{
3386 return (__m512) __builtin_ia32_selectps_512 ((__mmask16) __U,
3387 (__v16sf) __W,
3388 (__v16sf) __A);
3389}
3390
3391static __inline __m512i __DEFAULT_FN_ATTRS512
3392_mm512_mask_blend_epi64(__mmask8 __U, __m512i __A, __m512i __W)
3393{
3394 return (__m512i) __builtin_ia32_selectq_512 ((__mmask8) __U,
3395 (__v8di) __W,
3396 (__v8di) __A);
3397}
3398
3399static __inline __m512i __DEFAULT_FN_ATTRS512
3400_mm512_mask_blend_epi32(__mmask16 __U, __m512i __A, __m512i __W)
3401{
3402 return (__m512i) __builtin_ia32_selectd_512 ((__mmask16) __U,
3403 (__v16si) __W,
3404 (__v16si) __A);
3405}
3406
/* Compare */

/* Generic float compare producing a k-mask; P is an immediate _CMP_* predicate
   and R an immediate rounding/SAE control. */
#define _mm512_cmp_round_ps_mask(A, B, P, R) \
  (__mmask16)__builtin_ia32_cmpps512_mask((__v16sf)(__m512)(A), \
                                          (__v16sf)(__m512)(B), (int)(P), \
                                          (__mmask16)-1, (int)(R))

#define _mm512_mask_cmp_round_ps_mask(U, A, B, P, R) \
  (__mmask16)__builtin_ia32_cmpps512_mask((__v16sf)(__m512)(A), \
                                          (__v16sf)(__m512)(B), (int)(P), \
                                          (__mmask16)(U), (int)(R))

#define _mm512_cmp_ps_mask(A, B, P) \
  _mm512_cmp_round_ps_mask((A), (B), (P), _MM_FROUND_CUR_DIRECTION)
#define _mm512_mask_cmp_ps_mask(U, A, B, P) \
  _mm512_mask_cmp_round_ps_mask((U), (A), (B), (P), _MM_FROUND_CUR_DIRECTION)

/* Named-predicate convenience wrappers over _mm512_cmp_ps_mask. */
#define _mm512_cmpeq_ps_mask(A, B) \
    _mm512_cmp_ps_mask((A), (B), _CMP_EQ_OQ)
#define _mm512_mask_cmpeq_ps_mask(k, A, B) \
    _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_EQ_OQ)

#define _mm512_cmplt_ps_mask(A, B) \
    _mm512_cmp_ps_mask((A), (B), _CMP_LT_OS)
#define _mm512_mask_cmplt_ps_mask(k, A, B) \
    _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_LT_OS)

#define _mm512_cmple_ps_mask(A, B) \
    _mm512_cmp_ps_mask((A), (B), _CMP_LE_OS)
#define _mm512_mask_cmple_ps_mask(k, A, B) \
    _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_LE_OS)

#define _mm512_cmpunord_ps_mask(A, B) \
    _mm512_cmp_ps_mask((A), (B), _CMP_UNORD_Q)
#define _mm512_mask_cmpunord_ps_mask(k, A, B) \
    _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_UNORD_Q)

#define _mm512_cmpneq_ps_mask(A, B) \
    _mm512_cmp_ps_mask((A), (B), _CMP_NEQ_UQ)
#define _mm512_mask_cmpneq_ps_mask(k, A, B) \
    _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_NEQ_UQ)

#define _mm512_cmpnlt_ps_mask(A, B) \
    _mm512_cmp_ps_mask((A), (B), _CMP_NLT_US)
#define _mm512_mask_cmpnlt_ps_mask(k, A, B) \
    _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_NLT_US)

#define _mm512_cmpnle_ps_mask(A, B) \
    _mm512_cmp_ps_mask((A), (B), _CMP_NLE_US)
#define _mm512_mask_cmpnle_ps_mask(k, A, B) \
    _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_NLE_US)

#define _mm512_cmpord_ps_mask(A, B) \
    _mm512_cmp_ps_mask((A), (B), _CMP_ORD_Q)
#define _mm512_mask_cmpord_ps_mask(k, A, B) \
    _mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_ORD_Q)

/* Double-precision counterparts (8-lane, __mmask8 results). */
#define _mm512_cmp_round_pd_mask(A, B, P, R) \
  (__mmask8)__builtin_ia32_cmppd512_mask((__v8df)(__m512d)(A), \
                                         (__v8df)(__m512d)(B), (int)(P), \
                                         (__mmask8)-1, (int)(R))

#define _mm512_mask_cmp_round_pd_mask(U, A, B, P, R) \
  (__mmask8)__builtin_ia32_cmppd512_mask((__v8df)(__m512d)(A), \
                                         (__v8df)(__m512d)(B), (int)(P), \
                                         (__mmask8)(U), (int)(R))

#define _mm512_cmp_pd_mask(A, B, P) \
  _mm512_cmp_round_pd_mask((A), (B), (P), _MM_FROUND_CUR_DIRECTION)
#define _mm512_mask_cmp_pd_mask(U, A, B, P) \
  _mm512_mask_cmp_round_pd_mask((U), (A), (B), (P), _MM_FROUND_CUR_DIRECTION)

#define _mm512_cmpeq_pd_mask(A, B) \
    _mm512_cmp_pd_mask((A), (B), _CMP_EQ_OQ)
#define _mm512_mask_cmpeq_pd_mask(k, A, B) \
    _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_EQ_OQ)

#define _mm512_cmplt_pd_mask(A, B) \
    _mm512_cmp_pd_mask((A), (B), _CMP_LT_OS)
#define _mm512_mask_cmplt_pd_mask(k, A, B) \
    _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_LT_OS)

#define _mm512_cmple_pd_mask(A, B) \
    _mm512_cmp_pd_mask((A), (B), _CMP_LE_OS)
#define _mm512_mask_cmple_pd_mask(k, A, B) \
    _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_LE_OS)

#define _mm512_cmpunord_pd_mask(A, B) \
    _mm512_cmp_pd_mask((A), (B), _CMP_UNORD_Q)
#define _mm512_mask_cmpunord_pd_mask(k, A, B) \
    _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_UNORD_Q)

#define _mm512_cmpneq_pd_mask(A, B) \
    _mm512_cmp_pd_mask((A), (B), _CMP_NEQ_UQ)
#define _mm512_mask_cmpneq_pd_mask(k, A, B) \
    _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_NEQ_UQ)

#define _mm512_cmpnlt_pd_mask(A, B) \
    _mm512_cmp_pd_mask((A), (B), _CMP_NLT_US)
#define _mm512_mask_cmpnlt_pd_mask(k, A, B) \
    _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_NLT_US)

#define _mm512_cmpnle_pd_mask(A, B) \
    _mm512_cmp_pd_mask((A), (B), _CMP_NLE_US)
#define _mm512_mask_cmpnle_pd_mask(k, A, B) \
    _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_NLE_US)

#define _mm512_cmpord_pd_mask(A, B) \
    _mm512_cmp_pd_mask((A), (B), _CMP_ORD_Q)
#define _mm512_mask_cmpord_pd_mask(k, A, B) \
    _mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_ORD_Q)
3518
3519/* Conversion */
3520
3521#define _mm512_cvtt_roundps_epu32(A, R) \
3522 (__m512i)__builtin_ia32_cvttps2udq512_mask((__v16sf)(__m512)(A), \
3523 (__v16si)_mm512_undefined_epi32(), \
3524 (__mmask16)-1, (int)(R))
3525
3526#define _mm512_mask_cvtt_roundps_epu32(W, U, A, R) \
3527 (__m512i)__builtin_ia32_cvttps2udq512_mask((__v16sf)(__m512)(A), \
3528 (__v16si)(__m512i)(W), \
3529 (__mmask16)(U), (int)(R))
3530
3531#define _mm512_maskz_cvtt_roundps_epu32(U, A, R) \
3532 (__m512i)__builtin_ia32_cvttps2udq512_mask((__v16sf)(__m512)(A), \
3533 (__v16si)_mm512_setzero_si512(), \
3534 (__mmask16)(U), (int)(R))
3535
3536
3537static __inline __m512i __DEFAULT_FN_ATTRS512
3539{
3540 return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
3541 (__v16si)
3543 (__mmask16) -1,
3545}
3546
3547static __inline__ __m512i __DEFAULT_FN_ATTRS512
3548_mm512_mask_cvttps_epu32 (__m512i __W, __mmask16 __U, __m512 __A)
3549{
3550 return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
3551 (__v16si) __W,
3552 (__mmask16) __U,
3554}
3555
3556static __inline__ __m512i __DEFAULT_FN_ATTRS512
3558{
3559 return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
3560 (__v16si) _mm512_setzero_si512 (),
3561 (__mmask16) __U,
3563}
3564
3565#define _mm512_cvt_roundepi32_ps(A, R) \
3566 (__m512)__builtin_ia32_cvtdq2ps512_mask((__v16si)(__m512i)(A), \
3567 (__v16sf)_mm512_setzero_ps(), \
3568 (__mmask16)-1, (int)(R))
3569
3570#define _mm512_mask_cvt_roundepi32_ps(W, U, A, R) \
3571 (__m512)__builtin_ia32_cvtdq2ps512_mask((__v16si)(__m512i)(A), \
3572 (__v16sf)(__m512)(W), \
3573 (__mmask16)(U), (int)(R))
3574
3575#define _mm512_maskz_cvt_roundepi32_ps(U, A, R) \
3576 (__m512)__builtin_ia32_cvtdq2ps512_mask((__v16si)(__m512i)(A), \
3577 (__v16sf)_mm512_setzero_ps(), \
3578 (__mmask16)(U), (int)(R))
3579
3580#define _mm512_cvt_roundepu32_ps(A, R) \
3581 (__m512)__builtin_ia32_cvtudq2ps512_mask((__v16si)(__m512i)(A), \
3582 (__v16sf)_mm512_setzero_ps(), \
3583 (__mmask16)-1, (int)(R))
3584
3585#define _mm512_mask_cvt_roundepu32_ps(W, U, A, R) \
3586 (__m512)__builtin_ia32_cvtudq2ps512_mask((__v16si)(__m512i)(A), \
3587 (__v16sf)(__m512)(W), \
3588 (__mmask16)(U), (int)(R))
3589
3590#define _mm512_maskz_cvt_roundepu32_ps(U, A, R) \
3591 (__m512)__builtin_ia32_cvtudq2ps512_mask((__v16si)(__m512i)(A), \
3592 (__v16sf)_mm512_setzero_ps(), \
3593 (__mmask16)(U), (int)(R))
3594
3595static __inline__ __m512 __DEFAULT_FN_ATTRS512
3597{
3598 return (__m512)__builtin_convertvector((__v16su)__A, __v16sf);
3599}
3600
3601static __inline__ __m512 __DEFAULT_FN_ATTRS512
3602_mm512_mask_cvtepu32_ps (__m512 __W, __mmask16 __U, __m512i __A)
3603{
3604 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
3605 (__v16sf)_mm512_cvtepu32_ps(__A),
3606 (__v16sf)__W);
3607}
3608
3609static __inline__ __m512 __DEFAULT_FN_ATTRS512
3611{
3612 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
3613 (__v16sf)_mm512_cvtepu32_ps(__A),
3614 (__v16sf)_mm512_setzero_ps());
3615}
3616
3617static __inline __m512d __DEFAULT_FN_ATTRS512
3619{
3620 return (__m512d)__builtin_convertvector((__v8si)__A, __v8df);
3621}
3622
3623static __inline__ __m512d __DEFAULT_FN_ATTRS512
3624_mm512_mask_cvtepi32_pd (__m512d __W, __mmask8 __U, __m256i __A)
3625{
3626 return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
3627 (__v8df)_mm512_cvtepi32_pd(__A),
3628 (__v8df)__W);
3629}
3630
3631static __inline__ __m512d __DEFAULT_FN_ATTRS512
3633{
3634 return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
3635 (__v8df)_mm512_cvtepi32_pd(__A),
3636 (__v8df)_mm512_setzero_pd());
3637}
3638
3639static __inline__ __m512d __DEFAULT_FN_ATTRS512
3641{
3642 return (__m512d) _mm512_cvtepi32_pd(_mm512_castsi512_si256(__A));
3643}
3644
3645static __inline__ __m512d __DEFAULT_FN_ATTRS512
3646_mm512_mask_cvtepi32lo_pd(__m512d __W, __mmask8 __U,__m512i __A)
3647{
3648 return (__m512d) _mm512_mask_cvtepi32_pd(__W, __U, _mm512_castsi512_si256(__A));
3649}
3650
3651static __inline__ __m512 __DEFAULT_FN_ATTRS512
3653{
3654 return (__m512)__builtin_convertvector((__v16si)__A, __v16sf);
3655}
3656
3657static __inline__ __m512 __DEFAULT_FN_ATTRS512
3658_mm512_mask_cvtepi32_ps (__m512 __W, __mmask16 __U, __m512i __A)
3659{
3660 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
3661 (__v16sf)_mm512_cvtepi32_ps(__A),
3662 (__v16sf)__W);
3663}
3664
3665static __inline__ __m512 __DEFAULT_FN_ATTRS512
3667{
3668 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
3669 (__v16sf)_mm512_cvtepi32_ps(__A),
3670 (__v16sf)_mm512_setzero_ps());
3671}
3672
3673static __inline __m512d __DEFAULT_FN_ATTRS512
3675{
3676 return (__m512d)__builtin_convertvector((__v8su)__A, __v8df);
3677}
3678
3679static __inline__ __m512d __DEFAULT_FN_ATTRS512
3680_mm512_mask_cvtepu32_pd (__m512d __W, __mmask8 __U, __m256i __A)
3681{
3682 return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
3683 (__v8df)_mm512_cvtepu32_pd(__A),
3684 (__v8df)__W);
3685}
3686
3687static __inline__ __m512d __DEFAULT_FN_ATTRS512
3689{
3690 return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
3691 (__v8df)_mm512_cvtepu32_pd(__A),
3692 (__v8df)_mm512_setzero_pd());
3693}
3694
3695static __inline__ __m512d __DEFAULT_FN_ATTRS512
3697{
3698 return (__m512d) _mm512_cvtepu32_pd(_mm512_castsi512_si256(__A));
3699}
3700
3701static __inline__ __m512d __DEFAULT_FN_ATTRS512
3702_mm512_mask_cvtepu32lo_pd(__m512d __W, __mmask8 __U,__m512i __A)
3703{
3704 return (__m512d) _mm512_mask_cvtepu32_pd(__W, __U, _mm512_castsi512_si256(__A));
3705}
3706
3707#define _mm512_cvt_roundpd_ps(A, R) \
3708 (__m256)__builtin_ia32_cvtpd2ps512_mask((__v8df)(__m512d)(A), \
3709 (__v8sf)_mm256_setzero_ps(), \
3710 (__mmask8)-1, (int)(R))
3711
3712#define _mm512_mask_cvt_roundpd_ps(W, U, A, R) \
3713 (__m256)__builtin_ia32_cvtpd2ps512_mask((__v8df)(__m512d)(A), \
3714 (__v8sf)(__m256)(W), (__mmask8)(U), \
3715 (int)(R))
3716
3717#define _mm512_maskz_cvt_roundpd_ps(U, A, R) \
3718 (__m256)__builtin_ia32_cvtpd2ps512_mask((__v8df)(__m512d)(A), \
3719 (__v8sf)_mm256_setzero_ps(), \
3720 (__mmask8)(U), (int)(R))
3721
3722static __inline__ __m256 __DEFAULT_FN_ATTRS512
3723_mm512_cvtpd_ps (__m512d __A)
3724{
3725 return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
3726 (__v8sf) _mm256_undefined_ps (),
3727 (__mmask8) -1,
3729}
3730
3731static __inline__ __m256 __DEFAULT_FN_ATTRS512
3732_mm512_mask_cvtpd_ps (__m256 __W, __mmask8 __U, __m512d __A)
3733{
3734 return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
3735 (__v8sf) __W,
3736 (__mmask8) __U,
3738}
3739
3740static __inline__ __m256 __DEFAULT_FN_ATTRS512
3742{
3743 return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
3744 (__v8sf) _mm256_setzero_ps (),
3745 (__mmask8) __U,
3747}
3748
3749static __inline__ __m512 __DEFAULT_FN_ATTRS512
3751{
3752 return (__m512) __builtin_shufflevector((__v8sf) _mm512_cvtpd_ps(__A),
3753 (__v8sf) _mm256_setzero_ps (),
3754 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
3755}
3756
3757static __inline__ __m512 __DEFAULT_FN_ATTRS512
3758_mm512_mask_cvtpd_pslo (__m512 __W, __mmask8 __U,__m512d __A)
3759{
3760 return (__m512) __builtin_shufflevector (
3762 __U, __A),
3763 (__v8sf) _mm256_setzero_ps (),
3764 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
3765}
3766
3767#define _mm512_cvt_roundps_ph(A, I) \
3768 (__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \
3769 (__v16hi)_mm256_undefined_si256(), \
3770 (__mmask16)-1)
3771
3772#define _mm512_mask_cvt_roundps_ph(U, W, A, I) \
3773 (__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \
3774 (__v16hi)(__m256i)(U), \
3775 (__mmask16)(W))
3776
3777#define _mm512_maskz_cvt_roundps_ph(W, A, I) \
3778 (__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \
3779 (__v16hi)_mm256_setzero_si256(), \
3780 (__mmask16)(W))
3781
3782#define _mm512_cvtps_ph _mm512_cvt_roundps_ph
3783#define _mm512_mask_cvtps_ph _mm512_mask_cvt_roundps_ph
3784#define _mm512_maskz_cvtps_ph _mm512_maskz_cvt_roundps_ph
3785
3786#define _mm512_cvt_roundph_ps(A, R) \
3787 (__m512)__builtin_ia32_vcvtph2ps512_mask((__v16hi)(__m256i)(A), \
3788 (__v16sf)_mm512_undefined_ps(), \
3789 (__mmask16)-1, (int)(R))
3790
3791#define _mm512_mask_cvt_roundph_ps(W, U, A, R) \
3792 (__m512)__builtin_ia32_vcvtph2ps512_mask((__v16hi)(__m256i)(A), \
3793 (__v16sf)(__m512)(W), \
3794 (__mmask16)(U), (int)(R))
3795
3796#define _mm512_maskz_cvt_roundph_ps(U, A, R) \
3797 (__m512)__builtin_ia32_vcvtph2ps512_mask((__v16hi)(__m256i)(A), \
3798 (__v16sf)_mm512_setzero_ps(), \
3799 (__mmask16)(U), (int)(R))
3800
3801
3802static __inline __m512 __DEFAULT_FN_ATTRS512
3804{
3805 return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
3806 (__v16sf)
3808 (__mmask16) -1,
3810}
3811
3812static __inline__ __m512 __DEFAULT_FN_ATTRS512
3813_mm512_mask_cvtph_ps (__m512 __W, __mmask16 __U, __m256i __A)
3814{
3815 return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
3816 (__v16sf) __W,
3817 (__mmask16) __U,
3819}
3820
3821static __inline__ __m512 __DEFAULT_FN_ATTRS512
3823{
3824 return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
3825 (__v16sf) _mm512_setzero_ps (),
3826 (__mmask16) __U,
3828}
3829
3830#define _mm512_cvtt_roundpd_epi32(A, R) \
3831 (__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df)(__m512d)(A), \
3832 (__v8si)_mm256_setzero_si256(), \
3833 (__mmask8)-1, (int)(R))
3834
3835#define _mm512_mask_cvtt_roundpd_epi32(W, U, A, R) \
3836 (__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df)(__m512d)(A), \
3837 (__v8si)(__m256i)(W), \
3838 (__mmask8)(U), (int)(R))
3839
3840#define _mm512_maskz_cvtt_roundpd_epi32(U, A, R) \
3841 (__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df)(__m512d)(A), \
3842 (__v8si)_mm256_setzero_si256(), \
3843 (__mmask8)(U), (int)(R))
3844
3845static __inline __m256i __DEFAULT_FN_ATTRS512
3847{
3848 return (__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df) __a,
3849 (__v8si)_mm256_setzero_si256(),
3850 (__mmask8) -1,
3852}
3853
3854static __inline__ __m256i __DEFAULT_FN_ATTRS512
3855_mm512_mask_cvttpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A)
3856{
3857 return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
3858 (__v8si) __W,
3859 (__mmask8) __U,
3861}
3862
3863static __inline__ __m256i __DEFAULT_FN_ATTRS512
3865{
3866 return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
3867 (__v8si) _mm256_setzero_si256 (),
3868 (__mmask8) __U,
3870}
3871
3872#define _mm512_cvtt_roundps_epi32(A, R) \
3873 (__m512i)__builtin_ia32_cvttps2dq512_mask((__v16sf)(__m512)(A), \
3874 (__v16si)_mm512_setzero_si512(), \
3875 (__mmask16)-1, (int)(R))
3876
3877#define _mm512_mask_cvtt_roundps_epi32(W, U, A, R) \
3878 (__m512i)__builtin_ia32_cvttps2dq512_mask((__v16sf)(__m512)(A), \
3879 (__v16si)(__m512i)(W), \
3880 (__mmask16)(U), (int)(R))
3881
3882#define _mm512_maskz_cvtt_roundps_epi32(U, A, R) \
3883 (__m512i)__builtin_ia32_cvttps2dq512_mask((__v16sf)(__m512)(A), \
3884 (__v16si)_mm512_setzero_si512(), \
3885 (__mmask16)(U), (int)(R))
3886
3887static __inline __m512i __DEFAULT_FN_ATTRS512
3889{
3890 return (__m512i)
3891 __builtin_ia32_cvttps2dq512_mask((__v16sf) __a,
3892 (__v16si) _mm512_setzero_si512 (),
3894}
3895
3896static __inline__ __m512i __DEFAULT_FN_ATTRS512
3897_mm512_mask_cvttps_epi32 (__m512i __W, __mmask16 __U, __m512 __A)
3898{
3899 return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
3900 (__v16si) __W,
3901 (__mmask16) __U,
3903}
3904
3905static __inline__ __m512i __DEFAULT_FN_ATTRS512
3907{
3908 return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
3909 (__v16si) _mm512_setzero_si512 (),
3910 (__mmask16) __U,
3912}
3913
3914#define _mm512_cvt_roundps_epi32(A, R) \
3915 (__m512i)__builtin_ia32_cvtps2dq512_mask((__v16sf)(__m512)(A), \
3916 (__v16si)_mm512_setzero_si512(), \
3917 (__mmask16)-1, (int)(R))
3918
3919#define _mm512_mask_cvt_roundps_epi32(W, U, A, R) \
3920 (__m512i)__builtin_ia32_cvtps2dq512_mask((__v16sf)(__m512)(A), \
3921 (__v16si)(__m512i)(W), \
3922 (__mmask16)(U), (int)(R))
3923
3924#define _mm512_maskz_cvt_roundps_epi32(U, A, R) \
3925 (__m512i)__builtin_ia32_cvtps2dq512_mask((__v16sf)(__m512)(A), \
3926 (__v16si)_mm512_setzero_si512(), \
3927 (__mmask16)(U), (int)(R))
3928
3929static __inline__ __m512i __DEFAULT_FN_ATTRS512
3931{
3932 return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
3933 (__v16si) _mm512_undefined_epi32 (),
3934 (__mmask16) -1,
3936}
3937
3938static __inline__ __m512i __DEFAULT_FN_ATTRS512
3939_mm512_mask_cvtps_epi32 (__m512i __W, __mmask16 __U, __m512 __A)
3940{
3941 return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
3942 (__v16si) __W,
3943 (__mmask16) __U,
3945}
3946
3947static __inline__ __m512i __DEFAULT_FN_ATTRS512
3949{
3950 return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
3951 (__v16si)
3953 (__mmask16) __U,
3955}
3956
3957#define _mm512_cvt_roundpd_epi32(A, R) \
3958 (__m256i)__builtin_ia32_cvtpd2dq512_mask((__v8df)(__m512d)(A), \
3959 (__v8si)_mm256_setzero_si256(), \
3960 (__mmask8)-1, (int)(R))
3961
3962#define _mm512_mask_cvt_roundpd_epi32(W, U, A, R) \
3963 (__m256i)__builtin_ia32_cvtpd2dq512_mask((__v8df)(__m512d)(A), \
3964 (__v8si)(__m256i)(W), \
3965 (__mmask8)(U), (int)(R))
3966
3967#define _mm512_maskz_cvt_roundpd_epi32(U, A, R) \
3968 (__m256i)__builtin_ia32_cvtpd2dq512_mask((__v8df)(__m512d)(A), \
3969 (__v8si)_mm256_setzero_si256(), \
3970 (__mmask8)(U), (int)(R))
3971
3972static __inline__ __m256i __DEFAULT_FN_ATTRS512
3974{
3975 return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
3976 (__v8si)
3978 (__mmask8) -1,
3980}
3981
3982static __inline__ __m256i __DEFAULT_FN_ATTRS512
3983_mm512_mask_cvtpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A)
3984{
3985 return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
3986 (__v8si) __W,
3987 (__mmask8) __U,
3989}
3990
3991static __inline__ __m256i __DEFAULT_FN_ATTRS512
3993{
3994 return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
3995 (__v8si)
3997 (__mmask8) __U,
3999}
4000
4001#define _mm512_cvt_roundps_epu32(A, R) \
4002 (__m512i)__builtin_ia32_cvtps2udq512_mask((__v16sf)(__m512)(A), \
4003 (__v16si)_mm512_setzero_si512(), \
4004 (__mmask16)-1, (int)(R))
4005
4006#define _mm512_mask_cvt_roundps_epu32(W, U, A, R) \
4007 (__m512i)__builtin_ia32_cvtps2udq512_mask((__v16sf)(__m512)(A), \
4008 (__v16si)(__m512i)(W), \
4009 (__mmask16)(U), (int)(R))
4010
4011#define _mm512_maskz_cvt_roundps_epu32(U, A, R) \
4012 (__m512i)__builtin_ia32_cvtps2udq512_mask((__v16sf)(__m512)(A), \
4013 (__v16si)_mm512_setzero_si512(), \
4014 (__mmask16)(U), (int)(R))
4015
4016static __inline__ __m512i __DEFAULT_FN_ATTRS512
4018{
4019 return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,\
4020 (__v16si)\
4022 (__mmask16) -1,\
4024}
4025
4026static __inline__ __m512i __DEFAULT_FN_ATTRS512
4027_mm512_mask_cvtps_epu32 (__m512i __W, __mmask16 __U, __m512 __A)
4028{
4029 return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
4030 (__v16si) __W,
4031 (__mmask16) __U,
4033}
4034
4035static __inline__ __m512i __DEFAULT_FN_ATTRS512
4037{
4038 return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
4039 (__v16si)
4041 (__mmask16) __U ,
4043}
4044
4045#define _mm512_cvt_roundpd_epu32(A, R) \
4046 (__m256i)__builtin_ia32_cvtpd2udq512_mask((__v8df)(__m512d)(A), \
4047 (__v8si)_mm256_setzero_si256(), \
4048 (__mmask8)-1, (int)(R))
4049
4050#define _mm512_mask_cvt_roundpd_epu32(W, U, A, R) \
4051 (__m256i)__builtin_ia32_cvtpd2udq512_mask((__v8df)(__m512d)(A), \
4052 (__v8si)(__m256i)(W), \
4053 (__mmask8)(U), (int)(R))
4054
4055#define _mm512_maskz_cvt_roundpd_epu32(U, A, R) \
4056 (__m256i)__builtin_ia32_cvtpd2udq512_mask((__v8df)(__m512d)(A), \
4057 (__v8si)_mm256_setzero_si256(), \
4058 (__mmask8)(U), (int)(R))
4059
4060static __inline__ __m256i __DEFAULT_FN_ATTRS512
4062{
4063 return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
4064 (__v8si)
4066 (__mmask8) -1,
4068}
4069
4070static __inline__ __m256i __DEFAULT_FN_ATTRS512
4071_mm512_mask_cvtpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A)
4072{
4073 return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
4074 (__v8si) __W,
4075 (__mmask8) __U,
4077}
4078
4079static __inline__ __m256i __DEFAULT_FN_ATTRS512
4081{
4082 return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
4083 (__v8si)
4085 (__mmask8) __U,
4087}
4088
4089static __inline__ double __DEFAULT_FN_ATTRS512
4091{
4092 return __a[0];
4093}
4094
4095static __inline__ float __DEFAULT_FN_ATTRS512
4097{
4098 return __a[0];
4099}
4100
4101/* Unpack and Interleave */
4102
4103static __inline __m512d __DEFAULT_FN_ATTRS512
4104_mm512_unpackhi_pd(__m512d __a, __m512d __b)
4105{
4106 return (__m512d)__builtin_shufflevector((__v8df)__a, (__v8df)__b,
4107 1, 9, 1+2, 9+2, 1+4, 9+4, 1+6, 9+6);
4108}
4109
4110static __inline__ __m512d __DEFAULT_FN_ATTRS512
4111_mm512_mask_unpackhi_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
4112{
4113 return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
4114 (__v8df)_mm512_unpackhi_pd(__A, __B),
4115 (__v8df)__W);
4116}
4117
4118static __inline__ __m512d __DEFAULT_FN_ATTRS512
4119_mm512_maskz_unpackhi_pd(__mmask8 __U, __m512d __A, __m512d __B)
4120{
4121 return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
4122 (__v8df)_mm512_unpackhi_pd(__A, __B),
4123 (__v8df)_mm512_setzero_pd());
4124}
4125
4126static __inline __m512d __DEFAULT_FN_ATTRS512
4127_mm512_unpacklo_pd(__m512d __a, __m512d __b)
4128{
4129 return (__m512d)__builtin_shufflevector((__v8df)__a, (__v8df)__b,
4130 0, 8, 0+2, 8+2, 0+4, 8+4, 0+6, 8+6);
4131}
4132
4133static __inline__ __m512d __DEFAULT_FN_ATTRS512
4134_mm512_mask_unpacklo_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
4135{
4136 return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
4137 (__v8df)_mm512_unpacklo_pd(__A, __B),
4138 (__v8df)__W);
4139}
4140
4141static __inline__ __m512d __DEFAULT_FN_ATTRS512
4142_mm512_maskz_unpacklo_pd (__mmask8 __U, __m512d __A, __m512d __B)
4143{
4144 return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
4145 (__v8df)_mm512_unpacklo_pd(__A, __B),
4146 (__v8df)_mm512_setzero_pd());
4147}
4148
4149static __inline __m512 __DEFAULT_FN_ATTRS512
4151{
4152 return (__m512)__builtin_shufflevector((__v16sf)__a, (__v16sf)__b,
4153 2, 18, 3, 19,
4154 2+4, 18+4, 3+4, 19+4,
4155 2+8, 18+8, 3+8, 19+8,
4156 2+12, 18+12, 3+12, 19+12);
4157}
4158
4159static __inline__ __m512 __DEFAULT_FN_ATTRS512
4160_mm512_mask_unpackhi_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
4161{
4162 return (__m512)__builtin_ia32_selectps_512((__mmask16) __U,
4163 (__v16sf)_mm512_unpackhi_ps(__A, __B),
4164 (__v16sf)__W);
4165}
4166
4167static __inline__ __m512 __DEFAULT_FN_ATTRS512
4168_mm512_maskz_unpackhi_ps (__mmask16 __U, __m512 __A, __m512 __B)
4169{
4170 return (__m512)__builtin_ia32_selectps_512((__mmask16) __U,
4171 (__v16sf)_mm512_unpackhi_ps(__A, __B),
4172 (__v16sf)_mm512_setzero_ps());
4173}
4174
4175static __inline __m512 __DEFAULT_FN_ATTRS512
4177{
4178 return (__m512)__builtin_shufflevector((__v16sf)__a, (__v16sf)__b,
4179 0, 16, 1, 17,
4180 0+4, 16+4, 1+4, 17+4,
4181 0+8, 16+8, 1+8, 17+8,
4182 0+12, 16+12, 1+12, 17+12);
4183}
4184
4185static __inline__ __m512 __DEFAULT_FN_ATTRS512
4186_mm512_mask_unpacklo_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
4187{
4188 return (__m512)__builtin_ia32_selectps_512((__mmask16) __U,
4189 (__v16sf)_mm512_unpacklo_ps(__A, __B),
4190 (__v16sf)__W);
4191}
4192
4193static __inline__ __m512 __DEFAULT_FN_ATTRS512
4194_mm512_maskz_unpacklo_ps (__mmask16 __U, __m512 __A, __m512 __B)
4195{
4196 return (__m512)__builtin_ia32_selectps_512((__mmask16) __U,
4197 (__v16sf)_mm512_unpacklo_ps(__A, __B),
4198 (__v16sf)_mm512_setzero_ps());
4199}
4200
4201static __inline__ __m512i __DEFAULT_FN_ATTRS512
4202_mm512_unpackhi_epi32(__m512i __A, __m512i __B)
4203{
4204 return (__m512i)__builtin_shufflevector((__v16si)__A, (__v16si)__B,
4205 2, 18, 3, 19,
4206 2+4, 18+4, 3+4, 19+4,
4207 2+8, 18+8, 3+8, 19+8,
4208 2+12, 18+12, 3+12, 19+12);
4209}
4210
4211static __inline__ __m512i __DEFAULT_FN_ATTRS512
4212_mm512_mask_unpackhi_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
4213{
4214 return (__m512i)__builtin_ia32_selectd_512((__mmask16) __U,
4215 (__v16si)_mm512_unpackhi_epi32(__A, __B),
4216 (__v16si)__W);
4217}
4218
4219static __inline__ __m512i __DEFAULT_FN_ATTRS512
4220_mm512_maskz_unpackhi_epi32(__mmask16 __U, __m512i __A, __m512i __B)
4221{
4222 return (__m512i)__builtin_ia32_selectd_512((__mmask16) __U,
4223 (__v16si)_mm512_unpackhi_epi32(__A, __B),
4224 (__v16si)_mm512_setzero_si512());
4225}
4226
4227static __inline__ __m512i __DEFAULT_FN_ATTRS512
4228_mm512_unpacklo_epi32(__m512i __A, __m512i __B)
4229{
4230 return (__m512i)__builtin_shufflevector((__v16si)__A, (__v16si)__B,
4231 0, 16, 1, 17,
4232 0+4, 16+4, 1+4, 17+4,
4233 0+8, 16+8, 1+8, 17+8,
4234 0+12, 16+12, 1+12, 17+12);
4235}
4236
4237static __inline__ __m512i __DEFAULT_FN_ATTRS512
4238_mm512_mask_unpacklo_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
4239{
4240 return (__m512i)__builtin_ia32_selectd_512((__mmask16) __U,
4241 (__v16si)_mm512_unpacklo_epi32(__A, __B),
4242 (__v16si)__W);
4243}
4244
4245static __inline__ __m512i __DEFAULT_FN_ATTRS512
4246_mm512_maskz_unpacklo_epi32(__mmask16 __U, __m512i __A, __m512i __B)
4247{
4248 return (__m512i)__builtin_ia32_selectd_512((__mmask16) __U,
4249 (__v16si)_mm512_unpacklo_epi32(__A, __B),
4250 (__v16si)_mm512_setzero_si512());
4251}
4252
4253static __inline__ __m512i __DEFAULT_FN_ATTRS512
4254_mm512_unpackhi_epi64(__m512i __A, __m512i __B)
4255{
4256 return (__m512i)__builtin_shufflevector((__v8di)__A, (__v8di)__B,
4257 1, 9, 1+2, 9+2, 1+4, 9+4, 1+6, 9+6);
4258}
4259
4260static __inline__ __m512i __DEFAULT_FN_ATTRS512
4261_mm512_mask_unpackhi_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
4262{
4263 return (__m512i)__builtin_ia32_selectq_512((__mmask8) __U,
4264 (__v8di)_mm512_unpackhi_epi64(__A, __B),
4265 (__v8di)__W);
4266}
4267
4268static __inline__ __m512i __DEFAULT_FN_ATTRS512
4269_mm512_maskz_unpackhi_epi64(__mmask8 __U, __m512i __A, __m512i __B)
4270{
4271 return (__m512i)__builtin_ia32_selectq_512((__mmask8) __U,
4272 (__v8di)_mm512_unpackhi_epi64(__A, __B),
4273 (__v8di)_mm512_setzero_si512());
4274}
4275
4276static __inline__ __m512i __DEFAULT_FN_ATTRS512
4277_mm512_unpacklo_epi64 (__m512i __A, __m512i __B)
4278{
4279 return (__m512i)__builtin_shufflevector((__v8di)__A, (__v8di)__B,
4280 0, 8, 0+2, 8+2, 0+4, 8+4, 0+6, 8+6);
4281}
4282
4283static __inline__ __m512i __DEFAULT_FN_ATTRS512
4284_mm512_mask_unpacklo_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
4285{
4286 return (__m512i)__builtin_ia32_selectq_512((__mmask8) __U,
4287 (__v8di)_mm512_unpacklo_epi64(__A, __B),
4288 (__v8di)__W);
4289}
4290
4291static __inline__ __m512i __DEFAULT_FN_ATTRS512
4292_mm512_maskz_unpacklo_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
4293{
4294 return (__m512i)__builtin_ia32_selectq_512((__mmask8) __U,
4295 (__v8di)_mm512_unpacklo_epi64(__A, __B),
4296 (__v8di)_mm512_setzero_si512());
4297}
4298
4299
4300/* SIMD load ops */
4301
4302static __inline __m512i __DEFAULT_FN_ATTRS512
4303_mm512_loadu_si512 (void const *__P)
4304{
4305 struct __loadu_si512 {
4306 __m512i_u __v;
4307 } __attribute__((__packed__, __may_alias__));
4308 return ((const struct __loadu_si512*)__P)->__v;
4309}
4310
4311static __inline __m512i __DEFAULT_FN_ATTRS512
4312_mm512_loadu_epi32 (void const *__P)
4313{
4314 struct __loadu_epi32 {
4315 __m512i_u __v;
4316 } __attribute__((__packed__, __may_alias__));
4317 return ((const struct __loadu_epi32*)__P)->__v;
4318}
4319
4320static __inline __m512i __DEFAULT_FN_ATTRS512
4321_mm512_mask_loadu_epi32 (__m512i __W, __mmask16 __U, void const *__P)
4322{
4323 return (__m512i) __builtin_ia32_loaddqusi512_mask ((const int *) __P,
4324 (__v16si) __W,
4325 (__mmask16) __U);
4326}
4327
4328
4329static __inline __m512i __DEFAULT_FN_ATTRS512
4331{
4332 return (__m512i) __builtin_ia32_loaddqusi512_mask ((const int *)__P,
4333 (__v16si)
4335 (__mmask16) __U);
4336}
4337
4338static __inline __m512i __DEFAULT_FN_ATTRS512
4339_mm512_loadu_epi64 (void const *__P)
4340{
4341 struct __loadu_epi64 {
4342 __m512i_u __v;
4343 } __attribute__((__packed__, __may_alias__));
4344 return ((const struct __loadu_epi64*)__P)->__v;
4345}
4346
4347static __inline __m512i __DEFAULT_FN_ATTRS512
4348_mm512_mask_loadu_epi64 (__m512i __W, __mmask8 __U, void const *__P)
4349{
4350 return (__m512i) __builtin_ia32_loaddqudi512_mask ((const long long *) __P,
4351 (__v8di) __W,
4352 (__mmask8) __U);
4353}
4354
4355static __inline __m512i __DEFAULT_FN_ATTRS512
4357{
4358 return (__m512i) __builtin_ia32_loaddqudi512_mask ((const long long *)__P,
4359 (__v8di)
4361 (__mmask8) __U);
4362}
4363
4364static __inline __m512 __DEFAULT_FN_ATTRS512
4365_mm512_mask_loadu_ps (__m512 __W, __mmask16 __U, void const *__P)
4366{
4367 return (__m512) __builtin_ia32_loadups512_mask ((const float *) __P,
4368 (__v16sf) __W,
4369 (__mmask16) __U);
4370}
4371
4372static __inline __m512 __DEFAULT_FN_ATTRS512
4374{
4375 return (__m512) __builtin_ia32_loadups512_mask ((const float *)__P,
4376 (__v16sf)
4378 (__mmask16) __U);
4379}
4380
4381static __inline __m512d __DEFAULT_FN_ATTRS512
4382_mm512_mask_loadu_pd (__m512d __W, __mmask8 __U, void const *__P)
4383{
4384 return (__m512d) __builtin_ia32_loadupd512_mask ((const double *) __P,
4385 (__v8df) __W,
4386 (__mmask8) __U);
4387}
4388
4389static __inline __m512d __DEFAULT_FN_ATTRS512
4390_mm512_maskz_loadu_pd(__mmask8 __U, void const *__P)
4391{
4392 return (__m512d) __builtin_ia32_loadupd512_mask ((const double *)__P,
4393 (__v8df)
4395 (__mmask8) __U);
4396}
4397
4398static __inline __m512d __DEFAULT_FN_ATTRS512
4400{
4401 struct __loadu_pd {
4402 __m512d_u __v;
4403 } __attribute__((__packed__, __may_alias__));
4404 return ((const struct __loadu_pd*)__p)->__v;
4405}
4406
4407static __inline __m512 __DEFAULT_FN_ATTRS512
4409{
4410 struct __loadu_ps {
4411 __m512_u __v;
4412 } __attribute__((__packed__, __may_alias__));
4413 return ((const struct __loadu_ps*)__p)->__v;
4414}
4415
4416static __inline __m512 __DEFAULT_FN_ATTRS512
4418{
4419 return *(const __m512*)__p;
4420}
4421
4422static __inline __m512 __DEFAULT_FN_ATTRS512
4423_mm512_mask_load_ps (__m512 __W, __mmask16 __U, void const *__P)
4424{
4425 return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *) __P,
4426 (__v16sf) __W,
4427 (__mmask16) __U);
4428}
4429
4430static __inline __m512 __DEFAULT_FN_ATTRS512
4431_mm512_maskz_load_ps(__mmask16 __U, void const *__P)
4432{
4433 return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *)__P,
4434 (__v16sf)
4436 (__mmask16) __U);
4437}
4438
4439static __inline __m512d __DEFAULT_FN_ATTRS512
4441{
4442 return *(const __m512d*)__p;
4443}
4444
4445static __inline __m512d __DEFAULT_FN_ATTRS512
4446_mm512_mask_load_pd (__m512d __W, __mmask8 __U, void const *__P)
4447{
4448 return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *) __P,
4449 (__v8df) __W,
4450 (__mmask8) __U);
4451}
4452
4453static __inline __m512d __DEFAULT_FN_ATTRS512
4454_mm512_maskz_load_pd(__mmask8 __U, void const *__P)
4455{
4456 return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *)__P,
4457 (__v8df)
4459 (__mmask8) __U);
4460}
4461
4462static __inline __m512i __DEFAULT_FN_ATTRS512
4463_mm512_load_si512 (void const *__P)
4464{
4465 return *(const __m512i *) __P;
4466}
4467
4468static __inline __m512i __DEFAULT_FN_ATTRS512
4469_mm512_load_epi32 (void const *__P)
4470{
4471 return *(const __m512i *) __P;
4472}
4473
4474static __inline __m512i __DEFAULT_FN_ATTRS512
4475_mm512_load_epi64 (void const *__P)
4476{
4477 return *(const __m512i *) __P;
4478}
4479
/* SIMD store ops */
4481
4482static __inline void __DEFAULT_FN_ATTRS512
4483_mm512_storeu_epi64 (void *__P, __m512i __A)
4484{
4485 struct __storeu_epi64 {
4486 __m512i_u __v;
4487 } __attribute__((__packed__, __may_alias__));
4488 ((struct __storeu_epi64*)__P)->__v = __A;
4489}
4490
4491static __inline void __DEFAULT_FN_ATTRS512
4492_mm512_mask_storeu_epi64(void *__P, __mmask8 __U, __m512i __A)
4493{
4494 __builtin_ia32_storedqudi512_mask ((long long *)__P, (__v8di) __A,
4495 (__mmask8) __U);
4496}
4497
4498static __inline void __DEFAULT_FN_ATTRS512
4499_mm512_storeu_si512 (void *__P, __m512i __A)
4500{
4501 struct __storeu_si512 {
4502 __m512i_u __v;
4503 } __attribute__((__packed__, __may_alias__));
4504 ((struct __storeu_si512*)__P)->__v = __A;
4505}
4506
4507static __inline void __DEFAULT_FN_ATTRS512
4508_mm512_storeu_epi32 (void *__P, __m512i __A)
4509{
4510 struct __storeu_epi32 {
4511 __m512i_u __v;
4512 } __attribute__((__packed__, __may_alias__));
4513 ((struct __storeu_epi32*)__P)->__v = __A;
4514}
4515
4516static __inline void __DEFAULT_FN_ATTRS512
4517_mm512_mask_storeu_epi32(void *__P, __mmask16 __U, __m512i __A)
4518{
4519 __builtin_ia32_storedqusi512_mask ((int *)__P, (__v16si) __A,
4520 (__mmask16) __U);
4521}
4522
4523static __inline void __DEFAULT_FN_ATTRS512
4524_mm512_mask_storeu_pd(void *__P, __mmask8 __U, __m512d __A)
4525{
4526 __builtin_ia32_storeupd512_mask ((double *)__P, (__v8df) __A, (__mmask8) __U);
4527}
4528
4529static __inline void __DEFAULT_FN_ATTRS512
4530_mm512_storeu_pd(void *__P, __m512d __A)
4531{
4532 struct __storeu_pd {
4533 __m512d_u __v;
4534 } __attribute__((__packed__, __may_alias__));
4535 ((struct __storeu_pd*)__P)->__v = __A;
4536}
4537
4538static __inline void __DEFAULT_FN_ATTRS512
4539_mm512_mask_storeu_ps(void *__P, __mmask16 __U, __m512 __A)
4540{
4541 __builtin_ia32_storeups512_mask ((float *)__P, (__v16sf) __A,
4542 (__mmask16) __U);
4543}
4544
4545static __inline void __DEFAULT_FN_ATTRS512
4546_mm512_storeu_ps(void *__P, __m512 __A)
4547{
4548 struct __storeu_ps {
4549 __m512_u __v;
4550 } __attribute__((__packed__, __may_alias__));
4551 ((struct __storeu_ps*)__P)->__v = __A;
4552}
4553
4554static __inline void __DEFAULT_FN_ATTRS512
4555_mm512_mask_store_pd(void *__P, __mmask8 __U, __m512d __A)
4556{
4557 __builtin_ia32_storeapd512_mask ((__v8df *)__P, (__v8df) __A, (__mmask8) __U);
4558}
4559
4560static __inline void __DEFAULT_FN_ATTRS512
4561_mm512_store_pd(void *__P, __m512d __A)
4562{
4563 *(__m512d*)__P = __A;
4564}
4565
4566static __inline void __DEFAULT_FN_ATTRS512
4567_mm512_mask_store_ps(void *__P, __mmask16 __U, __m512 __A)
4568{
4569 __builtin_ia32_storeaps512_mask ((__v16sf *)__P, (__v16sf) __A,
4570 (__mmask16) __U);
4571}
4572
4573static __inline void __DEFAULT_FN_ATTRS512
4574_mm512_store_ps(void *__P, __m512 __A)
4575{
4576 *(__m512*)__P = __A;
4577}
4578
4579static __inline void __DEFAULT_FN_ATTRS512
4580_mm512_store_si512 (void *__P, __m512i __A)
4581{
4582 *(__m512i *) __P = __A;
4583}
4584
4585static __inline void __DEFAULT_FN_ATTRS512
4586_mm512_store_epi32 (void *__P, __m512i __A)
4587{
4588 *(__m512i *) __P = __A;
4589}
4590
4591static __inline void __DEFAULT_FN_ATTRS512
4592_mm512_store_epi64 (void *__P, __m512i __A)
4593{
4594 *(__m512i *) __P = __A;
4595}
4596
/* Mask ops */
4598
4599static __inline __mmask16 __DEFAULT_FN_ATTRS
4601{
4602 return __builtin_ia32_knothi(__M);
4603}
4604
/* Integer compare */
4606
/* Convenience aliases: each named comparison forwards to the generic
   _mm512_cmp_*_mask macro with the matching _MM_CMPINT_* predicate. */
#define _mm512_cmpeq_epi32_mask(A, B) \
    _mm512_cmp_epi32_mask((A), (B), _MM_CMPINT_EQ)
#define _mm512_mask_cmpeq_epi32_mask(k, A, B) \
    _mm512_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm512_cmpge_epi32_mask(A, B) \
    _mm512_cmp_epi32_mask((A), (B), _MM_CMPINT_GE)
#define _mm512_mask_cmpge_epi32_mask(k, A, B) \
    _mm512_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_GE)
#define _mm512_cmpgt_epi32_mask(A, B) \
    _mm512_cmp_epi32_mask((A), (B), _MM_CMPINT_GT)
#define _mm512_mask_cmpgt_epi32_mask(k, A, B) \
    _mm512_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm512_cmple_epi32_mask(A, B) \
    _mm512_cmp_epi32_mask((A), (B), _MM_CMPINT_LE)
#define _mm512_mask_cmple_epi32_mask(k, A, B) \
    _mm512_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm512_cmplt_epi32_mask(A, B) \
    _mm512_cmp_epi32_mask((A), (B), _MM_CMPINT_LT)
#define _mm512_mask_cmplt_epi32_mask(k, A, B) \
    _mm512_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm512_cmpneq_epi32_mask(A, B) \
    _mm512_cmp_epi32_mask((A), (B), _MM_CMPINT_NE)
#define _mm512_mask_cmpneq_epi32_mask(k, A, B) \
    _mm512_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_NE)

#define _mm512_cmpeq_epu32_mask(A, B) \
    _mm512_cmp_epu32_mask((A), (B), _MM_CMPINT_EQ)
#define _mm512_mask_cmpeq_epu32_mask(k, A, B) \
    _mm512_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm512_cmpge_epu32_mask(A, B) \
    _mm512_cmp_epu32_mask((A), (B), _MM_CMPINT_GE)
#define _mm512_mask_cmpge_epu32_mask(k, A, B) \
    _mm512_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_GE)
#define _mm512_cmpgt_epu32_mask(A, B) \
    _mm512_cmp_epu32_mask((A), (B), _MM_CMPINT_GT)
#define _mm512_mask_cmpgt_epu32_mask(k, A, B) \
    _mm512_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm512_cmple_epu32_mask(A, B) \
    _mm512_cmp_epu32_mask((A), (B), _MM_CMPINT_LE)
#define _mm512_mask_cmple_epu32_mask(k, A, B) \
    _mm512_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm512_cmplt_epu32_mask(A, B) \
    _mm512_cmp_epu32_mask((A), (B), _MM_CMPINT_LT)
#define _mm512_mask_cmplt_epu32_mask(k, A, B) \
    _mm512_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm512_cmpneq_epu32_mask(A, B) \
    _mm512_cmp_epu32_mask((A), (B), _MM_CMPINT_NE)
#define _mm512_mask_cmpneq_epu32_mask(k, A, B) \
    _mm512_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_NE)

#define _mm512_cmpeq_epi64_mask(A, B) \
    _mm512_cmp_epi64_mask((A), (B), _MM_CMPINT_EQ)
#define _mm512_mask_cmpeq_epi64_mask(k, A, B) \
    _mm512_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm512_cmpge_epi64_mask(A, B) \
    _mm512_cmp_epi64_mask((A), (B), _MM_CMPINT_GE)
#define _mm512_mask_cmpge_epi64_mask(k, A, B) \
    _mm512_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_GE)
#define _mm512_cmpgt_epi64_mask(A, B) \
    _mm512_cmp_epi64_mask((A), (B), _MM_CMPINT_GT)
#define _mm512_mask_cmpgt_epi64_mask(k, A, B) \
    _mm512_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm512_cmple_epi64_mask(A, B) \
    _mm512_cmp_epi64_mask((A), (B), _MM_CMPINT_LE)
#define _mm512_mask_cmple_epi64_mask(k, A, B) \
    _mm512_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm512_cmplt_epi64_mask(A, B) \
    _mm512_cmp_epi64_mask((A), (B), _MM_CMPINT_LT)
#define _mm512_mask_cmplt_epi64_mask(k, A, B) \
    _mm512_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm512_cmpneq_epi64_mask(A, B) \
    _mm512_cmp_epi64_mask((A), (B), _MM_CMPINT_NE)
#define _mm512_mask_cmpneq_epi64_mask(k, A, B) \
    _mm512_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_NE)

#define _mm512_cmpeq_epu64_mask(A, B) \
    _mm512_cmp_epu64_mask((A), (B), _MM_CMPINT_EQ)
#define _mm512_mask_cmpeq_epu64_mask(k, A, B) \
    _mm512_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm512_cmpge_epu64_mask(A, B) \
    _mm512_cmp_epu64_mask((A), (B), _MM_CMPINT_GE)
#define _mm512_mask_cmpge_epu64_mask(k, A, B) \
    _mm512_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_GE)
#define _mm512_cmpgt_epu64_mask(A, B) \
    _mm512_cmp_epu64_mask((A), (B), _MM_CMPINT_GT)
#define _mm512_mask_cmpgt_epu64_mask(k, A, B) \
    _mm512_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm512_cmple_epu64_mask(A, B) \
    _mm512_cmp_epu64_mask((A), (B), _MM_CMPINT_LE)
#define _mm512_mask_cmple_epu64_mask(k, A, B) \
    _mm512_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm512_cmplt_epu64_mask(A, B) \
    _mm512_cmp_epu64_mask((A), (B), _MM_CMPINT_LT)
#define _mm512_mask_cmplt_epu64_mask(k, A, B) \
    _mm512_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm512_cmpneq_epu64_mask(A, B) \
    _mm512_cmp_epu64_mask((A), (B), _MM_CMPINT_NE)
#define _mm512_mask_cmpneq_epu64_mask(k, A, B) \
    _mm512_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_NE)
4706
4707static __inline__ __m512i __DEFAULT_FN_ATTRS512
4709{
4710 /* This function always performs a signed extension, but __v16qi is a char
4711 which may be signed or unsigned, so use __v16qs. */
4712 return (__m512i)__builtin_convertvector((__v16qs)__A, __v16si);
4713}
4714
4715static __inline__ __m512i __DEFAULT_FN_ATTRS512
4716_mm512_mask_cvtepi8_epi32(__m512i __W, __mmask16 __U, __m128i __A)
4717{
4718 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
4719 (__v16si)_mm512_cvtepi8_epi32(__A),
4720 (__v16si)__W);
4721}
4722
4723static __inline__ __m512i __DEFAULT_FN_ATTRS512
4725{
4726 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
4727 (__v16si)_mm512_cvtepi8_epi32(__A),
4728 (__v16si)_mm512_setzero_si512());
4729}
4730
4731static __inline__ __m512i __DEFAULT_FN_ATTRS512
4733{
4734 /* This function always performs a signed extension, but __v16qi is a char
4735 which may be signed or unsigned, so use __v16qs. */
4736 return (__m512i)__builtin_convertvector(__builtin_shufflevector((__v16qs)__A, (__v16qs)__A, 0, 1, 2, 3, 4, 5, 6, 7), __v8di);
4737}
4738
4739static __inline__ __m512i __DEFAULT_FN_ATTRS512
4740_mm512_mask_cvtepi8_epi64(__m512i __W, __mmask8 __U, __m128i __A)
4741{
4742 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4743 (__v8di)_mm512_cvtepi8_epi64(__A),
4744 (__v8di)__W);
4745}
4746
4747static __inline__ __m512i __DEFAULT_FN_ATTRS512
4749{
4750 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4751 (__v8di)_mm512_cvtepi8_epi64(__A),
4752 (__v8di)_mm512_setzero_si512 ());
4753}
4754
4755static __inline__ __m512i __DEFAULT_FN_ATTRS512
4757{
4758 return (__m512i)__builtin_convertvector((__v8si)__X, __v8di);
4759}
4760
4761static __inline__ __m512i __DEFAULT_FN_ATTRS512
4762_mm512_mask_cvtepi32_epi64(__m512i __W, __mmask8 __U, __m256i __X)
4763{
4764 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4765 (__v8di)_mm512_cvtepi32_epi64(__X),
4766 (__v8di)__W);
4767}
4768
4769static __inline__ __m512i __DEFAULT_FN_ATTRS512
4771{
4772 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4773 (__v8di)_mm512_cvtepi32_epi64(__X),
4774 (__v8di)_mm512_setzero_si512());
4775}
4776
4777static __inline__ __m512i __DEFAULT_FN_ATTRS512
4779{
4780 return (__m512i)__builtin_convertvector((__v16hi)__A, __v16si);
4781}
4782
4783static __inline__ __m512i __DEFAULT_FN_ATTRS512
4784_mm512_mask_cvtepi16_epi32(__m512i __W, __mmask16 __U, __m256i __A)
4785{
4786 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
4787 (__v16si)_mm512_cvtepi16_epi32(__A),
4788 (__v16si)__W);
4789}
4790
4791static __inline__ __m512i __DEFAULT_FN_ATTRS512
4793{
4794 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
4795 (__v16si)_mm512_cvtepi16_epi32(__A),
4796 (__v16si)_mm512_setzero_si512 ());
4797}
4798
4799static __inline__ __m512i __DEFAULT_FN_ATTRS512
4801{
4802 return (__m512i)__builtin_convertvector((__v8hi)__A, __v8di);
4803}
4804
4805static __inline__ __m512i __DEFAULT_FN_ATTRS512
4806_mm512_mask_cvtepi16_epi64(__m512i __W, __mmask8 __U, __m128i __A)
4807{
4808 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4809 (__v8di)_mm512_cvtepi16_epi64(__A),
4810 (__v8di)__W);
4811}
4812
4813static __inline__ __m512i __DEFAULT_FN_ATTRS512
4815{
4816 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4817 (__v8di)_mm512_cvtepi16_epi64(__A),
4818 (__v8di)_mm512_setzero_si512());
4819}
4820
4821static __inline__ __m512i __DEFAULT_FN_ATTRS512
4823{
4824 return (__m512i)__builtin_convertvector((__v16qu)__A, __v16si);
4825}
4826
4827static __inline__ __m512i __DEFAULT_FN_ATTRS512
4828_mm512_mask_cvtepu8_epi32(__m512i __W, __mmask16 __U, __m128i __A)
4829{
4830 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
4831 (__v16si)_mm512_cvtepu8_epi32(__A),
4832 (__v16si)__W);
4833}
4834
4835static __inline__ __m512i __DEFAULT_FN_ATTRS512
4837{
4838 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
4839 (__v16si)_mm512_cvtepu8_epi32(__A),
4840 (__v16si)_mm512_setzero_si512());
4841}
4842
4843static __inline__ __m512i __DEFAULT_FN_ATTRS512
4845{
4846 return (__m512i)__builtin_convertvector(__builtin_shufflevector((__v16qu)__A, (__v16qu)__A, 0, 1, 2, 3, 4, 5, 6, 7), __v8di);
4847}
4848
4849static __inline__ __m512i __DEFAULT_FN_ATTRS512
4850_mm512_mask_cvtepu8_epi64(__m512i __W, __mmask8 __U, __m128i __A)
4851{
4852 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4853 (__v8di)_mm512_cvtepu8_epi64(__A),
4854 (__v8di)__W);
4855}
4856
4857static __inline__ __m512i __DEFAULT_FN_ATTRS512
4859{
4860 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4861 (__v8di)_mm512_cvtepu8_epi64(__A),
4862 (__v8di)_mm512_setzero_si512());
4863}
4864
4865static __inline__ __m512i __DEFAULT_FN_ATTRS512
4867{
4868 return (__m512i)__builtin_convertvector((__v8su)__X, __v8di);
4869}
4870
4871static __inline__ __m512i __DEFAULT_FN_ATTRS512
4872_mm512_mask_cvtepu32_epi64(__m512i __W, __mmask8 __U, __m256i __X)
4873{
4874 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4875 (__v8di)_mm512_cvtepu32_epi64(__X),
4876 (__v8di)__W);
4877}
4878
4879static __inline__ __m512i __DEFAULT_FN_ATTRS512
4881{
4882 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4883 (__v8di)_mm512_cvtepu32_epi64(__X),
4884 (__v8di)_mm512_setzero_si512());
4885}
4886
4887static __inline__ __m512i __DEFAULT_FN_ATTRS512
4889{
4890 return (__m512i)__builtin_convertvector((__v16hu)__A, __v16si);
4891}
4892
4893static __inline__ __m512i __DEFAULT_FN_ATTRS512
4894_mm512_mask_cvtepu16_epi32(__m512i __W, __mmask16 __U, __m256i __A)
4895{
4896 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
4897 (__v16si)_mm512_cvtepu16_epi32(__A),
4898 (__v16si)__W);
4899}
4900
4901static __inline__ __m512i __DEFAULT_FN_ATTRS512
4903{
4904 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
4905 (__v16si)_mm512_cvtepu16_epi32(__A),
4906 (__v16si)_mm512_setzero_si512());
4907}
4908
4909static __inline__ __m512i __DEFAULT_FN_ATTRS512
4911{
4912 return (__m512i)__builtin_convertvector((__v8hu)__A, __v8di);
4913}
4914
4915static __inline__ __m512i __DEFAULT_FN_ATTRS512
4916_mm512_mask_cvtepu16_epi64(__m512i __W, __mmask8 __U, __m128i __A)
4917{
4918 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4919 (__v8di)_mm512_cvtepu16_epi64(__A),
4920 (__v8di)__W);
4921}
4922
4923static __inline__ __m512i __DEFAULT_FN_ATTRS512
4925{
4926 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4927 (__v8di)_mm512_cvtepu16_epi64(__A),
4928 (__v8di)_mm512_setzero_si512());
4929}
4930
4931static __inline__ __m512i __DEFAULT_FN_ATTRS512
4932_mm512_rorv_epi32 (__m512i __A, __m512i __B)
4933{
4934 return (__m512i)__builtin_ia32_prorvd512((__v16si)__A, (__v16si)__B);
4935}
4936
4937static __inline__ __m512i __DEFAULT_FN_ATTRS512
4938_mm512_mask_rorv_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
4939{
4940 return (__m512i)__builtin_ia32_selectd_512(__U,
4941 (__v16si)_mm512_rorv_epi32(__A, __B),
4942 (__v16si)__W);
4943}
4944
4945static __inline__ __m512i __DEFAULT_FN_ATTRS512
4946_mm512_maskz_rorv_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
4947{
4948 return (__m512i)__builtin_ia32_selectd_512(__U,
4949 (__v16si)_mm512_rorv_epi32(__A, __B),
4950 (__v16si)_mm512_setzero_si512());
4951}
4952
4953static __inline__ __m512i __DEFAULT_FN_ATTRS512
4954_mm512_rorv_epi64 (__m512i __A, __m512i __B)
4955{
4956 return (__m512i)__builtin_ia32_prorvq512((__v8di)__A, (__v8di)__B);
4957}
4958
4959static __inline__ __m512i __DEFAULT_FN_ATTRS512
4960_mm512_mask_rorv_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
4961{
4962 return (__m512i)__builtin_ia32_selectq_512(__U,
4963 (__v8di)_mm512_rorv_epi64(__A, __B),
4964 (__v8di)__W);
4965}
4966
4967static __inline__ __m512i __DEFAULT_FN_ATTRS512
4968_mm512_maskz_rorv_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
4969{
4970 return (__m512i)__builtin_ia32_selectq_512(__U,
4971 (__v8di)_mm512_rorv_epi64(__A, __B),
4972 (__v8di)_mm512_setzero_si512());
4973}
4974
4975
4976
/* Generic integer compares: the immediate predicate p selects the comparison;
   results are returned as a mask register.  Must be macros because p must be
   a compile-time constant for the underlying instruction. */
#define _mm512_cmp_epi32_mask(a, b, p) \
  (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)(__m512i)(a), \
                                         (__v16si)(__m512i)(b), (int)(p), \
                                         (__mmask16)-1)

#define _mm512_cmp_epu32_mask(a, b, p) \
  (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)(__m512i)(a), \
                                          (__v16si)(__m512i)(b), (int)(p), \
                                          (__mmask16)-1)

#define _mm512_cmp_epi64_mask(a, b, p) \
  (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)(__m512i)(a), \
                                        (__v8di)(__m512i)(b), (int)(p), \
                                        (__mmask8)-1)

#define _mm512_cmp_epu64_mask(a, b, p) \
  (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)(__m512i)(a), \
                                         (__v8di)(__m512i)(b), (int)(p), \
                                         (__mmask8)-1)

#define _mm512_mask_cmp_epi32_mask(m, a, b, p) \
  (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)(__m512i)(a), \
                                         (__v16si)(__m512i)(b), (int)(p), \
                                         (__mmask16)(m))

#define _mm512_mask_cmp_epu32_mask(m, a, b, p) \
  (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)(__m512i)(a), \
                                          (__v16si)(__m512i)(b), (int)(p), \
                                          (__mmask16)(m))

#define _mm512_mask_cmp_epi64_mask(m, a, b, p) \
  (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)(__m512i)(a), \
                                        (__v8di)(__m512i)(b), (int)(p), \
                                        (__mmask8)(m))

#define _mm512_mask_cmp_epu64_mask(m, a, b, p) \
  (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)(__m512i)(a), \
                                         (__v8di)(__m512i)(b), (int)(p), \
                                         (__mmask8)(m))

/* Rotate left by an immediate count (VPROLD/VPROLQ); b must be constant. */
#define _mm512_rol_epi32(a, b) \
  (__m512i)__builtin_ia32_prold512((__v16si)(__m512i)(a), (int)(b))

#define _mm512_mask_rol_epi32(W, U, a, b) \
  (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                      (__v16si)_mm512_rol_epi32((a), (b)), \
                                      (__v16si)(__m512i)(W))

#define _mm512_maskz_rol_epi32(U, a, b) \
  (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                      (__v16si)_mm512_rol_epi32((a), (b)), \
                                      (__v16si)_mm512_setzero_si512())

#define _mm512_rol_epi64(a, b) \
  (__m512i)__builtin_ia32_prolq512((__v8di)(__m512i)(a), (int)(b))

#define _mm512_mask_rol_epi64(W, U, a, b) \
  (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                      (__v8di)_mm512_rol_epi64((a), (b)), \
                                      (__v8di)(__m512i)(W))

#define _mm512_maskz_rol_epi64(U, a, b) \
  (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                      (__v8di)_mm512_rol_epi64((a), (b)), \
                                      (__v8di)_mm512_setzero_si512())
5042
5043static __inline__ __m512i __DEFAULT_FN_ATTRS512
5044_mm512_rolv_epi32 (__m512i __A, __m512i __B)
5045{
5046 return (__m512i)__builtin_ia32_prolvd512((__v16si)__A, (__v16si)__B);
5047}
5048
5049static __inline__ __m512i __DEFAULT_FN_ATTRS512
5050_mm512_mask_rolv_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
5051{
5052 return (__m512i)__builtin_ia32_selectd_512(__U,
5053 (__v16si)_mm512_rolv_epi32(__A, __B),
5054 (__v16si)__W);
5055}
5056
5057static __inline__ __m512i __DEFAULT_FN_ATTRS512
5058_mm512_maskz_rolv_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
5059{
5060 return (__m512i)__builtin_ia32_selectd_512(__U,
5061 (__v16si)_mm512_rolv_epi32(__A, __B),
5062 (__v16si)_mm512_setzero_si512());
5063}
5064
5065static __inline__ __m512i __DEFAULT_FN_ATTRS512
5066_mm512_rolv_epi64 (__m512i __A, __m512i __B)
5067{
5068 return (__m512i)__builtin_ia32_prolvq512((__v8di)__A, (__v8di)__B);
5069}
5070
5071static __inline__ __m512i __DEFAULT_FN_ATTRS512
5072_mm512_mask_rolv_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
5073{
5074 return (__m512i)__builtin_ia32_selectq_512(__U,
5075 (__v8di)_mm512_rolv_epi64(__A, __B),
5076 (__v8di)__W);
5077}
5078
5079static __inline__ __m512i __DEFAULT_FN_ATTRS512
5080_mm512_maskz_rolv_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
5081{
5082 return (__m512i)__builtin_ia32_selectq_512(__U,
5083 (__v8di)_mm512_rolv_epi64(__A, __B),
5084 (__v8di)_mm512_setzero_si512());
5085}
5086
/* Rotate right by an immediate count (VPRORD/VPRORQ); B must be constant. */
#define _mm512_ror_epi32(A, B) \
  (__m512i)__builtin_ia32_prord512((__v16si)(__m512i)(A), (int)(B))

#define _mm512_mask_ror_epi32(W, U, A, B) \
  (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                      (__v16si)_mm512_ror_epi32((A), (B)), \
                                      (__v16si)(__m512i)(W))

#define _mm512_maskz_ror_epi32(U, A, B) \
  (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                      (__v16si)_mm512_ror_epi32((A), (B)), \
                                      (__v16si)_mm512_setzero_si512())

#define _mm512_ror_epi64(A, B) \
  (__m512i)__builtin_ia32_prorq512((__v8di)(__m512i)(A), (int)(B))

#define _mm512_mask_ror_epi64(W, U, A, B) \
  (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                      (__v8di)_mm512_ror_epi64((A), (B)), \
                                      (__v8di)(__m512i)(W))

#define _mm512_maskz_ror_epi64(U, A, B) \
  (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                      (__v8di)_mm512_ror_epi64((A), (B)), \
                                      (__v8di)_mm512_setzero_si512())
5112
5113static __inline__ __m512i __DEFAULT_FN_ATTRS512
5114_mm512_slli_epi32(__m512i __A, unsigned int __B)
5115{
5116 return (__m512i)__builtin_ia32_pslldi512((__v16si)__A, __B);
5117}
5118
5119static __inline__ __m512i __DEFAULT_FN_ATTRS512
5120_mm512_mask_slli_epi32(__m512i __W, __mmask16 __U, __m512i __A,
5121 unsigned int __B)
5122{
5123 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5124 (__v16si)_mm512_slli_epi32(__A, __B),
5125 (__v16si)__W);
5126}
5127
5128static __inline__ __m512i __DEFAULT_FN_ATTRS512
5129_mm512_maskz_slli_epi32(__mmask16 __U, __m512i __A, unsigned int __B) {
5130 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5131 (__v16si)_mm512_slli_epi32(__A, __B),
5132 (__v16si)_mm512_setzero_si512());
5133}
5134
5135static __inline__ __m512i __DEFAULT_FN_ATTRS512
5136_mm512_slli_epi64(__m512i __A, unsigned int __B)
5137{
5138 return (__m512i)__builtin_ia32_psllqi512((__v8di)__A, __B);
5139}
5140
5141static __inline__ __m512i __DEFAULT_FN_ATTRS512
5142_mm512_mask_slli_epi64(__m512i __W, __mmask8 __U, __m512i __A, unsigned int __B)
5143{
5144 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5145 (__v8di)_mm512_slli_epi64(__A, __B),
5146 (__v8di)__W);
5147}
5148
5149static __inline__ __m512i __DEFAULT_FN_ATTRS512
5150_mm512_maskz_slli_epi64(__mmask8 __U, __m512i __A, unsigned int __B)
5151{
5152 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5153 (__v8di)_mm512_slli_epi64(__A, __B),
5154 (__v8di)_mm512_setzero_si512());
5155}
5156
5157static __inline__ __m512i __DEFAULT_FN_ATTRS512
5158_mm512_srli_epi32(__m512i __A, unsigned int __B)
5159{
5160 return (__m512i)__builtin_ia32_psrldi512((__v16si)__A, __B);
5161}
5162
5163static __inline__ __m512i __DEFAULT_FN_ATTRS512
5164_mm512_mask_srli_epi32(__m512i __W, __mmask16 __U, __m512i __A,
5165 unsigned int __B)
5166{
5167 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5168 (__v16si)_mm512_srli_epi32(__A, __B),
5169 (__v16si)__W);
5170}
5171
5172static __inline__ __m512i __DEFAULT_FN_ATTRS512
5173_mm512_maskz_srli_epi32(__mmask16 __U, __m512i __A, unsigned int __B) {
5174 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5175 (__v16si)_mm512_srli_epi32(__A, __B),
5176 (__v16si)_mm512_setzero_si512());
5177}
5178
5179static __inline__ __m512i __DEFAULT_FN_ATTRS512
5180_mm512_srli_epi64(__m512i __A, unsigned int __B)
5181{
5182 return (__m512i)__builtin_ia32_psrlqi512((__v8di)__A, __B);
5183}
5184
5185static __inline__ __m512i __DEFAULT_FN_ATTRS512
5186_mm512_mask_srli_epi64(__m512i __W, __mmask8 __U, __m512i __A,
5187 unsigned int __B)
5188{
5189 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5190 (__v8di)_mm512_srli_epi64(__A, __B),
5191 (__v8di)__W);
5192}
5193
5194static __inline__ __m512i __DEFAULT_FN_ATTRS512
5196 unsigned int __B)
5197{
5198 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5199 (__v8di)_mm512_srli_epi64(__A, __B),
5200 (__v8di)_mm512_setzero_si512());
5201}
5202
5203static __inline__ __m512i __DEFAULT_FN_ATTRS512
5204_mm512_mask_load_epi32 (__m512i __W, __mmask16 __U, void const *__P)
5205{
5206 return (__m512i) __builtin_ia32_movdqa32load512_mask ((const __v16si *) __P,
5207 (__v16si) __W,
5208 (__mmask16) __U);
5209}
5210
5211static __inline__ __m512i __DEFAULT_FN_ATTRS512
5213{
5214 return (__m512i) __builtin_ia32_movdqa32load512_mask ((const __v16si *) __P,
5215 (__v16si)
5217 (__mmask16) __U);
5218}
5219
5220static __inline__ void __DEFAULT_FN_ATTRS512
5221_mm512_mask_store_epi32 (void *__P, __mmask16 __U, __m512i __A)
5222{
5223 __builtin_ia32_movdqa32store512_mask ((__v16si *) __P, (__v16si) __A,
5224 (__mmask16) __U);
5225}
5226
5227static __inline__ __m512i __DEFAULT_FN_ATTRS512
5228_mm512_mask_mov_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
5229{
5230 return (__m512i) __builtin_ia32_selectd_512 ((__mmask16) __U,
5231 (__v16si) __A,
5232 (__v16si) __W);
5233}
5234
5235static __inline__ __m512i __DEFAULT_FN_ATTRS512
5237{
5238 return (__m512i) __builtin_ia32_selectd_512 ((__mmask16) __U,
5239 (__v16si) __A,
5240 (__v16si) _mm512_setzero_si512 ());
5241}
5242
5243static __inline__ __m512i __DEFAULT_FN_ATTRS512
5244_mm512_mask_mov_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
5245{
5246 return (__m512i) __builtin_ia32_selectq_512 ((__mmask8) __U,
5247 (__v8di) __A,
5248 (__v8di) __W);
5249}
5250
5251static __inline__ __m512i __DEFAULT_FN_ATTRS512
5253{
5254 return (__m512i) __builtin_ia32_selectq_512 ((__mmask8) __U,
5255 (__v8di) __A,
5256 (__v8di) _mm512_setzero_si512 ());
5257}
5258
5259static __inline__ __m512i __DEFAULT_FN_ATTRS512
5260_mm512_mask_load_epi64 (__m512i __W, __mmask8 __U, void const *__P)
5261{
5262 return (__m512i) __builtin_ia32_movdqa64load512_mask ((const __v8di *) __P,
5263 (__v8di) __W,
5264 (__mmask8) __U);
5265}
5266
5267static __inline__ __m512i __DEFAULT_FN_ATTRS512
5268_mm512_maskz_load_epi64 (__mmask8 __U, void const *__P)
5269{
5270 return (__m512i) __builtin_ia32_movdqa64load512_mask ((const __v8di *) __P,
5271 (__v8di)
5273 (__mmask8) __U);
5274}
5275
5276static __inline__ void __DEFAULT_FN_ATTRS512
5277_mm512_mask_store_epi64 (void *__P, __mmask8 __U, __m512i __A)
5278{
5279 __builtin_ia32_movdqa64store512_mask ((__v8di *) __P, (__v8di) __A,
5280 (__mmask8) __U);
5281}
5282
5283static __inline__ __m512d __DEFAULT_FN_ATTRS512
5285{
5286 return (__m512d)__builtin_shufflevector((__v8df)__A, (__v8df)__A,
5287 0, 0, 2, 2, 4, 4, 6, 6);
5288}
5289
5290static __inline__ __m512d __DEFAULT_FN_ATTRS512
5291_mm512_mask_movedup_pd (__m512d __W, __mmask8 __U, __m512d __A)
5292{
5293 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
5294 (__v8df)_mm512_movedup_pd(__A),
5295 (__v8df)__W);
5296}
5297
5298static __inline__ __m512d __DEFAULT_FN_ATTRS512
5300{
5301 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
5302 (__v8df)_mm512_movedup_pd(__A),
5303 (__v8df)_mm512_setzero_pd());
5304}
5305
/* VFIXUPIMMPD/PS: fix up special FP values per the table operand C and the
   immediate.  "round" variants take an explicit rounding/SAE argument R;
   the others use _MM_FROUND_CUR_DIRECTION.  Macros because imm and R must
   be compile-time constants. */
#define _mm512_fixupimm_round_pd(A, B, C, imm, R) \
  (__m512d)__builtin_ia32_fixupimmpd512_mask((__v8df)(__m512d)(A), \
                                             (__v8df)(__m512d)(B), \
                                             (__v8di)(__m512i)(C), (int)(imm), \
                                             (__mmask8)-1, (int)(R))

#define _mm512_mask_fixupimm_round_pd(A, U, B, C, imm, R) \
  (__m512d)__builtin_ia32_fixupimmpd512_mask((__v8df)(__m512d)(A), \
                                             (__v8df)(__m512d)(B), \
                                             (__v8di)(__m512i)(C), (int)(imm), \
                                             (__mmask8)(U), (int)(R))

#define _mm512_fixupimm_pd(A, B, C, imm) \
  (__m512d)__builtin_ia32_fixupimmpd512_mask((__v8df)(__m512d)(A), \
                                             (__v8df)(__m512d)(B), \
                                             (__v8di)(__m512i)(C), (int)(imm), \
                                             (__mmask8)-1, \
                                             _MM_FROUND_CUR_DIRECTION)

#define _mm512_mask_fixupimm_pd(A, U, B, C, imm) \
  (__m512d)__builtin_ia32_fixupimmpd512_mask((__v8df)(__m512d)(A), \
                                             (__v8df)(__m512d)(B), \
                                             (__v8di)(__m512i)(C), (int)(imm), \
                                             (__mmask8)(U), \
                                             _MM_FROUND_CUR_DIRECTION)

#define _mm512_maskz_fixupimm_round_pd(U, A, B, C, imm, R) \
  (__m512d)__builtin_ia32_fixupimmpd512_maskz((__v8df)(__m512d)(A), \
                                              (__v8df)(__m512d)(B), \
                                              (__v8di)(__m512i)(C), \
                                              (int)(imm), (__mmask8)(U), \
                                              (int)(R))

#define _mm512_maskz_fixupimm_pd(U, A, B, C, imm) \
  (__m512d)__builtin_ia32_fixupimmpd512_maskz((__v8df)(__m512d)(A), \
                                              (__v8df)(__m512d)(B), \
                                              (__v8di)(__m512i)(C), \
                                              (int)(imm), (__mmask8)(U), \
                                              _MM_FROUND_CUR_DIRECTION)

#define _mm512_fixupimm_round_ps(A, B, C, imm, R) \
  (__m512)__builtin_ia32_fixupimmps512_mask((__v16sf)(__m512)(A), \
                                            (__v16sf)(__m512)(B), \
                                            (__v16si)(__m512i)(C), (int)(imm), \
                                            (__mmask16)-1, (int)(R))

#define _mm512_mask_fixupimm_round_ps(A, U, B, C, imm, R) \
  (__m512)__builtin_ia32_fixupimmps512_mask((__v16sf)(__m512)(A), \
                                            (__v16sf)(__m512)(B), \
                                            (__v16si)(__m512i)(C), (int)(imm), \
                                            (__mmask16)(U), (int)(R))

#define _mm512_fixupimm_ps(A, B, C, imm) \
  (__m512)__builtin_ia32_fixupimmps512_mask((__v16sf)(__m512)(A), \
                                            (__v16sf)(__m512)(B), \
                                            (__v16si)(__m512i)(C), (int)(imm), \
                                            (__mmask16)-1, \
                                            _MM_FROUND_CUR_DIRECTION)
5364
5365#define _mm512_mask_fixupimm_ps(A, U, B, C, imm) \
5366 (__m512)__builtin_ia32_fixupimmps512_mask((__v16sf)(__m512)(A), \
5367 (__v16sf)(__m512)(B), \
5368 (__v16si)(__m512i)(C), (int)(imm), \
5369 (__mmask16)(U), \
5370 _MM_FROUND_CUR_DIRECTION)
5371
5372#define _mm512_maskz_fixupimm_round_ps(U, A, B, C, imm, R) \
5373 (__m512)__builtin_ia32_fixupimmps512_maskz((__v16sf)(__m512)(A), \
5374 (__v16sf)(__m512)(B), \
5375 (__v16si)(__m512i)(C), \
5376 (int)(imm), (__mmask16)(U), \
5377 (int)(R))
5378
5379#define _mm512_maskz_fixupimm_ps(U, A, B, C, imm) \
5380 (__m512)__builtin_ia32_fixupimmps512_maskz((__v16sf)(__m512)(A), \
5381 (__v16sf)(__m512)(B), \
5382 (__v16si)(__m512i)(C), \
5383 (int)(imm), (__mmask16)(U), \
5384 _MM_FROUND_CUR_DIRECTION)
5385
5386#define _mm_fixupimm_round_sd(A, B, C, imm, R) \
5387 (__m128d)__builtin_ia32_fixupimmsd_mask((__v2df)(__m128d)(A), \
5388 (__v2df)(__m128d)(B), \
5389 (__v2di)(__m128i)(C), (int)(imm), \
5390 (__mmask8)-1, (int)(R))
5391
5392#define _mm_mask_fixupimm_round_sd(A, U, B, C, imm, R) \
5393 (__m128d)__builtin_ia32_fixupimmsd_mask((__v2df)(__m128d)(A), \
5394 (__v2df)(__m128d)(B), \
5395 (__v2di)(__m128i)(C), (int)(imm), \
5396 (__mmask8)(U), (int)(R))
5397
5398#define _mm_fixupimm_sd(A, B, C, imm) \
5399 (__m128d)__builtin_ia32_fixupimmsd_mask((__v2df)(__m128d)(A), \
5400 (__v2df)(__m128d)(B), \
5401 (__v2di)(__m128i)(C), (int)(imm), \
5402 (__mmask8)-1, \
5403 _MM_FROUND_CUR_DIRECTION)
5404
5405#define _mm_mask_fixupimm_sd(A, U, B, C, imm) \
5406 (__m128d)__builtin_ia32_fixupimmsd_mask((__v2df)(__m128d)(A), \
5407 (__v2df)(__m128d)(B), \
5408 (__v2di)(__m128i)(C), (int)(imm), \
5409 (__mmask8)(U), \
5410 _MM_FROUND_CUR_DIRECTION)
5411
5412#define _mm_maskz_fixupimm_round_sd(U, A, B, C, imm, R) \
5413 (__m128d)__builtin_ia32_fixupimmsd_maskz((__v2df)(__m128d)(A), \
5414 (__v2df)(__m128d)(B), \
5415 (__v2di)(__m128i)(C), (int)(imm), \
5416 (__mmask8)(U), (int)(R))
5417
5418#define _mm_maskz_fixupimm_sd(U, A, B, C, imm) \
5419 (__m128d)__builtin_ia32_fixupimmsd_maskz((__v2df)(__m128d)(A), \
5420 (__v2df)(__m128d)(B), \
5421 (__v2di)(__m128i)(C), (int)(imm), \
5422 (__mmask8)(U), \
5423 _MM_FROUND_CUR_DIRECTION)
5424
5425#define _mm_fixupimm_round_ss(A, B, C, imm, R) \
5426 (__m128)__builtin_ia32_fixupimmss_mask((__v4sf)(__m128)(A), \
5427 (__v4sf)(__m128)(B), \
5428 (__v4si)(__m128i)(C), (int)(imm), \
5429 (__mmask8)-1, (int)(R))
5430
5431#define _mm_mask_fixupimm_round_ss(A, U, B, C, imm, R) \
5432 (__m128)__builtin_ia32_fixupimmss_mask((__v4sf)(__m128)(A), \
5433 (__v4sf)(__m128)(B), \
5434 (__v4si)(__m128i)(C), (int)(imm), \
5435 (__mmask8)(U), (int)(R))
5436
5437#define _mm_fixupimm_ss(A, B, C, imm) \
5438 (__m128)__builtin_ia32_fixupimmss_mask((__v4sf)(__m128)(A), \
5439 (__v4sf)(__m128)(B), \
5440 (__v4si)(__m128i)(C), (int)(imm), \
5441 (__mmask8)-1, \
5442 _MM_FROUND_CUR_DIRECTION)
5443
5444#define _mm_mask_fixupimm_ss(A, U, B, C, imm) \
5445 (__m128)__builtin_ia32_fixupimmss_mask((__v4sf)(__m128)(A), \
5446 (__v4sf)(__m128)(B), \
5447 (__v4si)(__m128i)(C), (int)(imm), \
5448 (__mmask8)(U), \
5449 _MM_FROUND_CUR_DIRECTION)
5450
5451#define _mm_maskz_fixupimm_round_ss(U, A, B, C, imm, R) \
5452 (__m128)__builtin_ia32_fixupimmss_maskz((__v4sf)(__m128)(A), \
5453 (__v4sf)(__m128)(B), \
5454 (__v4si)(__m128i)(C), (int)(imm), \
5455 (__mmask8)(U), (int)(R))
5456
5457#define _mm_maskz_fixupimm_ss(U, A, B, C, imm) \
5458 (__m128)__builtin_ia32_fixupimmss_maskz((__v4sf)(__m128)(A), \
5459 (__v4sf)(__m128)(B), \
5460 (__v4si)(__m128i)(C), (int)(imm), \
5461 (__mmask8)(U), \
5462 _MM_FROUND_CUR_DIRECTION)
5463
/* Extract the biased exponent of the low double as a double, with explicit
   rounding/SAE control R; upper element comes from A. */
#define _mm_getexp_round_sd(A, B, R) \
  (__m128d)__builtin_ia32_getexpsd128_round_mask((__v2df)(__m128d)(A), \
                                                 (__v2df)(__m128d)(B), \
                                                 (__v2df)_mm_setzero_pd(), \
                                                 (__mmask8)-1, (int)(R))
5469
5470
5471static __inline__ __m128d __DEFAULT_FN_ATTRS128
5472_mm_getexp_sd (__m128d __A, __m128d __B)
5473{
5474 return (__m128d) __builtin_ia32_getexpsd128_round_mask ((__v2df) __A,
5475 (__v2df) __B, (__v2df) _mm_setzero_pd(), (__mmask8) -1, _MM_FROUND_CUR_DIRECTION);
5476}
5477
5478static __inline__ __m128d __DEFAULT_FN_ATTRS128
5479_mm_mask_getexp_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
5480{
5481 return (__m128d) __builtin_ia32_getexpsd128_round_mask ( (__v2df) __A,
5482 (__v2df) __B,
5483 (__v2df) __W,
5484 (__mmask8) __U,
5486}
5487
/* Merge-masking getexp of the low double with explicit rounding control. */
#define _mm_mask_getexp_round_sd(W, U, A, B, R) \
  (__m128d)__builtin_ia32_getexpsd128_round_mask((__v2df)(__m128d)(A), \
                                                 (__v2df)(__m128d)(B), \
                                                 (__v2df)(__m128d)(W), \
                                                 (__mmask8)(U), (int)(R))
5493
5494static __inline__ __m128d __DEFAULT_FN_ATTRS128
5495_mm_maskz_getexp_sd (__mmask8 __U, __m128d __A, __m128d __B)
5496{
5497 return (__m128d) __builtin_ia32_getexpsd128_round_mask ( (__v2df) __A,
5498 (__v2df) __B,
5499 (__v2df) _mm_setzero_pd (),
5500 (__mmask8) __U,
5502}
5503
/* Zero-masking getexp of the low double with explicit rounding control. */
#define _mm_maskz_getexp_round_sd(U, A, B, R) \
  (__m128d)__builtin_ia32_getexpsd128_round_mask((__v2df)(__m128d)(A), \
                                                 (__v2df)(__m128d)(B), \
                                                 (__v2df)_mm_setzero_pd(), \
                                                 (__mmask8)(U), (int)(R))

/* Extract the exponent of the low float with explicit rounding control. */
#define _mm_getexp_round_ss(A, B, R) \
  (__m128)__builtin_ia32_getexpss128_round_mask((__v4sf)(__m128)(A), \
                                                (__v4sf)(__m128)(B), \
                                                (__v4sf)_mm_setzero_ps(), \
                                                (__mmask8)-1, (int)(R))
5515
5516static __inline__ __m128 __DEFAULT_FN_ATTRS128
5517_mm_getexp_ss (__m128 __A, __m128 __B)
5518{
5519 return (__m128) __builtin_ia32_getexpss128_round_mask ((__v4sf) __A,
5520 (__v4sf) __B, (__v4sf) _mm_setzero_ps(), (__mmask8) -1, _MM_FROUND_CUR_DIRECTION);
5521}
5522
5523static __inline__ __m128 __DEFAULT_FN_ATTRS128
5524_mm_mask_getexp_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
5525{
5526 return (__m128) __builtin_ia32_getexpss128_round_mask ((__v4sf) __A,
5527 (__v4sf) __B,
5528 (__v4sf) __W,
5529 (__mmask8) __U,
5531}
5532
/* Merge-masking getexp of the low float with explicit rounding control. */
#define _mm_mask_getexp_round_ss(W, U, A, B, R) \
  (__m128)__builtin_ia32_getexpss128_round_mask((__v4sf)(__m128)(A), \
                                                (__v4sf)(__m128)(B), \
                                                (__v4sf)(__m128)(W), \
                                                (__mmask8)(U), (int)(R))
5538
5539static __inline__ __m128 __DEFAULT_FN_ATTRS128
5540_mm_maskz_getexp_ss (__mmask8 __U, __m128 __A, __m128 __B)
5541{
5542 return (__m128) __builtin_ia32_getexpss128_round_mask ((__v4sf) __A,
5543 (__v4sf) __B,
5544 (__v4sf) _mm_setzero_ps (),
5545 (__mmask8) __U,
5547}
5548
/* Zero-masking getexp of the low float with explicit rounding control. */
#define _mm_maskz_getexp_round_ss(U, A, B, R) \
  (__m128)__builtin_ia32_getexpss128_round_mask((__v4sf)(__m128)(A), \
                                                (__v4sf)(__m128)(B), \
                                                (__v4sf)_mm_setzero_ps(), \
                                                (__mmask8)(U), (int)(R))
5554
/* VGETMANT scalar family: extract the normalized mantissa of the low
   element.  C selects the interval, D the sign control; they are packed as
   ((D << 2) | C) into the instruction's imm8. */
#define _mm_getmant_round_sd(A, B, C, D, R) \
  (__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \
                                               (__v2df)(__m128d)(B), \
                                               (int)(((D)<<2) | (C)), \
                                               (__v2df)_mm_setzero_pd(), \
                                               (__mmask8)-1, (int)(R))

#define _mm_getmant_sd(A, B, C, D) \
  (__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \
                                               (__v2df)(__m128d)(B), \
                                               (int)(((D)<<2) | (C)), \
                                               (__v2df)_mm_setzero_pd(), \
                                               (__mmask8)-1, \
                                               _MM_FROUND_CUR_DIRECTION)

#define _mm_mask_getmant_sd(W, U, A, B, C, D) \
  (__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \
                                               (__v2df)(__m128d)(B), \
                                               (int)(((D)<<2) | (C)), \
                                               (__v2df)(__m128d)(W), \
                                               (__mmask8)(U), \
                                               _MM_FROUND_CUR_DIRECTION)

#define _mm_mask_getmant_round_sd(W, U, A, B, C, D, R) \
  (__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \
                                               (__v2df)(__m128d)(B), \
                                               (int)(((D)<<2) | (C)), \
                                               (__v2df)(__m128d)(W), \
                                               (__mmask8)(U), (int)(R))

#define _mm_maskz_getmant_sd(U, A, B, C, D) \
  (__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \
                                               (__v2df)(__m128d)(B), \
                                               (int)(((D)<<2) | (C)), \
                                               (__v2df)_mm_setzero_pd(), \
                                               (__mmask8)(U), \
                                               _MM_FROUND_CUR_DIRECTION)

#define _mm_maskz_getmant_round_sd(U, A, B, C, D, R) \
  (__m128d)__builtin_ia32_getmantsd_round_mask((__v2df)(__m128d)(A), \
                                               (__v2df)(__m128d)(B), \
                                               (int)(((D)<<2) | (C)), \
                                               (__v2df)_mm_setzero_pd(), \
                                               (__mmask8)(U), (int)(R))

#define _mm_getmant_round_ss(A, B, C, D, R) \
  (__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \
                                              (__v4sf)(__m128)(B), \
                                              (int)(((D)<<2) | (C)), \
                                              (__v4sf)_mm_setzero_ps(), \
                                              (__mmask8)-1, (int)(R))

#define _mm_getmant_ss(A, B, C, D) \
  (__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \
                                              (__v4sf)(__m128)(B), \
                                              (int)(((D)<<2) | (C)), \
                                              (__v4sf)_mm_setzero_ps(), \
                                              (__mmask8)-1, \
                                              _MM_FROUND_CUR_DIRECTION)

#define _mm_mask_getmant_ss(W, U, A, B, C, D) \
  (__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \
                                              (__v4sf)(__m128)(B), \
                                              (int)(((D)<<2) | (C)), \
                                              (__v4sf)(__m128)(W), \
                                              (__mmask8)(U), \
                                              _MM_FROUND_CUR_DIRECTION)

#define _mm_mask_getmant_round_ss(W, U, A, B, C, D, R) \
  (__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \
                                              (__v4sf)(__m128)(B), \
                                              (int)(((D)<<2) | (C)), \
                                              (__v4sf)(__m128)(W), \
                                              (__mmask8)(U), (int)(R))

#define _mm_maskz_getmant_ss(U, A, B, C, D) \
  (__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \
                                              (__v4sf)(__m128)(B), \
                                              (int)(((D)<<2) | (C)), \
                                              (__v4sf)_mm_setzero_ps(), \
                                              (__mmask8)(U), \
                                              _MM_FROUND_CUR_DIRECTION)

#define _mm_maskz_getmant_round_ss(U, A, B, C, D, R) \
  (__m128)__builtin_ia32_getmantss_round_mask((__v4sf)(__m128)(A), \
                                              (__v4sf)(__m128)(B), \
                                              (int)(((D)<<2) | (C)), \
                                              (__v4sf)_mm_setzero_ps(), \
                                              (__mmask8)(U), (int)(R))
5644
5645static __inline__ __mmask16 __DEFAULT_FN_ATTRS
5647{
5648 return __A;
5649}
5650
/* Scalar compare of the low element with predicate P and SAE control R. */
#define _mm_comi_round_sd(A, B, P, R) \
  (int)__builtin_ia32_vcomisd((__v2df)(__m128d)(A), (__v2df)(__m128d)(B), \
                              (int)(P), (int)(R))

#define _mm_comi_round_ss(A, B, P, R) \
  (int)__builtin_ia32_vcomiss((__v4sf)(__m128)(A), (__v4sf)(__m128)(B), \
                              (int)(P), (int)(R))

#ifdef __x86_64__
#define _mm_cvt_roundsd_si64(A, R) \
  (long long)__builtin_ia32_vcvtsd2si64((__v2df)(__m128d)(A), (int)(R))
#endif
5663
5664static __inline__ __m512i __DEFAULT_FN_ATTRS512
5665_mm512_sll_epi32(__m512i __A, __m128i __B)
5666{
5667 return (__m512i)__builtin_ia32_pslld512((__v16si) __A, (__v4si)__B);
5668}
5669
5670static __inline__ __m512i __DEFAULT_FN_ATTRS512
5671_mm512_mask_sll_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
5672{
5673 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5674 (__v16si)_mm512_sll_epi32(__A, __B),
5675 (__v16si)__W);
5676}
5677
5678static __inline__ __m512i __DEFAULT_FN_ATTRS512
5679_mm512_maskz_sll_epi32(__mmask16 __U, __m512i __A, __m128i __B)
5680{
5681 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5682 (__v16si)_mm512_sll_epi32(__A, __B),
5683 (__v16si)_mm512_setzero_si512());
5684}
5685
5686static __inline__ __m512i __DEFAULT_FN_ATTRS512
5687_mm512_sll_epi64(__m512i __A, __m128i __B)
5688{
5689 return (__m512i)__builtin_ia32_psllq512((__v8di)__A, (__v2di)__B);
5690}
5691
5692static __inline__ __m512i __DEFAULT_FN_ATTRS512
5693_mm512_mask_sll_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
5694{
5695 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5696 (__v8di)_mm512_sll_epi64(__A, __B),
5697 (__v8di)__W);
5698}
5699
5700static __inline__ __m512i __DEFAULT_FN_ATTRS512
5701_mm512_maskz_sll_epi64(__mmask8 __U, __m512i __A, __m128i __B)
5702{
5703 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5704 (__v8di)_mm512_sll_epi64(__A, __B),
5705 (__v8di)_mm512_setzero_si512());
5706}
5707
5708static __inline__ __m512i __DEFAULT_FN_ATTRS512
5709_mm512_sllv_epi32(__m512i __X, __m512i __Y)
5710{
5711 return (__m512i)__builtin_ia32_psllv16si((__v16si)__X, (__v16si)__Y);
5712}
5713
5714static __inline__ __m512i __DEFAULT_FN_ATTRS512
5715_mm512_mask_sllv_epi32(__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
5716{
5717 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5718 (__v16si)_mm512_sllv_epi32(__X, __Y),
5719 (__v16si)__W);
5720}
5721
5722static __inline__ __m512i __DEFAULT_FN_ATTRS512
5723_mm512_maskz_sllv_epi32(__mmask16 __U, __m512i __X, __m512i __Y)
5724{
5725 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5726 (__v16si)_mm512_sllv_epi32(__X, __Y),
5727 (__v16si)_mm512_setzero_si512());
5728}
5729
5730static __inline__ __m512i __DEFAULT_FN_ATTRS512
5731_mm512_sllv_epi64(__m512i __X, __m512i __Y)
5732{
5733 return (__m512i)__builtin_ia32_psllv8di((__v8di)__X, (__v8di)__Y);
5734}
5735
5736static __inline__ __m512i __DEFAULT_FN_ATTRS512
5737_mm512_mask_sllv_epi64(__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
5738{
5739 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5740 (__v8di)_mm512_sllv_epi64(__X, __Y),
5741 (__v8di)__W);
5742}
5743
5744static __inline__ __m512i __DEFAULT_FN_ATTRS512
5745_mm512_maskz_sllv_epi64(__mmask8 __U, __m512i __X, __m512i __Y)
5746{
5747 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5748 (__v8di)_mm512_sllv_epi64(__X, __Y),
5749 (__v8di)_mm512_setzero_si512());
5750}
5751
5752static __inline__ __m512i __DEFAULT_FN_ATTRS512
5753_mm512_sra_epi32(__m512i __A, __m128i __B)
5754{
5755 return (__m512i)__builtin_ia32_psrad512((__v16si) __A, (__v4si)__B);
5756}
5757
5758static __inline__ __m512i __DEFAULT_FN_ATTRS512
5759_mm512_mask_sra_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
5760{
5761 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5762 (__v16si)_mm512_sra_epi32(__A, __B),
5763 (__v16si)__W);
5764}
5765
5766static __inline__ __m512i __DEFAULT_FN_ATTRS512
5767_mm512_maskz_sra_epi32(__mmask16 __U, __m512i __A, __m128i __B)
5768{
5769 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5770 (__v16si)_mm512_sra_epi32(__A, __B),
5771 (__v16si)_mm512_setzero_si512());
5772}
5773
5774static __inline__ __m512i __DEFAULT_FN_ATTRS512
5775_mm512_sra_epi64(__m512i __A, __m128i __B)
5776{
5777 return (__m512i)__builtin_ia32_psraq512((__v8di)__A, (__v2di)__B);
5778}
5779
5780static __inline__ __m512i __DEFAULT_FN_ATTRS512
5781_mm512_mask_sra_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
5782{
5783 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5784 (__v8di)_mm512_sra_epi64(__A, __B),
5785 (__v8di)__W);
5786}
5787
5788static __inline__ __m512i __DEFAULT_FN_ATTRS512
5789_mm512_maskz_sra_epi64(__mmask8 __U, __m512i __A, __m128i __B)
5790{
5791 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5792 (__v8di)_mm512_sra_epi64(__A, __B),
5793 (__v8di)_mm512_setzero_si512());
5794}
5795
5796static __inline__ __m512i __DEFAULT_FN_ATTRS512
5797_mm512_srav_epi32(__m512i __X, __m512i __Y)
5798{
5799 return (__m512i)__builtin_ia32_psrav16si((__v16si)__X, (__v16si)__Y);
5800}
5801
5802static __inline__ __m512i __DEFAULT_FN_ATTRS512
5803_mm512_mask_srav_epi32(__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
5804{
5805 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5806 (__v16si)_mm512_srav_epi32(__X, __Y),
5807 (__v16si)__W);
5808}
5809
5810static __inline__ __m512i __DEFAULT_FN_ATTRS512
5811_mm512_maskz_srav_epi32(__mmask16 __U, __m512i __X, __m512i __Y)
5812{
5813 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5814 (__v16si)_mm512_srav_epi32(__X, __Y),
5815 (__v16si)_mm512_setzero_si512());
5816}
5817
5818static __inline__ __m512i __DEFAULT_FN_ATTRS512
5819_mm512_srav_epi64(__m512i __X, __m512i __Y)
5820{
5821 return (__m512i)__builtin_ia32_psrav8di((__v8di)__X, (__v8di)__Y);
5822}
5823
5824static __inline__ __m512i __DEFAULT_FN_ATTRS512
5825_mm512_mask_srav_epi64(__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
5826{
5827 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5828 (__v8di)_mm512_srav_epi64(__X, __Y),
5829 (__v8di)__W);
5830}
5831
5832static __inline__ __m512i __DEFAULT_FN_ATTRS512
5833_mm512_maskz_srav_epi64(__mmask8 __U, __m512i __X, __m512i __Y)
5834{
5835 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5836 (__v8di)_mm512_srav_epi64(__X, __Y),
5837 (__v8di)_mm512_setzero_si512());
5838}
5839
5840static __inline__ __m512i __DEFAULT_FN_ATTRS512
5841_mm512_srl_epi32(__m512i __A, __m128i __B)
5842{
5843 return (__m512i)__builtin_ia32_psrld512((__v16si) __A, (__v4si)__B);
5844}
5845
5846static __inline__ __m512i __DEFAULT_FN_ATTRS512
5847_mm512_mask_srl_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
5848{
5849 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5850 (__v16si)_mm512_srl_epi32(__A, __B),
5851 (__v16si)__W);
5852}
5853
5854static __inline__ __m512i __DEFAULT_FN_ATTRS512
5855_mm512_maskz_srl_epi32(__mmask16 __U, __m512i __A, __m128i __B)
5856{
5857 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5858 (__v16si)_mm512_srl_epi32(__A, __B),
5859 (__v16si)_mm512_setzero_si512());
5860}
5861
5862static __inline__ __m512i __DEFAULT_FN_ATTRS512
5863_mm512_srl_epi64(__m512i __A, __m128i __B)
5864{
5865 return (__m512i)__builtin_ia32_psrlq512((__v8di)__A, (__v2di)__B);
5866}
5867
5868static __inline__ __m512i __DEFAULT_FN_ATTRS512
5869_mm512_mask_srl_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
5870{
5871 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5872 (__v8di)_mm512_srl_epi64(__A, __B),
5873 (__v8di)__W);
5874}
5875
5876static __inline__ __m512i __DEFAULT_FN_ATTRS512
5877_mm512_maskz_srl_epi64(__mmask8 __U, __m512i __A, __m128i __B)
5878{
5879 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5880 (__v8di)_mm512_srl_epi64(__A, __B),
5881 (__v8di)_mm512_setzero_si512());
5882}
5883
5884static __inline__ __m512i __DEFAULT_FN_ATTRS512
5885_mm512_srlv_epi32(__m512i __X, __m512i __Y)
5886{
5887 return (__m512i)__builtin_ia32_psrlv16si((__v16si)__X, (__v16si)__Y);
5888}
5889
5890static __inline__ __m512i __DEFAULT_FN_ATTRS512
5891_mm512_mask_srlv_epi32(__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
5892{
5893 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5894 (__v16si)_mm512_srlv_epi32(__X, __Y),
5895 (__v16si)__W);
5896}
5897
5898static __inline__ __m512i __DEFAULT_FN_ATTRS512
5899_mm512_maskz_srlv_epi32(__mmask16 __U, __m512i __X, __m512i __Y)
5900{
5901 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
5902 (__v16si)_mm512_srlv_epi32(__X, __Y),
5903 (__v16si)_mm512_setzero_si512());
5904}
5905
5906static __inline__ __m512i __DEFAULT_FN_ATTRS512
5907_mm512_srlv_epi64 (__m512i __X, __m512i __Y)
5908{
5909 return (__m512i)__builtin_ia32_psrlv8di((__v8di)__X, (__v8di)__Y);
5910}
5911
5912static __inline__ __m512i __DEFAULT_FN_ATTRS512
5913_mm512_mask_srlv_epi64(__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
5914{
5915 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5916 (__v8di)_mm512_srlv_epi64(__X, __Y),
5917 (__v8di)__W);
5918}
5919
5920static __inline__ __m512i __DEFAULT_FN_ATTRS512
5921_mm512_maskz_srlv_epi64(__mmask8 __U, __m512i __X, __m512i __Y)
5922{
5923 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
5924 (__v8di)_mm512_srlv_epi64(__X, __Y),
5925 (__v8di)_mm512_setzero_si512());
5926}
5927
/* VPTERNLOG: per-bit 3-input boolean function; imm is the 8-bit truth table
   indexed by the corresponding bits of A, B and C. */
#define _mm512_ternarylogic_epi32(A, B, C, imm) \
  (__m512i)__builtin_ia32_pternlogd512_mask((__v16si)(__m512i)(A), \
                                            (__v16si)(__m512i)(B), \
                                            (__v16si)(__m512i)(C), (int)(imm), \
                                            (__mmask16)-1)

#define _mm512_mask_ternarylogic_epi32(A, U, B, C, imm) \
  (__m512i)__builtin_ia32_pternlogd512_mask((__v16si)(__m512i)(A), \
                                            (__v16si)(__m512i)(B), \
                                            (__v16si)(__m512i)(C), (int)(imm), \
                                            (__mmask16)(U))

#define _mm512_maskz_ternarylogic_epi32(U, A, B, C, imm) \
  (__m512i)__builtin_ia32_pternlogd512_maskz((__v16si)(__m512i)(A), \
                                             (__v16si)(__m512i)(B), \
                                             (__v16si)(__m512i)(C), \
                                             (int)(imm), (__mmask16)(U))

#define _mm512_ternarylogic_epi64(A, B, C, imm) \
  (__m512i)__builtin_ia32_pternlogq512_mask((__v8di)(__m512i)(A), \
                                            (__v8di)(__m512i)(B), \
                                            (__v8di)(__m512i)(C), (int)(imm), \
                                            (__mmask8)-1)

#define _mm512_mask_ternarylogic_epi64(A, U, B, C, imm) \
  (__m512i)__builtin_ia32_pternlogq512_mask((__v8di)(__m512i)(A), \
                                            (__v8di)(__m512i)(B), \
                                            (__v8di)(__m512i)(C), (int)(imm), \
                                            (__mmask8)(U))

#define _mm512_maskz_ternarylogic_epi64(U, A, B, C, imm) \
  (__m512i)__builtin_ia32_pternlogq512_maskz((__v8di)(__m512i)(A), \
                                             (__v8di)(__m512i)(B), \
                                             (__v8di)(__m512i)(C), (int)(imm), \
                                             (__mmask8)(U))
5963
/* Convert the low double to a signed/unsigned integer with explicit
   rounding control R. */
#ifdef __x86_64__
#define _mm_cvt_roundsd_i64(A, R) \
  (long long)__builtin_ia32_vcvtsd2si64((__v2df)(__m128d)(A), (int)(R))
#endif

#define _mm_cvt_roundsd_si32(A, R) \
  (int)__builtin_ia32_vcvtsd2si32((__v2df)(__m128d)(A), (int)(R))

#define _mm_cvt_roundsd_i32(A, R) \
  (int)__builtin_ia32_vcvtsd2si32((__v2df)(__m128d)(A), (int)(R))

#define _mm_cvt_roundsd_u32(A, R) \
  (unsigned int)__builtin_ia32_vcvtsd2usi32((__v2df)(__m128d)(A), (int)(R))
5977
5978static __inline__ unsigned __DEFAULT_FN_ATTRS128
5979_mm_cvtsd_u32 (__m128d __A)
5980{
5981 return (unsigned) __builtin_ia32_vcvtsd2usi32 ((__v2df) __A,
5983}
5984
5985#ifdef __x86_64__
5986#define _mm_cvt_roundsd_u64(A, R) \
5987 (unsigned long long)__builtin_ia32_vcvtsd2usi64((__v2df)(__m128d)(A), \
5988 (int)(R))
5989
5990static __inline__ unsigned long long __DEFAULT_FN_ATTRS128
5991_mm_cvtsd_u64 (__m128d __A)
5992{
5993 return (unsigned long long) __builtin_ia32_vcvtsd2usi64 ((__v2df)
5994 __A,
5996}
5997#endif
5998
/* Convert the low float to a signed/unsigned integer with explicit
   rounding control R. */
#define _mm_cvt_roundss_si32(A, R) \
  (int)__builtin_ia32_vcvtss2si32((__v4sf)(__m128)(A), (int)(R))

#define _mm_cvt_roundss_i32(A, R) \
  (int)__builtin_ia32_vcvtss2si32((__v4sf)(__m128)(A), (int)(R))

#ifdef __x86_64__
#define _mm_cvt_roundss_si64(A, R) \
  (long long)__builtin_ia32_vcvtss2si64((__v4sf)(__m128)(A), (int)(R))

#define _mm_cvt_roundss_i64(A, R) \
  (long long)__builtin_ia32_vcvtss2si64((__v4sf)(__m128)(A), (int)(R))
#endif

#define _mm_cvt_roundss_u32(A, R) \
  (unsigned int)__builtin_ia32_vcvtss2usi32((__v4sf)(__m128)(A), (int)(R))
6015
6016static __inline__ unsigned __DEFAULT_FN_ATTRS128
6017_mm_cvtss_u32 (__m128 __A)
6018{
6019 return (unsigned) __builtin_ia32_vcvtss2usi32 ((__v4sf) __A,
6021}
6022
6023#ifdef __x86_64__
6024#define _mm_cvt_roundss_u64(A, R) \
6025 (unsigned long long)__builtin_ia32_vcvtss2usi64((__v4sf)(__m128)(A), \
6026 (int)(R))
6027
6028static __inline__ unsigned long long __DEFAULT_FN_ATTRS128
6029_mm_cvtss_u64 (__m128 __A)
6030{
6031 return (unsigned long long) __builtin_ia32_vcvtss2usi64 ((__v4sf)
6032 __A,
6034}
6035#endif
6036
/* Truncating (round-toward-zero) conversion of the low double; R controls
   SAE only. */
#define _mm_cvtt_roundsd_i32(A, R) \
  (int)__builtin_ia32_vcvttsd2si32((__v2df)(__m128d)(A), (int)(R))

#define _mm_cvtt_roundsd_si32(A, R) \
  (int)__builtin_ia32_vcvttsd2si32((__v2df)(__m128d)(A), (int)(R))
6042
6043static __inline__ int __DEFAULT_FN_ATTRS128
6044_mm_cvttsd_i32 (__m128d __A)
6045{
6046 return (int) __builtin_ia32_vcvttsd2si32 ((__v2df) __A,
6048}
6049
6050#ifdef __x86_64__
6051#define _mm_cvtt_roundsd_si64(A, R) \
6052 (long long)__builtin_ia32_vcvttsd2si64((__v2df)(__m128d)(A), (int)(R))
6053
6054#define _mm_cvtt_roundsd_i64(A, R) \
6055 (long long)__builtin_ia32_vcvttsd2si64((__v2df)(__m128d)(A), (int)(R))
6056
6057static __inline__ long long __DEFAULT_FN_ATTRS128
6058_mm_cvttsd_i64 (__m128d __A)
6059{
6060 return (long long) __builtin_ia32_vcvttsd2si64 ((__v2df) __A,
6062}
6063#endif
6064
/* Truncating conversion of the low double to unsigned 32-bit; R is SAE. */
#define _mm_cvtt_roundsd_u32(A, R) \
  (unsigned int)__builtin_ia32_vcvttsd2usi32((__v2df)(__m128d)(A), (int)(R))
6067
6068static __inline__ unsigned __DEFAULT_FN_ATTRS128
6069_mm_cvttsd_u32 (__m128d __A)
6070{
6071 return (unsigned) __builtin_ia32_vcvttsd2usi32 ((__v2df) __A,
6073}
6074
6075#ifdef __x86_64__
6076#define _mm_cvtt_roundsd_u64(A, R) \
6077 (unsigned long long)__builtin_ia32_vcvttsd2usi64((__v2df)(__m128d)(A), \
6078 (int)(R))
6079
6080static __inline__ unsigned long long __DEFAULT_FN_ATTRS128
6081_mm_cvttsd_u64 (__m128d __A)
6082{
6083 return (unsigned long long) __builtin_ia32_vcvttsd2usi64 ((__v2df)
6084 __A,
6086}
6087#endif
6088
/* Truncating conversion of the low float to signed 32-bit; R is SAE. */
#define _mm_cvtt_roundss_i32(A, R) \
  (int)__builtin_ia32_vcvttss2si32((__v4sf)(__m128)(A), (int)(R))

#define _mm_cvtt_roundss_si32(A, R) \
  (int)__builtin_ia32_vcvttss2si32((__v4sf)(__m128)(A), (int)(R))
6094
6095static __inline__ int __DEFAULT_FN_ATTRS128
6096_mm_cvttss_i32 (__m128 __A)
6097{
6098 return (int) __builtin_ia32_vcvttss2si32 ((__v4sf) __A,
6100}
6101
6102#ifdef __x86_64__
6103#define _mm_cvtt_roundss_i64(A, R) \
6104 (long long)__builtin_ia32_vcvttss2si64((__v4sf)(__m128)(A), (int)(R))
6105
6106#define _mm_cvtt_roundss_si64(A, R) \
6107 (long long)__builtin_ia32_vcvttss2si64((__v4sf)(__m128)(A), (int)(R))
6108
6109static __inline__ long long __DEFAULT_FN_ATTRS128
6110_mm_cvttss_i64 (__m128 __A)
6111{
6112 return (long long) __builtin_ia32_vcvttss2si64 ((__v4sf) __A,
6114}
6115#endif
6116
/* Truncating conversion of the low float to unsigned 32-bit; R is SAE. */
#define _mm_cvtt_roundss_u32(A, R) \
  (unsigned int)__builtin_ia32_vcvttss2usi32((__v4sf)(__m128)(A), (int)(R))
6119
6120static __inline__ unsigned __DEFAULT_FN_ATTRS128
6121_mm_cvttss_u32 (__m128 __A)
6122{
6123 return (unsigned) __builtin_ia32_vcvttss2usi32 ((__v4sf) __A,
6125}
6126
6127#ifdef __x86_64__
6128#define _mm_cvtt_roundss_u64(A, R) \
6129 (unsigned long long)__builtin_ia32_vcvttss2usi64((__v4sf)(__m128)(A), \
6130 (int)(R))
6131
6132static __inline__ unsigned long long __DEFAULT_FN_ATTRS128
6133_mm_cvttss_u64 (__m128 __A)
6134{
6135 return (unsigned long long) __builtin_ia32_vcvttss2usi64 ((__v4sf)
6136 __A,
6138}
6139#endif
6140
/* VPERMILPD/VPERMILPS with an immediate control; mask/maskz variants apply
   merge/zero masking to the shuffled result. */
#define _mm512_permute_pd(X, C) \
  (__m512d)__builtin_ia32_vpermilpd512((__v8df)(__m512d)(X), (int)(C))

#define _mm512_mask_permute_pd(W, U, X, C) \
  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                       (__v8df)_mm512_permute_pd((X), (C)), \
                                       (__v8df)(__m512d)(W))

#define _mm512_maskz_permute_pd(U, X, C) \
  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                       (__v8df)_mm512_permute_pd((X), (C)), \
                                       (__v8df)_mm512_setzero_pd())

#define _mm512_permute_ps(X, C) \
  (__m512)__builtin_ia32_vpermilps512((__v16sf)(__m512)(X), (int)(C))

#define _mm512_mask_permute_ps(W, U, X, C) \
  (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                      (__v16sf)_mm512_permute_ps((X), (C)), \
                                      (__v16sf)(__m512)(W))

#define _mm512_maskz_permute_ps(U, X, C) \
  (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                      (__v16sf)_mm512_permute_ps((X), (C)), \
                                      (__v16sf)_mm512_setzero_ps())
6166
6167static __inline__ __m512d __DEFAULT_FN_ATTRS512
6168_mm512_permutevar_pd(__m512d __A, __m512i __C)
6169{
6170 return (__m512d)__builtin_ia32_vpermilvarpd512((__v8df)__A, (__v8di)__C);
6171}
6172
6173static __inline__ __m512d __DEFAULT_FN_ATTRS512
6174_mm512_mask_permutevar_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512i __C)
6175{
6176 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
6177 (__v8df)_mm512_permutevar_pd(__A, __C),
6178 (__v8df)__W);
6179}
6180
6181static __inline__ __m512d __DEFAULT_FN_ATTRS512
6182_mm512_maskz_permutevar_pd(__mmask8 __U, __m512d __A, __m512i __C)
6183{
6184 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
6185 (__v8df)_mm512_permutevar_pd(__A, __C),
6186 (__v8df)_mm512_setzero_pd());
6187}
6188
6189static __inline__ __m512 __DEFAULT_FN_ATTRS512
6190_mm512_permutevar_ps(__m512 __A, __m512i __C)
6191{
6192 return (__m512)__builtin_ia32_vpermilvarps512((__v16sf)__A, (__v16si)__C);
6193}
6194
6195static __inline__ __m512 __DEFAULT_FN_ATTRS512
6196_mm512_mask_permutevar_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512i __C)
6197{
6198 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
6199 (__v16sf)_mm512_permutevar_ps(__A, __C),
6200 (__v16sf)__W);
6201}
6202
6203static __inline__ __m512 __DEFAULT_FN_ATTRS512
6205{
6206 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
6207 (__v16sf)_mm512_permutevar_ps(__A, __C),
6208 (__v16sf)_mm512_setzero_ps());
6209}
6210
6211static __inline __m512d __DEFAULT_FN_ATTRS512
6212_mm512_permutex2var_pd(__m512d __A, __m512i __I, __m512d __B)
6213{
6214 return (__m512d)__builtin_ia32_vpermi2varpd512((__v8df)__A, (__v8di)__I,
6215 (__v8df)__B);
6216}
6217
6218static __inline__ __m512d __DEFAULT_FN_ATTRS512
6219_mm512_mask_permutex2var_pd(__m512d __A, __mmask8 __U, __m512i __I, __m512d __B)
6220{
6221 return (__m512d)__builtin_ia32_selectpd_512(__U,
6222 (__v8df)_mm512_permutex2var_pd(__A, __I, __B),
6223 (__v8df)__A);
6224}
6225
6226static __inline__ __m512d __DEFAULT_FN_ATTRS512
6227_mm512_mask2_permutex2var_pd(__m512d __A, __m512i __I, __mmask8 __U,
6228 __m512d __B)
6229{
6230 return (__m512d)__builtin_ia32_selectpd_512(__U,
6231 (__v8df)_mm512_permutex2var_pd(__A, __I, __B),
6232 (__v8df)(__m512d)__I);
6233}
6234
6235static __inline__ __m512d __DEFAULT_FN_ATTRS512
6236_mm512_maskz_permutex2var_pd(__mmask8 __U, __m512d __A, __m512i __I,
6237 __m512d __B)
6238{
6239 return (__m512d)__builtin_ia32_selectpd_512(__U,
6240 (__v8df)_mm512_permutex2var_pd(__A, __I, __B),
6241 (__v8df)_mm512_setzero_pd());
6242}
6243
6244static __inline __m512 __DEFAULT_FN_ATTRS512
6245_mm512_permutex2var_ps(__m512 __A, __m512i __I, __m512 __B)
6246{
6247 return (__m512)__builtin_ia32_vpermi2varps512((__v16sf)__A, (__v16si)__I,
6248 (__v16sf) __B);
6249}
6250
6251static __inline__ __m512 __DEFAULT_FN_ATTRS512
6252_mm512_mask_permutex2var_ps(__m512 __A, __mmask16 __U, __m512i __I, __m512 __B)
6253{
6254 return (__m512)__builtin_ia32_selectps_512(__U,
6255 (__v16sf)_mm512_permutex2var_ps(__A, __I, __B),
6256 (__v16sf)__A);
6257}
6258
6259static __inline__ __m512 __DEFAULT_FN_ATTRS512
6260_mm512_mask2_permutex2var_ps(__m512 __A, __m512i __I, __mmask16 __U, __m512 __B)
6261{
6262 return (__m512)__builtin_ia32_selectps_512(__U,
6263 (__v16sf)_mm512_permutex2var_ps(__A, __I, __B),
6264 (__v16sf)(__m512)__I);
6265}
6266
6267static __inline__ __m512 __DEFAULT_FN_ATTRS512
6268_mm512_maskz_permutex2var_ps(__mmask16 __U, __m512 __A, __m512i __I, __m512 __B)
6269{
6270 return (__m512)__builtin_ia32_selectps_512(__U,
6271 (__v16sf)_mm512_permutex2var_ps(__A, __I, __B),
6272 (__v16sf)_mm512_setzero_ps());
6273}
6274
6275
/* Truncating convert of 8 doubles to 8 unsigned 32-bit ints with explicit
 * SAE control R; plain / merge-masked / zero-masked forms. */
#define _mm512_cvtt_roundpd_epu32(A, R) \
  (__m256i)__builtin_ia32_cvttpd2udq512_mask((__v8df)(__m512d)(A), \
                                             (__v8si)_mm256_undefined_si256(), \
                                             (__mmask8)-1, (int)(R))

#define _mm512_mask_cvtt_roundpd_epu32(W, U, A, R) \
  (__m256i)__builtin_ia32_cvttpd2udq512_mask((__v8df)(__m512d)(A), \
                                             (__v8si)(__m256i)(W), \
                                             (__mmask8)(U), (int)(R))

#define _mm512_maskz_cvtt_roundpd_epu32(U, A, R) \
  (__m256i)__builtin_ia32_cvttpd2udq512_mask((__v8df)(__m512d)(A), \
                                             (__v8si)_mm256_setzero_si256(), \
                                             (__mmask8)(U), (int)(R))
6291static __inline__ __m256i __DEFAULT_FN_ATTRS512
6293{
6294 return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
6295 (__v8si)
6297 (__mmask8) -1,
6299}
6300
6301static __inline__ __m256i __DEFAULT_FN_ATTRS512
6302_mm512_mask_cvttpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A)
6303{
6304 return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
6305 (__v8si) __W,
6306 (__mmask8) __U,
6308}
6309
6310static __inline__ __m256i __DEFAULT_FN_ATTRS512
6312{
6313 return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
6314 (__v8si)
6316 (__mmask8) __U,
6318}
6319
/* Round the low scalar element of B to the precision encoded in imm
 * (imm[7:4] = number of fraction bits, imm[3:0] = rounding control),
 * passing the upper element(s) through from A.  _round_ variants take an
 * explicit rounding/SAE argument; the rest use the current direction. */
#define _mm_roundscale_round_sd(A, B, imm, R) \
  (__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \
                                                (__v2df)(__m128d)(B), \
                                                (__v2df)_mm_setzero_pd(), \
                                                (__mmask8)-1, (int)(imm), \
                                                (int)(R))

#define _mm_roundscale_sd(A, B, imm) \
  (__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \
                                                (__v2df)(__m128d)(B), \
                                                (__v2df)_mm_setzero_pd(), \
                                                (__mmask8)-1, (int)(imm), \
                                                _MM_FROUND_CUR_DIRECTION)

#define _mm_mask_roundscale_sd(W, U, A, B, imm) \
  (__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \
                                                (__v2df)(__m128d)(B), \
                                                (__v2df)(__m128d)(W), \
                                                (__mmask8)(U), (int)(imm), \
                                                _MM_FROUND_CUR_DIRECTION)

#define _mm_mask_roundscale_round_sd(W, U, A, B, I, R) \
  (__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \
                                                (__v2df)(__m128d)(B), \
                                                (__v2df)(__m128d)(W), \
                                                (__mmask8)(U), (int)(I), \
                                                (int)(R))

#define _mm_maskz_roundscale_sd(U, A, B, I) \
  (__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \
                                                (__v2df)(__m128d)(B), \
                                                (__v2df)_mm_setzero_pd(), \
                                                (__mmask8)(U), (int)(I), \
                                                _MM_FROUND_CUR_DIRECTION)

#define _mm_maskz_roundscale_round_sd(U, A, B, I, R) \
  (__m128d)__builtin_ia32_rndscalesd_round_mask((__v2df)(__m128d)(A), \
                                                (__v2df)(__m128d)(B), \
                                                (__v2df)_mm_setzero_pd(), \
                                                (__mmask8)(U), (int)(I), \
                                                (int)(R))

#define _mm_roundscale_round_ss(A, B, imm, R) \
  (__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \
                                               (__v4sf)(__m128)(B), \
                                               (__v4sf)_mm_setzero_ps(), \
                                               (__mmask8)-1, (int)(imm), \
                                               (int)(R))

#define _mm_roundscale_ss(A, B, imm) \
  (__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \
                                               (__v4sf)(__m128)(B), \
                                               (__v4sf)_mm_setzero_ps(), \
                                               (__mmask8)-1, (int)(imm), \
                                               _MM_FROUND_CUR_DIRECTION)

#define _mm_mask_roundscale_ss(W, U, A, B, I) \
  (__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \
                                               (__v4sf)(__m128)(B), \
                                               (__v4sf)(__m128)(W), \
                                               (__mmask8)(U), (int)(I), \
                                               _MM_FROUND_CUR_DIRECTION)

#define _mm_mask_roundscale_round_ss(W, U, A, B, I, R) \
  (__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \
                                               (__v4sf)(__m128)(B), \
                                               (__v4sf)(__m128)(W), \
                                               (__mmask8)(U), (int)(I), \
                                               (int)(R))

#define _mm_maskz_roundscale_ss(U, A, B, I) \
  (__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \
                                               (__v4sf)(__m128)(B), \
                                               (__v4sf)_mm_setzero_ps(), \
                                               (__mmask8)(U), (int)(I), \
                                               _MM_FROUND_CUR_DIRECTION)

#define _mm_maskz_roundscale_round_ss(U, A, B, I, R) \
  (__m128)__builtin_ia32_rndscaless_round_mask((__v4sf)(__m128)(A), \
                                               (__v4sf)(__m128)(B), \
                                               (__v4sf)_mm_setzero_ps(), \
                                               (__mmask8)(U), (int)(I), \
                                               (int)(R))
6403
/* vscalefpd: A * 2^floor(B), per double lane, with explicit rounding R. */
#define _mm512_scalef_round_pd(A, B, R) \
  (__m512d)__builtin_ia32_scalefpd512_mask((__v8df)(__m512d)(A), \
                                           (__v8df)(__m512d)(B), \
                                           (__v8df)_mm512_undefined_pd(), \
                                           (__mmask8)-1, (int)(R))

#define _mm512_mask_scalef_round_pd(W, U, A, B, R) \
  (__m512d)__builtin_ia32_scalefpd512_mask((__v8df)(__m512d)(A), \
                                           (__v8df)(__m512d)(B), \
                                           (__v8df)(__m512d)(W), \
                                           (__mmask8)(U), (int)(R))

#define _mm512_maskz_scalef_round_pd(U, A, B, R) \
  (__m512d)__builtin_ia32_scalefpd512_mask((__v8df)(__m512d)(A), \
                                           (__v8df)(__m512d)(B), \
                                           (__v8df)_mm512_setzero_pd(), \
                                           (__mmask8)(U), (int)(R))
6421
6422static __inline__ __m512d __DEFAULT_FN_ATTRS512
6423_mm512_scalef_pd (__m512d __A, __m512d __B)
6424{
6425 return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
6426 (__v8df) __B,
6427 (__v8df)
6429 (__mmask8) -1,
6431}
6432
6433static __inline__ __m512d __DEFAULT_FN_ATTRS512
6434_mm512_mask_scalef_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
6435{
6436 return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
6437 (__v8df) __B,
6438 (__v8df) __W,
6439 (__mmask8) __U,
6441}
6442
6443static __inline__ __m512d __DEFAULT_FN_ATTRS512
6444_mm512_maskz_scalef_pd (__mmask8 __U, __m512d __A, __m512d __B)
6445{
6446 return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
6447 (__v8df) __B,
6448 (__v8df)
6450 (__mmask8) __U,
6452}
6453
/* vscalefps: A * 2^floor(B), per float lane, with explicit rounding R. */
#define _mm512_scalef_round_ps(A, B, R) \
  (__m512)__builtin_ia32_scalefps512_mask((__v16sf)(__m512)(A), \
                                          (__v16sf)(__m512)(B), \
                                          (__v16sf)_mm512_undefined_ps(), \
                                          (__mmask16)-1, (int)(R))

#define _mm512_mask_scalef_round_ps(W, U, A, B, R) \
  (__m512)__builtin_ia32_scalefps512_mask((__v16sf)(__m512)(A), \
                                          (__v16sf)(__m512)(B), \
                                          (__v16sf)(__m512)(W), \
                                          (__mmask16)(U), (int)(R))

#define _mm512_maskz_scalef_round_ps(U, A, B, R) \
  (__m512)__builtin_ia32_scalefps512_mask((__v16sf)(__m512)(A), \
                                          (__v16sf)(__m512)(B), \
                                          (__v16sf)_mm512_setzero_ps(), \
                                          (__mmask16)(U), (int)(R))
6471
6472static __inline__ __m512 __DEFAULT_FN_ATTRS512
6473_mm512_scalef_ps (__m512 __A, __m512 __B)
6474{
6475 return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
6476 (__v16sf) __B,
6477 (__v16sf)
6479 (__mmask16) -1,
6481}
6482
6483static __inline__ __m512 __DEFAULT_FN_ATTRS512
6484_mm512_mask_scalef_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
6485{
6486 return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
6487 (__v16sf) __B,
6488 (__v16sf) __W,
6489 (__mmask16) __U,
6491}
6492
6493static __inline__ __m512 __DEFAULT_FN_ATTRS512
6494_mm512_maskz_scalef_ps (__mmask16 __U, __m512 __A, __m512 __B)
6495{
6496 return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
6497 (__v16sf) __B,
6498 (__v16sf)
6500 (__mmask16) __U,
6502}
6503
6504#define _mm_scalef_round_sd(A, B, R) \
6505 (__m128d)__builtin_ia32_scalefsd_round_mask((__v2df)(__m128d)(A), \
6506 (__v2df)(__m128d)(B), \
6507 (__v2df)_mm_setzero_pd(), \
6508 (__mmask8)-1, (int)(R))
6509
6510static __inline__ __m128d __DEFAULT_FN_ATTRS128
6511_mm_scalef_sd (__m128d __A, __m128d __B)
6512{
6513 return (__m128d) __builtin_ia32_scalefsd_round_mask ((__v2df) __A,
6514 (__v2df)( __B), (__v2df) _mm_setzero_pd(),
6515 (__mmask8) -1,
6517}
6518
6519static __inline__ __m128d __DEFAULT_FN_ATTRS128
6520_mm_mask_scalef_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
6521{
6522 return (__m128d) __builtin_ia32_scalefsd_round_mask ( (__v2df) __A,
6523 (__v2df) __B,
6524 (__v2df) __W,
6525 (__mmask8) __U,
6527}
6528
6529#define _mm_mask_scalef_round_sd(W, U, A, B, R) \
6530 (__m128d)__builtin_ia32_scalefsd_round_mask((__v2df)(__m128d)(A), \
6531 (__v2df)(__m128d)(B), \
6532 (__v2df)(__m128d)(W), \
6533 (__mmask8)(U), (int)(R))
6534
6535static __inline__ __m128d __DEFAULT_FN_ATTRS128
6536_mm_maskz_scalef_sd (__mmask8 __U, __m128d __A, __m128d __B)
6537{
6538 return (__m128d) __builtin_ia32_scalefsd_round_mask ( (__v2df) __A,
6539 (__v2df) __B,
6540 (__v2df) _mm_setzero_pd (),
6541 (__mmask8) __U,
6543}
6544
6545#define _mm_maskz_scalef_round_sd(U, A, B, R) \
6546 (__m128d)__builtin_ia32_scalefsd_round_mask((__v2df)(__m128d)(A), \
6547 (__v2df)(__m128d)(B), \
6548 (__v2df)_mm_setzero_pd(), \
6549 (__mmask8)(U), (int)(R))
6550
6551#define _mm_scalef_round_ss(A, B, R) \
6552 (__m128)__builtin_ia32_scalefss_round_mask((__v4sf)(__m128)(A), \
6553 (__v4sf)(__m128)(B), \
6554 (__v4sf)_mm_setzero_ps(), \
6555 (__mmask8)-1, (int)(R))
6556
6557static __inline__ __m128 __DEFAULT_FN_ATTRS128
6558_mm_scalef_ss (__m128 __A, __m128 __B)
6559{
6560 return (__m128) __builtin_ia32_scalefss_round_mask ((__v4sf) __A,
6561 (__v4sf)( __B), (__v4sf) _mm_setzero_ps(),
6562 (__mmask8) -1,
6564}
6565
6566static __inline__ __m128 __DEFAULT_FN_ATTRS128
6567_mm_mask_scalef_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
6568{
6569 return (__m128) __builtin_ia32_scalefss_round_mask ( (__v4sf) __A,
6570 (__v4sf) __B,
6571 (__v4sf) __W,
6572 (__mmask8) __U,
6574}
6575
6576#define _mm_mask_scalef_round_ss(W, U, A, B, R) \
6577 (__m128)__builtin_ia32_scalefss_round_mask((__v4sf)(__m128)(A), \
6578 (__v4sf)(__m128)(B), \
6579 (__v4sf)(__m128)(W), \
6580 (__mmask8)(U), (int)(R))
6581
6582static __inline__ __m128 __DEFAULT_FN_ATTRS128
6583_mm_maskz_scalef_ss (__mmask8 __U, __m128 __A, __m128 __B)
6584{
6585 return (__m128) __builtin_ia32_scalefss_round_mask ( (__v4sf) __A,
6586 (__v4sf) __B,
6587 (__v4sf) _mm_setzero_ps (),
6588 (__mmask8) __U,
6590}
6591
6592#define _mm_maskz_scalef_round_ss(U, A, B, R) \
6593 (__m128)__builtin_ia32_scalefss_round_mask((__v4sf)(__m128)(A), \
6594 (__v4sf)(__m128)(B), \
6595 (__v4sf)_mm_setzero_ps(), \
6596 (__mmask8)(U), \
6597 (int)(R))
6598
6599static __inline__ __m512i __DEFAULT_FN_ATTRS512
6600_mm512_srai_epi32(__m512i __A, unsigned int __B)
6601{
6602 return (__m512i)__builtin_ia32_psradi512((__v16si)__A, __B);
6603}
6604
6605static __inline__ __m512i __DEFAULT_FN_ATTRS512
6606_mm512_mask_srai_epi32(__m512i __W, __mmask16 __U, __m512i __A,
6607 unsigned int __B)
6608{
6609 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
6610 (__v16si)_mm512_srai_epi32(__A, __B),
6611 (__v16si)__W);
6612}
6613
6614static __inline__ __m512i __DEFAULT_FN_ATTRS512
6616 unsigned int __B) {
6617 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
6618 (__v16si)_mm512_srai_epi32(__A, __B),
6619 (__v16si)_mm512_setzero_si512());
6620}
6621
6622static __inline__ __m512i __DEFAULT_FN_ATTRS512
6623_mm512_srai_epi64(__m512i __A, unsigned int __B)
6624{
6625 return (__m512i)__builtin_ia32_psraqi512((__v8di)__A, __B);
6626}
6627
6628static __inline__ __m512i __DEFAULT_FN_ATTRS512
6629_mm512_mask_srai_epi64(__m512i __W, __mmask8 __U, __m512i __A, unsigned int __B)
6630{
6631 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
6632 (__v8di)_mm512_srai_epi64(__A, __B),
6633 (__v8di)__W);
6634}
6635
6636static __inline__ __m512i __DEFAULT_FN_ATTRS512
6637_mm512_maskz_srai_epi64(__mmask8 __U, __m512i __A, unsigned int __B)
6638{
6639 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
6640 (__v8di)_mm512_srai_epi64(__A, __B),
6641 (__v8di)_mm512_setzero_si512());
6642}
6643
/* 128-bit-lane and element shuffles selected by the immediate; each has
 * plain, merge-masked, and zero-masked forms built on the select builtins. */
#define _mm512_shuffle_f32x4(A, B, imm) \
  (__m512)__builtin_ia32_shuf_f32x4((__v16sf)(__m512)(A), \
                                    (__v16sf)(__m512)(B), (int)(imm))

#define _mm512_mask_shuffle_f32x4(W, U, A, B, imm) \
  (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                      (__v16sf)_mm512_shuffle_f32x4((A), (B), (imm)), \
                                      (__v16sf)(__m512)(W))

#define _mm512_maskz_shuffle_f32x4(U, A, B, imm) \
  (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                      (__v16sf)_mm512_shuffle_f32x4((A), (B), (imm)), \
                                      (__v16sf)_mm512_setzero_ps())

#define _mm512_shuffle_f64x2(A, B, imm) \
  (__m512d)__builtin_ia32_shuf_f64x2((__v8df)(__m512d)(A), \
                                     (__v8df)(__m512d)(B), (int)(imm))

#define _mm512_mask_shuffle_f64x2(W, U, A, B, imm) \
  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                       (__v8df)_mm512_shuffle_f64x2((A), (B), (imm)), \
                                       (__v8df)(__m512d)(W))

#define _mm512_maskz_shuffle_f64x2(U, A, B, imm) \
  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                       (__v8df)_mm512_shuffle_f64x2((A), (B), (imm)), \
                                       (__v8df)_mm512_setzero_pd())

#define _mm512_shuffle_i32x4(A, B, imm) \
  (__m512i)__builtin_ia32_shuf_i32x4((__v16si)(__m512i)(A), \
                                     (__v16si)(__m512i)(B), (int)(imm))

#define _mm512_mask_shuffle_i32x4(W, U, A, B, imm) \
  (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                      (__v16si)_mm512_shuffle_i32x4((A), (B), (imm)), \
                                      (__v16si)(__m512i)(W))

#define _mm512_maskz_shuffle_i32x4(U, A, B, imm) \
  (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                      (__v16si)_mm512_shuffle_i32x4((A), (B), (imm)), \
                                      (__v16si)_mm512_setzero_si512())

#define _mm512_shuffle_i64x2(A, B, imm) \
  (__m512i)__builtin_ia32_shuf_i64x2((__v8di)(__m512i)(A), \
                                     (__v8di)(__m512i)(B), (int)(imm))

#define _mm512_mask_shuffle_i64x2(W, U, A, B, imm) \
  (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                      (__v8di)_mm512_shuffle_i64x2((A), (B), (imm)), \
                                      (__v8di)(__m512i)(W))

#define _mm512_maskz_shuffle_i64x2(U, A, B, imm) \
  (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                      (__v8di)_mm512_shuffle_i64x2((A), (B), (imm)), \
                                      (__v8di)_mm512_setzero_si512())

#define _mm512_shuffle_pd(A, B, M) \
  (__m512d)__builtin_ia32_shufpd512((__v8df)(__m512d)(A), \
                                    (__v8df)(__m512d)(B), (int)(M))

#define _mm512_mask_shuffle_pd(W, U, A, B, M) \
  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                       (__v8df)_mm512_shuffle_pd((A), (B), (M)), \
                                       (__v8df)(__m512d)(W))

#define _mm512_maskz_shuffle_pd(U, A, B, M) \
  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                       (__v8df)_mm512_shuffle_pd((A), (B), (M)), \
                                       (__v8df)_mm512_setzero_pd())

#define _mm512_shuffle_ps(A, B, M) \
  (__m512)__builtin_ia32_shufps512((__v16sf)(__m512)(A), \
                                   (__v16sf)(__m512)(B), (int)(M))

#define _mm512_mask_shuffle_ps(W, U, A, B, M) \
  (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                      (__v16sf)_mm512_shuffle_ps((A), (B), (M)), \
                                      (__v16sf)(__m512)(W))

#define _mm512_maskz_shuffle_ps(U, A, B, M) \
  (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                      (__v16sf)_mm512_shuffle_ps((A), (B), (M)), \
                                      (__v16sf)_mm512_setzero_ps())
6727
6728#define _mm_sqrt_round_sd(A, B, R) \
6729 (__m128d)__builtin_ia32_sqrtsd_round_mask((__v2df)(__m128d)(A), \
6730 (__v2df)(__m128d)(B), \
6731 (__v2df)_mm_setzero_pd(), \
6732 (__mmask8)-1, (int)(R))
6733
6734static __inline__ __m128d __DEFAULT_FN_ATTRS128
6735_mm_mask_sqrt_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
6736{
6737 return (__m128d) __builtin_ia32_sqrtsd_round_mask ( (__v2df) __A,
6738 (__v2df) __B,
6739 (__v2df) __W,
6740 (__mmask8) __U,
6742}
6743
6744#define _mm_mask_sqrt_round_sd(W, U, A, B, R) \
6745 (__m128d)__builtin_ia32_sqrtsd_round_mask((__v2df)(__m128d)(A), \
6746 (__v2df)(__m128d)(B), \
6747 (__v2df)(__m128d)(W), \
6748 (__mmask8)(U), (int)(R))
6749
6750static __inline__ __m128d __DEFAULT_FN_ATTRS128
6751_mm_maskz_sqrt_sd (__mmask8 __U, __m128d __A, __m128d __B)
6752{
6753 return (__m128d) __builtin_ia32_sqrtsd_round_mask ( (__v2df) __A,
6754 (__v2df) __B,
6755 (__v2df) _mm_setzero_pd (),
6756 (__mmask8) __U,
6758}
6759
6760#define _mm_maskz_sqrt_round_sd(U, A, B, R) \
6761 (__m128d)__builtin_ia32_sqrtsd_round_mask((__v2df)(__m128d)(A), \
6762 (__v2df)(__m128d)(B), \
6763 (__v2df)_mm_setzero_pd(), \
6764 (__mmask8)(U), (int)(R))
6765
6766#define _mm_sqrt_round_ss(A, B, R) \
6767 (__m128)__builtin_ia32_sqrtss_round_mask((__v4sf)(__m128)(A), \
6768 (__v4sf)(__m128)(B), \
6769 (__v4sf)_mm_setzero_ps(), \
6770 (__mmask8)-1, (int)(R))
6771
6772static __inline__ __m128 __DEFAULT_FN_ATTRS128
6773_mm_mask_sqrt_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
6774{
6775 return (__m128) __builtin_ia32_sqrtss_round_mask ( (__v4sf) __A,
6776 (__v4sf) __B,
6777 (__v4sf) __W,
6778 (__mmask8) __U,
6780}
6781
6782#define _mm_mask_sqrt_round_ss(W, U, A, B, R) \
6783 (__m128)__builtin_ia32_sqrtss_round_mask((__v4sf)(__m128)(A), \
6784 (__v4sf)(__m128)(B), \
6785 (__v4sf)(__m128)(W), (__mmask8)(U), \
6786 (int)(R))
6787
6788static __inline__ __m128 __DEFAULT_FN_ATTRS128
6789_mm_maskz_sqrt_ss (__mmask8 __U, __m128 __A, __m128 __B)
6790{
6791 return (__m128) __builtin_ia32_sqrtss_round_mask ( (__v4sf) __A,
6792 (__v4sf) __B,
6793 (__v4sf) _mm_setzero_ps (),
6794 (__mmask8) __U,
6796}
6797
6798#define _mm_maskz_sqrt_round_ss(U, A, B, R) \
6799 (__m128)__builtin_ia32_sqrtss_round_mask((__v4sf)(__m128)(A), \
6800 (__v4sf)(__m128)(B), \
6801 (__v4sf)_mm_setzero_ps(), \
6802 (__mmask8)(U), (int)(R))
6803
6804static __inline__ __m512 __DEFAULT_FN_ATTRS512
6806{
6807 return (__m512)__builtin_shufflevector((__v4sf)__A, (__v4sf)__A,
6808 0, 1, 2, 3, 0, 1, 2, 3,
6809 0, 1, 2, 3, 0, 1, 2, 3);
6810}
6811
6812static __inline__ __m512 __DEFAULT_FN_ATTRS512
6813_mm512_mask_broadcast_f32x4(__m512 __O, __mmask16 __M, __m128 __A)
6814{
6815 return (__m512)__builtin_ia32_selectps_512((__mmask16)__M,
6816 (__v16sf)_mm512_broadcast_f32x4(__A),
6817 (__v16sf)__O);
6818}
6819
6820static __inline__ __m512 __DEFAULT_FN_ATTRS512
6822{
6823 return (__m512)__builtin_ia32_selectps_512((__mmask16)__M,
6824 (__v16sf)_mm512_broadcast_f32x4(__A),
6825 (__v16sf)_mm512_setzero_ps());
6826}
6827
6828static __inline__ __m512d __DEFAULT_FN_ATTRS512
6830{
6831 return (__m512d)__builtin_shufflevector((__v4df)__A, (__v4df)__A,
6832 0, 1, 2, 3, 0, 1, 2, 3);
6833}
6834
6835static __inline__ __m512d __DEFAULT_FN_ATTRS512
6836_mm512_mask_broadcast_f64x4(__m512d __O, __mmask8 __M, __m256d __A)
6837{
6838 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__M,
6839 (__v8df)_mm512_broadcast_f64x4(__A),
6840 (__v8df)__O);
6841}
6842
6843static __inline__ __m512d __DEFAULT_FN_ATTRS512
6845{
6846 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__M,
6847 (__v8df)_mm512_broadcast_f64x4(__A),
6848 (__v8df)_mm512_setzero_pd());
6849}
6850
6851static __inline__ __m512i __DEFAULT_FN_ATTRS512
6853{
6854 return (__m512i)__builtin_shufflevector((__v4si)__A, (__v4si)__A,
6855 0, 1, 2, 3, 0, 1, 2, 3,
6856 0, 1, 2, 3, 0, 1, 2, 3);
6857}
6858
6859static __inline__ __m512i __DEFAULT_FN_ATTRS512
6860_mm512_mask_broadcast_i32x4(__m512i __O, __mmask16 __M, __m128i __A)
6861{
6862 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
6863 (__v16si)_mm512_broadcast_i32x4(__A),
6864 (__v16si)__O);
6865}
6866
6867static __inline__ __m512i __DEFAULT_FN_ATTRS512
6869{
6870 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
6871 (__v16si)_mm512_broadcast_i32x4(__A),
6872 (__v16si)_mm512_setzero_si512());
6873}
6874
6875static __inline__ __m512i __DEFAULT_FN_ATTRS512
6877{
6878 return (__m512i)__builtin_shufflevector((__v4di)__A, (__v4di)__A,
6879 0, 1, 2, 3, 0, 1, 2, 3);
6880}
6881
6882static __inline__ __m512i __DEFAULT_FN_ATTRS512
6883_mm512_mask_broadcast_i64x4(__m512i __O, __mmask8 __M, __m256i __A)
6884{
6885 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
6886 (__v8di)_mm512_broadcast_i64x4(__A),
6887 (__v8di)__O);
6888}
6889
6890static __inline__ __m512i __DEFAULT_FN_ATTRS512
6892{
6893 return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
6894 (__v8di)_mm512_broadcast_i64x4(__A),
6895 (__v8di)_mm512_setzero_si512());
6896}
6897
6898static __inline__ __m512d __DEFAULT_FN_ATTRS512
6899_mm512_mask_broadcastsd_pd (__m512d __O, __mmask8 __M, __m128d __A)
6900{
6901 return (__m512d)__builtin_ia32_selectpd_512(__M,
6902 (__v8df) _mm512_broadcastsd_pd(__A),
6903 (__v8df) __O);
6904}
6905
6906static __inline__ __m512d __DEFAULT_FN_ATTRS512
6908{
6909 return (__m512d)__builtin_ia32_selectpd_512(__M,
6910 (__v8df) _mm512_broadcastsd_pd(__A),
6911 (__v8df) _mm512_setzero_pd());
6912}
6913
6914static __inline__ __m512 __DEFAULT_FN_ATTRS512
6915_mm512_mask_broadcastss_ps (__m512 __O, __mmask16 __M, __m128 __A)
6916{
6917 return (__m512)__builtin_ia32_selectps_512(__M,
6918 (__v16sf) _mm512_broadcastss_ps(__A),
6919 (__v16sf) __O);
6920}
6921
6922static __inline__ __m512 __DEFAULT_FN_ATTRS512
6924{
6925 return (__m512)__builtin_ia32_selectps_512(__M,
6926 (__v16sf) _mm512_broadcastss_ps(__A),
6927 (__v16sf) _mm512_setzero_ps());
6928}
6929
6930static __inline__ __m128i __DEFAULT_FN_ATTRS512
6932{
6933 return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A,
6934 (__v16qi) _mm_undefined_si128 (),
6935 (__mmask16) -1);
6936}
6937
6938static __inline__ __m128i __DEFAULT_FN_ATTRS512
6939_mm512_mask_cvtsepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A)
6940{
6941 return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A,
6942 (__v16qi) __O, __M);
6943}
6944
6945static __inline__ __m128i __DEFAULT_FN_ATTRS512
6947{
6948 return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A,
6949 (__v16qi) _mm_setzero_si128 (),
6950 __M);
6951}
6952
6953static __inline__ void __DEFAULT_FN_ATTRS512
6954_mm512_mask_cvtsepi32_storeu_epi8 (void * __P, __mmask16 __M, __m512i __A)
6955{
6956 __builtin_ia32_pmovsdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M);
6957}
6958
6959static __inline__ __m256i __DEFAULT_FN_ATTRS512
6961{
6962 return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A,
6963 (__v16hi) _mm256_undefined_si256 (),
6964 (__mmask16) -1);
6965}
6966
6967static __inline__ __m256i __DEFAULT_FN_ATTRS512
6968_mm512_mask_cvtsepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A)
6969{
6970 return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A,
6971 (__v16hi) __O, __M);
6972}
6973
6974static __inline__ __m256i __DEFAULT_FN_ATTRS512
6976{
6977 return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A,
6978 (__v16hi) _mm256_setzero_si256 (),
6979 __M);
6980}
6981
6982static __inline__ void __DEFAULT_FN_ATTRS512
6984{
6985 __builtin_ia32_pmovsdw512mem_mask ((__v16hi*) __P, (__v16si) __A, __M);
6986}
6987
6988static __inline__ __m128i __DEFAULT_FN_ATTRS512
6990{
6991 return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A,
6992 (__v16qi) _mm_undefined_si128 (),
6993 (__mmask8) -1);
6994}
6995
6996static __inline__ __m128i __DEFAULT_FN_ATTRS512
6997_mm512_mask_cvtsepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A)
6998{
6999 return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A,
7000 (__v16qi) __O, __M);
7001}
7002
7003static __inline__ __m128i __DEFAULT_FN_ATTRS512
7005{
7006 return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A,
7007 (__v16qi) _mm_setzero_si128 (),
7008 __M);
7009}
7010
7011static __inline__ void __DEFAULT_FN_ATTRS512
7012_mm512_mask_cvtsepi64_storeu_epi8 (void * __P, __mmask8 __M, __m512i __A)
7013{
7014 __builtin_ia32_pmovsqb512mem_mask ((__v16qi *) __P, (__v8di) __A, __M);
7015}
7016
7017static __inline__ __m256i __DEFAULT_FN_ATTRS512
7019{
7020 return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A,
7021 (__v8si) _mm256_undefined_si256 (),
7022 (__mmask8) -1);
7023}
7024
7025static __inline__ __m256i __DEFAULT_FN_ATTRS512
7026_mm512_mask_cvtsepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A)
7027{
7028 return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A,
7029 (__v8si) __O, __M);
7030}
7031
7032static __inline__ __m256i __DEFAULT_FN_ATTRS512
7034{
7035 return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A,
7036 (__v8si) _mm256_setzero_si256 (),
7037 __M);
7038}
7039
7040static __inline__ void __DEFAULT_FN_ATTRS512
7042{
7043 __builtin_ia32_pmovsqd512mem_mask ((__v8si *) __P, (__v8di) __A, __M);
7044}
7045
7046static __inline__ __m128i __DEFAULT_FN_ATTRS512
7048{
7049 return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A,
7050 (__v8hi) _mm_undefined_si128 (),
7051 (__mmask8) -1);
7052}
7053
7054static __inline__ __m128i __DEFAULT_FN_ATTRS512
7055_mm512_mask_cvtsepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A)
7056{
7057 return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A,
7058 (__v8hi) __O, __M);
7059}
7060
7061static __inline__ __m128i __DEFAULT_FN_ATTRS512
7063{
7064 return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A,
7065 (__v8hi) _mm_setzero_si128 (),
7066 __M);
7067}
7068
7069static __inline__ void __DEFAULT_FN_ATTRS512
7070_mm512_mask_cvtsepi64_storeu_epi16 (void * __P, __mmask8 __M, __m512i __A)
7071{
7072 __builtin_ia32_pmovsqw512mem_mask ((__v8hi *) __P, (__v8di) __A, __M);
7073}
7074
7075static __inline__ __m128i __DEFAULT_FN_ATTRS512
7077{
7078 return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A,
7079 (__v16qi) _mm_undefined_si128 (),
7080 (__mmask16) -1);
7081}
7082
7083static __inline__ __m128i __DEFAULT_FN_ATTRS512
7084_mm512_mask_cvtusepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A)
7085{
7086 return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A,
7087 (__v16qi) __O,
7088 __M);
7089}
7090
7091static __inline__ __m128i __DEFAULT_FN_ATTRS512
7093{
7094 return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A,
7095 (__v16qi) _mm_setzero_si128 (),
7096 __M);
7097}
7098
7099static __inline__ void __DEFAULT_FN_ATTRS512
7100_mm512_mask_cvtusepi32_storeu_epi8 (void * __P, __mmask16 __M, __m512i __A)
7101{
7102 __builtin_ia32_pmovusdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M);
7103}
7104
7105static __inline__ __m256i __DEFAULT_FN_ATTRS512
7107{
7108 return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A,
7109 (__v16hi) _mm256_undefined_si256 (),
7110 (__mmask16) -1);
7111}
7112
7113static __inline__ __m256i __DEFAULT_FN_ATTRS512
7114_mm512_mask_cvtusepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A)
7115{
7116 return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A,
7117 (__v16hi) __O,
7118 __M);
7119}
7120
7121static __inline__ __m256i __DEFAULT_FN_ATTRS512
7123{
7124 return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A,
7125 (__v16hi) _mm256_setzero_si256 (),
7126 __M);
7127}
7128
7129static __inline__ void __DEFAULT_FN_ATTRS512
7131{
7132 __builtin_ia32_pmovusdw512mem_mask ((__v16hi*) __P, (__v16si) __A, __M);
7133}
7134
7135static __inline__ __m128i __DEFAULT_FN_ATTRS512
7137{
7138 return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A,
7139 (__v16qi) _mm_undefined_si128 (),
7140 (__mmask8) -1);
7141}
7142
7143static __inline__ __m128i __DEFAULT_FN_ATTRS512
7144_mm512_mask_cvtusepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A)
7145{
7146 return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A,
7147 (__v16qi) __O,
7148 __M);
7149}
7150
7151static __inline__ __m128i __DEFAULT_FN_ATTRS512
7153{
7154 return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A,
7155 (__v16qi) _mm_setzero_si128 (),
7156 __M);
7157}
7158
7159static __inline__ void __DEFAULT_FN_ATTRS512
7160_mm512_mask_cvtusepi64_storeu_epi8 (void * __P, __mmask8 __M, __m512i __A)
7161{
7162 __builtin_ia32_pmovusqb512mem_mask ((__v16qi *) __P, (__v8di) __A, __M);
7163}
7164
7165static __inline__ __m256i __DEFAULT_FN_ATTRS512
7167{
7168 return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A,
7169 (__v8si) _mm256_undefined_si256 (),
7170 (__mmask8) -1);
7171}
7172
7173static __inline__ __m256i __DEFAULT_FN_ATTRS512
7174_mm512_mask_cvtusepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A)
7175{
7176 return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A,
7177 (__v8si) __O, __M);
7178}
7179
7180static __inline__ __m256i __DEFAULT_FN_ATTRS512
7182{
7183 return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A,
7184 (__v8si) _mm256_setzero_si256 (),
7185 __M);
7186}
7187
7188static __inline__ void __DEFAULT_FN_ATTRS512
7190{
7191 __builtin_ia32_pmovusqd512mem_mask ((__v8si*) __P, (__v8di) __A, __M);
7192}
7193
7194static __inline__ __m128i __DEFAULT_FN_ATTRS512
7196{
7197 return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A,
7198 (__v8hi) _mm_undefined_si128 (),
7199 (__mmask8) -1);
7200}
7201
7202static __inline__ __m128i __DEFAULT_FN_ATTRS512
7203_mm512_mask_cvtusepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A)
7204{
7205 return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A,
7206 (__v8hi) __O, __M);
7207}
7208
7209static __inline__ __m128i __DEFAULT_FN_ATTRS512
7211{
7212 return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A,
7213 (__v8hi) _mm_setzero_si128 (),
7214 __M);
7215}
7216
7217static __inline__ void __DEFAULT_FN_ATTRS512
7219{
7220 __builtin_ia32_pmovusqw512mem_mask ((__v8hi*) __P, (__v8di) __A, __M);
7221}
7222
7223static __inline__ __m128i __DEFAULT_FN_ATTRS512
7225{
7226 return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A,
7227 (__v16qi) _mm_undefined_si128 (),
7228 (__mmask16) -1);
7229}
7230
7231static __inline__ __m128i __DEFAULT_FN_ATTRS512
7232_mm512_mask_cvtepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A)
7233{
7234 return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A,
7235 (__v16qi) __O, __M);
7236}
7237
7238static __inline__ __m128i __DEFAULT_FN_ATTRS512
7240{
7241 return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A,
7242 (__v16qi) _mm_setzero_si128 (),
7243 __M);
7244}
7245
7246static __inline__ void __DEFAULT_FN_ATTRS512
7247_mm512_mask_cvtepi32_storeu_epi8 (void * __P, __mmask16 __M, __m512i __A)
7248{
7249 __builtin_ia32_pmovdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M);
7250}
7251
7252static __inline__ __m256i __DEFAULT_FN_ATTRS512
7254{
7255 return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A,
7256 (__v16hi) _mm256_undefined_si256 (),
7257 (__mmask16) -1);
7258}
7259
7260static __inline__ __m256i __DEFAULT_FN_ATTRS512
7261_mm512_mask_cvtepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A)
7262{
7263 return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A,
7264 (__v16hi) __O, __M);
7265}
7266
7267static __inline__ __m256i __DEFAULT_FN_ATTRS512
7269{
7270 return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A,
7271 (__v16hi) _mm256_setzero_si256 (),
7272 __M);
7273}
7274
7275static __inline__ void __DEFAULT_FN_ATTRS512
7276_mm512_mask_cvtepi32_storeu_epi16 (void * __P, __mmask16 __M, __m512i __A)
7277{
7278 __builtin_ia32_pmovdw512mem_mask ((__v16hi *) __P, (__v16si) __A, __M);
7279}
7280
7281static __inline__ __m128i __DEFAULT_FN_ATTRS512
7283{
7284 return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A,
7285 (__v16qi) _mm_undefined_si128 (),
7286 (__mmask8) -1);
7287}
7288
7289static __inline__ __m128i __DEFAULT_FN_ATTRS512
7290_mm512_mask_cvtepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A)
7291{
7292 return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A,
7293 (__v16qi) __O, __M);
7294}
7295
7296static __inline__ __m128i __DEFAULT_FN_ATTRS512
7298{
7299 return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A,
7300 (__v16qi) _mm_setzero_si128 (),
7301 __M);
7302}
7303
7304static __inline__ void __DEFAULT_FN_ATTRS512
7305_mm512_mask_cvtepi64_storeu_epi8 (void * __P, __mmask8 __M, __m512i __A)
7306{
7307 __builtin_ia32_pmovqb512mem_mask ((__v16qi *) __P, (__v8di) __A, __M);
7308}
7309
7310static __inline__ __m256i __DEFAULT_FN_ATTRS512
7312{
7313 return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A,
7314 (__v8si) _mm256_undefined_si256 (),
7315 (__mmask8) -1);
7316}
7317
7318static __inline__ __m256i __DEFAULT_FN_ATTRS512
7319_mm512_mask_cvtepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A)
7320{
7321 return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A,
7322 (__v8si) __O, __M);
7323}
7324
7325static __inline__ __m256i __DEFAULT_FN_ATTRS512
7327{
7328 return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A,
7329 (__v8si) _mm256_setzero_si256 (),
7330 __M);
7331}
7332
7333static __inline__ void __DEFAULT_FN_ATTRS512
7334_mm512_mask_cvtepi64_storeu_epi32 (void* __P, __mmask8 __M, __m512i __A)
7335{
7336 __builtin_ia32_pmovqd512mem_mask ((__v8si *) __P, (__v8di) __A, __M);
7337}
7338
7339static __inline__ __m128i __DEFAULT_FN_ATTRS512
7341{
7342 return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A,
7343 (__v8hi) _mm_undefined_si128 (),
7344 (__mmask8) -1);
7345}
7346
7347static __inline__ __m128i __DEFAULT_FN_ATTRS512
7348_mm512_mask_cvtepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A)
7349{
7350 return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A,
7351 (__v8hi) __O, __M);
7352}
7353
7354static __inline__ __m128i __DEFAULT_FN_ATTRS512
7356{
7357 return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A,
7358 (__v8hi) _mm_setzero_si128 (),
7359 __M);
7360}
7361
7362static __inline__ void __DEFAULT_FN_ATTRS512
7363_mm512_mask_cvtepi64_storeu_epi16 (void *__P, __mmask8 __M, __m512i __A)
7364{
7365 __builtin_ia32_pmovqw512mem_mask ((__v8hi *) __P, (__v8di) __A, __M);
7366}
7367
/* Lane extraction: pull the 128-bit (i32x4) or 256-bit (i64x4) lane selected
   by the immediate out of a 512-bit vector.  _mask_ forms blend unselected
   elements from W; _maskz_ forms zero them. */
#define _mm512_extracti32x4_epi32(A, imm) \
  (__m128i)__builtin_ia32_extracti32x4_mask((__v16si)(__m512i)(A), (int)(imm), \
                                            (__v4si)_mm_undefined_si128(), \
                                            (__mmask8)-1)

#define _mm512_mask_extracti32x4_epi32(W, U, A, imm) \
  (__m128i)__builtin_ia32_extracti32x4_mask((__v16si)(__m512i)(A), (int)(imm), \
                                            (__v4si)(__m128i)(W), \
                                            (__mmask8)(U))

#define _mm512_maskz_extracti32x4_epi32(U, A, imm) \
  (__m128i)__builtin_ia32_extracti32x4_mask((__v16si)(__m512i)(A), (int)(imm), \
                                            (__v4si)_mm_setzero_si128(), \
                                            (__mmask8)(U))

#define _mm512_extracti64x4_epi64(A, imm) \
  (__m256i)__builtin_ia32_extracti64x4_mask((__v8di)(__m512i)(A), (int)(imm), \
                                            (__v4di)_mm256_undefined_si256(), \
                                            (__mmask8)-1)

#define _mm512_mask_extracti64x4_epi64(W, U, A, imm) \
  (__m256i)__builtin_ia32_extracti64x4_mask((__v8di)(__m512i)(A), (int)(imm), \
                                            (__v4di)(__m256i)(W), \
                                            (__mmask8)(U))

#define _mm512_maskz_extracti64x4_epi64(U, A, imm) \
  (__m256i)__builtin_ia32_extracti64x4_mask((__v8di)(__m512i)(A), (int)(imm), \
                                            (__v4di)_mm256_setzero_si256(), \
                                            (__mmask8)(U))

/* Lane insertion: replace the lane of A selected by the immediate with B.
   Masked forms apply a per-element select between the inserted result and
   W (or zero) via the corresponding selectpd/selectq/selectps/selectd. */
#define _mm512_insertf64x4(A, B, imm) \
  (__m512d)__builtin_ia32_insertf64x4((__v8df)(__m512d)(A), \
                                      (__v4df)(__m256d)(B), (int)(imm))

#define _mm512_mask_insertf64x4(W, U, A, B, imm) \
  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                 (__v8df)_mm512_insertf64x4((A), (B), (imm)), \
                                 (__v8df)(__m512d)(W))

#define _mm512_maskz_insertf64x4(U, A, B, imm) \
  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                 (__v8df)_mm512_insertf64x4((A), (B), (imm)), \
                                 (__v8df)_mm512_setzero_pd())

#define _mm512_inserti64x4(A, B, imm) \
  (__m512i)__builtin_ia32_inserti64x4((__v8di)(__m512i)(A), \
                                      (__v4di)(__m256i)(B), (int)(imm))

#define _mm512_mask_inserti64x4(W, U, A, B, imm) \
  (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                 (__v8di)_mm512_inserti64x4((A), (B), (imm)), \
                                 (__v8di)(__m512i)(W))

#define _mm512_maskz_inserti64x4(U, A, B, imm) \
  (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                 (__v8di)_mm512_inserti64x4((A), (B), (imm)), \
                                 (__v8di)_mm512_setzero_si512())

#define _mm512_insertf32x4(A, B, imm) \
  (__m512)__builtin_ia32_insertf32x4((__v16sf)(__m512)(A), \
                                     (__v4sf)(__m128)(B), (int)(imm))

#define _mm512_mask_insertf32x4(W, U, A, B, imm) \
  (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                 (__v16sf)_mm512_insertf32x4((A), (B), (imm)), \
                                 (__v16sf)(__m512)(W))

#define _mm512_maskz_insertf32x4(U, A, B, imm) \
  (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                 (__v16sf)_mm512_insertf32x4((A), (B), (imm)), \
                                 (__v16sf)_mm512_setzero_ps())

#define _mm512_inserti32x4(A, B, imm) \
  (__m512i)__builtin_ia32_inserti32x4((__v16si)(__m512i)(A), \
                                      (__v4si)(__m128i)(B), (int)(imm))

#define _mm512_mask_inserti32x4(W, U, A, B, imm) \
  (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                (__v16si)_mm512_inserti32x4((A), (B), (imm)), \
                                (__v16si)(__m512i)(W))

#define _mm512_maskz_inserti32x4(U, A, B, imm) \
  (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                (__v16si)_mm512_inserti32x4((A), (B), (imm)), \
                                (__v16si)_mm512_setzero_si512())

/* getmant (VGETMANT*): extract the normalized mantissa of each element.
   B chooses the normalization interval, C the sign control; the builtin
   takes them packed as (C<<2)|B.  _round_ forms pass an explicit
   rounding/SAE mode R; plain forms use _MM_FROUND_CUR_DIRECTION. */
#define _mm512_getmant_round_pd(A, B, C, R) \
  (__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \
                                            (int)(((C)<<2) | (B)), \
                                            (__v8df)_mm512_undefined_pd(), \
                                            (__mmask8)-1, (int)(R))

#define _mm512_mask_getmant_round_pd(W, U, A, B, C, R) \
  (__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \
                                            (int)(((C)<<2) | (B)), \
                                            (__v8df)(__m512d)(W), \
                                            (__mmask8)(U), (int)(R))

#define _mm512_maskz_getmant_round_pd(U, A, B, C, R) \
  (__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \
                                            (int)(((C)<<2) | (B)), \
                                            (__v8df)_mm512_setzero_pd(), \
                                            (__mmask8)(U), (int)(R))

#define _mm512_getmant_pd(A, B, C) \
  (__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \
                                            (int)(((C)<<2) | (B)), \
                                            (__v8df)_mm512_setzero_pd(), \
                                            (__mmask8)-1, \
                                            _MM_FROUND_CUR_DIRECTION)

#define _mm512_mask_getmant_pd(W, U, A, B, C) \
  (__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \
                                            (int)(((C)<<2) | (B)), \
                                            (__v8df)(__m512d)(W), \
                                            (__mmask8)(U), \
                                            _MM_FROUND_CUR_DIRECTION)

#define _mm512_maskz_getmant_pd(U, A, B, C) \
  (__m512d)__builtin_ia32_getmantpd512_mask((__v8df)(__m512d)(A), \
                                            (int)(((C)<<2) | (B)), \
                                            (__v8df)_mm512_setzero_pd(), \
                                            (__mmask8)(U), \
                                            _MM_FROUND_CUR_DIRECTION)

#define _mm512_getmant_round_ps(A, B, C, R) \
  (__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \
                                           (int)(((C)<<2) | (B)), \
                                           (__v16sf)_mm512_undefined_ps(), \
                                           (__mmask16)-1, (int)(R))

#define _mm512_mask_getmant_round_ps(W, U, A, B, C, R) \
  (__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \
                                           (int)(((C)<<2) | (B)), \
                                           (__v16sf)(__m512)(W), \
                                           (__mmask16)(U), (int)(R))

#define _mm512_maskz_getmant_round_ps(U, A, B, C, R) \
  (__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \
                                           (int)(((C)<<2) | (B)), \
                                           (__v16sf)_mm512_setzero_ps(), \
                                           (__mmask16)(U), (int)(R))

#define _mm512_getmant_ps(A, B, C) \
  (__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \
                                           (int)(((C)<<2)|(B)), \
                                           (__v16sf)_mm512_undefined_ps(), \
                                           (__mmask16)-1, \
                                           _MM_FROUND_CUR_DIRECTION)

#define _mm512_mask_getmant_ps(W, U, A, B, C) \
  (__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \
                                           (int)(((C)<<2)|(B)), \
                                           (__v16sf)(__m512)(W), \
                                           (__mmask16)(U), \
                                           _MM_FROUND_CUR_DIRECTION)

#define _mm512_maskz_getmant_ps(U, A, B, C) \
  (__m512)__builtin_ia32_getmantps512_mask((__v16sf)(__m512)(A), \
                                           (int)(((C)<<2)|(B)), \
                                           (__v16sf)_mm512_setzero_ps(), \
                                           (__mmask16)(U), \
                                           _MM_FROUND_CUR_DIRECTION)

/* getexp (VGETEXPPD): extract the biased exponent of each double element,
   returned as a floating-point value; R is the rounding/SAE mode. */
#define _mm512_getexp_round_pd(A, R) \
  (__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \
                                           (__v8df)_mm512_undefined_pd(), \
                                           (__mmask8)-1, (int)(R))

#define _mm512_mask_getexp_round_pd(W, U, A, R) \
  (__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \
                                           (__v8df)(__m512d)(W), \
                                           (__mmask8)(U), (int)(R))

#define _mm512_maskz_getexp_round_pd(U, A, R) \
  (__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \
                                           (__v8df)_mm512_setzero_pd(), \
                                           (__mmask8)(U), (int)(R))
7546
7547static __inline__ __m512d __DEFAULT_FN_ATTRS512
7548_mm512_getexp_pd (__m512d __A)
7549{
7550 return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
7551 (__v8df) _mm512_undefined_pd (),
7552 (__mmask8) -1,
7554}
7555
7556static __inline__ __m512d __DEFAULT_FN_ATTRS512
7557_mm512_mask_getexp_pd (__m512d __W, __mmask8 __U, __m512d __A)
7558{
7559 return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
7560 (__v8df) __W,
7561 (__mmask8) __U,
7563}
7564
7565static __inline__ __m512d __DEFAULT_FN_ATTRS512
7567{
7568 return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
7569 (__v8df) _mm512_setzero_pd (),
7570 (__mmask8) __U,
7572}
7573
/* getexp (VGETEXPPS): extract the biased exponent of each float element,
   returned as a floating-point value; R is the rounding/SAE mode. */
#define _mm512_getexp_round_ps(A, R) \
  (__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \
                                          (__v16sf)_mm512_undefined_ps(), \
                                          (__mmask16)-1, (int)(R))

#define _mm512_mask_getexp_round_ps(W, U, A, R) \
  (__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \
                                          (__v16sf)(__m512)(W), \
                                          (__mmask16)(U), (int)(R))

#define _mm512_maskz_getexp_round_ps(U, A, R) \
  (__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \
                                          (__v16sf)_mm512_setzero_ps(), \
                                          (__mmask16)(U), (int)(R))
7588
7589static __inline__ __m512 __DEFAULT_FN_ATTRS512
7591{
7592 return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
7593 (__v16sf) _mm512_undefined_ps (),
7594 (__mmask16) -1,
7596}
7597
7598static __inline__ __m512 __DEFAULT_FN_ATTRS512
7599_mm512_mask_getexp_ps (__m512 __W, __mmask16 __U, __m512 __A)
7600{
7601 return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
7602 (__v16sf) __W,
7603 (__mmask16) __U,
7605}
7606
7607static __inline__ __m512 __DEFAULT_FN_ATTRS512
7609{
7610 return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
7611 (__v16sf) _mm512_setzero_ps (),
7612 (__mmask16) __U,
7614}
7615
/* Gathers: load elements from addr + index[i]*scale.  i64 variants use
   64-bit indices, i32 variants 32-bit indices.  Masked forms load only the
   lanes selected by mask; unselected lanes keep v1_old.  scale must be
   1, 2, 4, or 8. */
#define _mm512_i64gather_ps(index, addr, scale) \
  (__m256)__builtin_ia32_gatherdiv16sf((__v8sf)_mm256_undefined_ps(), \
                                       (void const *)(addr), \
                                       (__v8di)(__m512i)(index), (__mmask8)-1, \
                                       (int)(scale))

#define _mm512_mask_i64gather_ps(v1_old, mask, index, addr, scale) \
  (__m256)__builtin_ia32_gatherdiv16sf((__v8sf)(__m256)(v1_old),\
                                       (void const *)(addr), \
                                       (__v8di)(__m512i)(index), \
                                       (__mmask8)(mask), (int)(scale))

#define _mm512_i64gather_epi32(index, addr, scale) \
  (__m256i)__builtin_ia32_gatherdiv16si((__v8si)_mm256_undefined_si256(), \
                                        (void const *)(addr), \
                                        (__v8di)(__m512i)(index), \
                                        (__mmask8)-1, (int)(scale))

#define _mm512_mask_i64gather_epi32(v1_old, mask, index, addr, scale) \
  (__m256i)__builtin_ia32_gatherdiv16si((__v8si)(__m256i)(v1_old), \
                                        (void const *)(addr), \
                                        (__v8di)(__m512i)(index), \
                                        (__mmask8)(mask), (int)(scale))

#define _mm512_i64gather_pd(index, addr, scale) \
  (__m512d)__builtin_ia32_gatherdiv8df((__v8df)_mm512_undefined_pd(), \
                                       (void const *)(addr), \
                                       (__v8di)(__m512i)(index), (__mmask8)-1, \
                                       (int)(scale))

#define _mm512_mask_i64gather_pd(v1_old, mask, index, addr, scale) \
  (__m512d)__builtin_ia32_gatherdiv8df((__v8df)(__m512d)(v1_old), \
                                       (void const *)(addr), \
                                       (__v8di)(__m512i)(index), \
                                       (__mmask8)(mask), (int)(scale))

#define _mm512_i64gather_epi64(index, addr, scale) \
  (__m512i)__builtin_ia32_gatherdiv8di((__v8di)_mm512_undefined_epi32(), \
                                       (void const *)(addr), \
                                       (__v8di)(__m512i)(index), (__mmask8)-1, \
                                       (int)(scale))

#define _mm512_mask_i64gather_epi64(v1_old, mask, index, addr, scale) \
  (__m512i)__builtin_ia32_gatherdiv8di((__v8di)(__m512i)(v1_old), \
                                       (void const *)(addr), \
                                       (__v8di)(__m512i)(index), \
                                       (__mmask8)(mask), (int)(scale))

#define _mm512_i32gather_ps(index, addr, scale) \
  (__m512)__builtin_ia32_gathersiv16sf((__v16sf)_mm512_undefined_ps(), \
                                       (void const *)(addr), \
                                       (__v16si)(__m512)(index), \
                                       (__mmask16)-1, (int)(scale))

#define _mm512_mask_i32gather_ps(v1_old, mask, index, addr, scale) \
  (__m512)__builtin_ia32_gathersiv16sf((__v16sf)(__m512)(v1_old), \
                                       (void const *)(addr), \
                                       (__v16si)(__m512)(index), \
                                       (__mmask16)(mask), (int)(scale))

#define _mm512_i32gather_epi32(index, addr, scale) \
  (__m512i)__builtin_ia32_gathersiv16si((__v16si)_mm512_undefined_epi32(), \
                                        (void const *)(addr), \
                                        (__v16si)(__m512i)(index), \
                                        (__mmask16)-1, (int)(scale))

#define _mm512_mask_i32gather_epi32(v1_old, mask, index, addr, scale) \
  (__m512i)__builtin_ia32_gathersiv16si((__v16si)(__m512i)(v1_old), \
                                        (void const *)(addr), \
                                        (__v16si)(__m512i)(index), \
                                        (__mmask16)(mask), (int)(scale))

#define _mm512_i32gather_pd(index, addr, scale) \
  (__m512d)__builtin_ia32_gathersiv8df((__v8df)_mm512_undefined_pd(), \
                                       (void const *)(addr), \
                                       (__v8si)(__m256i)(index), (__mmask8)-1, \
                                       (int)(scale))

#define _mm512_mask_i32gather_pd(v1_old, mask, index, addr, scale) \
  (__m512d)__builtin_ia32_gathersiv8df((__v8df)(__m512d)(v1_old), \
                                       (void const *)(addr), \
                                       (__v8si)(__m256i)(index), \
                                       (__mmask8)(mask), (int)(scale))

#define _mm512_i32gather_epi64(index, addr, scale) \
  (__m512i)__builtin_ia32_gathersiv8di((__v8di)_mm512_undefined_epi32(), \
                                       (void const *)(addr), \
                                       (__v8si)(__m256i)(index), (__mmask8)-1, \
                                       (int)(scale))

#define _mm512_mask_i32gather_epi64(v1_old, mask, index, addr, scale) \
  (__m512i)__builtin_ia32_gathersiv8di((__v8di)(__m512i)(v1_old), \
                                       (void const *)(addr), \
                                       (__v8si)(__m256i)(index), \
                                       (__mmask8)(mask), (int)(scale))

/* Scatters: store elements of v1 to addr + index[i]*scale.  Masked forms
   store only lanes selected by mask.  scale must be 1, 2, 4, or 8. */
#define _mm512_i64scatter_ps(addr, index, v1, scale) \
  __builtin_ia32_scatterdiv16sf((void *)(addr), (__mmask8)-1, \
                                (__v8di)(__m512i)(index), \
                                (__v8sf)(__m256)(v1), (int)(scale))

#define _mm512_mask_i64scatter_ps(addr, mask, index, v1, scale) \
  __builtin_ia32_scatterdiv16sf((void *)(addr), (__mmask8)(mask), \
                                (__v8di)(__m512i)(index), \
                                (__v8sf)(__m256)(v1), (int)(scale))

#define _mm512_i64scatter_epi32(addr, index, v1, scale) \
  __builtin_ia32_scatterdiv16si((void *)(addr), (__mmask8)-1, \
                                (__v8di)(__m512i)(index), \
                                (__v8si)(__m256i)(v1), (int)(scale))

#define _mm512_mask_i64scatter_epi32(addr, mask, index, v1, scale) \
  __builtin_ia32_scatterdiv16si((void *)(addr), (__mmask8)(mask), \
                                (__v8di)(__m512i)(index), \
                                (__v8si)(__m256i)(v1), (int)(scale))

#define _mm512_i64scatter_pd(addr, index, v1, scale) \
  __builtin_ia32_scatterdiv8df((void *)(addr), (__mmask8)-1, \
                               (__v8di)(__m512i)(index), \
                               (__v8df)(__m512d)(v1), (int)(scale))

#define _mm512_mask_i64scatter_pd(addr, mask, index, v1, scale) \
  __builtin_ia32_scatterdiv8df((void *)(addr), (__mmask8)(mask), \
                               (__v8di)(__m512i)(index), \
                               (__v8df)(__m512d)(v1), (int)(scale))

#define _mm512_i64scatter_epi64(addr, index, v1, scale) \
  __builtin_ia32_scatterdiv8di((void *)(addr), (__mmask8)-1, \
                               (__v8di)(__m512i)(index), \
                               (__v8di)(__m512i)(v1), (int)(scale))

#define _mm512_mask_i64scatter_epi64(addr, mask, index, v1, scale) \
  __builtin_ia32_scatterdiv8di((void *)(addr), (__mmask8)(mask), \
                               (__v8di)(__m512i)(index), \
                               (__v8di)(__m512i)(v1), (int)(scale))

#define _mm512_i32scatter_ps(addr, index, v1, scale) \
  __builtin_ia32_scattersiv16sf((void *)(addr), (__mmask16)-1, \
                                (__v16si)(__m512i)(index), \
                                (__v16sf)(__m512)(v1), (int)(scale))

#define _mm512_mask_i32scatter_ps(addr, mask, index, v1, scale) \
  __builtin_ia32_scattersiv16sf((void *)(addr), (__mmask16)(mask), \
                                (__v16si)(__m512i)(index), \
                                (__v16sf)(__m512)(v1), (int)(scale))

#define _mm512_i32scatter_epi32(addr, index, v1, scale) \
  __builtin_ia32_scattersiv16si((void *)(addr), (__mmask16)-1, \
                                (__v16si)(__m512i)(index), \
                                (__v16si)(__m512i)(v1), (int)(scale))

#define _mm512_mask_i32scatter_epi32(addr, mask, index, v1, scale) \
  __builtin_ia32_scattersiv16si((void *)(addr), (__mmask16)(mask), \
                                (__v16si)(__m512i)(index), \
                                (__v16si)(__m512i)(v1), (int)(scale))

#define _mm512_i32scatter_pd(addr, index, v1, scale) \
  __builtin_ia32_scattersiv8df((void *)(addr), (__mmask8)-1, \
                               (__v8si)(__m256i)(index), \
                               (__v8df)(__m512d)(v1), (int)(scale))

#define _mm512_mask_i32scatter_pd(addr, mask, index, v1, scale) \
  __builtin_ia32_scattersiv8df((void *)(addr), (__mmask8)(mask), \
                               (__v8si)(__m256i)(index), \
                               (__v8df)(__m512d)(v1), (int)(scale))

#define _mm512_i32scatter_epi64(addr, index, v1, scale) \
  __builtin_ia32_scattersiv8di((void *)(addr), (__mmask8)-1, \
                               (__v8si)(__m256i)(index), \
                               (__v8di)(__m512i)(v1), (int)(scale))

#define _mm512_mask_i32scatter_epi64(addr, mask, index, v1, scale) \
  __builtin_ia32_scattersiv8di((void *)(addr), (__mmask8)(mask), \
                               (__v8si)(__m256i)(index), \
                               (__v8di)(__m512i)(v1), (int)(scale))
7791
7792static __inline__ __m128 __DEFAULT_FN_ATTRS128
7793_mm_mask_fmadd_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
7794{
7795 return __builtin_ia32_vfmaddss3_mask((__v4sf)__W,
7796 (__v4sf)__A,
7797 (__v4sf)__B,
7798 (__mmask8)__U,
7800}
7801
7802#define _mm_fmadd_round_ss(A, B, C, R) \
7803 (__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(A), \
7804 (__v4sf)(__m128)(B), \
7805 (__v4sf)(__m128)(C), (__mmask8)-1, \
7806 (int)(R))
7807
7808#define _mm_mask_fmadd_round_ss(W, U, A, B, R) \
7809 (__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(W), \
7810 (__v4sf)(__m128)(A), \
7811 (__v4sf)(__m128)(B), (__mmask8)(U), \
7812 (int)(R))
7813
7814static __inline__ __m128 __DEFAULT_FN_ATTRS128
7815_mm_maskz_fmadd_ss (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
7816{
7817 return __builtin_ia32_vfmaddss3_maskz((__v4sf)__A,
7818 (__v4sf)__B,
7819 (__v4sf)__C,
7820 (__mmask8)__U,
7822}
7823
7824#define _mm_maskz_fmadd_round_ss(U, A, B, C, R) \
7825 (__m128)__builtin_ia32_vfmaddss3_maskz((__v4sf)(__m128)(A), \
7826 (__v4sf)(__m128)(B), \
7827 (__v4sf)(__m128)(C), (__mmask8)(U), \
7828 (int)(R))
7829
7830static __inline__ __m128 __DEFAULT_FN_ATTRS128
7831_mm_mask3_fmadd_ss (__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
7832{
7833 return __builtin_ia32_vfmaddss3_mask3((__v4sf)__W,
7834 (__v4sf)__X,
7835 (__v4sf)__Y,
7836 (__mmask8)__U,
7838}
7839
7840#define _mm_mask3_fmadd_round_ss(W, X, Y, U, R) \
7841 (__m128)__builtin_ia32_vfmaddss3_mask3((__v4sf)(__m128)(W), \
7842 (__v4sf)(__m128)(X), \
7843 (__v4sf)(__m128)(Y), (__mmask8)(U), \
7844 (int)(R))
7845
7846static __inline__ __m128 __DEFAULT_FN_ATTRS128
7847_mm_mask_fmsub_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
7848{
7849 return __builtin_ia32_vfmaddss3_mask((__v4sf)__W,
7850 (__v4sf)__A,
7851 -(__v4sf)__B,
7852 (__mmask8)__U,
7854}
7855
7856#define _mm_fmsub_round_ss(A, B, C, R) \
7857 (__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(A), \
7858 (__v4sf)(__m128)(B), \
7859 -(__v4sf)(__m128)(C), (__mmask8)-1, \
7860 (int)(R))
7861
7862#define _mm_mask_fmsub_round_ss(W, U, A, B, R) \
7863 (__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(W), \
7864 (__v4sf)(__m128)(A), \
7865 -(__v4sf)(__m128)(B), (__mmask8)(U), \
7866 (int)(R))
7867
7868static __inline__ __m128 __DEFAULT_FN_ATTRS128
7869_mm_maskz_fmsub_ss (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
7870{
7871 return __builtin_ia32_vfmaddss3_maskz((__v4sf)__A,
7872 (__v4sf)__B,
7873 -(__v4sf)__C,
7874 (__mmask8)__U,
7876}
7877
7878#define _mm_maskz_fmsub_round_ss(U, A, B, C, R) \
7879 (__m128)__builtin_ia32_vfmaddss3_maskz((__v4sf)(__m128)(A), \
7880 (__v4sf)(__m128)(B), \
7881 -(__v4sf)(__m128)(C), (__mmask8)(U), \
7882 (int)(R))
7883
7884static __inline__ __m128 __DEFAULT_FN_ATTRS128
7885_mm_mask3_fmsub_ss (__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
7886{
7887 return __builtin_ia32_vfmsubss3_mask3((__v4sf)__W,
7888 (__v4sf)__X,
7889 (__v4sf)__Y,
7890 (__mmask8)__U,
7892}
7893
7894#define _mm_mask3_fmsub_round_ss(W, X, Y, U, R) \
7895 (__m128)__builtin_ia32_vfmsubss3_mask3((__v4sf)(__m128)(W), \
7896 (__v4sf)(__m128)(X), \
7897 (__v4sf)(__m128)(Y), (__mmask8)(U), \
7898 (int)(R))
7899
7900static __inline__ __m128 __DEFAULT_FN_ATTRS128
7901_mm_mask_fnmadd_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
7902{
7903 return __builtin_ia32_vfmaddss3_mask((__v4sf)__W,
7904 -(__v4sf)__A,
7905 (__v4sf)__B,
7906 (__mmask8)__U,
7908}
7909
7910#define _mm_fnmadd_round_ss(A, B, C, R) \
7911 (__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(A), \
7912 -(__v4sf)(__m128)(B), \
7913 (__v4sf)(__m128)(C), (__mmask8)-1, \
7914 (int)(R))
7915
7916#define _mm_mask_fnmadd_round_ss(W, U, A, B, R) \
7917 (__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(W), \
7918 -(__v4sf)(__m128)(A), \
7919 (__v4sf)(__m128)(B), (__mmask8)(U), \
7920 (int)(R))
7921
7922static __inline__ __m128 __DEFAULT_FN_ATTRS128
7923_mm_maskz_fnmadd_ss (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
7924{
7925 return __builtin_ia32_vfmaddss3_maskz((__v4sf)__A,
7926 -(__v4sf)__B,
7927 (__v4sf)__C,
7928 (__mmask8)__U,
7930}
7931
7932#define _mm_maskz_fnmadd_round_ss(U, A, B, C, R) \
7933 (__m128)__builtin_ia32_vfmaddss3_maskz((__v4sf)(__m128)(A), \
7934 -(__v4sf)(__m128)(B), \
7935 (__v4sf)(__m128)(C), (__mmask8)(U), \
7936 (int)(R))
7937
7938static __inline__ __m128 __DEFAULT_FN_ATTRS128
7939_mm_mask3_fnmadd_ss (__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
7940{
7941 return __builtin_ia32_vfmaddss3_mask3((__v4sf)__W,
7942 -(__v4sf)__X,
7943 (__v4sf)__Y,
7944 (__mmask8)__U,
7946}
7947
7948#define _mm_mask3_fnmadd_round_ss(W, X, Y, U, R) \
7949 (__m128)__builtin_ia32_vfmaddss3_mask3((__v4sf)(__m128)(W), \
7950 -(__v4sf)(__m128)(X), \
7951 (__v4sf)(__m128)(Y), (__mmask8)(U), \
7952 (int)(R))
7953
7954static __inline__ __m128 __DEFAULT_FN_ATTRS128
7955_mm_mask_fnmsub_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
7956{
7957 return __builtin_ia32_vfmaddss3_mask((__v4sf)__W,
7958 -(__v4sf)__A,
7959 -(__v4sf)__B,
7960 (__mmask8)__U,
7962}
7963
7964#define _mm_fnmsub_round_ss(A, B, C, R) \
7965 (__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(A), \
7966 -(__v4sf)(__m128)(B), \
7967 -(__v4sf)(__m128)(C), (__mmask8)-1, \
7968 (int)(R))
7969
7970#define _mm_mask_fnmsub_round_ss(W, U, A, B, R) \
7971 (__m128)__builtin_ia32_vfmaddss3_mask((__v4sf)(__m128)(W), \
7972 -(__v4sf)(__m128)(A), \
7973 -(__v4sf)(__m128)(B), (__mmask8)(U), \
7974 (int)(R))
7975
7976static __inline__ __m128 __DEFAULT_FN_ATTRS128
7977_mm_maskz_fnmsub_ss (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
7978{
7979 return __builtin_ia32_vfmaddss3_maskz((__v4sf)__A,
7980 -(__v4sf)__B,
7981 -(__v4sf)__C,
7982 (__mmask8)__U,
7984}
7985
7986#define _mm_maskz_fnmsub_round_ss(U, A, B, C, R) \
7987 (__m128)__builtin_ia32_vfmaddss3_maskz((__v4sf)(__m128)(A), \
7988 -(__v4sf)(__m128)(B), \
7989 -(__v4sf)(__m128)(C), (__mmask8)(U), \
7990 (int)(R))
7991
7992static __inline__ __m128 __DEFAULT_FN_ATTRS128
7993_mm_mask3_fnmsub_ss (__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
7994{
7995 return __builtin_ia32_vfmsubss3_mask3((__v4sf)__W,
7996 -(__v4sf)__X,
7997 (__v4sf)__Y,
7998 (__mmask8)__U,
8000}
8001
8002#define _mm_mask3_fnmsub_round_ss(W, X, Y, U, R) \
8003 (__m128)__builtin_ia32_vfmsubss3_mask3((__v4sf)(__m128)(W), \
8004 -(__v4sf)(__m128)(X), \
8005 (__v4sf)(__m128)(Y), (__mmask8)(U), \
8006 (int)(R))
8007
8008static __inline__ __m128d __DEFAULT_FN_ATTRS128
8009_mm_mask_fmadd_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
8010{
8011 return __builtin_ia32_vfmaddsd3_mask((__v2df)__W,
8012 (__v2df)__A,
8013 (__v2df)__B,
8014 (__mmask8)__U,
8016}
8017
8018#define _mm_fmadd_round_sd(A, B, C, R) \
8019 (__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(A), \
8020 (__v2df)(__m128d)(B), \
8021 (__v2df)(__m128d)(C), (__mmask8)-1, \
8022 (int)(R))
8023
8024#define _mm_mask_fmadd_round_sd(W, U, A, B, R) \
8025 (__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(W), \
8026 (__v2df)(__m128d)(A), \
8027 (__v2df)(__m128d)(B), (__mmask8)(U), \
8028 (int)(R))
8029
8030static __inline__ __m128d __DEFAULT_FN_ATTRS128
8031_mm_maskz_fmadd_sd (__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
8032{
8033 return __builtin_ia32_vfmaddsd3_maskz((__v2df)__A,
8034 (__v2df)__B,
8035 (__v2df)__C,
8036 (__mmask8)__U,
8038}
8039
8040#define _mm_maskz_fmadd_round_sd(U, A, B, C, R) \
8041 (__m128d)__builtin_ia32_vfmaddsd3_maskz((__v2df)(__m128d)(A), \
8042 (__v2df)(__m128d)(B), \
8043 (__v2df)(__m128d)(C), (__mmask8)(U), \
8044 (int)(R))
8045
8046static __inline__ __m128d __DEFAULT_FN_ATTRS128
8047_mm_mask3_fmadd_sd (__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U)
8048{
8049 return __builtin_ia32_vfmaddsd3_mask3((__v2df)__W,
8050 (__v2df)__X,
8051 (__v2df)__Y,
8052 (__mmask8)__U,
8054}
8055
8056#define _mm_mask3_fmadd_round_sd(W, X, Y, U, R) \
8057 (__m128d)__builtin_ia32_vfmaddsd3_mask3((__v2df)(__m128d)(W), \
8058 (__v2df)(__m128d)(X), \
8059 (__v2df)(__m128d)(Y), (__mmask8)(U), \
8060 (int)(R))
8061
8062static __inline__ __m128d __DEFAULT_FN_ATTRS128
8063_mm_mask_fmsub_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
8064{
8065 return __builtin_ia32_vfmaddsd3_mask((__v2df)__W,
8066 (__v2df)__A,
8067 -(__v2df)__B,
8068 (__mmask8)__U,
8070}
8071
8072#define _mm_fmsub_round_sd(A, B, C, R) \
8073 (__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(A), \
8074 (__v2df)(__m128d)(B), \
8075 -(__v2df)(__m128d)(C), (__mmask8)-1, \
8076 (int)(R))
8077
8078#define _mm_mask_fmsub_round_sd(W, U, A, B, R) \
8079 (__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(W), \
8080 (__v2df)(__m128d)(A), \
8081 -(__v2df)(__m128d)(B), (__mmask8)(U), \
8082 (int)(R))
8083
8084static __inline__ __m128d __DEFAULT_FN_ATTRS128
8085_mm_maskz_fmsub_sd (__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
8086{
8087 return __builtin_ia32_vfmaddsd3_maskz((__v2df)__A,
8088 (__v2df)__B,
8089 -(__v2df)__C,
8090 (__mmask8)__U,
8092}
8093
8094#define _mm_maskz_fmsub_round_sd(U, A, B, C, R) \
8095 (__m128d)__builtin_ia32_vfmaddsd3_maskz((__v2df)(__m128d)(A), \
8096 (__v2df)(__m128d)(B), \
8097 -(__v2df)(__m128d)(C), \
8098 (__mmask8)(U), (int)(R))
8099
8100static __inline__ __m128d __DEFAULT_FN_ATTRS128
8101_mm_mask3_fmsub_sd (__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U)
8102{
8103 return __builtin_ia32_vfmsubsd3_mask3((__v2df)__W,
8104 (__v2df)__X,
8105 (__v2df)__Y,
8106 (__mmask8)__U,
8108}
8109
8110#define _mm_mask3_fmsub_round_sd(W, X, Y, U, R) \
8111 (__m128d)__builtin_ia32_vfmsubsd3_mask3((__v2df)(__m128d)(W), \
8112 (__v2df)(__m128d)(X), \
8113 (__v2df)(__m128d)(Y), \
8114 (__mmask8)(U), (int)(R))
8115
8116static __inline__ __m128d __DEFAULT_FN_ATTRS128
8117_mm_mask_fnmadd_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
8118{
8119 return __builtin_ia32_vfmaddsd3_mask((__v2df)__W,
8120 -(__v2df)__A,
8121 (__v2df)__B,
8122 (__mmask8)__U,
8124}
8125
8126#define _mm_fnmadd_round_sd(A, B, C, R) \
8127 (__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(A), \
8128 -(__v2df)(__m128d)(B), \
8129 (__v2df)(__m128d)(C), (__mmask8)-1, \
8130 (int)(R))
8131
8132#define _mm_mask_fnmadd_round_sd(W, U, A, B, R) \
8133 (__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(W), \
8134 -(__v2df)(__m128d)(A), \
8135 (__v2df)(__m128d)(B), (__mmask8)(U), \
8136 (int)(R))
8137
8138static __inline__ __m128d __DEFAULT_FN_ATTRS128
8139_mm_maskz_fnmadd_sd (__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
8140{
8141 return __builtin_ia32_vfmaddsd3_maskz((__v2df)__A,
8142 -(__v2df)__B,
8143 (__v2df)__C,
8144 (__mmask8)__U,
8146}
8147
8148#define _mm_maskz_fnmadd_round_sd(U, A, B, C, R) \
8149 (__m128d)__builtin_ia32_vfmaddsd3_maskz((__v2df)(__m128d)(A), \
8150 -(__v2df)(__m128d)(B), \
8151 (__v2df)(__m128d)(C), (__mmask8)(U), \
8152 (int)(R))
8153
8154static __inline__ __m128d __DEFAULT_FN_ATTRS128
8155_mm_mask3_fnmadd_sd (__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U)
8156{
8157 return __builtin_ia32_vfmaddsd3_mask3((__v2df)__W,
8158 -(__v2df)__X,
8159 (__v2df)__Y,
8160 (__mmask8)__U,
8162}
8163
8164#define _mm_mask3_fnmadd_round_sd(W, X, Y, U, R) \
8165 (__m128d)__builtin_ia32_vfmaddsd3_mask3((__v2df)(__m128d)(W), \
8166 -(__v2df)(__m128d)(X), \
8167 (__v2df)(__m128d)(Y), (__mmask8)(U), \
8168 (int)(R))
8169
8170static __inline__ __m128d __DEFAULT_FN_ATTRS128
8171_mm_mask_fnmsub_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
8172{
8173 return __builtin_ia32_vfmaddsd3_mask((__v2df)__W,
8174 -(__v2df)__A,
8175 -(__v2df)__B,
8176 (__mmask8)__U,
8178}
8179
8180#define _mm_fnmsub_round_sd(A, B, C, R) \
8181 (__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(A), \
8182 -(__v2df)(__m128d)(B), \
8183 -(__v2df)(__m128d)(C), (__mmask8)-1, \
8184 (int)(R))
8185
8186#define _mm_mask_fnmsub_round_sd(W, U, A, B, R) \
8187 (__m128d)__builtin_ia32_vfmaddsd3_mask((__v2df)(__m128d)(W), \
8188 -(__v2df)(__m128d)(A), \
8189 -(__v2df)(__m128d)(B), (__mmask8)(U), \
8190 (int)(R))
8191
8192static __inline__ __m128d __DEFAULT_FN_ATTRS128
8193_mm_maskz_fnmsub_sd (__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
8194{
8195 return __builtin_ia32_vfmaddsd3_maskz((__v2df)__A,
8196 -(__v2df)__B,
8197 -(__v2df)__C,
8198 (__mmask8)__U,
8200}
8201
8202#define _mm_maskz_fnmsub_round_sd(U, A, B, C, R) \
8203 (__m128d)__builtin_ia32_vfmaddsd3_maskz((__v2df)(__m128d)(A), \
8204 -(__v2df)(__m128d)(B), \
8205 -(__v2df)(__m128d)(C), \
8206 (__mmask8)(U), \
8207 (int)(R))
8208
8209static __inline__ __m128d __DEFAULT_FN_ATTRS128
8210_mm_mask3_fnmsub_sd (__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U)
8211{
8212 return __builtin_ia32_vfmsubsd3_mask3((__v2df)__W,
8213 -(__v2df)__X,
8214 (__v2df)__Y,
8215 (__mmask8)__U,
8217}
8218
8219#define _mm_mask3_fnmsub_round_sd(W, X, Y, U, R) \
8220 (__m128d)__builtin_ia32_vfmsubsd3_mask3((__v2df)(__m128d)(W), \
8221 -(__v2df)(__m128d)(X), \
8222 (__v2df)(__m128d)(Y), \
8223 (__mmask8)(U), (int)(R))
8224
/* permutex (VPERMPD/VPERMQ, immediate form): shuffle 64-bit elements within
   each 256-bit half of X using the 8-bit control C (2 bits per element).
   Masked forms select between the permuted result and W (or zero) per U. */
#define _mm512_permutex_pd(X, C) \
  (__m512d)__builtin_ia32_permdf512((__v8df)(__m512d)(X), (int)(C))

#define _mm512_mask_permutex_pd(W, U, X, C) \
  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                       (__v8df)_mm512_permutex_pd((X), (C)), \
                                       (__v8df)(__m512d)(W))

#define _mm512_maskz_permutex_pd(U, X, C) \
  (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                       (__v8df)_mm512_permutex_pd((X), (C)), \
                                       (__v8df)_mm512_setzero_pd())

#define _mm512_permutex_epi64(X, C) \
  (__m512i)__builtin_ia32_permdi512((__v8di)(__m512i)(X), (int)(C))

#define _mm512_mask_permutex_epi64(W, U, X, C) \
  (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                      (__v8di)_mm512_permutex_epi64((X), (C)), \
                                      (__v8di)(__m512i)(W))

#define _mm512_maskz_permutex_epi64(U, X, C) \
  (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                      (__v8di)_mm512_permutex_epi64((X), (C)), \
                                      (__v8di)_mm512_setzero_si512())
8250
/* permutexvar (VPERMPD/VPERMQ/VPERMPS/VPERMD, variable form): full-width
   cross-lane permute.  Result element i is the element of __Y selected by
   the index in element i of __X; note the builtins take (data, indices). */
static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_permutexvar_pd (__m512i __X, __m512d __Y)
{
  return (__m512d)__builtin_ia32_permvardf512((__v8df) __Y, (__v8di) __X);
}

/* Merge-masking: unselected lanes of the result come from __W. */
static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_mask_permutexvar_pd (__m512d __W, __mmask8 __U, __m512i __X, __m512d __Y)
{
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
                                        (__v8df)_mm512_permutexvar_pd(__X, __Y),
                                        (__v8df)__W);
}

/* Zero-masking: unselected lanes of the result are zeroed. */
static __inline__ __m512d __DEFAULT_FN_ATTRS512
_mm512_maskz_permutexvar_pd (__mmask8 __U, __m512i __X, __m512d __Y)
{
  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
                                        (__v8df)_mm512_permutexvar_pd(__X, __Y),
                                        (__v8df)_mm512_setzero_pd());
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_permutexvar_epi64 (__m512i __X, __m512i __Y)
{
  return (__m512i)__builtin_ia32_permvardi512((__v8di)__Y, (__v8di)__X);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_permutexvar_epi64 (__mmask8 __M, __m512i __X, __m512i __Y)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
                                     (__v8di)_mm512_permutexvar_epi64(__X, __Y),
                                     (__v8di)_mm512_setzero_si512());
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_permutexvar_epi64 (__m512i __W, __mmask8 __M, __m512i __X,
             __m512i __Y)
{
  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
                                     (__v8di)_mm512_permutexvar_epi64(__X, __Y),
                                     (__v8di)__W);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_permutexvar_ps (__m512i __X, __m512 __Y)
{
  return (__m512)__builtin_ia32_permvarsf512((__v16sf)__Y, (__v16si)__X);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_mask_permutexvar_ps (__m512 __W, __mmask16 __U, __m512i __X, __m512 __Y)
{
  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
                                      (__v16sf)_mm512_permutexvar_ps(__X, __Y),
                                      (__v16sf)__W);
}

static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_maskz_permutexvar_ps (__mmask16 __U, __m512i __X, __m512 __Y)
{
  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
                                      (__v16sf)_mm512_permutexvar_ps(__X, __Y),
                                      (__v16sf)_mm512_setzero_ps());
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_permutexvar_epi32 (__m512i __X, __m512i __Y)
{
  return (__m512i)__builtin_ia32_permvarsi512((__v16si)__Y, (__v16si)__X);
}

/* Historical alias kept for source compatibility. */
#define _mm512_permutevar_epi32 _mm512_permutexvar_epi32

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_permutexvar_epi32 (__mmask16 __M, __m512i __X, __m512i __Y)
{
  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
                                    (__v16si)_mm512_permutexvar_epi32(__X, __Y),
                                    (__v16si)_mm512_setzero_si512());
}
8333
8334static __inline__ __m512i __DEFAULT_FN_ATTRS512
8335_mm512_mask_permutexvar_epi32 (__m512i __W, __mmask16 __M, __m512i __X,
8336 __m512i __Y)
8337{
8338 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
8339 (__v16si)_mm512_permutexvar_epi32(__X, __Y),
8340 (__v16si)__W);
8341}
8342
8343#define _mm512_mask_permutevar_epi32 _mm512_mask_permutexvar_epi32
8344
8345static __inline__ __mmask16 __DEFAULT_FN_ATTRS
8347{
8348 return (__mmask16) __builtin_ia32_kandhi ((__mmask16) __A, (__mmask16) __B);
8349}
8350
8351static __inline__ __mmask16 __DEFAULT_FN_ATTRS
8353{
8354 return (__mmask16) __builtin_ia32_kandnhi ((__mmask16) __A, (__mmask16) __B);
8355}
8356
8357static __inline__ __mmask16 __DEFAULT_FN_ATTRS
8359{
8360 return (__mmask16) __builtin_ia32_korhi ((__mmask16) __A, (__mmask16) __B);
8361}
8362
8363static __inline__ int __DEFAULT_FN_ATTRS
8365{
8366 return __builtin_ia32_kortestchi ((__mmask16) __A, (__mmask16) __B);
8367}
8368
8369static __inline__ int __DEFAULT_FN_ATTRS
8371{
8372 return __builtin_ia32_kortestzhi ((__mmask16) __A, (__mmask16) __B);
8373}
8374
8375static __inline__ unsigned char __DEFAULT_FN_ATTRS
8377{
8378 return (unsigned char)__builtin_ia32_kortestchi(__A, __B);
8379}
8380
8381static __inline__ unsigned char __DEFAULT_FN_ATTRS
8383{
8384 return (unsigned char)__builtin_ia32_kortestzhi(__A, __B);
8385}
8386
8387static __inline__ unsigned char __DEFAULT_FN_ATTRS
8388_kortest_mask16_u8(__mmask16 __A, __mmask16 __B, unsigned char *__C) {
8389 *__C = (unsigned char)__builtin_ia32_kortestchi(__A, __B);
8390 return (unsigned char)__builtin_ia32_kortestzhi(__A, __B);
8391}
8392
8393static __inline__ __mmask16 __DEFAULT_FN_ATTRS
8395{
8396 return (__mmask16) __builtin_ia32_kunpckhi ((__mmask16) __A, (__mmask16) __B);
8397}
8398
8399static __inline__ __mmask16 __DEFAULT_FN_ATTRS
8401{
8402 return (__mmask16) __builtin_ia32_kxnorhi ((__mmask16) __A, (__mmask16) __B);
8403}
8404
8405static __inline__ __mmask16 __DEFAULT_FN_ATTRS
8407{
8408 return (__mmask16) __builtin_ia32_kxorhi ((__mmask16) __A, (__mmask16) __B);
8409}
8410
8411#define _kand_mask16 _mm512_kand
8412#define _kandn_mask16 _mm512_kandn
8413#define _knot_mask16 _mm512_knot
8414#define _kor_mask16 _mm512_kor
8415#define _kxnor_mask16 _mm512_kxnor
8416#define _kxor_mask16 _mm512_kxor
8417
8418#define _kshiftli_mask16(A, I) \
8419 (__mmask16)__builtin_ia32_kshiftlihi((__mmask16)(A), (unsigned int)(I))
8420
8421#define _kshiftri_mask16(A, I) \
8422 (__mmask16)__builtin_ia32_kshiftrihi((__mmask16)(A), (unsigned int)(I))
8423
8424static __inline__ unsigned int __DEFAULT_FN_ATTRS
8426 return (unsigned int)__builtin_ia32_kmovw((__mmask16)__A);
8427}
8428
8429static __inline__ __mmask16 __DEFAULT_FN_ATTRS
8430_cvtu32_mask16(unsigned int __A) {
8431 return (__mmask16)__builtin_ia32_kmovw((__mmask16)__A);
8432}
8433
8434static __inline__ __mmask16 __DEFAULT_FN_ATTRS
8436 return (__mmask16)__builtin_ia32_kmovw(*(__mmask16 *)__A);
8437}
8438
8439static __inline__ void __DEFAULT_FN_ATTRS
8441 *(__mmask16 *)__A = __builtin_ia32_kmovw((__mmask16)__B);
8442}
8443
8444static __inline__ void __DEFAULT_FN_ATTRS512
8445_mm512_stream_si512 (void * __P, __m512i __A)
8446{
8447 typedef __v8di __v8di_aligned __attribute__((aligned(64)));
8448 __builtin_nontemporal_store((__v8di_aligned)__A, (__v8di_aligned*)__P);
8449}
8450
8451static __inline__ __m512i __DEFAULT_FN_ATTRS512
8453{
8454 typedef __v8di __v8di_aligned __attribute__((aligned(64)));
8455 return (__m512i) __builtin_nontemporal_load((const __v8di_aligned *)__P);
8456}
8457
8458static __inline__ void __DEFAULT_FN_ATTRS512
8459_mm512_stream_pd (void *__P, __m512d __A)
8460{
8461 typedef __v8df __v8df_aligned __attribute__((aligned(64)));
8462 __builtin_nontemporal_store((__v8df_aligned)__A, (__v8df_aligned*)__P);
8463}
8464
8465static __inline__ void __DEFAULT_FN_ATTRS512
8466_mm512_stream_ps (void *__P, __m512 __A)
8467{
8468 typedef __v16sf __v16sf_aligned __attribute__((aligned(64)));
8469 __builtin_nontemporal_store((__v16sf_aligned)__A, (__v16sf_aligned*)__P);
8470}
8471
8472static __inline__ __m512d __DEFAULT_FN_ATTRS512
8473_mm512_mask_compress_pd (__m512d __W, __mmask8 __U, __m512d __A)
8474{
8475 return (__m512d) __builtin_ia32_compressdf512_mask ((__v8df) __A,
8476 (__v8df) __W,
8477 (__mmask8) __U);
8478}
8479
8480static __inline__ __m512d __DEFAULT_FN_ATTRS512
8482{
8483 return (__m512d) __builtin_ia32_compressdf512_mask ((__v8df) __A,
8484 (__v8df)
8486 (__mmask8) __U);
8487}
8488
8489static __inline__ __m512i __DEFAULT_FN_ATTRS512
8490_mm512_mask_compress_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
8491{
8492 return (__m512i) __builtin_ia32_compressdi512_mask ((__v8di) __A,
8493 (__v8di) __W,
8494 (__mmask8) __U);
8495}
8496
8497static __inline__ __m512i __DEFAULT_FN_ATTRS512
8499{
8500 return (__m512i) __builtin_ia32_compressdi512_mask ((__v8di) __A,
8501 (__v8di)
8503 (__mmask8) __U);
8504}
8505
8506static __inline__ __m512 __DEFAULT_FN_ATTRS512
8507_mm512_mask_compress_ps (__m512 __W, __mmask16 __U, __m512 __A)
8508{
8509 return (__m512) __builtin_ia32_compresssf512_mask ((__v16sf) __A,
8510 (__v16sf) __W,
8511 (__mmask16) __U);
8512}
8513
8514static __inline__ __m512 __DEFAULT_FN_ATTRS512
8516{
8517 return (__m512) __builtin_ia32_compresssf512_mask ((__v16sf) __A,
8518 (__v16sf)
8520 (__mmask16) __U);
8521}
8522
8523static __inline__ __m512i __DEFAULT_FN_ATTRS512
8524_mm512_mask_compress_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
8525{
8526 return (__m512i) __builtin_ia32_compresssi512_mask ((__v16si) __A,
8527 (__v16si) __W,
8528 (__mmask16) __U);
8529}
8530
8531static __inline__ __m512i __DEFAULT_FN_ATTRS512
8533{
8534 return (__m512i) __builtin_ia32_compresssi512_mask ((__v16si) __A,
8535 (__v16si)
8537 (__mmask16) __U);
8538}
8539
/* Scalar single/double compares producing an opmask; P selects the predicate,
   R the rounding/SAE mode (non-round forms use the current direction). */
#define _mm_cmp_round_ss_mask(X, Y, P, R) \
  (__mmask8)__builtin_ia32_cmpss_mask((__v4sf)(__m128)(X), \
                                      (__v4sf)(__m128)(Y), (int)(P), \
                                      (__mmask8)-1, (int)(R))

#define _mm_mask_cmp_round_ss_mask(M, X, Y, P, R) \
  (__mmask8)__builtin_ia32_cmpss_mask((__v4sf)(__m128)(X), \
                                      (__v4sf)(__m128)(Y), (int)(P), \
                                      (__mmask8)(M), (int)(R))

#define _mm_cmp_ss_mask(X, Y, P) \
  (__mmask8)__builtin_ia32_cmpss_mask((__v4sf)(__m128)(X), \
                                      (__v4sf)(__m128)(Y), (int)(P), \
                                      (__mmask8)-1, \
                                      _MM_FROUND_CUR_DIRECTION)

#define _mm_mask_cmp_ss_mask(M, X, Y, P) \
  (__mmask8)__builtin_ia32_cmpss_mask((__v4sf)(__m128)(X), \
                                      (__v4sf)(__m128)(Y), (int)(P), \
                                      (__mmask8)(M), \
                                      _MM_FROUND_CUR_DIRECTION)

#define _mm_cmp_round_sd_mask(X, Y, P, R) \
  (__mmask8)__builtin_ia32_cmpsd_mask((__v2df)(__m128d)(X), \
                                      (__v2df)(__m128d)(Y), (int)(P), \
                                      (__mmask8)-1, (int)(R))

#define _mm_mask_cmp_round_sd_mask(M, X, Y, P, R) \
  (__mmask8)__builtin_ia32_cmpsd_mask((__v2df)(__m128d)(X), \
                                      (__v2df)(__m128d)(Y), (int)(P), \
                                      (__mmask8)(M), (int)(R))

#define _mm_cmp_sd_mask(X, Y, P) \
  (__mmask8)__builtin_ia32_cmpsd_mask((__v2df)(__m128d)(X), \
                                      (__v2df)(__m128d)(Y), (int)(P), \
                                      (__mmask8)-1, \
                                      _MM_FROUND_CUR_DIRECTION)

#define _mm_mask_cmp_sd_mask(M, X, Y, P) \
  (__mmask8)__builtin_ia32_cmpsd_mask((__v2df)(__m128d)(X), \
                                      (__v2df)(__m128d)(Y), (int)(P), \
                                      (__mmask8)(M), \
                                      _MM_FROUND_CUR_DIRECTION)
8583
8584/* Bit Test */
8585
8586static __inline __mmask16 __DEFAULT_FN_ATTRS512
8587_mm512_test_epi32_mask (__m512i __A, __m512i __B)
8588{
8591}
8592
8593static __inline__ __mmask16 __DEFAULT_FN_ATTRS512
8594_mm512_mask_test_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B)
8595{
8596 return _mm512_mask_cmpneq_epi32_mask (__U, _mm512_and_epi32 (__A, __B),
8598}
8599
8600static __inline __mmask8 __DEFAULT_FN_ATTRS512
8601_mm512_test_epi64_mask (__m512i __A, __m512i __B)
8602{
8603 return _mm512_cmpneq_epi64_mask (_mm512_and_epi32 (__A, __B),
8605}
8606
8607static __inline__ __mmask8 __DEFAULT_FN_ATTRS512
8608_mm512_mask_test_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B)
8609{
8610 return _mm512_mask_cmpneq_epi64_mask (__U, _mm512_and_epi32 (__A, __B),
8612}
8613
8614static __inline__ __mmask16 __DEFAULT_FN_ATTRS512
8615_mm512_testn_epi32_mask (__m512i __A, __m512i __B)
8616{
8617 return _mm512_cmpeq_epi32_mask (_mm512_and_epi32 (__A, __B),
8619}
8620
8621static __inline__ __mmask16 __DEFAULT_FN_ATTRS512
8622_mm512_mask_testn_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B)
8623{
8624 return _mm512_mask_cmpeq_epi32_mask (__U, _mm512_and_epi32 (__A, __B),
8626}
8627
8628static __inline__ __mmask8 __DEFAULT_FN_ATTRS512
8629_mm512_testn_epi64_mask (__m512i __A, __m512i __B)
8630{
8631 return _mm512_cmpeq_epi64_mask (_mm512_and_epi32 (__A, __B),
8633}
8634
8635static __inline__ __mmask8 __DEFAULT_FN_ATTRS512
8636_mm512_mask_testn_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B)
8637{
8638 return _mm512_mask_cmpeq_epi64_mask (__U, _mm512_and_epi32 (__A, __B),
8640}
8641
8642static __inline__ __m512 __DEFAULT_FN_ATTRS512
8644{
8645 return (__m512)__builtin_shufflevector((__v16sf)__A, (__v16sf)__A,
8646 1, 1, 3, 3, 5, 5, 7, 7, 9, 9, 11, 11, 13, 13, 15, 15);
8647}
8648
8649static __inline__ __m512 __DEFAULT_FN_ATTRS512
8650_mm512_mask_movehdup_ps (__m512 __W, __mmask16 __U, __m512 __A)
8651{
8652 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
8653 (__v16sf)_mm512_movehdup_ps(__A),
8654 (__v16sf)__W);
8655}
8656
8657static __inline__ __m512 __DEFAULT_FN_ATTRS512
8659{
8660 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
8661 (__v16sf)_mm512_movehdup_ps(__A),
8662 (__v16sf)_mm512_setzero_ps());
8663}
8664
8665static __inline__ __m512 __DEFAULT_FN_ATTRS512
8667{
8668 return (__m512)__builtin_shufflevector((__v16sf)__A, (__v16sf)__A,
8669 0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14, 14);
8670}
8671
8672static __inline__ __m512 __DEFAULT_FN_ATTRS512
8673_mm512_mask_moveldup_ps (__m512 __W, __mmask16 __U, __m512 __A)
8674{
8675 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
8676 (__v16sf)_mm512_moveldup_ps(__A),
8677 (__v16sf)__W);
8678}
8679
8680static __inline__ __m512 __DEFAULT_FN_ATTRS512
8682{
8683 return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
8684 (__v16sf)_mm512_moveldup_ps(__A),
8685 (__v16sf)_mm512_setzero_ps());
8686}
8687
8688static __inline__ __m128 __DEFAULT_FN_ATTRS128
8689_mm_mask_move_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
8690{
8691 return __builtin_ia32_selectss_128(__U, _mm_move_ss(__A, __B), __W);
8692}
8693
8694static __inline__ __m128 __DEFAULT_FN_ATTRS128
8695_mm_maskz_move_ss (__mmask8 __U, __m128 __A, __m128 __B)
8696{
8697 return __builtin_ia32_selectss_128(__U, _mm_move_ss(__A, __B),
8698 _mm_setzero_ps());
8699}
8700
8701static __inline__ __m128d __DEFAULT_FN_ATTRS128
8702_mm_mask_move_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
8703{
8704 return __builtin_ia32_selectsd_128(__U, _mm_move_sd(__A, __B), __W);
8705}
8706
8707static __inline__ __m128d __DEFAULT_FN_ATTRS128
8708_mm_maskz_move_sd (__mmask8 __U, __m128d __A, __m128d __B)
8709{
8710 return __builtin_ia32_selectsd_128(__U, _mm_move_sd(__A, __B),
8711 _mm_setzero_pd());
8712}
8713
8714static __inline__ void __DEFAULT_FN_ATTRS128
8715_mm_mask_store_ss (float * __W, __mmask8 __U, __m128 __A)
8716{
8717 __builtin_ia32_storess128_mask ((__v4sf *)__W, __A, __U & 1);
8718}
8719
8720static __inline__ void __DEFAULT_FN_ATTRS128
8721_mm_mask_store_sd (double * __W, __mmask8 __U, __m128d __A)
8722{
8723 __builtin_ia32_storesd128_mask ((__v2df *)__W, __A, __U & 1);
8724}
8725
8726static __inline__ __m128 __DEFAULT_FN_ATTRS128
8727_mm_mask_load_ss (__m128 __W, __mmask8 __U, const float* __A)
8728{
8729 __m128 src = (__v4sf) __builtin_shufflevector((__v4sf) __W,
8730 (__v4sf)_mm_setzero_ps(),
8731 0, 4, 4, 4);
8732
8733 return (__m128) __builtin_ia32_loadss128_mask ((const __v4sf *) __A, src, __U & 1);
8734}
8735
8736static __inline__ __m128 __DEFAULT_FN_ATTRS128
8737_mm_maskz_load_ss (__mmask8 __U, const float* __A)
8738{
8739 return (__m128)__builtin_ia32_loadss128_mask ((const __v4sf *) __A,
8740 (__v4sf) _mm_setzero_ps(),
8741 __U & 1);
8742}
8743
8744static __inline__ __m128d __DEFAULT_FN_ATTRS128
8745_mm_mask_load_sd (__m128d __W, __mmask8 __U, const double* __A)
8746{
8747 __m128d src = (__v2df) __builtin_shufflevector((__v2df) __W,
8748 (__v2df)_mm_setzero_pd(),
8749 0, 2);
8750
8751 return (__m128d) __builtin_ia32_loadsd128_mask ((const __v2df *) __A, src, __U & 1);
8752}
8753
8754static __inline__ __m128d __DEFAULT_FN_ATTRS128
8755_mm_maskz_load_sd (__mmask8 __U, const double* __A)
8756{
8757 return (__m128d) __builtin_ia32_loadsd128_mask ((const __v2df *) __A,
8758 (__v2df) _mm_setzero_pd(),
8759 __U & 1);
8760}
8761
/* Shuffle 32-bit elements within each 128-bit lane by the immediate I. */
#define _mm512_shuffle_epi32(A, I) \
  (__m512i)__builtin_ia32_pshufd512((__v16si)(__m512i)(A), (int)(I))

#define _mm512_mask_shuffle_epi32(W, U, A, I) \
  (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                      (__v16si)_mm512_shuffle_epi32((A), (I)), \
                                      (__v16si)(__m512i)(W))

#define _mm512_maskz_shuffle_epi32(U, A, I) \
  (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                      (__v16si)_mm512_shuffle_epi32((A), (I)), \
                                      (__v16si)_mm512_setzero_si512())
8774
8775static __inline__ __m512d __DEFAULT_FN_ATTRS512
8776_mm512_mask_expand_pd (__m512d __W, __mmask8 __U, __m512d __A)
8777{
8778 return (__m512d) __builtin_ia32_expanddf512_mask ((__v8df) __A,
8779 (__v8df) __W,
8780 (__mmask8) __U);
8781}
8782
8783static __inline__ __m512d __DEFAULT_FN_ATTRS512
8785{
8786 return (__m512d) __builtin_ia32_expanddf512_mask ((__v8df) __A,
8787 (__v8df) _mm512_setzero_pd (),
8788 (__mmask8) __U);
8789}
8790
8791static __inline__ __m512i __DEFAULT_FN_ATTRS512
8792_mm512_mask_expand_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
8793{
8794 return (__m512i) __builtin_ia32_expanddi512_mask ((__v8di) __A,
8795 (__v8di) __W,
8796 (__mmask8) __U);
8797}
8798
8799static __inline__ __m512i __DEFAULT_FN_ATTRS512
8801{
8802 return (__m512i) __builtin_ia32_expanddi512_mask ((__v8di) __A,
8803 (__v8di) _mm512_setzero_si512 (),
8804 (__mmask8) __U);
8805}
8806
8807static __inline__ __m512d __DEFAULT_FN_ATTRS512
8808_mm512_mask_expandloadu_pd(__m512d __W, __mmask8 __U, void const *__P)
8809{
8810 return (__m512d) __builtin_ia32_expandloaddf512_mask ((const __v8df *)__P,
8811 (__v8df) __W,
8812 (__mmask8) __U);
8813}
8814
8815static __inline__ __m512d __DEFAULT_FN_ATTRS512
8817{
8818 return (__m512d) __builtin_ia32_expandloaddf512_mask ((const __v8df *)__P,
8819 (__v8df) _mm512_setzero_pd(),
8820 (__mmask8) __U);
8821}
8822
8823static __inline__ __m512i __DEFAULT_FN_ATTRS512
8824_mm512_mask_expandloadu_epi64(__m512i __W, __mmask8 __U, void const *__P)
8825{
8826 return (__m512i) __builtin_ia32_expandloaddi512_mask ((const __v8di *)__P,
8827 (__v8di) __W,
8828 (__mmask8) __U);
8829}
8830
8831static __inline__ __m512i __DEFAULT_FN_ATTRS512
8833{
8834 return (__m512i) __builtin_ia32_expandloaddi512_mask ((const __v8di *)__P,
8835 (__v8di) _mm512_setzero_si512(),
8836 (__mmask8) __U);
8837}
8838
8839static __inline__ __m512 __DEFAULT_FN_ATTRS512
8840_mm512_mask_expandloadu_ps(__m512 __W, __mmask16 __U, void const *__P)
8841{
8842 return (__m512) __builtin_ia32_expandloadsf512_mask ((const __v16sf *)__P,
8843 (__v16sf) __W,
8844 (__mmask16) __U);
8845}
8846
8847static __inline__ __m512 __DEFAULT_FN_ATTRS512
8849{
8850 return (__m512) __builtin_ia32_expandloadsf512_mask ((const __v16sf *)__P,
8851 (__v16sf) _mm512_setzero_ps(),
8852 (__mmask16) __U);
8853}
8854
8855static __inline__ __m512i __DEFAULT_FN_ATTRS512
8856_mm512_mask_expandloadu_epi32(__m512i __W, __mmask16 __U, void const *__P)
8857{
8858 return (__m512i) __builtin_ia32_expandloadsi512_mask ((const __v16si *)__P,
8859 (__v16si) __W,
8860 (__mmask16) __U);
8861}
8862
8863static __inline__ __m512i __DEFAULT_FN_ATTRS512
8865{
8866 return (__m512i) __builtin_ia32_expandloadsi512_mask ((const __v16si *)__P,
8867 (__v16si) _mm512_setzero_si512(),
8868 (__mmask16) __U);
8869}
8870
8871static __inline__ __m512 __DEFAULT_FN_ATTRS512
8872_mm512_mask_expand_ps (__m512 __W, __mmask16 __U, __m512 __A)
8873{
8874 return (__m512) __builtin_ia32_expandsf512_mask ((__v16sf) __A,
8875 (__v16sf) __W,
8876 (__mmask16) __U);
8877}
8878
8879static __inline__ __m512 __DEFAULT_FN_ATTRS512
8881{
8882 return (__m512) __builtin_ia32_expandsf512_mask ((__v16sf) __A,
8883 (__v16sf) _mm512_setzero_ps(),
8884 (__mmask16) __U);
8885}
8886
8887static __inline__ __m512i __DEFAULT_FN_ATTRS512
8888_mm512_mask_expand_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
8889{
8890 return (__m512i) __builtin_ia32_expandsi512_mask ((__v16si) __A,
8891 (__v16si) __W,
8892 (__mmask16) __U);
8893}
8894
8895static __inline__ __m512i __DEFAULT_FN_ATTRS512
8897{
8898 return (__m512i) __builtin_ia32_expandsi512_mask ((__v16si) __A,
8899 (__v16si) _mm512_setzero_si512(),
8900 (__mmask16) __U);
8901}
8902
/* Convert 8 packed floats to doubles with explicit rounding/SAE mode R. */
#define _mm512_cvt_roundps_pd(A, R) \
  (__m512d)__builtin_ia32_cvtps2pd512_mask((__v8sf)(__m256)(A), \
                                           (__v8df)_mm512_undefined_pd(), \
                                           (__mmask8)-1, (int)(R))

#define _mm512_mask_cvt_roundps_pd(W, U, A, R) \
  (__m512d)__builtin_ia32_cvtps2pd512_mask((__v8sf)(__m256)(A), \
                                           (__v8df)(__m512d)(W), \
                                           (__mmask8)(U), (int)(R))

#define _mm512_maskz_cvt_roundps_pd(U, A, R) \
  (__m512d)__builtin_ia32_cvtps2pd512_mask((__v8sf)(__m256)(A), \
                                           (__v8df)_mm512_setzero_pd(), \
                                           (__mmask8)(U), (int)(R))
8917
8918static __inline__ __m512d __DEFAULT_FN_ATTRS512
8920{
8921 return (__m512d) __builtin_convertvector((__v8sf)__A, __v8df);
8922}
8923
8924static __inline__ __m512d __DEFAULT_FN_ATTRS512
8925_mm512_mask_cvtps_pd (__m512d __W, __mmask8 __U, __m256 __A)
8926{
8927 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
8928 (__v8df)_mm512_cvtps_pd(__A),
8929 (__v8df)__W);
8930}
8931
8932static __inline__ __m512d __DEFAULT_FN_ATTRS512
8934{
8935 return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
8936 (__v8df)_mm512_cvtps_pd(__A),
8937 (__v8df)_mm512_setzero_pd());
8938}
8939
8940static __inline__ __m512d __DEFAULT_FN_ATTRS512
8942{
8943 return (__m512d) _mm512_cvtps_pd(_mm512_castps512_ps256(__A));
8944}
8945
8946static __inline__ __m512d __DEFAULT_FN_ATTRS512
8947_mm512_mask_cvtpslo_pd (__m512d __W, __mmask8 __U, __m512 __A)
8948{
8949 return (__m512d) _mm512_mask_cvtps_pd(__W, __U, _mm512_castps512_ps256(__A));
8950}
8951
8952static __inline__ __m512d __DEFAULT_FN_ATTRS512
8953_mm512_mask_mov_pd (__m512d __W, __mmask8 __U, __m512d __A)
8954{
8955 return (__m512d) __builtin_ia32_selectpd_512 ((__mmask8) __U,
8956 (__v8df) __A,
8957 (__v8df) __W);
8958}
8959
8960static __inline__ __m512d __DEFAULT_FN_ATTRS512
8962{
8963 return (__m512d) __builtin_ia32_selectpd_512 ((__mmask8) __U,
8964 (__v8df) __A,
8965 (__v8df) _mm512_setzero_pd ());
8966}
8967
8968static __inline__ __m512 __DEFAULT_FN_ATTRS512
8969_mm512_mask_mov_ps (__m512 __W, __mmask16 __U, __m512 __A)
8970{
8971 return (__m512) __builtin_ia32_selectps_512 ((__mmask16) __U,
8972 (__v16sf) __A,
8973 (__v16sf) __W);
8974}
8975
8976static __inline__ __m512 __DEFAULT_FN_ATTRS512
8978{
8979 return (__m512) __builtin_ia32_selectps_512 ((__mmask16) __U,
8980 (__v16sf) __A,
8981 (__v16sf) _mm512_setzero_ps ());
8982}
8983
8984static __inline__ void __DEFAULT_FN_ATTRS512
8985_mm512_mask_compressstoreu_pd (void *__P, __mmask8 __U, __m512d __A)
8986{
8987 __builtin_ia32_compressstoredf512_mask ((__v8df *) __P, (__v8df) __A,
8988 (__mmask8) __U);
8989}
8990
8991static __inline__ void __DEFAULT_FN_ATTRS512
8992_mm512_mask_compressstoreu_epi64 (void *__P, __mmask8 __U, __m512i __A)
8993{
8994 __builtin_ia32_compressstoredi512_mask ((__v8di *) __P, (__v8di) __A,
8995 (__mmask8) __U);
8996}
8997
8998static __inline__ void __DEFAULT_FN_ATTRS512
8999_mm512_mask_compressstoreu_ps (void *__P, __mmask16 __U, __m512 __A)
9000{
9001 __builtin_ia32_compressstoresf512_mask ((__v16sf *) __P, (__v16sf) __A,
9002 (__mmask16) __U);
9003}
9004
9005static __inline__ void __DEFAULT_FN_ATTRS512
9006_mm512_mask_compressstoreu_epi32 (void *__P, __mmask16 __U, __m512i __A)
9007{
9008 __builtin_ia32_compressstoresi512_mask ((__v16si *) __P, (__v16si) __A,
9009 (__mmask16) __U);
9010}
9011
/* Convert the low double of B to a float in the low element of the result,
   with explicit rounding mode R; upper elements come from A. */
#define _mm_cvt_roundsd_ss(A, B, R) \
  (__m128)__builtin_ia32_cvtsd2ss_round_mask((__v4sf)(__m128)(A), \
                                             (__v2df)(__m128d)(B), \
                                             (__v4sf)_mm_undefined_ps(), \
                                             (__mmask8)-1, (int)(R))

#define _mm_mask_cvt_roundsd_ss(W, U, A, B, R) \
  (__m128)__builtin_ia32_cvtsd2ss_round_mask((__v4sf)(__m128)(A), \
                                             (__v2df)(__m128d)(B), \
                                             (__v4sf)(__m128)(W), \
                                             (__mmask8)(U), (int)(R))

#define _mm_maskz_cvt_roundsd_ss(U, A, B, R) \
  (__m128)__builtin_ia32_cvtsd2ss_round_mask((__v4sf)(__m128)(A), \
                                             (__v2df)(__m128d)(B), \
                                             (__v4sf)_mm_setzero_ps(), \
                                             (__mmask8)(U), (int)(R))
9029
9030static __inline__ __m128 __DEFAULT_FN_ATTRS128
9031_mm_mask_cvtsd_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128d __B)
9032{
9033 return __builtin_ia32_cvtsd2ss_round_mask ((__v4sf)__A,
9034 (__v2df)__B,
9035 (__v4sf)__W,
9037}
9038
9039static __inline__ __m128 __DEFAULT_FN_ATTRS128
9040_mm_maskz_cvtsd_ss (__mmask8 __U, __m128 __A, __m128d __B)
9041{
9042 return __builtin_ia32_cvtsd2ss_round_mask ((__v4sf)__A,
9043 (__v2df)__B,
9044 (__v4sf)_mm_setzero_ps(),
9046}
9047
/* Intel-style _i32/_i64 spellings of the signed scalar conversions. */
#define _mm_cvtss_i32 _mm_cvtss_si32
#define _mm_cvtsd_i32 _mm_cvtsd_si32
#define _mm_cvti32_sd _mm_cvtsi32_sd
#define _mm_cvti32_ss _mm_cvtsi32_ss
#ifdef __x86_64__
#define _mm_cvtss_i64 _mm_cvtss_si64
#define _mm_cvtsd_i64 _mm_cvtsd_si64
#define _mm_cvti64_sd _mm_cvtsi64_sd
#define _mm_cvti64_ss _mm_cvtsi64_ss
#endif
9058
/* Signed integer -> scalar float/double conversions with explicit rounding
   mode R; the 64-bit forms are only available on x86-64. */
#ifdef __x86_64__
#define _mm_cvt_roundi64_sd(A, B, R) \
  (__m128d)__builtin_ia32_cvtsi2sd64((__v2df)(__m128d)(A), (long long)(B), \
                                     (int)(R))

#define _mm_cvt_roundsi64_sd(A, B, R) \
  (__m128d)__builtin_ia32_cvtsi2sd64((__v2df)(__m128d)(A), (long long)(B), \
                                     (int)(R))
#endif

#define _mm_cvt_roundsi32_ss(A, B, R) \
  (__m128)__builtin_ia32_cvtsi2ss32((__v4sf)(__m128)(A), (int)(B), (int)(R))

#define _mm_cvt_roundi32_ss(A, B, R) \
  (__m128)__builtin_ia32_cvtsi2ss32((__v4sf)(__m128)(A), (int)(B), (int)(R))

#ifdef __x86_64__
#define _mm_cvt_roundsi64_ss(A, B, R) \
  (__m128)__builtin_ia32_cvtsi2ss64((__v4sf)(__m128)(A), (long long)(B), \
                                    (int)(R))

#define _mm_cvt_roundi64_ss(A, B, R) \
  (__m128)__builtin_ia32_cvtsi2ss64((__v4sf)(__m128)(A), (long long)(B), \
                                    (int)(R))
#endif
9084
/* Convert the low float of B to a double in the low element of the result,
   with explicit SAE mode R; the upper element comes from A. */
#define _mm_cvt_roundss_sd(A, B, R) \
  (__m128d)__builtin_ia32_cvtss2sd_round_mask((__v2df)(__m128d)(A), \
                                              (__v4sf)(__m128)(B), \
                                              (__v2df)_mm_undefined_pd(), \
                                              (__mmask8)-1, (int)(R))

#define _mm_mask_cvt_roundss_sd(W, U, A, B, R) \
  (__m128d)__builtin_ia32_cvtss2sd_round_mask((__v2df)(__m128d)(A), \
                                              (__v4sf)(__m128)(B), \
                                              (__v2df)(__m128d)(W), \
                                              (__mmask8)(U), (int)(R))

#define _mm_maskz_cvt_roundss_sd(U, A, B, R) \
  (__m128d)__builtin_ia32_cvtss2sd_round_mask((__v2df)(__m128d)(A), \
                                              (__v4sf)(__m128)(B), \
                                              (__v2df)_mm_setzero_pd(), \
                                              (__mmask8)(U), (int)(R))
9102
9103static __inline__ __m128d __DEFAULT_FN_ATTRS128
9104_mm_mask_cvtss_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128 __B)
9105{
9106 return __builtin_ia32_cvtss2sd_round_mask((__v2df)__A,
9107 (__v4sf)__B,
9108 (__v2df)__W,
9110}
9111
9112static __inline__ __m128d __DEFAULT_FN_ATTRS128
9113_mm_maskz_cvtss_sd (__mmask8 __U, __m128d __A, __m128 __B)
9114{
9115 return __builtin_ia32_cvtss2sd_round_mask((__v2df)__A,
9116 (__v4sf)__B,
9117 (__v2df)_mm_setzero_pd(),
9119}
9120
9121static __inline__ __m128d __DEFAULT_FN_ATTRS128
9122_mm_cvtu32_sd (__m128d __A, unsigned __B)
9123{
9124 __A[0] = __B;
9125 return __A;
9126}
9127
9128#ifdef __x86_64__
9129#define _mm_cvt_roundu64_sd(A, B, R) \
9130 (__m128d)__builtin_ia32_cvtusi2sd64((__v2df)(__m128d)(A), \
9131 (unsigned long long)(B), (int)(R))
9132
9133static __inline__ __m128d __DEFAULT_FN_ATTRS128
9134_mm_cvtu64_sd (__m128d __A, unsigned long long __B)
9135{
9136 __A[0] = __B;
9137 return __A;
9138}
9139#endif
9140
9141#define _mm_cvt_roundu32_ss(A, B, R) \
9142 (__m128)__builtin_ia32_cvtusi2ss32((__v4sf)(__m128)(A), (unsigned int)(B), \
9143 (int)(R))
9144
9145static __inline__ __m128 __DEFAULT_FN_ATTRS128
9146_mm_cvtu32_ss (__m128 __A, unsigned __B)
9147{
9148 __A[0] = __B;
9149 return __A;
9150}
9151
9152#ifdef __x86_64__
9153#define _mm_cvt_roundu64_ss(A, B, R) \
9154 (__m128)__builtin_ia32_cvtusi2ss64((__v4sf)(__m128)(A), \
9155 (unsigned long long)(B), (int)(R))
9156
9157static __inline__ __m128 __DEFAULT_FN_ATTRS128
9158_mm_cvtu64_ss (__m128 __A, unsigned long long __B)
9159{
9160 __A[0] = __B;
9161 return __A;
9162}
9163#endif
9164
9165static __inline__ __m512i __DEFAULT_FN_ATTRS512
9166_mm512_mask_set1_epi32 (__m512i __O, __mmask16 __M, int __A)
9167{
9168 return (__m512i) __builtin_ia32_selectd_512(__M,
9169 (__v16si) _mm512_set1_epi32(__A),
9170 (__v16si) __O);
9171}
9172
9173static __inline__ __m512i __DEFAULT_FN_ATTRS512
9174_mm512_mask_set1_epi64 (__m512i __O, __mmask8 __M, long long __A)
9175{
9176 return (__m512i) __builtin_ia32_selectq_512(__M,
9177 (__v8di) _mm512_set1_epi64(__A),
9178 (__v8di) __O);
9179}
9180
9181static __inline __m512i __DEFAULT_FN_ATTRS512
9182_mm512_set_epi8 (char __e63, char __e62, char __e61, char __e60, char __e59,
9183 char __e58, char __e57, char __e56, char __e55, char __e54, char __e53,
9184 char __e52, char __e51, char __e50, char __e49, char __e48, char __e47,
9185 char __e46, char __e45, char __e44, char __e43, char __e42, char __e41,
9186 char __e40, char __e39, char __e38, char __e37, char __e36, char __e35,
9187 char __e34, char __e33, char __e32, char __e31, char __e30, char __e29,
9188 char __e28, char __e27, char __e26, char __e25, char __e24, char __e23,
9189 char __e22, char __e21, char __e20, char __e19, char __e18, char __e17,
9190 char __e16, char __e15, char __e14, char __e13, char __e12, char __e11,
9191 char __e10, char __e9, char __e8, char __e7, char __e6, char __e5,
9192 char __e4, char __e3, char __e2, char __e1, char __e0) {
9193
9194 return __extension__ (__m512i)(__v64qi)
9195 {__e0, __e1, __e2, __e3, __e4, __e5, __e6, __e7,
9196 __e8, __e9, __e10, __e11, __e12, __e13, __e14, __e15,
9197 __e16, __e17, __e18, __e19, __e20, __e21, __e22, __e23,
9198 __e24, __e25, __e26, __e27, __e28, __e29, __e30, __e31,
9199 __e32, __e33, __e34, __e35, __e36, __e37, __e38, __e39,
9200 __e40, __e41, __e42, __e43, __e44, __e45, __e46, __e47,
9201 __e48, __e49, __e50, __e51, __e52, __e53, __e54, __e55,
9202 __e56, __e57, __e58, __e59, __e60, __e61, __e62, __e63};
9203}
9204
9205static __inline __m512i __DEFAULT_FN_ATTRS512
9206_mm512_set_epi16(short __e31, short __e30, short __e29, short __e28,
9207 short __e27, short __e26, short __e25, short __e24, short __e23,
9208 short __e22, short __e21, short __e20, short __e19, short __e18,
9209 short __e17, short __e16, short __e15, short __e14, short __e13,
9210 short __e12, short __e11, short __e10, short __e9, short __e8,
9211 short __e7, short __e6, short __e5, short __e4, short __e3,
9212 short __e2, short __e1, short __e0) {
9213 return __extension__ (__m512i)(__v32hi)
9214 {__e0, __e1, __e2, __e3, __e4, __e5, __e6, __e7,
9215 __e8, __e9, __e10, __e11, __e12, __e13, __e14, __e15,
9216 __e16, __e17, __e18, __e19, __e20, __e21, __e22, __e23,
9217 __e24, __e25, __e26, __e27, __e28, __e29, __e30, __e31 };
9218}
9219
9220static __inline __m512i __DEFAULT_FN_ATTRS512
9221_mm512_set_epi32 (int __A, int __B, int __C, int __D,
9222 int __E, int __F, int __G, int __H,
9223 int __I, int __J, int __K, int __L,
9224 int __M, int __N, int __O, int __P)
9225{
9226 return __extension__ (__m512i)(__v16si)
9227 { __P, __O, __N, __M, __L, __K, __J, __I,
9228 __H, __G, __F, __E, __D, __C, __B, __A };
9229}
9230
9231#define _mm512_setr_epi32(e0,e1,e2,e3,e4,e5,e6,e7, \
9232 e8,e9,e10,e11,e12,e13,e14,e15) \
9233 _mm512_set_epi32((e15),(e14),(e13),(e12),(e11),(e10),(e9),(e8),(e7),(e6), \
9234 (e5),(e4),(e3),(e2),(e1),(e0))
9235
9236static __inline__ __m512i __DEFAULT_FN_ATTRS512
9237_mm512_set_epi64 (long long __A, long long __B, long long __C,
9238 long long __D, long long __E, long long __F,
9239 long long __G, long long __H)
9240{
9241 return __extension__ (__m512i) (__v8di)
9242 { __H, __G, __F, __E, __D, __C, __B, __A };
9243}
9244
9245#define _mm512_setr_epi64(e0,e1,e2,e3,e4,e5,e6,e7) \
9246 _mm512_set_epi64((e7),(e6),(e5),(e4),(e3),(e2),(e1),(e0))
9247
9248static __inline__ __m512d __DEFAULT_FN_ATTRS512
9249_mm512_set_pd (double __A, double __B, double __C, double __D,
9250 double __E, double __F, double __G, double __H)
9251{
9252 return __extension__ (__m512d)
9253 { __H, __G, __F, __E, __D, __C, __B, __A };
9254}
9255
9256#define _mm512_setr_pd(e0,e1,e2,e3,e4,e5,e6,e7) \
9257 _mm512_set_pd((e7),(e6),(e5),(e4),(e3),(e2),(e1),(e0))
9258
9259static __inline__ __m512 __DEFAULT_FN_ATTRS512
9260_mm512_set_ps (float __A, float __B, float __C, float __D,
9261 float __E, float __F, float __G, float __H,
9262 float __I, float __J, float __K, float __L,
9263 float __M, float __N, float __O, float __P)
9264{
9265 return __extension__ (__m512)
9266 { __P, __O, __N, __M, __L, __K, __J, __I,
9267 __H, __G, __F, __E, __D, __C, __B, __A };
9268}
9269
9270#define _mm512_setr_ps(e0,e1,e2,e3,e4,e5,e6,e7,e8,e9,e10,e11,e12,e13,e14,e15) \
9271 _mm512_set_ps((e15),(e14),(e13),(e12),(e11),(e10),(e9),(e8),(e7),(e6),(e5), \
9272 (e4),(e3),(e2),(e1),(e0))
9273
9274static __inline__ __m512 __DEFAULT_FN_ATTRS512
9275_mm512_abs_ps(__m512 __A)
9276{
9277 return (__m512)_mm512_and_epi32(_mm512_set1_epi32(0x7FFFFFFF),(__m512i)__A) ;
9278}
9279
9280static __inline__ __m512 __DEFAULT_FN_ATTRS512
9281_mm512_mask_abs_ps(__m512 __W, __mmask16 __K, __m512 __A)
9282{
9283 return (__m512)_mm512_mask_and_epi32((__m512i)__W, __K, _mm512_set1_epi32(0x7FFFFFFF),(__m512i)__A) ;
9284}
9285
9286static __inline__ __m512d __DEFAULT_FN_ATTRS512
9287_mm512_abs_pd(__m512d __A)
9288{
9289 return (__m512d)_mm512_and_epi64(_mm512_set1_epi64(0x7FFFFFFFFFFFFFFF),(__v8di)__A) ;
9290}
9291
9292static __inline__ __m512d __DEFAULT_FN_ATTRS512
9293_mm512_mask_abs_pd(__m512d __W, __mmask8 __K, __m512d __A)
9294{
9295 return (__m512d)_mm512_mask_and_epi64((__v8di)__W, __K, _mm512_set1_epi64(0x7FFFFFFFFFFFFFFF),(__v8di)__A);
9296}
9297
/* Vector-reduction arithmetic accepts vectors as inputs and produces scalars
 * as outputs. This class of vector operation forms the basis of many
 * scientific computations. In vector-reduction arithmetic, the evaluation is
 * independent of the order of the input elements of V.

 * A bisection method is used: at each step, the vector from the previous step
 * is partitioned in half, and the operation is performed on the two halves.
 * This takes log2(n) steps where n is the number of elements in the vector.
 */
9307
/* Helper for the 64-bit integer reductions below: halve the 512-bit vector,
 * apply `op` pairwise, and repeat (256 -> 128 -> 64 bits); the final scalar is
 * element 0.  #undef'd and redefined for each element type. */
#define _mm512_mask_reduce_operator(op) \
  __v4du __t1 = (__v4du)_mm512_extracti64x4_epi64(__W, 0); \
  __v4du __t2 = (__v4du)_mm512_extracti64x4_epi64(__W, 1); \
  __m256i __t3 = (__m256i)(__t1 op __t2); \
  __v2du __t4 = (__v2du)_mm256_extracti128_si256(__t3, 0); \
  __v2du __t5 = (__v2du)_mm256_extracti128_si256(__t3, 1); \
  __v2du __t6 = __t4 op __t5; \
  __v2du __t7 = __builtin_shufflevector(__t6, __t6, 1, 0); \
  __v2du __t8 = __t6 op __t7; \
  return __t8[0]
9318
9319static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_reduce_add_epi64(__m512i __W) {
9321}
9322
9323static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_reduce_mul_epi64(__m512i __W) {
9325}
9326
9327static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_reduce_and_epi64(__m512i __W) {
9329}
9330
9331static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_reduce_or_epi64(__m512i __W) {
9333}
9334
9335static __inline__ long long __DEFAULT_FN_ATTRS512
9337 __W = _mm512_maskz_mov_epi64(__M, __W);
9339}
9340
9341static __inline__ long long __DEFAULT_FN_ATTRS512
9346
9347static __inline__ long long __DEFAULT_FN_ATTRS512
9352
9353static __inline__ long long __DEFAULT_FN_ATTRS512
9355 __W = _mm512_maskz_mov_epi64(__M, __W);
9357}
9358#undef _mm512_mask_reduce_operator
9359
/* Helper for the double-precision reductions below: bisect 512 -> 256 -> 128
 * -> 64 bits, applying `op` pairwise at each step. */
#define _mm512_mask_reduce_operator(op) \
  __m256d __t1 = _mm512_extractf64x4_pd(__W, 0); \
  __m256d __t2 = _mm512_extractf64x4_pd(__W, 1); \
  __m256d __t3 = __t1 op __t2; \
  __m128d __t4 = _mm256_extractf128_pd(__t3, 0); \
  __m128d __t5 = _mm256_extractf128_pd(__t3, 1); \
  __m128d __t6 = __t4 op __t5; \
  __m128d __t7 = __builtin_shufflevector(__t6, __t6, 1, 0); \
  __m128d __t8 = __t6 op __t7; \
  return __t8[0]
9370
9371static __inline__ double __DEFAULT_FN_ATTRS512 _mm512_reduce_add_pd(__m512d __W) {
9373}
9374
9375static __inline__ double __DEFAULT_FN_ATTRS512 _mm512_reduce_mul_pd(__m512d __W) {
9377}
9378
9379static __inline__ double __DEFAULT_FN_ATTRS512
9381 __W = _mm512_maskz_mov_pd(__M, __W);
9383}
9384
9385static __inline__ double __DEFAULT_FN_ATTRS512
9387 __W = _mm512_mask_mov_pd(_mm512_set1_pd(1.0), __M, __W);
9389}
9390#undef _mm512_mask_reduce_operator
9391
/* Helper for the 32-bit integer reductions below: bisect 512 -> 256 -> 128 ->
 * 64 -> 32 bits, applying `op` pairwise at each step. */
#define _mm512_mask_reduce_operator(op) \
  __v8su __t1 = (__v8su)_mm512_extracti64x4_epi64(__W, 0); \
  __v8su __t2 = (__v8su)_mm512_extracti64x4_epi64(__W, 1); \
  __m256i __t3 = (__m256i)(__t1 op __t2); \
  __v4su __t4 = (__v4su)_mm256_extracti128_si256(__t3, 0); \
  __v4su __t5 = (__v4su)_mm256_extracti128_si256(__t3, 1); \
  __v4su __t6 = __t4 op __t5; \
  __v4su __t7 = __builtin_shufflevector(__t6, __t6, 2, 3, 0, 1); \
  __v4su __t8 = __t6 op __t7; \
  __v4su __t9 = __builtin_shufflevector(__t8, __t8, 1, 0, 3, 2); \
  __v4su __t10 = __t8 op __t9; \
  return __t10[0]
9404
9405static __inline__ int __DEFAULT_FN_ATTRS512
9409
9410static __inline__ int __DEFAULT_FN_ATTRS512
9414
9415static __inline__ int __DEFAULT_FN_ATTRS512
9419
9420static __inline__ int __DEFAULT_FN_ATTRS512
9424
9425static __inline__ int __DEFAULT_FN_ATTRS512
9427 __W = _mm512_maskz_mov_epi32(__M, __W);
9429}
9430
9431static __inline__ int __DEFAULT_FN_ATTRS512
9436
9437static __inline__ int __DEFAULT_FN_ATTRS512
9442
9443static __inline__ int __DEFAULT_FN_ATTRS512
9445 __W = _mm512_maskz_mov_epi32(__M, __W);
9447}
9448#undef _mm512_mask_reduce_operator
9449
/* Helper for the single-precision reductions below: bisect 512 -> 256 -> 128
 * -> 64 -> 32 bits, applying `op` pairwise at each step. */
#define _mm512_mask_reduce_operator(op) \
  __m256 __t1 = (__m256)_mm512_extractf64x4_pd((__m512d)__W, 0); \
  __m256 __t2 = (__m256)_mm512_extractf64x4_pd((__m512d)__W, 1); \
  __m256 __t3 = __t1 op __t2; \
  __m128 __t4 = _mm256_extractf128_ps(__t3, 0); \
  __m128 __t5 = _mm256_extractf128_ps(__t3, 1); \
  __m128 __t6 = __t4 op __t5; \
  __m128 __t7 = __builtin_shufflevector(__t6, __t6, 2, 3, 0, 1); \
  __m128 __t8 = __t6 op __t7; \
  __m128 __t9 = __builtin_shufflevector(__t8, __t8, 1, 0, 3, 2); \
  __m128 __t10 = __t8 op __t9; \
  return __t10[0]
9462
9463static __inline__ float __DEFAULT_FN_ATTRS512
9467
9468static __inline__ float __DEFAULT_FN_ATTRS512
9472
9473static __inline__ float __DEFAULT_FN_ATTRS512
9475 __W = _mm512_maskz_mov_ps(__M, __W);
9477}
9478
9479static __inline__ float __DEFAULT_FN_ATTRS512
9481 __W = _mm512_mask_mov_ps(_mm512_set1_ps(1.0f), __M, __W);
9483}
9484#undef _mm512_mask_reduce_operator
9485
/* Helper for 64-bit min/max reductions: at each step, swap halves with a
 * shuffle and combine with the named min/max intrinsic (`op` is an intrinsic
 * suffix such as max_epi64), halving the number of distinct candidates. */
#define _mm512_mask_reduce_operator(op) \
  __m512i __t1 = (__m512i)__builtin_shufflevector((__v8di)__V, (__v8di)__V, 4, 5, 6, 7, 0, 1, 2, 3); \
  __m512i __t2 = _mm512_##op(__V, __t1); \
  __m512i __t3 = (__m512i)__builtin_shufflevector((__v8di)__t2, (__v8di)__t2, 2, 3, 0, 1, 6, 7, 4, 5); \
  __m512i __t4 = _mm512_##op(__t2, __t3); \
  __m512i __t5 = (__m512i)__builtin_shufflevector((__v8di)__t4, (__v8di)__t4, 1, 0, 3, 2, 5, 4, 7, 6); \
  __v8di __t6 = (__v8di)_mm512_##op(__t4, __t5); \
  return __t6[0]
9494
9495static __inline__ long long __DEFAULT_FN_ATTRS512
9497 _mm512_mask_reduce_operator(max_epi64);
9498}
9499
9500static __inline__ unsigned long long __DEFAULT_FN_ATTRS512
9502 _mm512_mask_reduce_operator(max_epu64);
9503}
9504
9505static __inline__ long long __DEFAULT_FN_ATTRS512
9507 _mm512_mask_reduce_operator(min_epi64);
9508}
9509
9510static __inline__ unsigned long long __DEFAULT_FN_ATTRS512
9512 _mm512_mask_reduce_operator(min_epu64);
9513}
9514
9515static __inline__ long long __DEFAULT_FN_ATTRS512
9517 __V = _mm512_mask_mov_epi64(_mm512_set1_epi64(-__LONG_LONG_MAX__ - 1LL), __M, __V);
9518 _mm512_mask_reduce_operator(max_epi64);
9519}
9520
9521static __inline__ unsigned long long __DEFAULT_FN_ATTRS512
9523 __V = _mm512_maskz_mov_epi64(__M, __V);
9524 _mm512_mask_reduce_operator(max_epu64);
9525}
9526
9527static __inline__ long long __DEFAULT_FN_ATTRS512
9529 __V = _mm512_mask_mov_epi64(_mm512_set1_epi64(__LONG_LONG_MAX__), __M, __V);
9530 _mm512_mask_reduce_operator(min_epi64);
9531}
9532
9533static __inline__ unsigned long long __DEFAULT_FN_ATTRS512
9535 __V = _mm512_mask_mov_epi64(_mm512_set1_epi64(~0ULL), __M, __V);
9536 _mm512_mask_reduce_operator(min_epu64);
9537}
9538#undef _mm512_mask_reduce_operator
9539
/* Helper for 32-bit min/max reductions: extract halves, combine with the
 * named min/max intrinsic at 256/128-bit widths, then finish with two
 * shuffle+combine steps within the final 128-bit lane. */
#define _mm512_mask_reduce_operator(op) \
  __m256i __t1 = _mm512_extracti64x4_epi64(__V, 0); \
  __m256i __t2 = _mm512_extracti64x4_epi64(__V, 1); \
  __m256i __t3 = _mm256_##op(__t1, __t2); \
  __m128i __t4 = _mm256_extracti128_si256(__t3, 0); \
  __m128i __t5 = _mm256_extracti128_si256(__t3, 1); \
  __m128i __t6 = _mm_##op(__t4, __t5); \
  __m128i __t7 = (__m128i)__builtin_shufflevector((__v4si)__t6, (__v4si)__t6, 2, 3, 0, 1); \
  __m128i __t8 = _mm_##op(__t6, __t7); \
  __m128i __t9 = (__m128i)__builtin_shufflevector((__v4si)__t8, (__v4si)__t8, 1, 0, 3, 2); \
  __v4si __t10 = (__v4si)_mm_##op(__t8, __t9); \
  return __t10[0]
9552
9553static __inline__ int __DEFAULT_FN_ATTRS512
9555 _mm512_mask_reduce_operator(max_epi32);
9556}
9557
9558static __inline__ unsigned int __DEFAULT_FN_ATTRS512
9560 _mm512_mask_reduce_operator(max_epu32);
9561}
9562
9563static __inline__ int __DEFAULT_FN_ATTRS512
9565 _mm512_mask_reduce_operator(min_epi32);
9566}
9567
9568static __inline__ unsigned int __DEFAULT_FN_ATTRS512
9570 _mm512_mask_reduce_operator(min_epu32);
9571}
9572
9573static __inline__ int __DEFAULT_FN_ATTRS512
9575 __V = _mm512_mask_mov_epi32(_mm512_set1_epi32(-__INT_MAX__ - 1), __M, __V);
9576 _mm512_mask_reduce_operator(max_epi32);
9577}
9578
9579static __inline__ unsigned int __DEFAULT_FN_ATTRS512
9581 __V = _mm512_maskz_mov_epi32(__M, __V);
9582 _mm512_mask_reduce_operator(max_epu32);
9583}
9584
9585static __inline__ int __DEFAULT_FN_ATTRS512
9587 __V = _mm512_mask_mov_epi32(_mm512_set1_epi32(__INT_MAX__), __M, __V);
9588 _mm512_mask_reduce_operator(min_epi32);
9589}
9590
9591static __inline__ unsigned int __DEFAULT_FN_ATTRS512
9593 __V = _mm512_mask_mov_epi32(_mm512_set1_epi32(~0U), __M, __V);
9594 _mm512_mask_reduce_operator(min_epu32);
9595}
9596#undef _mm512_mask_reduce_operator
9597
/* Helper for double-precision min/max reductions: bisect 512 -> 256 -> 128
 * -> 64 bits, combining halves with the named min/max intrinsic. */
#define _mm512_mask_reduce_operator(op) \
  __m256d __t1 = _mm512_extractf64x4_pd(__V, 0); \
  __m256d __t2 = _mm512_extractf64x4_pd(__V, 1); \
  __m256d __t3 = _mm256_##op(__t1, __t2); \
  __m128d __t4 = _mm256_extractf128_pd(__t3, 0); \
  __m128d __t5 = _mm256_extractf128_pd(__t3, 1); \
  __m128d __t6 = _mm_##op(__t4, __t5); \
  __m128d __t7 = __builtin_shufflevector(__t6, __t6, 1, 0); \
  __m128d __t8 = _mm_##op(__t6, __t7); \
  return __t8[0]
9608
9609static __inline__ double __DEFAULT_FN_ATTRS512
9612}
9613
9614static __inline__ double __DEFAULT_FN_ATTRS512
9617}
9618
9619static __inline__ double __DEFAULT_FN_ATTRS512
9621 __V = _mm512_mask_mov_pd(_mm512_set1_pd(-__builtin_inf()), __M, __V);
9623}
9624
9625static __inline__ double __DEFAULT_FN_ATTRS512
9627 __V = _mm512_mask_mov_pd(_mm512_set1_pd(__builtin_inf()), __M, __V);
9629}
9630#undef _mm512_mask_reduce_operator
9631
/* Helper for single-precision min/max reductions: bisect 512 -> 256 -> 128 ->
 * 64 -> 32 bits, combining halves with the named min/max intrinsic. */
#define _mm512_mask_reduce_operator(op) \
  __m256 __t1 = (__m256)_mm512_extractf64x4_pd((__m512d)__V, 0); \
  __m256 __t2 = (__m256)_mm512_extractf64x4_pd((__m512d)__V, 1); \
  __m256 __t3 = _mm256_##op(__t1, __t2); \
  __m128 __t4 = _mm256_extractf128_ps(__t3, 0); \
  __m128 __t5 = _mm256_extractf128_ps(__t3, 1); \
  __m128 __t6 = _mm_##op(__t4, __t5); \
  __m128 __t7 = __builtin_shufflevector(__t6, __t6, 2, 3, 0, 1); \
  __m128 __t8 = _mm_##op(__t6, __t7); \
  __m128 __t9 = __builtin_shufflevector(__t8, __t8, 1, 0, 3, 2); \
  __m128 __t10 = _mm_##op(__t8, __t9); \
  return __t10[0]
9644
9645static __inline__ float __DEFAULT_FN_ATTRS512
9648}
9649
9650static __inline__ float __DEFAULT_FN_ATTRS512
9653}
9654
9655static __inline__ float __DEFAULT_FN_ATTRS512
9657 __V = _mm512_mask_mov_ps(_mm512_set1_ps(-__builtin_inff()), __M, __V);
9659}
9660
9661static __inline__ float __DEFAULT_FN_ATTRS512
9663 __V = _mm512_mask_mov_ps(_mm512_set1_ps(__builtin_inff()), __M, __V);
9665}
9666#undef _mm512_mask_reduce_operator
9667
9679static __inline__ int __DEFAULT_FN_ATTRS512
9681 __v16si __b = (__v16si)__A;
9682 return __b[0];
9683}
9684
9685#undef __DEFAULT_FN_ATTRS512
9686#undef __DEFAULT_FN_ATTRS128
9687#undef __DEFAULT_FN_ATTRS
9688
9689#endif /* __AVX512FINTRIN_H */
static __inline unsigned char unsigned int unsigned int unsigned int * __p
Definition adxintrin.h:24
static __inline__ vector float vector float __b
Definition altivec.h:520
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask3_fmadd_sd(__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_sll_epi64(__mmask8 __U, __m512i __A, __m128i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_slli_epi32(__mmask16 __U, __m512i __A, unsigned int __B)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_blend_epi32(__mmask16 __U, __m512i __A, __m512i __W)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_or_si512(__m512i __a, __m512i __b)
static __inline__ unsigned __DEFAULT_FN_ATTRS128 _mm_cvttsd_u32(__m128d __A)
static __inline__ unsigned __DEFAULT_FN_ATTRS128 _mm_cvtsd_u32(__m128d __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_sub_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_min_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_setzero_pd(void)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_undefined(void)
#define _mm512_mask_reduce_operator(op)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_broadcast_i32x4(__m512i __O, __mmask16 __M, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtpd_epu32(__mmask8 __U, __m512d __A)
static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_mul_epi64(__mmask8 __M, __m512i __W)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask3_fmsub_ss(__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_mov_epi32(__mmask16 __U, __m512i __A)
static __inline__ double __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_mul_pd(__mmask8 __M, __m512d __W)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_add_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_reduce_mul_epi64(__m512i __W)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_load_epi32(__m512i __W, __mmask16 __U, void const *__P)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_cvtepi32_epi16(__m512i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi64_epi8(__m128i __O, __mmask8 __M, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_min_epu64(__mmask8 __M, __m512i __A, __m512i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_sub_pd(__mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_permutexvar_epi64(__m512i __X, __m512i __Y)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvttps_epu32(__m512i __W, __mmask16 __U, __m512 __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_fmsubadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_load_ps(void const *__p)
#define _mm512_mask_cmpeq_epi64_mask(k, A, B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_sll_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_add_ps(__mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_expandloadu_ps(__m512 __W, __mmask16 __U, void const *__P)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_max_epu32(__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_getexp_ss(__m128 __A, __m128 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_add_epi32(__mmask16 __U, __m512i __A, __m512i __B)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_min_epu64(__m512i __A, __m512i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_fnmadd_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ float __DEFAULT_FN_ATTRS512 _mm512_reduce_mul_ps(__m512 __W)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_castpd_ps(__m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_andnot_epi64(__mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_slli_epi64(__mmask8 __U, __m512i __A, unsigned int __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_sll_epi32(__m512i __A, __m128i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_andnot_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
static __inline__ int __DEFAULT_FN_ATTRS128 _mm_cvttss_i32(__m128 __A)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_loadu_si512(void const *__P)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_loadu_epi64(__m512i __W, __mmask8 __U, void const *__P)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_fmsubadd_pd(__m512d __A, __m512d __B, __m512d __C)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_set1_epi32(int __s)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_sub_epi32(__m512i __A, __m512i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_scalef_ps(__mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_andnot_si512(__m512i __A, __m512i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_rcp14_pd(__m512d __A)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_cvttps_epi32(__m512 __a)
static __inline__ unsigned long long __DEFAULT_FN_ATTRS512 _mm512_reduce_max_epu64(__m512i __V)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_max_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_permutex2var_ps(__mmask16 __U, __m512 __A, __m512i __I, __m512 __B)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_castps_si512(__m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_xor_epi32(__m512i __a, __m512i __b)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_broadcastd_epi32(__m128i __A)
static __inline__ unsigned int __DEFAULT_FN_ATTRS512 _mm512_reduce_min_epu32(__m512i __V)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_mullox_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_xor_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepu16_epi32(__m512i __W, __mmask16 __U, __m256i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_andnot_epi32(__mmask16 __U, __m512i __A, __m512i __B)
static __inline__ float __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_max_ps(__mmask16 __M, __m512 __V)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_sllv_epi32(__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_moveldup_ps(__mmask16 __U, __m512 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_cvtsepi64_epi8(__m512i __A)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_set1_epi16(short __w)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_cvtss_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_sub_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_loadu_pd(__m512d __W, __mmask8 __U, void const *__P)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_permutex2var_epi64(__m512i __A, __m512i __I, __m512i __B)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_compressstoreu_epi32(void *__P, __mmask16 __U, __m512i __A)
static __inline__ unsigned char __DEFAULT_FN_ATTRS _kortest_mask16_u8(__mmask16 __A, __mmask16 __B, unsigned char *__C)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_fnmadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_broadcast_i64x4(__m256i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_srli_epi32(__m512i __W, __mmask16 __U, __m512i __A, unsigned int __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_cvttpd_epu32(__m512d __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_getexp_ss(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_min_ss(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask3_fmadd_ss(__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepu32_ps(__mmask16 __U, __m512i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_fnmadd_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ unsigned int __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_max_epu32(__mmask16 __M, __m512i __V)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_movedup_pd(__m512d __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_min_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_rsqrt14_pd(__m512d __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_rsqrt14_sd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi64_storeu_epi8(void *__P, __mmask8 __M, __m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_fmsub_ps(__m512 __A, __m512 __B, __m512 __C)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_scalef_pd(__m512d __A, __m512d __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask3_fnmsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_setzero_ps(void)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_mul_ss(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtps_epi32(__m512i __W, __mmask16 __U, __m512 __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_mul_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_unpacklo_epi64(__m512i __A, __m512i __B)
static __inline__ unsigned __DEFAULT_FN_ATTRS128 _mm_cvttss_u32(__m128 __A)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS _mm512_kmov(__mmask16 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtusepi32_epi16(__m256i __O, __mmask16 __M, __m512i __A)
static __inline__ double __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_min_pd(__mmask8 __M, __m512d __V)
static __inline__ int __DEFAULT_FN_ATTRS512 _mm512_cvtsi512_si32(__m512i __A)
Moves the least significant 32 bits of a vector of [16 x i32] to a 32-bit signed integer value.
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_movedup_pd(__mmask8 __U, __m512d __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_add_ss(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_compressstoreu_ps(void *__P, __mmask16 __U, __m512 __A)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_cvttps_epu32(__m512 __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_fmaddsub_pd(__m512d __A, __m512d __B, __m512d __C)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_fnmadd_pd(__m512d __A, __m512d __B, __m512d __C)
#define _mm512_cmpeq_epi32_mask(A, B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_div_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_broadcastd_epi32(__m512i __O, __mmask16 __M, __m128i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_abs_epi64(__m512i __W, __mmask8 __U, __m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_add_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_sll_epi32(__mmask16 __U, __m512i __A, __m128i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_max_epu32(__mmask16 __M, __m512i __A, __m512i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_max_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_abs_epi32(__m512i __W, __mmask16 __U, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi8_epi32(__m512i __W, __mmask16 __U, __m128i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_fnmadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtps_epu32(__m512i __W, __mmask16 __U, __m512 __A)
static __inline__ int __DEFAULT_FN_ATTRS512 _mm512_reduce_add_epi32(__m512i __W)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_scalef_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
static __inline __mmask8 __DEFAULT_FN_ATTRS512 _mm512_test_epi64_mask(__m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_cvtepu32_epi64(__m256i __X)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_min_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_expandloadu_epi32(__m512i __W, __mmask16 __U, void const *__P)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_min_sd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepu32_epi64(__mmask8 __U, __m256i __X)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_set1_epi32(__mmask16 __M, int __A)
static __inline__ double __DEFAULT_FN_ATTRS512 _mm512_cvtsd_f64(__m512d __a)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi64_storeu_epi16(void *__P, __mmask8 __M, __m512i __A)
static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_add_epi64(__mmask8 __M, __m512i __W)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepu8_epi32(__mmask16 __U, __m128i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_rolv_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS _mm512_kor(__mmask16 __A, __mmask16 __B)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_floor_ps(__m512 __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_castps128_ps512(__m128 __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_getexp_ps(__m512 __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_rsqrt14_ss(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_fnmsub_pd(__m512d __A, __m512d __B, __m512d __C)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_rcp14_ss(__m128 __A, __m128 __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvttpd_epi32(__mmask8 __U, __m512d __A)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_mul_epu32(__m512i __X, __m512i __Y)
static __inline __m256i __DEFAULT_FN_ATTRS512 _mm512_cvttpd_epi32(__m512d __a)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_cvtpd_pslo(__m512 __W, __mmask8 __U, __m512d __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_abs_pd(__m512d __W, __mmask8 __K, __m512d __A)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_load_ps(__m512 __W, __mmask16 __U, void const *__P)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_rolv_epi64(__mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_fmadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
static __inline__ int __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_max_epi32(__mmask16 __M, __m512i __V)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_srli_epi32(__m512i __A, unsigned int __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask3_fmsubadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepi32_epi8(__mmask16 __M, __m512i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_fnmsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_min_epi64(__mmask8 __M, __m512i __A, __m512i __B)
static __inline__ unsigned long long __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_min_epu64(__mmask8 __M, __m512i __V)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_cvtepu32_ps(__m512i __A)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_div_pd(__m512d __a, __m512d __b)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_castsi128_si512(__m128i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_rsqrt14_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ void __DEFAULT_FN_ATTRS _store_mask16(__mmask16 *__A, __mmask16 __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_abs_ps(__m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_set1_epi32(__m512i __O, __mmask16 __M, int __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_or_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_sqrt_ss(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_unpackhi_epi32(__mmask16 __U, __m512i __A, __m512i __B)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS _mm512_kand(__mmask16 __A, __mmask16 __B)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_stream_pd(void *__P, __m512d __A)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS512 _mm512_testn_epi64_mask(__m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_xor_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_movehdup_ps(__m512 __A)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_set1_epi8(char __w)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_load_ss(__mmask8 __U, const float *__A)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_cvtsepi32_epi8(__m512i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask3_fmaddsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_fmadd_ss(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask3_fmsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_abs_epi32(__m512i __A)
static __inline__ double __DEFAULT_FN_ATTRS512 _mm512_reduce_mul_pd(__m512d __W)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_fnmadd_ps(__m512 __A, __m512 __B, __m512 __C)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask2_permutex2var_pd(__m512d __A, __m512i __I, __mmask8 __U, __m512d __B)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_set_epi32(int __A, int __B, int __C, int __D, int __E, int __F, int __G, int __H, int __I, int __J, int __K, int __L, int __M, int __N, int __O, int __P)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_max_epi64(__m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_cvtps_epu32(__m512 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepi32_epi16(__mmask16 __M, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_unpackhi_epi64(__mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_expand_ps(__m512 __W, __mmask16 __U, __m512 __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_rcp14_pd(__m512d __W, __mmask8 __U, __m512d __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_set_pd(double __A, double __B, double __C, double __D, double __E, double __F, double __G, double __H)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_min_epu32(__mmask16 __M, __m512i __A, __m512i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_fnmsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtusepi64_epi32(__m256i __O, __mmask8 __M, __m512i __A)
static __inline __m256d __DEFAULT_FN_ATTRS512 _mm512_castpd512_pd256(__m512d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtusepi64_epi16(__m128i __O, __mmask8 __M, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_cvtepi8_epi32(__m128i __A)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_set_epi8(char __e63, char __e62, char __e61, char __e60, char __e59, char __e58, char __e57, char __e56, char __e55, char __e54, char __e53, char __e52, char __e51, char __e50, char __e49, char __e48, char __e47, char __e46, char __e45, char __e44, char __e43, char __e42, char __e41, char __e40, char __e39, char __e38, char __e37, char __e36, char __e35, char __e34, char __e33, char __e32, char __e31, char __e30, char __e29, char __e28, char __e27, char __e26, char __e25, char __e24, char __e23, char __e22, char __e21, char __e20, char __e19, char __e18, char __e17, char __e16, char __e15, char __e14, char __e13, char __e12, char __e11, char __e10, char __e9, char __e8, char __e7, char __e6, char __e5, char __e4, char __e3, char __e2, char __e1, char __e0)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_srai_epi64(__m512i __W, __mmask8 __U, __m512i __A, unsigned int __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_srai_epi32(__mmask16 __U, __m512i __A, unsigned int __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_unpacklo_pd(__mmask8 __U, __m512d __A, __m512d __B)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_mullo_epi32(__m512i __A, __m512i __B)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_store_epi64(void *__P, __m512i __A)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_cvtph_ps(__m256i __A)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_load_si512(void const *__P)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_set_epi64(long long __A, long long __B, long long __C, long long __D, long long __E, long long __F, long long __G, long long __H)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_min_epi32(__mmask16 __M, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi16_epi32(__m512i __W, __mmask16 __U, __m256i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_fmaddsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtsepi32_epi8(__mmask16 __M, __m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_fmaddsub_ps(__m512 __A, __m512 __B, __m512 __C)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_rcp14_sd(__m128d __A, __m128d __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_sqrt_pd(__mmask8 __U, __m512d __A)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_compressstoreu_pd(void *__P, __mmask8 __U, __m512d __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_fnmadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_getexp_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
#define __DEFAULT_FN_ATTRS512
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_zextpd256_pd512(__m256d __a)
Constructs a 512-bit floating-point vector of [8 x double] from a 256-bit floating-point vector of [4 x double].
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_expand_epi64(__m512i __W, __mmask8 __U, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_srli_epi64(__m512i __A, unsigned int __B)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_mask_store_pd(void *__P, __mmask8 __U, __m512d __A)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_load_ps(__mmask16 __U, void const *__P)
static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_store_sd(double *__W, __mmask8 __U, __m128d __A)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS512 _mm512_mask_testn_epi32_mask(__mmask16 __U, __m512i __A, __m512i __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_fnmsub_sd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepu32_ps(__m512 __W, __mmask16 __U, __m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_set_ps(float __A, float __B, float __C, float __D, float __E, float __F, float __G, float __H, float __I, float __J, float __K, float __L, float __M, float __N, float __O, float __P)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_expand_epi64(__mmask8 __U, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_broadcastq_epi64(__m128i __A)
static __inline__ int __DEFAULT_FN_ATTRS _mm512_kortestz(__mmask16 __A, __mmask16 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_srlv_epi32(__m512i __X, __m512i __Y)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_andnot_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_mul_sd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ int __DEFAULT_FN_ATTRS _mm512_mask2int(__mmask16 __a)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_fmadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_zextsi256_si512(__m256i __a)
Constructs a 512-bit integer vector from a 256-bit integer vector.
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_broadcastq_epi64(__mmask8 __M, __m128i __A)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtusepi32_storeu_epi8(void *__P, __mmask16 __M, __m512i __A)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_load_pd(__m512d __W, __mmask8 __U, void const *__P)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_cvtepi16_epi64(__m128i __A)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_unpacklo_pd(__m512d __a, __m512d __b)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_add_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_move_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_cvtps_epi32(__m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepu16_epi64(__mmask8 __U, __m128i __A)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_set1_pd(double __w)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_permutevar_ps(__m512 __A, __m512i __C)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_stream_si512(void *__P, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_srl_epi64(__mmask8 __U, __m512i __A, __m128i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_sqrt_pd(__m512d __A)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_loadu_epi32(__mmask16 __U, void const *__P)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask3_fmsubadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_sub_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_add_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask3_fnmsub_ss(__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_sllv_epi64(__mmask8 __U, __m512i __X, __m512i __Y)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_fmadd_sd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
#define __DEFAULT_FN_ATTRS
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_broadcastss_ps(__mmask16 __M, __m128 __A)
#define _mm512_cmpneq_epi64_mask(A, B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_permutexvar_pd(__m512i __X, __m512d __Y)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_scalef_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_fmadd_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_mul_epu32(__mmask8 __M, __m512i __X, __m512i __Y)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_cvtpd_epu32(__m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask2_permutex2var_epi32(__m512i __A, __m512i __I, __mmask16 __U, __m512i __B)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_unpacklo_ps(__m512 __a, __m512 __b)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_mul_ps(__m512 __a, __m512 __b)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtsepi32_epi16(__mmask16 __M, __m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_broadcast_f32x4(__mmask16 __M, __m128 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtsepi64_epi32(__mmask8 __M, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_sra_epi64(__mmask8 __U, __m512i __A, __m128i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_abs_epi64(__mmask8 __U, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_unpackhi_epi32(__m512i __A, __m512i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_div_ps(__mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_slli_epi64(__m512i __A, unsigned int __B)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_loadu_ps(void const *__p)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_unpacklo_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_permutexvar_epi64(__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
static __inline__ unsigned char __DEFAULT_FN_ATTRS _kortestz_mask16_u8(__mmask16 __A, __mmask16 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepi8_epi64(__mmask8 __U, __m128i __A)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_mask_storeu_ps(void *__P, __mmask16 __U, __m512 __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_broadcastss_ps(__m128 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_sub_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_cvtpslo_pd(__m512d __W, __mmask8 __U, __m512 __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_sqrt_sd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_srli_epi64(__mmask8 __U, __m512i __A, unsigned int __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_getexp_pd(__mmask8 __U, __m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_min_epu32(__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
_MM_MANTISSA_NORM_ENUM
@ _MM_MANT_NORM_p5_1
@ _MM_MANT_NORM_p5_2
@ _MM_MANT_NORM_1_2
@ _MM_MANT_NORM_p75_1p5
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_broadcast_i32x4(__m128i __A)
static __inline__ int __DEFAULT_FN_ATTRS512 _mm512_reduce_mul_epi32(__m512i __W)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_cvtsd_ss(__mmask8 __U, __m128 __A, __m128d __B)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_mul_epu32(__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepi64_epi8(__mmask8 __M, __m512i __A)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_permutex2var_epi32(__m512i __A, __m512i __I, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_unpackhi_epi64(__m512i __A, __m512i __B)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_mask_storeu_epi32(void *__P, __mmask16 __U, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_rolv_epi32(__m512i __A, __m512i __B)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_set1_epi64(long long __d)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS512 _mm512_mask_testn_epi64_mask(__mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_scalef_ss(__m128 __A, __m128 __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_fmaddsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_cvtusepi64_epi16(__m512i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_div_ss(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepu32_epi64(__m512i __W, __mmask8 __U, __m256i __X)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_max_epu64(__m512i __A, __m512i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_mask_cvttpd_epi32(__m256i __W, __mmask8 __U, __m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_expandloadu_epi32(__mmask16 __U, void const *__P)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_load_epi64(__mmask8 __U, void const *__P)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi64_epi16(__m128i __O, __mmask8 __M, __m512i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_rsqrt14_pd(__m512d __W, __mmask8 __U, __m512d __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_fmaddsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_cvtpslo_pd(__m512 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_cvtusepi32_epi8(__m512i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_fnmsub_ss(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_max_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_castps_pd(__m512 __A)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_mask_storeu_pd(void *__P, __mmask8 __U, __m512d __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask3_fnmsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_max_epu32(__m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_sllv_epi64(__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi32_ps(__m512 __W, __mmask16 __U, __m512i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_permutexvar_pd(__m512d __W, __mmask8 __U, __m512i __X, __m512d __Y)
static __inline__ double __DEFAULT_FN_ATTRS512 _mm512_reduce_min_pd(__m512d __V)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepu32_pd(__m512d __W, __mmask8 __U, __m256i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_fmadd_ps(__m512 __A, __m512 __B, __m512 __C)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_compress_pd(__m512d __W, __mmask8 __U, __m512d __A)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_abs_epi64(__m512i __A)
static __inline__ unsigned long long __DEFAULT_FN_ATTRS512 _mm512_reduce_min_epu64(__m512i __V)
static __inline__ unsigned long long __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_max_epu64(__mmask8 __M, __m512i __V)
#define _mm512_mask_cmpeq_epi32_mask(k, A, B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_permutevar_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512i __C)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_cvtusepi64_epi32(__m512i __A)
static __inline __mmask16 __DEFAULT_FN_ATTRS512 _mm512_test_epi32_mask(__m512i __A, __m512i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_broadcastsd_pd(__m512d __O, __mmask8 __M, __m128d __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_scalef_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_max_epi32(__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS512 _mm512_mask_cvtpd_ps(__m256 __W, __mmask8 __U, __m512d __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_sqrt_ps(__mmask16 __U, __m512 __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_min_pd(__mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_getexp_sd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS512 _mm512_mask_test_epi32_mask(__mmask16 __U, __m512i __A, __m512i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_mov_pd(__mmask8 __U, __m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask2_permutex2var_epi64(__m512i __A, __m512i __I, __mmask8 __U, __m512i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_permutevar_pd(__m512d __A, __m512i __C)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_cvtpd_pslo(__m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_expand_epi32(__m512i __W, __mmask16 __U, __m512i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_sqrt_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_undefined_pd(void)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_srl_epi32(__mmask16 __U, __m512i __A, __m128i __B)
#define _mm512_cmpneq_epi32_mask(A, B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_cvtph_ps(__m512 __W, __mmask16 __U, __m256i __A)
static __inline__ int __DEFAULT_FN_ATTRS512 _mm512_reduce_or_epi32(__m512i __W)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_cvtu32_sd(__m128d __A, unsigned __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_mov_ps(__mmask16 __U, __m512 __A)
static __inline __m128i __DEFAULT_FN_ATTRS512 _mm512_castsi512_si128(__m512i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_rsqrt14_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_floor_pd(__m512d __W, __mmask8 __U, __m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_sll_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_storeu_si512(void *__P, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_add_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_rsqrt14_ps(__mmask16 __U, __m512 __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_mul_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_unpackhi_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_cvtepu8_epi64(__m128i __A)
static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_reduce_add_epi64(__m512i __W)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_permutexvar_ps(__mmask16 __U, __m512i __X, __m512 __Y)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_broadcastss_ps(__m512 __O, __mmask16 __M, __m128 __A)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_mullo_epi32(__mmask16 __M, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_xor_epi64(__m512i __a, __m512i __b)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_mul_ps(__mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_unpacklo_epi64(__mmask8 __U, __m512i __A, __m512i __B)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_ceil_ps(__m512 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtpd_epu32(__m256i __W, __mmask8 __U, __m512d __A)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtusepi64_storeu_epi32(void *__P, __mmask8 __M, __m512i __A)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_mul_epi32(__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_div_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask3_fnmadd_ss(__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_mullo_epi32(__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_srav_epi32(__m512i __X, __m512i __Y)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_sra_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_cvtepi64_epi8(__m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_movehdup_ps(__mmask16 __U, __m512 __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_expandloadu_ps(__mmask16 __U, void const *__P)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_cvtepu32lo_pd(__m512i __A)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_storeu_pd(void *__P, __m512d __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_sqrt_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_cvtusepi64_epi8(__m512i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_scalef_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
#define _mm512_mask_cmpneq_epi32_mask(k, A, B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_fmadd_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_loadu_epi64(__mmask8 __U, void const *__P)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_store_epi64(void *__P, __mmask8 __U, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_compress_epi64(__mmask8 __U, __m512i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtusepi32_epi16(__mmask16 __M, __m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_min_ps(__m512 __A, __m512 __B)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_div_ps(__m512 __a, __m512 __b)
static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_and_epi64(__mmask8 __M, __m512i __W)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_min_epu64(__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_fmsub_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvttpd_epu32(__mmask8 __U, __m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_srlv_epi64(__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_fmsub_sd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
static __inline__ int __DEFAULT_FN_ATTRS512 _mm512_reduce_max_epi32(__m512i __V)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_blend_ps(__mmask16 __U, __m512 __A, __m512 __W)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_and_epi64(__m512i __a, __m512i __b)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_fmsubadd_ps(__m512 __A, __m512 __B, __m512 __C)
char __v64qi __attribute__((__vector_size__(64)))
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi16_epi64(__m512i __W, __mmask8 __U, __m128i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepu8_epi64(__mmask8 __U, __m128i __A)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_set4_ps(float __A, float __B, float __C, float __D)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_cvtepi32_epi8(__m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_srav_epi64(__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_min_epi64(__mmask8 __M, __m512i __V)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_zextps256_ps512(__m256 __a)
Constructs a 512-bit floating-point vector of [16 x float] from a 256-bit floating-point vector of [8 x float].
static __inline void __DEFAULT_FN_ATTRS512 _mm512_store_si512(void *__P, __m512i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtusepi64_epi32(__mmask8 __M, __m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_broadcast_f32x4(__m128 __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_broadcastsd_pd(__m128d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_broadcastq_epi64(__m512i __O, __mmask8 __M, __m128i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_unpacklo_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask2_permutex2var_ps(__m512 __A, __m512i __I, __mmask16 __U, __m512 __B)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_min_epu32(__m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_cvtepu16_epi32(__m256i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_fmsub_pd(__m512d __A, __m512d __B, __m512d __C)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_permutex2var_pd(__mmask8 __U, __m512d __A, __m512i __I, __m512d __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_expandloadu_pd(__m512d __W, __mmask8 __U, void const *__P)
static __inline__ unsigned int __DEFAULT_FN_ATTRS512 _mm512_reduce_max_epu32(__m512i __V)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_xor_epi64(__mmask8 __k, __m512i __a, __m512i __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtusepi64_epi16(__mmask8 __M, __m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_max_ps(__m512 __A, __m512 __B)
static __inline __m256 __DEFAULT_FN_ATTRS512 _mm512_castps512_ps256(__m512 __A)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_store_pd(void *__P, __m512d __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_min_pd(__m512d __A, __m512d __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask3_fmadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_ceil_ps(__m512 __W, __mmask16 __U, __m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepu16_epi32(__mmask16 __U, __m256i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_srav_epi64(__m512i __X, __m512i __Y)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_cvtpd_epi32(__m512d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtusepi64_epi8(__m128i __O, __mmask8 __M, __m512i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_fnmsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
static __inline__ float __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_add_ps(__mmask16 __M, __m512 __W)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_expand_ps(__mmask16 __U, __m512 __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_fmadd_pd(__m512d __A, __m512d __B, __m512d __C)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_rsqrt14_ss(__m128 __A, __m128 __B)
#define _mm512_mask_cmpneq_epi64_mask(k, A, B)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_max_epi32(__m512i __A, __m512i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_min_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi32_pd(__m512d __W, __mmask8 __U, __m256i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_sllv_epi64(__m512i __X, __m512i __Y)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_min_epi64(__m512i __A, __m512i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_cvtps_pd(__m256 __A)
_MM_CMPINT_ENUM
@ _MM_CMPINT_NE
@ _MM_CMPINT_NLT
@ _MM_CMPINT_LE
@ _MM_CMPINT_EQ
@ _MM_CMPINT_LT
@ _MM_CMPINT_UNUSED
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_rolv_epi32(__mmask16 __U, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_srav_epi32(__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_add_epi32(__m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_min_epi64(__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_cvtepu8_epi32(__m128i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_sqrt_pd(__m512d __W, __mmask8 __U, __m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_max_epi64(__mmask8 __M, __m512i __A, __m512i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_mul_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_compressstoreu_epi64(void *__P, __mmask8 __U, __m512i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_fnmsub_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtusepi32_storeu_epi16(void *__P, __mmask16 __M, __m512i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_getexp_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_xor_epi32(__mmask16 __k, __m512i __a, __m512i __b)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_sqrt_ps(__m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepi16_epi32(__mmask16 __U, __m256i __A)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_storeu_epi64(void *__P, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_cvtepi32_epi64(__m256i __X)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtsepi64_epi16(__mmask8 __M, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_max_epu64(__mmask8 __M, __m512i __A, __m512i __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtpd_ps(__mmask8 __U, __m512d __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_min_ps(__mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_cvtusepi32_epi16(__m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_set1_epi64(__m512i __O, __mmask8 __M, long long __A)
static __inline__ float __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_min_ps(__mmask16 __M, __m512 __V)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_fmsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS _load_mask16(__mmask16 *__A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_unpackhi_ps(__mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_cvtsd_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128d __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_movehdup_ps(__m512 __W, __mmask16 __U, __m512 __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi32lo_pd(__m512d __W, __mmask8 __U, __m512i __A)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_store_epi32(void *__P, __mmask16 __U, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_srl_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_permutexvar_ps(__m512 __W, __mmask16 __U, __m512i __X, __m512 __Y)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvttps_epu32(__mmask16 __U, __m512 __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepi32_ps(__mmask16 __U, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_and_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_max_ps(__mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_fmsub_ss(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtpd_epi32(__m256i __W, __mmask8 __U, __m512d __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_ceil_pd(__m512d __W, __mmask8 __U, __m512d __A)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_permutex2var_ps(__m512 __A, __m512i __I, __m512 __B)
static __inline__ float __DEFAULT_FN_ATTRS512 _mm512_cvtss_f32(__m512 __a)
unsigned char __mmask8
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtusepi32_epi8(__mmask16 __M, __m512i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtsepi32_epi8(__m128i __O, __mmask16 __M, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_srlv_epi32(__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
static __inline__ float __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_mul_ps(__mmask16 __M, __m512 __W)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS _mm512_kxor(__mmask16 __A, __mmask16 __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_fmsub_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_zextsi128_si512(__m128i __a)
Constructs a 512-bit integer vector from a 128-bit integer vector.
static __inline__ int __DEFAULT_FN_ATTRS128 _mm_cvttsd_i32(__m128d __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_max_pd(__mmask8 __U, __m512d __A, __m512d __B)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_set4_pd(double __A, double __B, double __C, double __D)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_mask_store_ps(void *__P, __mmask16 __U, __m512 __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_sub_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_sra_epi32(__mmask16 __U, __m512i __A, __m128i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_broadcast_i64x4(__mmask8 __M, __m256i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_fmsubadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_mov_epi64(__m512i __W, __mmask8 __U, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_or_epi64(__mmask8 __k, __m512i __a, __m512i __b)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_and_epi32(__m512i __a, __m512i __b)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS _mm512_kandn(__mmask16 __A, __mmask16 __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_div_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_rsqrt14_pd(__mmask8 __U, __m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_unpacklo_epi32(__mmask16 __U, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_compress_epi64(__m512i __W, __mmask8 __U, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_max_epi64(__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_undefined_ps(void)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_permutexvar_epi32(__mmask16 __M, __m512i __X, __m512i __Y)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_compress_epi32(__mmask16 __U, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_unpacklo_epi32(__m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepi8_epi32(__mmask16 __U, __m128i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_broadcast_i32x4(__mmask16 __M, __m128i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_abs_epi32(__mmask16 __U, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_srlv_epi32(__mmask16 __U, __m512i __X, __m512i __Y)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_andnot_epi64(__m512i __A, __m512i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_broadcast_f32x4(__m512 __O, __mmask16 __M, __m128 __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_unpacklo_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_stream_ps(void *__P, __m512 __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_getexp_pd(__m512d __W, __mmask8 __U, __m512d __A)
static __inline__ double __DEFAULT_FN_ATTRS512 _mm512_reduce_max_pd(__m512d __V)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_loadu_pd(__mmask8 __U, void const *__P)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtps_epi32(__mmask16 __U, __m512 __A)
static __inline__ unsigned char __DEFAULT_FN_ATTRS _kortestc_mask16_u8(__mmask16 __A, __mmask16 __B)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_loadu_ps(__m512 __W, __mmask16 __U, void const *__P)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_expand_pd(__m512d __W, __mmask8 __U, __m512d __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_unpackhi_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask3_fnmadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_srlv_epi64(__mmask8 __U, __m512i __X, __m512i __Y)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_compress_ps(__mmask16 __U, __m512 __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_max_sd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_abs_ps(__m512 __W, __mmask16 __K, __m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_permutex2var_epi32(__m512i __A, __mmask16 __U, __m512i __I, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_load_epi32(__mmask16 __U, void const *__P)
_MM_MANTISSA_SIGN_ENUM
@ _MM_MANT_SIGN_zero
@ _MM_MANT_SIGN_src
@ _MM_MANT_SIGN_nan
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_mov_ps(__m512 __W, __mmask16 __U, __m512 __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_broadcast_f64x4(__mmask8 __M, __m256d __A)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS _mm512_int2mask(int __a)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtps_epu32(__mmask16 __U, __m512 __A)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_store_ps(void *__P, __m512 __A)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi64_storeu_epi32(void *__P, __mmask8 __M, __m512i __A)
#define __DEFAULT_FN_ATTRS128
static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_reduce_min_epi64(__m512i __V)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_movedup_pd(__m512d __W, __mmask8 __U, __m512d __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_scalef_ps(__m512 __A, __m512 __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask3_fmaddsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_load_ss(__m128 __W, __mmask8 __U, const float *__A)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_floor_pd(__m512d __A)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_loadu_pd(void const *__p)
static __inline__ int __DEFAULT_FN_ATTRS512 _mm512_reduce_and_epi32(__m512i __W)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtsepi64_storeu_epi32(void *__P, __mmask8 __M, __m512i __A)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_min_epi32(__m512i __A, __m512i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtsepi32_epi16(__m256i __O, __mmask16 __M, __m512i __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_compress_pd(__mmask8 __U, __m512d __A)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_castpd_si512(__m512d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtsepi64_epi8(__mmask8 __M, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_cvtepu16_epi64(__m128i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_fmaddsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_cvtsepi64_epi16(__m512i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi32_epi16(__m256i __O, __mmask16 __M, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepi16_epi64(__mmask8 __U, __m128i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask3_fnmadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_stream_load_si512(void const *__P)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_mul_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
static __inline__ float __DEFAULT_FN_ATTRS512 _mm512_reduce_min_ps(__m512 __V)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_sub_epi64(__mmask8 __U, __m512i __A, __m512i __B)
static __inline__ float __DEFAULT_FN_ATTRS512 _mm512_reduce_max_ps(__m512 __V)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtsepi64_epi16(__m128i __O, __mmask8 __M, __m512i __A)
static __inline__ int __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_min_epi32(__mmask16 __M, __m512i __V)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_fmsubadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_srai_epi64(__m512i __A, unsigned int __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_rcp14_ps(__m512 __W, __mmask16 __U, __m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_srli_epi32(__mmask16 __U, __m512i __A, unsigned int __B)
static __inline__ int __DEFAULT_FN_ATTRS _mm512_kortestc(__mmask16 __A, __mmask16 __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_move_sd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_cvtepi32lo_pd(__m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_undefined_epi32(void)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepu32_pd(__mmask8 __U, __m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_cvtepi64_epi32(__m512i __A)
static __inline__ double __DEFAULT_FN_ATTRS512 _mm512_reduce_add_pd(__m512d __W)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_loadu_epi32(void const *__P)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_castpd128_pd512(__m128d __A)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_mul_pd(__m512d __a, __m512d __b)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask3_fnmadd_sd(__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_permutevar_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512i __C)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_srl_epi64(__m512i __A, __m128i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_permutex2var_epi32(__mmask16 __U, __m512i __A, __m512i __I, __m512i __B)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_set1_ps(float __w)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_or_epi32(__mmask16 __k, __m512i __a, __m512i __b)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_sub_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_castsi512_ps(__m512i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtpd_epi32(__mmask8 __U, __m512d __A)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi32_storeu_epi16(void *__P, __mmask16 __M, __m512i __A)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_loadu_ps(__mmask16 __U, void const *__P)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_add_epi64(__m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_max_epi32(__mmask16 __M, __m512i __A, __m512i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepi32_pd(__mmask8 __U, __m256i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_expand_epi32(__mmask16 __U, __m512i __A)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_castps256_ps512(__m256 __a)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_add_ps(__m512 __a, __m512 __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtusepi32_epi8(__m128i __O, __mmask16 __M, __m512i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi64_epi32(__m256i __O, __mmask8 __M, __m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_getexp_ps(__mmask16 __U, __m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_rorv_epi32(__m512i __A, __m512i __B)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_load_epi32(void const *__P)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_rcp14_ss(__mmask8 __U, __m128 __A, __m128 __B)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_set4_epi64(long long __A, long long __B, long long __C, long long __D)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_add_pd(__m512d __a, __m512d __b)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_and_epi32(__mmask16 __k, __m512i __a, __m512i __b)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_sllv_epi32(__m512i __X, __m512i __Y)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_or_epi64(__m512i __a, __m512i __b)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_cvtepi8_epi64(__m128i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_load_sd(__mmask8 __U, const double *__A)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepi64_epi16(__mmask8 __M, __m512i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_scalef_sd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_mov_epi32(__m512i __W, __mmask16 __U, __m512i __A)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_storeu_ps(void *__P, __m512 __A)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_mul_epi32(__m512i __X, __m512i __Y)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask3_fmadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepu16_epi64(__m512i __W, __mmask8 __U, __m128i __A)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtsepi64_storeu_epi8(void *__P, __mmask8 __M, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_rorv_epi32(__mmask16 __U, __m512i __A, __m512i __B)
static __inline__ float __DEFAULT_FN_ATTRS512 _mm512_reduce_add_ps(__m512 __W)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_broadcastsd_pd(__mmask8 __M, __m128d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvttps_epi32(__m512i __W, __mmask16 __U, __m512 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_cvtsepi32_epi16(__m512i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_unpacklo_ps(__mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepu8_epi32(__m512i __W, __mmask16 __U, __m128i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_permutexvar_epi32(__m512i __X, __m512i __Y)
static __inline__ int __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_add_epi32(__mmask16 __M, __m512i __W)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_add_pd(__mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_rcp14_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_add_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_cvtss_sd(__mmask8 __U, __m128d __A, __m128 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_permutex2var_epi64(__m512i __A, __mmask8 __U, __m512i __I, __m512i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_scalef_pd(__mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_and_epi64(__mmask8 __k, __m512i __a, __m512i __b)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_srli_epi64(__m512i __W, __mmask8 __U, __m512i __A, unsigned int __B)
static __inline__ double __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_add_pd(__mmask8 __M, __m512d __W)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_srai_epi64(__mmask8 __U, __m512i __A, unsigned int __B)
_MM_PERM_ENUM
@ _MM_PERM_BBCA
@ _MM_PERM_BCBD
@ _MM_PERM_DAAB
@ _MM_PERM_CBBD
@ _MM_PERM_DCCC
@ _MM_PERM_CDBB
@ _MM_PERM_DDDD
@ _MM_PERM_CCCC
@ _MM_PERM_CADA
@ _MM_PERM_BACD
@ _MM_PERM_CCAD
@ _MM_PERM_ABDB
@ _MM_PERM_BBBD
@ _MM_PERM_DCAB
@ _MM_PERM_BABC
@ _MM_PERM_AACD
@ _MM_PERM_BBAB
@ _MM_PERM_DCDB
@ _MM_PERM_BACC
@ _MM_PERM_ABDA
@ _MM_PERM_ACCC
@ _MM_PERM_ADAC
@ _MM_PERM_CCCD
@ _MM_PERM_CADD
@ _MM_PERM_ACCB
@ _MM_PERM_BBDB
@ _MM_PERM_ABBB
@ _MM_PERM_BACB
@ _MM_PERM_CDCA
@ _MM_PERM_ACBC
@ _MM_PERM_ADCB
@ _MM_PERM_BBBC
@ _MM_PERM_DBBA
@ _MM_PERM_BBBB
@ _MM_PERM_DDDB
@ _MM_PERM_CAAA
@ _MM_PERM_ADBB
@ _MM_PERM_ACDB
@ _MM_PERM_DCAD
@ _MM_PERM_DBBC
@ _MM_PERM_BAAB
@ _MM_PERM_BDDD
@ _MM_PERM_BBAD
@ _MM_PERM_DDBA
@ _MM_PERM_CDCD
@ _MM_PERM_CCCA
@ _MM_PERM_DBBB
@ _MM_PERM_DAAD
@ _MM_PERM_DCBA
@ _MM_PERM_CCBC
@ _MM_PERM_ADDD
@ _MM_PERM_DBAC
@ _MM_PERM_ABAB
@ _MM_PERM_CBDB
@ _MM_PERM_CDBC
@ _MM_PERM_AABC
@ _MM_PERM_DABD
@ _MM_PERM_CBBA
@ _MM_PERM_CBAA
@ _MM_PERM_BDDB
@ _MM_PERM_CABC
@ _MM_PERM_BDBD
@ _MM_PERM_BCAD
@ _MM_PERM_ACBA
@ _MM_PERM_ADBA
@ _MM_PERM_ADBC
@ _MM_PERM_DBCB
@ _MM_PERM_CBDC
@ _MM_PERM_CBAD
@ _MM_PERM_ABCC
@ _MM_PERM_AAAD
@ _MM_PERM_CBAC
@ _MM_PERM_CCDA
@ _MM_PERM_CDAC
@ _MM_PERM_BADD
@ _MM_PERM_DAAC
@ _MM_PERM_BCCC
@ _MM_PERM_DBBD
@ _MM_PERM_DDAC
@ _MM_PERM_DACD
@ _MM_PERM_BAAC
@ _MM_PERM_ACCA
@ _MM_PERM_ABDD
@ _MM_PERM_BBCC
@ _MM_PERM_DAAA
@ _MM_PERM_CAAB
@ _MM_PERM_BCDB
@ _MM_PERM_ACBB
@ _MM_PERM_CDAB
@ _MM_PERM_DBDB
@ _MM_PERM_AABB
@ _MM_PERM_DBDA
@ _MM_PERM_BCBA
@ _MM_PERM_CBAB
@ _MM_PERM_DCDC
@ _MM_PERM_BBCB
@ _MM_PERM_CDCB
@ _MM_PERM_AACA
@ _MM_PERM_ACBD
@ _MM_PERM_AAAB
@ _MM_PERM_DCCB
@ _MM_PERM_ADDB
@ _MM_PERM_AAAA
@ _MM_PERM_AACC
@ _MM_PERM_BDDC
@ _MM_PERM_CBBC
@ _MM_PERM_DDCC
@ _MM_PERM_CABD
@ _MM_PERM_AADC
@ _MM_PERM_BCBC
@ _MM_PERM_BCCA
@ _MM_PERM_CCBD
@ _MM_PERM_CBBB
@ _MM_PERM_CDBA
@ _MM_PERM_CACD
@ _MM_PERM_BDAD
@ _MM_PERM_ADCA
@ _MM_PERM_AAAC
@ _MM_PERM_BDDA
@ _MM_PERM_CCAC
@ _MM_PERM_ACDC
@ _MM_PERM_DBCA
@ _MM_PERM_DBAA
@ _MM_PERM_AABD
@ _MM_PERM_CDCC
@ _MM_PERM_DCAA
@ _MM_PERM_DDDC
@ _MM_PERM_CDDB
@ _MM_PERM_AABA
@ _MM_PERM_DDBB
@ _MM_PERM_CDDA
@ _MM_PERM_AADD
@ _MM_PERM_BADC
@ _MM_PERM_BDBA
@ _MM_PERM_DBDD
@ _MM_PERM_BDAC
@ _MM_PERM_DBDC
@ _MM_PERM_BBBA
@ _MM_PERM_DDBC
@ _MM_PERM_BAAA
@ _MM_PERM_BDCC
@ _MM_PERM_DDAB
@ _MM_PERM_BCCB
@ _MM_PERM_BCCD
@ _MM_PERM_ADBD
@ _MM_PERM_ADCC
@ _MM_PERM_CCBB
@ _MM_PERM_CDAA
@ _MM_PERM_BBDA
@ _MM_PERM_CACC
@ _MM_PERM_DCBB
@ _MM_PERM_DABA
@ _MM_PERM_BADB
@ _MM_PERM_ABCA
@ _MM_PERM_CBCC
@ _MM_PERM_ABAD
@ _MM_PERM_BDBC
@ _MM_PERM_DDDA
@ _MM_PERM_ADAB
@ _MM_PERM_CADB
@ _MM_PERM_ADAA
@ _MM_PERM_ACAC
@ _MM_PERM_DADD
@ _MM_PERM_BABD
@ _MM_PERM_ACCD
@ _MM_PERM_CCAA
@ _MM_PERM_AADA
@ _MM_PERM_BDCA
@ _MM_PERM_CDDD
@ _MM_PERM_ABBD
@ _MM_PERM_ACAA
@ _MM_PERM_ACDD
@ _MM_PERM_DABB
@ _MM_PERM_CCCB
@ _MM_PERM_AADB
@ _MM_PERM_DBAD
@ _MM_PERM_BBDD
@ _MM_PERM_BCDC
@ _MM_PERM_CABA
@ _MM_PERM_BBAA
@ _MM_PERM_ADAD
@ _MM_PERM_BADA
@ _MM_PERM_DCDA
@ _MM_PERM_ABBA
@ _MM_PERM_ACAB
@ _MM_PERM_CCDD
@ _MM_PERM_CADC
@ _MM_PERM_DDCB
@ _MM_PERM_BABB
@ _MM_PERM_CCDB
@ _MM_PERM_DDAD
@ _MM_PERM_DBCC
@ _MM_PERM_BCBB
@ _MM_PERM_ADDC
@ _MM_PERM_CCBA
@ _MM_PERM_ABCD
@ _MM_PERM_BCAB
@ _MM_PERM_DCBC
@ _MM_PERM_BCDD
@ _MM_PERM_CCDC
@ _MM_PERM_ABAC
@ _MM_PERM_CBCB
@ _MM_PERM_CCAB
@ _MM_PERM_DDCD
@ _MM_PERM_DACA
@ _MM_PERM_ACAD
@ _MM_PERM_BABA
@ _MM_PERM_CBCD
@ _MM_PERM_CAAD
@ _MM_PERM_DCDD
@ _MM_PERM_BDBB
@ _MM_PERM_BCAA
@ _MM_PERM_ABDC
@ _MM_PERM_BBCD
@ _MM_PERM_CAAC
@ _MM_PERM_BBAC
@ _MM_PERM_CBCA
@ _MM_PERM_DCAC
@ _MM_PERM_ABAA
@ _MM_PERM_CACB
@ _MM_PERM_BBDC
@ _MM_PERM_CDAD
@ _MM_PERM_ADCD
@ _MM_PERM_DADB
@ _MM_PERM_DBCD
@ _MM_PERM_DACC
@ _MM_PERM_DACB
@ _MM_PERM_DCBD
@ _MM_PERM_CACA
@ _MM_PERM_ABBC
@ _MM_PERM_DCCA
@ _MM_PERM_DABC
@ _MM_PERM_CBDD
@ _MM_PERM_DDBD
@ _MM_PERM_DDCA
@ _MM_PERM_BDCD
@ _MM_PERM_CDBD
@ _MM_PERM_ABCB
@ _MM_PERM_CDDC
@ _MM_PERM_AACB
@ _MM_PERM_DDAA
@ _MM_PERM_ADDA
@ _MM_PERM_DADA
@ _MM_PERM_BCDA
@ _MM_PERM_BDAB
@ _MM_PERM_BAAD
@ _MM_PERM_DBAB
@ _MM_PERM_DCCD
@ _MM_PERM_CABB
@ _MM_PERM_BDAA
@ _MM_PERM_BDCB
@ _MM_PERM_ACDA
@ _MM_PERM_DADC
@ _MM_PERM_CBDA
@ _MM_PERM_BCAC
@ _MM_PERM_BACA
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_srl_epi32(__m512i __A, __m128i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_andnot_epi32(__m512i __A, __m512i __B)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_load_pd(void const *__p)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_getexp_pd(__m512d __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_abs_pd(__m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_rorv_epi64(__m512i __A, __m512i __B)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_load_epi64(void const *__P)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_ceil_pd(__m512d __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_div_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_reduce_and_epi64(__m512i __W)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi8_epi64(__m512i __W, __mmask8 __U, __m128i __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_fnmsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_store_epi32(void *__P, __m512i __A)
static __inline__ int __DEFAULT_FN_ATTRS512 _mm512_reduce_min_epi32(__m512i __V)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_unpackhi_pd(__m512d __a, __m512d __b)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_sub_ps(__mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_permutevar_ps(__mmask16 __U, __m512 __A, __m512i __C)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_sub_epi32(__mmask16 __U, __m512i __A, __m512i __B)
static __inline__ int __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_and_epi32(__mmask16 __M, __m512i __W)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_cvtepi32_ps(__m512i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_move_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_slli_epi32(__m512i __W, __mmask16 __U, __m512i __A, unsigned int __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_unpacklo_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS512 _mm512_testn_epi32_mask(__m512i __A, __m512i __B)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtsepi32_storeu_epi8(void *__P, __mmask16 __M, __m512i __A)
static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_reduce_max_epi64(__m512i __V)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_rcp14_ps(__m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_sra_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_rorv_epi64(__mmask8 __U, __m512i __A, __m512i __B)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_cvtepi32_pd(__m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtsepi64_epi8(__m128i __O, __mmask8 __M, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_rolv_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask3_fnmsub_sd(__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_expand_pd(__mmask8 __U, __m512d __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_permutexvar_ps(__m512i __X, __m512 __Y)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_max_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS _cvtu32_mask16(unsigned int __A)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS _mm512_kunpackb(__mmask16 __A, __mmask16 __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_compress_ps(__m512 __W, __mmask16 __U, __m512 __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_load_sd(__m128d __W, __mmask8 __U, const double *__A)
static __inline__ __m256 __DEFAULT_FN_ATTRS512 _mm512_cvtpd_ps(__m512d __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_fnmadd_ss(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_max_ss(__mmask8 __U, __m128 __A, __m128 __B)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_castpd256_pd512(__m256d __a)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_mask_storeu_epi64(void *__P, __mmask8 __U, __m512i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_getexp_sd(__m128d __A, __m128d __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_scalef_ss(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_sra_epi64(__m512i __A, __m128i __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_rcp14_sd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_set_epi16(short __e31, short __e30, short __e29, short __e28, short __e27, short __e26, short __e25, short __e24, short __e23, short __e22, short __e21, short __e20, short __e19, short __e18, short __e17, short __e16, short __e15, short __e14, short __e13, short __e12, short __e11, short __e10, short __e9, short __e8, short __e7, short __e6, short __e5, short __e4, short __e3, short __e2, short __e1, short __e0)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_expandloadu_pd(__mmask8 __U, void const *__P)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_broadcast_f64x4(__m256d __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_fmsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_cvtepi64_epi16(__m512i __A)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_sub_ps(__m512 __a, __m512 __b)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_mul_pd(__mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_fmadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_mask_cvttpd_epu32(__m256i __W, __mmask8 __U, __m512d __A)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtsepi32_storeu_epi16(void *__P, __mmask16 __M, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_min_epi32(__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_store_ss(float *__W, __mmask8 __U, __m128 __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_fnmsub_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtps_pd(__mmask8 __U, __m256 __A)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_blend_epi64(__mmask8 __U, __m512i __A, __m512i __W)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_permutex2var_epi64(__mmask8 __U, __m512i __A, __m512i __I, __m512i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_permutex2var_pd(__m512d __A, __mmask8 __U, __m512i __I, __m512d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_expandloadu_epi64(__mmask8 __U, void const *__P)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_fnmadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_max_epi64(__mmask8 __M, __m512i __V)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_getexp_ps(__m512 __W, __mmask16 __U, __m512 __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_moveldup_ps(__m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_load_epi64(__m512i __W, __mmask8 __U, void const *__P)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_sra_epi32(__m512i __A, __m128i __B)
static __inline__ unsigned __DEFAULT_FN_ATTRS128 _mm_cvtss_u32(__m128 __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_add_sd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline void __DEFAULT_FN_ATTRS512 _mm512_storeu_epi32(void *__P, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_and_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b)
unsigned short __mmask16
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepi64_epi32(__mmask8 __M, __m512i __A)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_zextps128_ps512(__m128 __a)
Constructs a 512-bit floating-point vector of [16 x float] from a 128-bit floating-point vector of [4...
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_mov_pd(__m512d __W, __mmask8 __U, __m512d __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_srlv_epi64(__m512i __X, __m512i __Y)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtusepi64_storeu_epi8(void *__P, __mmask8 __M, __m512i __A)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_cvtepu32_pd(__m256i __A)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_load_pd(__mmask8 __U, void const *__P)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepi32_epi64(__mmask8 __U, __m256i __X)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_rcp14_ps(__mmask16 __U, __m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_unpackhi_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_fmadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_sub_epi64(__m512i __A, __m512i __B)
static __inline __m256i __DEFAULT_FN_ATTRS512 _mm512_castsi512_si256(__m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepu8_epi64(__m512i __W, __mmask8 __U, __m128i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_xor_si512(__m512i __a, __m512i __b)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_and_si512(__m512i __a, __m512i __b)
static __inline__ int __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_or_epi32(__mmask16 __M, __m512i __W)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_permutevar_pd(__mmask8 __U, __m512d __A, __m512i __C)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtph_ps(__mmask16 __U, __m256i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_srav_epi64(__mmask8 __U, __m512i __X, __m512i __Y)
#define _MM_FROUND_CUR_DIRECTION
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtusepi64_storeu_epi16(void *__P, __mmask8 __M, __m512i __A)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_mul_epi32(__mmask8 __M, __m512i __X, __m512i __Y)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_max_pd(__m512d __A, __m512d __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_floor_ps(__m512 __W, __mmask16 __U, __m512 __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_div_pd(__mmask8 __U, __m512d __A, __m512d __B)
static __inline__ double __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_max_pd(__mmask8 __M, __m512d __V)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_fnmadd_sd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_unpackhi_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtsepi64_storeu_epi16(void *__P, __mmask8 __M, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_permutexvar_epi32(__m512i __W, __mmask16 __M, __m512i __X, __m512i __Y)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_loadu_epi32(__m512i __W, __mmask16 __U, void const *__P)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_fmsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_fnmsub_ps(__m512 __A, __m512 __B, __m512 __C)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_sub_pd(__m512d __a, __m512d __b)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_rorv_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_broadcast_i64x4(__m512i __O, __mmask8 __M, __m256i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_broadcastd_epi32(__mmask16 __M, __m128i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_permutexvar_epi64(__mmask8 __M, __m512i __X, __m512i __Y)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_rcp14_pd(__mmask8 __U, __m512d __A)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_fmsubadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
static __inline__ int __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_mul_epi32(__mmask16 __M, __m512i __W)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi32_epi8(__m128i __O, __mmask16 __M, __m512i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_rsqrt14_sd(__m128d __A, __m128d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_srai_epi32(__m512i __W, __mmask16 __U, __m512i __A, unsigned int __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_div_sd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __mmask16 __DEFAULT_FN_ATTRS _mm512_kxnor(__mmask16 __A, __mmask16 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_slli_epi64(__m512i __W, __mmask8 __U, __m512i __A, unsigned int __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepu32lo_pd(__m512d __W, __mmask8 __U, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_srav_epi32(__mmask16 __U, __m512i __X, __m512i __Y)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_set4_epi32(int __A, int __B, int __C, int __D)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_fmsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_max_epu64(__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_slli_epi32(__m512i __A, unsigned int __B)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_set1_epi64(__mmask8 __M, long long __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_srl_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_permutex2var_pd(__m512d __A, __m512i __I, __m512d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_add_epi64(__mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_or_epi32(__m512i __a, __m512i __b)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_rsqrt14_ps(__m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi32_epi64(__m512i __W, __mmask8 __U, __m256i __X)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_cvtepi16_epi32(__m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_cvtsepi64_epi32(__m512i __A)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_zextpd128_pd512(__m128d __a)
Constructs a 512-bit floating-point vector of [8 x double] from a 128-bit floating-point vector of [2 x double]. The lower 128 bits contain the value of the source vector; the upper 384 bits are set to zero.
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_broadcast_f64x4(__m512d __O, __mmask8 __M, __m256d __A)
#define _mm512_cmpeq_epi64_mask(A, B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_moveldup_ps(__m512 __W, __mmask16 __U, __m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_sllv_epi32(__mmask16 __U, __m512i __X, __m512i __Y)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_sub_ss(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_or_epi64(__mmask8 __M, __m512i __W)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_unpackhi_pd(__mmask8 __U, __m512d __A, __m512d __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtsepi64_epi32(__m256i __O, __mmask8 __M, __m512i __A)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_loadu_epi64(void const *__P)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_rorv_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_rolv_epi64(__m512i __A, __m512i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_cvtps_pd(__m512d __W, __mmask8 __U, __m256 __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_move_ss(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvttps_epi32(__mmask16 __U, __m512 __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_or_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_castsi256_si512(__m256i __A)
static __inline __mmask16 __DEFAULT_FN_ATTRS _mm512_knot(__mmask16 __M)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mullox_epi64(__m512i __A, __m512i __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_permutexvar_pd(__mmask8 __U, __m512i __X, __m512d __Y)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_rsqrt14_ps(__m512 __W, __mmask16 __U, __m512 __A)
static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_setzero_si512(void)
static __inline __m128d __DEFAULT_FN_ATTRS512 _mm512_castpd512_pd128(__m512d __a)
static __inline__ unsigned int __DEFAULT_FN_ATTRS _cvtmask16_u32(__mmask16 __A)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_permutex2var_ps(__m512 __A, __mmask16 __U, __m512i __I, __m512 __B)
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_sqrt_ps(__m512 __W, __mmask16 __U, __m512 __A)
static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_unpackhi_ps(__m512 __a, __m512 __b)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_expandloadu_epi64(__m512i __W, __mmask8 __U, void const *__P)
static __inline __m128 __DEFAULT_FN_ATTRS512 _mm512_castps512_ps128(__m512 __a)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_cvtu32_ss(__m128 __A, unsigned __B)
static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask3_fmsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_sub_sd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_srai_epi32(__m512i __A, unsigned int __B)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS512 _mm512_mask_test_epi64_mask(__mmask8 __U, __m512i __A, __m512i __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_mov_epi64(__mmask8 __U, __m512i __A)
static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_reduce_or_epi64(__m512i __W)
static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtusepi64_epi8(__mmask8 __M, __m512i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask3_fmsub_sd(__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_castsi512_pd(__m512i __A)
static __inline__ unsigned int __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_min_epu32(__mmask16 __M, __m512i __V)
static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi32_storeu_epi8(void *__P, __mmask16 __M, __m512i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_scalef_sd(__m128d __A, __m128d __B)
static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_blend_pd(__mmask8 __U, __m512d __A, __m512d __W)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_rcp14_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_compress_epi32(__m512i __W, __mmask16 __U, __m512i __A)
static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_sll_epi64(__m512i __A, __m128i __B)
static __inline __m256 __DEFAULT_FN_ATTRS _mm256_setzero_ps(void)
Constructs a 256-bit floating-point vector of [8 x float] with all vector elements initialized to zero.
Definition avxintrin.h:4268
static __inline__ __m256 __DEFAULT_FN_ATTRS _mm256_undefined_ps(void)
Create a 256-bit vector of [8 x float] with undefined values.
Definition avxintrin.h:3581
static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_undefined_si256(void)
Create a 256-bit integer vector with undefined values.
Definition avxintrin.h:3594
static __inline __m256d __DEFAULT_FN_ATTRS _mm256_setzero_pd(void)
Constructs a 256-bit floating-point vector of [4 x double] with all vector elements initialized to zero.
Definition avxintrin.h:4254
static __inline __m256i __DEFAULT_FN_ATTRS _mm256_setzero_si256(void)
Constructs a 256-bit integer vector initialized to zero.
Definition avxintrin.h:4281
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_add_sd(__m128d __a, __m128d __b)
Adds lower double-precision values in both operands and returns the sum in the lower 64 bits of the result.
Definition emmintrin.h:56
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_div_sd(__m128d __a, __m128d __b)
Divides the lower double-precision value of the first operand by the lower double-precision value of the second operand and returns the quotient in the lower 64 bits of the result.
Definition emmintrin.h:181
static __inline__ void int __a
Definition emmintrin.h:4185
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_mul_sd(__m128d __a, __m128d __b)
Multiplies lower double-precision values in both operands and returns the product in the lower 64 bits of the result.
Definition emmintrin.h:139
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_move_sd(__m128d __a, __m128d __b)
Constructs a 128-bit floating-point vector of [2 x double].
Definition emmintrin.h:1932
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_undefined_si128(void)
Generates a 128-bit vector of [4 x i32] with unspecified content.
Definition emmintrin.h:3587
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_sub_sd(__m128d __a, __m128d __b)
Subtracts the lower double-precision value of the second operand from the lower double-precision value of the first operand and returns the difference in the lower 64 bits of the result.
Definition emmintrin.h:98
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_setzero_pd(void)
Constructs a 128-bit floating-point vector of [2 x double] initialized to zero.
Definition emmintrin.h:1911
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_setzero_si128(void)
Creates a 128-bit integer vector initialized to zero.
Definition emmintrin.h:3977
static __inline__ unsigned int unsigned char __D
Definition ia32intrin.h:283
static __inline__ unsigned char int __C
Definition ia32intrin.h:373
struct __storeu_i16 *__P __v
Definition immintrin.h:348
static __inline__ void const void * __src
#define _MM_FROUND_FLOOR
Definition smmintrin.h:29
#define _MM_FROUND_CEIL
Definition smmintrin.h:30
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_setzero_ps(void)
Constructs a 128-bit floating-point vector of [4 x float] initialized to zero.
Definition xmmintrin.h:1903
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_sub_ss(__m128 __a, __m128 __b)
Subtracts the 32-bit float value in the low-order bits of the second operand from the corresponding value in the first operand and returns the difference in the low-order bits of the result.
Definition xmmintrin.h:92
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_move_ss(__m128 __a, __m128 __b)
Constructs a 128-bit floating-point vector of [4 x float].
Definition xmmintrin.h:2668
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_mul_ss(__m128 __a, __m128 __b)
Multiplies two 32-bit float values in the low-order bits of the operands.
Definition xmmintrin.h:135
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_add_ss(__m128 __a, __m128 __b)
Adds the 32-bit float values in the low-order bits of the operands.
Definition xmmintrin.h:50
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_div_ss(__m128 __a, __m128 __b)
Divides the value in the low-order 32 bits of the first operand by the corresponding value in the second operand and returns the quotient in the low-order 32 bits of the result.
Definition xmmintrin.h:177