11 #error "Never use <avx512vlintrin.h> directly; include <immintrin.h> instead."
14 #ifndef __AVX512VLINTRIN_H
15 #define __AVX512VLINTRIN_H
17 #define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__("avx512vl"), __min_vector_width__(128)))
18 #define __DEFAULT_FN_ATTRS256 __attribute__((__always_inline__, __nodebug__, __target__("avx512vl"), __min_vector_width__(256)))
/* Named comparisons on signed 32-bit elements (128- and 256-bit vectors).
 * Each wrapper forwards to the generic _mm*_cmp_epi32_mask with the
 * matching _MM_CMPINT_* predicate and yields an __mmask8; the _mask_*
 * forms additionally thread the caller's mask k through to the masked
 * compare.  Fix: stripped the source line numbers that had been baked
 * into the text of the damaged original. */
#define _mm_cmpeq_epi32_mask(A, B) \
  _mm_cmp_epi32_mask((A), (B), _MM_CMPINT_EQ)
#define _mm_mask_cmpeq_epi32_mask(k, A, B) \
  _mm_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm_cmpge_epi32_mask(A, B) \
  _mm_cmp_epi32_mask((A), (B), _MM_CMPINT_GE)
#define _mm_mask_cmpge_epi32_mask(k, A, B) \
  _mm_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_GE)
#define _mm_cmpgt_epi32_mask(A, B) \
  _mm_cmp_epi32_mask((A), (B), _MM_CMPINT_GT)
#define _mm_mask_cmpgt_epi32_mask(k, A, B) \
  _mm_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm_cmple_epi32_mask(A, B) \
  _mm_cmp_epi32_mask((A), (B), _MM_CMPINT_LE)
#define _mm_mask_cmple_epi32_mask(k, A, B) \
  _mm_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm_cmplt_epi32_mask(A, B) \
  _mm_cmp_epi32_mask((A), (B), _MM_CMPINT_LT)
#define _mm_mask_cmplt_epi32_mask(k, A, B) \
  _mm_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm_cmpneq_epi32_mask(A, B) \
  _mm_cmp_epi32_mask((A), (B), _MM_CMPINT_NE)
#define _mm_mask_cmpneq_epi32_mask(k, A, B) \
  _mm_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_NE)

#define _mm256_cmpeq_epi32_mask(A, B) \
  _mm256_cmp_epi32_mask((A), (B), _MM_CMPINT_EQ)
#define _mm256_mask_cmpeq_epi32_mask(k, A, B) \
  _mm256_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm256_cmpge_epi32_mask(A, B) \
  _mm256_cmp_epi32_mask((A), (B), _MM_CMPINT_GE)
#define _mm256_mask_cmpge_epi32_mask(k, A, B) \
  _mm256_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_GE)
#define _mm256_cmpgt_epi32_mask(A, B) \
  _mm256_cmp_epi32_mask((A), (B), _MM_CMPINT_GT)
#define _mm256_mask_cmpgt_epi32_mask(k, A, B) \
  _mm256_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm256_cmple_epi32_mask(A, B) \
  _mm256_cmp_epi32_mask((A), (B), _MM_CMPINT_LE)
#define _mm256_mask_cmple_epi32_mask(k, A, B) \
  _mm256_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm256_cmplt_epi32_mask(A, B) \
  _mm256_cmp_epi32_mask((A), (B), _MM_CMPINT_LT)
#define _mm256_mask_cmplt_epi32_mask(k, A, B) \
  _mm256_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm256_cmpneq_epi32_mask(A, B) \
  _mm256_cmp_epi32_mask((A), (B), _MM_CMPINT_NE)
#define _mm256_mask_cmpneq_epi32_mask(k, A, B) \
  _mm256_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_NE)
/* Named comparisons on unsigned 32-bit elements (128- and 256-bit
 * vectors).  Each wrapper forwards to the generic _mm*_cmp_epu32_mask
 * with the matching _MM_CMPINT_* predicate; the _mask_* forms thread the
 * caller's mask k through to the masked compare.  Fix: stripped the
 * source line numbers that had been baked into the damaged original. */
#define _mm_cmpeq_epu32_mask(A, B) \
  _mm_cmp_epu32_mask((A), (B), _MM_CMPINT_EQ)
#define _mm_mask_cmpeq_epu32_mask(k, A, B) \
  _mm_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm_cmpge_epu32_mask(A, B) \
  _mm_cmp_epu32_mask((A), (B), _MM_CMPINT_GE)
#define _mm_mask_cmpge_epu32_mask(k, A, B) \
  _mm_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_GE)
#define _mm_cmpgt_epu32_mask(A, B) \
  _mm_cmp_epu32_mask((A), (B), _MM_CMPINT_GT)
#define _mm_mask_cmpgt_epu32_mask(k, A, B) \
  _mm_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm_cmple_epu32_mask(A, B) \
  _mm_cmp_epu32_mask((A), (B), _MM_CMPINT_LE)
#define _mm_mask_cmple_epu32_mask(k, A, B) \
  _mm_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm_cmplt_epu32_mask(A, B) \
  _mm_cmp_epu32_mask((A), (B), _MM_CMPINT_LT)
#define _mm_mask_cmplt_epu32_mask(k, A, B) \
  _mm_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm_cmpneq_epu32_mask(A, B) \
  _mm_cmp_epu32_mask((A), (B), _MM_CMPINT_NE)
#define _mm_mask_cmpneq_epu32_mask(k, A, B) \
  _mm_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_NE)

#define _mm256_cmpeq_epu32_mask(A, B) \
  _mm256_cmp_epu32_mask((A), (B), _MM_CMPINT_EQ)
#define _mm256_mask_cmpeq_epu32_mask(k, A, B) \
  _mm256_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm256_cmpge_epu32_mask(A, B) \
  _mm256_cmp_epu32_mask((A), (B), _MM_CMPINT_GE)
#define _mm256_mask_cmpge_epu32_mask(k, A, B) \
  _mm256_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_GE)
#define _mm256_cmpgt_epu32_mask(A, B) \
  _mm256_cmp_epu32_mask((A), (B), _MM_CMPINT_GT)
#define _mm256_mask_cmpgt_epu32_mask(k, A, B) \
  _mm256_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm256_cmple_epu32_mask(A, B) \
  _mm256_cmp_epu32_mask((A), (B), _MM_CMPINT_LE)
#define _mm256_mask_cmple_epu32_mask(k, A, B) \
  _mm256_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm256_cmplt_epu32_mask(A, B) \
  _mm256_cmp_epu32_mask((A), (B), _MM_CMPINT_LT)
#define _mm256_mask_cmplt_epu32_mask(k, A, B) \
  _mm256_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm256_cmpneq_epu32_mask(A, B) \
  _mm256_cmp_epu32_mask((A), (B), _MM_CMPINT_NE)
#define _mm256_mask_cmpneq_epu32_mask(k, A, B) \
  _mm256_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_NE)
/* Named comparisons on signed 64-bit elements (128- and 256-bit vectors).
 * Each wrapper forwards to the generic _mm*_cmp_epi64_mask with the
 * matching _MM_CMPINT_* predicate; the _mask_* forms thread the caller's
 * mask k through to the masked compare.  Fix: stripped the source line
 * numbers that had been baked into the damaged original. */
#define _mm_cmpeq_epi64_mask(A, B) \
  _mm_cmp_epi64_mask((A), (B), _MM_CMPINT_EQ)
#define _mm_mask_cmpeq_epi64_mask(k, A, B) \
  _mm_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm_cmpge_epi64_mask(A, B) \
  _mm_cmp_epi64_mask((A), (B), _MM_CMPINT_GE)
#define _mm_mask_cmpge_epi64_mask(k, A, B) \
  _mm_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_GE)
#define _mm_cmpgt_epi64_mask(A, B) \
  _mm_cmp_epi64_mask((A), (B), _MM_CMPINT_GT)
#define _mm_mask_cmpgt_epi64_mask(k, A, B) \
  _mm_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm_cmple_epi64_mask(A, B) \
  _mm_cmp_epi64_mask((A), (B), _MM_CMPINT_LE)
#define _mm_mask_cmple_epi64_mask(k, A, B) \
  _mm_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm_cmplt_epi64_mask(A, B) \
  _mm_cmp_epi64_mask((A), (B), _MM_CMPINT_LT)
#define _mm_mask_cmplt_epi64_mask(k, A, B) \
  _mm_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm_cmpneq_epi64_mask(A, B) \
  _mm_cmp_epi64_mask((A), (B), _MM_CMPINT_NE)
#define _mm_mask_cmpneq_epi64_mask(k, A, B) \
  _mm_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_NE)

#define _mm256_cmpeq_epi64_mask(A, B) \
  _mm256_cmp_epi64_mask((A), (B), _MM_CMPINT_EQ)
#define _mm256_mask_cmpeq_epi64_mask(k, A, B) \
  _mm256_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm256_cmpge_epi64_mask(A, B) \
  _mm256_cmp_epi64_mask((A), (B), _MM_CMPINT_GE)
#define _mm256_mask_cmpge_epi64_mask(k, A, B) \
  _mm256_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_GE)
#define _mm256_cmpgt_epi64_mask(A, B) \
  _mm256_cmp_epi64_mask((A), (B), _MM_CMPINT_GT)
#define _mm256_mask_cmpgt_epi64_mask(k, A, B) \
  _mm256_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm256_cmple_epi64_mask(A, B) \
  _mm256_cmp_epi64_mask((A), (B), _MM_CMPINT_LE)
#define _mm256_mask_cmple_epi64_mask(k, A, B) \
  _mm256_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm256_cmplt_epi64_mask(A, B) \
  _mm256_cmp_epi64_mask((A), (B), _MM_CMPINT_LT)
#define _mm256_mask_cmplt_epi64_mask(k, A, B) \
  _mm256_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm256_cmpneq_epi64_mask(A, B) \
  _mm256_cmp_epi64_mask((A), (B), _MM_CMPINT_NE)
#define _mm256_mask_cmpneq_epi64_mask(k, A, B) \
  _mm256_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_NE)
/* Named comparisons on unsigned 64-bit elements (128- and 256-bit
 * vectors).  Each wrapper forwards to the generic _mm*_cmp_epu64_mask
 * with the matching _MM_CMPINT_* predicate; the _mask_* forms thread the
 * caller's mask k through to the masked compare.  Fix: stripped the
 * source line numbers that had been baked into the damaged original. */
#define _mm_cmpeq_epu64_mask(A, B) \
  _mm_cmp_epu64_mask((A), (B), _MM_CMPINT_EQ)
#define _mm_mask_cmpeq_epu64_mask(k, A, B) \
  _mm_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm_cmpge_epu64_mask(A, B) \
  _mm_cmp_epu64_mask((A), (B), _MM_CMPINT_GE)
#define _mm_mask_cmpge_epu64_mask(k, A, B) \
  _mm_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_GE)
#define _mm_cmpgt_epu64_mask(A, B) \
  _mm_cmp_epu64_mask((A), (B), _MM_CMPINT_GT)
#define _mm_mask_cmpgt_epu64_mask(k, A, B) \
  _mm_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm_cmple_epu64_mask(A, B) \
  _mm_cmp_epu64_mask((A), (B), _MM_CMPINT_LE)
#define _mm_mask_cmple_epu64_mask(k, A, B) \
  _mm_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm_cmplt_epu64_mask(A, B) \
  _mm_cmp_epu64_mask((A), (B), _MM_CMPINT_LT)
#define _mm_mask_cmplt_epu64_mask(k, A, B) \
  _mm_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm_cmpneq_epu64_mask(A, B) \
  _mm_cmp_epu64_mask((A), (B), _MM_CMPINT_NE)
#define _mm_mask_cmpneq_epu64_mask(k, A, B) \
  _mm_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_NE)

#define _mm256_cmpeq_epu64_mask(A, B) \
  _mm256_cmp_epu64_mask((A), (B), _MM_CMPINT_EQ)
#define _mm256_mask_cmpeq_epu64_mask(k, A, B) \
  _mm256_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm256_cmpge_epu64_mask(A, B) \
  _mm256_cmp_epu64_mask((A), (B), _MM_CMPINT_GE)
#define _mm256_mask_cmpge_epu64_mask(k, A, B) \
  _mm256_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_GE)
#define _mm256_cmpgt_epu64_mask(A, B) \
  _mm256_cmp_epu64_mask((A), (B), _MM_CMPINT_GT)
#define _mm256_mask_cmpgt_epu64_mask(k, A, B) \
  _mm256_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm256_cmple_epu64_mask(A, B) \
  _mm256_cmp_epu64_mask((A), (B), _MM_CMPINT_LE)
#define _mm256_mask_cmple_epu64_mask(k, A, B) \
  _mm256_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm256_cmplt_epu64_mask(A, B) \
  _mm256_cmp_epu64_mask((A), (B), _MM_CMPINT_LT)
#define _mm256_mask_cmplt_epu64_mask(k, A, B) \
  _mm256_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm256_cmpneq_epu64_mask(A, B) \
  _mm256_cmp_epu64_mask((A), (B), _MM_CMPINT_NE)
#define _mm256_mask_cmpneq_epu64_mask(k, A, B) \
  _mm256_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_NE)
229 return (__m256i)__builtin_ia32_selectd_256((
__mmask8)__U,
237 return (__m256i)__builtin_ia32_selectd_256((
__mmask8)__U,
245 return (__m256i)__builtin_ia32_selectq_256((
__mmask8)__U,
253 return (__m256i)__builtin_ia32_selectq_256((
__mmask8)__U,
261 return (__m256i)__builtin_ia32_selectd_256((
__mmask8)__U,
269 return (__m256i)__builtin_ia32_selectd_256((
__mmask8)__U,
277 return (__m256i)__builtin_ia32_selectq_256((
__mmask8)__U,
285 return (__m256i)__builtin_ia32_selectq_256((
__mmask8)__U,
293 return (__m128i)__builtin_ia32_selectd_128((
__mmask8)__U,
301 return (__m128i)__builtin_ia32_selectd_128((
__mmask8)__U,
309 return (__m128i)__builtin_ia32_selectq_128((
__mmask8)__U,
317 return (__m128i)__builtin_ia32_selectq_128((
__mmask8)__U,
325 return (__m128i)__builtin_ia32_selectd_128((
__mmask8)__U,
333 return (__m128i)__builtin_ia32_selectd_128((
__mmask8)__U,
341 return (__m128i)__builtin_ia32_selectq_128((
__mmask8)__U,
349 return (__m128i)__builtin_ia32_selectq_128((
__mmask8)__U,
357 return (__m256i)__builtin_ia32_selectq_256((
__mmask8)__M,
365 return (__m256i)__builtin_ia32_selectq_256((
__mmask8)__M,
373 return (__m128i)__builtin_ia32_selectq_128((
__mmask8)__M,
381 return (__m128i)__builtin_ia32_selectq_128((
__mmask8)__M,
389 return (__m256i)__builtin_ia32_selectq_256((
__mmask8)__M,
397 return (__m256i)__builtin_ia32_selectq_256((
__mmask8)__M,
405 return (__m128i)__builtin_ia32_selectq_128((
__mmask8)__M,
413 return (__m128i)__builtin_ia32_selectq_128((
__mmask8)__M,
421 return (__m256i)__builtin_ia32_selectd_256((
__mmask8)__M,
429 return (__m256i)__builtin_ia32_selectd_256((
__mmask8)__M,
437 return (__m128i)__builtin_ia32_selectd_128((
__mmask8)__M,
445 return (__m128i)__builtin_ia32_selectd_128((
__mmask8)__M,
453 return (__m256i)((__v8su)
__a & (__v8su)
__b);
459 return (__m256i)__builtin_ia32_selectd_256((
__mmask8)__U,
473 return (__m128i)((__v4su)
__a & (__v4su)
__b);
479 return (__m128i)__builtin_ia32_selectd_128((
__mmask8)__U,
493 return (__m256i)(~(__v8su)__A & (__v8su)__B);
499 return (__m256i)__builtin_ia32_selectd_256((
__mmask8)__U,
514 return (__m128i)(~(__v4su)__A & (__v4su)__B);
520 return (__m128i)__builtin_ia32_selectd_128((
__mmask8)__U,
534 return (__m256i)((__v8su)
__a | (__v8su)
__b);
540 return (__m256i)__builtin_ia32_selectd_256((
__mmask8)__U,
554 return (__m128i)((__v4su)
__a | (__v4su)
__b);
560 return (__m128i)__builtin_ia32_selectd_128((
__mmask8)__U,
574 return (__m256i)((__v8su)
__a ^ (__v8su)
__b);
580 return (__m256i)__builtin_ia32_selectd_256((
__mmask8)__U,
594 return (__m128i)((__v4su)
__a ^ (__v4su)
__b);
600 return (__m128i)__builtin_ia32_selectd_128((
__mmask8)__U,
614 return (__m256i)((__v4du)
__a & (__v4du)
__b);
620 return (__m256i)__builtin_ia32_selectq_256((
__mmask8)__U,
634 return (__m128i)((__v2du)
__a & (__v2du)
__b);
640 return (__m128i)__builtin_ia32_selectq_128((
__mmask8)__U,
654 return (__m256i)(~(__v4du)__A & (__v4du)__B);
660 return (__m256i)__builtin_ia32_selectq_256((
__mmask8)__U,
675 return (__m128i)(~(__v2du)__A & (__v2du)__B);
681 return (__m128i)__builtin_ia32_selectq_128((
__mmask8)__U,
695 return (__m256i)((__v4du)
__a | (__v4du)
__b);
701 return (__m256i)__builtin_ia32_selectq_256((
__mmask8)__U,
715 return (__m128i)((__v2du)
__a | (__v2du)
__b);
721 return (__m128i)__builtin_ia32_selectq_128((
__mmask8)__U,
735 return (__m256i)((__v4du)
__a ^ (__v4du)
__b);
741 return (__m256i)__builtin_ia32_selectq_256((
__mmask8)__U,
755 return (__m128i)((__v2du)
__a ^ (__v2du)
__b);
762 return (__m128i)__builtin_ia32_selectq_128((
__mmask8)__U,
/* Generic predicate comparisons on 32-bit elements.  p is an
 * _MM_CMPINT_* predicate forwarded to the compare builtin.  The unmasked
 * forms pass an all-ones write mask; the masked forms pass the caller's
 * mask m.  cmpd* handles signed, ucmpd* unsigned comparison.
 * NOTE(review): the final mask argument of each builtin call was missing
 * from the damaged original (truncated continuation lines); restored per
 * the canonical clang avx512vl header.  Baked-in line numbers stripped. */
#define _mm_cmp_epi32_mask(a, b, p) \
  (__mmask8)__builtin_ia32_cmpd128_mask((__v4si)(__m128i)(a), \
                                        (__v4si)(__m128i)(b), (int)(p), \
                                        (__mmask8)-1)
#define _mm_mask_cmp_epi32_mask(m, a, b, p) \
  (__mmask8)__builtin_ia32_cmpd128_mask((__v4si)(__m128i)(a), \
                                        (__v4si)(__m128i)(b), (int)(p), \
                                        (__mmask8)(m))
#define _mm_cmp_epu32_mask(a, b, p) \
  (__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)(__m128i)(a), \
                                         (__v4si)(__m128i)(b), (int)(p), \
                                         (__mmask8)-1)
#define _mm_mask_cmp_epu32_mask(m, a, b, p) \
  (__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)(__m128i)(a), \
                                         (__v4si)(__m128i)(b), (int)(p), \
                                         (__mmask8)(m))
#define _mm256_cmp_epi32_mask(a, b, p) \
  (__mmask8)__builtin_ia32_cmpd256_mask((__v8si)(__m256i)(a), \
                                        (__v8si)(__m256i)(b), (int)(p), \
                                        (__mmask8)-1)
#define _mm256_mask_cmp_epi32_mask(m, a, b, p) \
  (__mmask8)__builtin_ia32_cmpd256_mask((__v8si)(__m256i)(a), \
                                        (__v8si)(__m256i)(b), (int)(p), \
                                        (__mmask8)(m))
#define _mm256_cmp_epu32_mask(a, b, p) \
  (__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)(__m256i)(a), \
                                         (__v8si)(__m256i)(b), (int)(p), \
                                         (__mmask8)-1)
#define _mm256_mask_cmp_epu32_mask(m, a, b, p) \
  (__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)(__m256i)(a), \
                                         (__v8si)(__m256i)(b), (int)(p), \
                                         (__mmask8)(m))
/* Generic predicate comparisons on 64-bit elements.  p is an
 * _MM_CMPINT_* predicate forwarded to the compare builtin.  The unmasked
 * forms pass an all-ones write mask; the masked forms pass the caller's
 * mask m.  cmpq* handles signed, ucmpq* unsigned comparison.
 * NOTE(review): the final mask argument of each builtin call was missing
 * from the damaged original (truncated continuation lines); restored per
 * the canonical clang avx512vl header.  Baked-in line numbers stripped. */
#define _mm_cmp_epi64_mask(a, b, p) \
  (__mmask8)__builtin_ia32_cmpq128_mask((__v2di)(__m128i)(a), \
                                        (__v2di)(__m128i)(b), (int)(p), \
                                        (__mmask8)-1)
#define _mm_mask_cmp_epi64_mask(m, a, b, p) \
  (__mmask8)__builtin_ia32_cmpq128_mask((__v2di)(__m128i)(a), \
                                        (__v2di)(__m128i)(b), (int)(p), \
                                        (__mmask8)(m))
#define _mm_cmp_epu64_mask(a, b, p) \
  (__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)(__m128i)(a), \
                                         (__v2di)(__m128i)(b), (int)(p), \
                                         (__mmask8)-1)
#define _mm_mask_cmp_epu64_mask(m, a, b, p) \
  (__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)(__m128i)(a), \
                                         (__v2di)(__m128i)(b), (int)(p), \
                                         (__mmask8)(m))
#define _mm256_cmp_epi64_mask(a, b, p) \
  (__mmask8)__builtin_ia32_cmpq256_mask((__v4di)(__m256i)(a), \
                                        (__v4di)(__m256i)(b), (int)(p), \
                                        (__mmask8)-1)
#define _mm256_mask_cmp_epi64_mask(m, a, b, p) \
  (__mmask8)__builtin_ia32_cmpq256_mask((__v4di)(__m256i)(a), \
                                        (__v4di)(__m256i)(b), (int)(p), \
                                        (__mmask8)(m))
#define _mm256_cmp_epu64_mask(a, b, p) \
  (__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)(__m256i)(a), \
                                         (__v4di)(__m256i)(b), (int)(p), \
                                         (__mmask8)-1)
#define _mm256_mask_cmp_epu64_mask(m, a, b, p) \
  (__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)(__m256i)(a), \
                                         (__v4di)(__m256i)(b), (int)(p), \
                                         (__mmask8)(m))
/* Generic predicate comparisons on packed float/double elements.  p is a
 * _CMP_* floating-point predicate forwarded to the compare builtin.  The
 * unmasked forms pass an all-ones write mask; the masked forms pass the
 * caller's mask m.
 * NOTE(review): the final mask argument of each builtin call was missing
 * from the damaged original (truncated continuation lines); restored per
 * the canonical clang avx512vl header.  Baked-in line numbers stripped. */
#define _mm256_cmp_ps_mask(a, b, p) \
  (__mmask8)__builtin_ia32_cmpps256_mask((__v8sf)(__m256)(a), \
                                         (__v8sf)(__m256)(b), (int)(p), \
                                         (__mmask8)-1)
#define _mm256_mask_cmp_ps_mask(m, a, b, p) \
  (__mmask8)__builtin_ia32_cmpps256_mask((__v8sf)(__m256)(a), \
                                         (__v8sf)(__m256)(b), (int)(p), \
                                         (__mmask8)(m))
#define _mm256_cmp_pd_mask(a, b, p) \
  (__mmask8)__builtin_ia32_cmppd256_mask((__v4df)(__m256d)(a), \
                                         (__v4df)(__m256d)(b), (int)(p), \
                                         (__mmask8)-1)
#define _mm256_mask_cmp_pd_mask(m, a, b, p) \
  (__mmask8)__builtin_ia32_cmppd256_mask((__v4df)(__m256d)(a), \
                                         (__v4df)(__m256d)(b), (int)(p), \
                                         (__mmask8)(m))
#define _mm_cmp_ps_mask(a, b, p) \
  (__mmask8)__builtin_ia32_cmpps128_mask((__v4sf)(__m128)(a), \
                                         (__v4sf)(__m128)(b), (int)(p), \
                                         (__mmask8)-1)
#define _mm_mask_cmp_ps_mask(m, a, b, p) \
  (__mmask8)__builtin_ia32_cmpps128_mask((__v4sf)(__m128)(a), \
                                         (__v4sf)(__m128)(b), (int)(p), \
                                         (__mmask8)(m))
#define _mm_cmp_pd_mask(a, b, p) \
  (__mmask8)__builtin_ia32_cmppd128_mask((__v2df)(__m128d)(a), \
                                         (__v2df)(__m128d)(b), (int)(p), \
                                         (__mmask8)-1)
#define _mm_mask_cmp_pd_mask(m, a, b, p) \
  (__mmask8)__builtin_ia32_cmppd128_mask((__v2df)(__m128d)(a), \
                                         (__v2df)(__m128d)(b), (int)(p), \
                                         (__mmask8)(m))
896 return (__m128d) __builtin_ia32_selectpd_128((
__mmask8) __U,
897 __builtin_ia32_vfmaddpd ((__v2df) __A,
906 return (__m128d) __builtin_ia32_selectpd_128((
__mmask8) __U,
907 __builtin_ia32_vfmaddpd ((__v2df) __A,
916 return (__m128d) __builtin_ia32_selectpd_128((
__mmask8) __U,
917 __builtin_ia32_vfmaddpd ((__v2df) __A,
926 return (__m128d) __builtin_ia32_selectpd_128((
__mmask8) __U,
927 __builtin_ia32_vfmaddpd ((__v2df) __A,
936 return (__m128d) __builtin_ia32_selectpd_128((
__mmask8) __U,
937 __builtin_ia32_vfmaddpd ((__v2df) __A,
946 return (__m128d) __builtin_ia32_selectpd_128((
__mmask8) __U,
947 __builtin_ia32_vfmaddpd (-(__v2df) __A,
956 return (__m128d) __builtin_ia32_selectpd_128((
__mmask8) __U,
957 __builtin_ia32_vfmaddpd (-(__v2df) __A,
966 return (__m128d) __builtin_ia32_selectpd_128((
__mmask8) __U,
967 __builtin_ia32_vfmaddpd (-(__v2df) __A,
976 return (__m256d) __builtin_ia32_selectpd_256((
__mmask8) __U,
977 __builtin_ia32_vfmaddpd256 ((__v4df) __A,
986 return (__m256d) __builtin_ia32_selectpd_256((
__mmask8) __U,
987 __builtin_ia32_vfmaddpd256 ((__v4df) __A,
996 return (__m256d) __builtin_ia32_selectpd_256((
__mmask8) __U,
997 __builtin_ia32_vfmaddpd256 ((__v4df) __A,
1006 return (__m256d) __builtin_ia32_selectpd_256((
__mmask8) __U,
1007 __builtin_ia32_vfmaddpd256 ((__v4df) __A,
1016 return (__m256d) __builtin_ia32_selectpd_256((
__mmask8) __U,
1017 __builtin_ia32_vfmaddpd256 ((__v4df) __A,
1026 return (__m256d) __builtin_ia32_selectpd_256((
__mmask8) __U,
1027 __builtin_ia32_vfmaddpd256 (-(__v4df) __A,
1036 return (__m256d) __builtin_ia32_selectpd_256((
__mmask8) __U,
1037 __builtin_ia32_vfmaddpd256 (-(__v4df) __A,
1046 return (__m256d) __builtin_ia32_selectpd_256((
__mmask8) __U,
1047 __builtin_ia32_vfmaddpd256 (-(__v4df) __A,
1056 return (__m128) __builtin_ia32_selectps_128((
__mmask8) __U,
1057 __builtin_ia32_vfmaddps ((__v4sf) __A,
1066 return (__m128) __builtin_ia32_selectps_128((
__mmask8) __U,
1067 __builtin_ia32_vfmaddps ((__v4sf) __A,
1076 return (__m128) __builtin_ia32_selectps_128((
__mmask8) __U,
1077 __builtin_ia32_vfmaddps ((__v4sf) __A,
1086 return (__m128) __builtin_ia32_selectps_128((
__mmask8) __U,
1087 __builtin_ia32_vfmaddps ((__v4sf) __A,
1096 return (__m128) __builtin_ia32_selectps_128((
__mmask8) __U,
1097 __builtin_ia32_vfmaddps ((__v4sf) __A,
1106 return (__m128) __builtin_ia32_selectps_128((
__mmask8) __U,
1107 __builtin_ia32_vfmaddps (-(__v4sf) __A,
1116 return (__m128) __builtin_ia32_selectps_128((
__mmask8) __U,
1117 __builtin_ia32_vfmaddps (-(__v4sf) __A,
1126 return (__m128) __builtin_ia32_selectps_128((
__mmask8) __U,
1127 __builtin_ia32_vfmaddps (-(__v4sf) __A,
1136 return (__m256) __builtin_ia32_selectps_256((
__mmask8) __U,
1137 __builtin_ia32_vfmaddps256 ((__v8sf) __A,
1146 return (__m256) __builtin_ia32_selectps_256((
__mmask8) __U,
1147 __builtin_ia32_vfmaddps256 ((__v8sf) __A,
1156 return (__m256) __builtin_ia32_selectps_256((
__mmask8) __U,
1157 __builtin_ia32_vfmaddps256 ((__v8sf) __A,
1166 return (__m256) __builtin_ia32_selectps_256((
__mmask8) __U,
1167 __builtin_ia32_vfmaddps256 ((__v8sf) __A,
1176 return (__m256) __builtin_ia32_selectps_256((
__mmask8) __U,
1177 __builtin_ia32_vfmaddps256 ((__v8sf) __A,
1186 return (__m256) __builtin_ia32_selectps_256((
__mmask8) __U,
1187 __builtin_ia32_vfmaddps256 (-(__v8sf) __A,
1196 return (__m256) __builtin_ia32_selectps_256((
__mmask8) __U,
1197 __builtin_ia32_vfmaddps256 (-(__v8sf) __A,
1206 return (__m256) __builtin_ia32_selectps_256((
__mmask8) __U,
1207 __builtin_ia32_vfmaddps256 (-(__v8sf) __A,
1216 return (__m128d) __builtin_ia32_selectpd_128((
__mmask8) __U,
1217 __builtin_ia32_vfmaddsubpd ((__v2df) __A,
1226 return (__m128d) __builtin_ia32_selectpd_128((
__mmask8) __U,
1227 __builtin_ia32_vfmaddsubpd ((__v2df) __A,
1236 return (__m128d) __builtin_ia32_selectpd_128((
__mmask8) __U,
1237 __builtin_ia32_vfmaddsubpd ((__v2df) __A,
1246 return (__m128d) __builtin_ia32_selectpd_128((
__mmask8) __U,
1247 __builtin_ia32_vfmaddsubpd ((__v2df) __A,
1256 return (__m128d) __builtin_ia32_selectpd_128((
__mmask8) __U,
1257 __builtin_ia32_vfmaddsubpd ((__v2df) __A,
1266 return (__m256d) __builtin_ia32_selectpd_256((
__mmask8) __U,
1267 __builtin_ia32_vfmaddsubpd256 ((__v4df) __A,
1276 return (__m256d) __builtin_ia32_selectpd_256((
__mmask8) __U,
1277 __builtin_ia32_vfmaddsubpd256 ((__v4df) __A,
1286 return (__m256d) __builtin_ia32_selectpd_256((
__mmask8) __U,
1287 __builtin_ia32_vfmaddsubpd256 ((__v4df) __A,
1296 return (__m256d) __builtin_ia32_selectpd_256((
__mmask8) __U,
1297 __builtin_ia32_vfmaddsubpd256 ((__v4df) __A,
1306 return (__m256d) __builtin_ia32_selectpd_256((
__mmask8) __U,
1307 __builtin_ia32_vfmaddsubpd256 ((__v4df) __A,
1316 return (__m128) __builtin_ia32_selectps_128((
__mmask8) __U,
1317 __builtin_ia32_vfmaddsubps ((__v4sf) __A,
1326 return (__m128) __builtin_ia32_selectps_128((
__mmask8) __U,
1327 __builtin_ia32_vfmaddsubps ((__v4sf) __A,
1336 return (__m128) __builtin_ia32_selectps_128((
__mmask8) __U,
1337 __builtin_ia32_vfmaddsubps ((__v4sf) __A,
1346 return (__m128) __builtin_ia32_selectps_128((
__mmask8) __U,
1347 __builtin_ia32_vfmaddsubps ((__v4sf) __A,
1356 return (__m128) __builtin_ia32_selectps_128((
__mmask8) __U,
1357 __builtin_ia32_vfmaddsubps ((__v4sf) __A,
1367 return (__m256) __builtin_ia32_selectps_256((
__mmask8) __U,
1368 __builtin_ia32_vfmaddsubps256 ((__v8sf) __A,
1377 return (__m256) __builtin_ia32_selectps_256((
__mmask8) __U,
1378 __builtin_ia32_vfmaddsubps256 ((__v8sf) __A,
1387 return (__m256) __builtin_ia32_selectps_256((
__mmask8) __U,
1388 __builtin_ia32_vfmaddsubps256 ((__v8sf) __A,
1397 return (__m256) __builtin_ia32_selectps_256((
__mmask8) __U,
1398 __builtin_ia32_vfmaddsubps256 ((__v8sf) __A,
1407 return (__m256) __builtin_ia32_selectps_256((
__mmask8) __U,
1408 __builtin_ia32_vfmaddsubps256 ((__v8sf) __A,
1417 return (__m128d) __builtin_ia32_selectpd_128((
__mmask8) __U,
1418 __builtin_ia32_vfmaddpd ((__v2df) __A,
1427 return (__m256d) __builtin_ia32_selectpd_256((
__mmask8) __U,
1428 __builtin_ia32_vfmaddpd256 ((__v4df) __A,
1437 return (__m128) __builtin_ia32_selectps_128((
__mmask8) __U,
1438 __builtin_ia32_vfmaddps ((__v4sf) __A,
1447 return (__m256) __builtin_ia32_selectps_256((
__mmask8) __U,
1448 __builtin_ia32_vfmaddps256 ((__v8sf) __A,
1457 return (__m128d) __builtin_ia32_selectpd_128((
__mmask8) __U,
1458 __builtin_ia32_vfmaddsubpd ((__v2df) __A,
1467 return (__m256d) __builtin_ia32_selectpd_256((
__mmask8) __U,
1468 __builtin_ia32_vfmaddsubpd256 ((__v4df) __A,
1477 return (__m128) __builtin_ia32_selectps_128((
__mmask8) __U,
1478 __builtin_ia32_vfmaddsubps ((__v4sf) __A,
1487 return (__m256) __builtin_ia32_selectps_256((
__mmask8) __U,
1488 __builtin_ia32_vfmaddsubps256 ((__v8sf) __A,
1497 return (__m128d) __builtin_ia32_selectpd_128((
__mmask8) __U,
1498 __builtin_ia32_vfmaddpd ((__v2df) __A,
1507 return (__m256d) __builtin_ia32_selectpd_256((
__mmask8) __U,
1508 __builtin_ia32_vfmaddpd256 ((__v4df) __A,
1517 return (__m128) __builtin_ia32_selectps_128((
__mmask8) __U,
1518 __builtin_ia32_vfmaddps ((__v4sf) __A,
1527 return (__m256) __builtin_ia32_selectps_256((
__mmask8) __U,
1528 __builtin_ia32_vfmaddps256 ((__v8sf) __A,
1537 return (__m128d) __builtin_ia32_selectpd_128((
__mmask8) __U,
1538 __builtin_ia32_vfmaddpd ((__v2df) __A,
1547 return (__m128d) __builtin_ia32_selectpd_128((
__mmask8) __U,
1548 __builtin_ia32_vfmaddpd ((__v2df) __A,
1557 return (__m256d) __builtin_ia32_selectpd_256((
__mmask8) __U,
1558 __builtin_ia32_vfmaddpd256 ((__v4df) __A,
1567 return (__m256d) __builtin_ia32_selectpd_256((
__mmask8) __U,
1568 __builtin_ia32_vfmaddpd256 ((__v4df) __A,
1577 return (__m128) __builtin_ia32_selectps_128((
__mmask8) __U,
1578 __builtin_ia32_vfmaddps ((__v4sf) __A,
1587 return (__m128) __builtin_ia32_selectps_128((
__mmask8) __U,
1588 __builtin_ia32_vfmaddps ((__v4sf) __A,
1597 return (__m256) __builtin_ia32_selectps_256((
__mmask8) __U,
1598 __builtin_ia32_vfmaddps256 ((__v8sf) __A,
1607 return (__m256) __builtin_ia32_selectps_256((
__mmask8) __U,
1608 __builtin_ia32_vfmaddps256 ((__v8sf) __A,
1616 return (__m128d)__builtin_ia32_selectpd_128((
__mmask8)__U,
1623 return (__m128d)__builtin_ia32_selectpd_128((
__mmask8)__U,
1630 return (__m256d)__builtin_ia32_selectpd_256((
__mmask8)__U,
1637 return (__m256d)__builtin_ia32_selectpd_256((
__mmask8)__U,
1644 return (__m128)__builtin_ia32_selectps_128((
__mmask8)__U,
1651 return (__m128)__builtin_ia32_selectps_128((
__mmask8)__U,
1658 return (__m256)__builtin_ia32_selectps_256((
__mmask8)__U,
1665 return (__m256)__builtin_ia32_selectps_256((
__mmask8)__U,
1672 return (__m128i) __builtin_ia32_selectd_128 ((
__mmask8) __U,
1679 return (__m256i) __builtin_ia32_selectd_256 ((
__mmask8) __U,
1686 return (__m128d) __builtin_ia32_selectpd_128 ((
__mmask8) __U,
1693 return (__m256d) __builtin_ia32_selectpd_256 ((
__mmask8) __U,
1700 return (__m128) __builtin_ia32_selectps_128 ((
__mmask8) __U,
1707 return (__m256) __builtin_ia32_selectps_256 ((
__mmask8) __U,
1714 return (__m128i) __builtin_ia32_selectq_128 ((
__mmask8) __U,
1721 return (__m256i) __builtin_ia32_selectq_256 ((
__mmask8) __U,
1728 return (__m128d) __builtin_ia32_compressdf128_mask ((__v2df) __A,
1735 return (__m128d) __builtin_ia32_compressdf128_mask ((__v2df) __A,
1743 return (__m256d) __builtin_ia32_compressdf256_mask ((__v4df) __A,
1750 return (__m256d) __builtin_ia32_compressdf256_mask ((__v4df) __A,
1758 return (__m128i) __builtin_ia32_compressdi128_mask ((__v2di) __A,
1765 return (__m128i) __builtin_ia32_compressdi128_mask ((__v2di) __A,
1773 return (__m256i) __builtin_ia32_compressdi256_mask ((__v4di) __A,
1780 return (__m256i) __builtin_ia32_compressdi256_mask ((__v4di) __A,
1788 return (__m128) __builtin_ia32_compresssf128_mask ((__v4sf) __A,
1795 return (__m128) __builtin_ia32_compresssf128_mask ((__v4sf) __A,
1803 return (__m256) __builtin_ia32_compresssf256_mask ((__v8sf) __A,
1810 return (__m256) __builtin_ia32_compresssf256_mask ((__v8sf) __A,
1818 return (__m128i) __builtin_ia32_compresssi128_mask ((__v4si) __A,
1825 return (__m128i) __builtin_ia32_compresssi128_mask ((__v4si) __A,
1833 return (__m256i) __builtin_ia32_compresssi256_mask ((__v8si) __A,
1840 return (__m256i) __builtin_ia32_compresssi256_mask ((__v8si) __A,
1848 __builtin_ia32_compressstoredf128_mask ((__v2df *) __P,
1855 __builtin_ia32_compressstoredf256_mask ((__v4df *) __P,
1862 __builtin_ia32_compressstoredi128_mask ((__v2di *) __P,
1869 __builtin_ia32_compressstoredi256_mask ((__v4di *) __P,
1876 __builtin_ia32_compressstoresf128_mask ((__v4sf *) __P,
1883 __builtin_ia32_compressstoresf256_mask ((__v8sf *) __P,
1890 __builtin_ia32_compressstoresi128_mask ((__v4si *) __P,
1897 __builtin_ia32_compressstoresi256_mask ((__v8si *) __P,
1904 return (__m128d)__builtin_ia32_selectpd_128((
__mmask8) __U,
1911 return (__m128d)__builtin_ia32_selectpd_128((
__mmask8) __U,
1918 return (__m256d)__builtin_ia32_selectpd_256((
__mmask8) __U,
1925 return (__m256d)__builtin_ia32_selectpd_256((
__mmask8) __U,
1932 return (__m128)__builtin_ia32_selectps_128((
__mmask8)__U,
1939 return (__m128)__builtin_ia32_selectps_128((
__mmask8)__U,
1946 return (__m256)__builtin_ia32_selectps_256((
__mmask8)__U,
1953 return (__m256)__builtin_ia32_selectps_256((
__mmask8)__U,
1960 return (__m128i) __builtin_ia32_cvtpd2dq128_mask ((__v2df) __A,
1967 return (__m128i) __builtin_ia32_cvtpd2dq128_mask ((__v2df) __A,
1975 return (__m128i)__builtin_ia32_selectd_128((
__mmask8)__U,
1982 return (__m128i)__builtin_ia32_selectd_128((
__mmask8)__U,
1989 return (__m128) __builtin_ia32_cvtpd2ps_mask ((__v2df) __A,
1996 return (__m128) __builtin_ia32_cvtpd2ps_mask ((__v2df) __A,
2004 return (__m128)__builtin_ia32_selectps_128((
__mmask8)__U,
2011 return (__m128)__builtin_ia32_selectps_128((
__mmask8)__U,
2018 return (__m128i) __builtin_ia32_cvtpd2udq128_mask ((__v2df) __A,
2026 return (__m128i) __builtin_ia32_cvtpd2udq128_mask ((__v2df) __A,
2033 return (__m128i) __builtin_ia32_cvtpd2udq128_mask ((__v2df) __A,
2041 return (__m128i) __builtin_ia32_cvtpd2udq256_mask ((__v4df) __A,
2049 return (__m128i) __builtin_ia32_cvtpd2udq256_mask ((__v4df) __A,
2056 return (__m128i) __builtin_ia32_cvtpd2udq256_mask ((__v4df) __A,
2064 return (__m128i)__builtin_ia32_selectd_128((
__mmask8)__U,
2071 return (__m128i)__builtin_ia32_selectd_128((
__mmask8)__U,
2078 return (__m256i)__builtin_ia32_selectd_256((
__mmask8)__U,
2085 return (__m256i)__builtin_ia32_selectd_256((
__mmask8)__U,
2092 return (__m128d)__builtin_ia32_selectpd_128((
__mmask8)__U,
2099 return (__m128d)__builtin_ia32_selectpd_128((
__mmask8)__U,
2106 return (__m256d)__builtin_ia32_selectpd_256((
__mmask8)__U,
2113 return (__m256d)__builtin_ia32_selectpd_256((
__mmask8)__U,
2120 return (__m128i) __builtin_ia32_cvtps2udq128_mask ((__v4sf) __A,
2128 return (__m128i) __builtin_ia32_cvtps2udq128_mask ((__v4sf) __A,
2135 return (__m128i) __builtin_ia32_cvtps2udq128_mask ((__v4sf) __A,
2143 return (__m256i) __builtin_ia32_cvtps2udq256_mask ((__v8sf) __A,
2151 return (__m256i) __builtin_ia32_cvtps2udq256_mask ((__v8sf) __A,
2158 return (__m256i) __builtin_ia32_cvtps2udq256_mask ((__v8sf) __A,
2166 return (__m128i) __builtin_ia32_cvttpd2dq128_mask ((__v2df) __A,
2173 return (__m128i) __builtin_ia32_cvttpd2dq128_mask ((__v2df) __A,
2181 return (__m128i)__builtin_ia32_selectd_128((
__mmask8)__U,
2188 return (__m128i)__builtin_ia32_selectd_128((
__mmask8)__U,
2195 return (__m128i) __builtin_ia32_cvttpd2udq128_mask ((__v2df) __A,
2203 return (__m128i) __builtin_ia32_cvttpd2udq128_mask ((__v2df) __A,
2210 return (__m128i) __builtin_ia32_cvttpd2udq128_mask ((__v2df) __A,
2218 return (__m128i) __builtin_ia32_cvttpd2udq256_mask ((__v4df) __A,
2226 return (__m128i) __builtin_ia32_cvttpd2udq256_mask ((__v4df) __A,
2233 return (__m128i) __builtin_ia32_cvttpd2udq256_mask ((__v4df) __A,
2241 return (__m128i)__builtin_ia32_selectd_128((
__mmask8)__U,
2248 return (__m128i)__builtin_ia32_selectd_128((
__mmask8)__U,
2255 return (__m256i)__builtin_ia32_selectd_256((
__mmask8)__U,
2262 return (__m256i)__builtin_ia32_selectd_256((
__mmask8)__U,
2269 return (__m128i) __builtin_ia32_cvttps2udq128_mask ((__v4sf) __A,
2277 return (__m128i) __builtin_ia32_cvttps2udq128_mask ((__v4sf) __A,
2284 return (__m128i) __builtin_ia32_cvttps2udq128_mask ((__v4sf) __A,
2292 return (__m256i) __builtin_ia32_cvttps2udq256_mask ((__v8sf) __A,
2300 return (__m256i) __builtin_ia32_cvttps2udq256_mask ((__v8sf) __A,
2307 return (__m256i) __builtin_ia32_cvttps2udq256_mask ((__v8sf) __A,
2315 return (__m128d) __builtin_convertvector(
2316 __builtin_shufflevector((__v4su)__A, (__v4su)__A, 0, 1), __v2df);
2321 return (__m128d)__builtin_ia32_selectpd_128((
__mmask8) __U,
2328 return (__m128d)__builtin_ia32_selectpd_128((
__mmask8) __U,
2335 return (__m256d)__builtin_convertvector((__v4su)__A, __v4df);
2340 return (__m256d)__builtin_ia32_selectpd_256((
__mmask8) __U,
2347 return (__m256d)__builtin_ia32_selectpd_256((
__mmask8) __U,
2354 return (__m128)__builtin_convertvector((__v4su)__A, __v4sf);
2359 return (__m128)__builtin_ia32_selectps_128((
__mmask8)__U,
2366 return (__m128)__builtin_ia32_selectps_128((
__mmask8)__U,
2373 return (__m256)__builtin_convertvector((__v8su)__A, __v8sf);
2378 return (__m256)__builtin_ia32_selectps_256((
__mmask8)__U,
2385 return (__m256)__builtin_ia32_selectps_256((
__mmask8)__U,
2392 return (__m128d)__builtin_ia32_selectpd_128((
__mmask8)__U,
2399 return (__m128d)__builtin_ia32_selectpd_128((
__mmask8)__U,
2406 return (__m256d)__builtin_ia32_selectpd_256((
__mmask8)__U,
2413 return (__m256d)__builtin_ia32_selectpd_256((
__mmask8)__U,
2420 return (__m128)__builtin_ia32_selectps_128((
__mmask8)__U,
2427 return (__m128)__builtin_ia32_selectps_128((
__mmask8)__U,
2434 return (__m256)__builtin_ia32_selectps_256((
__mmask8)__U,
2441 return (__m256)__builtin_ia32_selectps_256((
__mmask8)__U,
2448 return (__m128d) __builtin_ia32_expanddf128_mask ((__v2df) __A,
2455 return (__m128d) __builtin_ia32_expanddf128_mask ((__v2df) __A,
2463 return (__m256d) __builtin_ia32_expanddf256_mask ((__v4df) __A,
2470 return (__m256d) __builtin_ia32_expanddf256_mask ((__v4df) __A,
2478 return (__m128i) __builtin_ia32_expanddi128_mask ((__v2di) __A,
2485 return (__m128i) __builtin_ia32_expanddi128_mask ((__v2di) __A,
2493 return (__m256i) __builtin_ia32_expanddi256_mask ((__v4di) __A,
2500 return (__m256i) __builtin_ia32_expanddi256_mask ((__v4di) __A,
2508 return (__m128d) __builtin_ia32_expandloaddf128_mask ((
const __v2df *) __P,
2516 return (__m128d) __builtin_ia32_expandloaddf128_mask ((
const __v2df *) __P,
2525 return (__m256d) __builtin_ia32_expandloaddf256_mask ((
const __v4df *) __P,
2533 return (__m256d) __builtin_ia32_expandloaddf256_mask ((
const __v4df *) __P,
2542 return (__m128i) __builtin_ia32_expandloaddi128_mask ((
const __v2di *) __P,
2550 return (__m128i) __builtin_ia32_expandloaddi128_mask ((
const __v2di *) __P,
2560 return (__m256i) __builtin_ia32_expandloaddi256_mask ((
const __v4di *) __P,
2568 return (__m256i) __builtin_ia32_expandloaddi256_mask ((
const __v4di *) __P,
2577 return (__m128) __builtin_ia32_expandloadsf128_mask ((
const __v4sf *) __P,
2584 return (__m128) __builtin_ia32_expandloadsf128_mask ((
const __v4sf *) __P,
2593 return (__m256) __builtin_ia32_expandloadsf256_mask ((
const __v8sf *) __P,
2600 return (__m256) __builtin_ia32_expandloadsf256_mask ((
const __v8sf *) __P,
2609 return (__m128i) __builtin_ia32_expandloadsi128_mask ((
const __v4si *) __P,
2617 return (__m128i) __builtin_ia32_expandloadsi128_mask ((
const __v4si *) __P,
2626 return (__m256i) __builtin_ia32_expandloadsi256_mask ((
const __v8si *) __P,
2634 return (__m256i) __builtin_ia32_expandloadsi256_mask ((
const __v8si *) __P,
2643 return (__m128) __builtin_ia32_expandsf128_mask ((__v4sf) __A,
2650 return (__m128) __builtin_ia32_expandsf128_mask ((__v4sf) __A,
2658 return (__m256) __builtin_ia32_expandsf256_mask ((__v8sf) __A,
2665 return (__m256) __builtin_ia32_expandsf256_mask ((__v8sf) __A,
2673 return (__m128i) __builtin_ia32_expandsi128_mask ((__v4si) __A,
2680 return (__m128i) __builtin_ia32_expandsi128_mask ((__v4si) __A,
2688 return (__m256i) __builtin_ia32_expandsi256_mask ((__v8si) __A,
2695 return (__m256i) __builtin_ia32_expandsi256_mask ((__v8si) __A,
2703 return (__m128d) __builtin_ia32_getexppd128_mask ((__v2df) __A,
2711 return (__m128d) __builtin_ia32_getexppd128_mask ((__v2df) __A,
2718 return (__m128d) __builtin_ia32_getexppd128_mask ((__v2df) __A,
2726 return (__m256d) __builtin_ia32_getexppd256_mask ((__v4df) __A,
2734 return (__m256d) __builtin_ia32_getexppd256_mask ((__v4df) __A,
2741 return (__m256d) __builtin_ia32_getexppd256_mask ((__v4df) __A,
2749 return (__m128) __builtin_ia32_getexpps128_mask ((__v4sf) __A,
2757 return (__m128) __builtin_ia32_getexpps128_mask ((__v4sf) __A,
2764 return (__m128) __builtin_ia32_getexpps128_mask ((__v4sf) __A,
2772 return (__m256) __builtin_ia32_getexpps256_mask ((__v8sf) __A,
2780 return (__m256) __builtin_ia32_getexpps256_mask ((__v8sf) __A,
2787 return (__m256) __builtin_ia32_getexpps256_mask ((__v8sf) __A,
2795 return (__m128d)__builtin_ia32_selectpd_128((
__mmask8)__U,
2802 return (__m128d)__builtin_ia32_selectpd_128((
__mmask8)__U,
2809 return (__m256d)__builtin_ia32_selectpd_256((
__mmask8)__U,
2816 return (__m256d)__builtin_ia32_selectpd_256((
__mmask8)__U,
2823 return (__m128)__builtin_ia32_selectps_128((
__mmask8)__U,
2830 return (__m128)__builtin_ia32_selectps_128((
__mmask8)__U,
2837 return (__m256)__builtin_ia32_selectps_256((
__mmask8)__U,
2844 return (__m256)__builtin_ia32_selectps_256((
__mmask8)__U,
2851 return (__m128d)__builtin_ia32_selectpd_128((
__mmask8)__U,
2858 return (__m128d)__builtin_ia32_selectpd_128((
__mmask8)__U,
2865 return (__m256d)__builtin_ia32_selectpd_256((
__mmask8)__U,
2872 return (__m256d)__builtin_ia32_selectpd_256((
__mmask8)__U,
2879 return (__m128)__builtin_ia32_selectps_128((
__mmask8)__U,
2886 return (__m128)__builtin_ia32_selectps_128((
__mmask8)__U,
2893 return (__m256)__builtin_ia32_selectps_256((
__mmask8)__U,
2900 return (__m256)__builtin_ia32_selectps_256((
__mmask8)__U,
2907 return (__m128d)__builtin_ia32_selectpd_128((
__mmask8)__U,
2914 return (__m128d)__builtin_ia32_selectpd_128((
__mmask8)__U,
2921 return (__m256d)__builtin_ia32_selectpd_256((
__mmask8)__U,
2928 return (__m256d)__builtin_ia32_selectpd_256((
__mmask8)__U,
2935 return (__m128)__builtin_ia32_selectps_128((
__mmask8)__U,
2942 return (__m128)__builtin_ia32_selectps_128((
__mmask8)__U,
2949 return (__m256)__builtin_ia32_selectps_256((
__mmask8)__U,
2956 return (__m256)__builtin_ia32_selectps_256((
__mmask8)__U,
2963 return (__m128i)__builtin_ia32_selectd_128((
__mmask8)__U,
2970 return (__m128i)__builtin_ia32_selectd_128((
__mmask8)__U,
2977 return (__m256i)__builtin_ia32_selectd_256((
__mmask8)__U,
2984 return (__m256i)__builtin_ia32_selectd_256((
__mmask8)__U,
2991 return (__m128i)__builtin_ia32_pabsq128((__v2di)__A);
2996 return (__m128i)__builtin_ia32_selectq_128((
__mmask8)__U,
3003 return (__m128i)__builtin_ia32_selectq_128((
__mmask8)__U,
3010 return (__m256i)__builtin_ia32_pabsq256 ((__v4di)__A);
3015 return (__m256i)__builtin_ia32_selectq_256((
__mmask8)__U,
3022 return (__m256i)__builtin_ia32_selectq_256((
__mmask8)__U,
3029 return (__m128i)__builtin_ia32_selectd_128((
__mmask8)__M,
3036 return (__m128i)__builtin_ia32_selectd_128((
__mmask8)__M,
3043 return (__m256i)__builtin_ia32_selectd_256((
__mmask8)__M,
3050 return (__m256i)__builtin_ia32_selectd_256((
__mmask8)__M,
3057 return (__m128i)__builtin_ia32_pmaxsq128((__v2di)__A, (__v2di)__B);
3062 return (__m128i)__builtin_ia32_selectq_128((
__mmask8)__M,
3069 return (__m128i)__builtin_ia32_selectq_128((
__mmask8)__M,
3076 return (__m256i)__builtin_ia32_pmaxsq256((__v4di)__A, (__v4di)__B);
3081 return (__m256i)__builtin_ia32_selectq_256((
__mmask8)__M,
3088 return (__m256i)__builtin_ia32_selectq_256((
__mmask8)__M,
3095 return (__m128i)__builtin_ia32_selectd_128((
__mmask8)__M,
3102 return (__m128i)__builtin_ia32_selectd_128((
__mmask8)__M,
3109 return (__m256i)__builtin_ia32_selectd_256((
__mmask8)__M,
3116 return (__m256i)__builtin_ia32_selectd_256((
__mmask8)__M,
3123 return (__m128i)__builtin_ia32_pmaxuq128((__v2di)__A, (__v2di)__B);
3128 return (__m128i)__builtin_ia32_selectq_128((
__mmask8)__M,
3135 return (__m128i)__builtin_ia32_selectq_128((
__mmask8)__M,
3142 return (__m256i)__builtin_ia32_pmaxuq256((__v4di)__A, (__v4di)__B);
3147 return (__m256i)__builtin_ia32_selectq_256((
__mmask8)__M,
3154 return (__m256i)__builtin_ia32_selectq_256((
__mmask8)__M,
3161 return (__m128i)__builtin_ia32_selectd_128((
__mmask8)__M,
3168 return (__m128i)__builtin_ia32_selectd_128((
__mmask8)__M,
3175 return (__m256i)__builtin_ia32_selectd_256((
__mmask8)__M,
3182 return (__m256i)__builtin_ia32_selectd_256((
__mmask8)__M,
3189 return (__m128i)__builtin_ia32_pminsq128((__v2di)__A, (__v2di)__B);
3194 return (__m128i)__builtin_ia32_selectq_128((
__mmask8)__M,
3201 return (__m128i)__builtin_ia32_selectq_128((
__mmask8)__M,
3208 return (__m256i)__builtin_ia32_pminsq256((__v4di)__A, (__v4di)__B);
3213 return (__m256i)__builtin_ia32_selectq_256((
__mmask8)__M,
3220 return (__m256i)__builtin_ia32_selectq_256((
__mmask8)__M,
3227 return (__m128i)__builtin_ia32_selectd_128((
__mmask8)__M,
3234 return (__m128i)__builtin_ia32_selectd_128((
__mmask8)__M,
3241 return (__m256i)__builtin_ia32_selectd_256((
__mmask8)__M,
3248 return (__m256i)__builtin_ia32_selectd_256((
__mmask8)__M,
3255 return (__m128i)__builtin_ia32_pminuq128((__v2di)__A, (__v2di)__B);
3260 return (__m128i)__builtin_ia32_selectq_128((
__mmask8)__M,
3267 return (__m128i)__builtin_ia32_selectq_128((
__mmask8)__M,
3274 return (__m256i)__builtin_ia32_pminuq256((__v4di)__A, (__v4di)__B);
3279 return (__m256i)__builtin_ia32_selectq_256((
__mmask8)__M,
3286 return (__m256i)__builtin_ia32_selectq_256((
__mmask8)__M,
3291 #define _mm_roundscale_pd(A, imm) \
3292 (__m128d)__builtin_ia32_rndscalepd_128_mask((__v2df)(__m128d)(A), \
3294 (__v2df)_mm_setzero_pd(), \
3298 #define _mm_mask_roundscale_pd(W, U, A, imm) \
3299 (__m128d)__builtin_ia32_rndscalepd_128_mask((__v2df)(__m128d)(A), \
3301 (__v2df)(__m128d)(W), \
3305 #define _mm_maskz_roundscale_pd(U, A, imm) \
3306 (__m128d)__builtin_ia32_rndscalepd_128_mask((__v2df)(__m128d)(A), \
3308 (__v2df)_mm_setzero_pd(), \
3312 #define _mm256_roundscale_pd(A, imm) \
3313 (__m256d)__builtin_ia32_rndscalepd_256_mask((__v4df)(__m256d)(A), \
3315 (__v4df)_mm256_setzero_pd(), \
3319 #define _mm256_mask_roundscale_pd(W, U, A, imm) \
3320 (__m256d)__builtin_ia32_rndscalepd_256_mask((__v4df)(__m256d)(A), \
3322 (__v4df)(__m256d)(W), \
3326 #define _mm256_maskz_roundscale_pd(U, A, imm) \
3327 (__m256d)__builtin_ia32_rndscalepd_256_mask((__v4df)(__m256d)(A), \
3329 (__v4df)_mm256_setzero_pd(), \
3332 #define _mm_roundscale_ps(A, imm) \
3333 (__m128)__builtin_ia32_rndscaleps_128_mask((__v4sf)(__m128)(A), (int)(imm), \
3334 (__v4sf)_mm_setzero_ps(), \
3338 #define _mm_mask_roundscale_ps(W, U, A, imm) \
3339 (__m128)__builtin_ia32_rndscaleps_128_mask((__v4sf)(__m128)(A), (int)(imm), \
3340 (__v4sf)(__m128)(W), \
3344 #define _mm_maskz_roundscale_ps(U, A, imm) \
3345 (__m128)__builtin_ia32_rndscaleps_128_mask((__v4sf)(__m128)(A), (int)(imm), \
3346 (__v4sf)_mm_setzero_ps(), \
3349 #define _mm256_roundscale_ps(A, imm) \
3350 (__m256)__builtin_ia32_rndscaleps_256_mask((__v8sf)(__m256)(A), (int)(imm), \
3351 (__v8sf)_mm256_setzero_ps(), \
3354 #define _mm256_mask_roundscale_ps(W, U, A, imm) \
3355 (__m256)__builtin_ia32_rndscaleps_256_mask((__v8sf)(__m256)(A), (int)(imm), \
3356 (__v8sf)(__m256)(W), \
3360 #define _mm256_maskz_roundscale_ps(U, A, imm) \
3361 (__m256)__builtin_ia32_rndscaleps_256_mask((__v8sf)(__m256)(A), (int)(imm), \
3362 (__v8sf)_mm256_setzero_ps(), \
3367 return (__m128d) __builtin_ia32_scalefpd128_mask ((__v2df) __A,
3377 return (__m128d) __builtin_ia32_scalefpd128_mask ((__v2df) __A,
3385 return (__m128d) __builtin_ia32_scalefpd128_mask ((__v2df) __A,
3394 return (__m256d) __builtin_ia32_scalefpd256_mask ((__v4df) __A,
3404 return (__m256d) __builtin_ia32_scalefpd256_mask ((__v4df) __A,
3412 return (__m256d) __builtin_ia32_scalefpd256_mask ((__v4df) __A,
3421 return (__m128) __builtin_ia32_scalefps128_mask ((__v4sf) __A,
3430 return (__m128) __builtin_ia32_scalefps128_mask ((__v4sf) __A,
3438 return (__m128) __builtin_ia32_scalefps128_mask ((__v4sf) __A,
3447 return (__m256) __builtin_ia32_scalefps256_mask ((__v8sf) __A,
3457 return (__m256) __builtin_ia32_scalefps256_mask ((__v8sf) __A,
3465 return (__m256) __builtin_ia32_scalefps256_mask ((__v8sf) __A,
/* 64-bit-index scatter stores for double and 64-bit-integer elements.
   Each macro stores the elements of v1 to memory at addr + index[i]*scale
   (scale is forwarded as an int to the builtin; per the Intel intrinsics
   definition it must be 1, 2, 4 or 8 -- see Intel Intrinsics Guide).
   The unmasked forms pass (__mmask8)-1 so every element is written; the
   _mask_ forms write only elements whose corresponding bit in mask is set. */
/* 128-bit: scatter 2 doubles through 2 64-bit indices. */
3472 #define _mm_i64scatter_pd(addr, index, v1, scale) \
3473 __builtin_ia32_scatterdiv2df((void *)(addr), (__mmask8)-1, \
3474 (__v2di)(__m128i)(index), \
3475 (__v2df)(__m128d)(v1), (int)(scale))
/* Masked variant: only lanes with the mask bit set are stored. */
3477 #define _mm_mask_i64scatter_pd(addr, mask, index, v1, scale) \
3478 __builtin_ia32_scatterdiv2df((void *)(addr), (__mmask8)(mask), \
3479 (__v2di)(__m128i)(index), \
3480 (__v2df)(__m128d)(v1), (int)(scale))
/* 128-bit: scatter 2 64-bit integers through 2 64-bit indices. */
3482 #define _mm_i64scatter_epi64(addr, index, v1, scale) \
3483 __builtin_ia32_scatterdiv2di((void *)(addr), (__mmask8)-1, \
3484 (__v2di)(__m128i)(index), \
3485 (__v2di)(__m128i)(v1), (int)(scale))
3487 #define _mm_mask_i64scatter_epi64(addr, mask, index, v1, scale) \
3488 __builtin_ia32_scatterdiv2di((void *)(addr), (__mmask8)(mask), \
3489 (__v2di)(__m128i)(index), \
3490 (__v2di)(__m128i)(v1), (int)(scale))
/* 256-bit: scatter 4 doubles through 4 64-bit indices. */
3492 #define _mm256_i64scatter_pd(addr, index, v1, scale) \
3493 __builtin_ia32_scatterdiv4df((void *)(addr), (__mmask8)-1, \
3494 (__v4di)(__m256i)(index), \
3495 (__v4df)(__m256d)(v1), (int)(scale))
3497 #define _mm256_mask_i64scatter_pd(addr, mask, index, v1, scale) \
3498 __builtin_ia32_scatterdiv4df((void *)(addr), (__mmask8)(mask), \
3499 (__v4di)(__m256i)(index), \
3500 (__v4df)(__m256d)(v1), (int)(scale))
/* 256-bit: scatter 4 64-bit integers through 4 64-bit indices. */
3502 #define _mm256_i64scatter_epi64(addr, index, v1, scale) \
3503 __builtin_ia32_scatterdiv4di((void *)(addr), (__mmask8)-1, \
3504 (__v4di)(__m256i)(index), \
3505 (__v4di)(__m256i)(v1), (int)(scale))
3507 #define _mm256_mask_i64scatter_epi64(addr, mask, index, v1, scale) \
3508 __builtin_ia32_scatterdiv4di((void *)(addr), (__mmask8)(mask), \
3509 (__v4di)(__m256i)(index), \
3510 (__v4di)(__m256i)(v1), (int)(scale))
3512 #define _mm_i64scatter_ps(addr, index, v1, scale) \
3513 __builtin_ia32_scatterdiv4sf((void *)(addr), (__mmask8)-1, \
3514 (__v2di)(__m128i)(index), (__v4sf)(__m128)(v1), \
3517 #define _mm_mask_i64scatter_ps(addr, mask, index, v1, scale) \
3518 __builtin_ia32_scatterdiv4sf((void *)(addr), (__mmask8)(mask), \
3519 (__v2di)(__m128i)(index), (__v4sf)(__m128)(v1), \
/* Scatter 32-bit integer elements through 2 64-bit indices (128-bit forms).
   Note v1 is a __m128i viewed as __v4si but the index vector holds only two
   64-bit indices, so only the low 32-bit elements participate -- NOTE(review):
   element count follows the index width per the Intel intrinsics definition;
   confirm against the Intrinsics Guide.  Unmasked form passes an all-ones
   mask; the _mask_ form stores only lanes whose mask bit is set. */
3522 #define _mm_i64scatter_epi32(addr, index, v1, scale) \
3523 __builtin_ia32_scatterdiv4si((void *)(addr), (__mmask8)-1, \
3524 (__v2di)(__m128i)(index), \
3525 (__v4si)(__m128i)(v1), (int)(scale))
3527 #define _mm_mask_i64scatter_epi32(addr, mask, index, v1, scale) \
3528 __builtin_ia32_scatterdiv4si((void *)(addr), (__mmask8)(mask), \
3529 (__v2di)(__m128i)(index), \
3530 (__v4si)(__m128i)(v1), (int)(scale))
3532 #define _mm256_i64scatter_ps(addr, index, v1, scale) \
3533 __builtin_ia32_scatterdiv8sf((void *)(addr), (__mmask8)-1, \
3534 (__v4di)(__m256i)(index), (__v4sf)(__m128)(v1), \
3537 #define _mm256_mask_i64scatter_ps(addr, mask, index, v1, scale) \
3538 __builtin_ia32_scatterdiv8sf((void *)(addr), (__mmask8)(mask), \
3539 (__v4di)(__m256i)(index), (__v4sf)(__m128)(v1), \
/* Scatter 4 32-bit integer elements (held in a __m128i) through 4 64-bit
   indices taken from a __m256i.  Destination addresses are addr +
   index[i]*scale; scale is forwarded as an int to the builtin.  The
   unmasked form writes all lanes ((__mmask8)-1); the _mask_ form writes
   only lanes whose bit in mask is set. */
3542 #define _mm256_i64scatter_epi32(addr, index, v1, scale) \
3543 __builtin_ia32_scatterdiv8si((void *)(addr), (__mmask8)-1, \
3544 (__v4di)(__m256i)(index), \
3545 (__v4si)(__m128i)(v1), (int)(scale))
3547 #define _mm256_mask_i64scatter_epi32(addr, mask, index, v1, scale) \
3548 __builtin_ia32_scatterdiv8si((void *)(addr), (__mmask8)(mask), \
3549 (__v4di)(__m256i)(index), \
3550 (__v4si)(__m128i)(v1), (int)(scale))
/* 32-bit-index scatter stores for double and 64-bit-integer elements.
   The index operand is always a __m128i of 32-bit indices; for the 2-element
   128-bit forms only the low two indices are consumed (NOTE(review): element
   count follows the data vector per the Intel intrinsics definition --
   confirm against the Intrinsics Guide).  Unmasked forms pass an all-ones
   __mmask8; _mask_ forms store only lanes whose bit in mask is set. */
/* Scatter 2 doubles through 32-bit indices. */
3552 #define _mm_i32scatter_pd(addr, index, v1, scale) \
3553 __builtin_ia32_scattersiv2df((void *)(addr), (__mmask8)-1, \
3554 (__v4si)(__m128i)(index), \
3555 (__v2df)(__m128d)(v1), (int)(scale))
3557 #define _mm_mask_i32scatter_pd(addr, mask, index, v1, scale) \
3558 __builtin_ia32_scattersiv2df((void *)(addr), (__mmask8)(mask), \
3559 (__v4si)(__m128i)(index), \
3560 (__v2df)(__m128d)(v1), (int)(scale))
/* Scatter 2 64-bit integers through 32-bit indices. */
3562 #define _mm_i32scatter_epi64(addr, index, v1, scale) \
3563 __builtin_ia32_scattersiv2di((void *)(addr), (__mmask8)-1, \
3564 (__v4si)(__m128i)(index), \
3565 (__v2di)(__m128i)(v1), (int)(scale))
3567 #define _mm_mask_i32scatter_epi64(addr, mask, index, v1, scale) \
3568 __builtin_ia32_scattersiv2di((void *)(addr), (__mmask8)(mask), \
3569 (__v4si)(__m128i)(index), \
3570 (__v2di)(__m128i)(v1), (int)(scale))
/* Scatter 4 doubles (256-bit data) through four 32-bit indices. */
3572 #define _mm256_i32scatter_pd(addr, index, v1, scale) \
3573 __builtin_ia32_scattersiv4df((void *)(addr), (__mmask8)-1, \
3574 (__v4si)(__m128i)(index), \
3575 (__v4df)(__m256d)(v1), (int)(scale))
3577 #define _mm256_mask_i32scatter_pd(addr, mask, index, v1, scale) \
3578 __builtin_ia32_scattersiv4df((void *)(addr), (__mmask8)(mask), \
3579 (__v4si)(__m128i)(index), \
3580 (__v4df)(__m256d)(v1), (int)(scale))
/* Scatter 4 64-bit integers (256-bit data) through four 32-bit indices. */
3582 #define _mm256_i32scatter_epi64(addr, index, v1, scale) \
3583 __builtin_ia32_scattersiv4di((void *)(addr), (__mmask8)-1, \
3584 (__v4si)(__m128i)(index), \
3585 (__v4di)(__m256i)(v1), (int)(scale))
3587 #define _mm256_mask_i32scatter_epi64(addr, mask, index, v1, scale) \
3588 __builtin_ia32_scattersiv4di((void *)(addr), (__mmask8)(mask), \
3589 (__v4si)(__m128i)(index), \
3590 (__v4di)(__m256i)(v1), (int)(scale))
3592 #define _mm_i32scatter_ps(addr, index, v1, scale) \
3593 __builtin_ia32_scattersiv4sf((void *)(addr), (__mmask8)-1, \
3594 (__v4si)(__m128i)(index), (__v4sf)(__m128)(v1), \
3597 #define _mm_mask_i32scatter_ps(addr, mask, index, v1, scale) \
3598 __builtin_ia32_scattersiv4sf((void *)(addr), (__mmask8)(mask), \
3599 (__v4si)(__m128i)(index), (__v4sf)(__m128)(v1), \
/* Scatter 4 32-bit integer elements through 4 32-bit indices (128-bit).
   Stores v1[i] to addr + index[i]*scale; scale is forwarded as an int.
   Unmasked form writes all lanes ((__mmask8)-1); the _mask_ form writes
   only lanes whose corresponding bit in mask is set. */
3602 #define _mm_i32scatter_epi32(addr, index, v1, scale) \
3603 __builtin_ia32_scattersiv4si((void *)(addr), (__mmask8)-1, \
3604 (__v4si)(__m128i)(index), \
3605 (__v4si)(__m128i)(v1), (int)(scale))
3607 #define _mm_mask_i32scatter_epi32(addr, mask, index, v1, scale) \
3608 __builtin_ia32_scattersiv4si((void *)(addr), (__mmask8)(mask), \
3609 (__v4si)(__m128i)(index), \
3610 (__v4si)(__m128i)(v1), (int)(scale))
3612 #define _mm256_i32scatter_ps(addr, index, v1, scale) \
3613 __builtin_ia32_scattersiv8sf((void *)(addr), (__mmask8)-1, \
3614 (__v8si)(__m256i)(index), (__v8sf)(__m256)(v1), \
3617 #define _mm256_mask_i32scatter_ps(addr, mask, index, v1, scale) \
3618 __builtin_ia32_scattersiv8sf((void *)(addr), (__mmask8)(mask), \
3619 (__v8si)(__m256i)(index), (__v8sf)(__m256)(v1), \
/* Scatter 8 32-bit integer elements through 8 32-bit indices (256-bit).
   Stores v1[i] to addr + index[i]*scale; scale is forwarded as an int.
   Unmasked form writes all lanes ((__mmask8)-1); the _mask_ form writes
   only lanes whose corresponding bit in mask is set. */
3622 #define _mm256_i32scatter_epi32(addr, index, v1, scale) \
3623 __builtin_ia32_scattersiv8si((void *)(addr), (__mmask8)-1, \
3624 (__v8si)(__m256i)(index), \
3625 (__v8si)(__m256i)(v1), (int)(scale))
3627 #define _mm256_mask_i32scatter_epi32(addr, mask, index, v1, scale) \
3628 __builtin_ia32_scattersiv8si((void *)(addr), (__mmask8)(mask), \
3629 (__v8si)(__m256i)(index), \
3630 (__v8si)(__m256i)(v1), (int)(scale))
3634 return (__m128d)__builtin_ia32_selectpd_128((
__mmask8)__U,
3641 return (__m128d)__builtin_ia32_selectpd_128((
__mmask8)__U,
3648 return (__m256d)__builtin_ia32_selectpd_256((
__mmask8)__U,
3655 return (__m256d)__builtin_ia32_selectpd_256((
__mmask8)__U,
3662 return (__m128)__builtin_ia32_selectps_128((
__mmask8)__U,
3669 return (__m128)__builtin_ia32_selectps_128((
__mmask8)__U,
3676 return (__m256)__builtin_ia32_selectps_256((
__mmask8)__U,
3683 return (__m256)__builtin_ia32_selectps_256((
__mmask8)__U,
3690 return (__m128d)__builtin_ia32_selectpd_128((
__mmask8)__U,
3697 return (__m128d)__builtin_ia32_selectpd_128((
__mmask8)__U,
3704 return (__m256d)__builtin_ia32_selectpd_256((
__mmask8)__U,
3711 return (__m256d)__builtin_ia32_selectpd_256((
__mmask8)__U,
3718 return (__m128)__builtin_ia32_selectps_128((
__mmask8)__U,
3725 return (__m128)__builtin_ia32_selectps_128((
__mmask8)__U,
3732 return (__m256)__builtin_ia32_selectps_256((
__mmask8)__U,
3739 return (__m256)__builtin_ia32_selectps_256((
__mmask8)__U,
3746 return (__m128i)__builtin_ia32_vpermi2vard128((__v4si) __A, (__v4si)__I,
3753 return (__m128i)__builtin_ia32_selectd_128(__U,
3761 return (__m128i)__builtin_ia32_selectd_128(__U,
3769 return (__m128i)__builtin_ia32_selectd_128(__U,
3776 return (__m256i)__builtin_ia32_vpermi2vard256((__v8si)__A, (__v8si) __I,
3783 return (__m256i)__builtin_ia32_selectd_256(__U,
3791 return (__m256i)__builtin_ia32_selectd_256(__U,
3799 return (__m256i)__builtin_ia32_selectd_256(__U,
3806 return (__m128d)__builtin_ia32_vpermi2varpd128((__v2df)__A, (__v2di)__I,
3812 return (__m128d)__builtin_ia32_selectpd_128(__U,
3819 return (__m128d)__builtin_ia32_selectpd_128(__U,
3821 (__v2df)(__m128d)__I);
3826 return (__m128d)__builtin_ia32_selectpd_128(__U,
3833 return (__m256d)__builtin_ia32_vpermi2varpd256((__v4df)__A, (__v4di)__I,
3840 return (__m256d)__builtin_ia32_selectpd_256(__U,
3848 return (__m256d)__builtin_ia32_selectpd_256(__U,
3850 (__v4df)(__m256d)__I);
3856 return (__m256d)__builtin_ia32_selectpd_256(__U,
3863 return (__m128)__builtin_ia32_vpermi2varps128((__v4sf)__A, (__v4si)__I,
3869 return (__m128)__builtin_ia32_selectps_128(__U,
3876 return (__m128)__builtin_ia32_selectps_128(__U,
3878 (__v4sf)(__m128)__I);
3883 return (__m128)__builtin_ia32_selectps_128(__U,
3890 return (__m256)__builtin_ia32_vpermi2varps256((__v8sf)__A, (__v8si)__I,
3896 return (__m256)__builtin_ia32_selectps_256(__U,
3904 return (__m256)__builtin_ia32_selectps_256(__U,
3906 (__v8sf)(__m256)__I);
3912 return (__m256)__builtin_ia32_selectps_256(__U,
3919 return (__m128i)__builtin_ia32_vpermi2varq128((__v2di)__A, (__v2di)__I,
3926 return (__m128i)__builtin_ia32_selectq_128(__U,
3934 return (__m128i)__builtin_ia32_selectq_128(__U,
3942 return (__m128i)__builtin_ia32_selectq_128(__U,
3950 return (__m256i)__builtin_ia32_vpermi2varq256((__v4di)__A, (__v4di) __I,
3957 return (__m256i)__builtin_ia32_selectq_256(__U,
3965 return (__m256i)__builtin_ia32_selectq_256(__U,
3973 return (__m256i)__builtin_ia32_selectq_256(__U,
3981 return (__m128i)__builtin_ia32_selectd_128((
__mmask8)__U,
3989 return (__m128i)__builtin_ia32_selectd_128((
__mmask8)__U,
3997 return (__m256i)__builtin_ia32_selectd_256((
__mmask8)__U,
4005 return (__m256i)__builtin_ia32_selectd_256((
__mmask8)__U,
4013 return (__m128i)__builtin_ia32_selectq_128((
__mmask8)__U,
4021 return (__m128i)__builtin_ia32_selectq_128((
__mmask8)__U,
4029 return (__m256i)__builtin_ia32_selectq_256((
__mmask8)__U,
4037 return (__m256i)__builtin_ia32_selectq_256((
__mmask8)__U,
4045 return (__m128i)__builtin_ia32_selectq_128((
__mmask8)__U,
4053 return (__m128i)__builtin_ia32_selectq_128((
__mmask8)__U,
4061 return (__m256i)__builtin_ia32_selectq_256((
__mmask8)__U,
4069 return (__m256i)__builtin_ia32_selectq_256((
__mmask8)__U,
4077 return (__m128i)__builtin_ia32_selectd_128((
__mmask8)__U,
4085 return (__m128i)__builtin_ia32_selectd_128((
__mmask8)__U,
4093 return (__m256i)__builtin_ia32_selectd_256((
__mmask8)__U,
4101 return (__m256i)__builtin_ia32_selectd_256((
__mmask8)__U,
4109 return (__m128i)__builtin_ia32_selectq_128((
__mmask8)__U,
4117 return (__m128i)__builtin_ia32_selectq_128((
__mmask8)__U,
4125 return (__m256i)__builtin_ia32_selectq_256((
__mmask8)__U,
4133 return (__m256i)__builtin_ia32_selectq_256((
__mmask8)__U,
4142 return (__m128i)__builtin_ia32_selectd_128((
__mmask8)__U,
4150 return (__m128i)__builtin_ia32_selectd_128((
__mmask8)__U,
4158 return (__m256i)__builtin_ia32_selectd_256((
__mmask8)__U,
4166 return (__m256i)__builtin_ia32_selectd_256((
__mmask8)__U,
4174 return (__m128i)__builtin_ia32_selectq_128((
__mmask8)__U,
4182 return (__m128i)__builtin_ia32_selectq_128((
__mmask8)__U,
4190 return (__m256i)__builtin_ia32_selectq_256((
__mmask8)__U,
4198 return (__m256i)__builtin_ia32_selectq_256((
__mmask8)__U,
4206 return (__m128i)__builtin_ia32_selectq_128((
__mmask8)__U,
4214 return (__m128i)__builtin_ia32_selectq_128((
__mmask8)__U,
4222 return (__m256i)__builtin_ia32_selectq_256((
__mmask8)__U,
4230 return (__m256i)__builtin_ia32_selectq_256((
__mmask8)__U,
4238 return (__m128i)__builtin_ia32_selectd_128((
__mmask8)__U,
4246 return (__m128i)__builtin_ia32_selectd_128((
__mmask8)__U,
4254 return (__m256i)__builtin_ia32_selectd_256((
__mmask8)__U,
4262 return (__m256i)__builtin_ia32_selectd_256((
__mmask8)__U,
4270 return (__m128i)__builtin_ia32_selectq_128((
__mmask8)__U,
4278 return (__m128i)__builtin_ia32_selectq_128((
__mmask8)__U,
4286 return (__m256i)__builtin_ia32_selectq_256((
__mmask8)__U,
4294 return (__m256i)__builtin_ia32_selectq_256((
__mmask8)__U,
/* Rotate-left by immediate for 32-bit and 64-bit lanes.
   These are macros (not inline functions) so that the count b reaches the
   builtin as a compile-time constant.  The _mask_ forms merge through the
   select builtins: lanes whose bit in u is set receive the rotated value,
   the remaining lanes keep the corresponding lane of w.  The _maskz_ forms
   zero unselected lanes instead. */
/* Rotate each 32-bit lane of a left by b bits. */
4300 #define _mm_rol_epi32(a, b) \
4301 (__m128i)__builtin_ia32_prold128((__v4si)(__m128i)(a), (int)(b))
4303 #define _mm_mask_rol_epi32(w, u, a, b) \
4304 (__m128i)__builtin_ia32_selectd_128((__mmask8)(u), \
4305 (__v4si)_mm_rol_epi32((a), (b)), \
4306 (__v4si)(__m128i)(w))
4308 #define _mm_maskz_rol_epi32(u, a, b) \
4309 (__m128i)__builtin_ia32_selectd_128((__mmask8)(u), \
4310 (__v4si)_mm_rol_epi32((a), (b)), \
4311 (__v4si)_mm_setzero_si128())
4313 #define _mm256_rol_epi32(a, b) \
4314 (__m256i)__builtin_ia32_prold256((__v8si)(__m256i)(a), (int)(b))
4316 #define _mm256_mask_rol_epi32(w, u, a, b) \
4317 (__m256i)__builtin_ia32_selectd_256((__mmask8)(u), \
4318 (__v8si)_mm256_rol_epi32((a), (b)), \
4319 (__v8si)(__m256i)(w))
4321 #define _mm256_maskz_rol_epi32(u, a, b) \
4322 (__m256i)__builtin_ia32_selectd_256((__mmask8)(u), \
4323 (__v8si)_mm256_rol_epi32((a), (b)), \
4324 (__v8si)_mm256_setzero_si256())
/* Rotate each 64-bit lane of a left by b bits. */
4326 #define _mm_rol_epi64(a, b) \
4327 (__m128i)__builtin_ia32_prolq128((__v2di)(__m128i)(a), (int)(b))
4329 #define _mm_mask_rol_epi64(w, u, a, b) \
4330 (__m128i)__builtin_ia32_selectq_128((__mmask8)(u), \
4331 (__v2di)_mm_rol_epi64((a), (b)), \
4332 (__v2di)(__m128i)(w))
4334 #define _mm_maskz_rol_epi64(u, a, b) \
4335 (__m128i)__builtin_ia32_selectq_128((__mmask8)(u), \
4336 (__v2di)_mm_rol_epi64((a), (b)), \
4337 (__v2di)_mm_setzero_si128())
4339 #define _mm256_rol_epi64(a, b) \
4340 (__m256i)__builtin_ia32_prolq256((__v4di)(__m256i)(a), (int)(b))
4342 #define _mm256_mask_rol_epi64(w, u, a, b) \
4343 (__m256i)__builtin_ia32_selectq_256((__mmask8)(u), \
4344 (__v4di)_mm256_rol_epi64((a), (b)), \
4345 (__v4di)(__m256i)(w))
4347 #define _mm256_maskz_rol_epi64(u, a, b) \
4348 (__m256i)__builtin_ia32_selectq_256((__mmask8)(u), \
4349 (__v4di)_mm256_rol_epi64((a), (b)), \
4350 (__v4di)_mm256_setzero_si256())
4355 return (__m128i)__builtin_ia32_prolvd128((__v4si)__A, (__v4si)__B);
4361 return (__m128i)__builtin_ia32_selectd_128(__U,
4369 return (__m128i)__builtin_ia32_selectd_128(__U,
4377 return (__m256i)__builtin_ia32_prolvd256((__v8si)__A, (__v8si)__B);
4383 return (__m256i)__builtin_ia32_selectd_256(__U,
4391 return (__m256i)__builtin_ia32_selectd_256(__U,
4399 return (__m128i)__builtin_ia32_prolvq128((__v2di)__A, (__v2di)__B);
4405 return (__m128i)__builtin_ia32_selectq_128(__U,
4413 return (__m128i)__builtin_ia32_selectq_128(__U,
4421 return (__m256i)__builtin_ia32_prolvq256((__v4di)__A, (__v4di)__B);
4427 return (__m256i)__builtin_ia32_selectq_256(__U,
4435 return (__m256i)__builtin_ia32_selectq_256(__U,
/* Rotate-right by immediate for 32-bit and 64-bit lanes; mirrors the
   rol_* family above but uses the prord/prorq builtins.  Macros keep the
   count b a compile-time constant for the builtin.  _mask_ forms merge
   with w via the select builtins (mask bit set -> rotated lane, clear ->
   lane of w); _maskz_ forms zero unselected lanes. */
/* Rotate each 32-bit lane of a right by b bits. */
4440 #define _mm_ror_epi32(a, b) \
4441 (__m128i)__builtin_ia32_prord128((__v4si)(__m128i)(a), (int)(b))
4443 #define _mm_mask_ror_epi32(w, u, a, b) \
4444 (__m128i)__builtin_ia32_selectd_128((__mmask8)(u), \
4445 (__v4si)_mm_ror_epi32((a), (b)), \
4446 (__v4si)(__m128i)(w))
4448 #define _mm_maskz_ror_epi32(u, a, b) \
4449 (__m128i)__builtin_ia32_selectd_128((__mmask8)(u), \
4450 (__v4si)_mm_ror_epi32((a), (b)), \
4451 (__v4si)_mm_setzero_si128())
4453 #define _mm256_ror_epi32(a, b) \
4454 (__m256i)__builtin_ia32_prord256((__v8si)(__m256i)(a), (int)(b))
4456 #define _mm256_mask_ror_epi32(w, u, a, b) \
4457 (__m256i)__builtin_ia32_selectd_256((__mmask8)(u), \
4458 (__v8si)_mm256_ror_epi32((a), (b)), \
4459 (__v8si)(__m256i)(w))
4461 #define _mm256_maskz_ror_epi32(u, a, b) \
4462 (__m256i)__builtin_ia32_selectd_256((__mmask8)(u), \
4463 (__v8si)_mm256_ror_epi32((a), (b)), \
4464 (__v8si)_mm256_setzero_si256())
/* Rotate each 64-bit lane of a right by b bits. */
4466 #define _mm_ror_epi64(a, b) \
4467 (__m128i)__builtin_ia32_prorq128((__v2di)(__m128i)(a), (int)(b))
4469 #define _mm_mask_ror_epi64(w, u, a, b) \
4470 (__m128i)__builtin_ia32_selectq_128((__mmask8)(u), \
4471 (__v2di)_mm_ror_epi64((a), (b)), \
4472 (__v2di)(__m128i)(w))
4474 #define _mm_maskz_ror_epi64(u, a, b) \
4475 (__m128i)__builtin_ia32_selectq_128((__mmask8)(u), \
4476 (__v2di)_mm_ror_epi64((a), (b)), \
4477 (__v2di)_mm_setzero_si128())
4479 #define _mm256_ror_epi64(a, b) \
4480 (__m256i)__builtin_ia32_prorq256((__v4di)(__m256i)(a), (int)(b))
4482 #define _mm256_mask_ror_epi64(w, u, a, b) \
4483 (__m256i)__builtin_ia32_selectq_256((__mmask8)(u), \
4484 (__v4di)_mm256_ror_epi64((a), (b)), \
4485 (__v4di)(__m256i)(w))
4487 #define _mm256_maskz_ror_epi64(u, a, b) \
4488 (__m256i)__builtin_ia32_selectq_256((__mmask8)(u), \
4489 (__v4di)_mm256_ror_epi64((a), (b)), \
4490 (__v4di)_mm256_setzero_si256())
4495 return (__m128i)__builtin_ia32_selectd_128((
__mmask8)__U,
4503 return (__m128i)__builtin_ia32_selectd_128((
__mmask8)__U,
4511 return (__m256i)__builtin_ia32_selectd_256((
__mmask8)__U,
4519 return (__m256i)__builtin_ia32_selectd_256((
__mmask8)__U,
4527 return (__m128i)__builtin_ia32_selectd_128((
__mmask8)__U,
4535 return (__m128i)__builtin_ia32_selectd_128((
__mmask8)__U,
4543 return (__m256i)__builtin_ia32_selectd_256((
__mmask8)__U,
4551 return (__m256i)__builtin_ia32_selectd_256((
__mmask8)__U,
4559 return (__m128i)__builtin_ia32_selectq_128((
__mmask8)__U,
4567 return (__m128i)__builtin_ia32_selectq_128((
__mmask8)__U,
4575 return (__m256i)__builtin_ia32_selectq_256((
__mmask8)__U,
4583 return (__m256i)__builtin_ia32_selectq_256((
__mmask8)__U,
4591 return (__m128i)__builtin_ia32_selectq_128((
__mmask8)__U,
4599 return (__m128i)__builtin_ia32_selectq_128((
__mmask8)__U,
4607 return (__m256i)__builtin_ia32_selectq_256((
__mmask8)__U,
4615 return (__m256i)__builtin_ia32_selectq_256((
__mmask8)__U,
4623 return (__m128i)__builtin_ia32_prorvd128((__v4si)__A, (__v4si)__B);
4629 return (__m128i)__builtin_ia32_selectd_128(__U,
4637 return (__m128i)__builtin_ia32_selectd_128(__U,
4645 return (__m256i)__builtin_ia32_prorvd256((__v8si)__A, (__v8si)__B);
4651 return (__m256i)__builtin_ia32_selectd_256(__U,
4659 return (__m256i)__builtin_ia32_selectd_256(__U,
4667 return (__m128i)__builtin_ia32_prorvq128((__v2di)__A, (__v2di)__B);
4673 return (__m128i)__builtin_ia32_selectq_128(__U,
4681 return (__m128i)__builtin_ia32_selectq_128(__U,
4689 return (__m256i)__builtin_ia32_prorvq256((__v4di)__A, (__v4di)__B);
4695 return (__m256i)__builtin_ia32_selectq_256(__U,
4703 return (__m256i)__builtin_ia32_selectq_256(__U,
4711 return (__m128i)__builtin_ia32_selectq_128((
__mmask8)__U,
4719 return (__m128i)__builtin_ia32_selectq_128((
__mmask8)__U,
4727 return (__m256i)__builtin_ia32_selectq_256((
__mmask8)__U,
4735 return (__m256i)__builtin_ia32_selectq_256((
__mmask8)__U,
4743 return (__m128i)__builtin_ia32_selectd_128((
__mmask8)__U,
4751 return (__m128i)__builtin_ia32_selectd_128((
__mmask8)__U,
4759 return (__m256i)__builtin_ia32_selectd_256((
__mmask8)__U,
4767 return (__m256i)__builtin_ia32_selectd_256((
__mmask8)__U,
4775 return (__m128i)__builtin_ia32_selectq_128((
__mmask8)__U,
4783 return (__m128i)__builtin_ia32_selectq_128((
__mmask8)__U,
4791 return (__m256i)__builtin_ia32_selectq_256((
__mmask8)__U,
4799 return (__m256i)__builtin_ia32_selectq_256((
__mmask8)__U,
4807 return (__m128i)__builtin_ia32_selectd_128((
__mmask8)__U,
4815 return (__m128i)__builtin_ia32_selectd_128((
__mmask8)__U,
4823 return (__m256i)__builtin_ia32_selectd_256((
__mmask8)__U,
4831 return (__m256i)__builtin_ia32_selectd_256((
__mmask8)__U,
4839 return (__m128i)__builtin_ia32_selectd_128((
__mmask8)__U,
4847 return (__m128i)__builtin_ia32_selectd_128((
__mmask8)__U,
4855 return (__m256i)__builtin_ia32_selectd_256((
__mmask8)__U,
4863 return (__m256i)__builtin_ia32_selectd_256((
__mmask8)__U,
4871 return (__m128i)__builtin_ia32_selectd_128((
__mmask8)__U,
4879 return (__m128i)__builtin_ia32_selectd_128((
__mmask8)__U,
4887 return (__m256i)__builtin_ia32_selectd_256((
__mmask8)__U,
4895 return (__m256i)__builtin_ia32_selectd_256((
__mmask8)__U,
4903 return (__m128i)__builtin_ia32_selectq_128((
__mmask8)__U,
4911 return (__m128i)__builtin_ia32_selectq_128((
__mmask8)__U,
4919 return (__m256i)__builtin_ia32_selectq_256((
__mmask8)__U,
4927 return (__m256i)__builtin_ia32_selectq_256((
__mmask8)__U,
4935 return (__m128i)__builtin_ia32_selectq_128((
__mmask8)__U,
4943 return (__m128i)__builtin_ia32_selectq_128((
__mmask8)__U,
4951 return (__m256i)__builtin_ia32_selectq_256((
__mmask8)__U,
4959 return (__m256i)__builtin_ia32_selectq_256((
__mmask8)__U,
4967 return (__m128i)__builtin_ia32_selectd_128((
__mmask8)__U,
4975 return (__m128i)__builtin_ia32_selectd_128((
__mmask8)__U,
4983 return (__m256i)__builtin_ia32_selectd_256((
__mmask8)__U,
4991 return (__m256i)__builtin_ia32_selectd_256((
__mmask8)__U,
4999 return (__m128i)__builtin_ia32_psravq128((__v2di)__X, (__v2di)__Y);
5005 return (__m128i)__builtin_ia32_selectq_128((
__mmask8)__U,
5013 return (__m128i)__builtin_ia32_selectq_128((
__mmask8)__U,
5021 return (__m256i)__builtin_ia32_psravq256((__v4di)__X, (__v4di) __Y);
5027 return (__m256i)__builtin_ia32_selectq_256((
__mmask8)__U,
5035 return (__m256i)__builtin_ia32_selectq_256((
__mmask8)__U,
5043 return (__m128i) __builtin_ia32_selectd_128 ((
__mmask8) __U,
5051 return (__m128i) __builtin_ia32_selectd_128 ((
__mmask8) __U,
5060 return (__m256i) __builtin_ia32_selectd_256 ((
__mmask8) __U,
5068 return (__m256i) __builtin_ia32_selectd_256 ((
__mmask8) __U,
5076 return *(
const __m128i *) __P;
5082 return (__m128i) __builtin_ia32_movdqa32load128_mask ((
const __v4si *) __P,
5091 return (__m128i) __builtin_ia32_movdqa32load128_mask ((
const __v4si *) __P,
5101 return *(
const __m256i *) __P;
5107 return (__m256i) __builtin_ia32_movdqa32load256_mask ((
const __v8si *) __P,
5116 return (__m256i) __builtin_ia32_movdqa32load256_mask ((
const __v8si *) __P,
5126 *(__m128i *) __P = __A;
5132 __builtin_ia32_movdqa32store128_mask ((__v4si *) __P,
5140 *(__m256i *) __P = __A;
5146 __builtin_ia32_movdqa32store256_mask ((__v8si *) __P,
5154 return (__m128i) __builtin_ia32_selectq_128 ((
__mmask8) __U,
5162 return (__m128i) __builtin_ia32_selectq_128 ((
__mmask8) __U,
5170 return (__m256i) __builtin_ia32_selectq_256 ((
__mmask8) __U,
5178 return (__m256i) __builtin_ia32_selectq_256 ((
__mmask8) __U,
5186 return *(
const __m128i *) __P;
5192 return (__m128i) __builtin_ia32_movdqa64load128_mask ((
const __v2di *) __P,
5201 return (__m128i) __builtin_ia32_movdqa64load128_mask ((
const __v2di *) __P,
5211 return *(
const __m256i *) __P;
5217 return (__m256i) __builtin_ia32_movdqa64load256_mask ((
const __v4di *) __P,
5226 return (__m256i) __builtin_ia32_movdqa64load256_mask ((
const __v4di *) __P,
5236 *(__m128i *) __P = __A;
5242 __builtin_ia32_movdqa64store128_mask ((__v2di *) __P,
5250 *(__m256i *) __P = __A;
5256 __builtin_ia32_movdqa64store256_mask ((__v4di *) __P,
5264 return (__m128d)__builtin_ia32_selectpd_128((
__mmask8)__U,
5272 return (__m128d)__builtin_ia32_selectpd_128((
__mmask8)__U,
5280 return (__m256d)__builtin_ia32_selectpd_256((
__mmask8)__U,
5288 return (__m256d)__builtin_ia32_selectpd_256((
__mmask8)__U,
5296 return (__m128i)__builtin_ia32_selectd_128(__M,
5304 return (__m128i)__builtin_ia32_selectd_128(__M,
5312 return (__m256i)__builtin_ia32_selectd_256(__M,
5320 return (__m256i)__builtin_ia32_selectd_256(__M,
5329 return (__m128i) __builtin_ia32_selectq_128(__M,
5337 return (__m128i) __builtin_ia32_selectq_128(__M,
5345 return (__m256i) __builtin_ia32_selectq_256(__M,
5353 return (__m256i) __builtin_ia32_selectq_256(__M,
5358 #define _mm_fixupimm_pd(A, B, C, imm) \
5359 (__m128d)__builtin_ia32_fixupimmpd128_mask((__v2df)(__m128d)(A), \
5360 (__v2df)(__m128d)(B), \
5361 (__v2di)(__m128i)(C), (int)(imm), \
5364 #define _mm_mask_fixupimm_pd(A, U, B, C, imm) \
5365 (__m128d)__builtin_ia32_fixupimmpd128_mask((__v2df)(__m128d)(A), \
5366 (__v2df)(__m128d)(B), \
5367 (__v2di)(__m128i)(C), (int)(imm), \
/* Fix up special values (NaN/Inf/denormal/zero) in (A, B) per the table in C
   and the imm control; lanes with a 0 bit in U are zeroed.  The expansion is
   fully parenthesized so the macro is safe inside larger expressions. */
#define _mm_maskz_fixupimm_pd(U, A, B, C, imm) \
  ((__m128d)__builtin_ia32_fixupimmpd128_maskz((__v2df)(__m128d)(A), \
                                               (__v2df)(__m128d)(B), \
                                               (__v2di)(__m128i)(C), \
                                               (int)(imm), (__mmask8)(U)))
5376 #define _mm256_fixupimm_pd(A, B, C, imm) \
5377 (__m256d)__builtin_ia32_fixupimmpd256_mask((__v4df)(__m256d)(A), \
5378 (__v4df)(__m256d)(B), \
5379 (__v4di)(__m256i)(C), (int)(imm), \
5382 #define _mm256_mask_fixupimm_pd(A, U, B, C, imm) \
5383 (__m256d)__builtin_ia32_fixupimmpd256_mask((__v4df)(__m256d)(A), \
5384 (__v4df)(__m256d)(B), \
5385 (__v4di)(__m256i)(C), (int)(imm), \
/* 256-bit variant of _mm_maskz_fixupimm_pd: fix up special values per the
   table in C and the imm control; lanes with a 0 bit in U are zeroed.  The
   expansion is fully parenthesized so the macro is safe in expressions. */
#define _mm256_maskz_fixupimm_pd(U, A, B, C, imm) \
  ((__m256d)__builtin_ia32_fixupimmpd256_maskz((__v4df)(__m256d)(A), \
                                               (__v4df)(__m256d)(B), \
                                               (__v4di)(__m256i)(C), \
                                               (int)(imm), (__mmask8)(U)))
5394 #define _mm_fixupimm_ps(A, B, C, imm) \
5395 (__m128)__builtin_ia32_fixupimmps128_mask((__v4sf)(__m128)(A), \
5396 (__v4sf)(__m128)(B), \
5397 (__v4si)(__m128i)(C), (int)(imm), \
5400 #define _mm_mask_fixupimm_ps(A, U, B, C, imm) \
5401 (__m128)__builtin_ia32_fixupimmps128_mask((__v4sf)(__m128)(A), \
5402 (__v4sf)(__m128)(B), \
5403 (__v4si)(__m128i)(C), (int)(imm), \
5406 #define _mm_maskz_fixupimm_ps(U, A, B, C, imm) \
5407 (__m128)__builtin_ia32_fixupimmps128_maskz((__v4sf)(__m128)(A), \
5408 (__v4sf)(__m128)(B), \
5409 (__v4si)(__m128i)(C), (int)(imm), \
5412 #define _mm256_fixupimm_ps(A, B, C, imm) \
5413 (__m256)__builtin_ia32_fixupimmps256_mask((__v8sf)(__m256)(A), \
5414 (__v8sf)(__m256)(B), \
5415 (__v8si)(__m256i)(C), (int)(imm), \
5418 #define _mm256_mask_fixupimm_ps(A, U, B, C, imm) \
5419 (__m256)__builtin_ia32_fixupimmps256_mask((__v8sf)(__m256)(A), \
5420 (__v8sf)(__m256)(B), \
5421 (__v8si)(__m256i)(C), (int)(imm), \
5424 #define _mm256_maskz_fixupimm_ps(U, A, B, C, imm) \
5425 (__m256)__builtin_ia32_fixupimmps256_maskz((__v8sf)(__m256)(A), \
5426 (__v8sf)(__m256)(B), \
5427 (__v8si)(__m256i)(C), (int)(imm), \
5433 return (__m128d) __builtin_ia32_loadapd128_mask ((
const __v2df *) __P,
5441 return (__m128d) __builtin_ia32_loadapd128_mask ((
const __v2df *) __P,
5450 return (__m256d) __builtin_ia32_loadapd256_mask ((
const __v4df *) __P,
5458 return (__m256d) __builtin_ia32_loadapd256_mask ((
const __v4df *) __P,
5467 return (__m128) __builtin_ia32_loadaps128_mask ((
const __v4sf *) __P,
5475 return (__m128) __builtin_ia32_loadaps128_mask ((
const __v4sf *) __P,
5484 return (__m256) __builtin_ia32_loadaps256_mask ((
const __v8sf *) __P,
5492 return (__m256) __builtin_ia32_loadaps256_mask ((
const __v8sf *) __P,
5501 struct __loadu_epi64 {
5504 return ((
const struct __loadu_epi64*)__P)->__v;
5510 return (__m128i) __builtin_ia32_loaddqudi128_mask ((
const __v2di *) __P,
5518 return (__m128i) __builtin_ia32_loaddqudi128_mask ((
const __v2di *) __P,
5527 struct __loadu_epi64 {
5530 return ((
const struct __loadu_epi64*)__P)->__v;
5536 return (__m256i) __builtin_ia32_loaddqudi256_mask ((
const __v4di *) __P,
5544 return (__m256i) __builtin_ia32_loaddqudi256_mask ((
const __v4di *) __P,
5553 struct __loadu_epi32 {
5556 return ((
const struct __loadu_epi32*)__P)->__v;
5562 return (__m128i) __builtin_ia32_loaddqusi128_mask ((
const __v4si *) __P,
5570 return (__m128i) __builtin_ia32_loaddqusi128_mask ((
const __v4si *) __P,
5579 struct __loadu_epi32 {
5582 return ((
const struct __loadu_epi32*)__P)->__v;
5588 return (__m256i) __builtin_ia32_loaddqusi256_mask ((
const __v8si *) __P,
5596 return (__m256i) __builtin_ia32_loaddqusi256_mask ((
const __v8si *) __P,
5605 return (__m128d) __builtin_ia32_loadupd128_mask ((
const __v2df *) __P,
5613 return (__m128d) __builtin_ia32_loadupd128_mask ((
const __v2df *) __P,
5622 return (__m256d) __builtin_ia32_loadupd256_mask ((
const __v4df *) __P,
5630 return (__m256d) __builtin_ia32_loadupd256_mask ((
const __v4df *) __P,
5639 return (__m128) __builtin_ia32_loadups128_mask ((
const __v4sf *) __P,
5647 return (__m128) __builtin_ia32_loadups128_mask ((
const __v4sf *) __P,
5656 return (__m256) __builtin_ia32_loadups256_mask ((
const __v8sf *) __P,
5664 return (__m256) __builtin_ia32_loadups256_mask ((
const __v8sf *) __P,
5673 __builtin_ia32_storeapd128_mask ((__v2df *) __P,
5681 __builtin_ia32_storeapd256_mask ((__v4df *) __P,
5689 __builtin_ia32_storeaps128_mask ((__v4sf *) __P,
5697 __builtin_ia32_storeaps256_mask ((__v8sf *) __P,
5705 struct __storeu_epi64 {
5708 ((
struct __storeu_epi64*)__P)->__v = __A;
5714 __builtin_ia32_storedqudi128_mask ((__v2di *) __P,
5722 struct __storeu_epi64 {
5725 ((
struct __storeu_epi64*)__P)->__v = __A;
5731 __builtin_ia32_storedqudi256_mask ((__v4di *) __P,
5739 struct __storeu_epi32 {
5742 ((
struct __storeu_epi32*)__P)->__v = __A;
5748 __builtin_ia32_storedqusi128_mask ((__v4si *) __P,
5756 struct __storeu_epi32 {
5759 ((
struct __storeu_epi32*)__P)->__v = __A;
5765 __builtin_ia32_storedqusi256_mask ((__v8si *) __P,
5773 __builtin_ia32_storeupd128_mask ((__v2df *) __P,
5781 __builtin_ia32_storeupd256_mask ((__v4df *) __P,
5789 __builtin_ia32_storeups128_mask ((__v4sf *) __P,
5797 __builtin_ia32_storeups256_mask ((__v8sf *) __P,
5806 return (__m128d)__builtin_ia32_selectpd_128((
__mmask8)__U,
5814 return (__m128d)__builtin_ia32_selectpd_128((
__mmask8)__U,
5822 return (__m256d)__builtin_ia32_selectpd_256((
__mmask8)__U,
5830 return (__m256d)__builtin_ia32_selectpd_256((
__mmask8)__U,
5838 return (__m128)__builtin_ia32_selectps_128((
__mmask8)__U,
5846 return (__m128)__builtin_ia32_selectps_128((
__mmask8)__U,
5854 return (__m256)__builtin_ia32_selectps_256((
__mmask8)__U,
5862 return (__m256)__builtin_ia32_selectps_256((
__mmask8)__U,
5870 return (__m128d)__builtin_ia32_selectpd_128((
__mmask8)__U,
5878 return (__m128d)__builtin_ia32_selectpd_128((
__mmask8)__U,
5886 return (__m256d)__builtin_ia32_selectpd_256((
__mmask8)__U,
5894 return (__m256d)__builtin_ia32_selectpd_256((
__mmask8)__U,
5902 return (__m128)__builtin_ia32_selectps_128((
__mmask8)__U,
5910 return (__m128)__builtin_ia32_selectps_128((
__mmask8)__U,
5918 return (__m256)__builtin_ia32_selectps_256((
__mmask8)__U,
5926 return (__m256)__builtin_ia32_selectps_256((
__mmask8)__U,
5934 return (__m128d) __builtin_ia32_rcp14pd128_mask ((__v2df) __A,
5943 return (__m128d) __builtin_ia32_rcp14pd128_mask ((__v2df) __A,
5951 return (__m128d) __builtin_ia32_rcp14pd128_mask ((__v2df) __A,
5960 return (__m256d) __builtin_ia32_rcp14pd256_mask ((__v4df) __A,
5969 return (__m256d) __builtin_ia32_rcp14pd256_mask ((__v4df) __A,
5977 return (__m256d) __builtin_ia32_rcp14pd256_mask ((__v4df) __A,
5986 return (__m128) __builtin_ia32_rcp14ps128_mask ((__v4sf) __A,
5995 return (__m128) __builtin_ia32_rcp14ps128_mask ((__v4sf) __A,
6003 return (__m128) __builtin_ia32_rcp14ps128_mask ((__v4sf) __A,
6012 return (__m256) __builtin_ia32_rcp14ps256_mask ((__v8sf) __A,
6021 return (__m256) __builtin_ia32_rcp14ps256_mask ((__v8sf) __A,
6029 return (__m256) __builtin_ia32_rcp14ps256_mask ((__v8sf) __A,
/* Masked in-lane permutes: compute the unmasked permute, then blend under the
   write-mask U — "mask" variants take unselected lanes from W, "maskz"
   variants zero them.  Each expansion is fully parenthesized so the macros
   are safe inside larger expressions (e.g. as operands of unary operators). */
#define _mm_mask_permute_pd(W, U, X, C) \
  ((__m128d)__builtin_ia32_selectpd_128((__mmask8)(U), \
                                        (__v2df)_mm_permute_pd((X), (C)), \
                                        (__v2df)(__m128d)(W)))

#define _mm_maskz_permute_pd(U, X, C) \
  ((__m128d)__builtin_ia32_selectpd_128((__mmask8)(U), \
                                        (__v2df)_mm_permute_pd((X), (C)), \
                                        (__v2df)_mm_setzero_pd()))

#define _mm256_mask_permute_pd(W, U, X, C) \
  ((__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \
                                        (__v4df)_mm256_permute_pd((X), (C)), \
                                        (__v4df)(__m256d)(W)))

#define _mm256_maskz_permute_pd(U, X, C) \
  ((__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \
                                        (__v4df)_mm256_permute_pd((X), (C)), \
                                        (__v4df)_mm256_setzero_pd()))

#define _mm_mask_permute_ps(W, U, X, C) \
  ((__m128)__builtin_ia32_selectps_128((__mmask8)(U), \
                                       (__v4sf)_mm_permute_ps((X), (C)), \
                                       (__v4sf)(__m128)(W)))

#define _mm_maskz_permute_ps(U, X, C) \
  ((__m128)__builtin_ia32_selectps_128((__mmask8)(U), \
                                       (__v4sf)_mm_permute_ps((X), (C)), \
                                       (__v4sf)_mm_setzero_ps()))

#define _mm256_mask_permute_ps(W, U, X, C) \
  ((__m256)__builtin_ia32_selectps_256((__mmask8)(U), \
                                       (__v8sf)_mm256_permute_ps((X), (C)), \
                                       (__v8sf)(__m256)(W)))

#define _mm256_maskz_permute_ps(U, X, C) \
  ((__m256)__builtin_ia32_selectps_256((__mmask8)(U), \
                                       (__v8sf)_mm256_permute_ps((X), (C)), \
                                       (__v8sf)_mm256_setzero_ps()))
6078 return (__m128d)__builtin_ia32_selectpd_128((
__mmask8)__U,
6086 return (__m128d)__builtin_ia32_selectpd_128((
__mmask8)__U,
6094 return (__m256d)__builtin_ia32_selectpd_256((
__mmask8)__U,
6102 return (__m256d)__builtin_ia32_selectpd_256((
__mmask8)__U,
6110 return (__m128)__builtin_ia32_selectps_128((
__mmask8)__U,
6118 return (__m128)__builtin_ia32_selectps_128((
__mmask8)__U,
6126 return (__m256)__builtin_ia32_selectps_256((
__mmask8)__U,
6134 return (__m256)__builtin_ia32_selectps_256((
__mmask8)__U,
6250 return (__m128i)__builtin_ia32_selectd_128((
__mmask8)__U,
6258 return (__m128i)__builtin_ia32_selectd_128((
__mmask8)__U,
6266 return (__m256i)__builtin_ia32_selectd_256((
__mmask8)__U,
6274 return (__m256i)__builtin_ia32_selectd_256((
__mmask8)__U,
6282 return (__m128i)__builtin_ia32_selectq_128((
__mmask8)__U,
6290 return (__m128i)__builtin_ia32_selectq_128((
__mmask8)__U,
6298 return (__m256i)__builtin_ia32_selectq_256((
__mmask8)__U,
6306 return (__m256i)__builtin_ia32_selectq_256((
__mmask8)__U,
6314 return (__m128i)__builtin_ia32_selectd_128((
__mmask8)__U,
6322 return (__m128i)__builtin_ia32_selectd_128((
__mmask8)__U,
6330 return (__m256i)__builtin_ia32_selectd_256((
__mmask8)__U,
6338 return (__m256i)__builtin_ia32_selectd_256((
__mmask8)__U,
6346 return (__m128i)__builtin_ia32_selectq_128((
__mmask8)__U,
6354 return (__m128i)__builtin_ia32_selectq_128((
__mmask8)__U,
6362 return (__m256i)__builtin_ia32_selectq_256((
__mmask8)__U,
6370 return (__m256i)__builtin_ia32_selectq_256((
__mmask8)__U,
6378 return (__m128i)__builtin_ia32_selectd_128((
__mmask8)__U,
6386 return (__m128i)__builtin_ia32_selectd_128((
__mmask8)__U,
6394 return (__m256i)__builtin_ia32_selectd_256((
__mmask8)__U,
6402 return (__m256i)__builtin_ia32_selectd_256((
__mmask8)__U,
6410 return (__m128i)__builtin_ia32_selectd_128((
__mmask8)__U,
6418 return (__m128i)__builtin_ia32_selectd_128((
__mmask8)__U,
6426 return (__m256i)__builtin_ia32_selectd_256((
__mmask8)__U,
6434 return (__m256i)__builtin_ia32_selectd_256((
__mmask8)__U,
6442 return (__m128i)__builtin_ia32_psraq128((__v2di)__A, (__v2di)__B);
6448 return (__m128i)__builtin_ia32_selectq_128((
__mmask8)__U, \
6456 return (__m128i)__builtin_ia32_selectq_128((
__mmask8)__U, \
6464 return (__m256i)__builtin_ia32_psraq256((__v4di) __A, (__v2di) __B);
6470 return (__m256i)__builtin_ia32_selectq_256((
__mmask8)__U, \
6478 return (__m256i)__builtin_ia32_selectq_256((
__mmask8)__U, \
6486 return (__m128i)__builtin_ia32_psraqi128((__v2di)__A, __imm);
6492 return (__m128i)__builtin_ia32_selectq_128((
__mmask8)__U, \
6500 return (__m128i)__builtin_ia32_selectq_128((
__mmask8)__U, \
6508 return (__m256i)__builtin_ia32_psraqi256((__v4di)__A, __imm);
6515 return (__m256i)__builtin_ia32_selectq_256((
__mmask8)__U, \
6523 return (__m256i)__builtin_ia32_selectq_256((
__mmask8)__U, \
6528 #define _mm_ternarylogic_epi32(A, B, C, imm) \
6529 (__m128i)__builtin_ia32_pternlogd128_mask((__v4si)(__m128i)(A), \
6530 (__v4si)(__m128i)(B), \
6531 (__v4si)(__m128i)(C), (int)(imm), \
6534 #define _mm_mask_ternarylogic_epi32(A, U, B, C, imm) \
6535 (__m128i)__builtin_ia32_pternlogd128_mask((__v4si)(__m128i)(A), \
6536 (__v4si)(__m128i)(B), \
6537 (__v4si)(__m128i)(C), (int)(imm), \
6540 #define _mm_maskz_ternarylogic_epi32(U, A, B, C, imm) \
6541 (__m128i)__builtin_ia32_pternlogd128_maskz((__v4si)(__m128i)(A), \
6542 (__v4si)(__m128i)(B), \
6543 (__v4si)(__m128i)(C), (int)(imm), \
6546 #define _mm256_ternarylogic_epi32(A, B, C, imm) \
6547 (__m256i)__builtin_ia32_pternlogd256_mask((__v8si)(__m256i)(A), \
6548 (__v8si)(__m256i)(B), \
6549 (__v8si)(__m256i)(C), (int)(imm), \
6552 #define _mm256_mask_ternarylogic_epi32(A, U, B, C, imm) \
6553 (__m256i)__builtin_ia32_pternlogd256_mask((__v8si)(__m256i)(A), \
6554 (__v8si)(__m256i)(B), \
6555 (__v8si)(__m256i)(C), (int)(imm), \
6558 #define _mm256_maskz_ternarylogic_epi32(U, A, B, C, imm) \
6559 (__m256i)__builtin_ia32_pternlogd256_maskz((__v8si)(__m256i)(A), \
6560 (__v8si)(__m256i)(B), \
6561 (__v8si)(__m256i)(C), (int)(imm), \
6564 #define _mm_ternarylogic_epi64(A, B, C, imm) \
6565 (__m128i)__builtin_ia32_pternlogq128_mask((__v2di)(__m128i)(A), \
6566 (__v2di)(__m128i)(B), \
6567 (__v2di)(__m128i)(C), (int)(imm), \
6570 #define _mm_mask_ternarylogic_epi64(A, U, B, C, imm) \
6571 (__m128i)__builtin_ia32_pternlogq128_mask((__v2di)(__m128i)(A), \
6572 (__v2di)(__m128i)(B), \
6573 (__v2di)(__m128i)(C), (int)(imm), \
6576 #define _mm_maskz_ternarylogic_epi64(U, A, B, C, imm) \
6577 (__m128i)__builtin_ia32_pternlogq128_maskz((__v2di)(__m128i)(A), \
6578 (__v2di)(__m128i)(B), \
6579 (__v2di)(__m128i)(C), (int)(imm), \
6582 #define _mm256_ternarylogic_epi64(A, B, C, imm) \
6583 (__m256i)__builtin_ia32_pternlogq256_mask((__v4di)(__m256i)(A), \
6584 (__v4di)(__m256i)(B), \
6585 (__v4di)(__m256i)(C), (int)(imm), \
6588 #define _mm256_mask_ternarylogic_epi64(A, U, B, C, imm) \
6589 (__m256i)__builtin_ia32_pternlogq256_mask((__v4di)(__m256i)(A), \
6590 (__v4di)(__m256i)(B), \
6591 (__v4di)(__m256i)(C), (int)(imm), \
6594 #define _mm256_maskz_ternarylogic_epi64(U, A, B, C, imm) \
6595 (__m256i)__builtin_ia32_pternlogq256_maskz((__v4di)(__m256i)(A), \
6596 (__v4di)(__m256i)(B), \
6597 (__v4di)(__m256i)(C), (int)(imm), \
/* 128-bit-lane shuffles: select one 128-bit lane from A and one from B per
   the imm control.  The mask/maskz variants blend the shuffle result under
   the write-mask U with W or with zero, respectively.  Each expansion is
   fully parenthesized so the macros are safe inside larger expressions. */
#define _mm256_shuffle_f32x4(A, B, imm) \
  ((__m256)__builtin_ia32_shuf_f32x4_256((__v8sf)(__m256)(A), \
                                         (__v8sf)(__m256)(B), (int)(imm)))

#define _mm256_mask_shuffle_f32x4(W, U, A, B, imm) \
  ((__m256)__builtin_ia32_selectps_256((__mmask8)(U), \
                                       (__v8sf)_mm256_shuffle_f32x4((A), (B), (imm)), \
                                       (__v8sf)(__m256)(W)))

#define _mm256_maskz_shuffle_f32x4(U, A, B, imm) \
  ((__m256)__builtin_ia32_selectps_256((__mmask8)(U), \
                                       (__v8sf)_mm256_shuffle_f32x4((A), (B), (imm)), \
                                       (__v8sf)_mm256_setzero_ps()))

#define _mm256_shuffle_f64x2(A, B, imm) \
  ((__m256d)__builtin_ia32_shuf_f64x2_256((__v4df)(__m256d)(A), \
                                          (__v4df)(__m256d)(B), (int)(imm)))

#define _mm256_mask_shuffle_f64x2(W, U, A, B, imm) \
  ((__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \
                                        (__v4df)_mm256_shuffle_f64x2((A), (B), (imm)), \
                                        (__v4df)(__m256d)(W)))

#define _mm256_maskz_shuffle_f64x2(U, A, B, imm) \
  ((__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \
                                        (__v4df)_mm256_shuffle_f64x2((A), (B), (imm)), \
                                        (__v4df)_mm256_setzero_pd()))

#define _mm256_shuffle_i32x4(A, B, imm) \
  ((__m256i)__builtin_ia32_shuf_i32x4_256((__v8si)(__m256i)(A), \
                                          (__v8si)(__m256i)(B), (int)(imm)))

#define _mm256_mask_shuffle_i32x4(W, U, A, B, imm) \
  ((__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \
                                       (__v8si)_mm256_shuffle_i32x4((A), (B), (imm)), \
                                       (__v8si)(__m256i)(W)))

#define _mm256_maskz_shuffle_i32x4(U, A, B, imm) \
  ((__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \
                                       (__v8si)_mm256_shuffle_i32x4((A), (B), (imm)), \
                                       (__v8si)_mm256_setzero_si256()))

#define _mm256_shuffle_i64x2(A, B, imm) \
  ((__m256i)__builtin_ia32_shuf_i64x2_256((__v4di)(__m256i)(A), \
                                          (__v4di)(__m256i)(B), (int)(imm)))

#define _mm256_mask_shuffle_i64x2(W, U, A, B, imm) \
  ((__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \
                                       (__v4di)_mm256_shuffle_i64x2((A), (B), (imm)), \
                                       (__v4di)(__m256i)(W)))

#define _mm256_maskz_shuffle_i64x2(U, A, B, imm) \
  ((__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \
                                       (__v4di)_mm256_shuffle_i64x2((A), (B), (imm)), \
                                       (__v4di)_mm256_setzero_si256()))
/* Masked element shuffles: compute the unmasked _mm*_shuffle_pd/ps result,
   then blend under the write-mask U — "mask" variants take unselected lanes
   from W, "maskz" variants zero them.  Each expansion is fully parenthesized
   so the macros are safe inside larger expressions. */
#define _mm_mask_shuffle_pd(W, U, A, B, M) \
  ((__m128d)__builtin_ia32_selectpd_128((__mmask8)(U), \
                                        (__v2df)_mm_shuffle_pd((A), (B), (M)), \
                                        (__v2df)(__m128d)(W)))

#define _mm_maskz_shuffle_pd(U, A, B, M) \
  ((__m128d)__builtin_ia32_selectpd_128((__mmask8)(U), \
                                        (__v2df)_mm_shuffle_pd((A), (B), (M)), \
                                        (__v2df)_mm_setzero_pd()))

#define _mm256_mask_shuffle_pd(W, U, A, B, M) \
  ((__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \
                                        (__v4df)_mm256_shuffle_pd((A), (B), (M)), \
                                        (__v4df)(__m256d)(W)))

#define _mm256_maskz_shuffle_pd(U, A, B, M) \
  ((__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \
                                        (__v4df)_mm256_shuffle_pd((A), (B), (M)), \
                                        (__v4df)_mm256_setzero_pd()))

#define _mm_mask_shuffle_ps(W, U, A, B, M) \
  ((__m128)__builtin_ia32_selectps_128((__mmask8)(U), \
                                       (__v4sf)_mm_shuffle_ps((A), (B), (M)), \
                                       (__v4sf)(__m128)(W)))

#define _mm_maskz_shuffle_ps(U, A, B, M) \
  ((__m128)__builtin_ia32_selectps_128((__mmask8)(U), \
                                       (__v4sf)_mm_shuffle_ps((A), (B), (M)), \
                                       (__v4sf)_mm_setzero_ps()))

#define _mm256_mask_shuffle_ps(W, U, A, B, M) \
  ((__m256)__builtin_ia32_selectps_256((__mmask8)(U), \
                                       (__v8sf)_mm256_shuffle_ps((A), (B), (M)), \
                                       (__v8sf)(__m256)(W)))

#define _mm256_maskz_shuffle_ps(U, A, B, M) \
  ((__m256)__builtin_ia32_selectps_256((__mmask8)(U), \
                                       (__v8sf)_mm256_shuffle_ps((A), (B), (M)), \
                                       (__v8sf)_mm256_setzero_ps()))
6702 return (__m128d) __builtin_ia32_rsqrt14pd128_mask ((__v2df) __A,
6711 return (__m128d) __builtin_ia32_rsqrt14pd128_mask ((__v2df) __A,
6719 return (__m128d) __builtin_ia32_rsqrt14pd128_mask ((__v2df) __A,
6728 return (__m256d) __builtin_ia32_rsqrt14pd256_mask ((__v4df) __A,
6737 return (__m256d) __builtin_ia32_rsqrt14pd256_mask ((__v4df) __A,
6745 return (__m256d) __builtin_ia32_rsqrt14pd256_mask ((__v4df) __A,
6754 return (__m128) __builtin_ia32_rsqrt14ps128_mask ((__v4sf) __A,
6763 return (__m128) __builtin_ia32_rsqrt14ps128_mask ((__v4sf) __A,
6771 return (__m128) __builtin_ia32_rsqrt14ps128_mask ((__v4sf) __A,
6780 return (__m256) __builtin_ia32_rsqrt14ps256_mask ((__v8sf) __A,
6789 return (__m256) __builtin_ia32_rsqrt14ps256_mask ((__v8sf) __A,
6797 return (__m256) __builtin_ia32_rsqrt14ps256_mask ((__v8sf) __A,
6806 return (__m256)__builtin_shufflevector((__v4sf)__A, (__v4sf)__A,
6807 0, 1, 2, 3, 0, 1, 2, 3);
6813 return (__m256)__builtin_ia32_selectps_256((
__mmask8)__M,
6821 return (__m256)__builtin_ia32_selectps_256((
__mmask8)__M,
6829 return (__m256i)__builtin_shufflevector((__v4si)__A, (__v4si)__A,
6830 0, 1, 2, 3, 0, 1, 2, 3);
6836 return (__m256i)__builtin_ia32_selectd_256((
__mmask8)__M,
6844 return (__m256i)__builtin_ia32_selectd_256((
__mmask8)__M,
6852 return (__m256d)__builtin_ia32_selectpd_256(__M,
6860 return (__m256d)__builtin_ia32_selectpd_256(__M,
6868 return (__m128)__builtin_ia32_selectps_128(__M,
6876 return (__m128)__builtin_ia32_selectps_128(__M,
6884 return (__m256)__builtin_ia32_selectps_256(__M,
6892 return (__m256)__builtin_ia32_selectps_256(__M,
6900 return (__m128i)__builtin_ia32_selectd_128(__M,
6908 return (__m128i)__builtin_ia32_selectd_128(__M,
6916 return (__m256i)__builtin_ia32_selectd_256(__M,
6924 return (__m256i)__builtin_ia32_selectd_256(__M,
6932 return (__m128i)__builtin_ia32_selectq_128(__M,
6940 return (__m128i)__builtin_ia32_selectq_128(__M,
6948 return (__m256i)__builtin_ia32_selectq_256(__M,
6956 return (__m256i)__builtin_ia32_selectq_256(__M,
6964 return (__m128i) __builtin_ia32_pmovsdb128_mask ((__v4si) __A,
6972 return (__m128i) __builtin_ia32_pmovsdb128_mask ((__v4si) __A,
6973 (__v16qi) __O, __M);
6979 return (__m128i) __builtin_ia32_pmovsdb128_mask ((__v4si) __A,
6987 __builtin_ia32_pmovsdb128mem_mask ((__v16qi *) __P, (__v4si) __A, __M);
6993 return (__m128i) __builtin_ia32_pmovsdb256_mask ((__v8si) __A,
7001 return (__m128i) __builtin_ia32_pmovsdb256_mask ((__v8si) __A,
7002 (__v16qi) __O, __M);
7008 return (__m128i) __builtin_ia32_pmovsdb256_mask ((__v8si) __A,
7016 __builtin_ia32_pmovsdb256mem_mask ((__v16qi *) __P, (__v8si) __A, __M);
7022 return (__m128i) __builtin_ia32_pmovsdw128_mask ((__v4si) __A,
7030 return (__m128i) __builtin_ia32_pmovsdw128_mask ((__v4si) __A,
7038 return (__m128i) __builtin_ia32_pmovsdw128_mask ((__v4si) __A,
7046 __builtin_ia32_pmovsdw128mem_mask ((__v8hi *) __P, (__v4si) __A, __M);
7052 return (__m128i) __builtin_ia32_pmovsdw256_mask ((__v8si) __A,
7060 return (__m128i) __builtin_ia32_pmovsdw256_mask ((__v8si) __A,
7067 return (__m128i) __builtin_ia32_pmovsdw256_mask ((__v8si) __A,
7075 __builtin_ia32_pmovsdw256mem_mask ((__v8hi *) __P, (__v8si) __A, __M);
7081 return (__m128i) __builtin_ia32_pmovsqb128_mask ((__v2di) __A,
7089 return (__m128i) __builtin_ia32_pmovsqb128_mask ((__v2di) __A,
7090 (__v16qi) __O, __M);
7096 return (__m128i) __builtin_ia32_pmovsqb128_mask ((__v2di) __A,
7104 __builtin_ia32_pmovsqb128mem_mask ((__v16qi *) __P, (__v2di) __A, __M);
7110 return (__m128i) __builtin_ia32_pmovsqb256_mask ((__v4di) __A,
7118 return (__m128i) __builtin_ia32_pmovsqb256_mask ((__v4di) __A,
7119 (__v16qi) __O, __M);
7125 return (__m128i) __builtin_ia32_pmovsqb256_mask ((__v4di) __A,
7133 __builtin_ia32_pmovsqb256mem_mask ((__v16qi *) __P, (__v4di) __A, __M);
7139 return (__m128i) __builtin_ia32_pmovsqd128_mask ((__v2di) __A,
7147 return (__m128i) __builtin_ia32_pmovsqd128_mask ((__v2di) __A,
7154 return (__m128i) __builtin_ia32_pmovsqd128_mask ((__v2di) __A,
7162 __builtin_ia32_pmovsqd128mem_mask ((__v4si *) __P, (__v2di) __A, __M);
7168 return (__m128i) __builtin_ia32_pmovsqd256_mask ((__v4di) __A,
7176 return (__m128i) __builtin_ia32_pmovsqd256_mask ((__v4di) __A,
7184 return (__m128i) __builtin_ia32_pmovsqd256_mask ((__v4di) __A,
7192 __builtin_ia32_pmovsqd256mem_mask ((__v4si *) __P, (__v4di) __A, __M);
7198 return (__m128i) __builtin_ia32_pmovsqw128_mask ((__v2di) __A,
7206 return (__m128i) __builtin_ia32_pmovsqw128_mask ((__v2di) __A,
7213 return (__m128i) __builtin_ia32_pmovsqw128_mask ((__v2di) __A,
7221 __builtin_ia32_pmovsqw128mem_mask ((__v8hi *) __P, (__v2di) __A, __M);
7227 return (__m128i) __builtin_ia32_pmovsqw256_mask ((__v4di) __A,
7235 return (__m128i) __builtin_ia32_pmovsqw256_mask ((__v4di) __A,
7242 return (__m128i) __builtin_ia32_pmovsqw256_mask ((__v4di) __A,
7250 __builtin_ia32_pmovsqw256mem_mask ((__v8hi *) __P, (__v4di) __A, __M);
7256 return (__m128i) __builtin_ia32_pmovusdb128_mask ((__v4si) __A,
7264 return (__m128i) __builtin_ia32_pmovusdb128_mask ((__v4si) __A,
7272 return (__m128i) __builtin_ia32_pmovusdb128_mask ((__v4si) __A,
7280 __builtin_ia32_pmovusdb128mem_mask ((__v16qi *) __P, (__v4si) __A, __M);
7286 return (__m128i) __builtin_ia32_pmovusdb256_mask ((__v8si) __A,
7294 return (__m128i) __builtin_ia32_pmovusdb256_mask ((__v8si) __A,
7302 return (__m128i) __builtin_ia32_pmovusdb256_mask ((__v8si) __A,
7310 __builtin_ia32_pmovusdb256mem_mask ((__v16qi*) __P, (__v8si) __A, __M);
7316 return (__m128i) __builtin_ia32_pmovusdw128_mask ((__v4si) __A,
7324 return (__m128i) __builtin_ia32_pmovusdw128_mask ((__v4si) __A,
7331 return (__m128i) __builtin_ia32_pmovusdw128_mask ((__v4si) __A,
7339 __builtin_ia32_pmovusdw128mem_mask ((__v8hi *) __P, (__v4si) __A, __M);
7345 return (__m128i) __builtin_ia32_pmovusdw256_mask ((__v8si) __A,
7353 return (__m128i) __builtin_ia32_pmovusdw256_mask ((__v8si) __A,
7360 return (__m128i) __builtin_ia32_pmovusdw256_mask ((__v8si) __A,
7368 __builtin_ia32_pmovusdw256mem_mask ((__v8hi *) __P, (__v8si) __A, __M);
7374 return (__m128i) __builtin_ia32_pmovusqb128_mask ((__v2di) __A,
7382 return (__m128i) __builtin_ia32_pmovusqb128_mask ((__v2di) __A,
7390 return (__m128i) __builtin_ia32_pmovusqb128_mask ((__v2di) __A,
7398 __builtin_ia32_pmovusqb128mem_mask ((__v16qi *) __P, (__v2di) __A, __M);
7404 return (__m128i) __builtin_ia32_pmovusqb256_mask ((__v4di) __A,
7412 return (__m128i) __builtin_ia32_pmovusqb256_mask ((__v4di) __A,
7420 return (__m128i) __builtin_ia32_pmovusqb256_mask ((__v4di) __A,
7428 __builtin_ia32_pmovusqb256mem_mask ((__v16qi *) __P, (__v4di) __A, __M);
7434 return (__m128i) __builtin_ia32_pmovusqd128_mask ((__v2di) __A,
7442 return (__m128i) __builtin_ia32_pmovusqd128_mask ((__v2di) __A,
7449 return (__m128i) __builtin_ia32_pmovusqd128_mask ((__v2di) __A,
7457 __builtin_ia32_pmovusqd128mem_mask ((__v4si *) __P, (__v2di) __A, __M);
7463 return (__m128i) __builtin_ia32_pmovusqd256_mask ((__v4di) __A,
7471 return (__m128i) __builtin_ia32_pmovusqd256_mask ((__v4di) __A,
7478 return (__m128i) __builtin_ia32_pmovusqd256_mask ((__v4di) __A,
7486 __builtin_ia32_pmovusqd256mem_mask ((__v4si *) __P, (__v4di) __A, __M);
7492 return (__m128i) __builtin_ia32_pmovusqw128_mask ((__v2di) __A,
7500 return (__m128i) __builtin_ia32_pmovusqw128_mask ((__v2di) __A,
7507 return (__m128i) __builtin_ia32_pmovusqw128_mask ((__v2di) __A,
7515 __builtin_ia32_pmovusqw128mem_mask ((__v8hi *) __P, (__v2di) __A, __M);
7521 return (__m128i) __builtin_ia32_pmovusqw256_mask ((__v4di) __A,
7529 return (__m128i) __builtin_ia32_pmovusqw256_mask ((__v4di) __A,
7536 return (__m128i) __builtin_ia32_pmovusqw256_mask ((__v4di) __A,
7544 __builtin_ia32_pmovusqw256mem_mask ((__v8hi *) __P, (__v4di) __A, __M);
7550 return (__m128i)__builtin_shufflevector(
7551 __builtin_convertvector((__v4si)__A, __v4qi), (__v4qi){0, 0, 0, 0}, 0, 1,
7552 2, 3, 4, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7);
7558 return (__m128i) __builtin_ia32_pmovdb128_mask ((__v4si) __A,
7559 (__v16qi) __O, __M);
7565 return (__m128i) __builtin_ia32_pmovdb128_mask ((__v4si) __A,
7574 __builtin_ia32_pmovdb128mem_mask ((__v16qi *) __P, (__v4si) __A, __M);
7580 return (__m128i)__builtin_shufflevector(
7581 __builtin_convertvector((__v8si)__A, __v8qi),
7582 (__v8qi){0, 0, 0, 0, 0, 0, 0, 0}, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
7589 return (__m128i) __builtin_ia32_pmovdb256_mask ((__v8si) __A,
7590 (__v16qi) __O, __M);
7596 return (__m128i) __builtin_ia32_pmovdb256_mask ((__v8si) __A,
7604 __builtin_ia32_pmovdb256mem_mask ((__v16qi *) __P, (__v8si) __A, __M);
7610 return (__m128i)__builtin_shufflevector(
7611 __builtin_convertvector((__v4si)__A, __v4hi), (__v4hi){0, 0, 0, 0}, 0, 1,
7618 return (__m128i) __builtin_ia32_pmovdw128_mask ((__v4si) __A,
7625 return (__m128i) __builtin_ia32_pmovdw128_mask ((__v4si) __A,
7633 __builtin_ia32_pmovdw128mem_mask ((__v8hi *) __P, (__v4si) __A, __M);
7639 return (__m128i)__builtin_convertvector((__v8si)__A, __v8hi);
7645 return (__m128i) __builtin_ia32_pmovdw256_mask ((__v8si) __A,
7652 return (__m128i) __builtin_ia32_pmovdw256_mask ((__v8si) __A,
7660 __builtin_ia32_pmovdw256mem_mask ((__v8hi *) __P, (__v8si) __A, __M);
7666 return (__m128i)__builtin_shufflevector(
7667 __builtin_convertvector((__v2di)__A, __v2qi), (__v2qi){0, 0}, 0, 1, 2, 3,
7668 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3);
7674 return (__m128i) __builtin_ia32_pmovqb128_mask ((__v2di) __A,
7675 (__v16qi) __O, __M);
7681 return (__m128i) __builtin_ia32_pmovqb128_mask ((__v2di) __A,
7689 __builtin_ia32_pmovqb128mem_mask ((__v16qi *) __P, (__v2di) __A, __M);
7695 return (__m128i)__builtin_shufflevector(
7696 __builtin_convertvector((__v4di)__A, __v4qi), (__v4qi){0, 0, 0, 0}, 0, 1,
7697 2, 3, 4, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7);
7703 return (__m128i) __builtin_ia32_pmovqb256_mask ((__v4di) __A,
7704 (__v16qi) __O, __M);
7710 return (__m128i) __builtin_ia32_pmovqb256_mask ((__v4di) __A,
7718 __builtin_ia32_pmovqb256mem_mask ((__v16qi *) __P, (__v4di) __A, __M);
7724 return (__m128i)__builtin_shufflevector(
7725 __builtin_convertvector((__v2di)__A, __v2si), (__v2si){0, 0}, 0, 1, 2, 3);
7731 return (__m128i) __builtin_ia32_pmovqd128_mask ((__v2di) __A,
7738 return (__m128i) __builtin_ia32_pmovqd128_mask ((__v2di) __A,
7746 __builtin_ia32_pmovqd128mem_mask ((__v4si *) __P, (__v2di) __A, __M);
7752 return (__m128i)__builtin_convertvector((__v4di)__A, __v4si);
7758 return (__m128i)__builtin_ia32_selectd_128((
__mmask8)__M,
7766 return (__m128i)__builtin_ia32_selectd_128((
__mmask8)__M,
7774 __builtin_ia32_pmovqd256mem_mask ((__v4si *) __P, (__v4di) __A, __M);
7780 return (__m128i)__builtin_shufflevector(
7781 __builtin_convertvector((__v2di)__A, __v2hi), (__v2hi){0, 0}, 0, 1, 2, 3,
7788 return (__m128i) __builtin_ia32_pmovqw128_mask ((__v2di) __A,
7796 return (__m128i) __builtin_ia32_pmovqw128_mask ((__v2di) __A,
7804 __builtin_ia32_pmovqw128mem_mask ((__v8hi *) __P, (__v2di) __A, __M);
7810 return (__m128i)__builtin_shufflevector(
7811 __builtin_convertvector((__v4di)__A, __v4hi), (__v4hi){0, 0, 0, 0}, 0, 1,
7818 return (__m128i) __builtin_ia32_pmovqw256_mask ((__v4di) __A,
7825 return (__m128i) __builtin_ia32_pmovqw256_mask ((__v4di) __A,
7833 __builtin_ia32_pmovqw256mem_mask ((__v8hi *) __P, (__v4di) __A, __M);
7836 #define _mm256_extractf32x4_ps(A, imm) \
7837 (__m128)__builtin_ia32_extractf32x4_256_mask((__v8sf)(__m256)(A), \
7839 (__v4sf)_mm_undefined_ps(), \
7842 #define _mm256_mask_extractf32x4_ps(W, U, A, imm) \
7843 (__m128)__builtin_ia32_extractf32x4_256_mask((__v8sf)(__m256)(A), \
7845 (__v4sf)(__m128)(W), \
7848 #define _mm256_maskz_extractf32x4_ps(U, A, imm) \
7849 (__m128)__builtin_ia32_extractf32x4_256_mask((__v8sf)(__m256)(A), \
7851 (__v4sf)_mm_setzero_ps(), \
7854 #define _mm256_extracti32x4_epi32(A, imm) \
7855 (__m128i)__builtin_ia32_extracti32x4_256_mask((__v8si)(__m256i)(A), \
7857 (__v4si)_mm_undefined_si128(), \
7860 #define _mm256_mask_extracti32x4_epi32(W, U, A, imm) \
7861 (__m128i)__builtin_ia32_extracti32x4_256_mask((__v8si)(__m256i)(A), \
7863 (__v4si)(__m128i)(W), \
7866 #define _mm256_maskz_extracti32x4_epi32(U, A, imm) \
7867 (__m128i)__builtin_ia32_extracti32x4_256_mask((__v8si)(__m256i)(A), \
7869 (__v4si)_mm_setzero_si128(), \
/* Insert the 128-bit float vector B into A at 128-bit lane imm.
   The whole expansion is parenthesized so the macro behaves as a single
   expression at any call site. */
#define _mm256_insertf32x4(A, B, imm) \
  ((__m256)__builtin_ia32_insertf32x4_256((__v8sf)(__m256)(A), \
                                          (__v4sf)(__m128)(B), (int)(imm)))

/* Merge-masked variant: result elements with a 0 bit in U come from W. */
#define _mm256_mask_insertf32x4(W, U, A, B, imm) \
  ((__m256)__builtin_ia32_selectps_256((__mmask8)(U), \
                                 (__v8sf)_mm256_insertf32x4((A), (B), (imm)), \
                                 (__v8sf)(__m256)(W)))

/* Zero-masked variant: result elements with a 0 bit in U are zeroed. */
#define _mm256_maskz_insertf32x4(U, A, B, imm) \
  ((__m256)__builtin_ia32_selectps_256((__mmask8)(U), \
                                 (__v8sf)_mm256_insertf32x4((A), (B), (imm)), \
                                 (__v8sf)_mm256_setzero_ps()))
/* Insert the 128-bit integer vector B into A at 128-bit lane imm.
   Expansion wrapped in parentheses for macro hygiene. */
#define _mm256_inserti32x4(A, B, imm) \
  ((__m256i)__builtin_ia32_inserti32x4_256((__v8si)(__m256i)(A), \
                                           (__v4si)(__m128i)(B), (int)(imm)))

/* Merge-masked variant: result elements with a 0 bit in U come from W. */
#define _mm256_mask_inserti32x4(W, U, A, B, imm) \
  ((__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \
                                 (__v8si)_mm256_inserti32x4((A), (B), (imm)), \
                                 (__v8si)(__m256i)(W)))

/* Zero-masked variant: result elements with a 0 bit in U are zeroed. */
#define _mm256_maskz_inserti32x4(U, A, B, imm) \
  ((__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \
                                 (__v8si)_mm256_inserti32x4((A), (B), (imm)), \
                                 (__v8si)_mm256_setzero_si256()))
7900 #define _mm_getmant_pd(A, B, C) \
7901 (__m128d)__builtin_ia32_getmantpd128_mask((__v2df)(__m128d)(A), \
7902 (int)(((C)<<2) | (B)), \
7903 (__v2df)_mm_setzero_pd(), \
7906 #define _mm_mask_getmant_pd(W, U, A, B, C) \
7907 (__m128d)__builtin_ia32_getmantpd128_mask((__v2df)(__m128d)(A), \
7908 (int)(((C)<<2) | (B)), \
7909 (__v2df)(__m128d)(W), \
7912 #define _mm_maskz_getmant_pd(U, A, B, C) \
7913 (__m128d)__builtin_ia32_getmantpd128_mask((__v2df)(__m128d)(A), \
7914 (int)(((C)<<2) | (B)), \
7915 (__v2df)_mm_setzero_pd(), \
7918 #define _mm256_getmant_pd(A, B, C) \
7919 (__m256d)__builtin_ia32_getmantpd256_mask((__v4df)(__m256d)(A), \
7920 (int)(((C)<<2) | (B)), \
7921 (__v4df)_mm256_setzero_pd(), \
7924 #define _mm256_mask_getmant_pd(W, U, A, B, C) \
7925 (__m256d)__builtin_ia32_getmantpd256_mask((__v4df)(__m256d)(A), \
7926 (int)(((C)<<2) | (B)), \
7927 (__v4df)(__m256d)(W), \
7930 #define _mm256_maskz_getmant_pd(U, A, B, C) \
7931 (__m256d)__builtin_ia32_getmantpd256_mask((__v4df)(__m256d)(A), \
7932 (int)(((C)<<2) | (B)), \
7933 (__v4df)_mm256_setzero_pd(), \
7936 #define _mm_getmant_ps(A, B, C) \
7937 (__m128)__builtin_ia32_getmantps128_mask((__v4sf)(__m128)(A), \
7938 (int)(((C)<<2) | (B)), \
7939 (__v4sf)_mm_setzero_ps(), \
7942 #define _mm_mask_getmant_ps(W, U, A, B, C) \
7943 (__m128)__builtin_ia32_getmantps128_mask((__v4sf)(__m128)(A), \
7944 (int)(((C)<<2) | (B)), \
7945 (__v4sf)(__m128)(W), \
7948 #define _mm_maskz_getmant_ps(U, A, B, C) \
7949 (__m128)__builtin_ia32_getmantps128_mask((__v4sf)(__m128)(A), \
7950 (int)(((C)<<2) | (B)), \
7951 (__v4sf)_mm_setzero_ps(), \
7954 #define _mm256_getmant_ps(A, B, C) \
7955 (__m256)__builtin_ia32_getmantps256_mask((__v8sf)(__m256)(A), \
7956 (int)(((C)<<2) | (B)), \
7957 (__v8sf)_mm256_setzero_ps(), \
7960 #define _mm256_mask_getmant_ps(W, U, A, B, C) \
7961 (__m256)__builtin_ia32_getmantps256_mask((__v8sf)(__m256)(A), \
7962 (int)(((C)<<2) | (B)), \
7963 (__v8sf)(__m256)(W), \
7966 #define _mm256_maskz_getmant_ps(U, A, B, C) \
7967 (__m256)__builtin_ia32_getmantps256_mask((__v8sf)(__m256)(A), \
7968 (int)(((C)<<2) | (B)), \
7969 (__v8sf)_mm256_setzero_ps(), \
/* Masked gathers with 64-bit indices into double/64-bit-int lanes.
   Elements whose mask bit is 0 are taken from v1_old; addr + index*scale
   addresses each element. Expansions parenthesized for macro hygiene. */
#define _mm_mmask_i64gather_pd(v1_old, mask, index, addr, scale) \
  ((__m128d)__builtin_ia32_gather3div2df((__v2df)(__m128d)(v1_old), \
                                         (void const *)(addr), \
                                         (__v2di)(__m128i)(index), \
                                         (__mmask8)(mask), (int)(scale)))

#define _mm_mmask_i64gather_epi64(v1_old, mask, index, addr, scale) \
  ((__m128i)__builtin_ia32_gather3div2di((__v2di)(__m128i)(v1_old), \
                                         (void const *)(addr), \
                                         (__v2di)(__m128i)(index), \
                                         (__mmask8)(mask), (int)(scale)))

#define _mm256_mmask_i64gather_pd(v1_old, mask, index, addr, scale) \
  ((__m256d)__builtin_ia32_gather3div4df((__v4df)(__m256d)(v1_old), \
                                         (void const *)(addr), \
                                         (__v4di)(__m256i)(index), \
                                         (__mmask8)(mask), (int)(scale)))

#define _mm256_mmask_i64gather_epi64(v1_old, mask, index, addr, scale) \
  ((__m256i)__builtin_ia32_gather3div4di((__v4di)(__m256i)(v1_old), \
                                         (void const *)(addr), \
                                         (__v4di)(__m256i)(index), \
                                         (__mmask8)(mask), (int)(scale)))
/* Masked gathers with 64-bit indices into float/32-bit-int lanes.
   Note the 256-bit-index forms still return 128-bit vectors (4 elements
   gathered from 4 indices). Expansions parenthesized for macro hygiene. */
#define _mm_mmask_i64gather_ps(v1_old, mask, index, addr, scale) \
  ((__m128)__builtin_ia32_gather3div4sf((__v4sf)(__m128)(v1_old), \
                                        (void const *)(addr), \
                                        (__v2di)(__m128i)(index), \
                                        (__mmask8)(mask), (int)(scale)))

#define _mm_mmask_i64gather_epi32(v1_old, mask, index, addr, scale) \
  ((__m128i)__builtin_ia32_gather3div4si((__v4si)(__m128i)(v1_old), \
                                         (void const *)(addr), \
                                         (__v2di)(__m128i)(index), \
                                         (__mmask8)(mask), (int)(scale)))

#define _mm256_mmask_i64gather_ps(v1_old, mask, index, addr, scale) \
  ((__m128)__builtin_ia32_gather3div8sf((__v4sf)(__m128)(v1_old), \
                                        (void const *)(addr), \
                                        (__v4di)(__m256i)(index), \
                                        (__mmask8)(mask), (int)(scale)))

#define _mm256_mmask_i64gather_epi32(v1_old, mask, index, addr, scale) \
  ((__m128i)__builtin_ia32_gather3div8si((__v4si)(__m128i)(v1_old), \
                                         (void const *)(addr), \
                                         (__v4di)(__m256i)(index), \
                                         (__mmask8)(mask), (int)(scale)))
/* Masked gathers with 32-bit indices into double/64-bit-int lanes.
   Elements whose mask bit is 0 are taken from v1_old.
   Expansions parenthesized for macro hygiene. */
#define _mm_mmask_i32gather_pd(v1_old, mask, index, addr, scale) \
  ((__m128d)__builtin_ia32_gather3siv2df((__v2df)(__m128d)(v1_old), \
                                         (void const *)(addr), \
                                         (__v4si)(__m128i)(index), \
                                         (__mmask8)(mask), (int)(scale)))

#define _mm_mmask_i32gather_epi64(v1_old, mask, index, addr, scale) \
  ((__m128i)__builtin_ia32_gather3siv2di((__v2di)(__m128i)(v1_old), \
                                         (void const *)(addr), \
                                         (__v4si)(__m128i)(index), \
                                         (__mmask8)(mask), (int)(scale)))

#define _mm256_mmask_i32gather_pd(v1_old, mask, index, addr, scale) \
  ((__m256d)__builtin_ia32_gather3siv4df((__v4df)(__m256d)(v1_old), \
                                         (void const *)(addr), \
                                         (__v4si)(__m128i)(index), \
                                         (__mmask8)(mask), (int)(scale)))

#define _mm256_mmask_i32gather_epi64(v1_old, mask, index, addr, scale) \
  ((__m256i)__builtin_ia32_gather3siv4di((__v4di)(__m256i)(v1_old), \
                                         (void const *)(addr), \
                                         (__v4si)(__m128i)(index), \
                                         (__mmask8)(mask), (int)(scale)))
/* Masked gathers with 32-bit indices into float/32-bit-int lanes.
   Elements whose mask bit is 0 are taken from v1_old.
   Expansions parenthesized for macro hygiene. */
#define _mm_mmask_i32gather_ps(v1_old, mask, index, addr, scale) \
  ((__m128)__builtin_ia32_gather3siv4sf((__v4sf)(__m128)(v1_old), \
                                        (void const *)(addr), \
                                        (__v4si)(__m128i)(index), \
                                        (__mmask8)(mask), (int)(scale)))

#define _mm_mmask_i32gather_epi32(v1_old, mask, index, addr, scale) \
  ((__m128i)__builtin_ia32_gather3siv4si((__v4si)(__m128i)(v1_old), \
                                         (void const *)(addr), \
                                         (__v4si)(__m128i)(index), \
                                         (__mmask8)(mask), (int)(scale)))

#define _mm256_mmask_i32gather_ps(v1_old, mask, index, addr, scale) \
  ((__m256)__builtin_ia32_gather3siv8sf((__v8sf)(__m256)(v1_old), \
                                        (void const *)(addr), \
                                        (__v8si)(__m256i)(index), \
                                        (__mmask8)(mask), (int)(scale)))

#define _mm256_mmask_i32gather_epi32(v1_old, mask, index, addr, scale) \
  ((__m256i)__builtin_ia32_gather3siv8si((__v8si)(__m256i)(v1_old), \
                                         (void const *)(addr), \
                                         (__v8si)(__m256i)(index), \
                                         (__mmask8)(mask), (int)(scale)))
/* Permute the four doubles of X by the 2-bit fields of immediate C.
   Expansions parenthesized for macro hygiene. */
#define _mm256_permutex_pd(X, C) \
  ((__m256d)__builtin_ia32_permdf256((__v4df)(__m256d)(X), (int)(C)))

/* Merge-masked variant: result elements with a 0 bit in U come from W. */
#define _mm256_mask_permutex_pd(W, U, X, C) \
  ((__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \
                                        (__v4df)_mm256_permutex_pd((X), (C)), \
                                        (__v4df)(__m256d)(W)))

/* Zero-masked variant: result elements with a 0 bit in U are zeroed. */
#define _mm256_maskz_permutex_pd(U, X, C) \
  ((__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \
                                        (__v4df)_mm256_permutex_pd((X), (C)), \
                                        (__v4df)_mm256_setzero_pd()))
/* Permute the four 64-bit integers of X by the 2-bit fields of immediate C.
   Expansions parenthesized for macro hygiene. */
#define _mm256_permutex_epi64(X, C) \
  ((__m256i)__builtin_ia32_permdi256((__v4di)(__m256i)(X), (int)(C)))

/* Merge-masked variant: result elements with a 0 bit in U come from W. */
#define _mm256_mask_permutex_epi64(W, U, X, C) \
  ((__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \
                                   (__v4di)_mm256_permutex_epi64((X), (C)), \
                                   (__v4di)(__m256i)(W)))

/* Zero-masked variant: result elements with a 0 bit in U are zeroed. */
#define _mm256_maskz_permutex_epi64(U, X, C) \
  ((__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \
                                   (__v4di)_mm256_permutex_epi64((X), (C)), \
                                   (__v4di)_mm256_setzero_si256()))
8097 return (__m256d)__builtin_ia32_permvardf256((__v4df)__Y, (__v4di)__X);
8104 return (__m256d)__builtin_ia32_selectpd_256((
__mmask8)__U,
8112 return (__m256d)__builtin_ia32_selectpd_256((
__mmask8)__U,
8120 return (__m256i)__builtin_ia32_permvardi256((__v4di) __Y, (__v4di) __X);
8126 return (__m256i)__builtin_ia32_selectq_256((
__mmask8)__M,
8135 return (__m256i)__builtin_ia32_selectq_256((
__mmask8)__M,
/* AVX512 naming alias for the AVX2 variable permute. NOTE the operand order
   swap: permutexvar takes (index, value) while permutevar8x32 takes
   (value, index). */
#define _mm256_permutexvar_ps(A, B) _mm256_permutevar8x32_ps((B), (A))
8145 return (__m256)__builtin_ia32_selectps_256((
__mmask8)__U,
8153 return (__m256)__builtin_ia32_selectps_256((
__mmask8)__U,
/* AVX512 naming alias for the AVX2 variable permute; operand order is
   swapped relative to _mm256_permutevar8x32_epi32 (index first here). */
#define _mm256_permutexvar_epi32(A, B) _mm256_permutevar8x32_epi32((B), (A))
8164 return (__m256i)__builtin_ia32_selectd_256((
__mmask8)__M,
8172 return (__m256i)__builtin_ia32_selectd_256((
__mmask8)__M,
/* Concatenate B:A and shift right by imm 32-bit elements (valignd).
   Expansions parenthesized for macro hygiene. */
#define _mm_alignr_epi32(A, B, imm) \
  ((__m128i)__builtin_ia32_alignd128((__v4si)(__m128i)(A), \
                                     (__v4si)(__m128i)(B), (int)(imm)))

/* Merge-masked variant: result elements with a 0 bit in U come from W. */
#define _mm_mask_alignr_epi32(W, U, A, B, imm) \
  ((__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \
                                  (__v4si)_mm_alignr_epi32((A), (B), (imm)), \
                                  (__v4si)(__m128i)(W)))

/* Zero-masked variant: result elements with a 0 bit in U are zeroed. */
#define _mm_maskz_alignr_epi32(U, A, B, imm) \
  ((__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \
                                  (__v4si)_mm_alignr_epi32((A), (B), (imm)), \
                                  (__v4si)_mm_setzero_si128()))
/* 256-bit valignd: concatenate B:A and shift right by imm 32-bit elements.
   Expansions parenthesized for macro hygiene. */
#define _mm256_alignr_epi32(A, B, imm) \
  ((__m256i)__builtin_ia32_alignd256((__v8si)(__m256i)(A), \
                                     (__v8si)(__m256i)(B), (int)(imm)))

/* Merge-masked variant: result elements with a 0 bit in U come from W. */
#define _mm256_mask_alignr_epi32(W, U, A, B, imm) \
  ((__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \
                                 (__v8si)_mm256_alignr_epi32((A), (B), (imm)), \
                                 (__v8si)(__m256i)(W)))

/* Zero-masked variant: result elements with a 0 bit in U are zeroed. */
#define _mm256_maskz_alignr_epi32(U, A, B, imm) \
  ((__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \
                                 (__v8si)_mm256_alignr_epi32((A), (B), (imm)), \
                                 (__v8si)_mm256_setzero_si256()))
/* Concatenate B:A and shift right by imm 64-bit elements (valignq).
   Expansions parenthesized for macro hygiene. */
#define _mm_alignr_epi64(A, B, imm) \
  ((__m128i)__builtin_ia32_alignq128((__v2di)(__m128i)(A), \
                                     (__v2di)(__m128i)(B), (int)(imm)))

/* Merge-masked variant: result elements with a 0 bit in U come from W. */
#define _mm_mask_alignr_epi64(W, U, A, B, imm) \
  ((__m128i)__builtin_ia32_selectq_128((__mmask8)(U), \
                                  (__v2di)_mm_alignr_epi64((A), (B), (imm)), \
                                  (__v2di)(__m128i)(W)))

/* Zero-masked variant: result elements with a 0 bit in U are zeroed. */
#define _mm_maskz_alignr_epi64(U, A, B, imm) \
  ((__m128i)__builtin_ia32_selectq_128((__mmask8)(U), \
                                  (__v2di)_mm_alignr_epi64((A), (B), (imm)), \
                                  (__v2di)_mm_setzero_si128()))
/* 256-bit valignq: concatenate B:A and shift right by imm 64-bit elements.
   Expansions parenthesized for macro hygiene. */
#define _mm256_alignr_epi64(A, B, imm) \
  ((__m256i)__builtin_ia32_alignq256((__v4di)(__m256i)(A), \
                                     (__v4di)(__m256i)(B), (int)(imm)))

/* Merge-masked variant: result elements with a 0 bit in U come from W. */
#define _mm256_mask_alignr_epi64(W, U, A, B, imm) \
  ((__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \
                                 (__v4di)_mm256_alignr_epi64((A), (B), (imm)), \
                                 (__v4di)(__m256i)(W)))

/* Zero-masked variant: result elements with a 0 bit in U are zeroed. */
#define _mm256_maskz_alignr_epi64(U, A, B, imm) \
  ((__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \
                                 (__v4di)_mm256_alignr_epi64((A), (B), (imm)), \
                                 (__v4di)_mm256_setzero_si256()))
8236 return (__m128)__builtin_ia32_selectps_128((
__mmask8)__U,
8244 return (__m128)__builtin_ia32_selectps_128((
__mmask8)__U,
8252 return (__m256)__builtin_ia32_selectps_256((
__mmask8)__U,
8260 return (__m256)__builtin_ia32_selectps_256((
__mmask8)__U,
8268 return (__m128)__builtin_ia32_selectps_128((
__mmask8)__U,
8276 return (__m128)__builtin_ia32_selectps_128((
__mmask8)__U,
8284 return (__m256)__builtin_ia32_selectps_256((
__mmask8)__U,
8292 return (__m256)__builtin_ia32_selectps_256((
__mmask8)__U,
/* Masked forms of the 256-bit 32-bit-element shuffle (pshufd with imm I).
   Merge-masked: result elements with a 0 bit in U come from W.
   Expansions parenthesized for macro hygiene. */
#define _mm256_mask_shuffle_epi32(W, U, A, I) \
  ((__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \
                                       (__v8si)_mm256_shuffle_epi32((A), (I)), \
                                       (__v8si)(__m256i)(W)))

/* Zero-masked: result elements with a 0 bit in U are zeroed. */
#define _mm256_maskz_shuffle_epi32(U, A, I) \
  ((__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \
                                       (__v8si)_mm256_shuffle_epi32((A), (I)), \
                                       (__v8si)_mm256_setzero_si256()))
/* Masked forms of the 128-bit 32-bit-element shuffle (pshufd with imm I).
   Merge-masked: result elements with a 0 bit in U come from W.
   Expansions parenthesized for macro hygiene. */
#define _mm_mask_shuffle_epi32(W, U, A, I) \
  ((__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \
                                       (__v4si)_mm_shuffle_epi32((A), (I)), \
                                       (__v4si)(__m128i)(W)))

/* Zero-masked: result elements with a 0 bit in U are zeroed. */
#define _mm_maskz_shuffle_epi32(U, A, I) \
  ((__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \
                                       (__v4si)_mm_shuffle_epi32((A), (I)), \
                                       (__v4si)_mm_setzero_si128()))
8320 return (__m128d) __builtin_ia32_selectpd_128 ((
__mmask8) __U,
8328 return (__m128d) __builtin_ia32_selectpd_128 ((
__mmask8) __U,
8336 return (__m256d) __builtin_ia32_selectpd_256 ((
__mmask8) __U,
8344 return (__m256d) __builtin_ia32_selectpd_256 ((
__mmask8) __U,
8352 return (__m128) __builtin_ia32_selectps_128 ((
__mmask8) __U,
8360 return (__m128) __builtin_ia32_selectps_128 ((
__mmask8) __U,
8368 return (__m256) __builtin_ia32_selectps_256 ((
__mmask8) __U,
8376 return (__m256) __builtin_ia32_selectps_256 ((
__mmask8) __U,
8384 return (__m128) __builtin_ia32_vcvtph2ps_mask ((__v8hi) __A,
8392 return (__m128) __builtin_ia32_vcvtph2ps_mask ((__v8hi) __A,
8401 return (__m256) __builtin_ia32_vcvtph2ps256_mask ((__v8hi) __A,
8409 return (__m256) __builtin_ia32_vcvtph2ps256_mask ((__v8hi) __A,
8415 #define _mm_mask_cvt_roundps_ph(W, U, A, I) \
8416 (__m128i)__builtin_ia32_vcvtps2ph_mask((__v4sf)(__m128)(A), (int)(I), \
8417 (__v8hi)(__m128i)(W), \
8420 #define _mm_maskz_cvt_roundps_ph(U, A, I) \
8421 (__m128i)__builtin_ia32_vcvtps2ph_mask((__v4sf)(__m128)(A), (int)(I), \
8422 (__v8hi)_mm_setzero_si128(), \
/* Non-"round" spellings are simple aliases of the rounding-control forms;
   the I argument is still required and carries the rounding control. */
#define _mm_mask_cvtps_ph _mm_mask_cvt_roundps_ph
#define _mm_maskz_cvtps_ph _mm_maskz_cvt_roundps_ph
8428 #define _mm256_mask_cvt_roundps_ph(W, U, A, I) \
8429 (__m128i)__builtin_ia32_vcvtps2ph256_mask((__v8sf)(__m256)(A), (int)(I), \
8430 (__v8hi)(__m128i)(W), \
8433 #define _mm256_maskz_cvt_roundps_ph(U, A, I) \
8434 (__m128i)__builtin_ia32_vcvtps2ph256_mask((__v8sf)(__m256)(A), (int)(I), \
8435 (__v8hi)_mm_setzero_si128(), \
/* 256-bit counterparts of the cvtps_ph aliases above: plain names map
   directly to the rounding-control variants. */
#define _mm256_mask_cvtps_ph _mm256_mask_cvt_roundps_ph
#define _mm256_maskz_cvtps_ph _mm256_maskz_cvt_roundps_ph
8442 #undef __DEFAULT_FN_ATTRS128
8443 #undef __DEFAULT_FN_ATTRS256
static __inline__ vector float vector float __b
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_srl_epi32(__m256i __a, __m128i __count)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_srli_epi64(__m256i __a, int __count)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cvtepu8_epi32(__m128i __V)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_and_si256(__m256i __a, __m256i __b)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cvtepi16_epi32(__m128i __V)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_sll_epi32(__m256i __a, __m128i __count)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_unpacklo_epi64(__m256i __a, __m256i __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_broadcastd_epi32(__m128i __X)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_sll_epi64(__m256i __a, __m128i __count)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cvtepi8_epi32(__m128i __V)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_slli_epi32(__m256i __a, int __count)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mul_epi32(__m256i __a, __m256i __b)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_srai_epi32(__m256i __a, int __count)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mullo_epi32(__m256i __a, __m256i __b)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_sub_epi64(__m256i __a, __m256i __b)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_sllv_epi64(__m256i __X, __m256i __Y)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_unpacklo_epi32(__m256i __a, __m256i __b)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_max_epu32(__m256i __a, __m256i __b)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_srav_epi32(__m256i __X, __m256i __Y)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_broadcastsd_pd(__m128d __X)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_srlv_epi32(__m128i __X, __m128i __Y)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_srlv_epi64(__m128i __X, __m128i __Y)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_add_epi64(__m256i __a, __m256i __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_sllv_epi32(__m128i __X, __m128i __Y)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_unpackhi_epi32(__m256i __a, __m256i __b)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_max_epi32(__m256i __a, __m256i __b)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_sllv_epi32(__m256i __X, __m256i __Y)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_broadcastss_ps(__m128 __X)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_srlv_epi64(__m256i __X, __m256i __Y)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cvtepu16_epi32(__m128i __V)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_sra_epi32(__m256i __a, __m128i __count)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_srav_epi32(__m128i __X, __m128i __Y)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_min_epi32(__m256i __a, __m256i __b)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cvtepu8_epi64(__m128i __V)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_srlv_epi32(__m256i __X, __m256i __Y)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_min_epu32(__m256i __a, __m256i __b)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_sub_epi32(__m256i __a, __m256i __b)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cvtepu16_epi64(__m128i __V)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cvtepi32_epi64(__m128i __V)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_unpackhi_epi64(__m256i __a, __m256i __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_broadcastq_epi64(__m128i __X)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_srl_epi64(__m256i __a, __m128i __count)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_srli_epi32(__m256i __a, int __count)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_slli_epi64(__m256i __a, int __count)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cvtepu32_epi64(__m128i __V)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_broadcastd_epi32(__m128i __X)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cvtepi16_epi64(__m128i __V)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_abs_epi32(__m256i __a)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cvtepi8_epi64(__m128i __V)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mul_epu32(__m256i __a, __m256i __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_sllv_epi64(__m128i __X, __m128i __Y)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_broadcastss_ps(__m128 __X)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_add_epi32(__m256i __a, __m256i __b)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_broadcastq_epi64(__m128i __X)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_xor_epi32(__mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_expandloadu_epi32(__m128i __W, __mmask8 __U, void const *__P)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_srav_epi32(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask3_fnmsub_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_broadcastd_epi32(__m256i __O, __mmask8 __M, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtusepi64_epi16(__m128i __O, __mmask8 __M, __m128i __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_min_pd(__mmask8 __U, __m256d __A, __m256d __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvttpd_epu32(__m128d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_add_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_fmadd_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_store_epi32(void *__P, __mmask8 __U, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_mov_epi64(__m256i __W, __mmask8 __U, __m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvttps_epu32(__mmask8 __U, __m256 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_andnot_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_rcp14_ps(__m256 __W, __mmask8 __U, __m256 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_cvtepi64_epi32(__m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_sll_epi64(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtepi64_epi16(__m128i __O, __mmask8 __M, __m128i __A)
#define _mm_mask_cmpneq_epi32_mask(k, A, B)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtpd_epi32(__m128i __W, __mmask8 __U, __m256d __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_add_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_rorv_epi64(__m256i __A, __m256i __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask3_fmaddsub_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_fnmadd_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_rcp14_pd(__mmask8 __U, __m256d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_or_epi32(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_compress_pd(__m128d __W, __mmask8 __U, __m128d __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_broadcastss_ps(__m128 __O, __mmask8 __M, __m128 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtusepi32_epi8(__mmask8 __M, __m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_max_epu32(__mmask8 __M, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_rorv_epi32(__mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_max_epi32(__mmask8 __M, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtepi64_epi16(__mmask8 __M, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtepi32_epi16(__m128i __O, __mmask8 __M, __m128i __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_fnmsub_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C)
static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_cvtsepi64_storeu_epi32(void *__P, __mmask8 __M, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_and_epi32(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_cvtepi64_storeu_epi8(void *__P, __mmask8 __M, __m128i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_loadu_ps(__mmask8 __U, void const *__P)
static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_cvtsepi64_storeu_epi16(void *__P, __mmask8 __M, __m128i __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_min_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_sll_epi32(__mmask8 __U, __m256i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_min_epu32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_andnot_epi64(__m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_expand_epi64(__m128i __W, __mmask8 __U, __m128i __A)
static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_storeu_epi32(void *__P, __mmask8 __U, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_sllv_epi64(__mmask8 __U, __m128i __X, __m128i __Y)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepi8_epi32(__m256i __W, __mmask8 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtepi32_epi8(__mmask8 __M, __m128i __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask3_fmsubadd_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtusepi64_epi8(__m128i __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_fmadd_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtepu32_epi64(__mmask8 __U, __m128i __X)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_cvtusepi32_epi8(__m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_sra_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_mov_ps(__m128 __W, __mmask8 __U, __m128 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_srli_epi64(__m256i __W, __mmask8 __U, __m256i __A, unsigned int __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_load_epi64(__m128i __W, __mmask8 __U, void const *__P)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_permutex2var_epi64(__m256i __A, __m256i __I, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_rolv_epi32(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_movehdup_ps(__mmask8 __U, __m256 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtsepi64_epi8(__mmask8 __M, __m256i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_fmsub_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_expandloadu_ps(__mmask8 __U, void const *__P)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_slli_epi32(__mmask8 __U, __m256i __A, unsigned int __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_and_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_loadu_epi64(__mmask8 __U, void const *__P)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtsepi32_epi16(__mmask8 __M, __m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_max_epu32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_load_ps(__m128 __W, __mmask8 __U, void const *__P)
static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_cvtsepi32_storeu_epi16(void *__P, __mmask8 __M, __m128i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_scalef_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_rcp14_pd(__m256d __W, __mmask8 __U, __m256d __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_expand_pd(__m256d __W, __mmask8 __U, __m256d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtsepi64_epi16(__m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_sra_epi64(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtusepi64_epi16(__m128i __O, __mmask8 __M, __m256i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_rsqrt14_pd(__mmask8 __U, __m128d __A)
static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_cvtsepi32_storeu_epi8(void *__P, __mmask8 __M, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtepi64_epi32(__mmask8 __M, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_srlv_epi64(__mmask8 __U, __m256i __X, __m256i __Y)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_srl_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B)
static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_cvtepi32_storeu_epi8(void *__P, __mmask8 __M, __m128i __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_unpackhi_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_rsqrt14_pd(__m128d __W, __mmask8 __U, __m128d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_max_epi64(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_load_epi32(__m128i __W, __mmask8 __U, void const *__P)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_fmsubadd_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS128 _mm_test_epi32_mask(__m128i __A, __m128i __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_permutevar_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128i __C)
static __inline void __DEFAULT_FN_ATTRS256 _mm256_store_epi32(void *__P, __m256i __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_getexp_ps(__m256 __W, __mmask8 __U, __m256 __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_fnmsub_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_sub_epi64(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepi32_epi8(__m128i __O, __mmask8 __M, __m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_srlv_epi64(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_add_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
#define _mm256_mask_cmpneq_epi64_mask(k, A, B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_scalef_pd(__m128d __A, __m128d __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_xor_epi32(__m128i __a, __m128i __b)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_movedup_pd(__mmask8 __U, __m256d __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtps_epi32(__mmask8 __U, __m256 __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_sqrt_ps(__m256 __W, __mmask8 __U, __m256 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_srl_epi64(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_unpackhi_epi32(__mmask8 __U, __m256i __A, __m256i __B)
static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_store_pd(void *__P, __mmask8 __U, __m256d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtusepi32_epi8(__m128i __O, __mmask8 __M, __m256i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_load_pd(__m128d __W, __mmask8 __U, void const *__P)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_add_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_set1_epi64(__m128i __O, __mmask8 __M, long long __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_mul_pd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_div_ps(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_mov_pd(__m128d __W, __mmask8 __U, __m128d __A)
static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_cvtusepi32_storeu_epi8(void *__P, __mmask8 __M, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtusepi64_epi8(__mmask8 __M, __m128i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_min_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_rorv_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtsepi32_epi8(__mmask8 __M, __m128i __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_broadcastss_ps(__mmask8 __M, __m128 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_min_epu64(__mmask8 __M, __m256i __A, __m256i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_add_ps(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_mul_pd(__mmask8 __U, __m256d __A, __m256d __B)
static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_cvtepi64_storeu_epi16(void *__P, __mmask8 __M, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_sllv_epi32(__mmask8 __U, __m128i __X, __m128i __Y)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_permutex2var_ps(__m128 __A, __m128i __I, __m128 __B)
static __inline void __DEFAULT_FN_ATTRS128 _mm_storeu_epi64(void *__P, __m128i __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_permutexvar_ps(__m256 __W, __mmask8 __U, __m256i __X, __m256 __Y)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask3_fmaddsub_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_min_ps(__mmask8 __U, __m256 __A, __m256 __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask3_fmadd_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_compress_epi64(__m256i __W, __mmask8 __U, __m256i __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_fmsubadd_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_max_epi64(__m128i __A, __m128i __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_fmadd_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_max_epi32(__mmask8 __M, __m128i __A, __m128i __B)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS256 _mm256_test_epi32_mask(__m256i __A, __m256i __B)
static __inline __m128i __DEFAULT_FN_ATTRS128 _mm_loadu_epi64(void const *__P)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_min_epu64(__m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_sll_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_loadu_epi64(__mmask8 __U, void const *__P)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtepi64_epi8(__m128i __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_loadu_pd(__m256d __W, __mmask8 __U, void const *__P)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_abs_epi64(__mmask8 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_or_epi64(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtsepi32_epi8(__mmask8 __M, __m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_xor_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS256 _mm256_mask_testn_epi64_mask(__mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_load_epi64(__m256i __W, __mmask8 __U, void const *__P)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_andnot_epi64(__mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_permutevar_ps(__mmask8 __U, __m256 __A, __m256i __C)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_getexp_pd(__mmask8 __U, __m256d __A)
static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_cvtusepi32_storeu_epi8(void *__P, __mmask8 __M, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtusepi32_epi16(__m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_and_epi32(__m256i __a, __m256i __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtepi64_epi16(__m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepu16_epi64(__m256i __W, __mmask8 __U, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_or_epi64(__m256i __a, __m256i __b)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_cvtph_ps(__m128 __W, __mmask8 __U, __m128i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_sqrt_ps(__m128 __W, __mmask8 __U, __m128 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask2_permutex2var_epi64(__m128i __A, __m128i __I, __mmask8 __U, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_add_epi64(__mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_rsqrt14_ps(__m128 __W, __mmask8 __U, __m128 __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_loadu_pd(__mmask8 __U, void const *__P)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_srl_epi32(__mmask8 __U, __m256i __A, __m128i __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask3_fmsub_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_rsqrt14_ps(__m256 __W, __mmask8 __U, __m256 __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_blend_pd(__mmask8 __U, __m256d __A, __m256d __W)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_cvtepi32_pd(__mmask8 __U, __m128i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_fmsub_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_rcp14_pd(__m256d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtepi32_epi16(__mmask8 __M, __m128i __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_rsqrt14_ps(__mmask8 __U, __m256 __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_permutexvar_pd(__m256d __W, __mmask8 __U, __m256i __X, __m256d __Y)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvttpd_epu32(__m128i __W, __mmask8 __U, __m128d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_mul_epu32(__mmask8 __M, __m128i __X, __m128i __Y)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_srl_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_unpacklo_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_loadu_epi64(__m128i __W, __mmask8 __U, void const *__P)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtepu16_epi64(__mmask8 __U, __m128i __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_mov_pd(__m256d __W, __mmask8 __U, __m256d __A)
static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_compressstoreu_pd(void *__P, __mmask8 __U, __m128d __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_fmsub_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_permutexvar_epi32(__m256i __W, __mmask8 __M, __m256i __X, __m256i __Y)
static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_storeu_pd(void *__P, __mmask8 __U, __m128d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_xor_epi64(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_rcp14_ps(__mmask8 __U, __m128 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_sllv_epi64(__mmask8 __U, __m256i __X, __m256i __Y)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_moveldup_ps(__m256 __W, __mmask8 __U, __m256 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtepu8_epi32(__m128i __W, __mmask8 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_min_epu32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_and_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtusepi32_epi16(__m128i __O, __mmask8 __M, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtsepi64_epi16(__mmask8 __M, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtps_epu32(__mmask8 __U, __m256 __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask2_permutex2var_ps(__m128 __A, __m128i __I, __mmask8 __U, __m128 __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_unpacklo_pd(__mmask8 __U, __m256d __A, __m256d __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_getexp_ps(__mmask8 __U, __m128 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_srai_epi32(__mmask8 __U, __m256i __A, unsigned int __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_mul_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_unpackhi_epi64(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_permutexvar_epi64(__m256i __W, __mmask8 __M, __m256i __X, __m256i __Y)
/* Unmasked 256-bit NE compare of packed signed 32-bit ints; expansion was
 * missing, making the macro expand to nothing. Restored per the _MM_CMPINT_NE
 * pattern used by the sibling cmp macros at the top of this header. */
#define _mm256_cmpneq_epi32_mask(A, B) \
_mm256_cmp_epi32_mask((A), (B), _MM_CMPINT_NE)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_sub_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtepu16_epi32(__mmask8 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_srl_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_sra_epi32(__mmask8 __U, __m256i __A, __m128i __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_load_pd(__mmask8 __U, void const *__P)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_set1_epi32(__m256i __O, __mmask8 __M, int __A)
/* Zeromasked (by writemask k) 256-bit NE compare of packed signed 32-bit
 * ints; expansion was missing, making the macro expand to nothing. Restored
 * per the _MM_CMPINT_NE masked-compare pattern used by this header. */
#define _mm256_mask_cmpneq_epi32_mask(k, A, B) \
_mm256_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_NE)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_cvtepi64_epi16(__m256i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_sqrt_pd(__mmask8 __U, __m128d __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_or_epi32(__mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_min_epu32(__mmask8 __M, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtusepi64_epi32(__m128i __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_mov_pd(__mmask8 __U, __m256d __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_fnmadd_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtsepi64_epi16(__mmask8 __M, __m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_rorv_epi64(__mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_moveldup_ps(__mmask8 __U, __m256 __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_permutex2var_pd(__m256d __A, __m256i __I, __m256d __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_broadcastss_ps(__m256 __O, __mmask8 __M, __m128 __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_div_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_div_pd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_sub_ps(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask3_fmaddsub_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtepi32_epi16(__mmask8 __M, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtsepi64_epi8(__mmask8 __M, __m128i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_unpacklo_ps(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_scalef_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_set1_epi32(__m128i __O, __mmask8 __M, int __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_permutex2var_epi32(__m256i __A, __mmask8 __U, __m256i __I, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_blend_epi32(__mmask8 __U, __m256i __A, __m256i __W)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_srav_epi64(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_add_epi64(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_fmsubadd_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtsepi64_epi8(__m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtepi32_epi64(__mmask8 __U, __m128i __X)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_max_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_sqrt_pd(__mmask8 __U, __m256d __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_cvtepu32_ps(__mmask8 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtps_epu32(__m128i __W, __mmask8 __U, __m128 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_expandloadu_epi32(__mmask8 __U, void const *__P)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtepu16_epi32(__m128i __W, __mmask8 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_or_epi64(__m128i __a, __m128i __b)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_cvtepu32_pd(__mmask8 __U, __m128i __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_rsqrt14_pd(__m256d __W, __mmask8 __U, __m256d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtepu16_epi64(__m128i __W, __mmask8 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtepi8_epi64(__mmask8 __U, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_sra_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_unpackhi_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtsepi64_epi32(__m128i __O, __mmask8 __M, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtsepi32_epi8(__m128i __O, __mmask8 __M, __m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_sub_epi32(__mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_max_epi64(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtsepi32_epi8(__m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtepu8_epi32(__mmask8 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_unpacklo_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS256 _mm256_mask_test_epi64_mask(__mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_min_pd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_sub_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_expand_pd(__m128d __W, __mmask8 __U, __m128d __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_blend_ps(__mmask8 __U, __m128 __A, __m128 __W)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_compress_ps(__mmask8 __U, __m128 __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_scalef_ps(__mmask8 __U, __m128 __A, __m128 __B)
/* Unmasked 128-bit EQ compare of packed signed 64-bit ints; expansion was
 * missing, making the macro expand to nothing. Restored per the _MM_CMPINT_EQ
 * pattern used by the epi32 variants at the top of this header. */
#define _mm_cmpeq_epi64_mask(A, B) \
_mm_cmp_epi64_mask((A), (B), _MM_CMPINT_EQ)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_loadu_epi64(__m256i __W, __mmask8 __U, void const *__P)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_getexp_ps(__mmask8 __U, __m256 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_srli_epi64(__m128i __W, __mmask8 __U, __m128i __A, unsigned int __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_div_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_min_epu64(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_rsqrt14_pd(__m128d __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_getexp_pd(__m128d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_sll_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_fmsubadd_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_permutex2var_epi64(__mmask8 __U, __m256i __A, __m256i __I, __m256i __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_div_pd(__mmask8 __U, __m256d __A, __m256d __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_movedup_pd(__m256d __W, __mmask8 __U, __m256d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtusepi64_epi16(__mmask8 __M, __m128i __A)
static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_compressstoreu_ps(void *__P, __mmask8 __U, __m256 __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_expandloadu_pd(__mmask8 __U, void const *__P)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_sra_epi64(__mmask8 __U, __m256i __A, __m128i __B)
static __inline __m128i __DEFAULT_FN_ATTRS128 _mm_load_epi32(void const *__P)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_add_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_max_epi64(__m256i __A, __m256i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_permutex2var_ps(__mmask8 __U, __m128 __A, __m128i __I, __m128 __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_cvtsepi64_epi32(__m256i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_div_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_sqrt_pd(__m256d __W, __mmask8 __U, __m256d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtsepi32_epi8(__m128i __O, __mmask8 __M, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvttpd_epu32(__m128i __W, __mmask8 __U, __m256d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtusepi32_epi8(__m128i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_max_pd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_fmsub_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_mov_epi64(__mmask8 __U, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtepi8_epi32(__mmask8 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_max_epu32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_max_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_getexp_ps(__m128 __W, __mmask8 __U, __m128 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_sllv_epi32(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_permutex2var_epi64(__m256i __A, __mmask8 __U, __m256i __I, __m256i __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_expandloadu_pd(__m128d __W, __mmask8 __U, void const *__P)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_srav_epi64(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtusepi64_epi8(__mmask8 __M, __m256i __A)
static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepi32_storeu_epi8(void *__P, __mmask8 __M, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtepi8_epi32(__m128i __W, __mmask8 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_sra_epi32(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_srav_epi64(__m128i __X, __m128i __Y)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_min_epi32(__mmask8 __M, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_broadcast_i32x4(__mmask8 __M, __m128i __A)
static __inline __m256i __DEFAULT_FN_ATTRS256 _mm256_load_epi64(void const *__P)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask3_fmsubadd_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_cvtph_ps(__mmask8 __U, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtepu16_epi32(__mmask8 __U, __m128i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtpd_ps(__mmask8 __U, __m256d __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_rcp14_pd(__mmask8 __U, __m128d __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_broadcastd_epi32(__mmask8 __M, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_and_epi32(__m128i __a, __m128i __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepi64_epi32(__m128i __O, __mmask8 __M, __m256i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_blend_pd(__mmask8 __U, __m128d __A, __m128d __W)
static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_cvtsepi32_storeu_epi16(void *__P, __mmask8 __M, __m256i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_sub_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_cvtpd_epu32(__m256d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_expand_epi32(__m128i __W, __mmask8 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_srai_epi64(__m128i __W, __mmask8 __U, __m128i __A, unsigned int __imm)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtusepi64_epi32(__m128i __O, __mmask8 __M, __m256i __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_getexp_pd(__m256d __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_mul_epu32(__mmask8 __M, __m256i __X, __m256i __Y)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_load_ps(__m256 __W, __mmask8 __U, void const *__P)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_mul_epi32(__m128i __W, __mmask8 __M, __m128i __X, __m128i __Y)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvttps_epi32(__m128i __W, __mmask8 __U, __m128 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtsepi32_epi16(__m128i __O, __mmask8 __M, __m128i __A)
static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_cvtusepi64_storeu_epi32(void *__P, __mmask8 __M, __m128i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_cvtepu32_ps(__m128 __W, __mmask8 __U, __m128i __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask3_fnmadd_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_add_ps(__mmask8 __U, __m256 __A, __m256 __B)
static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_compressstoreu_epi32(void *__P, __mmask8 __U, __m256i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask3_fmsubadd_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_andnot_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_fnmsub_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_cvttpd_epu32(__m256d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_min_epi64(__m128i __A, __m128i __B)
static __inline void __DEFAULT_FN_ATTRS128 _mm_store_epi32(void *__P, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtepi16_epi64(__mmask8 __U, __m128i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_max_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_blend_epi64(__mmask8 __U, __m256i __A, __m256i __W)
/* Permute 32-bit elements of B using indices in A; expansion was missing,
 * making the macro expand to nothing. Clang implements this as the AVX2
 * variable permute with the operand order swapped (permutevar8x32 takes the
 * data vector first, the index vector second). NOTE(review): operand order
 * reconstructed from clang's upstream avx512vlintrin.h — confirm against the
 * shipped header revision. */
#define _mm256_permutexvar_epi32(A, B) \
_mm256_permutevar8x32_epi32((B), (A))
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtepi8_epi64(__m128i __W, __mmask8 __U, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_or_epi32(__m256i __a, __m256i __b)
static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_storeu_epi64(void *__P, __mmask8 __U, __m128i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_unpacklo_pd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_movedup_pd(__mmask8 __U, __m128d __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_fmaddsub_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepu8_epi32(__m256i __W, __mmask8 __U, __m128i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_loadu_pd(__m128d __W, __mmask8 __U, void const *__P)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_min_epu32(__mmask8 __M, __m256i __A, __m256i __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask3_fnmsub_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvttpd_epi32(__m128i __W, __mmask8 __U, __m128d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_set1_epi64(__mmask8 __M, long long __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS256 _mm256_mask_cvtpd_ps(__m128 __W, __mmask8 __U, __m256d __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_fnmsub_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS256 _mm256_testn_epi32_mask(__m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtepi64_epi32(__m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_expandloadu_epi32(__m256i __W, __mmask8 __U, void const *__P)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_unpackhi_ps(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_max_ps(__mmask8 __U, __m256 __A, __m256 __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_slli_epi64(__m128i __W, __mmask8 __U, __m128i __A, unsigned int __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask3_fmsub_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask2_permutex2var_pd(__m128d __A, __m128i __I, __mmask8 __U, __m128d __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_load_ps(__mmask8 __U, void const *__P)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_xor_epi64(__m256i __a, __m256i __b)
static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_cvtusepi64_storeu_epi8(void *__P, __mmask8 __M, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_load_epi32(__mmask8 __U, void const *__P)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_expandloadu_epi64(__mmask8 __U, void const *__P)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtusepi32_epi16(__mmask8 __M, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_mov_epi32(__mmask8 __U, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_sub_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_slli_epi32(__m256i __W, __mmask8 __U, __m256i __A, unsigned int __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_movehdup_ps(__m128 __W, __mmask8 __U, __m128 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_srai_epi64(__m128i __A, unsigned int __imm)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtepu8_epi32(__mmask8 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_permutex2var_epi64(__mmask8 __U, __m128i __A, __m128i __I, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_add_epi32(__mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_min_epi32(__mmask8 __M, __m128i __A, __m128i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_fnmsub_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtpd_epu32(__m128i __W, __mmask8 __U, __m256d __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_div_ps(__mmask8 __U, __m256 __A, __m256 __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_fmadd_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C)
static __inline __m256i __DEFAULT_FN_ATTRS256 _mm256_loadu_epi32(void const *__P)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_fmadd_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_abs_epi64(__mmask8 __U, __m256i __A)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS256 _mm256_mask_testn_epi32_mask(__mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_rorv_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_and_epi64(__m256i __a, __m256i __b)
static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_storeu_pd(void *__P, __mmask8 __U, __m256d __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_srav_epi32(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_abs_epi64(__m128i __W, __mmask8 __U, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_sllv_epi32(__mmask8 __U, __m256i __X, __m256i __Y)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepi16_epi64(__m256i __W, __mmask8 __U, __m128i __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_unpacklo_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_mov_epi64(__m128i __W, __mmask8 __U, __m128i __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_expand_pd(__mmask8 __U, __m256d __A)
/* Unmasked 256-bit EQ compare of packed signed 64-bit ints; expansion was
 * missing, making the macro expand to nothing. Restored per the _MM_CMPINT_EQ
 * pattern used by the epi32 variants at the top of this header. */
#define _mm256_cmpeq_epi64_mask(A, B) \
_mm256_cmp_epi64_mask((A), (B), _MM_CMPINT_EQ)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_rolv_epi64(__m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtsepi32_epi16(__m128i __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_fmaddsub_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtepi16_epi32(__mmask8 __U, __m128i __A)
/* Zeromasked (by writemask k) 256-bit EQ compare of packed signed 32-bit
 * ints; expansion was missing, making the macro expand to nothing. Restored
 * exactly per the pattern shown at the top of this header. */
#define _mm256_mask_cmpeq_epi32_mask(k, A, B) \
_mm256_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_EQ)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepu32_ps(__m256 __W, __mmask8 __U, __m256i __A)
static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_cvtusepi64_storeu_epi16(void *__P, __mmask8 __M, __m256i __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_loadu_ps(__m256 __W, __mmask8 __U, void const *__P)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_unpacklo_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_cvtepi32_epi8(__m256i __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_scalef_pd(__m256d __A, __m256d __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtepi32_ps(__mmask8 __U, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_max_epu64(__mmask8 __M, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_permutex2var_epi64(__m128i __A, __mmask8 __U, __m128i __I, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_srav_epi64(__mmask8 __U, __m256i __X, __m256i __Y)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_expandloadu_ps(__mmask8 __U, void const *__P)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_rorv_epi64(__m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtepu32_epi64(__m128i __W, __mmask8 __U, __m128i __X)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_mul_epi32(__m256i __W, __mmask8 __M, __m256i __X, __m256i __Y)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_cvtepu32_ps(__m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtps_epi32(__m128i __W, __mmask8 __U, __m128 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_srai_epi64(__mmask8 __U, __m256i __A, unsigned int __imm)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_slli_epi32(__m128i __W, __mmask8 __U, __m128i __A, unsigned int __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtusepi64_epi8(__m128i __O, __mmask8 __M, __m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_rolv_epi64(__mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_expand_ps(__mmask8 __U, __m256 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_or_epi64(__mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_sra_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_min_epi64(__m256i __A, __m256i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_expand_ps(__mmask8 __U, __m128 __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_max_ps(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_cvtsepi64_storeu_epi16(void *__P, __mmask8 __M, __m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_mullo_epi32(__mmask8 __M, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_min_epi64(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_expand_epi64(__mmask8 __U, __m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_unpackhi_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_permutevar_pd(__mmask8 __U, __m128d __A, __m128i __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_add_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_mov_ps(__mmask8 __U, __m128 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_andnot_epi64(__m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_broadcastd_epi32(__m128i __O, __mmask8 __M, __m128i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_permutex2var_pd(__mmask8 __U, __m128d __A, __m128i __I, __m128d __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtsepi64_epi32(__mmask8 __M, __m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_abs_epi64(__m256i __W, __mmask8 __U, __m256i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_compress_ps(__m128 __W, __mmask8 __U, __m128 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_expand_epi64(__m256i __W, __mmask8 __U, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvttps_epu32(__mmask8 __U, __m128 __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_load_pd(__mmask8 __U, void const *__P)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_permutevar_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256i __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_compress_epi32(__m128i __W, __mmask8 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtepi16_epi32(__m128i __W, __mmask8 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_cvtsepi32_epi8(__m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_sll_epi32(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_fmsub_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_fmsub_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_sra_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_srlv_epi32(__mmask8 __U, __m256i __X, __m256i __Y)
static __inline void __DEFAULT_FN_ATTRS256 _mm256_storeu_epi64(void *__P, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_cvtepi32_epi16(__m256i __A)
#define _mm_cmpneq_epi64_mask(A, B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_set1_epi32(__mmask8 __M, int __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtepu32_ps(__mmask8 __U, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_and_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_fnmsub_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS128 _mm_mask_test_epi32_mask(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_rolv_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_moveldup_ps(__m128 __W, __mmask8 __U, __m128 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_cvtusepi64_epi8(__m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvttps_epu32(__m256i __W, __mmask8 __U, __m256 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtsepi64_epi32(__m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_unpackhi_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtepi64_epi16(__mmask8 __M, __m128i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_cvtps_pd(__m128d __W, __mmask8 __U, __m128 __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_load_pd(__m256d __W, __mmask8 __U, void const *__P)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_movehdup_ps(__mmask8 __U, __m128 __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_expand_ps(__m128 __W, __mmask8 __U, __m128 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_abs_epi64(__m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtepi64_epi8(__m128i __O, __mmask8 __M, __m128i __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_scalef_ps(__mmask8 __U, __m256 __A, __m256 __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_expand_epi32(__mmask8 __U, __m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_permutexvar_epi32(__mmask8 __M, __m256i __X, __m256i __Y)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_set1_epi32(__mmask8 __M, int __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_sub_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepi64_storeu_epi8(void *__P, __mmask8 __M, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_srlv_epi64(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_getexp_pd(__m128d __W, __mmask8 __U, __m128d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_andnot_epi64(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_rolv_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_max_pd(__mmask8 __U, __m256d __A, __m256d __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_expandloadu_pd(__m256d __W, __mmask8 __U, void const *__P)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_min_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_broadcast_f32x4(__m256 __O, __mmask8 __M, __m128 __A)
#define _mm256_cmpeq_epi32_mask(A, B)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_permutexvar_pd(__m256i __X, __m256d __Y)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_broadcastss_ps(__mmask8 __M, __m128 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_set1_epi64(__m256i __O, __mmask8 __M, long long __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_mov_ps(__mmask8 __U, __m256 __A)
static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_cvtusepi64_storeu_epi8(void *__P, __mmask8 __M, __m128i __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_permutex2var_pd(__m256d __A, __mmask8 __U, __m256i __I, __m256d __B)
#define _mm_cmpeq_epi32_mask(A, B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_unpacklo_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_rolv_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtepi16_epi64(__mmask8 __U, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_srl_epi64(__mmask8 __U, __m256i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_mul_epi32(__mmask8 __M, __m128i __X, __m128i __Y)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_srli_epi32(__mmask8 __U, __m128i __A, unsigned int __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_scalef_pd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepi32_ps(__m256 __W, __mmask8 __U, __m256i __A)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS128 _mm_testn_epi32_mask(__m128i __A, __m128i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_fmaddsub_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask3_fmsubadd_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_and_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_slli_epi64(__m256i __W, __mmask8 __U, __m256i __A, unsigned int __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_permutex2var_epi32(__m256i __A, __m256i __I, __m256i __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_div_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_min_epi64(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask3_fnmsub_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS256 _mm256_test_epi64_mask(__m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtpd_epi32(__mmask8 __U, __m128d __A)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS128 _mm_mask_testn_epi32_mask(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_permutevar_pd(__mmask8 __U, __m256d __A, __m256i __C)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_xor_epi64(__mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_cvtepu32_pd(__m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_max_epi64(__mmask8 __M, __m128i __A, __m128i __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_fmsubadd_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_scalef_ps(__m128 __A, __m128 __B)
static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_store_ps(void *__P, __mmask8 __U, __m128 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_blend_epi32(__mmask8 __U, __m128i __A, __m128i __W)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_broadcastq_epi64(__mmask8 __M, __m128i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_moveldup_ps(__mmask8 __U, __m128 __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_permutex2var_ps(__m256 __A, __m256i __I, __m256 __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask2_permutex2var_ps(__m256 __A, __m256i __I, __mmask8 __U, __m256 __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_permutevar_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256i __C)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_permutexvar_ps(__mmask8 __U, __m256i __X, __m256 __Y)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask3_fmadd_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_permutevar_ps(__mmask8 __U, __m128 __A, __m128i __C)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepi32_pd(__m256d __W, __mmask8 __U, __m128i __A)
static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepi32_storeu_epi16(void *__P, __mmask8 __M, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_permutex2var_epi32(__m128i __A, __m128i __I, __m128i __B)
static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_storeu_epi64(void *__P, __mmask8 __U, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_blend_epi64(__mmask8 __U, __m128i __A, __m128i __W)
static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_cvtusepi64_storeu_epi32(void *__P, __mmask8 __M, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_compress_epi32(__mmask8 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_andnot_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_xor_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtsepi64_epi8(__m128i __O, __mmask8 __M, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepi8_epi64(__m256i __W, __mmask8 __U, __m128i __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_scalef_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtepu16_epi64(__mmask8 __U, __m128i __A)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS256 _mm256_mask_test_epi32_mask(__mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_rorv_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_or_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_rolv_epi64(__m256i __A, __m256i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask3_fnmadd_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_add_epi32(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_srav_epi32(__mmask8 __U, __m128i __X, __m128i __Y)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtusepi64_epi32(__mmask8 __M, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvttps_epi32(__mmask8 __U, __m256 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtepi64_epi8(__mmask8 __M, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_expandloadu_epi64(__mmask8 __U, void const *__P)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_compress_pd(__m256d __W, __mmask8 __U, __m256d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepi32_epi16(__m128i __O, __mmask8 __M, __m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_abs_epi64(__m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepi64_epi16(__m128i __O, __mmask8 __M, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_or_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvttps_epu32(__m128 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_cvtusepi32_epi16(__m256i __A)
static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_store_pd(void *__P, __mmask8 __U, __m128d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_max_epu64(__m128i __A, __m128i __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_add_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
short __v2hi __attribute__((__vector_size__(4)))
#define __DEFAULT_FN_ATTRS256
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtusepi64_epi32(__mmask8 __M, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_or_epi32(__m128i __a, __m128i __b)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_abs_epi32(__mmask8 __U, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_max_epu32(__mmask8 __M, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_srlv_epi32(__mmask8 __U, __m128i __X, __m128i __Y)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_permutex2var_ps(__m256 __A, __mmask8 __U, __m256i __I, __m256 __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_mullo_epi32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_and_epi32(__mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_min_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_abs_epi32(__mmask8 __U, __m128i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_fmadd_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_scalef_ps(__m256 __A, __m256 __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtepi32_epi16(__m128i __A)
#define _mm256_permutexvar_ps(A, B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_permutex2var_epi32(__mmask8 __U, __m256i __A, __m256i __I, __m256i __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_broadcastsd_pd(__m256d __O, __mmask8 __M, __m128d __A)
static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_compressstoreu_epi64(void *__P, __mmask8 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_cvtsepi64_epi16(__m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_min_epi64(__mmask8 __M, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_sllv_epi32(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_unpacklo_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtusepi64_epi16(__m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepi16_epi32(__m256i __W, __mmask8 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtepi16_epi64(__m128i __W, __mmask8 __U, __m128i __A)
static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_storeu_ps(void *__P, __mmask8 __U, __m128 __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_sub_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_compress_epi32(__m256i __W, __mmask8 __U, __m256i __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_cvtps_pd(__m256d __W, __mmask8 __U, __m128 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_rorv_epi64(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_sub_pd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_store_epi64(void *__P, __mmask8 __U, __m128i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_rcp14_ps(__m128 __W, __mmask8 __U, __m128 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_sllv_epi64(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_expandloadu_pd(__mmask8 __U, void const *__P)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_sll_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtusepi32_epi8(__m128i __O, __mmask8 __M, __m128i __A)
static __inline __m128i __DEFAULT_FN_ATTRS128 _mm_load_epi64(void const *__P)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_unpacklo_epi32(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtepu32_epi64(__mmask8 __U, __m128i __X)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_srli_epi64(__mmask8 __U, __m128i __A, unsigned int __B)
#define __DEFAULT_FN_ATTRS128
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_rcp14_ps(__m256 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_min_epi32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask2_permutex2var_pd(__m256d __A, __m256i __I, __mmask8 __U, __m256d __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_permutexvar_epi64(__m256i __X, __m256i __Y)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepu8_epi64(__m256i __W, __mmask8 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_and_epi64(__m128i __a, __m128i __b)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_load_ps(__mmask8 __U, void const *__P)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_compress_epi64(__mmask8 __U, __m256i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_rcp14_pd(__m128d __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_sll_epi64(__mmask8 __U, __m256i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtepu8_epi64(__mmask8 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_mul_epu32(__m128i __W, __mmask8 __M, __m128i __X, __m128i __Y)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvttps_epu32(__m128i __W, __mmask8 __U, __m128 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_rorv_epi32(__m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvttpd_epu32(__mmask8 __U, __m128d __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_srli_epi32(__m256i __W, __mmask8 __U, __m256i __A, unsigned int __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_unpackhi_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_fnmadd_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_mul_epi32(__mmask8 __M, __m256i __X, __m256i __Y)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_cvtepu32_ps(__m256i __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_compress_ps(__m256 __W, __mmask8 __U, __m256 __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_rsqrt14_ps(__m256 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_rolv_epi32(__mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_permutex2var_pd(__m128d __A, __m128i __I, __m128d __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_unpacklo_epi64(__mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_max_epi32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_fnmsub_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_movedup_pd(__m128d __W, __mmask8 __U, __m128d __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_max_epi64(__mmask8 __M, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtps_epi32(__m256i __W, __mmask8 __U, __m256 __A)
static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_store_ps(void *__P, __mmask8 __U, __m256 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_broadcastd_epi32(__mmask8 __M, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_srli_epi64(__mmask8 __U, __m256i __A, unsigned int __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_cvtepi32_pd(__m128d __W, __mmask8 __U, __m128i __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_add_pd(__mmask8 __U, __m256d __A, __m256d __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_sllv_epi64(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y)
#define _mm_mask_cmpneq_epi64_mask(k, A, B)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_fmsub_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_max_epi32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS128 _mm_mask_testn_epi64_mask(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_cvtsepi64_storeu_epi8(void *__P, __mmask8 __M, __m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_srai_epi64(__m256i __A, unsigned int __imm)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cvttps_epu32(__m256 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_compress_epi32(__mmask8 __U, __m256i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_fnmadd_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_cvtpd_ps(__mmask8 __U, __m128d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtpd_epi32(__mmask8 __U, __m256d __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_sub_ps(__mmask8 __U, __m256 __A, __m256 __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtepi64_epi32(__mmask8 __M, __m256i __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_compress_ps(__mmask8 __U, __m256 __A)
static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_store_epi32(void *__P, __mmask8 __U, __m256i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_permutex2var_ps(__m128 __A, __mmask8 __U, __m128i __I, __m128 __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvttps_epi32(__m256i __W, __mmask8 __U, __m256 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_loadu_epi32(__m128i __W, __mmask8 __U, void const *__P)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_mov_epi64(__mmask8 __U, __m256i __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_cvtph_ps(__m256 __W, __mmask8 __U, __m128i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_fmsubadd_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_sqrt_pd(__m128d __W, __mmask8 __U, __m128d __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_rolv_epi32(__m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_srav_epi64(__m256i __X, __m256i __Y)
static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_cvtusepi64_storeu_epi16(void *__P, __mmask8 __M, __m128i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_rcp14_pd(__m128d __W, __mmask8 __U, __m128d __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_and_epi64(__mmask8 __U, __m256i __A, __m256i __B)
static __inline void __DEFAULT_FN_ATTRS256 _mm256_storeu_epi32(void *__P, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_cvtusepi64_epi16(__m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepi32_epi64(__m256i __W, __mmask8 __U, __m128i __X)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtsepi64_epi16(__m128i __O, __mmask8 __M, __m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_abs_epi32(__m256i __W, __mmask8 __U, __m256i __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_max_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_fnmadd_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask3_fmadd_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtps_epu32(__mmask8 __U, __m128 __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_rsqrt14_ps(__mmask8 __U, __m128 __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_blend_ps(__mmask8 __U, __m256 __A, __m256 __W)
static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_compressstoreu_pd(void *__P, __mmask8 __U, __m256d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_min_epi32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B)
static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_cvtsepi64_storeu_epi8(void *__P, __mmask8 __M, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtsepi64_epi16(__m128i __O, __mmask8 __M, __m128i __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_movehdup_ps(__m256 __W, __mmask8 __U, __m256 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_srl_epi32(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_permutex2var_pd(__mmask8 __U, __m256d __A, __m256i __I, __m256d __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtusepi64_epi16(__mmask8 __M, __m256i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_cvtepu32_pd(__m128i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_loadu_ps(__m128 __W, __mmask8 __U, void const *__P)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtpd_epu32(__mmask8 __U, __m256d __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_xor_epi32(__m256i __a, __m256i __b)
#define _mm_mask_cmpeq_epi32_mask(k, A, B)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS128 _mm_testn_epi64_mask(__m128i __A, __m128i __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_add_pd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_fmsubadd_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_srlv_epi32(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_permutexvar_pd(__mmask8 __U, __m256i __X, __m256d __Y)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask2_permutex2var_epi64(__m256i __A, __m256i __I, __mmask8 __U, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_srai_epi32(__mmask8 __U, __m128i __A, unsigned int __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_unpacklo_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepi64_storeu_epi32(void *__P, __mmask8 __M, __m256i __A)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS256 _mm256_testn_epi64_mask(__m256i __A, __m256i __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_expand_pd(__mmask8 __U, __m128d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_sub_epi32(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_sub_pd(__mmask8 __U, __m256d __A, __m256d __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_scalef_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_sub_epi64(__mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_expandloadu_epi32(__mmask8 __U, void const *__P)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_cvtpd_ps(__m128 __W, __mmask8 __U, __m128d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_sra_epi64(__m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_expand_epi32(__m256i __W, __mmask8 __U, __m256i __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtepi32_pd(__mmask8 __U, __m128i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask3_fmsub_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtepi32_epi8(__m128i __O, __mmask8 __M, __m128i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_min_ps(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_rolv_epi64(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_unpacklo_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_fnmsub_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_andnot_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepi64_epi8(__m128i __O, __mmask8 __M, __m256i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_fmaddsub_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
#define _mm_mask_cmpeq_epi64_mask(k, A, B) \
    _mm_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_EQ)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_srlv_epi32(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_mul_ps(__mmask8 __U, __m256 __A, __m256 __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_mov_pd(__mmask8 __U, __m128d __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_loadu_ps(__mmask8 __U, void const *__P)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_getexp_pd(__m256d __W, __mmask8 __U, __m256d __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_max_epu64(__mmask8 __M, __m256i __A, __m256i __B)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS128 _mm_mask_test_epi64_mask(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_mov_epi32(__mmask8 __U, __m128i __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_expand_ps(__m256 __W, __mmask8 __U, __m256 __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_broadcast_f32x4(__m128 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_unpackhi_epi32(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtph_ps(__mmask8 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtepi8_epi32(__mmask8 __U, __m128i __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_mul_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_abs_epi32(__m128i __W, __mmask8 __U, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_min_epi64(__mmask8 __M, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtepu8_epi64(__mmask8 __U, __m128i __A)
#define _mm256_mask_cmpeq_epi64_mask(k, A, B) \
    _mm256_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_EQ)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_sra_epi64(__m256i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_unpackhi_epi64(__mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_scalef_pd(__mmask8 __U, __m256d __A, __m256d __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepu32_pd(__m256d __W, __mmask8 __U, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_max_epu64(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_unpackhi_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_andnot_epi32(__m128i __A, __m128i __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_unpackhi_pd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_permutex2var_epi32(__mmask8 __U, __m128i __A, __m128i __I, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_slli_epi32(__mmask8 __U, __m128i __A, unsigned int __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtepi32_epi8(__mmask8 __M, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_mullo_epi32(__mmask8 __M, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtsepi32_epi16(__mmask8 __M, __m128i __A)
static __inline void __DEFAULT_FN_ATTRS256 _mm256_store_epi64(void *__P, __m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_max_epu64(__m256i __A, __m256i __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_permutex2var_pd(__m128d __A, __mmask8 __U, __m128i __I, __m128d __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_permutex2var_epi32(__m128i __A, __mmask8 __U, __m128i __I, __m128i __B)
static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepi64_storeu_epi16(void *__P, __mmask8 __M, __m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_broadcastq_epi64(__m256i __O, __mmask8 __M, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_load_epi64(__mmask8 __U, void const *__P)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_rorv_epi32(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_andnot_epi32(__m256i __A, __m256i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_rsqrt14_ps(__m128 __A)
static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_storeu_ps(void *__P, __mmask8 __U, __m256 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_xor_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_expandloadu_ps(__m256 __W, __mmask8 __U, void const *__P)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask3_fmaddsub_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_fmaddsub_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_broadcast_i32x4(__m128i __A)
static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_cvtusepi32_storeu_epi16(void *__P, __mmask8 __M, __m256i __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_rsqrt14_pd(__m256d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_srav_epi64(__mmask8 __U, __m128i __X, __m128i __Y)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_fmaddsub_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_getexp_ps(__m128 __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_mul_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_unpacklo_epi64(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_min_epu64(__mmask8 __M, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_compress_epi64(__mmask8 __U, __m128i __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask3_fnmadd_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_unpackhi_ps(__mmask8 __U, __m256 __A, __m256 __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_cvtepi32_ps(__m128 __W, __mmask8 __U, __m128i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_fmsubadd_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_broadcastq_epi64(__mmask8 __M, __m128i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_fmsub_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtps_epu32(__m128 __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask3_fnmadd_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask3_fmsub_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_slli_epi64(__mmask8 __U, __m256i __A, unsigned int __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtusepi64_epi8(__m128i __O, __mmask8 __M, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtsepi64_epi32(__mmask8 __M, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_load_epi32(__mmask8 __U, void const *__P)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtusepi32_epi8(__mmask8 __M, __m128i __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_getexp_ps(__m256 __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_mul_ps(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_cvtps_pd(__mmask8 __U, __m128 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtepi8_epi64(__mmask8 __U, __m128i __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_broadcast_f32x4(__mmask8 __M, __m128 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtepi32_epi64(__m128i __W, __mmask8 __U, __m128i __X)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_mov_ps(__m256 __W, __mmask8 __U, __m256 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_expand_epi32(__mmask8 __U, __m128i __A)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS128 _mm_test_epi64_mask(__m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_sub_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_srai_epi32(__m128i __W, __mmask8 __U, __m128i __A, unsigned int __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtsepi64_epi32(__m128i __O, __mmask8 __M, __m256i __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_loadu_pd(__mmask8 __U, void const *__P)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_or_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_srlv_epi64(__mmask8 __U, __m128i __X, __m128i __Y)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_rcp14_ps(__mmask8 __U, __m256 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepu32_epi64(__m256i __W, __mmask8 __U, __m128i __X)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtpd_epu32(__m128d __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_fmadd_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtepi32_epi8(__m128i __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_unpacklo_ps(__mmask8 __U, __m256 __A, __m256 __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_compress_pd(__mmask8 __U, __m128d __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_load_epi32(__m256i __W, __mmask8 __U, void const *__P)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_fmaddsub_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtpd_epu32(__mmask8 __U, __m128d __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_unpacklo_epi32(__mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtepu8_epi64(__m128i __W, __mmask8 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtps_epi32(__mmask8 __U, __m128 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvttpd_epi32(__mmask8 __U, __m128d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_cvtusepi64_epi32(__m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_set1_epi64(__mmask8 __M, long long __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_rsqrt14_pd(__mmask8 __U, __m256d __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_fnmadd_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_store_epi64(void *__P, __mmask8 __U, __m256i __A)
static __inline __m256i __DEFAULT_FN_ATTRS256 _mm256_load_epi32(void const *__P)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_rolv_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_rorv_epi32(__m128i __A, __m128i __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_fmaddsub_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_broadcastq_epi64(__m128i __O, __mmask8 __M, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_srli_epi32(__mmask8 __U, __m256i __A, unsigned int __B)
static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_cvtepi32_storeu_epi16(void *__P, __mmask8 __M, __m128i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_fmadd_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_cvtepi64_epi8(__m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_srav_epi32(__mmask8 __U, __m256i __X, __m256i __Y)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_permutexvar_epi64(__mmask8 __M, __m256i __X, __m256i __Y)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtusepi32_epi16(__mmask8 __M, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_xor_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_add_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_expandloadu_epi64(__m128i __W, __mmask8 __U, void const *__P)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_compress_pd(__mmask8 __U, __m256d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_slli_epi64(__mmask8 __U, __m128i __A, unsigned int __B)
static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_cvtusepi32_storeu_epi16(void *__P, __mmask8 __M, __m128i __A)
static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_cvtsepi64_storeu_epi32(void *__P, __mmask8 __M, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtepi64_epi8(__mmask8 __M, __m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_mul_epu32(__m256i __W, __mmask8 __M, __m256i __X, __m256i __Y)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_cvtsepi64_epi8(__m256i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_fnmadd_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_srai_epi64(__m256i __W, __mmask8 __U, __m256i __A, unsigned int __imm)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_sqrt_ps(__mmask8 __U, __m128 __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_permutex2var_ps(__mmask8 __U, __m256 __A, __m256i __I, __m256 __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_andnot_epi32(__mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_sqrt_ps(__mmask8 __U, __m256 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtusepi32_epi16(__m128i __O, __mmask8 __M, __m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_loadu_epi32(__m256i __W, __mmask8 __U, void const *__P)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_mov_epi32(__m256i __W, __mmask8 __U, __m256i __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask3_fmadd_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U)
static __inline void __DEFAULT_FN_ATTRS128 _mm_storeu_epi32(void *__P, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask2_permutex2var_epi32(__m128i __A, __m128i __I, __mmask8 __U, __m128i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_rcp14_ps(__m128 __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_permutevar_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128i __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtsepi64_epi8(__m128i __O, __mmask8 __M, __m256i __A)
static __inline __m256i __DEFAULT_FN_ATTRS256 _mm256_loadu_epi64(void const *__P)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_sub_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_srai_epi64(__mmask8 __U, __m128i __A, unsigned int __imm)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_broadcastsd_pd(__mmask8 __M, __m128d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_srli_epi32(__m128i __W, __mmask8 __U, __m128i __A, unsigned int __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_loadu_epi32(__mmask8 __U, void const *__P)
static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_cvtepi64_storeu_epi32(void *__P, __mmask8 __M, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cvtps_epu32(__m256 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_load_epi64(__mmask8 __U, void const *__P)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_broadcast_i32x4(__m256i __O, __mmask8 __M, __m128i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_expandloadu_ps(__m128 __W, __mmask8 __U, void const *__P)
static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_compressstoreu_ps(void *__P, __mmask8 __U, __m128 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_mullo_epi32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_unpackhi_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline void __DEFAULT_FN_ATTRS128 _mm_store_epi64(void *__P, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtsepi32_epi16(__m128i __O, __mmask8 __M, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_and_epi64(__mmask8 __U, __m128i __A, __m128i __B)
#define _mm256_cmpneq_epi64_mask(A, B) \
    _mm256_cmp_epi64_mask((A), (B), _MM_CMPINT_NE)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtpd_epu32(__m128i __W, __mmask8 __U, __m128d __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_fnmadd_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvttpd_epi32(__m128i __W, __mmask8 __U, __m256d __A)
static __inline __m128i __DEFAULT_FN_ATTRS128 _mm_loadu_epi32(void const *__P)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepu16_epi32(__m256i __W, __mmask8 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_max_epu64(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B)
#define _mm_cmpneq_epi32_mask(A, B) \
    _mm_cmp_epi32_mask((A), (B), _MM_CMPINT_NE)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_mov_epi32(__m128i __W, __mmask8 __U, __m128i __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_unpackhi_pd(__mmask8 __U, __m256d __A, __m256d __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask2_permutex2var_epi32(__m256i __A, __m256i __I, __mmask8 __U, __m256i __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_unpackhi_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_rolv_epi32(__m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_permutex2var_epi64(__m128i __A, __m128i __I, __m128i __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtps_pd(__mmask8 __U, __m128 __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtepu32_pd(__mmask8 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_expand_epi64(__mmask8 __U, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtps_epu32(__m256i __W, __mmask8 __U, __m256 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_expandloadu_epi64(__m256i __W, __mmask8 __U, void const *__P)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtepi16_epi32(__mmask8 __U, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_rorv_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtepi32_epi64(__mmask8 __U, __m128i __X)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_min_epu64(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_andnot_epi32(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_storeu_epi32(void *__P, __mmask8 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvttps_epi32(__mmask8 __U, __m128 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_sll_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_loadu_epi32(__mmask8 __U, void const *__P)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask3_fnmsub_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_cvtepi32_ps(__mmask8 __U, __m128i __A)
static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_compressstoreu_epi64(void *__P, __mmask8 __U, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_cvtsepi32_epi16(__m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_srl_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_mul_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_xor_epi32(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_compressstoreu_epi32(void *__P, __mmask8 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvttpd_epi32(__mmask8 __U, __m256d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_xor_epi64(__m128i __a, __m128i __b)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_getexp_pd(__mmask8 __U, __m128d __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_cvtepu32_pd(__m128d __W, __mmask8 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvttpd_epu32(__mmask8 __U, __m256d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtepi64_epi32(__m128i __O, __mmask8 __M, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_srai_epi32(__m256i __W, __mmask8 __U, __m256i __A, unsigned int __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtusepi64_epi32(__m128i __O, __mmask8 __M, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtpd_epi32(__m128i __W, __mmask8 __U, __m128d __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_min_epu64(__m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_or_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_compress_epi64(__m128i __W, __mmask8 __U, __m128i __A)
static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_cvtsepi32_storeu_epi8(void *__P, __mmask8 __M, __m128i __A)
static __inline __m256 __DEFAULT_FN_ATTRS _mm256_add_ps(__m256 __a, __m256 __b)
Adds two 256-bit vectors of [8 x float].
static __inline __m128i __DEFAULT_FN_ATTRS _mm256_cvtpd_epi32(__m256d __a)
Converts a 256-bit vector of [4 x double] into a 128-bit vector of [4 x i32].
static __inline __m256d __DEFAULT_FN_ATTRS _mm256_unpacklo_pd(__m256d __a, __m256d __b)
Unpacks the even-indexed vector elements from two 256-bit vectors of [4 x double] and interleaves the...
static __inline __m256 __DEFAULT_FN_ATTRS _mm256_sqrt_ps(__m256 __a)
Calculates the square roots of the values in a 256-bit vector of [8 x float].
static __inline __m256 __DEFAULT_FN_ATTRS _mm256_cvtepi32_ps(__m256i __a)
Converts a vector of [8 x i32] into a vector of [8 x float].
static __inline __m256 __DEFAULT_FN_ATTRS _mm256_moveldup_ps(__m256 __a)
Moves and duplicates even-indexed values from a 256-bit vector of [8 x float] to float values in a 25...
static __inline __m128 __DEFAULT_FN_ATTRS _mm256_cvtpd_ps(__m256d __a)
Converts a 256-bit vector of [4 x double] into a 128-bit vector of [4 x float].
static __inline __m256 __DEFAULT_FN_ATTRS _mm256_setzero_ps(void)
Constructs a 256-bit floating-point vector of [8 x float] with all vector elements initialized to zer...
static __inline __m256 __DEFAULT_FN_ATTRS _mm256_permutevar_ps(__m256 __a, __m256i __c)
Copies the values stored in a 256-bit vector of [8 x float] as specified by the 256-bit integer vecto...
static __inline __m256d __DEFAULT_FN_ATTRS _mm256_cvtps_pd(__m128 __a)
Converts a 128-bit vector of [4 x float] into a 256-bit vector of [4 x double].
static __inline __m128 __DEFAULT_FN_ATTRS128 _mm_permutevar_ps(__m128 __a, __m128i __c)
Copies the values stored in a 128-bit vector of [4 x float] as specified by the 128-bit integer vecto...
static __inline __m128i __DEFAULT_FN_ATTRS _mm256_cvttpd_epi32(__m256d __a)
Converts a 256-bit vector of [4 x double] into a 128-bit vector of [4 x i32], truncating the result b...
static __inline __m256i __DEFAULT_FN_ATTRS _mm256_cvtps_epi32(__m256 __a)
Converts a vector of [8 x float] into a vector of [8 x i32].
static __inline __m256i __DEFAULT_FN_ATTRS _mm256_set1_epi64x(long long __q)
Constructs a 256-bit integer vector of [4 x i64], with each of the 64-bit integral vector elements se...
static __inline __m256 __DEFAULT_FN_ATTRS _mm256_min_ps(__m256 __a, __m256 __b)
Compares two 256-bit vectors of [8 x float] and returns the lesser of each pair of values.
static __inline __m256i __DEFAULT_FN_ATTRS _mm256_cvttps_epi32(__m256 __a)
Converts a vector of [8 x float] into a vector of [8 x i32], truncating the result by rounding toward...
static __inline __m256 __DEFAULT_FN_ATTRS _mm256_sub_ps(__m256 __a, __m256 __b)
Subtracts two 256-bit vectors of [8 x float].
static __inline __m256 __DEFAULT_FN_ATTRS _mm256_max_ps(__m256 __a, __m256 __b)
Compares two 256-bit vectors of [8 x float] and returns the greater of each pair of values.
static __inline __m256 __DEFAULT_FN_ATTRS _mm256_movehdup_ps(__m256 __a)
Moves and duplicates odd-indexed values from a 256-bit vector of [8 x float] to float values in a 256...
static __inline __m256d __DEFAULT_FN_ATTRS _mm256_div_pd(__m256d __a, __m256d __b)
Divides two 256-bit vectors of [4 x double].
static __inline __m256d __DEFAULT_FN_ATTRS _mm256_sqrt_pd(__m256d __a)
Calculates the square roots of the values in a 256-bit vector of [4 x double].
static __inline __m256d __DEFAULT_FN_ATTRS _mm256_mul_pd(__m256d __a, __m256d __b)
Multiplies two 256-bit vectors of [4 x double].
static __inline __m256d __DEFAULT_FN_ATTRS _mm256_cvtepi32_pd(__m128i __a)
Converts a vector of [4 x i32] into a vector of [4 x double].
static __inline __m256d __DEFAULT_FN_ATTRS _mm256_unpackhi_pd(__m256d __a, __m256d __b)
Unpacks the odd-indexed vector elements from two 256-bit vectors of [4 x double] and interleaves them...
static __inline __m256d __DEFAULT_FN_ATTRS _mm256_sub_pd(__m256d __a, __m256d __b)
Subtracts two 256-bit vectors of [4 x double].
static __inline __m256d __DEFAULT_FN_ATTRS _mm256_setzero_pd(void)
Constructs a 256-bit floating-point vector of [4 x double] with all vector elements initialized to ze...
static __inline __m256d __DEFAULT_FN_ATTRS _mm256_movedup_pd(__m256d __a)
Moves and duplicates double-precision floating point values from a 256-bit vector of [4 x double] to ...
static __inline __m128d __DEFAULT_FN_ATTRS128 _mm_permutevar_pd(__m128d __a, __m128i __c)
Copies the values in a 128-bit vector of [2 x double] as specified by the 128-bit integer vector oper...
static __inline __m256i __DEFAULT_FN_ATTRS _mm256_setzero_si256(void)
Constructs a 256-bit integer vector initialized to zero.
static __inline __m256 __DEFAULT_FN_ATTRS _mm256_div_ps(__m256 __a, __m256 __b)
Divides two 256-bit vectors of [8 x float].
static __inline __m256d __DEFAULT_FN_ATTRS _mm256_min_pd(__m256d __a, __m256d __b)
Compares two 256-bit vectors of [4 x double] and returns the lesser of each pair of values.
static __inline __m256 __DEFAULT_FN_ATTRS _mm256_unpackhi_ps(__m256 __a, __m256 __b)
Unpacks the 32-bit vector elements 2, 3, 6 and 7 from each of the two 256-bit vectors of [8 x float] ...
static __inline __m256 __DEFAULT_FN_ATTRS _mm256_mul_ps(__m256 __a, __m256 __b)
Multiplies two 256-bit vectors of [8 x float].
static __inline __m256 __DEFAULT_FN_ATTRS _mm256_unpacklo_ps(__m256 __a, __m256 __b)
Unpacks the 32-bit vector elements 0, 1, 4 and 5 from each of the two 256-bit vectors of [8 x float] ...
static __inline __m256d __DEFAULT_FN_ATTRS _mm256_permutevar_pd(__m256d __a, __m256i __c)
Copies the values in a 256-bit vector of [4 x double] as specified by the 256-bit integer vector oper...
static __inline __m256d __DEFAULT_FN_ATTRS _mm256_max_pd(__m256d __a, __m256d __b)
Compares two 256-bit vectors of [4 x double] and returns the greater of each pair of values.
static __inline __m256i __DEFAULT_FN_ATTRS _mm256_set1_epi32(int __i)
Constructs a 256-bit integer vector of [8 x i32], with each of the 32-bit integral vector elements se...
static __inline __m256d __DEFAULT_FN_ATTRS _mm256_add_pd(__m256d __a, __m256d __b)
Adds two 256-bit vectors of [4 x double].
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_div_pd(__m128d __a, __m128d __b)
Performs an element-by-element division of two 128-bit vectors of [2 x double].
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_sub_pd(__m128d __a, __m128d __b)
Subtracts two 128-bit vectors of [2 x double].
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sll_epi32(__m128i __a, __m128i __count)
Left-shifts each 32-bit value in the 128-bit integer vector operand by the specified number of bits.
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_and_si128(__m128i __a, __m128i __b)
Performs a bitwise AND of two 128-bit integer vectors.
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sub_epi64(__m128i __a, __m128i __b)
Subtracts the corresponding elements of two [2 x i64] vectors.
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cvtepi32_pd(__m128i __a)
Converts the lower two integer elements of a 128-bit vector of [4 x i32] into two double-precision fl...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_srli_epi32(__m128i __a, int __count)
Right-shifts each of 32-bit values in the 128-bit integer vector operand by the specified number of b...
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_min_pd(__m128d __a, __m128d __b)
Performs element-by-element comparison of the two 128-bit vectors of [2 x double] and returns the vec...
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_unpackhi_pd(__m128d __a, __m128d __b)
Unpacks the high-order 64-bit elements from two 128-bit vectors of [2 x double] and interleaves them ...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_srl_epi32(__m128i __a, __m128i __count)
Right-shifts each of 32-bit values in the 128-bit integer vector operand by the specified number of b...
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_unpacklo_pd(__m128d __a, __m128d __b)
Unpacks the low-order 64-bit elements from two 128-bit vectors of [2 x double] and interleaves them i...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_unpacklo_epi64(__m128i __a, __m128i __b)
Unpacks the low-order 64-bit elements from two 128-bit vectors of [2 x i64] and interleaves them into...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_srai_epi32(__m128i __a, int __count)
Right-shifts each 32-bit value in the 128-bit integer vector operand by the specified number of bits.
static __inline__ void __DEFAULT_FN_ATTRS _mm_stream_si32(int *__p, int __a)
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_unpackhi_epi64(__m128i __a, __m128i __b)
Unpacks the high-order 64-bit elements from two 128-bit vectors of [2 x i64] and interleaves them int...
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_cvtepi32_ps(__m128i __a)
Converts a vector of [4 x i32] into a vector of [4 x float].
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_unpacklo_epi32(__m128i __a, __m128i __b)
Unpacks the low-order (index 0,1) values from two 128-bit vectors of [4 x i32] and interleaves them i...
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_sqrt_pd(__m128d __a)
Calculates the square root of each of the two values stored in a 128-bit vector of [2 x double].
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_undefined_si128(void)
Generates a 128-bit vector of [4 x i32] with unspecified content.
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_srli_epi64(__m128i __a, int __count)
Right-shifts each of 64-bit values in the 128-bit integer vector operand by the specified number of b...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_unpackhi_epi32(__m128i __a, __m128i __b)
Unpacks the high-order (index 2,3) values from two 128-bit vectors of [4 x i32] and interleaves them ...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_add_epi64(__m128i __a, __m128i __b)
Adds the corresponding elements of two 128-bit vectors of [2 x i64], saving the lower 64 bits of each...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sll_epi64(__m128i __a, __m128i __count)
Left-shifts each 64-bit value in the 128-bit integer vector operand by the specified number of bits.
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set1_epi32(int __i)
Initializes all values in a 128-bit vector of [4 x i32] with the specified 32-bit value.
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_slli_epi32(__m128i __a, int __count)
Left-shifts each 32-bit value in the 128-bit integer vector operand by the specified number of bits.
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_add_epi32(__m128i __a, __m128i __b)
Adds the corresponding elements of two 128-bit vectors of [4 x i32], saving the lower 32 bits of each...
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_setzero_pd(void)
Constructs a 128-bit floating-point vector of [2 x double] initialized to zero.
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mul_epu32(__m128i __a, __m128i __b)
Multiplies 32-bit unsigned integer values contained in the lower bits of the corresponding elements o...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_srl_epi64(__m128i __a, __m128i __count)
Right-shifts each of 64-bit values in the 128-bit integer vector operand by the specified number of b...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvttps_epi32(__m128 __a)
Converts a vector of [4 x float] into a vector of [4 x i32], truncating the result when it is inexact...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_slli_epi64(__m128i __a, int __count)
Left-shifts each 64-bit value in the 128-bit integer vector operand by the specified number of bits.
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_add_pd(__m128d __a, __m128d __b)
Adds two 128-bit vectors of [2 x double].
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_max_pd(__m128d __a, __m128d __b)
Performs element-by-element comparison of the two 128-bit vectors of [2 x double] and returns the vec...
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_mul_pd(__m128d __a, __m128d __b)
Multiplies two 128-bit vectors of [2 x double].
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set1_epi64x(long long __q)
Initializes both values in a 128-bit integer vector with the specified 64-bit integer value.
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sub_epi32(__m128i __a, __m128i __b)
Subtracts the corresponding 32-bit integer values in the operands.
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cvtps_pd(__m128 __a)
Converts the lower two single-precision floating-point elements of a 128-bit vector of [4 x float] in...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_setzero_si128(void)
Creates a 128-bit integer vector initialized to zero.
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sra_epi32(__m128i __a, __m128i __count)
Right-shifts each 32-bit value in the 128-bit integer vector operand by the specified number of bits.
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtps_epi32(__m128 __a)
Converts a vector of [4 x float] into a vector of [4 x i32].
static __inline__ unsigned char __DEFAULT_FN_ATTRS _addcarry_u32(unsigned char __C, unsigned int __X, unsigned int __Y, unsigned int *__P)
Adds unsigned 32-bit integers with a carry-in and produces a carry-out. [NOTE(review): reconstructed from the garbled line "unsigned char int __C"; verify against the original header.]
static __inline__ void __DEFAULT_FN_ATTRS _mm_storeu_si16(void *__p, __m128i __a)
Stores a 16-bit integer value to an unaligned memory location. [NOTE(review): reconstructed from the leaked implementation fragment "struct __storeu_i16 *__P __v"; verify against the original header.]
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_movedup_pd(__m128d __a)
Moves and duplicates the double-precision value in the lower bits of a 128-bit vector of [2 x double]...
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_movehdup_ps(__m128 __a)
Moves and duplicates odd-indexed values from a 128-bit vector of [4 x float] to float values stored i...
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_moveldup_ps(__m128 __a)
Duplicates even-indexed values from a 128-bit vector of [4 x float] to float values stored in a 128-b...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtepu8_epi32(__m128i __V)
Zero-extends each of the lower four 8-bit integer elements of a 128-bit vector of [16 x i8] to 32-bit...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mullo_epi32(__m128i __V1, __m128i __V2)
Multiplies corresponding elements of two 128-bit vectors of [4 x i32] and returns the lower 32 bits of...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_max_epu32(__m128i __V1, __m128i __V2)
Compares the corresponding elements of two 128-bit vectors of [4 x u32] and returns a 128-bit vector ...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtepi8_epi32(__m128i __V)
Sign-extends each of the lower four 8-bit integer elements of a 128-bit vector of [16 x i8] to 32-bit...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtepu32_epi64(__m128i __V)
Zero-extends each of the lower two 32-bit integer elements of a 128-bit integer vector of [4 x i32] t...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtepu16_epi64(__m128i __V)
Zero-extends each of the lower two 16-bit integer elements of a 128-bit integer vector of [8 x i16] t...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_min_epi32(__m128i __V1, __m128i __V2)
Compares the corresponding elements of two 128-bit vectors of [4 x i32] and returns a 128-bit vector ...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtepi16_epi64(__m128i __V)
Sign-extends each of the lower two 16-bit integer elements of a 128-bit integer vector of [8 x i16] t...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtepi8_epi64(__m128i __V)
Sign-extends each of the lower two 8-bit integer elements of a 128-bit integer vector of [16 x i8] to...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtepi16_epi32(__m128i __V)
Sign-extends each of the lower four 16-bit integer elements of a 128-bit integer vector of [8 x i16] ...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtepu16_epi32(__m128i __V)
Zero-extends each of the lower four 16-bit integer elements of a 128-bit integer vector of [8 x i16] ...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_max_epi32(__m128i __V1, __m128i __V2)
Compares the corresponding elements of two 128-bit vectors of [4 x i32] and returns a 128-bit vector ...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtepu8_epi64(__m128i __V)
Zero-extends each of the lower two 8-bit integer elements of a 128-bit integer vector of [16 x i8] to...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_min_epu32(__m128i __V1, __m128i __V2)
Compares the corresponding elements of two 128-bit vectors of [4 x u32] and returns a 128-bit vector ...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mul_epi32(__m128i __V1, __m128i __V2)
Multiplies corresponding even-indexed elements of two 128-bit vectors of [4 x i32] and returns a 128-...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtepi32_epi64(__m128i __V)
Sign-extends each of the lower two 32-bit integer elements of a 128-bit integer vector of [4 x i32] t...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_abs_epi32(__m128i __a)
Computes the absolute value of each of the packed 32-bit signed integers in the source operand and st...
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_div_ps(__m128 __a, __m128 __b)
Divides two 128-bit vectors of [4 x float].
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_setzero_ps(void)
Constructs a 128-bit floating-point vector of [4 x float] initialized to zero.
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_add_ps(__m128 __a, __m128 __b)
Adds two 128-bit vectors of [4 x float], and returns the results of the addition.
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_mul_ps(__m128 __a, __m128 __b)
Multiplies two 128-bit vectors of [4 x float] and returns the results of the multiplication.
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_max_ps(__m128 __a, __m128 __b)
Compares two 128-bit vectors of [4 x float] and returns the greater of each pair of values.
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_sub_ps(__m128 __a, __m128 __b)
Subtracts each of the values of the second operand from the first operand, both of which are 128-bit ...
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_min_ps(__m128 __a, __m128 __b)
Compares two 128-bit vectors of [4 x float] and returns the lesser of each pair of values.
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_unpackhi_ps(__m128 __a, __m128 __b)
Unpacks the high-order (index 2,3) values from two 128-bit vectors of [4 x float] and interleaves the...
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_sqrt_ps(__m128 __a)
Calculates the square roots of the values stored in a 128-bit vector of [4 x float].
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_unpacklo_ps(__m128 __a, __m128 __b)
Unpacks the low-order (index 0,1) values from two 128-bit vectors of [4 x float] and interleaves them...