ETISS 0.8.0
Extendable Translating Instruction Set Simulator (version 0.8.0)
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
avx512vlintrin.h
Go to the documentation of this file.
1/*===---- avx512vlintrin.h - AVX512VL intrinsics ---------------------------===
2 *
3 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 * See https://llvm.org/LICENSE.txt for license information.
5 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 *
7 *===-----------------------------------------------------------------------===
8 */
9
10#ifndef __IMMINTRIN_H
11#error "Never use <avx512vlintrin.h> directly; include <immintrin.h> instead."
12#endif
13
14#ifndef __AVX512VLINTRIN_H
15#define __AVX512VLINTRIN_H
16
/* Attribute sets applied to every intrinsic defined in this header. Both
 * request __always_inline__, __nodebug__ and the "avx512vl" target; they
 * differ only in the __min_vector_width__ argument (128 vs. 256). */
#define __DEFAULT_FN_ATTRS128                                            \
  __attribute__((__always_inline__, __nodebug__,                         \
                 __target__("avx512vl"), __min_vector_width__(128)))
#define __DEFAULT_FN_ATTRS256                                            \
  __attribute__((__always_inline__, __nodebug__,                         \
                 __target__("avx512vl"), __min_vector_width__(256)))
19
/* Narrow helper vector types. The __vector_size__ argument is the total
 * vector size in bytes (2 or 4 here). */
typedef char __v2qi __attribute__((__vector_size__(2)));
typedef char __v4qi __attribute__((__vector_size__(4)));
typedef short __v2hi __attribute__((__vector_size__(4)));
23
24/* Integer compare */
25
/* Named 128-bit epi32 compares. Each macro forwards its operands to
 * _mm_cmp_epi32_mask / _mm_mask_cmp_epi32_mask with a fixed _MM_CMPINT_*
 * predicate. */
#define _mm_cmpeq_epi32_mask(a, b) _mm_cmp_epi32_mask((a), (b), _MM_CMPINT_EQ)
#define _mm_mask_cmpeq_epi32_mask(m, a, b) _mm_mask_cmp_epi32_mask((m), (a), (b), _MM_CMPINT_EQ)
#define _mm_cmpge_epi32_mask(a, b) _mm_cmp_epi32_mask((a), (b), _MM_CMPINT_GE)
#define _mm_mask_cmpge_epi32_mask(m, a, b) _mm_mask_cmp_epi32_mask((m), (a), (b), _MM_CMPINT_GE)
#define _mm_cmpgt_epi32_mask(a, b) _mm_cmp_epi32_mask((a), (b), _MM_CMPINT_GT)
#define _mm_mask_cmpgt_epi32_mask(m, a, b) _mm_mask_cmp_epi32_mask((m), (a), (b), _MM_CMPINT_GT)
#define _mm_cmple_epi32_mask(a, b) _mm_cmp_epi32_mask((a), (b), _MM_CMPINT_LE)
#define _mm_mask_cmple_epi32_mask(m, a, b) _mm_mask_cmp_epi32_mask((m), (a), (b), _MM_CMPINT_LE)
#define _mm_cmplt_epi32_mask(a, b) _mm_cmp_epi32_mask((a), (b), _MM_CMPINT_LT)
#define _mm_mask_cmplt_epi32_mask(m, a, b) _mm_mask_cmp_epi32_mask((m), (a), (b), _MM_CMPINT_LT)
#define _mm_cmpneq_epi32_mask(a, b) _mm_cmp_epi32_mask((a), (b), _MM_CMPINT_NE)
#define _mm_mask_cmpneq_epi32_mask(m, a, b) _mm_mask_cmp_epi32_mask((m), (a), (b), _MM_CMPINT_NE)
50
/* Named 256-bit epi32 compares. Each macro forwards its operands to
 * _mm256_cmp_epi32_mask / _mm256_mask_cmp_epi32_mask with a fixed
 * _MM_CMPINT_* predicate. */
#define _mm256_cmpeq_epi32_mask(a, b) _mm256_cmp_epi32_mask((a), (b), _MM_CMPINT_EQ)
#define _mm256_mask_cmpeq_epi32_mask(m, a, b) _mm256_mask_cmp_epi32_mask((m), (a), (b), _MM_CMPINT_EQ)
#define _mm256_cmpge_epi32_mask(a, b) _mm256_cmp_epi32_mask((a), (b), _MM_CMPINT_GE)
#define _mm256_mask_cmpge_epi32_mask(m, a, b) _mm256_mask_cmp_epi32_mask((m), (a), (b), _MM_CMPINT_GE)
#define _mm256_cmpgt_epi32_mask(a, b) _mm256_cmp_epi32_mask((a), (b), _MM_CMPINT_GT)
#define _mm256_mask_cmpgt_epi32_mask(m, a, b) _mm256_mask_cmp_epi32_mask((m), (a), (b), _MM_CMPINT_GT)
#define _mm256_cmple_epi32_mask(a, b) _mm256_cmp_epi32_mask((a), (b), _MM_CMPINT_LE)
#define _mm256_mask_cmple_epi32_mask(m, a, b) _mm256_mask_cmp_epi32_mask((m), (a), (b), _MM_CMPINT_LE)
#define _mm256_cmplt_epi32_mask(a, b) _mm256_cmp_epi32_mask((a), (b), _MM_CMPINT_LT)
#define _mm256_mask_cmplt_epi32_mask(m, a, b) _mm256_mask_cmp_epi32_mask((m), (a), (b), _MM_CMPINT_LT)
#define _mm256_cmpneq_epi32_mask(a, b) _mm256_cmp_epi32_mask((a), (b), _MM_CMPINT_NE)
#define _mm256_mask_cmpneq_epi32_mask(m, a, b) _mm256_mask_cmp_epi32_mask((m), (a), (b), _MM_CMPINT_NE)
75
/* Named 128-bit epu32 compares. Each macro forwards its operands to
 * _mm_cmp_epu32_mask / _mm_mask_cmp_epu32_mask with a fixed _MM_CMPINT_*
 * predicate. */
#define _mm_cmpeq_epu32_mask(a, b) _mm_cmp_epu32_mask((a), (b), _MM_CMPINT_EQ)
#define _mm_mask_cmpeq_epu32_mask(m, a, b) _mm_mask_cmp_epu32_mask((m), (a), (b), _MM_CMPINT_EQ)
#define _mm_cmpge_epu32_mask(a, b) _mm_cmp_epu32_mask((a), (b), _MM_CMPINT_GE)
#define _mm_mask_cmpge_epu32_mask(m, a, b) _mm_mask_cmp_epu32_mask((m), (a), (b), _MM_CMPINT_GE)
#define _mm_cmpgt_epu32_mask(a, b) _mm_cmp_epu32_mask((a), (b), _MM_CMPINT_GT)
#define _mm_mask_cmpgt_epu32_mask(m, a, b) _mm_mask_cmp_epu32_mask((m), (a), (b), _MM_CMPINT_GT)
#define _mm_cmple_epu32_mask(a, b) _mm_cmp_epu32_mask((a), (b), _MM_CMPINT_LE)
#define _mm_mask_cmple_epu32_mask(m, a, b) _mm_mask_cmp_epu32_mask((m), (a), (b), _MM_CMPINT_LE)
#define _mm_cmplt_epu32_mask(a, b) _mm_cmp_epu32_mask((a), (b), _MM_CMPINT_LT)
#define _mm_mask_cmplt_epu32_mask(m, a, b) _mm_mask_cmp_epu32_mask((m), (a), (b), _MM_CMPINT_LT)
#define _mm_cmpneq_epu32_mask(a, b) _mm_cmp_epu32_mask((a), (b), _MM_CMPINT_NE)
#define _mm_mask_cmpneq_epu32_mask(m, a, b) _mm_mask_cmp_epu32_mask((m), (a), (b), _MM_CMPINT_NE)
100
/* Named 256-bit epu32 compares. Each macro forwards its operands to
 * _mm256_cmp_epu32_mask / _mm256_mask_cmp_epu32_mask with a fixed
 * _MM_CMPINT_* predicate. */
#define _mm256_cmpeq_epu32_mask(a, b) _mm256_cmp_epu32_mask((a), (b), _MM_CMPINT_EQ)
#define _mm256_mask_cmpeq_epu32_mask(m, a, b) _mm256_mask_cmp_epu32_mask((m), (a), (b), _MM_CMPINT_EQ)
#define _mm256_cmpge_epu32_mask(a, b) _mm256_cmp_epu32_mask((a), (b), _MM_CMPINT_GE)
#define _mm256_mask_cmpge_epu32_mask(m, a, b) _mm256_mask_cmp_epu32_mask((m), (a), (b), _MM_CMPINT_GE)
#define _mm256_cmpgt_epu32_mask(a, b) _mm256_cmp_epu32_mask((a), (b), _MM_CMPINT_GT)
#define _mm256_mask_cmpgt_epu32_mask(m, a, b) _mm256_mask_cmp_epu32_mask((m), (a), (b), _MM_CMPINT_GT)
#define _mm256_cmple_epu32_mask(a, b) _mm256_cmp_epu32_mask((a), (b), _MM_CMPINT_LE)
#define _mm256_mask_cmple_epu32_mask(m, a, b) _mm256_mask_cmp_epu32_mask((m), (a), (b), _MM_CMPINT_LE)
#define _mm256_cmplt_epu32_mask(a, b) _mm256_cmp_epu32_mask((a), (b), _MM_CMPINT_LT)
#define _mm256_mask_cmplt_epu32_mask(m, a, b) _mm256_mask_cmp_epu32_mask((m), (a), (b), _MM_CMPINT_LT)
#define _mm256_cmpneq_epu32_mask(a, b) _mm256_cmp_epu32_mask((a), (b), _MM_CMPINT_NE)
#define _mm256_mask_cmpneq_epu32_mask(m, a, b) _mm256_mask_cmp_epu32_mask((m), (a), (b), _MM_CMPINT_NE)
125
/* Named 128-bit epi64 compares. Each macro forwards its operands to
 * _mm_cmp_epi64_mask / _mm_mask_cmp_epi64_mask with a fixed _MM_CMPINT_*
 * predicate. */
#define _mm_cmpeq_epi64_mask(a, b) _mm_cmp_epi64_mask((a), (b), _MM_CMPINT_EQ)
#define _mm_mask_cmpeq_epi64_mask(m, a, b) _mm_mask_cmp_epi64_mask((m), (a), (b), _MM_CMPINT_EQ)
#define _mm_cmpge_epi64_mask(a, b) _mm_cmp_epi64_mask((a), (b), _MM_CMPINT_GE)
#define _mm_mask_cmpge_epi64_mask(m, a, b) _mm_mask_cmp_epi64_mask((m), (a), (b), _MM_CMPINT_GE)
#define _mm_cmpgt_epi64_mask(a, b) _mm_cmp_epi64_mask((a), (b), _MM_CMPINT_GT)
#define _mm_mask_cmpgt_epi64_mask(m, a, b) _mm_mask_cmp_epi64_mask((m), (a), (b), _MM_CMPINT_GT)
#define _mm_cmple_epi64_mask(a, b) _mm_cmp_epi64_mask((a), (b), _MM_CMPINT_LE)
#define _mm_mask_cmple_epi64_mask(m, a, b) _mm_mask_cmp_epi64_mask((m), (a), (b), _MM_CMPINT_LE)
#define _mm_cmplt_epi64_mask(a, b) _mm_cmp_epi64_mask((a), (b), _MM_CMPINT_LT)
#define _mm_mask_cmplt_epi64_mask(m, a, b) _mm_mask_cmp_epi64_mask((m), (a), (b), _MM_CMPINT_LT)
#define _mm_cmpneq_epi64_mask(a, b) _mm_cmp_epi64_mask((a), (b), _MM_CMPINT_NE)
#define _mm_mask_cmpneq_epi64_mask(m, a, b) _mm_mask_cmp_epi64_mask((m), (a), (b), _MM_CMPINT_NE)
150
/* Named 256-bit epi64 compares. Each macro forwards its operands to
 * _mm256_cmp_epi64_mask / _mm256_mask_cmp_epi64_mask with a fixed
 * _MM_CMPINT_* predicate. */
#define _mm256_cmpeq_epi64_mask(a, b) _mm256_cmp_epi64_mask((a), (b), _MM_CMPINT_EQ)
#define _mm256_mask_cmpeq_epi64_mask(m, a, b) _mm256_mask_cmp_epi64_mask((m), (a), (b), _MM_CMPINT_EQ)
#define _mm256_cmpge_epi64_mask(a, b) _mm256_cmp_epi64_mask((a), (b), _MM_CMPINT_GE)
#define _mm256_mask_cmpge_epi64_mask(m, a, b) _mm256_mask_cmp_epi64_mask((m), (a), (b), _MM_CMPINT_GE)
#define _mm256_cmpgt_epi64_mask(a, b) _mm256_cmp_epi64_mask((a), (b), _MM_CMPINT_GT)
#define _mm256_mask_cmpgt_epi64_mask(m, a, b) _mm256_mask_cmp_epi64_mask((m), (a), (b), _MM_CMPINT_GT)
#define _mm256_cmple_epi64_mask(a, b) _mm256_cmp_epi64_mask((a), (b), _MM_CMPINT_LE)
#define _mm256_mask_cmple_epi64_mask(m, a, b) _mm256_mask_cmp_epi64_mask((m), (a), (b), _MM_CMPINT_LE)
#define _mm256_cmplt_epi64_mask(a, b) _mm256_cmp_epi64_mask((a), (b), _MM_CMPINT_LT)
#define _mm256_mask_cmplt_epi64_mask(m, a, b) _mm256_mask_cmp_epi64_mask((m), (a), (b), _MM_CMPINT_LT)
#define _mm256_cmpneq_epi64_mask(a, b) _mm256_cmp_epi64_mask((a), (b), _MM_CMPINT_NE)
#define _mm256_mask_cmpneq_epi64_mask(m, a, b) _mm256_mask_cmp_epi64_mask((m), (a), (b), _MM_CMPINT_NE)
175
/* Named 128-bit epu64 compares. Each macro forwards its operands to
 * _mm_cmp_epu64_mask / _mm_mask_cmp_epu64_mask with a fixed _MM_CMPINT_*
 * predicate. */
#define _mm_cmpeq_epu64_mask(a, b) _mm_cmp_epu64_mask((a), (b), _MM_CMPINT_EQ)
#define _mm_mask_cmpeq_epu64_mask(m, a, b) _mm_mask_cmp_epu64_mask((m), (a), (b), _MM_CMPINT_EQ)
#define _mm_cmpge_epu64_mask(a, b) _mm_cmp_epu64_mask((a), (b), _MM_CMPINT_GE)
#define _mm_mask_cmpge_epu64_mask(m, a, b) _mm_mask_cmp_epu64_mask((m), (a), (b), _MM_CMPINT_GE)
#define _mm_cmpgt_epu64_mask(a, b) _mm_cmp_epu64_mask((a), (b), _MM_CMPINT_GT)
#define _mm_mask_cmpgt_epu64_mask(m, a, b) _mm_mask_cmp_epu64_mask((m), (a), (b), _MM_CMPINT_GT)
#define _mm_cmple_epu64_mask(a, b) _mm_cmp_epu64_mask((a), (b), _MM_CMPINT_LE)
#define _mm_mask_cmple_epu64_mask(m, a, b) _mm_mask_cmp_epu64_mask((m), (a), (b), _MM_CMPINT_LE)
#define _mm_cmplt_epu64_mask(a, b) _mm_cmp_epu64_mask((a), (b), _MM_CMPINT_LT)
#define _mm_mask_cmplt_epu64_mask(m, a, b) _mm_mask_cmp_epu64_mask((m), (a), (b), _MM_CMPINT_LT)
#define _mm_cmpneq_epu64_mask(a, b) _mm_cmp_epu64_mask((a), (b), _MM_CMPINT_NE)
#define _mm_mask_cmpneq_epu64_mask(m, a, b) _mm_mask_cmp_epu64_mask((m), (a), (b), _MM_CMPINT_NE)
200
/* Named 256-bit epu64 compares. Each macro forwards its operands to
 * _mm256_cmp_epu64_mask / _mm256_mask_cmp_epu64_mask with a fixed
 * _MM_CMPINT_* predicate. */
#define _mm256_cmpeq_epu64_mask(a, b) _mm256_cmp_epu64_mask((a), (b), _MM_CMPINT_EQ)
#define _mm256_mask_cmpeq_epu64_mask(m, a, b) _mm256_mask_cmp_epu64_mask((m), (a), (b), _MM_CMPINT_EQ)
#define _mm256_cmpge_epu64_mask(a, b) _mm256_cmp_epu64_mask((a), (b), _MM_CMPINT_GE)
#define _mm256_mask_cmpge_epu64_mask(m, a, b) _mm256_mask_cmp_epu64_mask((m), (a), (b), _MM_CMPINT_GE)
#define _mm256_cmpgt_epu64_mask(a, b) _mm256_cmp_epu64_mask((a), (b), _MM_CMPINT_GT)
#define _mm256_mask_cmpgt_epu64_mask(m, a, b) _mm256_mask_cmp_epu64_mask((m), (a), (b), _MM_CMPINT_GT)
#define _mm256_cmple_epu64_mask(a, b) _mm256_cmp_epu64_mask((a), (b), _MM_CMPINT_LE)
#define _mm256_mask_cmple_epu64_mask(m, a, b) _mm256_mask_cmp_epu64_mask((m), (a), (b), _MM_CMPINT_LE)
#define _mm256_cmplt_epu64_mask(a, b) _mm256_cmp_epu64_mask((a), (b), _MM_CMPINT_LT)
#define _mm256_mask_cmplt_epu64_mask(m, a, b) _mm256_mask_cmp_epu64_mask((m), (a), (b), _MM_CMPINT_LT)
#define _mm256_cmpneq_epu64_mask(a, b) _mm256_cmp_epu64_mask((a), (b), _MM_CMPINT_NE)
#define _mm256_mask_cmpneq_epu64_mask(m, a, b) _mm256_mask_cmp_epu64_mask((m), (a), (b), _MM_CMPINT_NE)
225
226static __inline__ __m256i __DEFAULT_FN_ATTRS256
227_mm256_mask_add_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
228{
229 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
230 (__v8si)_mm256_add_epi32(__A, __B),
231 (__v8si)__W);
232}
233
234static __inline__ __m256i __DEFAULT_FN_ATTRS256
235_mm256_maskz_add_epi32(__mmask8 __U, __m256i __A, __m256i __B)
236{
237 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
238 (__v8si)_mm256_add_epi32(__A, __B),
239 (__v8si)_mm256_setzero_si256());
240}
241
242static __inline__ __m256i __DEFAULT_FN_ATTRS256
243_mm256_mask_add_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
244{
245 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
246 (__v4di)_mm256_add_epi64(__A, __B),
247 (__v4di)__W);
248}
249
250static __inline__ __m256i __DEFAULT_FN_ATTRS256
251_mm256_maskz_add_epi64(__mmask8 __U, __m256i __A, __m256i __B)
252{
253 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
254 (__v4di)_mm256_add_epi64(__A, __B),
255 (__v4di)_mm256_setzero_si256());
256}
257
258static __inline__ __m256i __DEFAULT_FN_ATTRS256
259_mm256_mask_sub_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
260{
261 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
262 (__v8si)_mm256_sub_epi32(__A, __B),
263 (__v8si)__W);
264}
265
266static __inline__ __m256i __DEFAULT_FN_ATTRS256
267_mm256_maskz_sub_epi32(__mmask8 __U, __m256i __A, __m256i __B)
268{
269 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
270 (__v8si)_mm256_sub_epi32(__A, __B),
271 (__v8si)_mm256_setzero_si256());
272}
273
274static __inline__ __m256i __DEFAULT_FN_ATTRS256
275_mm256_mask_sub_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
276{
277 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
278 (__v4di)_mm256_sub_epi64(__A, __B),
279 (__v4di)__W);
280}
281
282static __inline__ __m256i __DEFAULT_FN_ATTRS256
283_mm256_maskz_sub_epi64(__mmask8 __U, __m256i __A, __m256i __B)
284{
285 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
286 (__v4di)_mm256_sub_epi64(__A, __B),
287 (__v4di)_mm256_setzero_si256());
288}
289
290static __inline__ __m128i __DEFAULT_FN_ATTRS128
291_mm_mask_add_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
292{
293 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
294 (__v4si)_mm_add_epi32(__A, __B),
295 (__v4si)__W);
296}
297
298static __inline__ __m128i __DEFAULT_FN_ATTRS128
299_mm_maskz_add_epi32(__mmask8 __U, __m128i __A, __m128i __B)
300{
301 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
302 (__v4si)_mm_add_epi32(__A, __B),
303 (__v4si)_mm_setzero_si128());
304}
305
306static __inline__ __m128i __DEFAULT_FN_ATTRS128
307_mm_mask_add_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
308{
309 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
310 (__v2di)_mm_add_epi64(__A, __B),
311 (__v2di)__W);
312}
313
314static __inline__ __m128i __DEFAULT_FN_ATTRS128
315_mm_maskz_add_epi64(__mmask8 __U, __m128i __A, __m128i __B)
316{
317 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
318 (__v2di)_mm_add_epi64(__A, __B),
319 (__v2di)_mm_setzero_si128());
320}
321
322static __inline__ __m128i __DEFAULT_FN_ATTRS128
323_mm_mask_sub_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
324{
325 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
326 (__v4si)_mm_sub_epi32(__A, __B),
327 (__v4si)__W);
328}
329
330static __inline__ __m128i __DEFAULT_FN_ATTRS128
331_mm_maskz_sub_epi32(__mmask8 __U, __m128i __A, __m128i __B)
332{
333 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
334 (__v4si)_mm_sub_epi32(__A, __B),
335 (__v4si)_mm_setzero_si128());
336}
337
338static __inline__ __m128i __DEFAULT_FN_ATTRS128
339_mm_mask_sub_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
340{
341 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
342 (__v2di)_mm_sub_epi64(__A, __B),
343 (__v2di)__W);
344}
345
346static __inline__ __m128i __DEFAULT_FN_ATTRS128
347_mm_maskz_sub_epi64(__mmask8 __U, __m128i __A, __m128i __B)
348{
349 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
350 (__v2di)_mm_sub_epi64(__A, __B),
351 (__v2di)_mm_setzero_si128());
352}
353
354static __inline__ __m256i __DEFAULT_FN_ATTRS256
355_mm256_mask_mul_epi32(__m256i __W, __mmask8 __M, __m256i __X, __m256i __Y)
356{
357 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
358 (__v4di)_mm256_mul_epi32(__X, __Y),
359 (__v4di)__W);
360}
361
362static __inline__ __m256i __DEFAULT_FN_ATTRS256
363_mm256_maskz_mul_epi32(__mmask8 __M, __m256i __X, __m256i __Y)
364{
365 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
366 (__v4di)_mm256_mul_epi32(__X, __Y),
367 (__v4di)_mm256_setzero_si256());
368}
369
370static __inline__ __m128i __DEFAULT_FN_ATTRS128
371_mm_mask_mul_epi32(__m128i __W, __mmask8 __M, __m128i __X, __m128i __Y)
372{
373 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M,
374 (__v2di)_mm_mul_epi32(__X, __Y),
375 (__v2di)__W);
376}
377
378static __inline__ __m128i __DEFAULT_FN_ATTRS128
379_mm_maskz_mul_epi32(__mmask8 __M, __m128i __X, __m128i __Y)
380{
381 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M,
382 (__v2di)_mm_mul_epi32(__X, __Y),
383 (__v2di)_mm_setzero_si128());
384}
385
386static __inline__ __m256i __DEFAULT_FN_ATTRS256
387_mm256_mask_mul_epu32(__m256i __W, __mmask8 __M, __m256i __X, __m256i __Y)
388{
389 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
390 (__v4di)_mm256_mul_epu32(__X, __Y),
391 (__v4di)__W);
392}
393
394static __inline__ __m256i __DEFAULT_FN_ATTRS256
395_mm256_maskz_mul_epu32(__mmask8 __M, __m256i __X, __m256i __Y)
396{
397 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
398 (__v4di)_mm256_mul_epu32(__X, __Y),
399 (__v4di)_mm256_setzero_si256());
400}
401
402static __inline__ __m128i __DEFAULT_FN_ATTRS128
403_mm_mask_mul_epu32(__m128i __W, __mmask8 __M, __m128i __X, __m128i __Y)
404{
405 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M,
406 (__v2di)_mm_mul_epu32(__X, __Y),
407 (__v2di)__W);
408}
409
410static __inline__ __m128i __DEFAULT_FN_ATTRS128
411_mm_maskz_mul_epu32(__mmask8 __M, __m128i __X, __m128i __Y)
412{
413 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M,
414 (__v2di)_mm_mul_epu32(__X, __Y),
415 (__v2di)_mm_setzero_si128());
416}
417
418static __inline__ __m256i __DEFAULT_FN_ATTRS256
419_mm256_maskz_mullo_epi32(__mmask8 __M, __m256i __A, __m256i __B)
420{
421 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
422 (__v8si)_mm256_mullo_epi32(__A, __B),
423 (__v8si)_mm256_setzero_si256());
424}
425
426static __inline__ __m256i __DEFAULT_FN_ATTRS256
427_mm256_mask_mullo_epi32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B)
428{
429 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
430 (__v8si)_mm256_mullo_epi32(__A, __B),
431 (__v8si)__W);
432}
433
434static __inline__ __m128i __DEFAULT_FN_ATTRS128
435_mm_maskz_mullo_epi32(__mmask8 __M, __m128i __A, __m128i __B)
436{
437 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
438 (__v4si)_mm_mullo_epi32(__A, __B),
439 (__v4si)_mm_setzero_si128());
440}
441
442static __inline__ __m128i __DEFAULT_FN_ATTRS128
443_mm_mask_mullo_epi32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B)
444{
445 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
446 (__v4si)_mm_mullo_epi32(__A, __B),
447 (__v4si)__W);
448}
449
450static __inline__ __m256i __DEFAULT_FN_ATTRS256
451_mm256_and_epi32(__m256i __a, __m256i __b)
452{
453 return (__m256i)((__v8su)__a & (__v8su)__b);
454}
455
456static __inline__ __m256i __DEFAULT_FN_ATTRS256
457_mm256_mask_and_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
458{
459 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
460 (__v8si)_mm256_and_epi32(__A, __B),
461 (__v8si)__W);
462}
463
464static __inline__ __m256i __DEFAULT_FN_ATTRS256
465_mm256_maskz_and_epi32(__mmask8 __U, __m256i __A, __m256i __B)
466{
467 return (__m256i)_mm256_mask_and_epi32(_mm256_setzero_si256(), __U, __A, __B);
468}
469
470static __inline__ __m128i __DEFAULT_FN_ATTRS128
471_mm_and_epi32(__m128i __a, __m128i __b)
472{
473 return (__m128i)((__v4su)__a & (__v4su)__b);
474}
475
476static __inline__ __m128i __DEFAULT_FN_ATTRS128
477_mm_mask_and_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
478{
479 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
480 (__v4si)_mm_and_epi32(__A, __B),
481 (__v4si)__W);
482}
483
484static __inline__ __m128i __DEFAULT_FN_ATTRS128
485_mm_maskz_and_epi32(__mmask8 __U, __m128i __A, __m128i __B)
486{
487 return (__m128i)_mm_mask_and_epi32(_mm_setzero_si128(), __U, __A, __B);
488}
489
490static __inline__ __m256i __DEFAULT_FN_ATTRS256
491_mm256_andnot_epi32(__m256i __A, __m256i __B)
492{
493 return (__m256i)(~(__v8su)__A & (__v8su)__B);
494}
495
496static __inline__ __m256i __DEFAULT_FN_ATTRS256
497_mm256_mask_andnot_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
498{
499 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
500 (__v8si)_mm256_andnot_epi32(__A, __B),
501 (__v8si)__W);
502}
503
504static __inline__ __m256i __DEFAULT_FN_ATTRS256
505_mm256_maskz_andnot_epi32(__mmask8 __U, __m256i __A, __m256i __B)
506{
508 __U, __A, __B);
509}
510
511static __inline__ __m128i __DEFAULT_FN_ATTRS128
512_mm_andnot_epi32(__m128i __A, __m128i __B)
513{
514 return (__m128i)(~(__v4su)__A & (__v4su)__B);
515}
516
517static __inline__ __m128i __DEFAULT_FN_ATTRS128
518_mm_mask_andnot_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
519{
520 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
521 (__v4si)_mm_andnot_epi32(__A, __B),
522 (__v4si)__W);
523}
524
525static __inline__ __m128i __DEFAULT_FN_ATTRS128
526_mm_maskz_andnot_epi32(__mmask8 __U, __m128i __A, __m128i __B)
527{
528 return (__m128i)_mm_mask_andnot_epi32(_mm_setzero_si128(), __U, __A, __B);
529}
530
531static __inline__ __m256i __DEFAULT_FN_ATTRS256
532_mm256_or_epi32(__m256i __a, __m256i __b)
533{
534 return (__m256i)((__v8su)__a | (__v8su)__b);
535}
536
537static __inline__ __m256i __DEFAULT_FN_ATTRS256
538_mm256_mask_or_epi32 (__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
539{
540 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
541 (__v8si)_mm256_or_epi32(__A, __B),
542 (__v8si)__W);
543}
544
545static __inline__ __m256i __DEFAULT_FN_ATTRS256
546_mm256_maskz_or_epi32(__mmask8 __U, __m256i __A, __m256i __B)
547{
548 return (__m256i)_mm256_mask_or_epi32(_mm256_setzero_si256(), __U, __A, __B);
549}
550
551static __inline__ __m128i __DEFAULT_FN_ATTRS128
552_mm_or_epi32(__m128i __a, __m128i __b)
553{
554 return (__m128i)((__v4su)__a | (__v4su)__b);
555}
556
557static __inline__ __m128i __DEFAULT_FN_ATTRS128
558_mm_mask_or_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
559{
560 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
561 (__v4si)_mm_or_epi32(__A, __B),
562 (__v4si)__W);
563}
564
565static __inline__ __m128i __DEFAULT_FN_ATTRS128
566_mm_maskz_or_epi32(__mmask8 __U, __m128i __A, __m128i __B)
567{
568 return (__m128i)_mm_mask_or_epi32(_mm_setzero_si128(), __U, __A, __B);
569}
570
571static __inline__ __m256i __DEFAULT_FN_ATTRS256
572_mm256_xor_epi32(__m256i __a, __m256i __b)
573{
574 return (__m256i)((__v8su)__a ^ (__v8su)__b);
575}
576
577static __inline__ __m256i __DEFAULT_FN_ATTRS256
578_mm256_mask_xor_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
579{
580 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
581 (__v8si)_mm256_xor_epi32(__A, __B),
582 (__v8si)__W);
583}
584
585static __inline__ __m256i __DEFAULT_FN_ATTRS256
586_mm256_maskz_xor_epi32(__mmask8 __U, __m256i __A, __m256i __B)
587{
588 return (__m256i)_mm256_mask_xor_epi32(_mm256_setzero_si256(), __U, __A, __B);
589}
590
591static __inline__ __m128i __DEFAULT_FN_ATTRS128
592_mm_xor_epi32(__m128i __a, __m128i __b)
593{
594 return (__m128i)((__v4su)__a ^ (__v4su)__b);
595}
596
597static __inline__ __m128i __DEFAULT_FN_ATTRS128
598_mm_mask_xor_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
599{
600 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
601 (__v4si)_mm_xor_epi32(__A, __B),
602 (__v4si)__W);
603}
604
605static __inline__ __m128i __DEFAULT_FN_ATTRS128
606_mm_maskz_xor_epi32(__mmask8 __U, __m128i __A, __m128i __B)
607{
608 return (__m128i)_mm_mask_xor_epi32(_mm_setzero_si128(), __U, __A, __B);
609}
610
611static __inline__ __m256i __DEFAULT_FN_ATTRS256
612_mm256_and_epi64(__m256i __a, __m256i __b)
613{
614 return (__m256i)((__v4du)__a & (__v4du)__b);
615}
616
617static __inline__ __m256i __DEFAULT_FN_ATTRS256
618_mm256_mask_and_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
619{
620 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
621 (__v4di)_mm256_and_epi64(__A, __B),
622 (__v4di)__W);
623}
624
625static __inline__ __m256i __DEFAULT_FN_ATTRS256
626_mm256_maskz_and_epi64(__mmask8 __U, __m256i __A, __m256i __B)
627{
628 return (__m256i)_mm256_mask_and_epi64(_mm256_setzero_si256(), __U, __A, __B);
629}
630
631static __inline__ __m128i __DEFAULT_FN_ATTRS128
632_mm_and_epi64(__m128i __a, __m128i __b)
633{
634 return (__m128i)((__v2du)__a & (__v2du)__b);
635}
636
637static __inline__ __m128i __DEFAULT_FN_ATTRS128
638_mm_mask_and_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
639{
640 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
641 (__v2di)_mm_and_epi64(__A, __B),
642 (__v2di)__W);
643}
644
645static __inline__ __m128i __DEFAULT_FN_ATTRS128
646_mm_maskz_and_epi64(__mmask8 __U, __m128i __A, __m128i __B)
647{
648 return (__m128i)_mm_mask_and_epi64(_mm_setzero_si128(), __U, __A, __B);
649}
650
651static __inline__ __m256i __DEFAULT_FN_ATTRS256
652_mm256_andnot_epi64(__m256i __A, __m256i __B)
653{
654 return (__m256i)(~(__v4du)__A & (__v4du)__B);
655}
656
657static __inline__ __m256i __DEFAULT_FN_ATTRS256
658_mm256_mask_andnot_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
659{
660 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
661 (__v4di)_mm256_andnot_epi64(__A, __B),
662 (__v4di)__W);
663}
664
665static __inline__ __m256i __DEFAULT_FN_ATTRS256
666_mm256_maskz_andnot_epi64(__mmask8 __U, __m256i __A, __m256i __B)
667{
669 __U, __A, __B);
670}
671
672static __inline__ __m128i __DEFAULT_FN_ATTRS128
673_mm_andnot_epi64(__m128i __A, __m128i __B)
674{
675 return (__m128i)(~(__v2du)__A & (__v2du)__B);
676}
677
678static __inline__ __m128i __DEFAULT_FN_ATTRS128
679_mm_mask_andnot_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
680{
681 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
682 (__v2di)_mm_andnot_epi64(__A, __B),
683 (__v2di)__W);
684}
685
686static __inline__ __m128i __DEFAULT_FN_ATTRS128
687_mm_maskz_andnot_epi64(__mmask8 __U, __m128i __A, __m128i __B)
688{
689 return (__m128i)_mm_mask_andnot_epi64(_mm_setzero_si128(), __U, __A, __B);
690}
691
692static __inline__ __m256i __DEFAULT_FN_ATTRS256
693_mm256_or_epi64(__m256i __a, __m256i __b)
694{
695 return (__m256i)((__v4du)__a | (__v4du)__b);
696}
697
698static __inline__ __m256i __DEFAULT_FN_ATTRS256
699_mm256_mask_or_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
700{
701 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
702 (__v4di)_mm256_or_epi64(__A, __B),
703 (__v4di)__W);
704}
705
706static __inline__ __m256i __DEFAULT_FN_ATTRS256
707_mm256_maskz_or_epi64(__mmask8 __U, __m256i __A, __m256i __B)
708{
709 return (__m256i)_mm256_mask_or_epi64(_mm256_setzero_si256(), __U, __A, __B);
710}
711
712static __inline__ __m128i __DEFAULT_FN_ATTRS128
713_mm_or_epi64(__m128i __a, __m128i __b)
714{
715 return (__m128i)((__v2du)__a | (__v2du)__b);
716}
717
718static __inline__ __m128i __DEFAULT_FN_ATTRS128
719_mm_mask_or_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
720{
721 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
722 (__v2di)_mm_or_epi64(__A, __B),
723 (__v2di)__W);
724}
725
726static __inline__ __m128i __DEFAULT_FN_ATTRS128
727_mm_maskz_or_epi64(__mmask8 __U, __m128i __A, __m128i __B)
728{
729 return (__m128i)_mm_mask_or_epi64(_mm_setzero_si128(), __U, __A, __B);
730}
731
732static __inline__ __m256i __DEFAULT_FN_ATTRS256
733_mm256_xor_epi64(__m256i __a, __m256i __b)
734{
735 return (__m256i)((__v4du)__a ^ (__v4du)__b);
736}
737
738static __inline__ __m256i __DEFAULT_FN_ATTRS256
739_mm256_mask_xor_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
740{
741 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
742 (__v4di)_mm256_xor_epi64(__A, __B),
743 (__v4di)__W);
744}
745
746static __inline__ __m256i __DEFAULT_FN_ATTRS256
747_mm256_maskz_xor_epi64(__mmask8 __U, __m256i __A, __m256i __B)
748{
749 return (__m256i)_mm256_mask_xor_epi64(_mm256_setzero_si256(), __U, __A, __B);
750}
751
752static __inline__ __m128i __DEFAULT_FN_ATTRS128
753_mm_xor_epi64(__m128i __a, __m128i __b)
754{
755 return (__m128i)((__v2du)__a ^ (__v2du)__b);
756}
757
758static __inline__ __m128i __DEFAULT_FN_ATTRS128
759_mm_mask_xor_epi64(__m128i __W, __mmask8 __U, __m128i __A,
760 __m128i __B)
761{
762 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
763 (__v2di)_mm_xor_epi64(__A, __B),
764 (__v2di)__W);
765}
766
767static __inline__ __m128i __DEFAULT_FN_ATTRS128
768_mm_maskz_xor_epi64(__mmask8 __U, __m128i __A, __m128i __B)
769{
770 return (__m128i)_mm_mask_xor_epi64(_mm_setzero_si128(), __U, __A, __B);
771}
772
/* Generic 128-bit epi32 compare. Passes a and b (viewed as __v4si) and the
 * predicate p to __builtin_ia32_cmpd128_mask; the unmasked form supplies
 * (__mmask8)-1 as the mask operand, the masked form supplies m. The whole
 * expansion is parenthesized so the cast cannot rebind inside a larger
 * expression. */
#define _mm_cmp_epi32_mask(a, b, p) \
  ((__mmask8)__builtin_ia32_cmpd128_mask((__v4si)(__m128i)(a), \
                                         (__v4si)(__m128i)(b), (int)(p), \
                                         (__mmask8)-1))

#define _mm_mask_cmp_epi32_mask(m, a, b, p) \
  ((__mmask8)__builtin_ia32_cmpd128_mask((__v4si)(__m128i)(a), \
                                         (__v4si)(__m128i)(b), (int)(p), \
                                         (__mmask8)(m)))
782
/* Generic 128-bit epu32 compare. Passes a and b (viewed as __v4si) and the
 * predicate p to __builtin_ia32_ucmpd128_mask; the unmasked form supplies
 * (__mmask8)-1 as the mask operand, the masked form supplies m. The whole
 * expansion is parenthesized so the cast cannot rebind inside a larger
 * expression. */
#define _mm_cmp_epu32_mask(a, b, p) \
  ((__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)(__m128i)(a), \
                                          (__v4si)(__m128i)(b), (int)(p), \
                                          (__mmask8)-1))

#define _mm_mask_cmp_epu32_mask(m, a, b, p) \
  ((__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)(__m128i)(a), \
                                          (__v4si)(__m128i)(b), (int)(p), \
                                          (__mmask8)(m)))
792
/* Generic 256-bit epi32 compare. Passes a and b (viewed as __v8si) and the
 * predicate p to __builtin_ia32_cmpd256_mask; the unmasked form supplies
 * (__mmask8)-1 as the mask operand, the masked form supplies m. The whole
 * expansion is parenthesized so the cast cannot rebind inside a larger
 * expression. */
#define _mm256_cmp_epi32_mask(a, b, p) \
  ((__mmask8)__builtin_ia32_cmpd256_mask((__v8si)(__m256i)(a), \
                                         (__v8si)(__m256i)(b), (int)(p), \
                                         (__mmask8)-1))

#define _mm256_mask_cmp_epi32_mask(m, a, b, p) \
  ((__mmask8)__builtin_ia32_cmpd256_mask((__v8si)(__m256i)(a), \
                                         (__v8si)(__m256i)(b), (int)(p), \
                                         (__mmask8)(m)))
802
/* Generic 256-bit epu32 compare. Passes a and b (viewed as __v8si) and the
 * predicate p to __builtin_ia32_ucmpd256_mask; the unmasked form supplies
 * (__mmask8)-1 as the mask operand, the masked form supplies m. The whole
 * expansion is parenthesized so the cast cannot rebind inside a larger
 * expression. */
#define _mm256_cmp_epu32_mask(a, b, p) \
  ((__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)(__m256i)(a), \
                                          (__v8si)(__m256i)(b), (int)(p), \
                                          (__mmask8)-1))

#define _mm256_mask_cmp_epu32_mask(m, a, b, p) \
  ((__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)(__m256i)(a), \
                                          (__v8si)(__m256i)(b), (int)(p), \
                                          (__mmask8)(m)))
812
/* Generic 128-bit epi64 compare. Passes a and b (viewed as __v2di) and the
 * predicate p to __builtin_ia32_cmpq128_mask; the unmasked form supplies
 * (__mmask8)-1 as the mask operand, the masked form supplies m. The whole
 * expansion is parenthesized so the cast cannot rebind inside a larger
 * expression. */
#define _mm_cmp_epi64_mask(a, b, p) \
  ((__mmask8)__builtin_ia32_cmpq128_mask((__v2di)(__m128i)(a), \
                                         (__v2di)(__m128i)(b), (int)(p), \
                                         (__mmask8)-1))

#define _mm_mask_cmp_epi64_mask(m, a, b, p) \
  ((__mmask8)__builtin_ia32_cmpq128_mask((__v2di)(__m128i)(a), \
                                         (__v2di)(__m128i)(b), (int)(p), \
                                         (__mmask8)(m)))
822
/* Generic 128-bit epu64 compare. Passes a and b (viewed as __v2di) and the
 * predicate p to __builtin_ia32_ucmpq128_mask; the unmasked form supplies
 * (__mmask8)-1 as the mask operand, the masked form supplies m. The whole
 * expansion is parenthesized so the cast cannot rebind inside a larger
 * expression. */
#define _mm_cmp_epu64_mask(a, b, p) \
  ((__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)(__m128i)(a), \
                                          (__v2di)(__m128i)(b), (int)(p), \
                                          (__mmask8)-1))

#define _mm_mask_cmp_epu64_mask(m, a, b, p) \
  ((__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)(__m128i)(a), \
                                          (__v2di)(__m128i)(b), (int)(p), \
                                          (__mmask8)(m)))
832
/* Generic 256-bit epi64 compare. Passes a and b (viewed as __v4di) and the
 * predicate p to __builtin_ia32_cmpq256_mask; the unmasked form supplies
 * (__mmask8)-1 as the mask operand, the masked form supplies m. The whole
 * expansion is parenthesized so the cast cannot rebind inside a larger
 * expression. */
#define _mm256_cmp_epi64_mask(a, b, p) \
  ((__mmask8)__builtin_ia32_cmpq256_mask((__v4di)(__m256i)(a), \
                                         (__v4di)(__m256i)(b), (int)(p), \
                                         (__mmask8)-1))

#define _mm256_mask_cmp_epi64_mask(m, a, b, p) \
  ((__mmask8)__builtin_ia32_cmpq256_mask((__v4di)(__m256i)(a), \
                                         (__v4di)(__m256i)(b), (int)(p), \
                                         (__mmask8)(m)))
842
/* Generic 256-bit epu64 compare. Passes a and b (viewed as __v4di) and the
 * predicate p to __builtin_ia32_ucmpq256_mask; the unmasked form supplies
 * (__mmask8)-1 as the mask operand, the masked form supplies m. The whole
 * expansion is parenthesized so the cast cannot rebind inside a larger
 * expression. */
#define _mm256_cmp_epu64_mask(a, b, p) \
  ((__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)(__m256i)(a), \
                                          (__v4di)(__m256i)(b), (int)(p), \
                                          (__mmask8)-1))

#define _mm256_mask_cmp_epu64_mask(m, a, b, p) \
  ((__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)(__m256i)(a), \
                                          (__v4di)(__m256i)(b), (int)(p), \
                                          (__mmask8)(m)))
852
/* Generic 256-bit ps compare. Passes a and b (viewed as __v8sf) and the
 * predicate p to __builtin_ia32_cmpps256_mask; the unmasked form supplies
 * (__mmask8)-1 as the mask operand, the masked form supplies m. The whole
 * expansion is parenthesized so the cast cannot rebind inside a larger
 * expression. */
#define _mm256_cmp_ps_mask(a, b, p) \
  ((__mmask8)__builtin_ia32_cmpps256_mask((__v8sf)(__m256)(a), \
                                          (__v8sf)(__m256)(b), (int)(p), \
                                          (__mmask8)-1))

#define _mm256_mask_cmp_ps_mask(m, a, b, p) \
  ((__mmask8)__builtin_ia32_cmpps256_mask((__v8sf)(__m256)(a), \
                                          (__v8sf)(__m256)(b), (int)(p), \
                                          (__mmask8)(m)))
862
/* Generic 256-bit pd compare. Passes a and b (viewed as __v4df) and the
 * predicate p to __builtin_ia32_cmppd256_mask; the unmasked form supplies
 * (__mmask8)-1 as the mask operand, the masked form supplies m. The whole
 * expansion is parenthesized so the cast cannot rebind inside a larger
 * expression. */
#define _mm256_cmp_pd_mask(a, b, p) \
  ((__mmask8)__builtin_ia32_cmppd256_mask((__v4df)(__m256d)(a), \
                                          (__v4df)(__m256d)(b), (int)(p), \
                                          (__mmask8)-1))

#define _mm256_mask_cmp_pd_mask(m, a, b, p) \
  ((__mmask8)__builtin_ia32_cmppd256_mask((__v4df)(__m256d)(a), \
                                          (__v4df)(__m256d)(b), (int)(p), \
                                          (__mmask8)(m)))
872
/* Generic 128-bit ps compare. Passes a and b (viewed as __v4sf) and the
 * predicate p to __builtin_ia32_cmpps128_mask; the unmasked form supplies
 * (__mmask8)-1 as the mask operand, the masked form supplies m. The whole
 * expansion is parenthesized so the cast cannot rebind inside a larger
 * expression. */
#define _mm_cmp_ps_mask(a, b, p) \
  ((__mmask8)__builtin_ia32_cmpps128_mask((__v4sf)(__m128)(a), \
                                          (__v4sf)(__m128)(b), (int)(p), \
                                          (__mmask8)-1))

#define _mm_mask_cmp_ps_mask(m, a, b, p) \
  ((__mmask8)__builtin_ia32_cmpps128_mask((__v4sf)(__m128)(a), \
                                          (__v4sf)(__m128)(b), (int)(p), \
                                          (__mmask8)(m)))
882
/* Compare the double-precision lanes of a and b using predicate p (a _CMP_*
 * value). An all-ones mask is passed to the builtin, so no lane is
 * suppressed. The expansion is fully parenthesized. */
#define _mm_cmp_pd_mask(a, b, p) \
  ((__mmask8)__builtin_ia32_cmppd128_mask((__v2df)(__m128d)(a), \
                                          (__v2df)(__m128d)(b), (int)(p), \
                                          (__mmask8)-1))
887
/* Compare the double-precision lanes of a and b using predicate p (a _CMP_*
 * value); result bits are additionally gated by mask m.
 * The expansion is fully parenthesized. */
#define _mm_mask_cmp_pd_mask(m, a, b, p) \
  ((__mmask8)__builtin_ia32_cmppd128_mask((__v2df)(__m128d)(a), \
                                          (__v2df)(__m128d)(b), (int)(p), \
                                          (__mmask8)(m)))
892
893static __inline__ __m128d __DEFAULT_FN_ATTRS128
894_mm_mask_fmadd_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C)
895{
896 return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
897 __builtin_ia32_vfmaddpd ((__v2df) __A,
898 (__v2df) __B,
899 (__v2df) __C),
900 (__v2df) __A);
901}
902
903static __inline__ __m128d __DEFAULT_FN_ATTRS128
904_mm_mask3_fmadd_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U)
905{
906 return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
907 __builtin_ia32_vfmaddpd ((__v2df) __A,
908 (__v2df) __B,
909 (__v2df) __C),
910 (__v2df) __C);
911}
912
913static __inline__ __m128d __DEFAULT_FN_ATTRS128
914_mm_maskz_fmadd_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
915{
916 return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
917 __builtin_ia32_vfmaddpd ((__v2df) __A,
918 (__v2df) __B,
919 (__v2df) __C),
920 (__v2df)_mm_setzero_pd());
921}
922
923static __inline__ __m128d __DEFAULT_FN_ATTRS128
924_mm_mask_fmsub_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C)
925{
926 return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
927 __builtin_ia32_vfmaddpd ((__v2df) __A,
928 (__v2df) __B,
929 -(__v2df) __C),
930 (__v2df) __A);
931}
932
933static __inline__ __m128d __DEFAULT_FN_ATTRS128
934_mm_maskz_fmsub_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
935{
936 return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
937 __builtin_ia32_vfmaddpd ((__v2df) __A,
938 (__v2df) __B,
939 -(__v2df) __C),
940 (__v2df)_mm_setzero_pd());
941}
942
943static __inline__ __m128d __DEFAULT_FN_ATTRS128
944_mm_mask3_fnmadd_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U)
945{
946 return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
947 __builtin_ia32_vfmaddpd (-(__v2df) __A,
948 (__v2df) __B,
949 (__v2df) __C),
950 (__v2df) __C);
951}
952
953static __inline__ __m128d __DEFAULT_FN_ATTRS128
954_mm_maskz_fnmadd_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
955{
956 return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
957 __builtin_ia32_vfmaddpd (-(__v2df) __A,
958 (__v2df) __B,
959 (__v2df) __C),
960 (__v2df)_mm_setzero_pd());
961}
962
963static __inline__ __m128d __DEFAULT_FN_ATTRS128
964_mm_maskz_fnmsub_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
965{
966 return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
967 __builtin_ia32_vfmaddpd (-(__v2df) __A,
968 (__v2df) __B,
969 -(__v2df) __C),
970 (__v2df)_mm_setzero_pd());
971}
972
973static __inline__ __m256d __DEFAULT_FN_ATTRS256
974_mm256_mask_fmadd_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C)
975{
976 return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
977 __builtin_ia32_vfmaddpd256 ((__v4df) __A,
978 (__v4df) __B,
979 (__v4df) __C),
980 (__v4df) __A);
981}
982
983static __inline__ __m256d __DEFAULT_FN_ATTRS256
984_mm256_mask3_fmadd_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U)
985{
986 return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
987 __builtin_ia32_vfmaddpd256 ((__v4df) __A,
988 (__v4df) __B,
989 (__v4df) __C),
990 (__v4df) __C);
991}
992
993static __inline__ __m256d __DEFAULT_FN_ATTRS256
994_mm256_maskz_fmadd_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C)
995{
996 return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
997 __builtin_ia32_vfmaddpd256 ((__v4df) __A,
998 (__v4df) __B,
999 (__v4df) __C),
1000 (__v4df)_mm256_setzero_pd());
1001}
1002
1003static __inline__ __m256d __DEFAULT_FN_ATTRS256
1004_mm256_mask_fmsub_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C)
1005{
1006 return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
1007 __builtin_ia32_vfmaddpd256 ((__v4df) __A,
1008 (__v4df) __B,
1009 -(__v4df) __C),
1010 (__v4df) __A);
1011}
1012
1013static __inline__ __m256d __DEFAULT_FN_ATTRS256
1014_mm256_maskz_fmsub_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C)
1015{
1016 return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
1017 __builtin_ia32_vfmaddpd256 ((__v4df) __A,
1018 (__v4df) __B,
1019 -(__v4df) __C),
1020 (__v4df)_mm256_setzero_pd());
1021}
1022
1023static __inline__ __m256d __DEFAULT_FN_ATTRS256
1024_mm256_mask3_fnmadd_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U)
1025{
1026 return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
1027 __builtin_ia32_vfmaddpd256 (-(__v4df) __A,
1028 (__v4df) __B,
1029 (__v4df) __C),
1030 (__v4df) __C);
1031}
1032
1033static __inline__ __m256d __DEFAULT_FN_ATTRS256
1034_mm256_maskz_fnmadd_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C)
1035{
1036 return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
1037 __builtin_ia32_vfmaddpd256 (-(__v4df) __A,
1038 (__v4df) __B,
1039 (__v4df) __C),
1040 (__v4df)_mm256_setzero_pd());
1041}
1042
1043static __inline__ __m256d __DEFAULT_FN_ATTRS256
1044_mm256_maskz_fnmsub_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C)
1045{
1046 return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
1047 __builtin_ia32_vfmaddpd256 (-(__v4df) __A,
1048 (__v4df) __B,
1049 -(__v4df) __C),
1050 (__v4df)_mm256_setzero_pd());
1051}
1052
1053static __inline__ __m128 __DEFAULT_FN_ATTRS128
1054_mm_mask_fmadd_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
1055{
1056 return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
1057 __builtin_ia32_vfmaddps ((__v4sf) __A,
1058 (__v4sf) __B,
1059 (__v4sf) __C),
1060 (__v4sf) __A);
1061}
1062
1063static __inline__ __m128 __DEFAULT_FN_ATTRS128
1064_mm_mask3_fmadd_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
1065{
1066 return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
1067 __builtin_ia32_vfmaddps ((__v4sf) __A,
1068 (__v4sf) __B,
1069 (__v4sf) __C),
1070 (__v4sf) __C);
1071}
1072
1073static __inline__ __m128 __DEFAULT_FN_ATTRS128
1074_mm_maskz_fmadd_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
1075{
1076 return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
1077 __builtin_ia32_vfmaddps ((__v4sf) __A,
1078 (__v4sf) __B,
1079 (__v4sf) __C),
1080 (__v4sf)_mm_setzero_ps());
1081}
1082
1083static __inline__ __m128 __DEFAULT_FN_ATTRS128
1084_mm_mask_fmsub_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
1085{
1086 return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
1087 __builtin_ia32_vfmaddps ((__v4sf) __A,
1088 (__v4sf) __B,
1089 -(__v4sf) __C),
1090 (__v4sf) __A);
1091}
1092
1093static __inline__ __m128 __DEFAULT_FN_ATTRS128
1094_mm_maskz_fmsub_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
1095{
1096 return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
1097 __builtin_ia32_vfmaddps ((__v4sf) __A,
1098 (__v4sf) __B,
1099 -(__v4sf) __C),
1100 (__v4sf)_mm_setzero_ps());
1101}
1102
1103static __inline__ __m128 __DEFAULT_FN_ATTRS128
1104_mm_mask3_fnmadd_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
1105{
1106 return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
1107 __builtin_ia32_vfmaddps (-(__v4sf) __A,
1108 (__v4sf) __B,
1109 (__v4sf) __C),
1110 (__v4sf) __C);
1111}
1112
1113static __inline__ __m128 __DEFAULT_FN_ATTRS128
1114_mm_maskz_fnmadd_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
1115{
1116 return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
1117 __builtin_ia32_vfmaddps (-(__v4sf) __A,
1118 (__v4sf) __B,
1119 (__v4sf) __C),
1120 (__v4sf)_mm_setzero_ps());
1121}
1122
1123static __inline__ __m128 __DEFAULT_FN_ATTRS128
1124_mm_maskz_fnmsub_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
1125{
1126 return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
1127 __builtin_ia32_vfmaddps (-(__v4sf) __A,
1128 (__v4sf) __B,
1129 -(__v4sf) __C),
1130 (__v4sf)_mm_setzero_ps());
1131}
1132
1133static __inline__ __m256 __DEFAULT_FN_ATTRS256
1134_mm256_mask_fmadd_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C)
1135{
1136 return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
1137 __builtin_ia32_vfmaddps256 ((__v8sf) __A,
1138 (__v8sf) __B,
1139 (__v8sf) __C),
1140 (__v8sf) __A);
1141}
1142
1143static __inline__ __m256 __DEFAULT_FN_ATTRS256
1144_mm256_mask3_fmadd_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U)
1145{
1146 return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
1147 __builtin_ia32_vfmaddps256 ((__v8sf) __A,
1148 (__v8sf) __B,
1149 (__v8sf) __C),
1150 (__v8sf) __C);
1151}
1152
1153static __inline__ __m256 __DEFAULT_FN_ATTRS256
1154_mm256_maskz_fmadd_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C)
1155{
1156 return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
1157 __builtin_ia32_vfmaddps256 ((__v8sf) __A,
1158 (__v8sf) __B,
1159 (__v8sf) __C),
1160 (__v8sf)_mm256_setzero_ps());
1161}
1162
1163static __inline__ __m256 __DEFAULT_FN_ATTRS256
1164_mm256_mask_fmsub_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C)
1165{
1166 return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
1167 __builtin_ia32_vfmaddps256 ((__v8sf) __A,
1168 (__v8sf) __B,
1169 -(__v8sf) __C),
1170 (__v8sf) __A);
1171}
1172
1173static __inline__ __m256 __DEFAULT_FN_ATTRS256
1174_mm256_maskz_fmsub_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C)
1175{
1176 return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
1177 __builtin_ia32_vfmaddps256 ((__v8sf) __A,
1178 (__v8sf) __B,
1179 -(__v8sf) __C),
1180 (__v8sf)_mm256_setzero_ps());
1181}
1182
1183static __inline__ __m256 __DEFAULT_FN_ATTRS256
1184_mm256_mask3_fnmadd_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U)
1185{
1186 return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
1187 __builtin_ia32_vfmaddps256 (-(__v8sf) __A,
1188 (__v8sf) __B,
1189 (__v8sf) __C),
1190 (__v8sf) __C);
1191}
1192
1193static __inline__ __m256 __DEFAULT_FN_ATTRS256
1194_mm256_maskz_fnmadd_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C)
1195{
1196 return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
1197 __builtin_ia32_vfmaddps256 (-(__v8sf) __A,
1198 (__v8sf) __B,
1199 (__v8sf) __C),
1200 (__v8sf)_mm256_setzero_ps());
1201}
1202
1203static __inline__ __m256 __DEFAULT_FN_ATTRS256
1204_mm256_maskz_fnmsub_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C)
1205{
1206 return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
1207 __builtin_ia32_vfmaddps256 (-(__v8sf) __A,
1208 (__v8sf) __B,
1209 -(__v8sf) __C),
1210 (__v8sf)_mm256_setzero_ps());
1211}
1212
1213static __inline__ __m128d __DEFAULT_FN_ATTRS128
1214_mm_mask_fmaddsub_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C)
1215{
1216 return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
1217 __builtin_ia32_vfmaddsubpd ((__v2df) __A,
1218 (__v2df) __B,
1219 (__v2df) __C),
1220 (__v2df) __A);
1221}
1222
1223static __inline__ __m128d __DEFAULT_FN_ATTRS128
1224_mm_mask3_fmaddsub_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U)
1225{
1226 return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
1227 __builtin_ia32_vfmaddsubpd ((__v2df) __A,
1228 (__v2df) __B,
1229 (__v2df) __C),
1230 (__v2df) __C);
1231}
1232
1233static __inline__ __m128d __DEFAULT_FN_ATTRS128
1234_mm_maskz_fmaddsub_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
1235{
1236 return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
1237 __builtin_ia32_vfmaddsubpd ((__v2df) __A,
1238 (__v2df) __B,
1239 (__v2df) __C),
1240 (__v2df)_mm_setzero_pd());
1241}
1242
1243static __inline__ __m128d __DEFAULT_FN_ATTRS128
1244_mm_mask_fmsubadd_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C)
1245{
1246 return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
1247 __builtin_ia32_vfmaddsubpd ((__v2df) __A,
1248 (__v2df) __B,
1249 -(__v2df) __C),
1250 (__v2df) __A);
1251}
1252
1253static __inline__ __m128d __DEFAULT_FN_ATTRS128
1254_mm_maskz_fmsubadd_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
1255{
1256 return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
1257 __builtin_ia32_vfmaddsubpd ((__v2df) __A,
1258 (__v2df) __B,
1259 -(__v2df) __C),
1260 (__v2df)_mm_setzero_pd());
1261}
1262
1263static __inline__ __m256d __DEFAULT_FN_ATTRS256
1264_mm256_mask_fmaddsub_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C)
1265{
1266 return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
1267 __builtin_ia32_vfmaddsubpd256 ((__v4df) __A,
1268 (__v4df) __B,
1269 (__v4df) __C),
1270 (__v4df) __A);
1271}
1272
1273static __inline__ __m256d __DEFAULT_FN_ATTRS256
1274_mm256_mask3_fmaddsub_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U)
1275{
1276 return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
1277 __builtin_ia32_vfmaddsubpd256 ((__v4df) __A,
1278 (__v4df) __B,
1279 (__v4df) __C),
1280 (__v4df) __C);
1281}
1282
1283static __inline__ __m256d __DEFAULT_FN_ATTRS256
1284_mm256_maskz_fmaddsub_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C)
1285{
1286 return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
1287 __builtin_ia32_vfmaddsubpd256 ((__v4df) __A,
1288 (__v4df) __B,
1289 (__v4df) __C),
1290 (__v4df)_mm256_setzero_pd());
1291}
1292
1293static __inline__ __m256d __DEFAULT_FN_ATTRS256
1294_mm256_mask_fmsubadd_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C)
1295{
1296 return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
1297 __builtin_ia32_vfmaddsubpd256 ((__v4df) __A,
1298 (__v4df) __B,
1299 -(__v4df) __C),
1300 (__v4df) __A);
1301}
1302
1303static __inline__ __m256d __DEFAULT_FN_ATTRS256
1304_mm256_maskz_fmsubadd_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C)
1305{
1306 return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
1307 __builtin_ia32_vfmaddsubpd256 ((__v4df) __A,
1308 (__v4df) __B,
1309 -(__v4df) __C),
1310 (__v4df)_mm256_setzero_pd());
1311}
1312
1313static __inline__ __m128 __DEFAULT_FN_ATTRS128
1314_mm_mask_fmaddsub_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
1315{
1316 return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
1317 __builtin_ia32_vfmaddsubps ((__v4sf) __A,
1318 (__v4sf) __B,
1319 (__v4sf) __C),
1320 (__v4sf) __A);
1321}
1322
1323static __inline__ __m128 __DEFAULT_FN_ATTRS128
1324_mm_mask3_fmaddsub_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
1325{
1326 return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
1327 __builtin_ia32_vfmaddsubps ((__v4sf) __A,
1328 (__v4sf) __B,
1329 (__v4sf) __C),
1330 (__v4sf) __C);
1331}
1332
1333static __inline__ __m128 __DEFAULT_FN_ATTRS128
1334_mm_maskz_fmaddsub_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
1335{
1336 return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
1337 __builtin_ia32_vfmaddsubps ((__v4sf) __A,
1338 (__v4sf) __B,
1339 (__v4sf) __C),
1340 (__v4sf)_mm_setzero_ps());
1341}
1342
1343static __inline__ __m128 __DEFAULT_FN_ATTRS128
1344_mm_mask_fmsubadd_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
1345{
1346 return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
1347 __builtin_ia32_vfmaddsubps ((__v4sf) __A,
1348 (__v4sf) __B,
1349 -(__v4sf) __C),
1350 (__v4sf) __A);
1351}
1352
1353static __inline__ __m128 __DEFAULT_FN_ATTRS128
1354_mm_maskz_fmsubadd_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
1355{
1356 return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
1357 __builtin_ia32_vfmaddsubps ((__v4sf) __A,
1358 (__v4sf) __B,
1359 -(__v4sf) __C),
1360 (__v4sf)_mm_setzero_ps());
1361}
1362
1363static __inline__ __m256 __DEFAULT_FN_ATTRS256
1364_mm256_mask_fmaddsub_ps(__m256 __A, __mmask8 __U, __m256 __B,
1365 __m256 __C)
1366{
1367 return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
1368 __builtin_ia32_vfmaddsubps256 ((__v8sf) __A,
1369 (__v8sf) __B,
1370 (__v8sf) __C),
1371 (__v8sf) __A);
1372}
1373
1374static __inline__ __m256 __DEFAULT_FN_ATTRS256
1375_mm256_mask3_fmaddsub_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U)
1376{
1377 return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
1378 __builtin_ia32_vfmaddsubps256 ((__v8sf) __A,
1379 (__v8sf) __B,
1380 (__v8sf) __C),
1381 (__v8sf) __C);
1382}
1383
1384static __inline__ __m256 __DEFAULT_FN_ATTRS256
1385_mm256_maskz_fmaddsub_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C)
1386{
1387 return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
1388 __builtin_ia32_vfmaddsubps256 ((__v8sf) __A,
1389 (__v8sf) __B,
1390 (__v8sf) __C),
1391 (__v8sf)_mm256_setzero_ps());
1392}
1393
1394static __inline__ __m256 __DEFAULT_FN_ATTRS256
1395_mm256_mask_fmsubadd_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C)
1396{
1397 return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
1398 __builtin_ia32_vfmaddsubps256 ((__v8sf) __A,
1399 (__v8sf) __B,
1400 -(__v8sf) __C),
1401 (__v8sf) __A);
1402}
1403
1404static __inline__ __m256 __DEFAULT_FN_ATTRS256
1405_mm256_maskz_fmsubadd_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C)
1406{
1407 return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
1408 __builtin_ia32_vfmaddsubps256 ((__v8sf) __A,
1409 (__v8sf) __B,
1410 -(__v8sf) __C),
1411 (__v8sf)_mm256_setzero_ps());
1412}
1413
1414static __inline__ __m128d __DEFAULT_FN_ATTRS128
1415_mm_mask3_fmsub_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U)
1416{
1417 return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
1418 __builtin_ia32_vfmaddpd ((__v2df) __A,
1419 (__v2df) __B,
1420 -(__v2df) __C),
1421 (__v2df) __C);
1422}
1423
1424static __inline__ __m256d __DEFAULT_FN_ATTRS256
1425_mm256_mask3_fmsub_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U)
1426{
1427 return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
1428 __builtin_ia32_vfmaddpd256 ((__v4df) __A,
1429 (__v4df) __B,
1430 -(__v4df) __C),
1431 (__v4df) __C);
1432}
1433
1434static __inline__ __m128 __DEFAULT_FN_ATTRS128
1435_mm_mask3_fmsub_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
1436{
1437 return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
1438 __builtin_ia32_vfmaddps ((__v4sf) __A,
1439 (__v4sf) __B,
1440 -(__v4sf) __C),
1441 (__v4sf) __C);
1442}
1443
1444static __inline__ __m256 __DEFAULT_FN_ATTRS256
1445_mm256_mask3_fmsub_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U)
1446{
1447 return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
1448 __builtin_ia32_vfmaddps256 ((__v8sf) __A,
1449 (__v8sf) __B,
1450 -(__v8sf) __C),
1451 (__v8sf) __C);
1452}
1453
1454static __inline__ __m128d __DEFAULT_FN_ATTRS128
1455_mm_mask3_fmsubadd_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U)
1456{
1457 return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
1458 __builtin_ia32_vfmaddsubpd ((__v2df) __A,
1459 (__v2df) __B,
1460 -(__v2df) __C),
1461 (__v2df) __C);
1462}
1463
1464static __inline__ __m256d __DEFAULT_FN_ATTRS256
1465_mm256_mask3_fmsubadd_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U)
1466{
1467 return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
1468 __builtin_ia32_vfmaddsubpd256 ((__v4df) __A,
1469 (__v4df) __B,
1470 -(__v4df) __C),
1471 (__v4df) __C);
1472}
1473
1474static __inline__ __m128 __DEFAULT_FN_ATTRS128
1475_mm_mask3_fmsubadd_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
1476{
1477 return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
1478 __builtin_ia32_vfmaddsubps ((__v4sf) __A,
1479 (__v4sf) __B,
1480 -(__v4sf) __C),
1481 (__v4sf) __C);
1482}
1483
1484static __inline__ __m256 __DEFAULT_FN_ATTRS256
1485_mm256_mask3_fmsubadd_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U)
1486{
1487 return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
1488 __builtin_ia32_vfmaddsubps256 ((__v8sf) __A,
1489 (__v8sf) __B,
1490 -(__v8sf) __C),
1491 (__v8sf) __C);
1492}
1493
1494static __inline__ __m128d __DEFAULT_FN_ATTRS128
1495_mm_mask_fnmadd_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C)
1496{
1497 return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
1498 __builtin_ia32_vfmaddpd ((__v2df) __A,
1499 -(__v2df) __B,
1500 (__v2df) __C),
1501 (__v2df) __A);
1502}
1503
1504static __inline__ __m256d __DEFAULT_FN_ATTRS256
1505_mm256_mask_fnmadd_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C)
1506{
1507 return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
1508 __builtin_ia32_vfmaddpd256 ((__v4df) __A,
1509 -(__v4df) __B,
1510 (__v4df) __C),
1511 (__v4df) __A);
1512}
1513
1514static __inline__ __m128 __DEFAULT_FN_ATTRS128
1515_mm_mask_fnmadd_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
1516{
1517 return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
1518 __builtin_ia32_vfmaddps ((__v4sf) __A,
1519 -(__v4sf) __B,
1520 (__v4sf) __C),
1521 (__v4sf) __A);
1522}
1523
1524static __inline__ __m256 __DEFAULT_FN_ATTRS256
1525_mm256_mask_fnmadd_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C)
1526{
1527 return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
1528 __builtin_ia32_vfmaddps256 ((__v8sf) __A,
1529 -(__v8sf) __B,
1530 (__v8sf) __C),
1531 (__v8sf) __A);
1532}
1533
1534static __inline__ __m128d __DEFAULT_FN_ATTRS128
1535_mm_mask_fnmsub_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C)
1536{
1537 return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
1538 __builtin_ia32_vfmaddpd ((__v2df) __A,
1539 -(__v2df) __B,
1540 -(__v2df) __C),
1541 (__v2df) __A);
1542}
1543
1544static __inline__ __m128d __DEFAULT_FN_ATTRS128
1545_mm_mask3_fnmsub_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U)
1546{
1547 return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
1548 __builtin_ia32_vfmaddpd ((__v2df) __A,
1549 -(__v2df) __B,
1550 -(__v2df) __C),
1551 (__v2df) __C);
1552}
1553
1554static __inline__ __m256d __DEFAULT_FN_ATTRS256
1555_mm256_mask_fnmsub_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C)
1556{
1557 return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
1558 __builtin_ia32_vfmaddpd256 ((__v4df) __A,
1559 -(__v4df) __B,
1560 -(__v4df) __C),
1561 (__v4df) __A);
1562}
1563
1564static __inline__ __m256d __DEFAULT_FN_ATTRS256
1565_mm256_mask3_fnmsub_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U)
1566{
1567 return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
1568 __builtin_ia32_vfmaddpd256 ((__v4df) __A,
1569 -(__v4df) __B,
1570 -(__v4df) __C),
1571 (__v4df) __C);
1572}
1573
1574static __inline__ __m128 __DEFAULT_FN_ATTRS128
1575_mm_mask_fnmsub_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
1576{
1577 return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
1578 __builtin_ia32_vfmaddps ((__v4sf) __A,
1579 -(__v4sf) __B,
1580 -(__v4sf) __C),
1581 (__v4sf) __A);
1582}
1583
1584static __inline__ __m128 __DEFAULT_FN_ATTRS128
1585_mm_mask3_fnmsub_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
1586{
1587 return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
1588 __builtin_ia32_vfmaddps ((__v4sf) __A,
1589 -(__v4sf) __B,
1590 -(__v4sf) __C),
1591 (__v4sf) __C);
1592}
1593
1594static __inline__ __m256 __DEFAULT_FN_ATTRS256
1595_mm256_mask_fnmsub_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C)
1596{
1597 return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
1598 __builtin_ia32_vfmaddps256 ((__v8sf) __A,
1599 -(__v8sf) __B,
1600 -(__v8sf) __C),
1601 (__v8sf) __A);
1602}
1603
1604static __inline__ __m256 __DEFAULT_FN_ATTRS256
1605_mm256_mask3_fnmsub_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U)
1606{
1607 return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
1608 __builtin_ia32_vfmaddps256 ((__v8sf) __A,
1609 -(__v8sf) __B,
1610 -(__v8sf) __C),
1611 (__v8sf) __C);
1612}
1613
1614static __inline__ __m128d __DEFAULT_FN_ATTRS128
1615_mm_mask_add_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
1616 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
1617 (__v2df)_mm_add_pd(__A, __B),
1618 (__v2df)__W);
1619}
1620
1621static __inline__ __m128d __DEFAULT_FN_ATTRS128
1622_mm_maskz_add_pd(__mmask8 __U, __m128d __A, __m128d __B) {
1623 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
1624 (__v2df)_mm_add_pd(__A, __B),
1625 (__v2df)_mm_setzero_pd());
1626}
1627
1628static __inline__ __m256d __DEFAULT_FN_ATTRS256
1629_mm256_mask_add_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
1630 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
1631 (__v4df)_mm256_add_pd(__A, __B),
1632 (__v4df)__W);
1633}
1634
1635static __inline__ __m256d __DEFAULT_FN_ATTRS256
1636_mm256_maskz_add_pd(__mmask8 __U, __m256d __A, __m256d __B) {
1637 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
1638 (__v4df)_mm256_add_pd(__A, __B),
1639 (__v4df)_mm256_setzero_pd());
1640}
1641
1642static __inline__ __m128 __DEFAULT_FN_ATTRS128
1643_mm_mask_add_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
1644 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
1645 (__v4sf)_mm_add_ps(__A, __B),
1646 (__v4sf)__W);
1647}
1648
1649static __inline__ __m128 __DEFAULT_FN_ATTRS128
1650_mm_maskz_add_ps(__mmask8 __U, __m128 __A, __m128 __B) {
1651 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
1652 (__v4sf)_mm_add_ps(__A, __B),
1653 (__v4sf)_mm_setzero_ps());
1654}
1655
1656static __inline__ __m256 __DEFAULT_FN_ATTRS256
1657_mm256_mask_add_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
1658 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
1659 (__v8sf)_mm256_add_ps(__A, __B),
1660 (__v8sf)__W);
1661}
1662
1663static __inline__ __m256 __DEFAULT_FN_ATTRS256
1664_mm256_maskz_add_ps(__mmask8 __U, __m256 __A, __m256 __B) {
1665 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
1666 (__v8sf)_mm256_add_ps(__A, __B),
1667 (__v8sf)_mm256_setzero_ps());
1668}
1669
1670static __inline__ __m128i __DEFAULT_FN_ATTRS128
1671_mm_mask_blend_epi32 (__mmask8 __U, __m128i __A, __m128i __W) {
1672 return (__m128i) __builtin_ia32_selectd_128 ((__mmask8) __U,
1673 (__v4si) __W,
1674 (__v4si) __A);
1675}
1676
1677static __inline__ __m256i __DEFAULT_FN_ATTRS256
1678_mm256_mask_blend_epi32 (__mmask8 __U, __m256i __A, __m256i __W) {
1679 return (__m256i) __builtin_ia32_selectd_256 ((__mmask8) __U,
1680 (__v8si) __W,
1681 (__v8si) __A);
1682}
1683
1684static __inline__ __m128d __DEFAULT_FN_ATTRS128
1685_mm_mask_blend_pd (__mmask8 __U, __m128d __A, __m128d __W) {
1686 return (__m128d) __builtin_ia32_selectpd_128 ((__mmask8) __U,
1687 (__v2df) __W,
1688 (__v2df) __A);
1689}
1690
1691static __inline__ __m256d __DEFAULT_FN_ATTRS256
1692_mm256_mask_blend_pd (__mmask8 __U, __m256d __A, __m256d __W) {
1693 return (__m256d) __builtin_ia32_selectpd_256 ((__mmask8) __U,
1694 (__v4df) __W,
1695 (__v4df) __A);
1696}
1697
1698static __inline__ __m128 __DEFAULT_FN_ATTRS128
1699_mm_mask_blend_ps (__mmask8 __U, __m128 __A, __m128 __W) {
1700 return (__m128) __builtin_ia32_selectps_128 ((__mmask8) __U,
1701 (__v4sf) __W,
1702 (__v4sf) __A);
1703}
1704
1705static __inline__ __m256 __DEFAULT_FN_ATTRS256
1706_mm256_mask_blend_ps (__mmask8 __U, __m256 __A, __m256 __W) {
1707 return (__m256) __builtin_ia32_selectps_256 ((__mmask8) __U,
1708 (__v8sf) __W,
1709 (__v8sf) __A);
1710}
1711
1712static __inline__ __m128i __DEFAULT_FN_ATTRS128
1713_mm_mask_blend_epi64 (__mmask8 __U, __m128i __A, __m128i __W) {
1714 return (__m128i) __builtin_ia32_selectq_128 ((__mmask8) __U,
1715 (__v2di) __W,
1716 (__v2di) __A);
1717}
1718
1719static __inline__ __m256i __DEFAULT_FN_ATTRS256
1720_mm256_mask_blend_epi64 (__mmask8 __U, __m256i __A, __m256i __W) {
1721 return (__m256i) __builtin_ia32_selectq_256 ((__mmask8) __U,
1722 (__v4di) __W,
1723 (__v4di) __A);
1724}
1725
1726static __inline__ __m128d __DEFAULT_FN_ATTRS128
1727_mm_mask_compress_pd (__m128d __W, __mmask8 __U, __m128d __A) {
1728 return (__m128d) __builtin_ia32_compressdf128_mask ((__v2df) __A,
1729 (__v2df) __W,
1730 (__mmask8) __U);
1731}
1732
1733static __inline__ __m128d __DEFAULT_FN_ATTRS128
1735 return (__m128d) __builtin_ia32_compressdf128_mask ((__v2df) __A,
1736 (__v2df)
1737 _mm_setzero_pd (),
1738 (__mmask8) __U);
1739}
1740
1741static __inline__ __m256d __DEFAULT_FN_ATTRS256
1742_mm256_mask_compress_pd (__m256d __W, __mmask8 __U, __m256d __A) {
1743 return (__m256d) __builtin_ia32_compressdf256_mask ((__v4df) __A,
1744 (__v4df) __W,
1745 (__mmask8) __U);
1746}
1747
1748static __inline__ __m256d __DEFAULT_FN_ATTRS256
1750 return (__m256d) __builtin_ia32_compressdf256_mask ((__v4df) __A,
1751 (__v4df)
1753 (__mmask8) __U);
1754}
1755
1756static __inline__ __m128i __DEFAULT_FN_ATTRS128
1757_mm_mask_compress_epi64 (__m128i __W, __mmask8 __U, __m128i __A) {
1758 return (__m128i) __builtin_ia32_compressdi128_mask ((__v2di) __A,
1759 (__v2di) __W,
1760 (__mmask8) __U);
1761}
1762
1763static __inline__ __m128i __DEFAULT_FN_ATTRS128
1765 return (__m128i) __builtin_ia32_compressdi128_mask ((__v2di) __A,
1766 (__v2di)
1768 (__mmask8) __U);
1769}
1770
1771static __inline__ __m256i __DEFAULT_FN_ATTRS256
1772_mm256_mask_compress_epi64 (__m256i __W, __mmask8 __U, __m256i __A) {
1773 return (__m256i) __builtin_ia32_compressdi256_mask ((__v4di) __A,
1774 (__v4di) __W,
1775 (__mmask8) __U);
1776}
1777
1778static __inline__ __m256i __DEFAULT_FN_ATTRS256
1780 return (__m256i) __builtin_ia32_compressdi256_mask ((__v4di) __A,
1781 (__v4di)
1783 (__mmask8) __U);
1784}
1785
1786static __inline__ __m128 __DEFAULT_FN_ATTRS128
1787_mm_mask_compress_ps (__m128 __W, __mmask8 __U, __m128 __A) {
1788 return (__m128) __builtin_ia32_compresssf128_mask ((__v4sf) __A,
1789 (__v4sf) __W,
1790 (__mmask8) __U);
1791}
1792
1793static __inline__ __m128 __DEFAULT_FN_ATTRS128
1795 return (__m128) __builtin_ia32_compresssf128_mask ((__v4sf) __A,
1796 (__v4sf)
1797 _mm_setzero_ps (),
1798 (__mmask8) __U);
1799}
1800
1801static __inline__ __m256 __DEFAULT_FN_ATTRS256
1802_mm256_mask_compress_ps (__m256 __W, __mmask8 __U, __m256 __A) {
1803 return (__m256) __builtin_ia32_compresssf256_mask ((__v8sf) __A,
1804 (__v8sf) __W,
1805 (__mmask8) __U);
1806}
1807
1808static __inline__ __m256 __DEFAULT_FN_ATTRS256
1810 return (__m256) __builtin_ia32_compresssf256_mask ((__v8sf) __A,
1811 (__v8sf)
1813 (__mmask8) __U);
1814}
1815
1816static __inline__ __m128i __DEFAULT_FN_ATTRS128
1817_mm_mask_compress_epi32 (__m128i __W, __mmask8 __U, __m128i __A) {
1818 return (__m128i) __builtin_ia32_compresssi128_mask ((__v4si) __A,
1819 (__v4si) __W,
1820 (__mmask8) __U);
1821}
1822
1823static __inline__ __m128i __DEFAULT_FN_ATTRS128
1825 return (__m128i) __builtin_ia32_compresssi128_mask ((__v4si) __A,
1826 (__v4si)
1828 (__mmask8) __U);
1829}
1830
1831static __inline__ __m256i __DEFAULT_FN_ATTRS256
1832_mm256_mask_compress_epi32 (__m256i __W, __mmask8 __U, __m256i __A) {
1833 return (__m256i) __builtin_ia32_compresssi256_mask ((__v8si) __A,
1834 (__v8si) __W,
1835 (__mmask8) __U);
1836}
1837
1838static __inline__ __m256i __DEFAULT_FN_ATTRS256
1840 return (__m256i) __builtin_ia32_compresssi256_mask ((__v8si) __A,
1841 (__v8si)
1843 (__mmask8) __U);
1844}
1845
1846static __inline__ void __DEFAULT_FN_ATTRS128
1847_mm_mask_compressstoreu_pd (void *__P, __mmask8 __U, __m128d __A) {
1848 __builtin_ia32_compressstoredf128_mask ((__v2df *) __P,
1849 (__v2df) __A,
1850 (__mmask8) __U);
1851}
1852
1853static __inline__ void __DEFAULT_FN_ATTRS256
1854_mm256_mask_compressstoreu_pd (void *__P, __mmask8 __U, __m256d __A) {
1855 __builtin_ia32_compressstoredf256_mask ((__v4df *) __P,
1856 (__v4df) __A,
1857 (__mmask8) __U);
1858}
1859
1860static __inline__ void __DEFAULT_FN_ATTRS128
1861_mm_mask_compressstoreu_epi64 (void *__P, __mmask8 __U, __m128i __A) {
1862 __builtin_ia32_compressstoredi128_mask ((__v2di *) __P,
1863 (__v2di) __A,
1864 (__mmask8) __U);
1865}
1866
1867static __inline__ void __DEFAULT_FN_ATTRS256
1868_mm256_mask_compressstoreu_epi64 (void *__P, __mmask8 __U, __m256i __A) {
1869 __builtin_ia32_compressstoredi256_mask ((__v4di *) __P,
1870 (__v4di) __A,
1871 (__mmask8) __U);
1872}
1873
1874static __inline__ void __DEFAULT_FN_ATTRS128
1875_mm_mask_compressstoreu_ps (void *__P, __mmask8 __U, __m128 __A) {
1876 __builtin_ia32_compressstoresf128_mask ((__v4sf *) __P,
1877 (__v4sf) __A,
1878 (__mmask8) __U);
1879}
1880
1881static __inline__ void __DEFAULT_FN_ATTRS256
1882_mm256_mask_compressstoreu_ps (void *__P, __mmask8 __U, __m256 __A) {
1883 __builtin_ia32_compressstoresf256_mask ((__v8sf *) __P,
1884 (__v8sf) __A,
1885 (__mmask8) __U);
1886}
1887
1888static __inline__ void __DEFAULT_FN_ATTRS128
1889_mm_mask_compressstoreu_epi32 (void *__P, __mmask8 __U, __m128i __A) {
1890 __builtin_ia32_compressstoresi128_mask ((__v4si *) __P,
1891 (__v4si) __A,
1892 (__mmask8) __U);
1893}
1894
1895static __inline__ void __DEFAULT_FN_ATTRS256
1896_mm256_mask_compressstoreu_epi32 (void *__P, __mmask8 __U, __m256i __A) {
1897 __builtin_ia32_compressstoresi256_mask ((__v8si *) __P,
1898 (__v8si) __A,
1899 (__mmask8) __U);
1900}
1901
1902static __inline__ __m128d __DEFAULT_FN_ATTRS128
1903_mm_mask_cvtepi32_pd (__m128d __W, __mmask8 __U, __m128i __A) {
1904 return (__m128d)__builtin_ia32_selectpd_128((__mmask8) __U,
1905 (__v2df)_mm_cvtepi32_pd(__A),
1906 (__v2df)__W);
1907}
1908
1909static __inline__ __m128d __DEFAULT_FN_ATTRS128
1911 return (__m128d)__builtin_ia32_selectpd_128((__mmask8) __U,
1912 (__v2df)_mm_cvtepi32_pd(__A),
1913 (__v2df)_mm_setzero_pd());
1914}
1915
1916static __inline__ __m256d __DEFAULT_FN_ATTRS256
1917_mm256_mask_cvtepi32_pd (__m256d __W, __mmask8 __U, __m128i __A) {
1918 return (__m256d)__builtin_ia32_selectpd_256((__mmask8) __U,
1919 (__v4df)_mm256_cvtepi32_pd(__A),
1920 (__v4df)__W);
1921}
1922
1923static __inline__ __m256d __DEFAULT_FN_ATTRS256
1925 return (__m256d)__builtin_ia32_selectpd_256((__mmask8) __U,
1926 (__v4df)_mm256_cvtepi32_pd(__A),
1927 (__v4df)_mm256_setzero_pd());
1928}
1929
1930static __inline__ __m128 __DEFAULT_FN_ATTRS128
1931_mm_mask_cvtepi32_ps (__m128 __W, __mmask8 __U, __m128i __A) {
1932 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
1933 (__v4sf)_mm_cvtepi32_ps(__A),
1934 (__v4sf)__W);
1935}
1936
1937static __inline__ __m128 __DEFAULT_FN_ATTRS128
1939 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
1940 (__v4sf)_mm_cvtepi32_ps(__A),
1941 (__v4sf)_mm_setzero_ps());
1942}
1943
1944static __inline__ __m256 __DEFAULT_FN_ATTRS256
1945_mm256_mask_cvtepi32_ps (__m256 __W, __mmask8 __U, __m256i __A) {
1946 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
1947 (__v8sf)_mm256_cvtepi32_ps(__A),
1948 (__v8sf)__W);
1949}
1950
1951static __inline__ __m256 __DEFAULT_FN_ATTRS256
1953 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
1954 (__v8sf)_mm256_cvtepi32_ps(__A),
1955 (__v8sf)_mm256_setzero_ps());
1956}
1957
1958static __inline__ __m128i __DEFAULT_FN_ATTRS128
1959_mm_mask_cvtpd_epi32 (__m128i __W, __mmask8 __U, __m128d __A) {
1960 return (__m128i) __builtin_ia32_cvtpd2dq128_mask ((__v2df) __A,
1961 (__v4si) __W,
1962 (__mmask8) __U);
1963}
1964
1965static __inline__ __m128i __DEFAULT_FN_ATTRS128
1967 return (__m128i) __builtin_ia32_cvtpd2dq128_mask ((__v2df) __A,
1968 (__v4si)
1970 (__mmask8) __U);
1971}
1972
1973static __inline__ __m128i __DEFAULT_FN_ATTRS256
1974_mm256_mask_cvtpd_epi32 (__m128i __W, __mmask8 __U, __m256d __A) {
1975 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
1976 (__v4si)_mm256_cvtpd_epi32(__A),
1977 (__v4si)__W);
1978}
1979
1980static __inline__ __m128i __DEFAULT_FN_ATTRS256
1982 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
1983 (__v4si)_mm256_cvtpd_epi32(__A),
1984 (__v4si)_mm_setzero_si128());
1985}
1986
1987static __inline__ __m128 __DEFAULT_FN_ATTRS128
1988_mm_mask_cvtpd_ps (__m128 __W, __mmask8 __U, __m128d __A) {
1989 return (__m128) __builtin_ia32_cvtpd2ps_mask ((__v2df) __A,
1990 (__v4sf) __W,
1991 (__mmask8) __U);
1992}
1993
1994static __inline__ __m128 __DEFAULT_FN_ATTRS128
1995_mm_maskz_cvtpd_ps (__mmask8 __U, __m128d __A) {
1996 return (__m128) __builtin_ia32_cvtpd2ps_mask ((__v2df) __A,
1997 (__v4sf)
1998 _mm_setzero_ps (),
1999 (__mmask8) __U);
2000}
2001
2002static __inline__ __m128 __DEFAULT_FN_ATTRS256
2003_mm256_mask_cvtpd_ps (__m128 __W, __mmask8 __U, __m256d __A) {
2004 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
2005 (__v4sf)_mm256_cvtpd_ps(__A),
2006 (__v4sf)__W);
2007}
2008
2009static __inline__ __m128 __DEFAULT_FN_ATTRS256
2011 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
2012 (__v4sf)_mm256_cvtpd_ps(__A),
2013 (__v4sf)_mm_setzero_ps());
2014}
2015
2016static __inline__ __m128i __DEFAULT_FN_ATTRS128
2017_mm_cvtpd_epu32 (__m128d __A) {
2018 return (__m128i) __builtin_ia32_cvtpd2udq128_mask ((__v2df) __A,
2019 (__v4si)
2021 (__mmask8) -1);
2022}
2023
2024static __inline__ __m128i __DEFAULT_FN_ATTRS128
2025_mm_mask_cvtpd_epu32 (__m128i __W, __mmask8 __U, __m128d __A) {
2026 return (__m128i) __builtin_ia32_cvtpd2udq128_mask ((__v2df) __A,
2027 (__v4si) __W,
2028 (__mmask8) __U);
2029}
2030
2031static __inline__ __m128i __DEFAULT_FN_ATTRS128
2033 return (__m128i) __builtin_ia32_cvtpd2udq128_mask ((__v2df) __A,
2034 (__v4si)
2036 (__mmask8) __U);
2037}
2038
2039static __inline__ __m128i __DEFAULT_FN_ATTRS256
2040_mm256_cvtpd_epu32 (__m256d __A) {
2041 return (__m128i) __builtin_ia32_cvtpd2udq256_mask ((__v4df) __A,
2042 (__v4si)
2044 (__mmask8) -1);
2045}
2046
2047static __inline__ __m128i __DEFAULT_FN_ATTRS256
2048_mm256_mask_cvtpd_epu32 (__m128i __W, __mmask8 __U, __m256d __A) {
2049 return (__m128i) __builtin_ia32_cvtpd2udq256_mask ((__v4df) __A,
2050 (__v4si) __W,
2051 (__mmask8) __U);
2052}
2053
2054static __inline__ __m128i __DEFAULT_FN_ATTRS256
2056 return (__m128i) __builtin_ia32_cvtpd2udq256_mask ((__v4df) __A,
2057 (__v4si)
2059 (__mmask8) __U);
2060}
2061
2062static __inline__ __m128i __DEFAULT_FN_ATTRS128
2063_mm_mask_cvtps_epi32 (__m128i __W, __mmask8 __U, __m128 __A) {
2064 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
2065 (__v4si)_mm_cvtps_epi32(__A),
2066 (__v4si)__W);
2067}
2068
2069static __inline__ __m128i __DEFAULT_FN_ATTRS128
2071 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
2072 (__v4si)_mm_cvtps_epi32(__A),
2073 (__v4si)_mm_setzero_si128());
2074}
2075
2076static __inline__ __m256i __DEFAULT_FN_ATTRS256
2077_mm256_mask_cvtps_epi32 (__m256i __W, __mmask8 __U, __m256 __A) {
2078 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
2079 (__v8si)_mm256_cvtps_epi32(__A),
2080 (__v8si)__W);
2081}
2082
2083static __inline__ __m256i __DEFAULT_FN_ATTRS256
2085 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
2086 (__v8si)_mm256_cvtps_epi32(__A),
2087 (__v8si)_mm256_setzero_si256());
2088}
2089
2090static __inline__ __m128d __DEFAULT_FN_ATTRS128
2091_mm_mask_cvtps_pd (__m128d __W, __mmask8 __U, __m128 __A) {
2092 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
2093 (__v2df)_mm_cvtps_pd(__A),
2094 (__v2df)__W);
2095}
2096
2097static __inline__ __m128d __DEFAULT_FN_ATTRS128
2098_mm_maskz_cvtps_pd (__mmask8 __U, __m128 __A) {
2099 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
2100 (__v2df)_mm_cvtps_pd(__A),
2101 (__v2df)_mm_setzero_pd());
2102}
2103
2104static __inline__ __m256d __DEFAULT_FN_ATTRS256
2105_mm256_mask_cvtps_pd (__m256d __W, __mmask8 __U, __m128 __A) {
2106 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
2107 (__v4df)_mm256_cvtps_pd(__A),
2108 (__v4df)__W);
2109}
2110
2111static __inline__ __m256d __DEFAULT_FN_ATTRS256
2113 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
2114 (__v4df)_mm256_cvtps_pd(__A),
2115 (__v4df)_mm256_setzero_pd());
2116}
2117
2118static __inline__ __m128i __DEFAULT_FN_ATTRS128
2119_mm_cvtps_epu32 (__m128 __A) {
2120 return (__m128i) __builtin_ia32_cvtps2udq128_mask ((__v4sf) __A,
2121 (__v4si)
2123 (__mmask8) -1);
2124}
2125
2126static __inline__ __m128i __DEFAULT_FN_ATTRS128
2127_mm_mask_cvtps_epu32 (__m128i __W, __mmask8 __U, __m128 __A) {
2128 return (__m128i) __builtin_ia32_cvtps2udq128_mask ((__v4sf) __A,
2129 (__v4si) __W,
2130 (__mmask8) __U);
2131}
2132
2133static __inline__ __m128i __DEFAULT_FN_ATTRS128
2135 return (__m128i) __builtin_ia32_cvtps2udq128_mask ((__v4sf) __A,
2136 (__v4si)
2138 (__mmask8) __U);
2139}
2140
2141static __inline__ __m256i __DEFAULT_FN_ATTRS256
2143 return (__m256i) __builtin_ia32_cvtps2udq256_mask ((__v8sf) __A,
2144 (__v8si)
2146 (__mmask8) -1);
2147}
2148
2149static __inline__ __m256i __DEFAULT_FN_ATTRS256
2150_mm256_mask_cvtps_epu32 (__m256i __W, __mmask8 __U, __m256 __A) {
2151 return (__m256i) __builtin_ia32_cvtps2udq256_mask ((__v8sf) __A,
2152 (__v8si) __W,
2153 (__mmask8) __U);
2154}
2155
2156static __inline__ __m256i __DEFAULT_FN_ATTRS256
2158 return (__m256i) __builtin_ia32_cvtps2udq256_mask ((__v8sf) __A,
2159 (__v8si)
2161 (__mmask8) __U);
2162}
2163
2164static __inline__ __m128i __DEFAULT_FN_ATTRS128
2165_mm_mask_cvttpd_epi32 (__m128i __W, __mmask8 __U, __m128d __A) {
2166 return (__m128i) __builtin_ia32_cvttpd2dq128_mask ((__v2df) __A,
2167 (__v4si) __W,
2168 (__mmask8) __U);
2169}
2170
2171static __inline__ __m128i __DEFAULT_FN_ATTRS128
2173 return (__m128i) __builtin_ia32_cvttpd2dq128_mask ((__v2df) __A,
2174 (__v4si)
2176 (__mmask8) __U);
2177}
2178
2179static __inline__ __m128i __DEFAULT_FN_ATTRS256
2180_mm256_mask_cvttpd_epi32 (__m128i __W, __mmask8 __U, __m256d __A) {
2181 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
2182 (__v4si)_mm256_cvttpd_epi32(__A),
2183 (__v4si)__W);
2184}
2185
2186static __inline__ __m128i __DEFAULT_FN_ATTRS256
2188 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
2189 (__v4si)_mm256_cvttpd_epi32(__A),
2190 (__v4si)_mm_setzero_si128());
2191}
2192
2193static __inline__ __m128i __DEFAULT_FN_ATTRS128
2194_mm_cvttpd_epu32 (__m128d __A) {
2195 return (__m128i) __builtin_ia32_cvttpd2udq128_mask ((__v2df) __A,
2196 (__v4si)
2198 (__mmask8) -1);
2199}
2200
2201static __inline__ __m128i __DEFAULT_FN_ATTRS128
2202_mm_mask_cvttpd_epu32 (__m128i __W, __mmask8 __U, __m128d __A) {
2203 return (__m128i) __builtin_ia32_cvttpd2udq128_mask ((__v2df) __A,
2204 (__v4si) __W,
2205 (__mmask8) __U);
2206}
2207
2208static __inline__ __m128i __DEFAULT_FN_ATTRS128
2210 return (__m128i) __builtin_ia32_cvttpd2udq128_mask ((__v2df) __A,
2211 (__v4si)
2213 (__mmask8) __U);
2214}
2215
2216static __inline__ __m128i __DEFAULT_FN_ATTRS256
2217_mm256_cvttpd_epu32 (__m256d __A) {
2218 return (__m128i) __builtin_ia32_cvttpd2udq256_mask ((__v4df) __A,
2219 (__v4si)
2221 (__mmask8) -1);
2222}
2223
2224static __inline__ __m128i __DEFAULT_FN_ATTRS256
2225_mm256_mask_cvttpd_epu32 (__m128i __W, __mmask8 __U, __m256d __A) {
2226 return (__m128i) __builtin_ia32_cvttpd2udq256_mask ((__v4df) __A,
2227 (__v4si) __W,
2228 (__mmask8) __U);
2229}
2230
2231static __inline__ __m128i __DEFAULT_FN_ATTRS256
2233 return (__m128i) __builtin_ia32_cvttpd2udq256_mask ((__v4df) __A,
2234 (__v4si)
2236 (__mmask8) __U);
2237}
2238
2239static __inline__ __m128i __DEFAULT_FN_ATTRS128
2240_mm_mask_cvttps_epi32 (__m128i __W, __mmask8 __U, __m128 __A) {
2241 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
2242 (__v4si)_mm_cvttps_epi32(__A),
2243 (__v4si)__W);
2244}
2245
2246static __inline__ __m128i __DEFAULT_FN_ATTRS128
2248 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
2249 (__v4si)_mm_cvttps_epi32(__A),
2250 (__v4si)_mm_setzero_si128());
2251}
2252
2253static __inline__ __m256i __DEFAULT_FN_ATTRS256
2254_mm256_mask_cvttps_epi32 (__m256i __W, __mmask8 __U, __m256 __A) {
2255 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
2256 (__v8si)_mm256_cvttps_epi32(__A),
2257 (__v8si)__W);
2258}
2259
2260static __inline__ __m256i __DEFAULT_FN_ATTRS256
2262 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
2263 (__v8si)_mm256_cvttps_epi32(__A),
2264 (__v8si)_mm256_setzero_si256());
2265}
2266
2267static __inline__ __m128i __DEFAULT_FN_ATTRS128
2268_mm_cvttps_epu32 (__m128 __A) {
2269 return (__m128i) __builtin_ia32_cvttps2udq128_mask ((__v4sf) __A,
2270 (__v4si)
2272 (__mmask8) -1);
2273}
2274
2275static __inline__ __m128i __DEFAULT_FN_ATTRS128
2276_mm_mask_cvttps_epu32 (__m128i __W, __mmask8 __U, __m128 __A) {
2277 return (__m128i) __builtin_ia32_cvttps2udq128_mask ((__v4sf) __A,
2278 (__v4si) __W,
2279 (__mmask8) __U);
2280}
2281
2282static __inline__ __m128i __DEFAULT_FN_ATTRS128
2284 return (__m128i) __builtin_ia32_cvttps2udq128_mask ((__v4sf) __A,
2285 (__v4si)
2287 (__mmask8) __U);
2288}
2289
2290static __inline__ __m256i __DEFAULT_FN_ATTRS256
2292 return (__m256i) __builtin_ia32_cvttps2udq256_mask ((__v8sf) __A,
2293 (__v8si)
2295 (__mmask8) -1);
2296}
2297
2298static __inline__ __m256i __DEFAULT_FN_ATTRS256
2299_mm256_mask_cvttps_epu32 (__m256i __W, __mmask8 __U, __m256 __A) {
2300 return (__m256i) __builtin_ia32_cvttps2udq256_mask ((__v8sf) __A,
2301 (__v8si) __W,
2302 (__mmask8) __U);
2303}
2304
2305static __inline__ __m256i __DEFAULT_FN_ATTRS256
2307 return (__m256i) __builtin_ia32_cvttps2udq256_mask ((__v8sf) __A,
2308 (__v8si)
2310 (__mmask8) __U);
2311}
2312
2313static __inline__ __m128d __DEFAULT_FN_ATTRS128
2314_mm_cvtepu32_pd (__m128i __A) {
2315 return (__m128d) __builtin_convertvector(
2316 __builtin_shufflevector((__v4su)__A, (__v4su)__A, 0, 1), __v2df);
2317}
2318
2319static __inline__ __m128d __DEFAULT_FN_ATTRS128
2320_mm_mask_cvtepu32_pd (__m128d __W, __mmask8 __U, __m128i __A) {
2321 return (__m128d)__builtin_ia32_selectpd_128((__mmask8) __U,
2322 (__v2df)_mm_cvtepu32_pd(__A),
2323 (__v2df)__W);
2324}
2325
2326static __inline__ __m128d __DEFAULT_FN_ATTRS128
2328 return (__m128d)__builtin_ia32_selectpd_128((__mmask8) __U,
2329 (__v2df)_mm_cvtepu32_pd(__A),
2330 (__v2df)_mm_setzero_pd());
2331}
2332
2333static __inline__ __m256d __DEFAULT_FN_ATTRS256
2334_mm256_cvtepu32_pd (__m128i __A) {
2335 return (__m256d)__builtin_convertvector((__v4su)__A, __v4df);
2336}
2337
2338static __inline__ __m256d __DEFAULT_FN_ATTRS256
2339_mm256_mask_cvtepu32_pd (__m256d __W, __mmask8 __U, __m128i __A) {
2340 return (__m256d)__builtin_ia32_selectpd_256((__mmask8) __U,
2341 (__v4df)_mm256_cvtepu32_pd(__A),
2342 (__v4df)__W);
2343}
2344
2345static __inline__ __m256d __DEFAULT_FN_ATTRS256
2347 return (__m256d)__builtin_ia32_selectpd_256((__mmask8) __U,
2348 (__v4df)_mm256_cvtepu32_pd(__A),
2349 (__v4df)_mm256_setzero_pd());
2350}
2351
2352static __inline__ __m128 __DEFAULT_FN_ATTRS128
2353_mm_cvtepu32_ps (__m128i __A) {
2354 return (__m128)__builtin_convertvector((__v4su)__A, __v4sf);
2355}
2356
2357static __inline__ __m128 __DEFAULT_FN_ATTRS128
2358_mm_mask_cvtepu32_ps (__m128 __W, __mmask8 __U, __m128i __A) {
2359 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
2360 (__v4sf)_mm_cvtepu32_ps(__A),
2361 (__v4sf)__W);
2362}
2363
2364static __inline__ __m128 __DEFAULT_FN_ATTRS128
2366 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
2367 (__v4sf)_mm_cvtepu32_ps(__A),
2368 (__v4sf)_mm_setzero_ps());
2369}
2370
2371static __inline__ __m256 __DEFAULT_FN_ATTRS256
2372_mm256_cvtepu32_ps (__m256i __A) {
2373 return (__m256)__builtin_convertvector((__v8su)__A, __v8sf);
2374}
2375
2376static __inline__ __m256 __DEFAULT_FN_ATTRS256
2377_mm256_mask_cvtepu32_ps (__m256 __W, __mmask8 __U, __m256i __A) {
2378 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
2379 (__v8sf)_mm256_cvtepu32_ps(__A),
2380 (__v8sf)__W);
2381}
2382
2383static __inline__ __m256 __DEFAULT_FN_ATTRS256
2385 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
2386 (__v8sf)_mm256_cvtepu32_ps(__A),
2387 (__v8sf)_mm256_setzero_ps());
2388}
2389
2390static __inline__ __m128d __DEFAULT_FN_ATTRS128
2391_mm_mask_div_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
2392 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
2393 (__v2df)_mm_div_pd(__A, __B),
2394 (__v2df)__W);
2395}
2396
2397static __inline__ __m128d __DEFAULT_FN_ATTRS128
2398_mm_maskz_div_pd(__mmask8 __U, __m128d __A, __m128d __B) {
2399 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
2400 (__v2df)_mm_div_pd(__A, __B),
2401 (__v2df)_mm_setzero_pd());
2402}
2403
2404static __inline__ __m256d __DEFAULT_FN_ATTRS256
2405_mm256_mask_div_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
2406 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
2407 (__v4df)_mm256_div_pd(__A, __B),
2408 (__v4df)__W);
2409}
2410
2411static __inline__ __m256d __DEFAULT_FN_ATTRS256
2412_mm256_maskz_div_pd(__mmask8 __U, __m256d __A, __m256d __B) {
2413 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
2414 (__v4df)_mm256_div_pd(__A, __B),
2415 (__v4df)_mm256_setzero_pd());
2416}
2417
2418static __inline__ __m128 __DEFAULT_FN_ATTRS128
2419_mm_mask_div_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
2420 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
2421 (__v4sf)_mm_div_ps(__A, __B),
2422 (__v4sf)__W);
2423}
2424
2425static __inline__ __m128 __DEFAULT_FN_ATTRS128
2426_mm_maskz_div_ps(__mmask8 __U, __m128 __A, __m128 __B) {
2427 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
2428 (__v4sf)_mm_div_ps(__A, __B),
2429 (__v4sf)_mm_setzero_ps());
2430}
2431
2432static __inline__ __m256 __DEFAULT_FN_ATTRS256
2433_mm256_mask_div_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
2434 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
2435 (__v8sf)_mm256_div_ps(__A, __B),
2436 (__v8sf)__W);
2437}
2438
2439static __inline__ __m256 __DEFAULT_FN_ATTRS256
2440_mm256_maskz_div_ps(__mmask8 __U, __m256 __A, __m256 __B) {
2441 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
2442 (__v8sf)_mm256_div_ps(__A, __B),
2443 (__v8sf)_mm256_setzero_ps());
2444}
2445
2446static __inline__ __m128d __DEFAULT_FN_ATTRS128
2447_mm_mask_expand_pd (__m128d __W, __mmask8 __U, __m128d __A) {
2448 return (__m128d) __builtin_ia32_expanddf128_mask ((__v2df) __A,
2449 (__v2df) __W,
2450 (__mmask8) __U);
2451}
2452
2453static __inline__ __m128d __DEFAULT_FN_ATTRS128
2454_mm_maskz_expand_pd (__mmask8 __U, __m128d __A) {
2455 return (__m128d) __builtin_ia32_expanddf128_mask ((__v2df) __A,
2456 (__v2df)
2457 _mm_setzero_pd (),
2458 (__mmask8) __U);
2459}
2460
2461static __inline__ __m256d __DEFAULT_FN_ATTRS256
2462_mm256_mask_expand_pd (__m256d __W, __mmask8 __U, __m256d __A) {
2463 return (__m256d) __builtin_ia32_expanddf256_mask ((__v4df) __A,
2464 (__v4df) __W,
2465 (__mmask8) __U);
2466}
2467
2468static __inline__ __m256d __DEFAULT_FN_ATTRS256
2470 return (__m256d) __builtin_ia32_expanddf256_mask ((__v4df) __A,
2471 (__v4df)
2473 (__mmask8) __U);
2474}
2475
2476static __inline__ __m128i __DEFAULT_FN_ATTRS128
2477_mm_mask_expand_epi64 (__m128i __W, __mmask8 __U, __m128i __A) {
2478 return (__m128i) __builtin_ia32_expanddi128_mask ((__v2di) __A,
2479 (__v2di) __W,
2480 (__mmask8) __U);
2481}
2482
2483static __inline__ __m128i __DEFAULT_FN_ATTRS128
2485 return (__m128i) __builtin_ia32_expanddi128_mask ((__v2di) __A,
2486 (__v2di)
2488 (__mmask8) __U);
2489}
2490
2491static __inline__ __m256i __DEFAULT_FN_ATTRS256
2492_mm256_mask_expand_epi64 (__m256i __W, __mmask8 __U, __m256i __A) {
2493 return (__m256i) __builtin_ia32_expanddi256_mask ((__v4di) __A,
2494 (__v4di) __W,
2495 (__mmask8) __U);
2496}
2497
2498static __inline__ __m256i __DEFAULT_FN_ATTRS256
2500 return (__m256i) __builtin_ia32_expanddi256_mask ((__v4di) __A,
2501 (__v4di)
2503 (__mmask8) __U);
2504}
2505
2506static __inline__ __m128d __DEFAULT_FN_ATTRS128
2507_mm_mask_expandloadu_pd (__m128d __W, __mmask8 __U, void const *__P) {
2508 return (__m128d) __builtin_ia32_expandloaddf128_mask ((const __v2df *) __P,
2509 (__v2df) __W,
2510 (__mmask8)
2511 __U);
2512}
2513
2514static __inline__ __m128d __DEFAULT_FN_ATTRS128
2515_mm_maskz_expandloadu_pd (__mmask8 __U, void const *__P) {
2516 return (__m128d) __builtin_ia32_expandloaddf128_mask ((const __v2df *) __P,
2517 (__v2df)
2518 _mm_setzero_pd (),
2519 (__mmask8)
2520 __U);
2521}
2522
2523static __inline__ __m256d __DEFAULT_FN_ATTRS256
2524_mm256_mask_expandloadu_pd (__m256d __W, __mmask8 __U, void const *__P) {
2525 return (__m256d) __builtin_ia32_expandloaddf256_mask ((const __v4df *) __P,
2526 (__v4df) __W,
2527 (__mmask8)
2528 __U);
2529}
2530
2531static __inline__ __m256d __DEFAULT_FN_ATTRS256
2533 return (__m256d) __builtin_ia32_expandloaddf256_mask ((const __v4df *) __P,
2534 (__v4df)
2536 (__mmask8)
2537 __U);
2538}
2539
2540static __inline__ __m128i __DEFAULT_FN_ATTRS128
2541_mm_mask_expandloadu_epi64 (__m128i __W, __mmask8 __U, void const *__P) {
2542 return (__m128i) __builtin_ia32_expandloaddi128_mask ((const __v2di *) __P,
2543 (__v2di) __W,
2544 (__mmask8)
2545 __U);
2546}
2547
2548static __inline__ __m128i __DEFAULT_FN_ATTRS128
2550 return (__m128i) __builtin_ia32_expandloaddi128_mask ((const __v2di *) __P,
2551 (__v2di)
2553 (__mmask8)
2554 __U);
2555}
2556
2557static __inline__ __m256i __DEFAULT_FN_ATTRS256
2559 void const *__P) {
2560 return (__m256i) __builtin_ia32_expandloaddi256_mask ((const __v4di *) __P,
2561 (__v4di) __W,
2562 (__mmask8)
2563 __U);
2564}
2565
2566static __inline__ __m256i __DEFAULT_FN_ATTRS256
2568 return (__m256i) __builtin_ia32_expandloaddi256_mask ((const __v4di *) __P,
2569 (__v4di)
2571 (__mmask8)
2572 __U);
2573}
2574
2575static __inline__ __m128 __DEFAULT_FN_ATTRS128
2576_mm_mask_expandloadu_ps (__m128 __W, __mmask8 __U, void const *__P) {
2577 return (__m128) __builtin_ia32_expandloadsf128_mask ((const __v4sf *) __P,
2578 (__v4sf) __W,
2579 (__mmask8) __U);
2580}
2581
2582static __inline__ __m128 __DEFAULT_FN_ATTRS128
2583_mm_maskz_expandloadu_ps (__mmask8 __U, void const *__P) {
2584 return (__m128) __builtin_ia32_expandloadsf128_mask ((const __v4sf *) __P,
2585 (__v4sf)
2586 _mm_setzero_ps (),
2587 (__mmask8)
2588 __U);
2589}
2590
2591static __inline__ __m256 __DEFAULT_FN_ATTRS256
2592_mm256_mask_expandloadu_ps (__m256 __W, __mmask8 __U, void const *__P) {
2593 return (__m256) __builtin_ia32_expandloadsf256_mask ((const __v8sf *) __P,
2594 (__v8sf) __W,
2595 (__mmask8) __U);
2596}
2597
2598static __inline__ __m256 __DEFAULT_FN_ATTRS256
2600 return (__m256) __builtin_ia32_expandloadsf256_mask ((const __v8sf *) __P,
2601 (__v8sf)
2603 (__mmask8)
2604 __U);
2605}
2606
2607static __inline__ __m128i __DEFAULT_FN_ATTRS128
2608_mm_mask_expandloadu_epi32 (__m128i __W, __mmask8 __U, void const *__P) {
2609 return (__m128i) __builtin_ia32_expandloadsi128_mask ((const __v4si *) __P,
2610 (__v4si) __W,
2611 (__mmask8)
2612 __U);
2613}
2614
2615static __inline__ __m128i __DEFAULT_FN_ATTRS128
2617 return (__m128i) __builtin_ia32_expandloadsi128_mask ((const __v4si *) __P,
2618 (__v4si)
2620 (__mmask8) __U);
2621}
2622
2623static __inline__ __m256i __DEFAULT_FN_ATTRS256
2625 void const *__P) {
2626 return (__m256i) __builtin_ia32_expandloadsi256_mask ((const __v8si *) __P,
2627 (__v8si) __W,
2628 (__mmask8)
2629 __U);
2630}
2631
2632static __inline__ __m256i __DEFAULT_FN_ATTRS256
2634 return (__m256i) __builtin_ia32_expandloadsi256_mask ((const __v8si *) __P,
2635 (__v8si)
2637 (__mmask8)
2638 __U);
2639}
2640
2641static __inline__ __m128 __DEFAULT_FN_ATTRS128
2642_mm_mask_expand_ps (__m128 __W, __mmask8 __U, __m128 __A) {
2643 return (__m128) __builtin_ia32_expandsf128_mask ((__v4sf) __A,
2644 (__v4sf) __W,
2645 (__mmask8) __U);
2646}
2647
2648static __inline__ __m128 __DEFAULT_FN_ATTRS128
2650 return (__m128) __builtin_ia32_expandsf128_mask ((__v4sf) __A,
2651 (__v4sf)
2652 _mm_setzero_ps (),
2653 (__mmask8) __U);
2654}
2655
2656static __inline__ __m256 __DEFAULT_FN_ATTRS256
2657_mm256_mask_expand_ps (__m256 __W, __mmask8 __U, __m256 __A) {
2658 return (__m256) __builtin_ia32_expandsf256_mask ((__v8sf) __A,
2659 (__v8sf) __W,
2660 (__mmask8) __U);
2661}
2662
2663static __inline__ __m256 __DEFAULT_FN_ATTRS256
2665 return (__m256) __builtin_ia32_expandsf256_mask ((__v8sf) __A,
2666 (__v8sf)
2668 (__mmask8) __U);
2669}
2670
2671static __inline__ __m128i __DEFAULT_FN_ATTRS128
2672_mm_mask_expand_epi32 (__m128i __W, __mmask8 __U, __m128i __A) {
2673 return (__m128i) __builtin_ia32_expandsi128_mask ((__v4si) __A,
2674 (__v4si) __W,
2675 (__mmask8) __U);
2676}
2677
2678static __inline__ __m128i __DEFAULT_FN_ATTRS128
2680 return (__m128i) __builtin_ia32_expandsi128_mask ((__v4si) __A,
2681 (__v4si)
2683 (__mmask8) __U);
2684}
2685
2686static __inline__ __m256i __DEFAULT_FN_ATTRS256
2687_mm256_mask_expand_epi32 (__m256i __W, __mmask8 __U, __m256i __A) {
2688 return (__m256i) __builtin_ia32_expandsi256_mask ((__v8si) __A,
2689 (__v8si) __W,
2690 (__mmask8) __U);
2691}
2692
2693static __inline__ __m256i __DEFAULT_FN_ATTRS256
2695 return (__m256i) __builtin_ia32_expandsi256_mask ((__v8si) __A,
2696 (__v8si)
2698 (__mmask8) __U);
2699}
2700
2701static __inline__ __m128d __DEFAULT_FN_ATTRS128
2702_mm_getexp_pd (__m128d __A) {
2703 return (__m128d) __builtin_ia32_getexppd128_mask ((__v2df) __A,
2704 (__v2df)
2705 _mm_setzero_pd (),
2706 (__mmask8) -1);
2707}
2708
2709static __inline__ __m128d __DEFAULT_FN_ATTRS128
2710_mm_mask_getexp_pd (__m128d __W, __mmask8 __U, __m128d __A) {
2711 return (__m128d) __builtin_ia32_getexppd128_mask ((__v2df) __A,
2712 (__v2df) __W,
2713 (__mmask8) __U);
2714}
2715
2716static __inline__ __m128d __DEFAULT_FN_ATTRS128
2717_mm_maskz_getexp_pd (__mmask8 __U, __m128d __A) {
2718 return (__m128d) __builtin_ia32_getexppd128_mask ((__v2df) __A,
2719 (__v2df)
2720 _mm_setzero_pd (),
2721 (__mmask8) __U);
2722}
2723
2724static __inline__ __m256d __DEFAULT_FN_ATTRS256
2725_mm256_getexp_pd (__m256d __A) {
2726 return (__m256d) __builtin_ia32_getexppd256_mask ((__v4df) __A,
2727 (__v4df)
2729 (__mmask8) -1);
2730}
2731
2732static __inline__ __m256d __DEFAULT_FN_ATTRS256
2733_mm256_mask_getexp_pd (__m256d __W, __mmask8 __U, __m256d __A) {
2734 return (__m256d) __builtin_ia32_getexppd256_mask ((__v4df) __A,
2735 (__v4df) __W,
2736 (__mmask8) __U);
2737}
2738
2739static __inline__ __m256d __DEFAULT_FN_ATTRS256
2741 return (__m256d) __builtin_ia32_getexppd256_mask ((__v4df) __A,
2742 (__v4df)
2744 (__mmask8) __U);
2745}
2746
2747static __inline__ __m128 __DEFAULT_FN_ATTRS128
2748_mm_getexp_ps (__m128 __A) {
2749 return (__m128) __builtin_ia32_getexpps128_mask ((__v4sf) __A,
2750 (__v4sf)
2751 _mm_setzero_ps (),
2752 (__mmask8) -1);
2753}
2754
2755static __inline__ __m128 __DEFAULT_FN_ATTRS128
2756_mm_mask_getexp_ps (__m128 __W, __mmask8 __U, __m128 __A) {
2757 return (__m128) __builtin_ia32_getexpps128_mask ((__v4sf) __A,
2758 (__v4sf) __W,
2759 (__mmask8) __U);
2760}
2761
2762static __inline__ __m128 __DEFAULT_FN_ATTRS128
2764 return (__m128) __builtin_ia32_getexpps128_mask ((__v4sf) __A,
2765 (__v4sf)
2766 _mm_setzero_ps (),
2767 (__mmask8) __U);
2768}
2769
2770static __inline__ __m256 __DEFAULT_FN_ATTRS256
2771_mm256_getexp_ps (__m256 __A) {
2772 return (__m256) __builtin_ia32_getexpps256_mask ((__v8sf) __A,
2773 (__v8sf)
2775 (__mmask8) -1);
2776}
2777
2778static __inline__ __m256 __DEFAULT_FN_ATTRS256
2779_mm256_mask_getexp_ps (__m256 __W, __mmask8 __U, __m256 __A) {
2780 return (__m256) __builtin_ia32_getexpps256_mask ((__v8sf) __A,
2781 (__v8sf) __W,
2782 (__mmask8) __U);
2783}
2784
2785static __inline__ __m256 __DEFAULT_FN_ATTRS256
2787 return (__m256) __builtin_ia32_getexpps256_mask ((__v8sf) __A,
2788 (__v8sf)
2790 (__mmask8) __U);
2791}
2792
2793static __inline__ __m128d __DEFAULT_FN_ATTRS128
2794_mm_mask_max_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
2795 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
2796 (__v2df)_mm_max_pd(__A, __B),
2797 (__v2df)__W);
2798}
2799
2800static __inline__ __m128d __DEFAULT_FN_ATTRS128
2801_mm_maskz_max_pd(__mmask8 __U, __m128d __A, __m128d __B) {
2802 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
2803 (__v2df)_mm_max_pd(__A, __B),
2804 (__v2df)_mm_setzero_pd());
2805}
2806
2807static __inline__ __m256d __DEFAULT_FN_ATTRS256
2808_mm256_mask_max_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
2809 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
2810 (__v4df)_mm256_max_pd(__A, __B),
2811 (__v4df)__W);
2812}
2813
2814static __inline__ __m256d __DEFAULT_FN_ATTRS256
2815_mm256_maskz_max_pd(__mmask8 __U, __m256d __A, __m256d __B) {
2816 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
2817 (__v4df)_mm256_max_pd(__A, __B),
2818 (__v4df)_mm256_setzero_pd());
2819}
2820
2821static __inline__ __m128 __DEFAULT_FN_ATTRS128
2822_mm_mask_max_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
2823 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
2824 (__v4sf)_mm_max_ps(__A, __B),
2825 (__v4sf)__W);
2826}
2827
2828static __inline__ __m128 __DEFAULT_FN_ATTRS128
2829_mm_maskz_max_ps(__mmask8 __U, __m128 __A, __m128 __B) {
2830 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
2831 (__v4sf)_mm_max_ps(__A, __B),
2832 (__v4sf)_mm_setzero_ps());
2833}
2834
2835static __inline__ __m256 __DEFAULT_FN_ATTRS256
2836_mm256_mask_max_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
2837 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
2838 (__v8sf)_mm256_max_ps(__A, __B),
2839 (__v8sf)__W);
2840}
2841
2842static __inline__ __m256 __DEFAULT_FN_ATTRS256
2843_mm256_maskz_max_ps(__mmask8 __U, __m256 __A, __m256 __B) {
2844 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
2845 (__v8sf)_mm256_max_ps(__A, __B),
2846 (__v8sf)_mm256_setzero_ps());
2847}
2848
2849static __inline__ __m128d __DEFAULT_FN_ATTRS128
2850_mm_mask_min_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
2851 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
2852 (__v2df)_mm_min_pd(__A, __B),
2853 (__v2df)__W);
2854}
2855
2856static __inline__ __m128d __DEFAULT_FN_ATTRS128
2857_mm_maskz_min_pd(__mmask8 __U, __m128d __A, __m128d __B) {
2858 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
2859 (__v2df)_mm_min_pd(__A, __B),
2860 (__v2df)_mm_setzero_pd());
2861}
2862
2863static __inline__ __m256d __DEFAULT_FN_ATTRS256
2864_mm256_mask_min_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
2865 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
2866 (__v4df)_mm256_min_pd(__A, __B),
2867 (__v4df)__W);
2868}
2869
2870static __inline__ __m256d __DEFAULT_FN_ATTRS256
2871_mm256_maskz_min_pd(__mmask8 __U, __m256d __A, __m256d __B) {
2872 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
2873 (__v4df)_mm256_min_pd(__A, __B),
2874 (__v4df)_mm256_setzero_pd());
2875}
2876
2877static __inline__ __m128 __DEFAULT_FN_ATTRS128
2878_mm_mask_min_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
2879 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
2880 (__v4sf)_mm_min_ps(__A, __B),
2881 (__v4sf)__W);
2882}
2883
2884static __inline__ __m128 __DEFAULT_FN_ATTRS128
2885_mm_maskz_min_ps(__mmask8 __U, __m128 __A, __m128 __B) {
2886 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
2887 (__v4sf)_mm_min_ps(__A, __B),
2888 (__v4sf)_mm_setzero_ps());
2889}
2890
2891static __inline__ __m256 __DEFAULT_FN_ATTRS256
2892_mm256_mask_min_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
2893 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
2894 (__v8sf)_mm256_min_ps(__A, __B),
2895 (__v8sf)__W);
2896}
2897
2898static __inline__ __m256 __DEFAULT_FN_ATTRS256
2899_mm256_maskz_min_ps(__mmask8 __U, __m256 __A, __m256 __B) {
2900 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
2901 (__v8sf)_mm256_min_ps(__A, __B),
2902 (__v8sf)_mm256_setzero_ps());
2903}
2904
2905static __inline__ __m128d __DEFAULT_FN_ATTRS128
2906_mm_mask_mul_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
2907 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
2908 (__v2df)_mm_mul_pd(__A, __B),
2909 (__v2df)__W);
2910}
2911
2912static __inline__ __m128d __DEFAULT_FN_ATTRS128
2913_mm_maskz_mul_pd(__mmask8 __U, __m128d __A, __m128d __B) {
2914 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
2915 (__v2df)_mm_mul_pd(__A, __B),
2916 (__v2df)_mm_setzero_pd());
2917}
2918
2919static __inline__ __m256d __DEFAULT_FN_ATTRS256
2920_mm256_mask_mul_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
2921 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
2922 (__v4df)_mm256_mul_pd(__A, __B),
2923 (__v4df)__W);
2924}
2925
2926static __inline__ __m256d __DEFAULT_FN_ATTRS256
2927_mm256_maskz_mul_pd(__mmask8 __U, __m256d __A, __m256d __B) {
2928 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
2929 (__v4df)_mm256_mul_pd(__A, __B),
2930 (__v4df)_mm256_setzero_pd());
2931}
2932
2933static __inline__ __m128 __DEFAULT_FN_ATTRS128
2934_mm_mask_mul_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
2935 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
2936 (__v4sf)_mm_mul_ps(__A, __B),
2937 (__v4sf)__W);
2938}
2939
2940static __inline__ __m128 __DEFAULT_FN_ATTRS128
2941_mm_maskz_mul_ps(__mmask8 __U, __m128 __A, __m128 __B) {
2942 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
2943 (__v4sf)_mm_mul_ps(__A, __B),
2944 (__v4sf)_mm_setzero_ps());
2945}
2946
2947static __inline__ __m256 __DEFAULT_FN_ATTRS256
2948_mm256_mask_mul_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
2949 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
2950 (__v8sf)_mm256_mul_ps(__A, __B),
2951 (__v8sf)__W);
2952}
2953
2954static __inline__ __m256 __DEFAULT_FN_ATTRS256
2955_mm256_maskz_mul_ps(__mmask8 __U, __m256 __A, __m256 __B) {
2956 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
2957 (__v8sf)_mm256_mul_ps(__A, __B),
2958 (__v8sf)_mm256_setzero_ps());
2959}
2960
2961static __inline__ __m128i __DEFAULT_FN_ATTRS128
2962_mm_mask_abs_epi32(__m128i __W, __mmask8 __U, __m128i __A) {
2963 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
2964 (__v4si)_mm_abs_epi32(__A),
2965 (__v4si)__W);
2966}
2967
2968static __inline__ __m128i __DEFAULT_FN_ATTRS128
2970 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
2971 (__v4si)_mm_abs_epi32(__A),
2972 (__v4si)_mm_setzero_si128());
2973}
2974
2975static __inline__ __m256i __DEFAULT_FN_ATTRS256
2976_mm256_mask_abs_epi32(__m256i __W, __mmask8 __U, __m256i __A) {
2977 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
2978 (__v8si)_mm256_abs_epi32(__A),
2979 (__v8si)__W);
2980}
2981
2982static __inline__ __m256i __DEFAULT_FN_ATTRS256
2984 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
2985 (__v8si)_mm256_abs_epi32(__A),
2986 (__v8si)_mm256_setzero_si256());
2987}
2988
2989static __inline__ __m128i __DEFAULT_FN_ATTRS128
2990_mm_abs_epi64 (__m128i __A) {
2991 return (__m128i)__builtin_ia32_pabsq128((__v2di)__A);
2992}
2993
2994static __inline__ __m128i __DEFAULT_FN_ATTRS128
2995_mm_mask_abs_epi64 (__m128i __W, __mmask8 __U, __m128i __A) {
2996 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
2997 (__v2di)_mm_abs_epi64(__A),
2998 (__v2di)__W);
2999}
3000
3001static __inline__ __m128i __DEFAULT_FN_ATTRS128
3002_mm_maskz_abs_epi64 (__mmask8 __U, __m128i __A) {
3003 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
3004 (__v2di)_mm_abs_epi64(__A),
3005 (__v2di)_mm_setzero_si128());
3006}
3007
3008static __inline__ __m256i __DEFAULT_FN_ATTRS256
3009_mm256_abs_epi64 (__m256i __A) {
3010 return (__m256i)__builtin_ia32_pabsq256 ((__v4di)__A);
3011}
3012
3013static __inline__ __m256i __DEFAULT_FN_ATTRS256
3014_mm256_mask_abs_epi64 (__m256i __W, __mmask8 __U, __m256i __A) {
3015 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
3016 (__v4di)_mm256_abs_epi64(__A),
3017 (__v4di)__W);
3018}
3019
3020static __inline__ __m256i __DEFAULT_FN_ATTRS256
3022 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
3023 (__v4di)_mm256_abs_epi64(__A),
3024 (__v4di)_mm256_setzero_si256());
3025}
3026
3027static __inline__ __m128i __DEFAULT_FN_ATTRS128
3028_mm_maskz_max_epi32(__mmask8 __M, __m128i __A, __m128i __B) {
3029 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
3030 (__v4si)_mm_max_epi32(__A, __B),
3031 (__v4si)_mm_setzero_si128());
3032}
3033
3034static __inline__ __m128i __DEFAULT_FN_ATTRS128
3035_mm_mask_max_epi32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) {
3036 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
3037 (__v4si)_mm_max_epi32(__A, __B),
3038 (__v4si)__W);
3039}
3040
3041static __inline__ __m256i __DEFAULT_FN_ATTRS256
3042_mm256_maskz_max_epi32(__mmask8 __M, __m256i __A, __m256i __B) {
3043 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
3044 (__v8si)_mm256_max_epi32(__A, __B),
3045 (__v8si)_mm256_setzero_si256());
3046}
3047
3048static __inline__ __m256i __DEFAULT_FN_ATTRS256
3049_mm256_mask_max_epi32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) {
3050 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
3051 (__v8si)_mm256_max_epi32(__A, __B),
3052 (__v8si)__W);
3053}
3054
3055static __inline__ __m128i __DEFAULT_FN_ATTRS128
3056_mm_max_epi64 (__m128i __A, __m128i __B) {
3057 return (__m128i)__builtin_ia32_pmaxsq128((__v2di)__A, (__v2di)__B);
3058}
3059
3060static __inline__ __m128i __DEFAULT_FN_ATTRS128
3061_mm_maskz_max_epi64 (__mmask8 __M, __m128i __A, __m128i __B) {
3062 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M,
3063 (__v2di)_mm_max_epi64(__A, __B),
3064 (__v2di)_mm_setzero_si128());
3065}
3066
3067static __inline__ __m128i __DEFAULT_FN_ATTRS128
3068_mm_mask_max_epi64 (__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) {
3069 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M,
3070 (__v2di)_mm_max_epi64(__A, __B),
3071 (__v2di)__W);
3072}
3073
3074static __inline__ __m256i __DEFAULT_FN_ATTRS256
3075_mm256_max_epi64 (__m256i __A, __m256i __B) {
3076 return (__m256i)__builtin_ia32_pmaxsq256((__v4di)__A, (__v4di)__B);
3077}
3078
3079static __inline__ __m256i __DEFAULT_FN_ATTRS256
3080_mm256_maskz_max_epi64 (__mmask8 __M, __m256i __A, __m256i __B) {
3081 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
3082 (__v4di)_mm256_max_epi64(__A, __B),
3083 (__v4di)_mm256_setzero_si256());
3084}
3085
3086static __inline__ __m256i __DEFAULT_FN_ATTRS256
3087_mm256_mask_max_epi64 (__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) {
3088 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
3089 (__v4di)_mm256_max_epi64(__A, __B),
3090 (__v4di)__W);
3091}
3092
3093static __inline__ __m128i __DEFAULT_FN_ATTRS128
3094_mm_maskz_max_epu32(__mmask8 __M, __m128i __A, __m128i __B) {
3095 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
3096 (__v4si)_mm_max_epu32(__A, __B),
3097 (__v4si)_mm_setzero_si128());
3098}
3099
3100static __inline__ __m128i __DEFAULT_FN_ATTRS128
3101_mm_mask_max_epu32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) {
3102 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
3103 (__v4si)_mm_max_epu32(__A, __B),
3104 (__v4si)__W);
3105}
3106
3107static __inline__ __m256i __DEFAULT_FN_ATTRS256
3108_mm256_maskz_max_epu32(__mmask8 __M, __m256i __A, __m256i __B) {
3109 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
3110 (__v8si)_mm256_max_epu32(__A, __B),
3111 (__v8si)_mm256_setzero_si256());
3112}
3113
3114static __inline__ __m256i __DEFAULT_FN_ATTRS256
3115_mm256_mask_max_epu32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) {
3116 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
3117 (__v8si)_mm256_max_epu32(__A, __B),
3118 (__v8si)__W);
3119}
3120
3121static __inline__ __m128i __DEFAULT_FN_ATTRS128
3122_mm_max_epu64 (__m128i __A, __m128i __B) {
3123 return (__m128i)__builtin_ia32_pmaxuq128((__v2di)__A, (__v2di)__B);
3124}
3125
3126static __inline__ __m128i __DEFAULT_FN_ATTRS128
3127_mm_maskz_max_epu64 (__mmask8 __M, __m128i __A, __m128i __B) {
3128 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M,
3129 (__v2di)_mm_max_epu64(__A, __B),
3130 (__v2di)_mm_setzero_si128());
3131}
3132
3133static __inline__ __m128i __DEFAULT_FN_ATTRS128
3134_mm_mask_max_epu64 (__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) {
3135 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M,
3136 (__v2di)_mm_max_epu64(__A, __B),
3137 (__v2di)__W);
3138}
3139
3140static __inline__ __m256i __DEFAULT_FN_ATTRS256
3141_mm256_max_epu64 (__m256i __A, __m256i __B) {
3142 return (__m256i)__builtin_ia32_pmaxuq256((__v4di)__A, (__v4di)__B);
3143}
3144
3145static __inline__ __m256i __DEFAULT_FN_ATTRS256
3146_mm256_maskz_max_epu64 (__mmask8 __M, __m256i __A, __m256i __B) {
3147 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
3148 (__v4di)_mm256_max_epu64(__A, __B),
3149 (__v4di)_mm256_setzero_si256());
3150}
3151
3152static __inline__ __m256i __DEFAULT_FN_ATTRS256
3153_mm256_mask_max_epu64 (__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) {
3154 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
3155 (__v4di)_mm256_max_epu64(__A, __B),
3156 (__v4di)__W);
3157}
3158
3159static __inline__ __m128i __DEFAULT_FN_ATTRS128
3160_mm_maskz_min_epi32(__mmask8 __M, __m128i __A, __m128i __B) {
3161 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
3162 (__v4si)_mm_min_epi32(__A, __B),
3163 (__v4si)_mm_setzero_si128());
3164}
3165
3166static __inline__ __m128i __DEFAULT_FN_ATTRS128
3167_mm_mask_min_epi32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) {
3168 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
3169 (__v4si)_mm_min_epi32(__A, __B),
3170 (__v4si)__W);
3171}
3172
3173static __inline__ __m256i __DEFAULT_FN_ATTRS256
3174_mm256_maskz_min_epi32(__mmask8 __M, __m256i __A, __m256i __B) {
3175 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
3176 (__v8si)_mm256_min_epi32(__A, __B),
3177 (__v8si)_mm256_setzero_si256());
3178}
3179
3180static __inline__ __m256i __DEFAULT_FN_ATTRS256
3181_mm256_mask_min_epi32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) {
3182 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
3183 (__v8si)_mm256_min_epi32(__A, __B),
3184 (__v8si)__W);
3185}
3186
3187static __inline__ __m128i __DEFAULT_FN_ATTRS128
3188_mm_min_epi64 (__m128i __A, __m128i __B) {
3189 return (__m128i)__builtin_ia32_pminsq128((__v2di)__A, (__v2di)__B);
3190}
3191
3192static __inline__ __m128i __DEFAULT_FN_ATTRS128
3193_mm_mask_min_epi64 (__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) {
3194 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M,
3195 (__v2di)_mm_min_epi64(__A, __B),
3196 (__v2di)__W);
3197}
3198
3199static __inline__ __m128i __DEFAULT_FN_ATTRS128
3200_mm_maskz_min_epi64 (__mmask8 __M, __m128i __A, __m128i __B) {
3201 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M,
3202 (__v2di)_mm_min_epi64(__A, __B),
3203 (__v2di)_mm_setzero_si128());
3204}
3205
3206static __inline__ __m256i __DEFAULT_FN_ATTRS256
3207_mm256_min_epi64 (__m256i __A, __m256i __B) {
3208 return (__m256i)__builtin_ia32_pminsq256((__v4di)__A, (__v4di)__B);
3209}
3210
3211static __inline__ __m256i __DEFAULT_FN_ATTRS256
3212_mm256_mask_min_epi64 (__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) {
3213 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
3214 (__v4di)_mm256_min_epi64(__A, __B),
3215 (__v4di)__W);
3216}
3217
3218static __inline__ __m256i __DEFAULT_FN_ATTRS256
3219_mm256_maskz_min_epi64 (__mmask8 __M, __m256i __A, __m256i __B) {
3220 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
3221 (__v4di)_mm256_min_epi64(__A, __B),
3222 (__v4di)_mm256_setzero_si256());
3223}
3224
3225static __inline__ __m128i __DEFAULT_FN_ATTRS128
3226_mm_maskz_min_epu32(__mmask8 __M, __m128i __A, __m128i __B) {
3227 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
3228 (__v4si)_mm_min_epu32(__A, __B),
3229 (__v4si)_mm_setzero_si128());
3230}
3231
3232static __inline__ __m128i __DEFAULT_FN_ATTRS128
3233_mm_mask_min_epu32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) {
3234 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
3235 (__v4si)_mm_min_epu32(__A, __B),
3236 (__v4si)__W);
3237}
3238
3239static __inline__ __m256i __DEFAULT_FN_ATTRS256
3240_mm256_maskz_min_epu32(__mmask8 __M, __m256i __A, __m256i __B) {
3241 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
3242 (__v8si)_mm256_min_epu32(__A, __B),
3243 (__v8si)_mm256_setzero_si256());
3244}
3245
3246static __inline__ __m256i __DEFAULT_FN_ATTRS256
3247_mm256_mask_min_epu32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) {
3248 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
3249 (__v8si)_mm256_min_epu32(__A, __B),
3250 (__v8si)__W);
3251}
3252
3253static __inline__ __m128i __DEFAULT_FN_ATTRS128
3254_mm_min_epu64 (__m128i __A, __m128i __B) {
3255 return (__m128i)__builtin_ia32_pminuq128((__v2di)__A, (__v2di)__B);
3256}
3257
3258static __inline__ __m128i __DEFAULT_FN_ATTRS128
3259_mm_mask_min_epu64 (__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) {
3260 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M,
3261 (__v2di)_mm_min_epu64(__A, __B),
3262 (__v2di)__W);
3263}
3264
3265static __inline__ __m128i __DEFAULT_FN_ATTRS128
3266_mm_maskz_min_epu64 (__mmask8 __M, __m128i __A, __m128i __B) {
3267 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M,
3268 (__v2di)_mm_min_epu64(__A, __B),
3269 (__v2di)_mm_setzero_si128());
3270}
3271
3272static __inline__ __m256i __DEFAULT_FN_ATTRS256
3273_mm256_min_epu64 (__m256i __A, __m256i __B) {
3274 return (__m256i)__builtin_ia32_pminuq256((__v4di)__A, (__v4di)__B);
3275}
3276
3277static __inline__ __m256i __DEFAULT_FN_ATTRS256
3278_mm256_mask_min_epu64 (__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) {
3279 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
3280 (__v4di)_mm256_min_epu64(__A, __B),
3281 (__v4di)__W);
3282}
3283
3284static __inline__ __m256i __DEFAULT_FN_ATTRS256
3285_mm256_maskz_min_epu64 (__mmask8 __M, __m256i __A, __m256i __B) {
3286 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
3287 (__v4di)_mm256_min_epu64(__A, __B),
3288 (__v4di)_mm256_setzero_si256());
3289}
3290
/* Expands to __builtin_ia32_rndscalepd_128_mask on A with immediate imm, a
   zero vector as the third operand and an all-ones mask.  The whole expansion
   is parenthesized so the leading cast stays attached to the call when the
   macro is followed by a postfix operator or used inside a larger
   expression. */
#define _mm_roundscale_pd(A, imm) \
  ((__m128d)__builtin_ia32_rndscalepd_128_mask((__v2df)(__m128d)(A), \
                                               (int)(imm), \
                                               (__v2df)_mm_setzero_pd(), \
                                               (__mmask8)-1))
3296
3297
/* Expands to __builtin_ia32_rndscalepd_128_mask on A with immediate imm, W as
   the third operand and U as the mask.  The whole expansion is parenthesized
   so the leading cast stays attached to the call when the macro is followed
   by a postfix operator or used inside a larger expression. */
#define _mm_mask_roundscale_pd(W, U, A, imm) \
  ((__m128d)__builtin_ia32_rndscalepd_128_mask((__v2df)(__m128d)(A), \
                                               (int)(imm), \
                                               (__v2df)(__m128d)(W), \
                                               (__mmask8)(U)))
3303
3304
/* Expands to __builtin_ia32_rndscalepd_128_mask on A with immediate imm, a
   zero vector as the third operand and U as the mask.  The whole expansion is
   parenthesized so the leading cast stays attached to the call when the macro
   is followed by a postfix operator or used inside a larger expression. */
#define _mm_maskz_roundscale_pd(U, A, imm) \
  ((__m128d)__builtin_ia32_rndscalepd_128_mask((__v2df)(__m128d)(A), \
                                               (int)(imm), \
                                               (__v2df)_mm_setzero_pd(), \
                                               (__mmask8)(U)))
3310
3311
/* Expands to __builtin_ia32_rndscalepd_256_mask on A with immediate imm, a
   zero vector as the third operand and an all-ones mask.  The whole expansion
   is parenthesized so the leading cast stays attached to the call when the
   macro is followed by a postfix operator or used inside a larger
   expression. */
#define _mm256_roundscale_pd(A, imm) \
  ((__m256d)__builtin_ia32_rndscalepd_256_mask((__v4df)(__m256d)(A), \
                                               (int)(imm), \
                                               (__v4df)_mm256_setzero_pd(), \
                                               (__mmask8)-1))
3317
3318
/* Expands to __builtin_ia32_rndscalepd_256_mask on A with immediate imm, W as
   the third operand and U as the mask.  The whole expansion is parenthesized
   so the leading cast stays attached to the call when the macro is followed
   by a postfix operator or used inside a larger expression. */
#define _mm256_mask_roundscale_pd(W, U, A, imm) \
  ((__m256d)__builtin_ia32_rndscalepd_256_mask((__v4df)(__m256d)(A), \
                                               (int)(imm), \
                                               (__v4df)(__m256d)(W), \
                                               (__mmask8)(U)))
3324
3325
/* Expands to __builtin_ia32_rndscalepd_256_mask on A with immediate imm, a
   zero vector as the third operand and U as the mask.  The whole expansion is
   parenthesized so the leading cast stays attached to the call when the macro
   is followed by a postfix operator or used inside a larger expression. */
#define _mm256_maskz_roundscale_pd(U, A, imm) \
  ((__m256d)__builtin_ia32_rndscalepd_256_mask((__v4df)(__m256d)(A), \
                                               (int)(imm), \
                                               (__v4df)_mm256_setzero_pd(), \
                                               (__mmask8)(U)))
3331
/* Expands to __builtin_ia32_rndscaleps_128_mask on A with immediate imm, a
   zero vector as the third operand and an all-ones mask.  The whole expansion
   is parenthesized so the leading cast stays attached to the call when the
   macro is followed by a postfix operator or used inside a larger
   expression. */
#define _mm_roundscale_ps(A, imm) \
  ((__m128)__builtin_ia32_rndscaleps_128_mask((__v4sf)(__m128)(A), (int)(imm), \
                                              (__v4sf)_mm_setzero_ps(), \
                                              (__mmask8)-1))
3336
3337
/* Expands to __builtin_ia32_rndscaleps_128_mask on A with immediate imm, W as
   the third operand and U as the mask.  The whole expansion is parenthesized
   so the leading cast stays attached to the call when the macro is followed
   by a postfix operator or used inside a larger expression. */
#define _mm_mask_roundscale_ps(W, U, A, imm) \
  ((__m128)__builtin_ia32_rndscaleps_128_mask((__v4sf)(__m128)(A), (int)(imm), \
                                              (__v4sf)(__m128)(W), \
                                              (__mmask8)(U)))
3342
3343
/* Expands to __builtin_ia32_rndscaleps_128_mask on A with immediate imm, a
   zero vector as the third operand and U as the mask.  The whole expansion is
   parenthesized so the leading cast stays attached to the call when the macro
   is followed by a postfix operator or used inside a larger expression. */
#define _mm_maskz_roundscale_ps(U, A, imm) \
  ((__m128)__builtin_ia32_rndscaleps_128_mask((__v4sf)(__m128)(A), (int)(imm), \
                                              (__v4sf)_mm_setzero_ps(), \
                                              (__mmask8)(U)))
3348
/* Expands to __builtin_ia32_rndscaleps_256_mask on A with immediate imm, a
   zero vector as the third operand and an all-ones mask.  The whole expansion
   is parenthesized so the leading cast stays attached to the call when the
   macro is followed by a postfix operator or used inside a larger
   expression. */
#define _mm256_roundscale_ps(A, imm) \
  ((__m256)__builtin_ia32_rndscaleps_256_mask((__v8sf)(__m256)(A), (int)(imm), \
                                              (__v8sf)_mm256_setzero_ps(), \
                                              (__mmask8)-1))
3353
/* Expands to __builtin_ia32_rndscaleps_256_mask on A with immediate imm, W as
   the third operand and U as the mask.  The whole expansion is parenthesized
   so the leading cast stays attached to the call when the macro is followed
   by a postfix operator or used inside a larger expression. */
#define _mm256_mask_roundscale_ps(W, U, A, imm) \
  ((__m256)__builtin_ia32_rndscaleps_256_mask((__v8sf)(__m256)(A), (int)(imm), \
                                              (__v8sf)(__m256)(W), \
                                              (__mmask8)(U)))
3358
3359
/* Expands to __builtin_ia32_rndscaleps_256_mask on A with immediate imm, a
   zero vector as the third operand and U as the mask.  The whole expansion is
   parenthesized so the leading cast stays attached to the call when the macro
   is followed by a postfix operator or used inside a larger expression. */
#define _mm256_maskz_roundscale_ps(U, A, imm) \
  ((__m256)__builtin_ia32_rndscaleps_256_mask((__v8sf)(__m256)(A), (int)(imm), \
                                              (__v8sf)_mm256_setzero_ps(), \
                                              (__mmask8)(U)))
3364
3365static __inline__ __m128d __DEFAULT_FN_ATTRS128
3366_mm_scalef_pd (__m128d __A, __m128d __B) {
3367 return (__m128d) __builtin_ia32_scalefpd128_mask ((__v2df) __A,
3368 (__v2df) __B,
3369 (__v2df)
3370 _mm_setzero_pd (),
3371 (__mmask8) -1);
3372}
3373
3374static __inline__ __m128d __DEFAULT_FN_ATTRS128
3375_mm_mask_scalef_pd (__m128d __W, __mmask8 __U, __m128d __A,
3376 __m128d __B) {
3377 return (__m128d) __builtin_ia32_scalefpd128_mask ((__v2df) __A,
3378 (__v2df) __B,
3379 (__v2df) __W,
3380 (__mmask8) __U);
3381}
3382
3383static __inline__ __m128d __DEFAULT_FN_ATTRS128
3384_mm_maskz_scalef_pd (__mmask8 __U, __m128d __A, __m128d __B) {
3385 return (__m128d) __builtin_ia32_scalefpd128_mask ((__v2df) __A,
3386 (__v2df) __B,
3387 (__v2df)
3388 _mm_setzero_pd (),
3389 (__mmask8) __U);
3390}
3391
3392static __inline__ __m256d __DEFAULT_FN_ATTRS256
3393_mm256_scalef_pd (__m256d __A, __m256d __B) {
3394 return (__m256d) __builtin_ia32_scalefpd256_mask ((__v4df) __A,
3395 (__v4df) __B,
3396 (__v4df)
3398 (__mmask8) -1);
3399}
3400
3401static __inline__ __m256d __DEFAULT_FN_ATTRS256
3402_mm256_mask_scalef_pd (__m256d __W, __mmask8 __U, __m256d __A,
3403 __m256d __B) {
3404 return (__m256d) __builtin_ia32_scalefpd256_mask ((__v4df) __A,
3405 (__v4df) __B,
3406 (__v4df) __W,
3407 (__mmask8) __U);
3408}
3409
3410static __inline__ __m256d __DEFAULT_FN_ATTRS256
3411_mm256_maskz_scalef_pd (__mmask8 __U, __m256d __A, __m256d __B) {
3412 return (__m256d) __builtin_ia32_scalefpd256_mask ((__v4df) __A,
3413 (__v4df) __B,
3414 (__v4df)
3416 (__mmask8) __U);
3417}
3418
3419static __inline__ __m128 __DEFAULT_FN_ATTRS128
3420_mm_scalef_ps (__m128 __A, __m128 __B) {
3421 return (__m128) __builtin_ia32_scalefps128_mask ((__v4sf) __A,
3422 (__v4sf) __B,
3423 (__v4sf)
3424 _mm_setzero_ps (),
3425 (__mmask8) -1);
3426}
3427
3428static __inline__ __m128 __DEFAULT_FN_ATTRS128
3429_mm_mask_scalef_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
3430 return (__m128) __builtin_ia32_scalefps128_mask ((__v4sf) __A,
3431 (__v4sf) __B,
3432 (__v4sf) __W,
3433 (__mmask8) __U);
3434}
3435
3436static __inline__ __m128 __DEFAULT_FN_ATTRS128
3437_mm_maskz_scalef_ps (__mmask8 __U, __m128 __A, __m128 __B) {
3438 return (__m128) __builtin_ia32_scalefps128_mask ((__v4sf) __A,
3439 (__v4sf) __B,
3440 (__v4sf)
3441 _mm_setzero_ps (),
3442 (__mmask8) __U);
3443}
3444
3445static __inline__ __m256 __DEFAULT_FN_ATTRS256
3446_mm256_scalef_ps (__m256 __A, __m256 __B) {
3447 return (__m256) __builtin_ia32_scalefps256_mask ((__v8sf) __A,
3448 (__v8sf) __B,
3449 (__v8sf)
3451 (__mmask8) -1);
3452}
3453
3454static __inline__ __m256 __DEFAULT_FN_ATTRS256
3455_mm256_mask_scalef_ps (__m256 __W, __mmask8 __U, __m256 __A,
3456 __m256 __B) {
3457 return (__m256) __builtin_ia32_scalefps256_mask ((__v8sf) __A,
3458 (__v8sf) __B,
3459 (__v8sf) __W,
3460 (__mmask8) __U);
3461}
3462
3463static __inline__ __m256 __DEFAULT_FN_ATTRS256
3464_mm256_maskz_scalef_ps (__mmask8 __U, __m256 __A, __m256 __B) {
3465 return (__m256) __builtin_ia32_scalefps256_mask ((__v8sf) __A,
3466 (__v8sf) __B,
3467 (__v8sf)
3469 (__mmask8) __U);
3470}
3471
/* Unmasked form: forwards to __builtin_ia32_scatterdiv2df with an all-ones
   mask; index is viewed as __v2di and v1 as __v2df. */
#define _mm_i64scatter_pd(addr, index, v1, scale) \
  __builtin_ia32_scatterdiv2df((void *)(addr), \
                               (__mmask8)-1, \
                               (__v2di)(__m128i)(index), \
                               (__v2df)(__m128d)(v1), \
                               (int)(scale))
3476
/* Masked form: forwards to __builtin_ia32_scatterdiv2df with the caller's
   mask; index is viewed as __v2di and v1 as __v2df. */
#define _mm_mask_i64scatter_pd(addr, mask, index, v1, scale) \
  __builtin_ia32_scatterdiv2df((void *)(addr), \
                               (__mmask8)(mask), \
                               (__v2di)(__m128i)(index), \
                               (__v2df)(__m128d)(v1), \
                               (int)(scale))
3481
/* Unmasked form: forwards to __builtin_ia32_scatterdiv2di with an all-ones
   mask; index and v1 are both viewed as __v2di. */
#define _mm_i64scatter_epi64(addr, index, v1, scale) \
  __builtin_ia32_scatterdiv2di((void *)(addr), \
                               (__mmask8)-1, \
                               (__v2di)(__m128i)(index), \
                               (__v2di)(__m128i)(v1), \
                               (int)(scale))
3486
/* Masked form: forwards to __builtin_ia32_scatterdiv2di with the caller's
   mask; index and v1 are both viewed as __v2di. */
#define _mm_mask_i64scatter_epi64(addr, mask, index, v1, scale) \
  __builtin_ia32_scatterdiv2di((void *)(addr), \
                               (__mmask8)(mask), \
                               (__v2di)(__m128i)(index), \
                               (__v2di)(__m128i)(v1), \
                               (int)(scale))
3491
/* Unmasked form: forwards to __builtin_ia32_scatterdiv4df with an all-ones
   mask; index is viewed as __v4di and v1 as __v4df. */
#define _mm256_i64scatter_pd(addr, index, v1, scale) \
  __builtin_ia32_scatterdiv4df((void *)(addr), \
                               (__mmask8)-1, \
                               (__v4di)(__m256i)(index), \
                               (__v4df)(__m256d)(v1), \
                               (int)(scale))
3496
/* Masked form: forwards to __builtin_ia32_scatterdiv4df with the caller's
   mask; index is viewed as __v4di and v1 as __v4df. */
#define _mm256_mask_i64scatter_pd(addr, mask, index, v1, scale) \
  __builtin_ia32_scatterdiv4df((void *)(addr), \
                               (__mmask8)(mask), \
                               (__v4di)(__m256i)(index), \
                               (__v4df)(__m256d)(v1), \
                               (int)(scale))
3501
/* Unmasked form: forwards to __builtin_ia32_scatterdiv4di with an all-ones
   mask; index and v1 are both viewed as __v4di. */
#define _mm256_i64scatter_epi64(addr, index, v1, scale) \
  __builtin_ia32_scatterdiv4di((void *)(addr), \
                               (__mmask8)-1, \
                               (__v4di)(__m256i)(index), \
                               (__v4di)(__m256i)(v1), \
                               (int)(scale))
3506
/* Masked form: forwards to __builtin_ia32_scatterdiv4di with the caller's
   mask; index and v1 are both viewed as __v4di. */
#define _mm256_mask_i64scatter_epi64(addr, mask, index, v1, scale) \
  __builtin_ia32_scatterdiv4di((void *)(addr), \
                               (__mmask8)(mask), \
                               (__v4di)(__m256i)(index), \
                               (__v4di)(__m256i)(v1), \
                               (int)(scale))
3511
/* Unmasked form: forwards to __builtin_ia32_scatterdiv4sf with an all-ones
   mask; index is viewed as __v2di and v1 as __v4sf. */
#define _mm_i64scatter_ps(addr, index, v1, scale) \
  __builtin_ia32_scatterdiv4sf((void *)(addr), \
                               (__mmask8)-1, \
                               (__v2di)(__m128i)(index), \
                               (__v4sf)(__m128)(v1), \
                               (int)(scale))
3516
/* Masked form: forwards to __builtin_ia32_scatterdiv4sf with the caller's
   mask; index is viewed as __v2di and v1 as __v4sf. */
#define _mm_mask_i64scatter_ps(addr, mask, index, v1, scale) \
  __builtin_ia32_scatterdiv4sf((void *)(addr), \
                               (__mmask8)(mask), \
                               (__v2di)(__m128i)(index), \
                               (__v4sf)(__m128)(v1), \
                               (int)(scale))
3521
/* Unmasked form: forwards to __builtin_ia32_scatterdiv4si with an all-ones
   mask; index is viewed as __v2di and v1 as __v4si. */
#define _mm_i64scatter_epi32(addr, index, v1, scale) \
  __builtin_ia32_scatterdiv4si((void *)(addr), \
                               (__mmask8)-1, \
                               (__v2di)(__m128i)(index), \
                               (__v4si)(__m128i)(v1), \
                               (int)(scale))
3526
/* Masked form: forwards to __builtin_ia32_scatterdiv4si with the caller's
   mask; index is viewed as __v2di and v1 as __v4si. */
#define _mm_mask_i64scatter_epi32(addr, mask, index, v1, scale) \
  __builtin_ia32_scatterdiv4si((void *)(addr), \
                               (__mmask8)(mask), \
                               (__v2di)(__m128i)(index), \
                               (__v4si)(__m128i)(v1), \
                               (int)(scale))
3531
/* Unmasked form: forwards to __builtin_ia32_scatterdiv8sf with an all-ones
   mask.  The index is a 256-bit __v4di while the value operand v1 is a
   128-bit __m128 viewed as __v4sf. */
#define _mm256_i64scatter_ps(addr, index, v1, scale) \
  __builtin_ia32_scatterdiv8sf((void *)(addr), \
                               (__mmask8)-1, \
                               (__v4di)(__m256i)(index), \
                               (__v4sf)(__m128)(v1), \
                               (int)(scale))
3536
/* Masked form: forwards to __builtin_ia32_scatterdiv8sf with the caller's
   mask.  The index is a 256-bit __v4di while the value operand v1 is a
   128-bit __m128 viewed as __v4sf. */
#define _mm256_mask_i64scatter_ps(addr, mask, index, v1, scale) \
  __builtin_ia32_scatterdiv8sf((void *)(addr), \
                               (__mmask8)(mask), \
                               (__v4di)(__m256i)(index), \
                               (__v4sf)(__m128)(v1), \
                               (int)(scale))
3541
/* Unmasked form: forwards to __builtin_ia32_scatterdiv8si with an all-ones
   mask.  The index is a 256-bit __v4di while the value operand v1 is a
   128-bit __m128i viewed as __v4si. */
#define _mm256_i64scatter_epi32(addr, index, v1, scale) \
  __builtin_ia32_scatterdiv8si((void *)(addr), \
                               (__mmask8)-1, \
                               (__v4di)(__m256i)(index), \
                               (__v4si)(__m128i)(v1), \
                               (int)(scale))
3546
/* Masked form: forwards to __builtin_ia32_scatterdiv8si with the caller's
   mask.  The index is a 256-bit __v4di while the value operand v1 is a
   128-bit __m128i viewed as __v4si. */
#define _mm256_mask_i64scatter_epi32(addr, mask, index, v1, scale) \
  __builtin_ia32_scatterdiv8si((void *)(addr), \
                               (__mmask8)(mask), \
                               (__v4di)(__m256i)(index), \
                               (__v4si)(__m128i)(v1), \
                               (int)(scale))
3551
/* Unmasked form: forwards to __builtin_ia32_scattersiv2df with an all-ones
   mask; index is viewed as __v4si and v1 as __v2df. */
#define _mm_i32scatter_pd(addr, index, v1, scale) \
  __builtin_ia32_scattersiv2df((void *)(addr), \
                               (__mmask8)-1, \
                               (__v4si)(__m128i)(index), \
                               (__v2df)(__m128d)(v1), \
                               (int)(scale))
3556
/* Masked form: forwards to __builtin_ia32_scattersiv2df with the caller's
   mask; index is viewed as __v4si and v1 as __v2df. */
#define _mm_mask_i32scatter_pd(addr, mask, index, v1, scale) \
  __builtin_ia32_scattersiv2df((void *)(addr), \
                               (__mmask8)(mask), \
                               (__v4si)(__m128i)(index), \
                               (__v2df)(__m128d)(v1), \
                               (int)(scale))
3561
/* Unmasked form: forwards to __builtin_ia32_scattersiv2di with an all-ones
   mask; index is viewed as __v4si and v1 as __v2di. */
#define _mm_i32scatter_epi64(addr, index, v1, scale) \
  __builtin_ia32_scattersiv2di((void *)(addr), \
                               (__mmask8)-1, \
                               (__v4si)(__m128i)(index), \
                               (__v2di)(__m128i)(v1), \
                               (int)(scale))
3566
/* Masked form: forwards to __builtin_ia32_scattersiv2di with the caller's
   mask; index is viewed as __v4si and v1 as __v2di. */
#define _mm_mask_i32scatter_epi64(addr, mask, index, v1, scale) \
  __builtin_ia32_scattersiv2di((void *)(addr), \
                               (__mmask8)(mask), \
                               (__v4si)(__m128i)(index), \
                               (__v2di)(__m128i)(v1), \
                               (int)(scale))
3571
/* Unmasked form: forwards to __builtin_ia32_scattersiv4df with an all-ones
   mask.  The index is a 128-bit __m128i viewed as __v4si while v1 is a
   256-bit __v4df. */
#define _mm256_i32scatter_pd(addr, index, v1, scale) \
  __builtin_ia32_scattersiv4df((void *)(addr), \
                               (__mmask8)-1, \
                               (__v4si)(__m128i)(index), \
                               (__v4df)(__m256d)(v1), \
                               (int)(scale))
3576
/* Masked form: forwards to __builtin_ia32_scattersiv4df with the caller's
   mask.  The index is a 128-bit __m128i viewed as __v4si while v1 is a
   256-bit __v4df. */
#define _mm256_mask_i32scatter_pd(addr, mask, index, v1, scale) \
  __builtin_ia32_scattersiv4df((void *)(addr), \
                               (__mmask8)(mask), \
                               (__v4si)(__m128i)(index), \
                               (__v4df)(__m256d)(v1), \
                               (int)(scale))
3581
/* Unmasked form: forwards to __builtin_ia32_scattersiv4di with an all-ones
   mask.  The index is a 128-bit __m128i viewed as __v4si while v1 is a
   256-bit __v4di. */
#define _mm256_i32scatter_epi64(addr, index, v1, scale) \
  __builtin_ia32_scattersiv4di((void *)(addr), \
                               (__mmask8)-1, \
                               (__v4si)(__m128i)(index), \
                               (__v4di)(__m256i)(v1), \
                               (int)(scale))
3586
/* Masked form: forwards to __builtin_ia32_scattersiv4di with the caller's
   mask.  The index is a 128-bit __m128i viewed as __v4si while v1 is a
   256-bit __v4di. */
#define _mm256_mask_i32scatter_epi64(addr, mask, index, v1, scale) \
  __builtin_ia32_scattersiv4di((void *)(addr), \
                               (__mmask8)(mask), \
                               (__v4si)(__m128i)(index), \
                               (__v4di)(__m256i)(v1), \
                               (int)(scale))
3591
/* Unmasked form: forwards to __builtin_ia32_scattersiv4sf with an all-ones
   mask; index is viewed as __v4si and v1 as __v4sf. */
#define _mm_i32scatter_ps(addr, index, v1, scale) \
  __builtin_ia32_scattersiv4sf((void *)(addr), \
                               (__mmask8)-1, \
                               (__v4si)(__m128i)(index), \
                               (__v4sf)(__m128)(v1), \
                               (int)(scale))
3596
/* Masked form: forwards to __builtin_ia32_scattersiv4sf with the caller's
   mask; index is viewed as __v4si and v1 as __v4sf. */
#define _mm_mask_i32scatter_ps(addr, mask, index, v1, scale) \
  __builtin_ia32_scattersiv4sf((void *)(addr), \
                               (__mmask8)(mask), \
                               (__v4si)(__m128i)(index), \
                               (__v4sf)(__m128)(v1), \
                               (int)(scale))
3601
/* Unmasked form: forwards to __builtin_ia32_scattersiv4si with an all-ones
   mask; index and v1 are both viewed as __v4si. */
#define _mm_i32scatter_epi32(addr, index, v1, scale) \
  __builtin_ia32_scattersiv4si((void *)(addr), \
                               (__mmask8)-1, \
                               (__v4si)(__m128i)(index), \
                               (__v4si)(__m128i)(v1), \
                               (int)(scale))
3606
/* Masked form: forwards to __builtin_ia32_scattersiv4si with the caller's
   mask; index and v1 are both viewed as __v4si. */
#define _mm_mask_i32scatter_epi32(addr, mask, index, v1, scale) \
  __builtin_ia32_scattersiv4si((void *)(addr), \
                               (__mmask8)(mask), \
                               (__v4si)(__m128i)(index), \
                               (__v4si)(__m128i)(v1), \
                               (int)(scale))
3611
/* Unmasked form: forwards to __builtin_ia32_scattersiv8sf with an all-ones
   mask; index is viewed as __v8si and v1 as __v8sf. */
#define _mm256_i32scatter_ps(addr, index, v1, scale) \
  __builtin_ia32_scattersiv8sf((void *)(addr), \
                               (__mmask8)-1, \
                               (__v8si)(__m256i)(index), \
                               (__v8sf)(__m256)(v1), \
                               (int)(scale))
3616
/* Masked form: forwards to __builtin_ia32_scattersiv8sf with the caller's
   mask; index is viewed as __v8si and v1 as __v8sf. */
#define _mm256_mask_i32scatter_ps(addr, mask, index, v1, scale) \
  __builtin_ia32_scattersiv8sf((void *)(addr), \
                               (__mmask8)(mask), \
                               (__v8si)(__m256i)(index), \
                               (__v8sf)(__m256)(v1), \
                               (int)(scale))
3621
/* Unmasked form: forwards to __builtin_ia32_scattersiv8si with an all-ones
   mask; index and v1 are both viewed as __v8si. */
#define _mm256_i32scatter_epi32(addr, index, v1, scale) \
  __builtin_ia32_scattersiv8si((void *)(addr), \
                               (__mmask8)-1, \
                               (__v8si)(__m256i)(index), \
                               (__v8si)(__m256i)(v1), \
                               (int)(scale))
3626
/* Masked form: forwards to __builtin_ia32_scattersiv8si with the caller's
   mask; index and v1 are both viewed as __v8si. */
#define _mm256_mask_i32scatter_epi32(addr, mask, index, v1, scale) \
  __builtin_ia32_scattersiv8si((void *)(addr), \
                               (__mmask8)(mask), \
                               (__v8si)(__m256i)(index), \
                               (__v8si)(__m256i)(v1), \
                               (int)(scale))
3631
3632 static __inline__ __m128d __DEFAULT_FN_ATTRS128
3633 _mm_mask_sqrt_pd(__m128d __W, __mmask8 __U, __m128d __A) {
3634 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
3635 (__v2df)_mm_sqrt_pd(__A),
3636 (__v2df)__W);
3637 }
3638
3639 static __inline__ __m128d __DEFAULT_FN_ATTRS128
3640 _mm_maskz_sqrt_pd(__mmask8 __U, __m128d __A) {
3641 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
3642 (__v2df)_mm_sqrt_pd(__A),
3643 (__v2df)_mm_setzero_pd());
3644 }
3645
3646 static __inline__ __m256d __DEFAULT_FN_ATTRS256
3647 _mm256_mask_sqrt_pd(__m256d __W, __mmask8 __U, __m256d __A) {
3648 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
3649 (__v4df)_mm256_sqrt_pd(__A),
3650 (__v4df)__W);
3651 }
3652
3653 static __inline__ __m256d __DEFAULT_FN_ATTRS256
3654 _mm256_maskz_sqrt_pd(__mmask8 __U, __m256d __A) {
3655 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
3656 (__v4df)_mm256_sqrt_pd(__A),
3657 (__v4df)_mm256_setzero_pd());
3658 }
3659
3660 static __inline__ __m128 __DEFAULT_FN_ATTRS128
3661 _mm_mask_sqrt_ps(__m128 __W, __mmask8 __U, __m128 __A) {
3662 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
3663 (__v4sf)_mm_sqrt_ps(__A),
3664 (__v4sf)__W);
3665 }
3666
3667 static __inline__ __m128 __DEFAULT_FN_ATTRS128
3668 _mm_maskz_sqrt_ps(__mmask8 __U, __m128 __A) {
3669 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
3670 (__v4sf)_mm_sqrt_ps(__A),
3671 (__v4sf)_mm_setzero_ps());
3672 }
3673
3674 static __inline__ __m256 __DEFAULT_FN_ATTRS256
3675 _mm256_mask_sqrt_ps(__m256 __W, __mmask8 __U, __m256 __A) {
3676 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
3677 (__v8sf)_mm256_sqrt_ps(__A),
3678 (__v8sf)__W);
3679 }
3680
3681 static __inline__ __m256 __DEFAULT_FN_ATTRS256
3683 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
3684 (__v8sf)_mm256_sqrt_ps(__A),
3685 (__v8sf)_mm256_setzero_ps());
3686 }
3687
3688 static __inline__ __m128d __DEFAULT_FN_ATTRS128
3689 _mm_mask_sub_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
3690 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
3691 (__v2df)_mm_sub_pd(__A, __B),
3692 (__v2df)__W);
3693 }
3694
3695 static __inline__ __m128d __DEFAULT_FN_ATTRS128
3696 _mm_maskz_sub_pd(__mmask8 __U, __m128d __A, __m128d __B) {
3697 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
3698 (__v2df)_mm_sub_pd(__A, __B),
3699 (__v2df)_mm_setzero_pd());
3700 }
3701
3702 static __inline__ __m256d __DEFAULT_FN_ATTRS256
3703 _mm256_mask_sub_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
3704 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
3705 (__v4df)_mm256_sub_pd(__A, __B),
3706 (__v4df)__W);
3707 }
3708
3709 static __inline__ __m256d __DEFAULT_FN_ATTRS256
3710 _mm256_maskz_sub_pd(__mmask8 __U, __m256d __A, __m256d __B) {
3711 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
3712 (__v4df)_mm256_sub_pd(__A, __B),
3713 (__v4df)_mm256_setzero_pd());
3714 }
3715
3716 static __inline__ __m128 __DEFAULT_FN_ATTRS128
3717 _mm_mask_sub_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
3718 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
3719 (__v4sf)_mm_sub_ps(__A, __B),
3720 (__v4sf)__W);
3721 }
3722
3723 static __inline__ __m128 __DEFAULT_FN_ATTRS128
3724 _mm_maskz_sub_ps(__mmask8 __U, __m128 __A, __m128 __B) {
3725 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
3726 (__v4sf)_mm_sub_ps(__A, __B),
3727 (__v4sf)_mm_setzero_ps());
3728 }
3729
3730 static __inline__ __m256 __DEFAULT_FN_ATTRS256
3731 _mm256_mask_sub_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
3732 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
3733 (__v8sf)_mm256_sub_ps(__A, __B),
3734 (__v8sf)__W);
3735 }
3736
3737 static __inline__ __m256 __DEFAULT_FN_ATTRS256
3738 _mm256_maskz_sub_ps(__mmask8 __U, __m256 __A, __m256 __B) {
3739 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
3740 (__v8sf)_mm256_sub_ps(__A, __B),
3741 (__v8sf)_mm256_setzero_ps());
3742 }
3743
3744 static __inline__ __m128i __DEFAULT_FN_ATTRS128
3745 _mm_permutex2var_epi32(__m128i __A, __m128i __I, __m128i __B) {
3746 return (__m128i)__builtin_ia32_vpermi2vard128((__v4si) __A, (__v4si)__I,
3747 (__v4si)__B);
3748 }
3749
3750 static __inline__ __m128i __DEFAULT_FN_ATTRS128
3751 _mm_mask_permutex2var_epi32(__m128i __A, __mmask8 __U, __m128i __I,
3752 __m128i __B) {
3753 return (__m128i)__builtin_ia32_selectd_128(__U,
3754 (__v4si)_mm_permutex2var_epi32(__A, __I, __B),
3755 (__v4si)__A);
3756 }
3757
3758 static __inline__ __m128i __DEFAULT_FN_ATTRS128
3759 _mm_mask2_permutex2var_epi32(__m128i __A, __m128i __I, __mmask8 __U,
3760 __m128i __B) {
3761 return (__m128i)__builtin_ia32_selectd_128(__U,
3762 (__v4si)_mm_permutex2var_epi32(__A, __I, __B),
3763 (__v4si)__I);
3764 }
3765
3766 static __inline__ __m128i __DEFAULT_FN_ATTRS128
3767 _mm_maskz_permutex2var_epi32(__mmask8 __U, __m128i __A, __m128i __I,
3768 __m128i __B) {
3769 return (__m128i)__builtin_ia32_selectd_128(__U,
3770 (__v4si)_mm_permutex2var_epi32(__A, __I, __B),
3771 (__v4si)_mm_setzero_si128());
3772 }
3773
3774 static __inline__ __m256i __DEFAULT_FN_ATTRS256
3775 _mm256_permutex2var_epi32(__m256i __A, __m256i __I, __m256i __B) {
3776 return (__m256i)__builtin_ia32_vpermi2vard256((__v8si)__A, (__v8si) __I,
3777 (__v8si) __B);
3778 }
3779
3780 static __inline__ __m256i __DEFAULT_FN_ATTRS256
3781 _mm256_mask_permutex2var_epi32(__m256i __A, __mmask8 __U, __m256i __I,
3782 __m256i __B) {
3783 return (__m256i)__builtin_ia32_selectd_256(__U,
3784 (__v8si)_mm256_permutex2var_epi32(__A, __I, __B),
3785 (__v8si)__A);
3786 }
3787
3788 static __inline__ __m256i __DEFAULT_FN_ATTRS256
3789 _mm256_mask2_permutex2var_epi32(__m256i __A, __m256i __I, __mmask8 __U,
3790 __m256i __B) {
3791 return (__m256i)__builtin_ia32_selectd_256(__U,
3792 (__v8si)_mm256_permutex2var_epi32(__A, __I, __B),
3793 (__v8si)__I);
3794 }
3795
3796 static __inline__ __m256i __DEFAULT_FN_ATTRS256
3797 _mm256_maskz_permutex2var_epi32(__mmask8 __U, __m256i __A, __m256i __I,
3798 __m256i __B) {
3799 return (__m256i)__builtin_ia32_selectd_256(__U,
3800 (__v8si)_mm256_permutex2var_epi32(__A, __I, __B),
3801 (__v8si)_mm256_setzero_si256());
3802 }
3803
3804 static __inline__ __m128d __DEFAULT_FN_ATTRS128
3805 _mm_permutex2var_pd(__m128d __A, __m128i __I, __m128d __B) {
3806 return (__m128d)__builtin_ia32_vpermi2varpd128((__v2df)__A, (__v2di)__I,
3807 (__v2df)__B);
3808 }
3809
3810 static __inline__ __m128d __DEFAULT_FN_ATTRS128
3811 _mm_mask_permutex2var_pd(__m128d __A, __mmask8 __U, __m128i __I, __m128d __B) {
3812 return (__m128d)__builtin_ia32_selectpd_128(__U,
3813 (__v2df)_mm_permutex2var_pd(__A, __I, __B),
3814 (__v2df)__A);
3815 }
3816
3817 static __inline__ __m128d __DEFAULT_FN_ATTRS128
3818 _mm_mask2_permutex2var_pd(__m128d __A, __m128i __I, __mmask8 __U, __m128d __B) {
3819 return (__m128d)__builtin_ia32_selectpd_128(__U,
3820 (__v2df)_mm_permutex2var_pd(__A, __I, __B),
3821 (__v2df)(__m128d)__I);
3822 }
3823
3824 static __inline__ __m128d __DEFAULT_FN_ATTRS128
3825 _mm_maskz_permutex2var_pd(__mmask8 __U, __m128d __A, __m128i __I, __m128d __B) {
3826 return (__m128d)__builtin_ia32_selectpd_128(__U,
3827 (__v2df)_mm_permutex2var_pd(__A, __I, __B),
3828 (__v2df)_mm_setzero_pd());
3829 }
3830
3831 static __inline__ __m256d __DEFAULT_FN_ATTRS256
3832 _mm256_permutex2var_pd(__m256d __A, __m256i __I, __m256d __B) {
3833 return (__m256d)__builtin_ia32_vpermi2varpd256((__v4df)__A, (__v4di)__I,
3834 (__v4df)__B);
3835 }
3836
3837 static __inline__ __m256d __DEFAULT_FN_ATTRS256
3838 _mm256_mask_permutex2var_pd(__m256d __A, __mmask8 __U, __m256i __I,
3839 __m256d __B) {
3840 return (__m256d)__builtin_ia32_selectpd_256(__U,
3841 (__v4df)_mm256_permutex2var_pd(__A, __I, __B),
3842 (__v4df)__A);
3843 }
3844
3845 static __inline__ __m256d __DEFAULT_FN_ATTRS256
3846 _mm256_mask2_permutex2var_pd(__m256d __A, __m256i __I, __mmask8 __U,
3847 __m256d __B) {
3848 return (__m256d)__builtin_ia32_selectpd_256(__U,
3849 (__v4df)_mm256_permutex2var_pd(__A, __I, __B),
3850 (__v4df)(__m256d)__I);
3851 }
3852
3853 static __inline__ __m256d __DEFAULT_FN_ATTRS256
3854 _mm256_maskz_permutex2var_pd(__mmask8 __U, __m256d __A, __m256i __I,
3855 __m256d __B) {
3856 return (__m256d)__builtin_ia32_selectpd_256(__U,
3857 (__v4df)_mm256_permutex2var_pd(__A, __I, __B),
3858 (__v4df)_mm256_setzero_pd());
3859 }
3860
3861 static __inline__ __m128 __DEFAULT_FN_ATTRS128
3862 _mm_permutex2var_ps(__m128 __A, __m128i __I, __m128 __B) {
3863 return (__m128)__builtin_ia32_vpermi2varps128((__v4sf)__A, (__v4si)__I,
3864 (__v4sf)__B);
3865 }
3866
3867 static __inline__ __m128 __DEFAULT_FN_ATTRS128
3868 _mm_mask_permutex2var_ps(__m128 __A, __mmask8 __U, __m128i __I, __m128 __B) {
3869 return (__m128)__builtin_ia32_selectps_128(__U,
3870 (__v4sf)_mm_permutex2var_ps(__A, __I, __B),
3871 (__v4sf)__A);
3872 }
3873
3874 static __inline__ __m128 __DEFAULT_FN_ATTRS128
3875 _mm_mask2_permutex2var_ps(__m128 __A, __m128i __I, __mmask8 __U, __m128 __B) {
3876 return (__m128)__builtin_ia32_selectps_128(__U,
3877 (__v4sf)_mm_permutex2var_ps(__A, __I, __B),
3878 (__v4sf)(__m128)__I);
3879 }
3880
3881 static __inline__ __m128 __DEFAULT_FN_ATTRS128
3882 _mm_maskz_permutex2var_ps(__mmask8 __U, __m128 __A, __m128i __I, __m128 __B) {
3883 return (__m128)__builtin_ia32_selectps_128(__U,
3884 (__v4sf)_mm_permutex2var_ps(__A, __I, __B),
3885 (__v4sf)_mm_setzero_ps());
3886 }
3887
3888 static __inline__ __m256 __DEFAULT_FN_ATTRS256
3889 _mm256_permutex2var_ps(__m256 __A, __m256i __I, __m256 __B) {
3890 return (__m256)__builtin_ia32_vpermi2varps256((__v8sf)__A, (__v8si)__I,
3891 (__v8sf) __B);
3892 }
3893
3894 static __inline__ __m256 __DEFAULT_FN_ATTRS256
3895 _mm256_mask_permutex2var_ps(__m256 __A, __mmask8 __U, __m256i __I, __m256 __B) {
3896 return (__m256)__builtin_ia32_selectps_256(__U,
3897 (__v8sf)_mm256_permutex2var_ps(__A, __I, __B),
3898 (__v8sf)__A);
3899 }
3900
3901 static __inline__ __m256 __DEFAULT_FN_ATTRS256
3902 _mm256_mask2_permutex2var_ps(__m256 __A, __m256i __I, __mmask8 __U,
3903 __m256 __B) {
3904 return (__m256)__builtin_ia32_selectps_256(__U,
3905 (__v8sf)_mm256_permutex2var_ps(__A, __I, __B),
3906 (__v8sf)(__m256)__I);
3907 }
3908
3909 static __inline__ __m256 __DEFAULT_FN_ATTRS256
3910 _mm256_maskz_permutex2var_ps(__mmask8 __U, __m256 __A, __m256i __I,
3911 __m256 __B) {
3912 return (__m256)__builtin_ia32_selectps_256(__U,
3913 (__v8sf)_mm256_permutex2var_ps(__A, __I, __B),
3914 (__v8sf)_mm256_setzero_ps());
3915 }
3916
3917 static __inline__ __m128i __DEFAULT_FN_ATTRS128
3918 _mm_permutex2var_epi64(__m128i __A, __m128i __I, __m128i __B) {
3919 return (__m128i)__builtin_ia32_vpermi2varq128((__v2di)__A, (__v2di)__I,
3920 (__v2di)__B);
3921 }
3922
3923 static __inline__ __m128i __DEFAULT_FN_ATTRS128
3924 _mm_mask_permutex2var_epi64(__m128i __A, __mmask8 __U, __m128i __I,
3925 __m128i __B) {
3926 return (__m128i)__builtin_ia32_selectq_128(__U,
3927 (__v2di)_mm_permutex2var_epi64(__A, __I, __B),
3928 (__v2di)__A);
3929 }
3930
3931 static __inline__ __m128i __DEFAULT_FN_ATTRS128
3932 _mm_mask2_permutex2var_epi64(__m128i __A, __m128i __I, __mmask8 __U,
3933 __m128i __B) {
3934 return (__m128i)__builtin_ia32_selectq_128(__U,
3935 (__v2di)_mm_permutex2var_epi64(__A, __I, __B),
3936 (__v2di)__I);
3937 }
3938
3939 static __inline__ __m128i __DEFAULT_FN_ATTRS128
3940 _mm_maskz_permutex2var_epi64(__mmask8 __U, __m128i __A, __m128i __I,
3941 __m128i __B) {
3942 return (__m128i)__builtin_ia32_selectq_128(__U,
3943 (__v2di)_mm_permutex2var_epi64(__A, __I, __B),
3944 (__v2di)_mm_setzero_si128());
3945 }
3946
3947
3948 static __inline__ __m256i __DEFAULT_FN_ATTRS256
3949 _mm256_permutex2var_epi64(__m256i __A, __m256i __I, __m256i __B) {
3950 return (__m256i)__builtin_ia32_vpermi2varq256((__v4di)__A, (__v4di) __I,
3951 (__v4di) __B);
3952 }
3953
3954 static __inline__ __m256i __DEFAULT_FN_ATTRS256
3955 _mm256_mask_permutex2var_epi64(__m256i __A, __mmask8 __U, __m256i __I,
3956 __m256i __B) {
3957 return (__m256i)__builtin_ia32_selectq_256(__U,
3958 (__v4di)_mm256_permutex2var_epi64(__A, __I, __B),
3959 (__v4di)__A);
3960 }
3961
3962 static __inline__ __m256i __DEFAULT_FN_ATTRS256
3963 _mm256_mask2_permutex2var_epi64(__m256i __A, __m256i __I, __mmask8 __U,
3964 __m256i __B) {
3965 return (__m256i)__builtin_ia32_selectq_256(__U,
3966 (__v4di)_mm256_permutex2var_epi64(__A, __I, __B),
3967 (__v4di)__I);
3968 }
3969
3970 static __inline__ __m256i __DEFAULT_FN_ATTRS256
3971 _mm256_maskz_permutex2var_epi64(__mmask8 __U, __m256i __A, __m256i __I,
3972 __m256i __B) {
3973 return (__m256i)__builtin_ia32_selectq_256(__U,
3974 (__v4di)_mm256_permutex2var_epi64(__A, __I, __B),
3975 (__v4di)_mm256_setzero_si256());
3976 }
3977
3978 static __inline__ __m128i __DEFAULT_FN_ATTRS128
3979 _mm_mask_cvtepi8_epi32(__m128i __W, __mmask8 __U, __m128i __A)
3980 {
3981 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
3982 (__v4si)_mm_cvtepi8_epi32(__A),
3983 (__v4si)__W);
3984 }
3985
3986 static __inline__ __m128i __DEFAULT_FN_ATTRS128
3988 {
3989 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
3990 (__v4si)_mm_cvtepi8_epi32(__A),
3991 (__v4si)_mm_setzero_si128());
3992 }
3993
3994 static __inline__ __m256i __DEFAULT_FN_ATTRS256
3995 _mm256_mask_cvtepi8_epi32 (__m256i __W, __mmask8 __U, __m128i __A)
3996 {
3997 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
3998 (__v8si)_mm256_cvtepi8_epi32(__A),
3999 (__v8si)__W);
4000 }
4001
4002 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4004 {
4005 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4006 (__v8si)_mm256_cvtepi8_epi32(__A),
4007 (__v8si)_mm256_setzero_si256());
4008 }
4009
4010 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4011 _mm_mask_cvtepi8_epi64(__m128i __W, __mmask8 __U, __m128i __A)
4012 {
4013 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4014 (__v2di)_mm_cvtepi8_epi64(__A),
4015 (__v2di)__W);
4016 }
4017
4018 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4020 {
4021 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4022 (__v2di)_mm_cvtepi8_epi64(__A),
4023 (__v2di)_mm_setzero_si128());
4024 }
4025
4026 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4027 _mm256_mask_cvtepi8_epi64(__m256i __W, __mmask8 __U, __m128i __A)
4028 {
4029 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4030 (__v4di)_mm256_cvtepi8_epi64(__A),
4031 (__v4di)__W);
4032 }
4033
4034 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4036 {
4037 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4038 (__v4di)_mm256_cvtepi8_epi64(__A),
4039 (__v4di)_mm256_setzero_si256());
4040 }
4041
4042 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4043 _mm_mask_cvtepi32_epi64(__m128i __W, __mmask8 __U, __m128i __X)
4044 {
4045 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4046 (__v2di)_mm_cvtepi32_epi64(__X),
4047 (__v2di)__W);
4048 }
4049
4050 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4052 {
4053 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4054 (__v2di)_mm_cvtepi32_epi64(__X),
4055 (__v2di)_mm_setzero_si128());
4056 }
4057
4058 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4059 _mm256_mask_cvtepi32_epi64(__m256i __W, __mmask8 __U, __m128i __X)
4060 {
4061 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4062 (__v4di)_mm256_cvtepi32_epi64(__X),
4063 (__v4di)__W);
4064 }
4065
4066 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4068 {
4069 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4070 (__v4di)_mm256_cvtepi32_epi64(__X),
4071 (__v4di)_mm256_setzero_si256());
4072 }
4073
4074 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4075 _mm_mask_cvtepi16_epi32(__m128i __W, __mmask8 __U, __m128i __A)
4076 {
4077 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4078 (__v4si)_mm_cvtepi16_epi32(__A),
4079 (__v4si)__W);
4080 }
4081
4082 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4084 {
4085 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4086 (__v4si)_mm_cvtepi16_epi32(__A),
4087 (__v4si)_mm_setzero_si128());
4088 }
4089
4090 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4091 _mm256_mask_cvtepi16_epi32(__m256i __W, __mmask8 __U, __m128i __A)
4092 {
4093 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4094 (__v8si)_mm256_cvtepi16_epi32(__A),
4095 (__v8si)__W);
4096 }
4097
4098 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4100 {
4101 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4102 (__v8si)_mm256_cvtepi16_epi32(__A),
4103 (__v8si)_mm256_setzero_si256());
4104 }
4105
4106 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4107 _mm_mask_cvtepi16_epi64(__m128i __W, __mmask8 __U, __m128i __A)
4108 {
4109 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4110 (__v2di)_mm_cvtepi16_epi64(__A),
4111 (__v2di)__W);
4112 }
4113
4114 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4116 {
4117 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4118 (__v2di)_mm_cvtepi16_epi64(__A),
4119 (__v2di)_mm_setzero_si128());
4120 }
4121
4122 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4123 _mm256_mask_cvtepi16_epi64(__m256i __W, __mmask8 __U, __m128i __A)
4124 {
4125 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4126 (__v4di)_mm256_cvtepi16_epi64(__A),
4127 (__v4di)__W);
4128 }
4129
4130 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4132 {
4133 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4134 (__v4di)_mm256_cvtepi16_epi64(__A),
4135 (__v4di)_mm256_setzero_si256());
4136 }
4137
4138
4139 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4140 _mm_mask_cvtepu8_epi32(__m128i __W, __mmask8 __U, __m128i __A)
4141 {
4142 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4143 (__v4si)_mm_cvtepu8_epi32(__A),
4144 (__v4si)__W);
4145 }
4146
4147 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4149 {
4150 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4151 (__v4si)_mm_cvtepu8_epi32(__A),
4152 (__v4si)_mm_setzero_si128());
4153 }
4154
4155 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4156 _mm256_mask_cvtepu8_epi32(__m256i __W, __mmask8 __U, __m128i __A)
4157 {
4158 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4159 (__v8si)_mm256_cvtepu8_epi32(__A),
4160 (__v8si)__W);
4161 }
4162
4163 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4165 {
4166 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4167 (__v8si)_mm256_cvtepu8_epi32(__A),
4168 (__v8si)_mm256_setzero_si256());
4169 }
4170
4171 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4172 _mm_mask_cvtepu8_epi64(__m128i __W, __mmask8 __U, __m128i __A)
4173 {
4174 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4175 (__v2di)_mm_cvtepu8_epi64(__A),
4176 (__v2di)__W);
4177 }
4178
4179 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4181 {
4182 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4183 (__v2di)_mm_cvtepu8_epi64(__A),
4184 (__v2di)_mm_setzero_si128());
4185 }
4186
4187 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4188 _mm256_mask_cvtepu8_epi64(__m256i __W, __mmask8 __U, __m128i __A)
4189 {
4190 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4191 (__v4di)_mm256_cvtepu8_epi64(__A),
4192 (__v4di)__W);
4193 }
4194
4195 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4197 {
4198 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4199 (__v4di)_mm256_cvtepu8_epi64(__A),
4200 (__v4di)_mm256_setzero_si256());
4201 }
4202
4203 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4204 _mm_mask_cvtepu32_epi64(__m128i __W, __mmask8 __U, __m128i __X)
4205 {
4206 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4207 (__v2di)_mm_cvtepu32_epi64(__X),
4208 (__v2di)__W);
4209 }
4210
4211 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4213 {
4214 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4215 (__v2di)_mm_cvtepu32_epi64(__X),
4216 (__v2di)_mm_setzero_si128());
4217 }
4218
4219 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4220 _mm256_mask_cvtepu32_epi64(__m256i __W, __mmask8 __U, __m128i __X)
4221 {
4222 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4223 (__v4di)_mm256_cvtepu32_epi64(__X),
4224 (__v4di)__W);
4225 }
4226
4227 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4229 {
4230 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4231 (__v4di)_mm256_cvtepu32_epi64(__X),
4232 (__v4di)_mm256_setzero_si256());
4233 }
4234
4235 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4236 _mm_mask_cvtepu16_epi32(__m128i __W, __mmask8 __U, __m128i __A)
4237 {
4238 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4239 (__v4si)_mm_cvtepu16_epi32(__A),
4240 (__v4si)__W);
4241 }
4242
4243 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4245 {
4246 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4247 (__v4si)_mm_cvtepu16_epi32(__A),
4248 (__v4si)_mm_setzero_si128());
4249 }
4250
4251 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4252 _mm256_mask_cvtepu16_epi32(__m256i __W, __mmask8 __U, __m128i __A)
4253 {
4254 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4255 (__v8si)_mm256_cvtepu16_epi32(__A),
4256 (__v8si)__W);
4257 }
4258
4259 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4261 {
4262 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4263 (__v8si)_mm256_cvtepu16_epi32(__A),
4264 (__v8si)_mm256_setzero_si256());
4265 }
4266
4267 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4268 _mm_mask_cvtepu16_epi64(__m128i __W, __mmask8 __U, __m128i __A)
4269 {
4270 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4271 (__v2di)_mm_cvtepu16_epi64(__A),
4272 (__v2di)__W);
4273 }
4274
4275 static __inline__ __m128i __DEFAULT_FN_ATTRS128
4277 {
4278 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4279 (__v2di)_mm_cvtepu16_epi64(__A),
4280 (__v2di)_mm_setzero_si128());
4281 }
4282
4283 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4284 _mm256_mask_cvtepu16_epi64(__m256i __W, __mmask8 __U, __m128i __A)
4285 {
4286 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4287 (__v4di)_mm256_cvtepu16_epi64(__A),
4288 (__v4di)__W);
4289 }
4290
4291 static __inline__ __m256i __DEFAULT_FN_ATTRS256
4293 {
4294 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4295 (__v4di)_mm256_cvtepu16_epi64(__A),
4296 (__v4di)_mm256_setzero_si256());
4297 }
4298
4299
/* Expands to __builtin_ia32_prold128 on (a) as __v4si and (b) as int. The whole
   expansion is parenthesized so the cast binds before any adjacent operator. */
#define _mm_rol_epi32(a, b) \
  ((__m128i)__builtin_ia32_prold128((__v4si)(__m128i)(a), (int)(b)))
4302
/* Expands to select(u, _mm_rol_epi32(a, b), w). The whole expansion is
   parenthesized so the cast binds before any adjacent operator. */
#define _mm_mask_rol_epi32(w, u, a, b) \
  ((__m128i)__builtin_ia32_selectd_128((__mmask8)(u), \
                                       (__v4si)_mm_rol_epi32((a), (b)), \
                                       (__v4si)(__m128i)(w)))
4307
/* Expands to select(u, _mm_rol_epi32(a, b), zero). The whole expansion is
   parenthesized so the cast binds before any adjacent operator. */
#define _mm_maskz_rol_epi32(u, a, b) \
  ((__m128i)__builtin_ia32_selectd_128((__mmask8)(u), \
                                       (__v4si)_mm_rol_epi32((a), (b)), \
                                       (__v4si)_mm_setzero_si128()))
4312
/* Expands to __builtin_ia32_prold256 on (a) as __v8si and (b) as int. The whole
   expansion is parenthesized so the cast binds before any adjacent operator. */
#define _mm256_rol_epi32(a, b) \
  ((__m256i)__builtin_ia32_prold256((__v8si)(__m256i)(a), (int)(b)))
4315
/* Expands to select(u, _mm256_rol_epi32(a, b), w). The whole expansion is
   parenthesized so the cast binds before any adjacent operator. */
#define _mm256_mask_rol_epi32(w, u, a, b) \
  ((__m256i)__builtin_ia32_selectd_256((__mmask8)(u), \
                                       (__v8si)_mm256_rol_epi32((a), (b)), \
                                       (__v8si)(__m256i)(w)))
4320
/* Expands to select(u, _mm256_rol_epi32(a, b), zero). The whole expansion is
   parenthesized so the cast binds before any adjacent operator. */
#define _mm256_maskz_rol_epi32(u, a, b) \
  ((__m256i)__builtin_ia32_selectd_256((__mmask8)(u), \
                                       (__v8si)_mm256_rol_epi32((a), (b)), \
                                       (__v8si)_mm256_setzero_si256()))
4325
/* Expands to __builtin_ia32_prolq128 on (a) as __v2di and (b) as int. The whole
   expansion is parenthesized so the cast binds before any adjacent operator. */
#define _mm_rol_epi64(a, b) \
  ((__m128i)__builtin_ia32_prolq128((__v2di)(__m128i)(a), (int)(b)))
4328
/* Expands to select(u, _mm_rol_epi64(a, b), w). The whole expansion is
   parenthesized so the cast binds before any adjacent operator. */
#define _mm_mask_rol_epi64(w, u, a, b) \
  ((__m128i)__builtin_ia32_selectq_128((__mmask8)(u), \
                                       (__v2di)_mm_rol_epi64((a), (b)), \
                                       (__v2di)(__m128i)(w)))
4333
/* Expands to select(u, _mm_rol_epi64(a, b), zero). The whole expansion is
   parenthesized so the cast binds before any adjacent operator. */
#define _mm_maskz_rol_epi64(u, a, b) \
  ((__m128i)__builtin_ia32_selectq_128((__mmask8)(u), \
                                       (__v2di)_mm_rol_epi64((a), (b)), \
                                       (__v2di)_mm_setzero_si128()))
4338
/* Expands to __builtin_ia32_prolq256 on (a) as __v4di and (b) as int. The whole
   expansion is parenthesized so the cast binds before any adjacent operator. */
#define _mm256_rol_epi64(a, b) \
  ((__m256i)__builtin_ia32_prolq256((__v4di)(__m256i)(a), (int)(b)))
4341
/* Expands to select(u, _mm256_rol_epi64(a, b), w). The whole expansion is
   parenthesized so the cast binds before any adjacent operator. */
#define _mm256_mask_rol_epi64(w, u, a, b) \
  ((__m256i)__builtin_ia32_selectq_256((__mmask8)(u), \
                                       (__v4di)_mm256_rol_epi64((a), (b)), \
                                       (__v4di)(__m256i)(w)))
4346
/* Expands to select(u, _mm256_rol_epi64(a, b), zero). The whole expansion is
   parenthesized so the cast binds before any adjacent operator. */
#define _mm256_maskz_rol_epi64(u, a, b) \
  ((__m256i)__builtin_ia32_selectq_256((__mmask8)(u), \
                                       (__v4di)_mm256_rol_epi64((a), (b)), \
                                       (__v4di)_mm256_setzero_si256()))
4351
4352static __inline__ __m128i __DEFAULT_FN_ATTRS128
4353_mm_rolv_epi32 (__m128i __A, __m128i __B)
4354{
4355 return (__m128i)__builtin_ia32_prolvd128((__v4si)__A, (__v4si)__B);
4356}
4357
4358static __inline__ __m128i __DEFAULT_FN_ATTRS128
4359_mm_mask_rolv_epi32 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
4360{
4361 return (__m128i)__builtin_ia32_selectd_128(__U,
4362 (__v4si)_mm_rolv_epi32(__A, __B),
4363 (__v4si)__W);
4364}
4365
4366static __inline__ __m128i __DEFAULT_FN_ATTRS128
4367_mm_maskz_rolv_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
4368{
4369 return (__m128i)__builtin_ia32_selectd_128(__U,
4370 (__v4si)_mm_rolv_epi32(__A, __B),
4371 (__v4si)_mm_setzero_si128());
4372}
4373
4374static __inline__ __m256i __DEFAULT_FN_ATTRS256
4375_mm256_rolv_epi32 (__m256i __A, __m256i __B)
4376{
4377 return (__m256i)__builtin_ia32_prolvd256((__v8si)__A, (__v8si)__B);
4378}
4379
4380static __inline__ __m256i __DEFAULT_FN_ATTRS256
4381_mm256_mask_rolv_epi32 (__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
4382{
4383 return (__m256i)__builtin_ia32_selectd_256(__U,
4384 (__v8si)_mm256_rolv_epi32(__A, __B),
4385 (__v8si)__W);
4386}
4387
4388static __inline__ __m256i __DEFAULT_FN_ATTRS256
4389_mm256_maskz_rolv_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
4390{
4391 return (__m256i)__builtin_ia32_selectd_256(__U,
4392 (__v8si)_mm256_rolv_epi32(__A, __B),
4393 (__v8si)_mm256_setzero_si256());
4394}
4395
4396static __inline__ __m128i __DEFAULT_FN_ATTRS128
4397_mm_rolv_epi64 (__m128i __A, __m128i __B)
4398{
4399 return (__m128i)__builtin_ia32_prolvq128((__v2di)__A, (__v2di)__B);
4400}
4401
4402static __inline__ __m128i __DEFAULT_FN_ATTRS128
4403_mm_mask_rolv_epi64 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
4404{
4405 return (__m128i)__builtin_ia32_selectq_128(__U,
4406 (__v2di)_mm_rolv_epi64(__A, __B),
4407 (__v2di)__W);
4408}
4409
4410static __inline__ __m128i __DEFAULT_FN_ATTRS128
4411_mm_maskz_rolv_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
4412{
4413 return (__m128i)__builtin_ia32_selectq_128(__U,
4414 (__v2di)_mm_rolv_epi64(__A, __B),
4415 (__v2di)_mm_setzero_si128());
4416}
4417
4418static __inline__ __m256i __DEFAULT_FN_ATTRS256
4419_mm256_rolv_epi64 (__m256i __A, __m256i __B)
4420{
4421 return (__m256i)__builtin_ia32_prolvq256((__v4di)__A, (__v4di)__B);
4422}
4423
4424static __inline__ __m256i __DEFAULT_FN_ATTRS256
4425_mm256_mask_rolv_epi64 (__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
4426{
4427 return (__m256i)__builtin_ia32_selectq_256(__U,
4428 (__v4di)_mm256_rolv_epi64(__A, __B),
4429 (__v4di)__W);
4430}
4431
4432static __inline__ __m256i __DEFAULT_FN_ATTRS256
4433_mm256_maskz_rolv_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
4434{
4435 return (__m256i)__builtin_ia32_selectq_256(__U,
4436 (__v4di)_mm256_rolv_epi64(__A, __B),
4437 (__v4di)_mm256_setzero_si256());
4438}
4439
/* Expands to __builtin_ia32_prord128 on (a) as __v4si and (b) as int. The whole
   expansion is parenthesized so the cast binds before any adjacent operator. */
#define _mm_ror_epi32(a, b) \
  ((__m128i)__builtin_ia32_prord128((__v4si)(__m128i)(a), (int)(b)))
4442
/* Expands to select(u, _mm_ror_epi32(a, b), w). The whole expansion is
   parenthesized so the cast binds before any adjacent operator. */
#define _mm_mask_ror_epi32(w, u, a, b) \
  ((__m128i)__builtin_ia32_selectd_128((__mmask8)(u), \
                                       (__v4si)_mm_ror_epi32((a), (b)), \
                                       (__v4si)(__m128i)(w)))
4447
/* Expands to select(u, _mm_ror_epi32(a, b), zero). The whole expansion is
   parenthesized so the cast binds before any adjacent operator. */
#define _mm_maskz_ror_epi32(u, a, b) \
  ((__m128i)__builtin_ia32_selectd_128((__mmask8)(u), \
                                       (__v4si)_mm_ror_epi32((a), (b)), \
                                       (__v4si)_mm_setzero_si128()))
4452
/* Expands to __builtin_ia32_prord256 on (a) as __v8si and (b) as int. The whole
   expansion is parenthesized so the cast binds before any adjacent operator. */
#define _mm256_ror_epi32(a, b) \
  ((__m256i)__builtin_ia32_prord256((__v8si)(__m256i)(a), (int)(b)))
4455
/* Expands to select(u, _mm256_ror_epi32(a, b), w). The whole expansion is
   parenthesized so the cast binds before any adjacent operator. */
#define _mm256_mask_ror_epi32(w, u, a, b) \
  ((__m256i)__builtin_ia32_selectd_256((__mmask8)(u), \
                                       (__v8si)_mm256_ror_epi32((a), (b)), \
                                       (__v8si)(__m256i)(w)))
4460
/* Expands to select(u, _mm256_ror_epi32(a, b), zero). The whole expansion is
   parenthesized so the cast binds before any adjacent operator. */
#define _mm256_maskz_ror_epi32(u, a, b) \
  ((__m256i)__builtin_ia32_selectd_256((__mmask8)(u), \
                                       (__v8si)_mm256_ror_epi32((a), (b)), \
                                       (__v8si)_mm256_setzero_si256()))
4465
/* Expands to __builtin_ia32_prorq128 on (a) as __v2di and (b) as int. The whole
   expansion is parenthesized so the cast binds before any adjacent operator. */
#define _mm_ror_epi64(a, b) \
  ((__m128i)__builtin_ia32_prorq128((__v2di)(__m128i)(a), (int)(b)))
4468
/* Expands to select(u, _mm_ror_epi64(a, b), w). The whole expansion is
   parenthesized so the cast binds before any adjacent operator. */
#define _mm_mask_ror_epi64(w, u, a, b) \
  ((__m128i)__builtin_ia32_selectq_128((__mmask8)(u), \
                                       (__v2di)_mm_ror_epi64((a), (b)), \
                                       (__v2di)(__m128i)(w)))
4473
/* Expands to select(u, _mm_ror_epi64(a, b), zero). The whole expansion is
   parenthesized so the cast binds before any adjacent operator. */
#define _mm_maskz_ror_epi64(u, a, b) \
  ((__m128i)__builtin_ia32_selectq_128((__mmask8)(u), \
                                       (__v2di)_mm_ror_epi64((a), (b)), \
                                       (__v2di)_mm_setzero_si128()))
4478
/* Expands to __builtin_ia32_prorq256 on (a) as __v4di and (b) as int. The whole
   expansion is parenthesized so the cast binds before any adjacent operator. */
#define _mm256_ror_epi64(a, b) \
  ((__m256i)__builtin_ia32_prorq256((__v4di)(__m256i)(a), (int)(b)))
4481
/* Expands to select(u, _mm256_ror_epi64(a, b), w). The whole expansion is
   parenthesized so the cast binds before any adjacent operator. */
#define _mm256_mask_ror_epi64(w, u, a, b) \
  ((__m256i)__builtin_ia32_selectq_256((__mmask8)(u), \
                                       (__v4di)_mm256_ror_epi64((a), (b)), \
                                       (__v4di)(__m256i)(w)))
4486
/* Expands to select(u, _mm256_ror_epi64(a, b), zero). The whole expansion is
   parenthesized so the cast binds before any adjacent operator. */
#define _mm256_maskz_ror_epi64(u, a, b) \
  ((__m256i)__builtin_ia32_selectq_256((__mmask8)(u), \
                                       (__v4di)_mm256_ror_epi64((a), (b)), \
                                       (__v4di)_mm256_setzero_si256()))
4491
4492static __inline__ __m128i __DEFAULT_FN_ATTRS128
4493_mm_mask_sll_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
4494{
4495 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4496 (__v4si)_mm_sll_epi32(__A, __B),
4497 (__v4si)__W);
4498}
4499
4500static __inline__ __m128i __DEFAULT_FN_ATTRS128
4501_mm_maskz_sll_epi32(__mmask8 __U, __m128i __A, __m128i __B)
4502{
4503 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4504 (__v4si)_mm_sll_epi32(__A, __B),
4505 (__v4si)_mm_setzero_si128());
4506}
4507
4508static __inline__ __m256i __DEFAULT_FN_ATTRS256
4509_mm256_mask_sll_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B)
4510{
4511 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4512 (__v8si)_mm256_sll_epi32(__A, __B),
4513 (__v8si)__W);
4514}
4515
4516static __inline__ __m256i __DEFAULT_FN_ATTRS256
4517_mm256_maskz_sll_epi32(__mmask8 __U, __m256i __A, __m128i __B)
4518{
4519 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4520 (__v8si)_mm256_sll_epi32(__A, __B),
4521 (__v8si)_mm256_setzero_si256());
4522}
4523
4524static __inline__ __m128i __DEFAULT_FN_ATTRS128
4525_mm_mask_slli_epi32(__m128i __W, __mmask8 __U, __m128i __A, unsigned int __B)
4526{
4527 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4528 (__v4si)_mm_slli_epi32(__A, __B),
4529 (__v4si)__W);
4530}
4531
4532static __inline__ __m128i __DEFAULT_FN_ATTRS128
4533_mm_maskz_slli_epi32(__mmask8 __U, __m128i __A, unsigned int __B)
4534{
4535 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4536 (__v4si)_mm_slli_epi32(__A, __B),
4537 (__v4si)_mm_setzero_si128());
4538}
4539
4540static __inline__ __m256i __DEFAULT_FN_ATTRS256
4541_mm256_mask_slli_epi32(__m256i __W, __mmask8 __U, __m256i __A, unsigned int __B)
4542{
4543 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4544 (__v8si)_mm256_slli_epi32(__A, __B),
4545 (__v8si)__W);
4546}
4547
4548static __inline__ __m256i __DEFAULT_FN_ATTRS256
4549_mm256_maskz_slli_epi32(__mmask8 __U, __m256i __A, unsigned int __B)
4550{
4551 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4552 (__v8si)_mm256_slli_epi32(__A, __B),
4553 (__v8si)_mm256_setzero_si256());
4554}
4555
4556static __inline__ __m128i __DEFAULT_FN_ATTRS128
4557_mm_mask_sll_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
4558{
4559 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4560 (__v2di)_mm_sll_epi64(__A, __B),
4561 (__v2di)__W);
4562}
4563
4564static __inline__ __m128i __DEFAULT_FN_ATTRS128
4565_mm_maskz_sll_epi64(__mmask8 __U, __m128i __A, __m128i __B)
4566{
4567 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4568 (__v2di)_mm_sll_epi64(__A, __B),
4569 (__v2di)_mm_setzero_si128());
4570}
4571
4572static __inline__ __m256i __DEFAULT_FN_ATTRS256
4573_mm256_mask_sll_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B)
4574{
4575 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4576 (__v4di)_mm256_sll_epi64(__A, __B),
4577 (__v4di)__W);
4578}
4579
4580static __inline__ __m256i __DEFAULT_FN_ATTRS256
4581_mm256_maskz_sll_epi64(__mmask8 __U, __m256i __A, __m128i __B)
4582{
4583 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4584 (__v4di)_mm256_sll_epi64(__A, __B),
4585 (__v4di)_mm256_setzero_si256());
4586}
4587
4588static __inline__ __m128i __DEFAULT_FN_ATTRS128
4589_mm_mask_slli_epi64(__m128i __W, __mmask8 __U, __m128i __A, unsigned int __B)
4590{
4591 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4592 (__v2di)_mm_slli_epi64(__A, __B),
4593 (__v2di)__W);
4594}
4595
4596static __inline__ __m128i __DEFAULT_FN_ATTRS128
4597_mm_maskz_slli_epi64(__mmask8 __U, __m128i __A, unsigned int __B)
4598{
4599 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4600 (__v2di)_mm_slli_epi64(__A, __B),
4601 (__v2di)_mm_setzero_si128());
4602}
4603
4604static __inline__ __m256i __DEFAULT_FN_ATTRS256
4605_mm256_mask_slli_epi64(__m256i __W, __mmask8 __U, __m256i __A, unsigned int __B)
4606{
4607 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4608 (__v4di)_mm256_slli_epi64(__A, __B),
4609 (__v4di)__W);
4610}
4611
4612static __inline__ __m256i __DEFAULT_FN_ATTRS256
4613_mm256_maskz_slli_epi64(__mmask8 __U, __m256i __A, unsigned int __B)
4614{
4615 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4616 (__v4di)_mm256_slli_epi64(__A, __B),
4617 (__v4di)_mm256_setzero_si256());
4618}
4619
4620static __inline__ __m128i __DEFAULT_FN_ATTRS128
4621_mm_rorv_epi32 (__m128i __A, __m128i __B)
4622{
4623 return (__m128i)__builtin_ia32_prorvd128((__v4si)__A, (__v4si)__B);
4624}
4625
4626static __inline__ __m128i __DEFAULT_FN_ATTRS128
4627_mm_mask_rorv_epi32 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
4628{
4629 return (__m128i)__builtin_ia32_selectd_128(__U,
4630 (__v4si)_mm_rorv_epi32(__A, __B),
4631 (__v4si)__W);
4632}
4633
4634static __inline__ __m128i __DEFAULT_FN_ATTRS128
4635_mm_maskz_rorv_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
4636{
4637 return (__m128i)__builtin_ia32_selectd_128(__U,
4638 (__v4si)_mm_rorv_epi32(__A, __B),
4639 (__v4si)_mm_setzero_si128());
4640}
4641
4642static __inline__ __m256i __DEFAULT_FN_ATTRS256
4643_mm256_rorv_epi32 (__m256i __A, __m256i __B)
4644{
4645 return (__m256i)__builtin_ia32_prorvd256((__v8si)__A, (__v8si)__B);
4646}
4647
4648static __inline__ __m256i __DEFAULT_FN_ATTRS256
4649_mm256_mask_rorv_epi32 (__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
4650{
4651 return (__m256i)__builtin_ia32_selectd_256(__U,
4652 (__v8si)_mm256_rorv_epi32(__A, __B),
4653 (__v8si)__W);
4654}
4655
4656static __inline__ __m256i __DEFAULT_FN_ATTRS256
4657_mm256_maskz_rorv_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
4658{
4659 return (__m256i)__builtin_ia32_selectd_256(__U,
4660 (__v8si)_mm256_rorv_epi32(__A, __B),
4661 (__v8si)_mm256_setzero_si256());
4662}
4663
4664static __inline__ __m128i __DEFAULT_FN_ATTRS128
4665_mm_rorv_epi64 (__m128i __A, __m128i __B)
4666{
4667 return (__m128i)__builtin_ia32_prorvq128((__v2di)__A, (__v2di)__B);
4668}
4669
4670static __inline__ __m128i __DEFAULT_FN_ATTRS128
4671_mm_mask_rorv_epi64 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
4672{
4673 return (__m128i)__builtin_ia32_selectq_128(__U,
4674 (__v2di)_mm_rorv_epi64(__A, __B),
4675 (__v2di)__W);
4676}
4677
4678static __inline__ __m128i __DEFAULT_FN_ATTRS128
4679_mm_maskz_rorv_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
4680{
4681 return (__m128i)__builtin_ia32_selectq_128(__U,
4682 (__v2di)_mm_rorv_epi64(__A, __B),
4683 (__v2di)_mm_setzero_si128());
4684}
4685
4686static __inline__ __m256i __DEFAULT_FN_ATTRS256
4687_mm256_rorv_epi64 (__m256i __A, __m256i __B)
4688{
4689 return (__m256i)__builtin_ia32_prorvq256((__v4di)__A, (__v4di)__B);
4690}
4691
4692static __inline__ __m256i __DEFAULT_FN_ATTRS256
4693_mm256_mask_rorv_epi64 (__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
4694{
4695 return (__m256i)__builtin_ia32_selectq_256(__U,
4696 (__v4di)_mm256_rorv_epi64(__A, __B),
4697 (__v4di)__W);
4698}
4699
4700static __inline__ __m256i __DEFAULT_FN_ATTRS256
4701_mm256_maskz_rorv_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
4702{
4703 return (__m256i)__builtin_ia32_selectq_256(__U,
4704 (__v4di)_mm256_rorv_epi64(__A, __B),
4705 (__v4di)_mm256_setzero_si256());
4706}
4707
4708static __inline__ __m128i __DEFAULT_FN_ATTRS128
4709_mm_mask_sllv_epi64(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y)
4710{
4711 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4712 (__v2di)_mm_sllv_epi64(__X, __Y),
4713 (__v2di)__W);
4714}
4715
4716static __inline__ __m128i __DEFAULT_FN_ATTRS128
4717_mm_maskz_sllv_epi64(__mmask8 __U, __m128i __X, __m128i __Y)
4718{
4719 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4720 (__v2di)_mm_sllv_epi64(__X, __Y),
4721 (__v2di)_mm_setzero_si128());
4722}
4723
4724static __inline__ __m256i __DEFAULT_FN_ATTRS256
4725_mm256_mask_sllv_epi64(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y)
4726{
4727 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4728 (__v4di)_mm256_sllv_epi64(__X, __Y),
4729 (__v4di)__W);
4730}
4731
4732static __inline__ __m256i __DEFAULT_FN_ATTRS256
4733_mm256_maskz_sllv_epi64(__mmask8 __U, __m256i __X, __m256i __Y)
4734{
4735 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4736 (__v4di)_mm256_sllv_epi64(__X, __Y),
4737 (__v4di)_mm256_setzero_si256());
4738}
4739
4740static __inline__ __m128i __DEFAULT_FN_ATTRS128
4741_mm_mask_sllv_epi32(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y)
4742{
4743 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4744 (__v4si)_mm_sllv_epi32(__X, __Y),
4745 (__v4si)__W);
4746}
4747
4748static __inline__ __m128i __DEFAULT_FN_ATTRS128
4749_mm_maskz_sllv_epi32(__mmask8 __U, __m128i __X, __m128i __Y)
4750{
4751 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4752 (__v4si)_mm_sllv_epi32(__X, __Y),
4753 (__v4si)_mm_setzero_si128());
4754}
4755
4756static __inline__ __m256i __DEFAULT_FN_ATTRS256
4757_mm256_mask_sllv_epi32(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y)
4758{
4759 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4760 (__v8si)_mm256_sllv_epi32(__X, __Y),
4761 (__v8si)__W);
4762}
4763
4764static __inline__ __m256i __DEFAULT_FN_ATTRS256
4765_mm256_maskz_sllv_epi32(__mmask8 __U, __m256i __X, __m256i __Y)
4766{
4767 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4768 (__v8si)_mm256_sllv_epi32(__X, __Y),
4769 (__v8si)_mm256_setzero_si256());
4770}
4771
4772static __inline__ __m128i __DEFAULT_FN_ATTRS128
4773_mm_mask_srlv_epi64(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y)
4774{
4775 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4776 (__v2di)_mm_srlv_epi64(__X, __Y),
4777 (__v2di)__W);
4778}
4779
4780static __inline__ __m128i __DEFAULT_FN_ATTRS128
4781_mm_maskz_srlv_epi64(__mmask8 __U, __m128i __X, __m128i __Y)
4782{
4783 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4784 (__v2di)_mm_srlv_epi64(__X, __Y),
4785 (__v2di)_mm_setzero_si128());
4786}
4787
4788static __inline__ __m256i __DEFAULT_FN_ATTRS256
4789_mm256_mask_srlv_epi64(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y)
4790{
4791 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4792 (__v4di)_mm256_srlv_epi64(__X, __Y),
4793 (__v4di)__W);
4794}
4795
4796static __inline__ __m256i __DEFAULT_FN_ATTRS256
4797_mm256_maskz_srlv_epi64(__mmask8 __U, __m256i __X, __m256i __Y)
4798{
4799 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4800 (__v4di)_mm256_srlv_epi64(__X, __Y),
4801 (__v4di)_mm256_setzero_si256());
4802}
4803
4804static __inline__ __m128i __DEFAULT_FN_ATTRS128
4805_mm_mask_srlv_epi32(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y)
4806{
4807 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4808 (__v4si)_mm_srlv_epi32(__X, __Y),
4809 (__v4si)__W);
4810}
4811
4812static __inline__ __m128i __DEFAULT_FN_ATTRS128
4813_mm_maskz_srlv_epi32(__mmask8 __U, __m128i __X, __m128i __Y)
4814{
4815 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4816 (__v4si)_mm_srlv_epi32(__X, __Y),
4817 (__v4si)_mm_setzero_si128());
4818}
4819
4820static __inline__ __m256i __DEFAULT_FN_ATTRS256
4821_mm256_mask_srlv_epi32(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y)
4822{
4823 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4824 (__v8si)_mm256_srlv_epi32(__X, __Y),
4825 (__v8si)__W);
4826}
4827
4828static __inline__ __m256i __DEFAULT_FN_ATTRS256
4829_mm256_maskz_srlv_epi32(__mmask8 __U, __m256i __X, __m256i __Y)
4830{
4831 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4832 (__v8si)_mm256_srlv_epi32(__X, __Y),
4833 (__v8si)_mm256_setzero_si256());
4834}
4835
4836static __inline__ __m128i __DEFAULT_FN_ATTRS128
4837_mm_mask_srl_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
4838{
4839 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4840 (__v4si)_mm_srl_epi32(__A, __B),
4841 (__v4si)__W);
4842}
4843
4844static __inline__ __m128i __DEFAULT_FN_ATTRS128
4845_mm_maskz_srl_epi32(__mmask8 __U, __m128i __A, __m128i __B)
4846{
4847 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4848 (__v4si)_mm_srl_epi32(__A, __B),
4849 (__v4si)_mm_setzero_si128());
4850}
4851
4852static __inline__ __m256i __DEFAULT_FN_ATTRS256
4853_mm256_mask_srl_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B)
4854{
4855 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4856 (__v8si)_mm256_srl_epi32(__A, __B),
4857 (__v8si)__W);
4858}
4859
4860static __inline__ __m256i __DEFAULT_FN_ATTRS256
4861_mm256_maskz_srl_epi32(__mmask8 __U, __m256i __A, __m128i __B)
4862{
4863 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4864 (__v8si)_mm256_srl_epi32(__A, __B),
4865 (__v8si)_mm256_setzero_si256());
4866}
4867
4868static __inline__ __m128i __DEFAULT_FN_ATTRS128
4869_mm_mask_srli_epi32(__m128i __W, __mmask8 __U, __m128i __A, unsigned int __B)
4870{
4871 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4872 (__v4si)_mm_srli_epi32(__A, __B),
4873 (__v4si)__W);
4874}
4875
4876static __inline__ __m128i __DEFAULT_FN_ATTRS128
4877_mm_maskz_srli_epi32(__mmask8 __U, __m128i __A, unsigned int __B)
4878{
4879 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4880 (__v4si)_mm_srli_epi32(__A, __B),
4881 (__v4si)_mm_setzero_si128());
4882}
4883
4884static __inline__ __m256i __DEFAULT_FN_ATTRS256
4885_mm256_mask_srli_epi32(__m256i __W, __mmask8 __U, __m256i __A, unsigned int __B)
4886{
4887 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4888 (__v8si)_mm256_srli_epi32(__A, __B),
4889 (__v8si)__W);
4890}
4891
4892static __inline__ __m256i __DEFAULT_FN_ATTRS256
4893_mm256_maskz_srli_epi32(__mmask8 __U, __m256i __A, unsigned int __B)
4894{
4895 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4896 (__v8si)_mm256_srli_epi32(__A, __B),
4897 (__v8si)_mm256_setzero_si256());
4898}
4899
4900static __inline__ __m128i __DEFAULT_FN_ATTRS128
4901_mm_mask_srl_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
4902{
4903 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4904 (__v2di)_mm_srl_epi64(__A, __B),
4905 (__v2di)__W);
4906}
4907
4908static __inline__ __m128i __DEFAULT_FN_ATTRS128
4909_mm_maskz_srl_epi64(__mmask8 __U, __m128i __A, __m128i __B)
4910{
4911 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4912 (__v2di)_mm_srl_epi64(__A, __B),
4913 (__v2di)_mm_setzero_si128());
4914}
4915
4916static __inline__ __m256i __DEFAULT_FN_ATTRS256
4917_mm256_mask_srl_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B)
4918{
4919 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4920 (__v4di)_mm256_srl_epi64(__A, __B),
4921 (__v4di)__W);
4922}
4923
4924static __inline__ __m256i __DEFAULT_FN_ATTRS256
4925_mm256_maskz_srl_epi64(__mmask8 __U, __m256i __A, __m128i __B)
4926{
4927 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4928 (__v4di)_mm256_srl_epi64(__A, __B),
4929 (__v4di)_mm256_setzero_si256());
4930}
4931
4932static __inline__ __m128i __DEFAULT_FN_ATTRS128
4933_mm_mask_srli_epi64(__m128i __W, __mmask8 __U, __m128i __A, unsigned int __B)
4934{
4935 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4936 (__v2di)_mm_srli_epi64(__A, __B),
4937 (__v2di)__W);
4938}
4939
4940static __inline__ __m128i __DEFAULT_FN_ATTRS128
4941_mm_maskz_srli_epi64(__mmask8 __U, __m128i __A, unsigned int __B)
4942{
4943 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
4944 (__v2di)_mm_srli_epi64(__A, __B),
4945 (__v2di)_mm_setzero_si128());
4946}
4947
4948static __inline__ __m256i __DEFAULT_FN_ATTRS256
4949_mm256_mask_srli_epi64(__m256i __W, __mmask8 __U, __m256i __A, unsigned int __B)
4950{
4951 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4952 (__v4di)_mm256_srli_epi64(__A, __B),
4953 (__v4di)__W);
4954}
4955
4956static __inline__ __m256i __DEFAULT_FN_ATTRS256
4957_mm256_maskz_srli_epi64(__mmask8 __U, __m256i __A, unsigned int __B)
4958{
4959 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
4960 (__v4di)_mm256_srli_epi64(__A, __B),
4961 (__v4di)_mm256_setzero_si256());
4962}
4963
4964static __inline__ __m128i __DEFAULT_FN_ATTRS128
4965_mm_mask_srav_epi32(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y)
4966{
4967 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4968 (__v4si)_mm_srav_epi32(__X, __Y),
4969 (__v4si)__W);
4970}
4971
4972static __inline__ __m128i __DEFAULT_FN_ATTRS128
4973_mm_maskz_srav_epi32(__mmask8 __U, __m128i __X, __m128i __Y)
4974{
4975 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
4976 (__v4si)_mm_srav_epi32(__X, __Y),
4977 (__v4si)_mm_setzero_si128());
4978}
4979
4980static __inline__ __m256i __DEFAULT_FN_ATTRS256
4981_mm256_mask_srav_epi32(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y)
4982{
4983 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4984 (__v8si)_mm256_srav_epi32(__X, __Y),
4985 (__v8si)__W);
4986}
4987
4988static __inline__ __m256i __DEFAULT_FN_ATTRS256
4989_mm256_maskz_srav_epi32(__mmask8 __U, __m256i __X, __m256i __Y)
4990{
4991 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
4992 (__v8si)_mm256_srav_epi32(__X, __Y),
4993 (__v8si)_mm256_setzero_si256());
4994}
4995
4996static __inline__ __m128i __DEFAULT_FN_ATTRS128
4997_mm_srav_epi64(__m128i __X, __m128i __Y)
4998{
4999 return (__m128i)__builtin_ia32_psravq128((__v2di)__X, (__v2di)__Y);
5000}
5001
5002static __inline__ __m128i __DEFAULT_FN_ATTRS128
5003_mm_mask_srav_epi64(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y)
5004{
5005 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
5006 (__v2di)_mm_srav_epi64(__X, __Y),
5007 (__v2di)__W);
5008}
5009
5010static __inline__ __m128i __DEFAULT_FN_ATTRS128
5011_mm_maskz_srav_epi64(__mmask8 __U, __m128i __X, __m128i __Y)
5012{
5013 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
5014 (__v2di)_mm_srav_epi64(__X, __Y),
5015 (__v2di)_mm_setzero_si128());
5016}
5017
5018static __inline__ __m256i __DEFAULT_FN_ATTRS256
5019_mm256_srav_epi64(__m256i __X, __m256i __Y)
5020{
5021 return (__m256i)__builtin_ia32_psravq256((__v4di)__X, (__v4di) __Y);
5022}
5023
5024static __inline__ __m256i __DEFAULT_FN_ATTRS256
5025_mm256_mask_srav_epi64(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y)
5026{
5027 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
5028 (__v4di)_mm256_srav_epi64(__X, __Y),
5029 (__v4di)__W);
5030}
5031
5032static __inline__ __m256i __DEFAULT_FN_ATTRS256
5033_mm256_maskz_srav_epi64 (__mmask8 __U, __m256i __X, __m256i __Y)
5034{
5035 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
5036 (__v4di)_mm256_srav_epi64(__X, __Y),
5037 (__v4di)_mm256_setzero_si256());
5038}
5039
5040static __inline__ __m128i __DEFAULT_FN_ATTRS128
5041_mm_mask_mov_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
5042{
5043 return (__m128i) __builtin_ia32_selectd_128 ((__mmask8) __U,
5044 (__v4si) __A,
5045 (__v4si) __W);
5046}
5047
5048static __inline__ __m128i __DEFAULT_FN_ATTRS128
5050{
5051 return (__m128i) __builtin_ia32_selectd_128 ((__mmask8) __U,
5052 (__v4si) __A,
5053 (__v4si) _mm_setzero_si128 ());
5054}
5055
5056
5057static __inline__ __m256i __DEFAULT_FN_ATTRS256
5058_mm256_mask_mov_epi32 (__m256i __W, __mmask8 __U, __m256i __A)
5059{
5060 return (__m256i) __builtin_ia32_selectd_256 ((__mmask8) __U,
5061 (__v8si) __A,
5062 (__v8si) __W);
5063}
5064
5065static __inline__ __m256i __DEFAULT_FN_ATTRS256
5067{
5068 return (__m256i) __builtin_ia32_selectd_256 ((__mmask8) __U,
5069 (__v8si) __A,
5070 (__v8si) _mm256_setzero_si256 ());
5071}
5072
5073static __inline __m128i __DEFAULT_FN_ATTRS128
5074_mm_load_epi32 (void const *__P)
5075{
5076 return *(const __m128i *) __P;
5077}
5078
5079static __inline__ __m128i __DEFAULT_FN_ATTRS128
5080_mm_mask_load_epi32 (__m128i __W, __mmask8 __U, void const *__P)
5081{
5082 return (__m128i) __builtin_ia32_movdqa32load128_mask ((const __v4si *) __P,
5083 (__v4si) __W,
5084 (__mmask8)
5085 __U);
5086}
5087
5088static __inline__ __m128i __DEFAULT_FN_ATTRS128
5089_mm_maskz_load_epi32 (__mmask8 __U, void const *__P)
5090{
5091 return (__m128i) __builtin_ia32_movdqa32load128_mask ((const __v4si *) __P,
5092 (__v4si)
5094 (__mmask8)
5095 __U);
5096}
5097
5098static __inline __m256i __DEFAULT_FN_ATTRS256
5099_mm256_load_epi32 (void const *__P)
5100{
5101 return *(const __m256i *) __P;
5102}
5103
5104static __inline__ __m256i __DEFAULT_FN_ATTRS256
5105_mm256_mask_load_epi32 (__m256i __W, __mmask8 __U, void const *__P)
5106{
5107 return (__m256i) __builtin_ia32_movdqa32load256_mask ((const __v8si *) __P,
5108 (__v8si) __W,
5109 (__mmask8)
5110 __U);
5111}
5112
5113static __inline__ __m256i __DEFAULT_FN_ATTRS256
5114_mm256_maskz_load_epi32 (__mmask8 __U, void const *__P)
5115{
5116 return (__m256i) __builtin_ia32_movdqa32load256_mask ((const __v8si *) __P,
5117 (__v8si)
5119 (__mmask8)
5120 __U);
5121}
5122
5123static __inline void __DEFAULT_FN_ATTRS128
5124_mm_store_epi32 (void *__P, __m128i __A)
5125{
5126 *(__m128i *) __P = __A;
5127}
5128
5129static __inline__ void __DEFAULT_FN_ATTRS128
5130_mm_mask_store_epi32 (void *__P, __mmask8 __U, __m128i __A)
5131{
5132 __builtin_ia32_movdqa32store128_mask ((__v4si *) __P,
5133 (__v4si) __A,
5134 (__mmask8) __U);
5135}
5136
5137static __inline void __DEFAULT_FN_ATTRS256
5138_mm256_store_epi32 (void *__P, __m256i __A)
5139{
5140 *(__m256i *) __P = __A;
5141}
5142
5143static __inline__ void __DEFAULT_FN_ATTRS256
5144_mm256_mask_store_epi32 (void *__P, __mmask8 __U, __m256i __A)
5145{
5146 __builtin_ia32_movdqa32store256_mask ((__v8si *) __P,
5147 (__v8si) __A,
5148 (__mmask8) __U);
5149}
5150
5151static __inline__ __m128i __DEFAULT_FN_ATTRS128
5152_mm_mask_mov_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
5153{
5154 return (__m128i) __builtin_ia32_selectq_128 ((__mmask8) __U,
5155 (__v2di) __A,
5156 (__v2di) __W);
5157}
5158
5159static __inline__ __m128i __DEFAULT_FN_ATTRS128
5161{
5162 return (__m128i) __builtin_ia32_selectq_128 ((__mmask8) __U,
5163 (__v2di) __A,
5164 (__v2di) _mm_setzero_si128 ());
5165}
5166
5167static __inline__ __m256i __DEFAULT_FN_ATTRS256
5168_mm256_mask_mov_epi64 (__m256i __W, __mmask8 __U, __m256i __A)
5169{
5170 return (__m256i) __builtin_ia32_selectq_256 ((__mmask8) __U,
5171 (__v4di) __A,
5172 (__v4di) __W);
5173}
5174
5175static __inline__ __m256i __DEFAULT_FN_ATTRS256
5177{
5178 return (__m256i) __builtin_ia32_selectq_256 ((__mmask8) __U,
5179 (__v4di) __A,
5180 (__v4di) _mm256_setzero_si256 ());
5181}
5182
5183static __inline __m128i __DEFAULT_FN_ATTRS128
5184_mm_load_epi64 (void const *__P)
5185{
5186 return *(const __m128i *) __P;
5187}
5188
5189static __inline__ __m128i __DEFAULT_FN_ATTRS128
5190_mm_mask_load_epi64 (__m128i __W, __mmask8 __U, void const *__P)
5191{
5192 return (__m128i) __builtin_ia32_movdqa64load128_mask ((const __v2di *) __P,
5193 (__v2di) __W,
5194 (__mmask8)
5195 __U);
5196}
5197
5198static __inline__ __m128i __DEFAULT_FN_ATTRS128
5199_mm_maskz_load_epi64 (__mmask8 __U, void const *__P)
5200{
5201 return (__m128i) __builtin_ia32_movdqa64load128_mask ((const __v2di *) __P,
5202 (__v2di)
5204 (__mmask8)
5205 __U);
5206}
5207
5208static __inline __m256i __DEFAULT_FN_ATTRS256
5209_mm256_load_epi64 (void const *__P)
5210{
5211 return *(const __m256i *) __P;
5212}
5213
5214static __inline__ __m256i __DEFAULT_FN_ATTRS256
5215_mm256_mask_load_epi64 (__m256i __W, __mmask8 __U, void const *__P)
5216{
5217 return (__m256i) __builtin_ia32_movdqa64load256_mask ((const __v4di *) __P,
5218 (__v4di) __W,
5219 (__mmask8)
5220 __U);
5221}
5222
5223static __inline__ __m256i __DEFAULT_FN_ATTRS256
5224_mm256_maskz_load_epi64 (__mmask8 __U, void const *__P)
5225{
5226 return (__m256i) __builtin_ia32_movdqa64load256_mask ((const __v4di *) __P,
5227 (__v4di)
5229 (__mmask8)
5230 __U);
5231}
5232
5233static __inline void __DEFAULT_FN_ATTRS128
5234_mm_store_epi64 (void *__P, __m128i __A)
5235{
5236 *(__m128i *) __P = __A;
5237}
5238
5239static __inline__ void __DEFAULT_FN_ATTRS128
5240_mm_mask_store_epi64 (void *__P, __mmask8 __U, __m128i __A)
5241{
5242 __builtin_ia32_movdqa64store128_mask ((__v2di *) __P,
5243 (__v2di) __A,
5244 (__mmask8) __U);
5245}
5246
5247static __inline void __DEFAULT_FN_ATTRS256
5248_mm256_store_epi64 (void *__P, __m256i __A)
5249{
5250 *(__m256i *) __P = __A;
5251}
5252
5253static __inline__ void __DEFAULT_FN_ATTRS256
5254_mm256_mask_store_epi64 (void *__P, __mmask8 __U, __m256i __A)
5255{
5256 __builtin_ia32_movdqa64store256_mask ((__v4di *) __P,
5257 (__v4di) __A,
5258 (__mmask8) __U);
5259}
5260
5261static __inline__ __m128d __DEFAULT_FN_ATTRS128
5262_mm_mask_movedup_pd (__m128d __W, __mmask8 __U, __m128d __A)
5263{
5264 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
5265 (__v2df)_mm_movedup_pd(__A),
5266 (__v2df)__W);
5267}
5268
5269static __inline__ __m128d __DEFAULT_FN_ATTRS128
5271{
5272 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
5273 (__v2df)_mm_movedup_pd(__A),
5274 (__v2df)_mm_setzero_pd());
5275}
5276
5277static __inline__ __m256d __DEFAULT_FN_ATTRS256
5278_mm256_mask_movedup_pd (__m256d __W, __mmask8 __U, __m256d __A)
5279{
5280 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
5281 (__v4df)_mm256_movedup_pd(__A),
5282 (__v4df)__W);
5283}
5284
5285static __inline__ __m256d __DEFAULT_FN_ATTRS256
5287{
5288 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
5289 (__v4df)_mm256_movedup_pd(__A),
5290 (__v4df)_mm256_setzero_pd());
5291}
5292
5293static __inline__ __m128i __DEFAULT_FN_ATTRS128
5294_mm_mask_set1_epi32(__m128i __O, __mmask8 __M, int __A)
5295{
5296 return (__m128i)__builtin_ia32_selectd_128(__M,
5297 (__v4si) _mm_set1_epi32(__A),
5298 (__v4si)__O);
5299}
5300
5301static __inline__ __m128i __DEFAULT_FN_ATTRS128
5303{
5304 return (__m128i)__builtin_ia32_selectd_128(__M,
5305 (__v4si) _mm_set1_epi32(__A),
5306 (__v4si)_mm_setzero_si128());
5307}
5308
5309static __inline__ __m256i __DEFAULT_FN_ATTRS256
5310_mm256_mask_set1_epi32(__m256i __O, __mmask8 __M, int __A)
5311{
5312 return (__m256i)__builtin_ia32_selectd_256(__M,
5313 (__v8si) _mm256_set1_epi32(__A),
5314 (__v8si)__O);
5315}
5316
5317static __inline__ __m256i __DEFAULT_FN_ATTRS256
5319{
5320 return (__m256i)__builtin_ia32_selectd_256(__M,
5321 (__v8si) _mm256_set1_epi32(__A),
5322 (__v8si)_mm256_setzero_si256());
5323}
5324
5325
5326static __inline__ __m128i __DEFAULT_FN_ATTRS128
5327_mm_mask_set1_epi64 (__m128i __O, __mmask8 __M, long long __A)
5328{
5329 return (__m128i) __builtin_ia32_selectq_128(__M,
5330 (__v2di) _mm_set1_epi64x(__A),
5331 (__v2di) __O);
5332}
5333
5334static __inline__ __m128i __DEFAULT_FN_ATTRS128
5335_mm_maskz_set1_epi64 (__mmask8 __M, long long __A)
5336{
5337 return (__m128i) __builtin_ia32_selectq_128(__M,
5338 (__v2di) _mm_set1_epi64x(__A),
5339 (__v2di) _mm_setzero_si128());
5340}
5341
5342static __inline__ __m256i __DEFAULT_FN_ATTRS256
5343_mm256_mask_set1_epi64 (__m256i __O, __mmask8 __M, long long __A)
5344{
5345 return (__m256i) __builtin_ia32_selectq_256(__M,
5346 (__v4di) _mm256_set1_epi64x(__A),
5347 (__v4di) __O) ;
5348}
5349
5350static __inline__ __m256i __DEFAULT_FN_ATTRS256
5352{
5353 return (__m256i) __builtin_ia32_selectq_256(__M,
5354 (__v4di) _mm256_set1_epi64x(__A),
5355 (__v4di) _mm256_setzero_si256());
5356}
5357
/* Expands to __builtin_ia32_fixupimmpd128_mask on A, B (as __v2df), C (as
 * __v2di), imm (as int) and the constant mask (__mmask8)-1. The expansion is
 * fully parenthesized so the leading cast cannot bind to a following operator. */
#define _mm_fixupimm_pd(A, B, C, imm) \
  ((__m128d)__builtin_ia32_fixupimmpd128_mask((__v2df)(__m128d)(A), \
                                              (__v2df)(__m128d)(B), \
                                              (__v2di)(__m128i)(C), (int)(imm), \
                                              (__mmask8)-1))
5363
/* Expands to __builtin_ia32_fixupimmpd128_mask on A, B (as __v2df), C (as
 * __v2di), imm (as int) and mask U. The expansion is fully parenthesized so
 * the leading cast cannot bind to a following operator. */
#define _mm_mask_fixupimm_pd(A, U, B, C, imm) \
  ((__m128d)__builtin_ia32_fixupimmpd128_mask((__v2df)(__m128d)(A), \
                                              (__v2df)(__m128d)(B), \
                                              (__v2di)(__m128i)(C), (int)(imm), \
                                              (__mmask8)(U)))
5369
/* Expands to __builtin_ia32_fixupimmpd128_maskz on A, B (as __v2df), C (as
 * __v2di), imm (as int) and mask U. The expansion is fully parenthesized so
 * the leading cast cannot bind to a following operator. */
#define _mm_maskz_fixupimm_pd(U, A, B, C, imm) \
  ((__m128d)__builtin_ia32_fixupimmpd128_maskz((__v2df)(__m128d)(A), \
                                               (__v2df)(__m128d)(B), \
                                               (__v2di)(__m128i)(C), \
                                               (int)(imm), (__mmask8)(U)))
5375
/* Expands to __builtin_ia32_fixupimmpd256_mask on A, B (as __v4df), C (as
 * __v4di), imm (as int) and the constant mask (__mmask8)-1. The expansion is
 * fully parenthesized so the leading cast cannot bind to a following operator. */
#define _mm256_fixupimm_pd(A, B, C, imm) \
  ((__m256d)__builtin_ia32_fixupimmpd256_mask((__v4df)(__m256d)(A), \
                                              (__v4df)(__m256d)(B), \
                                              (__v4di)(__m256i)(C), (int)(imm), \
                                              (__mmask8)-1))
5381
/* Expands to __builtin_ia32_fixupimmpd256_mask on A, B (as __v4df), C (as
 * __v4di), imm (as int) and mask U. The expansion is fully parenthesized so
 * the leading cast cannot bind to a following operator. */
#define _mm256_mask_fixupimm_pd(A, U, B, C, imm) \
  ((__m256d)__builtin_ia32_fixupimmpd256_mask((__v4df)(__m256d)(A), \
                                              (__v4df)(__m256d)(B), \
                                              (__v4di)(__m256i)(C), (int)(imm), \
                                              (__mmask8)(U)))
5387
/* Expands to __builtin_ia32_fixupimmpd256_maskz on A, B (as __v4df), C (as
 * __v4di), imm (as int) and mask U. The expansion is fully parenthesized so
 * the leading cast cannot bind to a following operator. */
#define _mm256_maskz_fixupimm_pd(U, A, B, C, imm) \
  ((__m256d)__builtin_ia32_fixupimmpd256_maskz((__v4df)(__m256d)(A), \
                                               (__v4df)(__m256d)(B), \
                                               (__v4di)(__m256i)(C), \
                                               (int)(imm), (__mmask8)(U)))
5393
/* Expands to __builtin_ia32_fixupimmps128_mask on A, B (as __v4sf), C (as
 * __v4si), imm (as int) and the constant mask (__mmask8)-1. The expansion is
 * fully parenthesized so the leading cast cannot bind to a following operator. */
#define _mm_fixupimm_ps(A, B, C, imm) \
  ((__m128)__builtin_ia32_fixupimmps128_mask((__v4sf)(__m128)(A), \
                                             (__v4sf)(__m128)(B), \
                                             (__v4si)(__m128i)(C), (int)(imm), \
                                             (__mmask8)-1))
5399
/* Expands to __builtin_ia32_fixupimmps128_mask on A, B (as __v4sf), C (as
 * __v4si), imm (as int) and mask U. The expansion is fully parenthesized so
 * the leading cast cannot bind to a following operator. */
#define _mm_mask_fixupimm_ps(A, U, B, C, imm) \
  ((__m128)__builtin_ia32_fixupimmps128_mask((__v4sf)(__m128)(A), \
                                             (__v4sf)(__m128)(B), \
                                             (__v4si)(__m128i)(C), (int)(imm), \
                                             (__mmask8)(U)))
5405
/* Expands to __builtin_ia32_fixupimmps128_maskz on A, B (as __v4sf), C (as
 * __v4si), imm (as int) and mask U. The expansion is fully parenthesized so
 * the leading cast cannot bind to a following operator. */
#define _mm_maskz_fixupimm_ps(U, A, B, C, imm) \
  ((__m128)__builtin_ia32_fixupimmps128_maskz((__v4sf)(__m128)(A), \
                                              (__v4sf)(__m128)(B), \
                                              (__v4si)(__m128i)(C), (int)(imm), \
                                              (__mmask8)(U)))
5411
/* Expands to __builtin_ia32_fixupimmps256_mask on A, B (as __v8sf), C (as
 * __v8si), imm (as int) and the constant mask (__mmask8)-1. The expansion is
 * fully parenthesized so the leading cast cannot bind to a following operator. */
#define _mm256_fixupimm_ps(A, B, C, imm) \
  ((__m256)__builtin_ia32_fixupimmps256_mask((__v8sf)(__m256)(A), \
                                             (__v8sf)(__m256)(B), \
                                             (__v8si)(__m256i)(C), (int)(imm), \
                                             (__mmask8)-1))
5417
/* Expands to __builtin_ia32_fixupimmps256_mask on A, B (as __v8sf), C (as
 * __v8si), imm (as int) and mask U. The expansion is fully parenthesized so
 * the leading cast cannot bind to a following operator. */
#define _mm256_mask_fixupimm_ps(A, U, B, C, imm) \
  ((__m256)__builtin_ia32_fixupimmps256_mask((__v8sf)(__m256)(A), \
                                             (__v8sf)(__m256)(B), \
                                             (__v8si)(__m256i)(C), (int)(imm), \
                                             (__mmask8)(U)))
5423
/* Expands to __builtin_ia32_fixupimmps256_maskz on A, B (as __v8sf), C (as
 * __v8si), imm (as int) and mask U. The expansion is fully parenthesized so
 * the leading cast cannot bind to a following operator. */
#define _mm256_maskz_fixupimm_ps(U, A, B, C, imm) \
  ((__m256)__builtin_ia32_fixupimmps256_maskz((__v8sf)(__m256)(A), \
                                              (__v8sf)(__m256)(B), \
                                              (__v8si)(__m256i)(C), (int)(imm), \
                                              (__mmask8)(U)))
5429
5430static __inline__ __m128d __DEFAULT_FN_ATTRS128
5431_mm_mask_load_pd (__m128d __W, __mmask8 __U, void const *__P)
5432{
5433 return (__m128d) __builtin_ia32_loadapd128_mask ((const __v2df *) __P,
5434 (__v2df) __W,
5435 (__mmask8) __U);
5436}
5437
5438static __inline__ __m128d __DEFAULT_FN_ATTRS128
5439_mm_maskz_load_pd (__mmask8 __U, void const *__P)
5440{
5441 return (__m128d) __builtin_ia32_loadapd128_mask ((const __v2df *) __P,
5442 (__v2df)
5443 _mm_setzero_pd (),
5444 (__mmask8) __U);
5445}
5446
5447static __inline__ __m256d __DEFAULT_FN_ATTRS256
5448_mm256_mask_load_pd (__m256d __W, __mmask8 __U, void const *__P)
5449{
5450 return (__m256d) __builtin_ia32_loadapd256_mask ((const __v4df *) __P,
5451 (__v4df) __W,
5452 (__mmask8) __U);
5453}
5454
5455static __inline__ __m256d __DEFAULT_FN_ATTRS256
5456_mm256_maskz_load_pd (__mmask8 __U, void const *__P)
5457{
5458 return (__m256d) __builtin_ia32_loadapd256_mask ((const __v4df *) __P,
5459 (__v4df)
5461 (__mmask8) __U);
5462}
5463
5464static __inline__ __m128 __DEFAULT_FN_ATTRS128
5465_mm_mask_load_ps (__m128 __W, __mmask8 __U, void const *__P)
5466{
5467 return (__m128) __builtin_ia32_loadaps128_mask ((const __v4sf *) __P,
5468 (__v4sf) __W,
5469 (__mmask8) __U);
5470}
5471
5472static __inline__ __m128 __DEFAULT_FN_ATTRS128
5473_mm_maskz_load_ps (__mmask8 __U, void const *__P)
5474{
5475 return (__m128) __builtin_ia32_loadaps128_mask ((const __v4sf *) __P,
5476 (__v4sf)
5477 _mm_setzero_ps (),
5478 (__mmask8) __U);
5479}
5480
5481static __inline__ __m256 __DEFAULT_FN_ATTRS256
5482_mm256_mask_load_ps (__m256 __W, __mmask8 __U, void const *__P)
5483{
5484 return (__m256) __builtin_ia32_loadaps256_mask ((const __v8sf *) __P,
5485 (__v8sf) __W,
5486 (__mmask8) __U);
5487}
5488
5489static __inline__ __m256 __DEFAULT_FN_ATTRS256
5490_mm256_maskz_load_ps (__mmask8 __U, void const *__P)
5491{
5492 return (__m256) __builtin_ia32_loadaps256_mask ((const __v8sf *) __P,
5493 (__v8sf)
5495 (__mmask8) __U);
5496}
5497
5498static __inline __m128i __DEFAULT_FN_ATTRS128
5499_mm_loadu_epi64 (void const *__P)
5500{
5501 struct __loadu_epi64 {
5502 __m128i_u __v;
5503 } __attribute__((__packed__, __may_alias__));
5504 return ((const struct __loadu_epi64*)__P)->__v;
5505}
5506
5507static __inline__ __m128i __DEFAULT_FN_ATTRS128
5508_mm_mask_loadu_epi64 (__m128i __W, __mmask8 __U, void const *__P)
5509{
5510 return (__m128i) __builtin_ia32_loaddqudi128_mask ((const __v2di *) __P,
5511 (__v2di) __W,
5512 (__mmask8) __U);
5513}
5514
5515static __inline__ __m128i __DEFAULT_FN_ATTRS128
5516_mm_maskz_loadu_epi64 (__mmask8 __U, void const *__P)
5517{
5518 return (__m128i) __builtin_ia32_loaddqudi128_mask ((const __v2di *) __P,
5519 (__v2di)
5521 (__mmask8) __U);
5522}
5523
5524static __inline __m256i __DEFAULT_FN_ATTRS256
5525_mm256_loadu_epi64 (void const *__P)
5526{
5527 struct __loadu_epi64 {
5528 __m256i_u __v;
5529 } __attribute__((__packed__, __may_alias__));
5530 return ((const struct __loadu_epi64*)__P)->__v;
5531}
5532
5533static __inline__ __m256i __DEFAULT_FN_ATTRS256
5534_mm256_mask_loadu_epi64 (__m256i __W, __mmask8 __U, void const *__P)
5535{
5536 return (__m256i) __builtin_ia32_loaddqudi256_mask ((const __v4di *) __P,
5537 (__v4di) __W,
5538 (__mmask8) __U);
5539}
5540
5541static __inline__ __m256i __DEFAULT_FN_ATTRS256
5543{
5544 return (__m256i) __builtin_ia32_loaddqudi256_mask ((const __v4di *) __P,
5545 (__v4di)
5547 (__mmask8) __U);
5548}
5549
5550static __inline __m128i __DEFAULT_FN_ATTRS128
5551_mm_loadu_epi32 (void const *__P)
5552{
5553 struct __loadu_epi32 {
5554 __m128i_u __v;
5555 } __attribute__((__packed__, __may_alias__));
5556 return ((const struct __loadu_epi32*)__P)->__v;
5557}
5558
5559static __inline__ __m128i __DEFAULT_FN_ATTRS128
5560_mm_mask_loadu_epi32 (__m128i __W, __mmask8 __U, void const *__P)
5561{
5562 return (__m128i) __builtin_ia32_loaddqusi128_mask ((const __v4si *) __P,
5563 (__v4si) __W,
5564 (__mmask8) __U);
5565}
5566
5567static __inline__ __m128i __DEFAULT_FN_ATTRS128
5568_mm_maskz_loadu_epi32 (__mmask8 __U, void const *__P)
5569{
5570 return (__m128i) __builtin_ia32_loaddqusi128_mask ((const __v4si *) __P,
5571 (__v4si)
5573 (__mmask8) __U);
5574}
5575
5576static __inline __m256i __DEFAULT_FN_ATTRS256
5577_mm256_loadu_epi32 (void const *__P)
5578{
5579 struct __loadu_epi32 {
5580 __m256i_u __v;
5581 } __attribute__((__packed__, __may_alias__));
5582 return ((const struct __loadu_epi32*)__P)->__v;
5583}
5584
5585static __inline__ __m256i __DEFAULT_FN_ATTRS256
5586_mm256_mask_loadu_epi32 (__m256i __W, __mmask8 __U, void const *__P)
5587{
5588 return (__m256i) __builtin_ia32_loaddqusi256_mask ((const __v8si *) __P,
5589 (__v8si) __W,
5590 (__mmask8) __U);
5591}
5592
5593static __inline__ __m256i __DEFAULT_FN_ATTRS256
5595{
5596 return (__m256i) __builtin_ia32_loaddqusi256_mask ((const __v8si *) __P,
5597 (__v8si)
5599 (__mmask8) __U);
5600}
5601
5602static __inline__ __m128d __DEFAULT_FN_ATTRS128
5603_mm_mask_loadu_pd (__m128d __W, __mmask8 __U, void const *__P)
5604{
5605 return (__m128d) __builtin_ia32_loadupd128_mask ((const __v2df *) __P,
5606 (__v2df) __W,
5607 (__mmask8) __U);
5608}
5609
5610static __inline__ __m128d __DEFAULT_FN_ATTRS128
5611_mm_maskz_loadu_pd (__mmask8 __U, void const *__P)
5612{
5613 return (__m128d) __builtin_ia32_loadupd128_mask ((const __v2df *) __P,
5614 (__v2df)
5615 _mm_setzero_pd (),
5616 (__mmask8) __U);
5617}
5618
5619static __inline__ __m256d __DEFAULT_FN_ATTRS256
5620_mm256_mask_loadu_pd (__m256d __W, __mmask8 __U, void const *__P)
5621{
5622 return (__m256d) __builtin_ia32_loadupd256_mask ((const __v4df *) __P,
5623 (__v4df) __W,
5624 (__mmask8) __U);
5625}
5626
5627static __inline__ __m256d __DEFAULT_FN_ATTRS256
5628_mm256_maskz_loadu_pd (__mmask8 __U, void const *__P)
5629{
5630 return (__m256d) __builtin_ia32_loadupd256_mask ((const __v4df *) __P,
5631 (__v4df)
5633 (__mmask8) __U);
5634}
5635
5636static __inline__ __m128 __DEFAULT_FN_ATTRS128
5637_mm_mask_loadu_ps (__m128 __W, __mmask8 __U, void const *__P)
5638{
5639 return (__m128) __builtin_ia32_loadups128_mask ((const __v4sf *) __P,
5640 (__v4sf) __W,
5641 (__mmask8) __U);
5642}
5643
5644static __inline__ __m128 __DEFAULT_FN_ATTRS128
5645_mm_maskz_loadu_ps (__mmask8 __U, void const *__P)
5646{
5647 return (__m128) __builtin_ia32_loadups128_mask ((const __v4sf *) __P,
5648 (__v4sf)
5649 _mm_setzero_ps (),
5650 (__mmask8) __U);
5651}
5652
5653static __inline__ __m256 __DEFAULT_FN_ATTRS256
5654_mm256_mask_loadu_ps (__m256 __W, __mmask8 __U, void const *__P)
5655{
5656 return (__m256) __builtin_ia32_loadups256_mask ((const __v8sf *) __P,
5657 (__v8sf) __W,
5658 (__mmask8) __U);
5659}
5660
5661static __inline__ __m256 __DEFAULT_FN_ATTRS256
5662_mm256_maskz_loadu_ps (__mmask8 __U, void const *__P)
5663{
5664 return (__m256) __builtin_ia32_loadups256_mask ((const __v8sf *) __P,
5665 (__v8sf)
5667 (__mmask8) __U);
5668}
5669
5670static __inline__ void __DEFAULT_FN_ATTRS128
5671_mm_mask_store_pd (void *__P, __mmask8 __U, __m128d __A)
5672{
5673 __builtin_ia32_storeapd128_mask ((__v2df *) __P,
5674 (__v2df) __A,
5675 (__mmask8) __U);
5676}
5677
5678static __inline__ void __DEFAULT_FN_ATTRS256
5679_mm256_mask_store_pd (void *__P, __mmask8 __U, __m256d __A)
5680{
5681 __builtin_ia32_storeapd256_mask ((__v4df *) __P,
5682 (__v4df) __A,
5683 (__mmask8) __U);
5684}
5685
5686static __inline__ void __DEFAULT_FN_ATTRS128
5687_mm_mask_store_ps (void *__P, __mmask8 __U, __m128 __A)
5688{
5689 __builtin_ia32_storeaps128_mask ((__v4sf *) __P,
5690 (__v4sf) __A,
5691 (__mmask8) __U);
5692}
5693
5694static __inline__ void __DEFAULT_FN_ATTRS256
5695_mm256_mask_store_ps (void *__P, __mmask8 __U, __m256 __A)
5696{
5697 __builtin_ia32_storeaps256_mask ((__v8sf *) __P,
5698 (__v8sf) __A,
5699 (__mmask8) __U);
5700}
5701
5702static __inline void __DEFAULT_FN_ATTRS128
5703_mm_storeu_epi64 (void *__P, __m128i __A)
5704{
5705 struct __storeu_epi64 {
5706 __m128i_u __v;
5707 } __attribute__((__packed__, __may_alias__));
5708 ((struct __storeu_epi64*)__P)->__v = __A;
5709}
5710
5711static __inline__ void __DEFAULT_FN_ATTRS128
5712_mm_mask_storeu_epi64 (void *__P, __mmask8 __U, __m128i __A)
5713{
5714 __builtin_ia32_storedqudi128_mask ((__v2di *) __P,
5715 (__v2di) __A,
5716 (__mmask8) __U);
5717}
5718
5719static __inline void __DEFAULT_FN_ATTRS256
5720_mm256_storeu_epi64 (void *__P, __m256i __A)
5721{
5722 struct __storeu_epi64 {
5723 __m256i_u __v;
5724 } __attribute__((__packed__, __may_alias__));
5725 ((struct __storeu_epi64*)__P)->__v = __A;
5726}
5727
5728static __inline__ void __DEFAULT_FN_ATTRS256
5729_mm256_mask_storeu_epi64 (void *__P, __mmask8 __U, __m256i __A)
5730{
5731 __builtin_ia32_storedqudi256_mask ((__v4di *) __P,
5732 (__v4di) __A,
5733 (__mmask8) __U);
5734}
5735
5736static __inline void __DEFAULT_FN_ATTRS128
5737_mm_storeu_epi32 (void *__P, __m128i __A)
5738{
5739 struct __storeu_epi32 {
5740 __m128i_u __v;
5741 } __attribute__((__packed__, __may_alias__));
5742 ((struct __storeu_epi32*)__P)->__v = __A;
5743}
5744
5745static __inline__ void __DEFAULT_FN_ATTRS128
5746_mm_mask_storeu_epi32 (void *__P, __mmask8 __U, __m128i __A)
5747{
5748 __builtin_ia32_storedqusi128_mask ((__v4si *) __P,
5749 (__v4si) __A,
5750 (__mmask8) __U);
5751}
5752
5753static __inline void __DEFAULT_FN_ATTRS256
5754_mm256_storeu_epi32 (void *__P, __m256i __A)
5755{
5756 struct __storeu_epi32 {
5757 __m256i_u __v;
5758 } __attribute__((__packed__, __may_alias__));
5759 ((struct __storeu_epi32*)__P)->__v = __A;
5760}
5761
5762static __inline__ void __DEFAULT_FN_ATTRS256
5763_mm256_mask_storeu_epi32 (void *__P, __mmask8 __U, __m256i __A)
5764{
5765 __builtin_ia32_storedqusi256_mask ((__v8si *) __P,
5766 (__v8si) __A,
5767 (__mmask8) __U);
5768}
5769
5770static __inline__ void __DEFAULT_FN_ATTRS128
5771_mm_mask_storeu_pd (void *__P, __mmask8 __U, __m128d __A)
5772{
5773 __builtin_ia32_storeupd128_mask ((__v2df *) __P,
5774 (__v2df) __A,
5775 (__mmask8) __U);
5776}
5777
5778static __inline__ void __DEFAULT_FN_ATTRS256
5779_mm256_mask_storeu_pd (void *__P, __mmask8 __U, __m256d __A)
5780{
5781 __builtin_ia32_storeupd256_mask ((__v4df *) __P,
5782 (__v4df) __A,
5783 (__mmask8) __U);
5784}
5785
5786static __inline__ void __DEFAULT_FN_ATTRS128
5787_mm_mask_storeu_ps (void *__P, __mmask8 __U, __m128 __A)
5788{
5789 __builtin_ia32_storeups128_mask ((__v4sf *) __P,
5790 (__v4sf) __A,
5791 (__mmask8) __U);
5792}
5793
5794static __inline__ void __DEFAULT_FN_ATTRS256
5795_mm256_mask_storeu_ps (void *__P, __mmask8 __U, __m256 __A)
5796{
5797 __builtin_ia32_storeups256_mask ((__v8sf *) __P,
5798 (__v8sf) __A,
5799 (__mmask8) __U);
5800}
5801
5802
5803static __inline__ __m128d __DEFAULT_FN_ATTRS128
5804_mm_mask_unpackhi_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
5805{
5806 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
5807 (__v2df)_mm_unpackhi_pd(__A, __B),
5808 (__v2df)__W);
5809}
5810
5811static __inline__ __m128d __DEFAULT_FN_ATTRS128
5812_mm_maskz_unpackhi_pd(__mmask8 __U, __m128d __A, __m128d __B)
5813{
5814 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
5815 (__v2df)_mm_unpackhi_pd(__A, __B),
5816 (__v2df)_mm_setzero_pd());
5817}
5818
5819static __inline__ __m256d __DEFAULT_FN_ATTRS256
5820_mm256_mask_unpackhi_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B)
5821{
5822 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
5823 (__v4df)_mm256_unpackhi_pd(__A, __B),
5824 (__v4df)__W);
5825}
5826
5827static __inline__ __m256d __DEFAULT_FN_ATTRS256
5828_mm256_maskz_unpackhi_pd(__mmask8 __U, __m256d __A, __m256d __B)
5829{
5830 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
5831 (__v4df)_mm256_unpackhi_pd(__A, __B),
5832 (__v4df)_mm256_setzero_pd());
5833}
5834
5835static __inline__ __m128 __DEFAULT_FN_ATTRS128
5836_mm_mask_unpackhi_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
5837{
5838 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
5839 (__v4sf)_mm_unpackhi_ps(__A, __B),
5840 (__v4sf)__W);
5841}
5842
5843static __inline__ __m128 __DEFAULT_FN_ATTRS128
5844_mm_maskz_unpackhi_ps(__mmask8 __U, __m128 __A, __m128 __B)
5845{
5846 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
5847 (__v4sf)_mm_unpackhi_ps(__A, __B),
5848 (__v4sf)_mm_setzero_ps());
5849}
5850
5851static __inline__ __m256 __DEFAULT_FN_ATTRS256
5852_mm256_mask_unpackhi_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
5853{
5854 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
5855 (__v8sf)_mm256_unpackhi_ps(__A, __B),
5856 (__v8sf)__W);
5857}
5858
5859static __inline__ __m256 __DEFAULT_FN_ATTRS256
5860_mm256_maskz_unpackhi_ps(__mmask8 __U, __m256 __A, __m256 __B)
5861{
5862 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
5863 (__v8sf)_mm256_unpackhi_ps(__A, __B),
5864 (__v8sf)_mm256_setzero_ps());
5865}
5866
5867static __inline__ __m128d __DEFAULT_FN_ATTRS128
5868_mm_mask_unpacklo_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
5869{
5870 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
5871 (__v2df)_mm_unpacklo_pd(__A, __B),
5872 (__v2df)__W);
5873}
5874
5875static __inline__ __m128d __DEFAULT_FN_ATTRS128
5876_mm_maskz_unpacklo_pd(__mmask8 __U, __m128d __A, __m128d __B)
5877{
5878 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
5879 (__v2df)_mm_unpacklo_pd(__A, __B),
5880 (__v2df)_mm_setzero_pd());
5881}
5882
5883static __inline__ __m256d __DEFAULT_FN_ATTRS256
5884_mm256_mask_unpacklo_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B)
5885{
5886 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
5887 (__v4df)_mm256_unpacklo_pd(__A, __B),
5888 (__v4df)__W);
5889}
5890
5891static __inline__ __m256d __DEFAULT_FN_ATTRS256
5892_mm256_maskz_unpacklo_pd(__mmask8 __U, __m256d __A, __m256d __B)
5893{
5894 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
5895 (__v4df)_mm256_unpacklo_pd(__A, __B),
5896 (__v4df)_mm256_setzero_pd());
5897}
5898
5899static __inline__ __m128 __DEFAULT_FN_ATTRS128
5900_mm_mask_unpacklo_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
5901{
5902 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
5903 (__v4sf)_mm_unpacklo_ps(__A, __B),
5904 (__v4sf)__W);
5905}
5906
5907static __inline__ __m128 __DEFAULT_FN_ATTRS128
5908_mm_maskz_unpacklo_ps(__mmask8 __U, __m128 __A, __m128 __B)
5909{
5910 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
5911 (__v4sf)_mm_unpacklo_ps(__A, __B),
5912 (__v4sf)_mm_setzero_ps());
5913}
5914
5915static __inline__ __m256 __DEFAULT_FN_ATTRS256
5916_mm256_mask_unpacklo_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
5917{
5918 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
5919 (__v8sf)_mm256_unpacklo_ps(__A, __B),
5920 (__v8sf)__W);
5921}
5922
5923static __inline__ __m256 __DEFAULT_FN_ATTRS256
5924_mm256_maskz_unpacklo_ps(__mmask8 __U, __m256 __A, __m256 __B)
5925{
5926 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
5927 (__v8sf)_mm256_unpacklo_ps(__A, __B),
5928 (__v8sf)_mm256_setzero_ps());
5929}
5930
5931static __inline__ __m128d __DEFAULT_FN_ATTRS128
5932_mm_rcp14_pd (__m128d __A)
5933{
5934 return (__m128d) __builtin_ia32_rcp14pd128_mask ((__v2df) __A,
5935 (__v2df)
5936 _mm_setzero_pd (),
5937 (__mmask8) -1);
5938}
5939
5940static __inline__ __m128d __DEFAULT_FN_ATTRS128
5941_mm_mask_rcp14_pd (__m128d __W, __mmask8 __U, __m128d __A)
5942{
5943 return (__m128d) __builtin_ia32_rcp14pd128_mask ((__v2df) __A,
5944 (__v2df) __W,
5945 (__mmask8) __U);
5946}
5947
5948static __inline__ __m128d __DEFAULT_FN_ATTRS128
5950{
5951 return (__m128d) __builtin_ia32_rcp14pd128_mask ((__v2df) __A,
5952 (__v2df)
5953 _mm_setzero_pd (),
5954 (__mmask8) __U);
5955}
5956
5957static __inline__ __m256d __DEFAULT_FN_ATTRS256
5958_mm256_rcp14_pd (__m256d __A)
5959{
5960 return (__m256d) __builtin_ia32_rcp14pd256_mask ((__v4df) __A,
5961 (__v4df)
5963 (__mmask8) -1);
5964}
5965
5966static __inline__ __m256d __DEFAULT_FN_ATTRS256
5967_mm256_mask_rcp14_pd (__m256d __W, __mmask8 __U, __m256d __A)
5968{
5969 return (__m256d) __builtin_ia32_rcp14pd256_mask ((__v4df) __A,
5970 (__v4df) __W,
5971 (__mmask8) __U);
5972}
5973
5974static __inline__ __m256d __DEFAULT_FN_ATTRS256
5976{
5977 return (__m256d) __builtin_ia32_rcp14pd256_mask ((__v4df) __A,
5978 (__v4df)
5980 (__mmask8) __U);
5981}
5982
5983static __inline__ __m128 __DEFAULT_FN_ATTRS128
5984_mm_rcp14_ps (__m128 __A)
5985{
5986 return (__m128) __builtin_ia32_rcp14ps128_mask ((__v4sf) __A,
5987 (__v4sf)
5988 _mm_setzero_ps (),
5989 (__mmask8) -1);
5990}
5991
5992static __inline__ __m128 __DEFAULT_FN_ATTRS128
5993_mm_mask_rcp14_ps (__m128 __W, __mmask8 __U, __m128 __A)
5994{
5995 return (__m128) __builtin_ia32_rcp14ps128_mask ((__v4sf) __A,
5996 (__v4sf) __W,
5997 (__mmask8) __U);
5998}
5999
6000static __inline__ __m128 __DEFAULT_FN_ATTRS128
6002{
6003 return (__m128) __builtin_ia32_rcp14ps128_mask ((__v4sf) __A,
6004 (__v4sf)
6005 _mm_setzero_ps (),
6006 (__mmask8) __U);
6007}
6008
6009static __inline__ __m256 __DEFAULT_FN_ATTRS256
6011{
6012 return (__m256) __builtin_ia32_rcp14ps256_mask ((__v8sf) __A,
6013 (__v8sf)
6015 (__mmask8) -1);
6016}
6017
6018static __inline__ __m256 __DEFAULT_FN_ATTRS256
6019_mm256_mask_rcp14_ps (__m256 __W, __mmask8 __U, __m256 __A)
6020{
6021 return (__m256) __builtin_ia32_rcp14ps256_mask ((__v8sf) __A,
6022 (__v8sf) __W,
6023 (__mmask8) __U);
6024}
6025
6026static __inline__ __m256 __DEFAULT_FN_ATTRS256
6028{
6029 return (__m256) __builtin_ia32_rcp14ps256_mask ((__v8sf) __A,
6030 (__v8sf)
6032 (__mmask8) __U);
6033}
6034
/* Masked form of _mm_permute_pd: the select builtin chooses, under mask U,
 * between the permute result and W.  The expansion is fully parenthesized so
 * the leading cast applies to the whole call in any surrounding expression. */
#define _mm_mask_permute_pd(W, U, X, C) \
  ((__m128d)__builtin_ia32_selectpd_128((__mmask8)(U), \
                                        (__v2df)_mm_permute_pd((X), (C)), \
                                        (__v2df)(__m128d)(W)))
6039
/* Zero-masked form of _mm_permute_pd: the select builtin chooses, under mask
 * U, between the permute result and an all-zero vector.  The expansion is
 * fully parenthesized so the leading cast applies to the whole call. */
#define _mm_maskz_permute_pd(U, X, C) \
  ((__m128d)__builtin_ia32_selectpd_128((__mmask8)(U), \
                                        (__v2df)_mm_permute_pd((X), (C)), \
                                        (__v2df)_mm_setzero_pd()))
6044
/* Masked form of _mm256_permute_pd: the select builtin chooses, under mask U,
 * between the permute result and W.  The expansion is fully parenthesized so
 * the leading cast applies to the whole call. */
#define _mm256_mask_permute_pd(W, U, X, C) \
  ((__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \
                                        (__v4df)_mm256_permute_pd((X), (C)), \
                                        (__v4df)(__m256d)(W)))
6049
/* Zero-masked form of _mm256_permute_pd: the select builtin chooses, under
 * mask U, between the permute result and an all-zero vector.  The expansion
 * is fully parenthesized so the leading cast applies to the whole call. */
#define _mm256_maskz_permute_pd(U, X, C) \
  ((__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \
                                        (__v4df)_mm256_permute_pd((X), (C)), \
                                        (__v4df)_mm256_setzero_pd()))
6054
/* Masked form of _mm_permute_ps: the select builtin chooses, under mask U,
 * between the permute result and W.  The expansion is fully parenthesized so
 * the leading cast applies to the whole call. */
#define _mm_mask_permute_ps(W, U, X, C) \
  ((__m128)__builtin_ia32_selectps_128((__mmask8)(U), \
                                       (__v4sf)_mm_permute_ps((X), (C)), \
                                       (__v4sf)(__m128)(W)))
6059
/* Zero-masked form of _mm_permute_ps: the select builtin chooses, under mask
 * U, between the permute result and an all-zero vector.  The expansion is
 * fully parenthesized so the leading cast applies to the whole call. */
#define _mm_maskz_permute_ps(U, X, C) \
  ((__m128)__builtin_ia32_selectps_128((__mmask8)(U), \
                                       (__v4sf)_mm_permute_ps((X), (C)), \
                                       (__v4sf)_mm_setzero_ps()))
6064
/* Masked form of _mm256_permute_ps: the select builtin chooses, under mask U,
 * between the permute result and W.  The expansion is fully parenthesized so
 * the leading cast applies to the whole call. */
#define _mm256_mask_permute_ps(W, U, X, C) \
  ((__m256)__builtin_ia32_selectps_256((__mmask8)(U), \
                                       (__v8sf)_mm256_permute_ps((X), (C)), \
                                       (__v8sf)(__m256)(W)))
6069
/* Zero-masked form of _mm256_permute_ps: the select builtin chooses, under
 * mask U, between the permute result and an all-zero vector.  The expansion
 * is fully parenthesized so the leading cast applies to the whole call. */
#define _mm256_maskz_permute_ps(U, X, C) \
  ((__m256)__builtin_ia32_selectps_256((__mmask8)(U), \
                                       (__v8sf)_mm256_permute_ps((X), (C)), \
                                       (__v8sf)_mm256_setzero_ps()))
6074
6075static __inline__ __m128d __DEFAULT_FN_ATTRS128
6076_mm_mask_permutevar_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128i __C)
6077{
6078 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
6079 (__v2df)_mm_permutevar_pd(__A, __C),
6080 (__v2df)__W);
6081}
6082
6083static __inline__ __m128d __DEFAULT_FN_ATTRS128
6084_mm_maskz_permutevar_pd(__mmask8 __U, __m128d __A, __m128i __C)
6085{
6086 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
6087 (__v2df)_mm_permutevar_pd(__A, __C),
6088 (__v2df)_mm_setzero_pd());
6089}
6090
6091static __inline__ __m256d __DEFAULT_FN_ATTRS256
6092_mm256_mask_permutevar_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256i __C)
6093{
6094 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
6095 (__v4df)_mm256_permutevar_pd(__A, __C),
6096 (__v4df)__W);
6097}
6098
6099static __inline__ __m256d __DEFAULT_FN_ATTRS256
6100_mm256_maskz_permutevar_pd(__mmask8 __U, __m256d __A, __m256i __C)
6101{
6102 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
6103 (__v4df)_mm256_permutevar_pd(__A, __C),
6104 (__v4df)_mm256_setzero_pd());
6105}
6106
6107static __inline__ __m128 __DEFAULT_FN_ATTRS128
6108_mm_mask_permutevar_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128i __C)
6109{
6110 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
6111 (__v4sf)_mm_permutevar_ps(__A, __C),
6112 (__v4sf)__W);
6113}
6114
6115static __inline__ __m128 __DEFAULT_FN_ATTRS128
6116_mm_maskz_permutevar_ps(__mmask8 __U, __m128 __A, __m128i __C)
6117{
6118 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
6119 (__v4sf)_mm_permutevar_ps(__A, __C),
6120 (__v4sf)_mm_setzero_ps());
6121}
6122
6123static __inline__ __m256 __DEFAULT_FN_ATTRS256
6124_mm256_mask_permutevar_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256i __C)
6125{
6126 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
6127 (__v8sf)_mm256_permutevar_ps(__A, __C),
6128 (__v8sf)__W);
6129}
6130
6131static __inline__ __m256 __DEFAULT_FN_ATTRS256
6132_mm256_maskz_permutevar_ps(__mmask8 __U, __m256 __A, __m256i __C)
6133{
6134 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
6135 (__v8sf)_mm256_permutevar_ps(__A, __C),
6136 (__v8sf)_mm256_setzero_ps());
6137}
6138
6139static __inline__ __mmask8 __DEFAULT_FN_ATTRS128
6140_mm_test_epi32_mask (__m128i __A, __m128i __B)
6141{
6143}
6144
6145static __inline__ __mmask8 __DEFAULT_FN_ATTRS128
6146_mm_mask_test_epi32_mask (__mmask8 __U, __m128i __A, __m128i __B)
6147{
6148 return _mm_mask_cmpneq_epi32_mask (__U, _mm_and_si128 (__A, __B),
6150}
6151
6152static __inline__ __mmask8 __DEFAULT_FN_ATTRS256
6153_mm256_test_epi32_mask (__m256i __A, __m256i __B)
6154{
6155 return _mm256_cmpneq_epi32_mask (_mm256_and_si256 (__A, __B),
6157}
6158
6159static __inline__ __mmask8 __DEFAULT_FN_ATTRS256
6160_mm256_mask_test_epi32_mask (__mmask8 __U, __m256i __A, __m256i __B)
6161{
6162 return _mm256_mask_cmpneq_epi32_mask (__U, _mm256_and_si256 (__A, __B),
6164}
6165
6166static __inline__ __mmask8 __DEFAULT_FN_ATTRS128
6167_mm_test_epi64_mask (__m128i __A, __m128i __B)
6168{
6170}
6171
6172static __inline__ __mmask8 __DEFAULT_FN_ATTRS128
6173_mm_mask_test_epi64_mask (__mmask8 __U, __m128i __A, __m128i __B)
6174{
6175 return _mm_mask_cmpneq_epi64_mask (__U, _mm_and_si128 (__A, __B),
6177}
6178
6179static __inline__ __mmask8 __DEFAULT_FN_ATTRS256
6180_mm256_test_epi64_mask (__m256i __A, __m256i __B)
6181{
6182 return _mm256_cmpneq_epi64_mask (_mm256_and_si256 (__A, __B),
6184}
6185
6186static __inline__ __mmask8 __DEFAULT_FN_ATTRS256
6187_mm256_mask_test_epi64_mask (__mmask8 __U, __m256i __A, __m256i __B)
6188{
6189 return _mm256_mask_cmpneq_epi64_mask (__U, _mm256_and_si256 (__A, __B),
6191}
6192
6193static __inline__ __mmask8 __DEFAULT_FN_ATTRS128
6194_mm_testn_epi32_mask (__m128i __A, __m128i __B)
6195{
6197}
6198
6199static __inline__ __mmask8 __DEFAULT_FN_ATTRS128
6200_mm_mask_testn_epi32_mask (__mmask8 __U, __m128i __A, __m128i __B)
6201{
6202 return _mm_mask_cmpeq_epi32_mask (__U, _mm_and_si128 (__A, __B),
6204}
6205
6206static __inline__ __mmask8 __DEFAULT_FN_ATTRS256
6207_mm256_testn_epi32_mask (__m256i __A, __m256i __B)
6208{
6209 return _mm256_cmpeq_epi32_mask (_mm256_and_si256 (__A, __B),
6211}
6212
6213static __inline__ __mmask8 __DEFAULT_FN_ATTRS256
6214_mm256_mask_testn_epi32_mask (__mmask8 __U, __m256i __A, __m256i __B)
6215{
6216 return _mm256_mask_cmpeq_epi32_mask (__U, _mm256_and_si256 (__A, __B),
6218}
6219
6220static __inline__ __mmask8 __DEFAULT_FN_ATTRS128
6221_mm_testn_epi64_mask (__m128i __A, __m128i __B)
6222{
6224}
6225
6226static __inline__ __mmask8 __DEFAULT_FN_ATTRS128
6227_mm_mask_testn_epi64_mask (__mmask8 __U, __m128i __A, __m128i __B)
6228{
6229 return _mm_mask_cmpeq_epi64_mask (__U, _mm_and_si128 (__A, __B),
6231}
6232
6233static __inline__ __mmask8 __DEFAULT_FN_ATTRS256
6234_mm256_testn_epi64_mask (__m256i __A, __m256i __B)
6235{
6236 return _mm256_cmpeq_epi64_mask (_mm256_and_si256 (__A, __B),
6238}
6239
6240static __inline__ __mmask8 __DEFAULT_FN_ATTRS256
6241_mm256_mask_testn_epi64_mask (__mmask8 __U, __m256i __A, __m256i __B)
6242{
6243 return _mm256_mask_cmpeq_epi64_mask (__U, _mm256_and_si256 (__A, __B),
6245}
6246
6247static __inline__ __m128i __DEFAULT_FN_ATTRS128
6248_mm_mask_unpackhi_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
6249{
6250 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
6251 (__v4si)_mm_unpackhi_epi32(__A, __B),
6252 (__v4si)__W);
6253}
6254
6255static __inline__ __m128i __DEFAULT_FN_ATTRS128
6256_mm_maskz_unpackhi_epi32(__mmask8 __U, __m128i __A, __m128i __B)
6257{
6258 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
6259 (__v4si)_mm_unpackhi_epi32(__A, __B),
6260 (__v4si)_mm_setzero_si128());
6261}
6262
6263static __inline__ __m256i __DEFAULT_FN_ATTRS256
6264_mm256_mask_unpackhi_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
6265{
6266 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
6267 (__v8si)_mm256_unpackhi_epi32(__A, __B),
6268 (__v8si)__W);
6269}
6270
6271static __inline__ __m256i __DEFAULT_FN_ATTRS256
6272_mm256_maskz_unpackhi_epi32(__mmask8 __U, __m256i __A, __m256i __B)
6273{
6274 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
6275 (__v8si)_mm256_unpackhi_epi32(__A, __B),
6276 (__v8si)_mm256_setzero_si256());
6277}
6278
6279static __inline__ __m128i __DEFAULT_FN_ATTRS128
6280_mm_mask_unpackhi_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
6281{
6282 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
6283 (__v2di)_mm_unpackhi_epi64(__A, __B),
6284 (__v2di)__W);
6285}
6286
6287static __inline__ __m128i __DEFAULT_FN_ATTRS128
6288_mm_maskz_unpackhi_epi64(__mmask8 __U, __m128i __A, __m128i __B)
6289{
6290 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
6291 (__v2di)_mm_unpackhi_epi64(__A, __B),
6292 (__v2di)_mm_setzero_si128());
6293}
6294
6295static __inline__ __m256i __DEFAULT_FN_ATTRS256
6296_mm256_mask_unpackhi_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
6297{
6298 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
6299 (__v4di)_mm256_unpackhi_epi64(__A, __B),
6300 (__v4di)__W);
6301}
6302
6303static __inline__ __m256i __DEFAULT_FN_ATTRS256
6304_mm256_maskz_unpackhi_epi64(__mmask8 __U, __m256i __A, __m256i __B)
6305{
6306 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
6307 (__v4di)_mm256_unpackhi_epi64(__A, __B),
6308 (__v4di)_mm256_setzero_si256());
6309}
6310
6311static __inline__ __m128i __DEFAULT_FN_ATTRS128
6312_mm_mask_unpacklo_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
6313{
6314 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
6315 (__v4si)_mm_unpacklo_epi32(__A, __B),
6316 (__v4si)__W);
6317}
6318
6319static __inline__ __m128i __DEFAULT_FN_ATTRS128
6320_mm_maskz_unpacklo_epi32(__mmask8 __U, __m128i __A, __m128i __B)
6321{
6322 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
6323 (__v4si)_mm_unpacklo_epi32(__A, __B),
6324 (__v4si)_mm_setzero_si128());
6325}
6326
6327static __inline__ __m256i __DEFAULT_FN_ATTRS256
6328_mm256_mask_unpacklo_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
6329{
6330 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
6331 (__v8si)_mm256_unpacklo_epi32(__A, __B),
6332 (__v8si)__W);
6333}
6334
6335static __inline__ __m256i __DEFAULT_FN_ATTRS256
6336_mm256_maskz_unpacklo_epi32(__mmask8 __U, __m256i __A, __m256i __B)
6337{
6338 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
6339 (__v8si)_mm256_unpacklo_epi32(__A, __B),
6340 (__v8si)_mm256_setzero_si256());
6341}
6342
6343static __inline__ __m128i __DEFAULT_FN_ATTRS128
6344_mm_mask_unpacklo_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
6345{
6346 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
6347 (__v2di)_mm_unpacklo_epi64(__A, __B),
6348 (__v2di)__W);
6349}
6350
6351static __inline__ __m128i __DEFAULT_FN_ATTRS128
6352_mm_maskz_unpacklo_epi64(__mmask8 __U, __m128i __A, __m128i __B)
6353{
6354 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
6355 (__v2di)_mm_unpacklo_epi64(__A, __B),
6356 (__v2di)_mm_setzero_si128());
6357}
6358
6359static __inline__ __m256i __DEFAULT_FN_ATTRS256
6360_mm256_mask_unpacklo_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
6361{
6362 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
6363 (__v4di)_mm256_unpacklo_epi64(__A, __B),
6364 (__v4di)__W);
6365}
6366
6367static __inline__ __m256i __DEFAULT_FN_ATTRS256
6368_mm256_maskz_unpacklo_epi64(__mmask8 __U, __m256i __A, __m256i __B)
6369{
6370 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
6371 (__v4di)_mm256_unpacklo_epi64(__A, __B),
6372 (__v4di)_mm256_setzero_si256());
6373}
6374
6375static __inline__ __m128i __DEFAULT_FN_ATTRS128
6376_mm_mask_sra_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
6377{
6378 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
6379 (__v4si)_mm_sra_epi32(__A, __B),
6380 (__v4si)__W);
6381}
6382
6383static __inline__ __m128i __DEFAULT_FN_ATTRS128
6384_mm_maskz_sra_epi32(__mmask8 __U, __m128i __A, __m128i __B)
6385{
6386 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
6387 (__v4si)_mm_sra_epi32(__A, __B),
6388 (__v4si)_mm_setzero_si128());
6389}
6390
6391static __inline__ __m256i __DEFAULT_FN_ATTRS256
6392_mm256_mask_sra_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B)
6393{
6394 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
6395 (__v8si)_mm256_sra_epi32(__A, __B),
6396 (__v8si)__W);
6397}
6398
6399static __inline__ __m256i __DEFAULT_FN_ATTRS256
6400_mm256_maskz_sra_epi32(__mmask8 __U, __m256i __A, __m128i __B)
6401{
6402 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
6403 (__v8si)_mm256_sra_epi32(__A, __B),
6404 (__v8si)_mm256_setzero_si256());
6405}
6406
6407static __inline__ __m128i __DEFAULT_FN_ATTRS128
6408_mm_mask_srai_epi32(__m128i __W, __mmask8 __U, __m128i __A, unsigned int __B)
6409{
6410 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
6411 (__v4si)_mm_srai_epi32(__A, __B),
6412 (__v4si)__W);
6413}
6414
6415static __inline__ __m128i __DEFAULT_FN_ATTRS128
6416_mm_maskz_srai_epi32(__mmask8 __U, __m128i __A, unsigned int __B)
6417{
6418 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
6419 (__v4si)_mm_srai_epi32(__A, __B),
6420 (__v4si)_mm_setzero_si128());
6421}
6422
6423static __inline__ __m256i __DEFAULT_FN_ATTRS256
6424_mm256_mask_srai_epi32(__m256i __W, __mmask8 __U, __m256i __A, unsigned int __B)
6425{
6426 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
6427 (__v8si)_mm256_srai_epi32(__A, __B),
6428 (__v8si)__W);
6429}
6430
6431static __inline__ __m256i __DEFAULT_FN_ATTRS256
6432_mm256_maskz_srai_epi32(__mmask8 __U, __m256i __A, unsigned int __B)
6433{
6434 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
6435 (__v8si)_mm256_srai_epi32(__A, __B),
6436 (__v8si)_mm256_setzero_si256());
6437}
6438
6439static __inline__ __m128i __DEFAULT_FN_ATTRS128
6440_mm_sra_epi64(__m128i __A, __m128i __B)
6441{
6442 return (__m128i)__builtin_ia32_psraq128((__v2di)__A, (__v2di)__B);
6443}
6444
6445static __inline__ __m128i __DEFAULT_FN_ATTRS128
6446_mm_mask_sra_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
6447{
6448 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, \
6449 (__v2di)_mm_sra_epi64(__A, __B), \
6450 (__v2di)__W);
6451}
6452
6453static __inline__ __m128i __DEFAULT_FN_ATTRS128
6454_mm_maskz_sra_epi64(__mmask8 __U, __m128i __A, __m128i __B)
6455{
6456 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, \
6457 (__v2di)_mm_sra_epi64(__A, __B), \
6458 (__v2di)_mm_setzero_si128());
6459}
6460
6461static __inline__ __m256i __DEFAULT_FN_ATTRS256
6462_mm256_sra_epi64(__m256i __A, __m128i __B)
6463{
6464 return (__m256i)__builtin_ia32_psraq256((__v4di) __A, (__v2di) __B);
6465}
6466
6467static __inline__ __m256i __DEFAULT_FN_ATTRS256
6468_mm256_mask_sra_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B)
6469{
6470 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, \
6471 (__v4di)_mm256_sra_epi64(__A, __B), \
6472 (__v4di)__W);
6473}
6474
6475static __inline__ __m256i __DEFAULT_FN_ATTRS256
6476_mm256_maskz_sra_epi64(__mmask8 __U, __m256i __A, __m128i __B)
6477{
6478 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, \
6479 (__v4di)_mm256_sra_epi64(__A, __B), \
6480 (__v4di)_mm256_setzero_si256());
6481}
6482
6483static __inline__ __m128i __DEFAULT_FN_ATTRS128
6484_mm_srai_epi64(__m128i __A, unsigned int __imm)
6485{
6486 return (__m128i)__builtin_ia32_psraqi128((__v2di)__A, __imm);
6487}
6488
6489static __inline__ __m128i __DEFAULT_FN_ATTRS128
6490_mm_mask_srai_epi64(__m128i __W, __mmask8 __U, __m128i __A, unsigned int __imm)
6491{
6492 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, \
6493 (__v2di)_mm_srai_epi64(__A, __imm), \
6494 (__v2di)__W);
6495}
6496
6497static __inline__ __m128i __DEFAULT_FN_ATTRS128
6498_mm_maskz_srai_epi64(__mmask8 __U, __m128i __A, unsigned int __imm)
6499{
6500 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, \
6501 (__v2di)_mm_srai_epi64(__A, __imm), \
6502 (__v2di)_mm_setzero_si128());
6503}
6504
6505static __inline__ __m256i __DEFAULT_FN_ATTRS256
6506_mm256_srai_epi64(__m256i __A, unsigned int __imm)
6507{
6508 return (__m256i)__builtin_ia32_psraqi256((__v4di)__A, __imm);
6509}
6510
6511static __inline__ __m256i __DEFAULT_FN_ATTRS256
6512_mm256_mask_srai_epi64(__m256i __W, __mmask8 __U, __m256i __A,
6513 unsigned int __imm)
6514{
6515 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, \
6516 (__v4di)_mm256_srai_epi64(__A, __imm), \
6517 (__v4di)__W);
6518}
6519
6520static __inline__ __m256i __DEFAULT_FN_ATTRS256
6521_mm256_maskz_srai_epi64(__mmask8 __U, __m256i __A, unsigned int __imm)
6522{
6523 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, \
6524 (__v4di)_mm256_srai_epi64(__A, __imm), \
6525 (__v4di)_mm256_setzero_si256());
6526}
6527
/* Expands to the pternlogd128 mask builtin on A, B, C with immediate imm and
 * a mask of -1.  The expansion is fully parenthesized so the leading cast
 * applies to the whole call in any surrounding expression. */
#define _mm_ternarylogic_epi32(A, B, C, imm) \
  ((__m128i)__builtin_ia32_pternlogd128_mask((__v4si)(__m128i)(A), \
                                             (__v4si)(__m128i)(B), \
                                             (__v4si)(__m128i)(C), (int)(imm), \
                                             (__mmask8)-1))
6533
/* Expands to the pternlogd128 mask builtin on A, B, C with immediate imm and
 * mask U.  The expansion is fully parenthesized so the leading cast applies
 * to the whole call. */
#define _mm_mask_ternarylogic_epi32(A, U, B, C, imm) \
  ((__m128i)__builtin_ia32_pternlogd128_mask((__v4si)(__m128i)(A), \
                                             (__v4si)(__m128i)(B), \
                                             (__v4si)(__m128i)(C), (int)(imm), \
                                             (__mmask8)(U)))
6539
/* Expands to the pternlogd128 maskz builtin on A, B, C with immediate imm and
 * mask U.  The expansion is fully parenthesized so the leading cast applies
 * to the whole call. */
#define _mm_maskz_ternarylogic_epi32(U, A, B, C, imm) \
  ((__m128i)__builtin_ia32_pternlogd128_maskz((__v4si)(__m128i)(A), \
                                              (__v4si)(__m128i)(B), \
                                              (__v4si)(__m128i)(C), (int)(imm), \
                                              (__mmask8)(U)))
6545
/* Expands to the pternlogd256 mask builtin on A, B, C with immediate imm and
 * a mask of -1.  The expansion is fully parenthesized so the leading cast
 * applies to the whole call. */
#define _mm256_ternarylogic_epi32(A, B, C, imm) \
  ((__m256i)__builtin_ia32_pternlogd256_mask((__v8si)(__m256i)(A), \
                                             (__v8si)(__m256i)(B), \
                                             (__v8si)(__m256i)(C), (int)(imm), \
                                             (__mmask8)-1))
6551
/* Expands to the pternlogd256 mask builtin on A, B, C with immediate imm and
 * mask U.  The expansion is fully parenthesized so the leading cast applies
 * to the whole call. */
#define _mm256_mask_ternarylogic_epi32(A, U, B, C, imm) \
  ((__m256i)__builtin_ia32_pternlogd256_mask((__v8si)(__m256i)(A), \
                                             (__v8si)(__m256i)(B), \
                                             (__v8si)(__m256i)(C), (int)(imm), \
                                             (__mmask8)(U)))
6557
/* Expands to the pternlogd256 maskz builtin on A, B, C with immediate imm and
 * mask U.  The expansion is fully parenthesized so the leading cast applies
 * to the whole call. */
#define _mm256_maskz_ternarylogic_epi32(U, A, B, C, imm) \
  ((__m256i)__builtin_ia32_pternlogd256_maskz((__v8si)(__m256i)(A), \
                                              (__v8si)(__m256i)(B), \
                                              (__v8si)(__m256i)(C), (int)(imm), \
                                              (__mmask8)(U)))
6563
/* Expands to the pternlogq128 mask builtin on A, B, C with immediate imm and
 * a mask of -1.  The expansion is fully parenthesized so the leading cast
 * applies to the whole call. */
#define _mm_ternarylogic_epi64(A, B, C, imm) \
  ((__m128i)__builtin_ia32_pternlogq128_mask((__v2di)(__m128i)(A), \
                                             (__v2di)(__m128i)(B), \
                                             (__v2di)(__m128i)(C), (int)(imm), \
                                             (__mmask8)-1))
6569
/* Expands to the pternlogq128 mask builtin on A, B, C with immediate imm and
 * mask U.  The expansion is fully parenthesized so the leading cast applies
 * to the whole call. */
#define _mm_mask_ternarylogic_epi64(A, U, B, C, imm) \
  ((__m128i)__builtin_ia32_pternlogq128_mask((__v2di)(__m128i)(A), \
                                             (__v2di)(__m128i)(B), \
                                             (__v2di)(__m128i)(C), (int)(imm), \
                                             (__mmask8)(U)))
6575
/* Expands to the pternlogq128 maskz builtin on A, B, C with immediate imm and
 * mask U.  The expansion is fully parenthesized so the leading cast applies
 * to the whole call. */
#define _mm_maskz_ternarylogic_epi64(U, A, B, C, imm) \
  ((__m128i)__builtin_ia32_pternlogq128_maskz((__v2di)(__m128i)(A), \
                                              (__v2di)(__m128i)(B), \
                                              (__v2di)(__m128i)(C), (int)(imm), \
                                              (__mmask8)(U)))
6581
/* Expands to the pternlogq256 mask builtin on A, B, C with immediate imm and
 * a mask of -1.  The expansion is fully parenthesized so the leading cast
 * applies to the whole call. */
#define _mm256_ternarylogic_epi64(A, B, C, imm) \
  ((__m256i)__builtin_ia32_pternlogq256_mask((__v4di)(__m256i)(A), \
                                             (__v4di)(__m256i)(B), \
                                             (__v4di)(__m256i)(C), (int)(imm), \
                                             (__mmask8)-1))
6587
/* Expands to the pternlogq256 mask builtin on A, B, C with immediate imm and
 * mask U.  The expansion is fully parenthesized so the leading cast applies
 * to the whole call. */
#define _mm256_mask_ternarylogic_epi64(A, U, B, C, imm) \
  ((__m256i)__builtin_ia32_pternlogq256_mask((__v4di)(__m256i)(A), \
                                             (__v4di)(__m256i)(B), \
                                             (__v4di)(__m256i)(C), (int)(imm), \
                                             (__mmask8)(U)))
6593
/* Expands to the pternlogq256 maskz builtin on A, B, C with immediate imm and
 * mask U.  The expansion is fully parenthesized so the leading cast applies
 * to the whole call. */
#define _mm256_maskz_ternarylogic_epi64(U, A, B, C, imm) \
  ((__m256i)__builtin_ia32_pternlogq256_maskz((__v4di)(__m256i)(A), \
                                              (__v4di)(__m256i)(B), \
                                              (__v4di)(__m256i)(C), (int)(imm), \
                                              (__mmask8)(U)))
6599
6600
6601
/* Expands to the shuf_f32x4_256 builtin on A and B with immediate imm.  The
 * expansion is fully parenthesized so the leading cast applies to the whole
 * call in any surrounding expression. */
#define _mm256_shuffle_f32x4(A, B, imm) \
  ((__m256)__builtin_ia32_shuf_f32x4_256((__v8sf)(__m256)(A), \
                                         (__v8sf)(__m256)(B), (int)(imm)))
6605
/* Masked form of _mm256_shuffle_f32x4: the select builtin chooses, under mask
 * U, between the shuffle result and W.  The expansion is fully parenthesized
 * so the leading cast applies to the whole call. */
#define _mm256_mask_shuffle_f32x4(W, U, A, B, imm) \
  ((__m256)__builtin_ia32_selectps_256((__mmask8)(U), \
                                       (__v8sf)_mm256_shuffle_f32x4((A), (B), (imm)), \
                                       (__v8sf)(__m256)(W)))
6610
/* Zero-masked form of _mm256_shuffle_f32x4: the select builtin chooses, under
 * mask U, between the shuffle result and an all-zero vector.  The expansion
 * is fully parenthesized so the leading cast applies to the whole call. */
#define _mm256_maskz_shuffle_f32x4(U, A, B, imm) \
  ((__m256)__builtin_ia32_selectps_256((__mmask8)(U), \
                                       (__v8sf)_mm256_shuffle_f32x4((A), (B), (imm)), \
                                       (__v8sf)_mm256_setzero_ps()))
6615
/* Expands to the shuf_f64x2_256 builtin on A and B with immediate imm.  The
 * expansion is fully parenthesized so the leading cast applies to the whole
 * call. */
#define _mm256_shuffle_f64x2(A, B, imm) \
  ((__m256d)__builtin_ia32_shuf_f64x2_256((__v4df)(__m256d)(A), \
                                          (__v4df)(__m256d)(B), (int)(imm)))
6619
/* Masked form of _mm256_shuffle_f64x2: the select builtin chooses, under mask
 * U, between the shuffle result and W.  The expansion is fully parenthesized
 * so the leading cast applies to the whole call. */
#define _mm256_mask_shuffle_f64x2(W, U, A, B, imm) \
  ((__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \
                                        (__v4df)_mm256_shuffle_f64x2((A), (B), (imm)), \
                                        (__v4df)(__m256d)(W)))
6624
/* Zero-masked form of _mm256_shuffle_f64x2: the select builtin chooses, under
 * mask U, between the shuffle result and an all-zero vector.  The expansion
 * is fully parenthesized so the leading cast applies to the whole call. */
#define _mm256_maskz_shuffle_f64x2(U, A, B, imm) \
  ((__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \
                                        (__v4df)_mm256_shuffle_f64x2((A), (B), (imm)), \
                                        (__v4df)_mm256_setzero_pd()))
6629
/* Expands to the shuf_i32x4_256 builtin on A and B with immediate imm.  The
 * expansion is fully parenthesized so the leading cast applies to the whole
 * call. */
#define _mm256_shuffle_i32x4(A, B, imm) \
  ((__m256i)__builtin_ia32_shuf_i32x4_256((__v8si)(__m256i)(A), \
                                          (__v8si)(__m256i)(B), (int)(imm)))
6633
/* Masked form of _mm256_shuffle_i32x4: the select builtin chooses, under mask
 * U, between the shuffle result and W.  The expansion is fully parenthesized
 * so the leading cast applies to the whole call. */
#define _mm256_mask_shuffle_i32x4(W, U, A, B, imm) \
  ((__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \
                                       (__v8si)_mm256_shuffle_i32x4((A), (B), (imm)), \
                                       (__v8si)(__m256i)(W)))
6638
/* Zero-masked form of _mm256_shuffle_i32x4: the select builtin chooses, under
 * mask U, between the shuffle result and an all-zero vector.  The expansion
 * is fully parenthesized so the leading cast applies to the whole call. */
#define _mm256_maskz_shuffle_i32x4(U, A, B, imm) \
  ((__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \
                                       (__v8si)_mm256_shuffle_i32x4((A), (B), (imm)), \
                                       (__v8si)_mm256_setzero_si256()))
6643
/* Expands to the shuf_i64x2_256 builtin on A and B with immediate imm.  The
 * expansion is fully parenthesized so the leading cast applies to the whole
 * call. */
#define _mm256_shuffle_i64x2(A, B, imm) \
  ((__m256i)__builtin_ia32_shuf_i64x2_256((__v4di)(__m256i)(A), \
                                          (__v4di)(__m256i)(B), (int)(imm)))
6647
/* Masked form of _mm256_shuffle_i64x2: the select builtin chooses, under mask
 * U, between the shuffle result and W.  The expansion is fully parenthesized
 * so the leading cast applies to the whole call. */
#define _mm256_mask_shuffle_i64x2(W, U, A, B, imm) \
  ((__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \
                                       (__v4di)_mm256_shuffle_i64x2((A), (B), (imm)), \
                                       (__v4di)(__m256i)(W)))
6652
6653
/* Zero-masked form of _mm256_shuffle_i64x2: the select builtin chooses, under
 * mask U, between the shuffle result and an all-zero vector.  The expansion
 * is fully parenthesized so the leading cast applies to the whole call. */
#define _mm256_maskz_shuffle_i64x2(U, A, B, imm) \
  ((__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \
                                       (__v4di)_mm256_shuffle_i64x2((A), (B), (imm)), \
                                       (__v4di)_mm256_setzero_si256()))
6658
/* Masked form of _mm_shuffle_pd: the select builtin chooses, under mask U,
 * between the shuffle result and W.  The expansion is fully parenthesized so
 * the leading cast applies to the whole call in any surrounding expression. */
#define _mm_mask_shuffle_pd(W, U, A, B, M) \
  ((__m128d)__builtin_ia32_selectpd_128((__mmask8)(U), \
                                        (__v2df)_mm_shuffle_pd((A), (B), (M)), \
                                        (__v2df)(__m128d)(W)))
6663
/* Zero-masked form of _mm_shuffle_pd: the select builtin chooses, under mask
 * U, between the shuffle result and an all-zero vector.  The expansion is
 * fully parenthesized so the leading cast applies to the whole call. */
#define _mm_maskz_shuffle_pd(U, A, B, M) \
  ((__m128d)__builtin_ia32_selectpd_128((__mmask8)(U), \
                                        (__v2df)_mm_shuffle_pd((A), (B), (M)), \
                                        (__v2df)_mm_setzero_pd()))
6668
/* Masked form of _mm256_shuffle_pd: the select builtin chooses, under mask U,
 * between the shuffle result and W.  The expansion is fully parenthesized so
 * the leading cast applies to the whole call. */
#define _mm256_mask_shuffle_pd(W, U, A, B, M) \
  ((__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \
                                        (__v4df)_mm256_shuffle_pd((A), (B), (M)), \
                                        (__v4df)(__m256d)(W)))
6673
/* Zero-masked form of _mm256_shuffle_pd: the select builtin chooses, under
 * mask U, between the shuffle result and an all-zero vector.  The expansion
 * is fully parenthesized so the leading cast applies to the whole call. */
#define _mm256_maskz_shuffle_pd(U, A, B, M) \
  ((__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \
                                        (__v4df)_mm256_shuffle_pd((A), (B), (M)), \
                                        (__v4df)_mm256_setzero_pd()))
6678
/* Masked form of _mm_shuffle_ps: the select builtin chooses, under mask U,
 * between the shuffle result and W.  The expansion is fully parenthesized so
 * the leading cast applies to the whole call. */
#define _mm_mask_shuffle_ps(W, U, A, B, M) \
  ((__m128)__builtin_ia32_selectps_128((__mmask8)(U), \
                                       (__v4sf)_mm_shuffle_ps((A), (B), (M)), \
                                       (__v4sf)(__m128)(W)))
6683
/* Zero-masked form of _mm_shuffle_ps: the select builtin chooses, under mask
 * U, between the shuffle result and an all-zero vector.  The expansion is
 * fully parenthesized so the leading cast applies to the whole call. */
#define _mm_maskz_shuffle_ps(U, A, B, M) \
  ((__m128)__builtin_ia32_selectps_128((__mmask8)(U), \
                                       (__v4sf)_mm_shuffle_ps((A), (B), (M)), \
                                       (__v4sf)_mm_setzero_ps()))
6688
/* Masked form of _mm256_shuffle_ps: the select builtin chooses, under mask U,
 * between the shuffle result and W.  The expansion is fully parenthesized so
 * the leading cast applies to the whole call. */
#define _mm256_mask_shuffle_ps(W, U, A, B, M) \
  ((__m256)__builtin_ia32_selectps_256((__mmask8)(U), \
                                       (__v8sf)_mm256_shuffle_ps((A), (B), (M)), \
                                       (__v8sf)(__m256)(W)))
6693
/* Zero-masked form of _mm256_shuffle_ps: the select builtin chooses, under
 * mask U, between the shuffle result and an all-zero vector.  The expansion
 * is fully parenthesized so the leading cast applies to the whole call. */
#define _mm256_maskz_shuffle_ps(U, A, B, M) \
  ((__m256)__builtin_ia32_selectps_256((__mmask8)(U), \
                                       (__v8sf)_mm256_shuffle_ps((A), (B), (M)), \
                                       (__v8sf)_mm256_setzero_ps()))
6698
6699static __inline__ __m128d __DEFAULT_FN_ATTRS128
6700_mm_rsqrt14_pd (__m128d __A)
6701{
6702 return (__m128d) __builtin_ia32_rsqrt14pd128_mask ((__v2df) __A,
6703 (__v2df)
6704 _mm_setzero_pd (),
6705 (__mmask8) -1);
6706}
6707
6708static __inline__ __m128d __DEFAULT_FN_ATTRS128
6709_mm_mask_rsqrt14_pd (__m128d __W, __mmask8 __U, __m128d __A)
6710{
6711 return (__m128d) __builtin_ia32_rsqrt14pd128_mask ((__v2df) __A,
6712 (__v2df) __W,
6713 (__mmask8) __U);
6714}
6715
6716static __inline__ __m128d __DEFAULT_FN_ATTRS128
6718{
6719 return (__m128d) __builtin_ia32_rsqrt14pd128_mask ((__v2df) __A,
6720 (__v2df)
6721 _mm_setzero_pd (),
6722 (__mmask8) __U);
6723}
6724
6725static __inline__ __m256d __DEFAULT_FN_ATTRS256
6727{
6728 return (__m256d) __builtin_ia32_rsqrt14pd256_mask ((__v4df) __A,
6729 (__v4df)
6731 (__mmask8) -1);
6732}
6733
6734static __inline__ __m256d __DEFAULT_FN_ATTRS256
6735_mm256_mask_rsqrt14_pd (__m256d __W, __mmask8 __U, __m256d __A)
6736{
6737 return (__m256d) __builtin_ia32_rsqrt14pd256_mask ((__v4df) __A,
6738 (__v4df) __W,
6739 (__mmask8) __U);
6740}
6741
6742static __inline__ __m256d __DEFAULT_FN_ATTRS256
6744{
6745 return (__m256d) __builtin_ia32_rsqrt14pd256_mask ((__v4df) __A,
6746 (__v4df)
6748 (__mmask8) __U);
6749}
6750
6751static __inline__ __m128 __DEFAULT_FN_ATTRS128
6752_mm_rsqrt14_ps (__m128 __A)
6753{
6754 return (__m128) __builtin_ia32_rsqrt14ps128_mask ((__v4sf) __A,
6755 (__v4sf)
6756 _mm_setzero_ps (),
6757 (__mmask8) -1);
6758}
6759
6760static __inline__ __m128 __DEFAULT_FN_ATTRS128
6761_mm_mask_rsqrt14_ps (__m128 __W, __mmask8 __U, __m128 __A)
6762{
6763 return (__m128) __builtin_ia32_rsqrt14ps128_mask ((__v4sf) __A,
6764 (__v4sf) __W,
6765 (__mmask8) __U);
6766}
6767
6768static __inline__ __m128 __DEFAULT_FN_ATTRS128
6770{
6771 return (__m128) __builtin_ia32_rsqrt14ps128_mask ((__v4sf) __A,
6772 (__v4sf)
6773 _mm_setzero_ps (),
6774 (__mmask8) __U);
6775}
6776
6777static __inline__ __m256 __DEFAULT_FN_ATTRS256
6779{
6780 return (__m256) __builtin_ia32_rsqrt14ps256_mask ((__v8sf) __A,
6781 (__v8sf)
6783 (__mmask8) -1);
6784}
6785
6786static __inline__ __m256 __DEFAULT_FN_ATTRS256
6787_mm256_mask_rsqrt14_ps (__m256 __W, __mmask8 __U, __m256 __A)
6788{
6789 return (__m256) __builtin_ia32_rsqrt14ps256_mask ((__v8sf) __A,
6790 (__v8sf) __W,
6791 (__mmask8) __U);
6792}
6793
6794static __inline__ __m256 __DEFAULT_FN_ATTRS256
6796{
6797 return (__m256) __builtin_ia32_rsqrt14ps256_mask ((__v8sf) __A,
6798 (__v8sf)
6800 (__mmask8) __U);
6801}
6802
6803static __inline__ __m256 __DEFAULT_FN_ATTRS256
6805{
6806 return (__m256)__builtin_shufflevector((__v4sf)__A, (__v4sf)__A,
6807 0, 1, 2, 3, 0, 1, 2, 3);
6808}
6809
6810static __inline__ __m256 __DEFAULT_FN_ATTRS256
6811_mm256_mask_broadcast_f32x4(__m256 __O, __mmask8 __M, __m128 __A)
6812{
6813 return (__m256)__builtin_ia32_selectps_256((__mmask8)__M,
6814 (__v8sf)_mm256_broadcast_f32x4(__A),
6815 (__v8sf)__O);
6816}
6817
6818static __inline__ __m256 __DEFAULT_FN_ATTRS256
6820{
6821 return (__m256)__builtin_ia32_selectps_256((__mmask8)__M,
6822 (__v8sf)_mm256_broadcast_f32x4(__A),
6823 (__v8sf)_mm256_setzero_ps());
6824}
6825
6826static __inline__ __m256i __DEFAULT_FN_ATTRS256
6828{
6829 return (__m256i)__builtin_shufflevector((__v4si)__A, (__v4si)__A,
6830 0, 1, 2, 3, 0, 1, 2, 3);
6831}
6832
6833static __inline__ __m256i __DEFAULT_FN_ATTRS256
6834_mm256_mask_broadcast_i32x4(__m256i __O, __mmask8 __M, __m128i __A)
6835{
6836 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
6837 (__v8si)_mm256_broadcast_i32x4(__A),
6838 (__v8si)__O);
6839}
6840
6841static __inline__ __m256i __DEFAULT_FN_ATTRS256
6843{
6844 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
6845 (__v8si)_mm256_broadcast_i32x4(__A),
6846 (__v8si)_mm256_setzero_si256());
6847}
6848
6849static __inline__ __m256d __DEFAULT_FN_ATTRS256
6850_mm256_mask_broadcastsd_pd (__m256d __O, __mmask8 __M, __m128d __A)
6851{
6852 return (__m256d)__builtin_ia32_selectpd_256(__M,
6853 (__v4df) _mm256_broadcastsd_pd(__A),
6854 (__v4df) __O);
6855}
6856
6857static __inline__ __m256d __DEFAULT_FN_ATTRS256
6859{
6860 return (__m256d)__builtin_ia32_selectpd_256(__M,
6861 (__v4df) _mm256_broadcastsd_pd(__A),
6862 (__v4df) _mm256_setzero_pd());
6863}
6864
6865static __inline__ __m128 __DEFAULT_FN_ATTRS128
6866_mm_mask_broadcastss_ps (__m128 __O, __mmask8 __M, __m128 __A)
6867{
6868 return (__m128)__builtin_ia32_selectps_128(__M,
6869 (__v4sf) _mm_broadcastss_ps(__A),
6870 (__v4sf) __O);
6871}
6872
6873static __inline__ __m128 __DEFAULT_FN_ATTRS128
6875{
6876 return (__m128)__builtin_ia32_selectps_128(__M,
6877 (__v4sf) _mm_broadcastss_ps(__A),
6878 (__v4sf) _mm_setzero_ps());
6879}
6880
6881static __inline__ __m256 __DEFAULT_FN_ATTRS256
6882_mm256_mask_broadcastss_ps (__m256 __O, __mmask8 __M, __m128 __A)
6883{
6884 return (__m256)__builtin_ia32_selectps_256(__M,
6885 (__v8sf) _mm256_broadcastss_ps(__A),
6886 (__v8sf) __O);
6887}
6888
6889static __inline__ __m256 __DEFAULT_FN_ATTRS256
6891{
6892 return (__m256)__builtin_ia32_selectps_256(__M,
6893 (__v8sf) _mm256_broadcastss_ps(__A),
6894 (__v8sf) _mm256_setzero_ps());
6895}
6896
6897static __inline__ __m128i __DEFAULT_FN_ATTRS128
6898_mm_mask_broadcastd_epi32 (__m128i __O, __mmask8 __M, __m128i __A)
6899{
6900 return (__m128i)__builtin_ia32_selectd_128(__M,
6901 (__v4si) _mm_broadcastd_epi32(__A),
6902 (__v4si) __O);
6903}
6904
6905static __inline__ __m128i __DEFAULT_FN_ATTRS128
6907{
6908 return (__m128i)__builtin_ia32_selectd_128(__M,
6909 (__v4si) _mm_broadcastd_epi32(__A),
6910 (__v4si) _mm_setzero_si128());
6911}
6912
6913static __inline__ __m256i __DEFAULT_FN_ATTRS256
6914_mm256_mask_broadcastd_epi32 (__m256i __O, __mmask8 __M, __m128i __A)
6915{
6916 return (__m256i)__builtin_ia32_selectd_256(__M,
6917 (__v8si) _mm256_broadcastd_epi32(__A),
6918 (__v8si) __O);
6919}
6920
6921static __inline__ __m256i __DEFAULT_FN_ATTRS256
6923{
6924 return (__m256i)__builtin_ia32_selectd_256(__M,
6925 (__v8si) _mm256_broadcastd_epi32(__A),
6926 (__v8si) _mm256_setzero_si256());
6927}
6928
6929static __inline__ __m128i __DEFAULT_FN_ATTRS128
6930_mm_mask_broadcastq_epi64 (__m128i __O, __mmask8 __M, __m128i __A)
6931{
6932 return (__m128i)__builtin_ia32_selectq_128(__M,
6933 (__v2di) _mm_broadcastq_epi64(__A),
6934 (__v2di) __O);
6935}
6936
6937static __inline__ __m128i __DEFAULT_FN_ATTRS128
6939{
6940 return (__m128i)__builtin_ia32_selectq_128(__M,
6941 (__v2di) _mm_broadcastq_epi64(__A),
6942 (__v2di) _mm_setzero_si128());
6943}
6944
6945static __inline__ __m256i __DEFAULT_FN_ATTRS256
6946_mm256_mask_broadcastq_epi64 (__m256i __O, __mmask8 __M, __m128i __A)
6947{
6948 return (__m256i)__builtin_ia32_selectq_256(__M,
6949 (__v4di) _mm256_broadcastq_epi64(__A),
6950 (__v4di) __O);
6951}
6952
6953static __inline__ __m256i __DEFAULT_FN_ATTRS256
6955{
6956 return (__m256i)__builtin_ia32_selectq_256(__M,
6957 (__v4di) _mm256_broadcastq_epi64(__A),
6958 (__v4di) _mm256_setzero_si256());
6959}
6960
6961static __inline__ __m128i __DEFAULT_FN_ATTRS128
6963{
6964 return (__m128i) __builtin_ia32_pmovsdb128_mask ((__v4si) __A,
6965 (__v16qi)_mm_undefined_si128(),
6966 (__mmask8) -1);
6967}
6968
6969static __inline__ __m128i __DEFAULT_FN_ATTRS128
6970_mm_mask_cvtsepi32_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
6971{
6972 return (__m128i) __builtin_ia32_pmovsdb128_mask ((__v4si) __A,
6973 (__v16qi) __O, __M);
6974}
6975
6976static __inline__ __m128i __DEFAULT_FN_ATTRS128
6978{
6979 return (__m128i) __builtin_ia32_pmovsdb128_mask ((__v4si) __A,
6980 (__v16qi) _mm_setzero_si128 (),
6981 __M);
6982}
6983
6984static __inline__ void __DEFAULT_FN_ATTRS128
6985_mm_mask_cvtsepi32_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
6986{
6987 __builtin_ia32_pmovsdb128mem_mask ((__v16qi *) __P, (__v4si) __A, __M);
6988}
6989
6990static __inline__ __m128i __DEFAULT_FN_ATTRS256
6992{
6993 return (__m128i) __builtin_ia32_pmovsdb256_mask ((__v8si) __A,
6994 (__v16qi)_mm_undefined_si128(),
6995 (__mmask8) -1);
6996}
6997
6998static __inline__ __m128i __DEFAULT_FN_ATTRS256
6999_mm256_mask_cvtsepi32_epi8 (__m128i __O, __mmask8 __M, __m256i __A)
7000{
7001 return (__m128i) __builtin_ia32_pmovsdb256_mask ((__v8si) __A,
7002 (__v16qi) __O, __M);
7003}
7004
7005static __inline__ __m128i __DEFAULT_FN_ATTRS256
7007{
7008 return (__m128i) __builtin_ia32_pmovsdb256_mask ((__v8si) __A,
7009 (__v16qi) _mm_setzero_si128 (),
7010 __M);
7011}
7012
7013static __inline__ void __DEFAULT_FN_ATTRS256
7014_mm256_mask_cvtsepi32_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A)
7015{
7016 __builtin_ia32_pmovsdb256mem_mask ((__v16qi *) __P, (__v8si) __A, __M);
7017}
7018
7019static __inline__ __m128i __DEFAULT_FN_ATTRS128
7021{
7022 return (__m128i) __builtin_ia32_pmovsdw128_mask ((__v4si) __A,
7023 (__v8hi)_mm_setzero_si128 (),
7024 (__mmask8) -1);
7025}
7026
7027static __inline__ __m128i __DEFAULT_FN_ATTRS128
7028_mm_mask_cvtsepi32_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
7029{
7030 return (__m128i) __builtin_ia32_pmovsdw128_mask ((__v4si) __A,
7031 (__v8hi)__O,
7032 __M);
7033}
7034
7035static __inline__ __m128i __DEFAULT_FN_ATTRS128
7037{
7038 return (__m128i) __builtin_ia32_pmovsdw128_mask ((__v4si) __A,
7039 (__v8hi) _mm_setzero_si128 (),
7040 __M);
7041}
7042
7043static __inline__ void __DEFAULT_FN_ATTRS128
7044_mm_mask_cvtsepi32_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A)
7045{
7046 __builtin_ia32_pmovsdw128mem_mask ((__v8hi *) __P, (__v4si) __A, __M);
7047}
7048
7049static __inline__ __m128i __DEFAULT_FN_ATTRS256
7051{
7052 return (__m128i) __builtin_ia32_pmovsdw256_mask ((__v8si) __A,
7053 (__v8hi)_mm_undefined_si128(),
7054 (__mmask8) -1);
7055}
7056
7057static __inline__ __m128i __DEFAULT_FN_ATTRS256
7058_mm256_mask_cvtsepi32_epi16 (__m128i __O, __mmask8 __M, __m256i __A)
7059{
7060 return (__m128i) __builtin_ia32_pmovsdw256_mask ((__v8si) __A,
7061 (__v8hi) __O, __M);
7062}
7063
7064static __inline__ __m128i __DEFAULT_FN_ATTRS256
7066{
7067 return (__m128i) __builtin_ia32_pmovsdw256_mask ((__v8si) __A,
7068 (__v8hi) _mm_setzero_si128 (),
7069 __M);
7070}
7071
7072static __inline__ void __DEFAULT_FN_ATTRS256
7073_mm256_mask_cvtsepi32_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A)
7074{
7075 __builtin_ia32_pmovsdw256mem_mask ((__v8hi *) __P, (__v8si) __A, __M);
7076}
7077
7078static __inline__ __m128i __DEFAULT_FN_ATTRS128
7080{
7081 return (__m128i) __builtin_ia32_pmovsqb128_mask ((__v2di) __A,
7082 (__v16qi)_mm_undefined_si128(),
7083 (__mmask8) -1);
7084}
7085
7086static __inline__ __m128i __DEFAULT_FN_ATTRS128
7087_mm_mask_cvtsepi64_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
7088{
7089 return (__m128i) __builtin_ia32_pmovsqb128_mask ((__v2di) __A,
7090 (__v16qi) __O, __M);
7091}
7092
7093static __inline__ __m128i __DEFAULT_FN_ATTRS128
7095{
7096 return (__m128i) __builtin_ia32_pmovsqb128_mask ((__v2di) __A,
7097 (__v16qi) _mm_setzero_si128 (),
7098 __M);
7099}
7100
7101static __inline__ void __DEFAULT_FN_ATTRS128
7102_mm_mask_cvtsepi64_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
7103{
7104 __builtin_ia32_pmovsqb128mem_mask ((__v16qi *) __P, (__v2di) __A, __M);
7105}
7106
7107static __inline__ __m128i __DEFAULT_FN_ATTRS256
7109{
7110 return (__m128i) __builtin_ia32_pmovsqb256_mask ((__v4di) __A,
7111 (__v16qi)_mm_undefined_si128(),
7112 (__mmask8) -1);
7113}
7114
7115static __inline__ __m128i __DEFAULT_FN_ATTRS256
7116_mm256_mask_cvtsepi64_epi8 (__m128i __O, __mmask8 __M, __m256i __A)
7117{
7118 return (__m128i) __builtin_ia32_pmovsqb256_mask ((__v4di) __A,
7119 (__v16qi) __O, __M);
7120}
7121
7122static __inline__ __m128i __DEFAULT_FN_ATTRS256
7124{
7125 return (__m128i) __builtin_ia32_pmovsqb256_mask ((__v4di) __A,
7126 (__v16qi) _mm_setzero_si128 (),
7127 __M);
7128}
7129
7130static __inline__ void __DEFAULT_FN_ATTRS256
7131_mm256_mask_cvtsepi64_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A)
7132{
7133 __builtin_ia32_pmovsqb256mem_mask ((__v16qi *) __P, (__v4di) __A, __M);
7134}
7135
7136static __inline__ __m128i __DEFAULT_FN_ATTRS128
7138{
7139 return (__m128i) __builtin_ia32_pmovsqd128_mask ((__v2di) __A,
7140 (__v4si)_mm_undefined_si128(),
7141 (__mmask8) -1);
7142}
7143
7144static __inline__ __m128i __DEFAULT_FN_ATTRS128
7145_mm_mask_cvtsepi64_epi32 (__m128i __O, __mmask8 __M, __m128i __A)
7146{
7147 return (__m128i) __builtin_ia32_pmovsqd128_mask ((__v2di) __A,
7148 (__v4si) __O, __M);
7149}
7150
7151static __inline__ __m128i __DEFAULT_FN_ATTRS128
7153{
7154 return (__m128i) __builtin_ia32_pmovsqd128_mask ((__v2di) __A,
7155 (__v4si) _mm_setzero_si128 (),
7156 __M);
7157}
7158
7159static __inline__ void __DEFAULT_FN_ATTRS128
7160_mm_mask_cvtsepi64_storeu_epi32 (void * __P, __mmask8 __M, __m128i __A)
7161{
7162 __builtin_ia32_pmovsqd128mem_mask ((__v4si *) __P, (__v2di) __A, __M);
7163}
7164
7165static __inline__ __m128i __DEFAULT_FN_ATTRS256
7167{
7168 return (__m128i) __builtin_ia32_pmovsqd256_mask ((__v4di) __A,
7169 (__v4si)_mm_undefined_si128(),
7170 (__mmask8) -1);
7171}
7172
7173static __inline__ __m128i __DEFAULT_FN_ATTRS256
7174_mm256_mask_cvtsepi64_epi32 (__m128i __O, __mmask8 __M, __m256i __A)
7175{
7176 return (__m128i) __builtin_ia32_pmovsqd256_mask ((__v4di) __A,
7177 (__v4si)__O,
7178 __M);
7179}
7180
7181static __inline__ __m128i __DEFAULT_FN_ATTRS256
7183{
7184 return (__m128i) __builtin_ia32_pmovsqd256_mask ((__v4di) __A,
7185 (__v4si) _mm_setzero_si128 (),
7186 __M);
7187}
7188
7189static __inline__ void __DEFAULT_FN_ATTRS256
7190_mm256_mask_cvtsepi64_storeu_epi32 (void * __P, __mmask8 __M, __m256i __A)
7191{
7192 __builtin_ia32_pmovsqd256mem_mask ((__v4si *) __P, (__v4di) __A, __M);
7193}
7194
7195static __inline__ __m128i __DEFAULT_FN_ATTRS128
7197{
7198 return (__m128i) __builtin_ia32_pmovsqw128_mask ((__v2di) __A,
7199 (__v8hi)_mm_undefined_si128(),
7200 (__mmask8) -1);
7201}
7202
7203static __inline__ __m128i __DEFAULT_FN_ATTRS128
7204_mm_mask_cvtsepi64_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
7205{
7206 return (__m128i) __builtin_ia32_pmovsqw128_mask ((__v2di) __A,
7207 (__v8hi) __O, __M);
7208}
7209
7210static __inline__ __m128i __DEFAULT_FN_ATTRS128
7212{
7213 return (__m128i) __builtin_ia32_pmovsqw128_mask ((__v2di) __A,
7214 (__v8hi) _mm_setzero_si128 (),
7215 __M);
7216}
7217
7218static __inline__ void __DEFAULT_FN_ATTRS128
7219_mm_mask_cvtsepi64_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A)
7220{
7221 __builtin_ia32_pmovsqw128mem_mask ((__v8hi *) __P, (__v2di) __A, __M);
7222}
7223
7224static __inline__ __m128i __DEFAULT_FN_ATTRS256
7226{
7227 return (__m128i) __builtin_ia32_pmovsqw256_mask ((__v4di) __A,
7228 (__v8hi)_mm_undefined_si128(),
7229 (__mmask8) -1);
7230}
7231
7232static __inline__ __m128i __DEFAULT_FN_ATTRS256
7233_mm256_mask_cvtsepi64_epi16 (__m128i __O, __mmask8 __M, __m256i __A)
7234{
7235 return (__m128i) __builtin_ia32_pmovsqw256_mask ((__v4di) __A,
7236 (__v8hi) __O, __M);
7237}
7238
7239static __inline__ __m128i __DEFAULT_FN_ATTRS256
7241{
7242 return (__m128i) __builtin_ia32_pmovsqw256_mask ((__v4di) __A,
7243 (__v8hi) _mm_setzero_si128 (),
7244 __M);
7245}
7246
7247static __inline__ void __DEFAULT_FN_ATTRS256
7248_mm256_mask_cvtsepi64_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A)
7249{
7250 __builtin_ia32_pmovsqw256mem_mask ((__v8hi *) __P, (__v4di) __A, __M);
7251}
7252
7253static __inline__ __m128i __DEFAULT_FN_ATTRS128
7255{
7256 return (__m128i) __builtin_ia32_pmovusdb128_mask ((__v4si) __A,
7257 (__v16qi)_mm_undefined_si128(),
7258 (__mmask8) -1);
7259}
7260
7261static __inline__ __m128i __DEFAULT_FN_ATTRS128
7262_mm_mask_cvtusepi32_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
7263{
7264 return (__m128i) __builtin_ia32_pmovusdb128_mask ((__v4si) __A,
7265 (__v16qi) __O,
7266 __M);
7267}
7268
7269static __inline__ __m128i __DEFAULT_FN_ATTRS128
7271{
7272 return (__m128i) __builtin_ia32_pmovusdb128_mask ((__v4si) __A,
7273 (__v16qi) _mm_setzero_si128 (),
7274 __M);
7275}
7276
7277static __inline__ void __DEFAULT_FN_ATTRS128
7278_mm_mask_cvtusepi32_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
7279{
7280 __builtin_ia32_pmovusdb128mem_mask ((__v16qi *) __P, (__v4si) __A, __M);
7281}
7282
7283static __inline__ __m128i __DEFAULT_FN_ATTRS256
7285{
7286 return (__m128i) __builtin_ia32_pmovusdb256_mask ((__v8si) __A,
7287 (__v16qi)_mm_undefined_si128(),
7288 (__mmask8) -1);
7289}
7290
7291static __inline__ __m128i __DEFAULT_FN_ATTRS256
7292_mm256_mask_cvtusepi32_epi8 (__m128i __O, __mmask8 __M, __m256i __A)
7293{
7294 return (__m128i) __builtin_ia32_pmovusdb256_mask ((__v8si) __A,
7295 (__v16qi) __O,
7296 __M);
7297}
7298
7299static __inline__ __m128i __DEFAULT_FN_ATTRS256
7301{
7302 return (__m128i) __builtin_ia32_pmovusdb256_mask ((__v8si) __A,
7303 (__v16qi) _mm_setzero_si128 (),
7304 __M);
7305}
7306
7307static __inline__ void __DEFAULT_FN_ATTRS256
7308_mm256_mask_cvtusepi32_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A)
7309{
7310 __builtin_ia32_pmovusdb256mem_mask ((__v16qi*) __P, (__v8si) __A, __M);
7311}
7312
7313static __inline__ __m128i __DEFAULT_FN_ATTRS128
7315{
7316 return (__m128i) __builtin_ia32_pmovusdw128_mask ((__v4si) __A,
7317 (__v8hi)_mm_undefined_si128(),
7318 (__mmask8) -1);
7319}
7320
7321static __inline__ __m128i __DEFAULT_FN_ATTRS128
7322_mm_mask_cvtusepi32_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
7323{
7324 return (__m128i) __builtin_ia32_pmovusdw128_mask ((__v4si) __A,
7325 (__v8hi) __O, __M);
7326}
7327
7328static __inline__ __m128i __DEFAULT_FN_ATTRS128
7330{
7331 return (__m128i) __builtin_ia32_pmovusdw128_mask ((__v4si) __A,
7332 (__v8hi) _mm_setzero_si128 (),
7333 __M);
7334}
7335
7336static __inline__ void __DEFAULT_FN_ATTRS128
7337_mm_mask_cvtusepi32_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A)
7338{
7339 __builtin_ia32_pmovusdw128mem_mask ((__v8hi *) __P, (__v4si) __A, __M);
7340}
7341
7342static __inline__ __m128i __DEFAULT_FN_ATTRS256
7344{
7345 return (__m128i) __builtin_ia32_pmovusdw256_mask ((__v8si) __A,
7346 (__v8hi) _mm_undefined_si128(),
7347 (__mmask8) -1);
7348}
7349
7350static __inline__ __m128i __DEFAULT_FN_ATTRS256
7351_mm256_mask_cvtusepi32_epi16 (__m128i __O, __mmask8 __M, __m256i __A)
7352{
7353 return (__m128i) __builtin_ia32_pmovusdw256_mask ((__v8si) __A,
7354 (__v8hi) __O, __M);
7355}
7356
7357static __inline__ __m128i __DEFAULT_FN_ATTRS256
7359{
7360 return (__m128i) __builtin_ia32_pmovusdw256_mask ((__v8si) __A,
7361 (__v8hi) _mm_setzero_si128 (),
7362 __M);
7363}
7364
7365static __inline__ void __DEFAULT_FN_ATTRS256
7366_mm256_mask_cvtusepi32_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A)
7367{
7368 __builtin_ia32_pmovusdw256mem_mask ((__v8hi *) __P, (__v8si) __A, __M);
7369}
7370
7371static __inline__ __m128i __DEFAULT_FN_ATTRS128
7373{
7374 return (__m128i) __builtin_ia32_pmovusqb128_mask ((__v2di) __A,
7375 (__v16qi)_mm_undefined_si128(),
7376 (__mmask8) -1);
7377}
7378
7379static __inline__ __m128i __DEFAULT_FN_ATTRS128
7380_mm_mask_cvtusepi64_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
7381{
7382 return (__m128i) __builtin_ia32_pmovusqb128_mask ((__v2di) __A,
7383 (__v16qi) __O,
7384 __M);
7385}
7386
7387static __inline__ __m128i __DEFAULT_FN_ATTRS128
7389{
7390 return (__m128i) __builtin_ia32_pmovusqb128_mask ((__v2di) __A,
7391 (__v16qi) _mm_setzero_si128 (),
7392 __M);
7393}
7394
7395static __inline__ void __DEFAULT_FN_ATTRS128
7396_mm_mask_cvtusepi64_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
7397{
7398 __builtin_ia32_pmovusqb128mem_mask ((__v16qi *) __P, (__v2di) __A, __M);
7399}
7400
7401static __inline__ __m128i __DEFAULT_FN_ATTRS256
7403{
7404 return (__m128i) __builtin_ia32_pmovusqb256_mask ((__v4di) __A,
7405 (__v16qi)_mm_undefined_si128(),
7406 (__mmask8) -1);
7407}
7408
7409static __inline__ __m128i __DEFAULT_FN_ATTRS256
7410_mm256_mask_cvtusepi64_epi8 (__m128i __O, __mmask8 __M, __m256i __A)
7411{
7412 return (__m128i) __builtin_ia32_pmovusqb256_mask ((__v4di) __A,
7413 (__v16qi) __O,
7414 __M);
7415}
7416
7417static __inline__ __m128i __DEFAULT_FN_ATTRS256
7419{
7420 return (__m128i) __builtin_ia32_pmovusqb256_mask ((__v4di) __A,
7421 (__v16qi) _mm_setzero_si128 (),
7422 __M);
7423}
7424
7425static __inline__ void __DEFAULT_FN_ATTRS256
7426_mm256_mask_cvtusepi64_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A)
7427{
7428 __builtin_ia32_pmovusqb256mem_mask ((__v16qi *) __P, (__v4di) __A, __M);
7429}
7430
7431static __inline__ __m128i __DEFAULT_FN_ATTRS128
7433{
7434 return (__m128i) __builtin_ia32_pmovusqd128_mask ((__v2di) __A,
7435 (__v4si)_mm_undefined_si128(),
7436 (__mmask8) -1);
7437}
7438
7439static __inline__ __m128i __DEFAULT_FN_ATTRS128
7440_mm_mask_cvtusepi64_epi32 (__m128i __O, __mmask8 __M, __m128i __A)
7441{
7442 return (__m128i) __builtin_ia32_pmovusqd128_mask ((__v2di) __A,
7443 (__v4si) __O, __M);
7444}
7445
7446static __inline__ __m128i __DEFAULT_FN_ATTRS128
7448{
7449 return (__m128i) __builtin_ia32_pmovusqd128_mask ((__v2di) __A,
7450 (__v4si) _mm_setzero_si128 (),
7451 __M);
7452}
7453
7454static __inline__ void __DEFAULT_FN_ATTRS128
7455_mm_mask_cvtusepi64_storeu_epi32 (void * __P, __mmask8 __M, __m128i __A)
7456{
7457 __builtin_ia32_pmovusqd128mem_mask ((__v4si *) __P, (__v2di) __A, __M);
7458}
7459
7460static __inline__ __m128i __DEFAULT_FN_ATTRS256
7462{
7463 return (__m128i) __builtin_ia32_pmovusqd256_mask ((__v4di) __A,
7464 (__v4si)_mm_undefined_si128(),
7465 (__mmask8) -1);
7466}
7467
7468static __inline__ __m128i __DEFAULT_FN_ATTRS256
7469_mm256_mask_cvtusepi64_epi32 (__m128i __O, __mmask8 __M, __m256i __A)
7470{
7471 return (__m128i) __builtin_ia32_pmovusqd256_mask ((__v4di) __A,
7472 (__v4si) __O, __M);
7473}
7474
7475static __inline__ __m128i __DEFAULT_FN_ATTRS256
7477{
7478 return (__m128i) __builtin_ia32_pmovusqd256_mask ((__v4di) __A,
7479 (__v4si) _mm_setzero_si128 (),
7480 __M);
7481}
7482
7483static __inline__ void __DEFAULT_FN_ATTRS256
7484_mm256_mask_cvtusepi64_storeu_epi32 (void * __P, __mmask8 __M, __m256i __A)
7485{
7486 __builtin_ia32_pmovusqd256mem_mask ((__v4si *) __P, (__v4di) __A, __M);
7487}
7488
7489static __inline__ __m128i __DEFAULT_FN_ATTRS128
7491{
7492 return (__m128i) __builtin_ia32_pmovusqw128_mask ((__v2di) __A,
7493 (__v8hi)_mm_undefined_si128(),
7494 (__mmask8) -1);
7495}
7496
7497static __inline__ __m128i __DEFAULT_FN_ATTRS128
7498_mm_mask_cvtusepi64_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
7499{
7500 return (__m128i) __builtin_ia32_pmovusqw128_mask ((__v2di) __A,
7501 (__v8hi) __O, __M);
7502}
7503
7504static __inline__ __m128i __DEFAULT_FN_ATTRS128
7506{
7507 return (__m128i) __builtin_ia32_pmovusqw128_mask ((__v2di) __A,
7508 (__v8hi) _mm_setzero_si128 (),
7509 __M);
7510}
7511
7512static __inline__ void __DEFAULT_FN_ATTRS128
7513_mm_mask_cvtusepi64_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A)
7514{
7515 __builtin_ia32_pmovusqw128mem_mask ((__v8hi *) __P, (__v2di) __A, __M);
7516}
7517
7518static __inline__ __m128i __DEFAULT_FN_ATTRS256
7520{
7521 return (__m128i) __builtin_ia32_pmovusqw256_mask ((__v4di) __A,
7522 (__v8hi)_mm_undefined_si128(),
7523 (__mmask8) -1);
7524}
7525
7526static __inline__ __m128i __DEFAULT_FN_ATTRS256
7527_mm256_mask_cvtusepi64_epi16 (__m128i __O, __mmask8 __M, __m256i __A)
7528{
7529 return (__m128i) __builtin_ia32_pmovusqw256_mask ((__v4di) __A,
7530 (__v8hi) __O, __M);
7531}
7532
7533static __inline__ __m128i __DEFAULT_FN_ATTRS256
7535{
7536 return (__m128i) __builtin_ia32_pmovusqw256_mask ((__v4di) __A,
7537 (__v8hi) _mm_setzero_si128 (),
7538 __M);
7539}
7540
7541static __inline__ void __DEFAULT_FN_ATTRS256
7542_mm256_mask_cvtusepi64_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A)
7543{
7544 __builtin_ia32_pmovusqw256mem_mask ((__v8hi *) __P, (__v4di) __A, __M);
7545}
7546
7547static __inline__ __m128i __DEFAULT_FN_ATTRS128
7549{
7550 return (__m128i)__builtin_shufflevector(
7551 __builtin_convertvector((__v4si)__A, __v4qi), (__v4qi){0, 0, 0, 0}, 0, 1,
7552 2, 3, 4, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7);
7553}
7554
7555static __inline__ __m128i __DEFAULT_FN_ATTRS128
7556_mm_mask_cvtepi32_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
7557{
7558 return (__m128i) __builtin_ia32_pmovdb128_mask ((__v4si) __A,
7559 (__v16qi) __O, __M);
7560}
7561
7562static __inline__ __m128i __DEFAULT_FN_ATTRS128
7564{
7565 return (__m128i) __builtin_ia32_pmovdb128_mask ((__v4si) __A,
7566 (__v16qi)
7568 __M);
7569}
7570
7571static __inline__ void __DEFAULT_FN_ATTRS128
7572_mm_mask_cvtepi32_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
7573{
7574 __builtin_ia32_pmovdb128mem_mask ((__v16qi *) __P, (__v4si) __A, __M);
7575}
7576
7577static __inline__ __m128i __DEFAULT_FN_ATTRS256
7579{
7580 return (__m128i)__builtin_shufflevector(
7581 __builtin_convertvector((__v8si)__A, __v8qi),
7582 (__v8qi){0, 0, 0, 0, 0, 0, 0, 0}, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
7583 12, 13, 14, 15);
7584}
7585
7586static __inline__ __m128i __DEFAULT_FN_ATTRS256
7587_mm256_mask_cvtepi32_epi8 (__m128i __O, __mmask8 __M, __m256i __A)
7588{
7589 return (__m128i) __builtin_ia32_pmovdb256_mask ((__v8si) __A,
7590 (__v16qi) __O, __M);
7591}
7592
7593static __inline__ __m128i __DEFAULT_FN_ATTRS256
7595{
7596 return (__m128i) __builtin_ia32_pmovdb256_mask ((__v8si) __A,
7597 (__v16qi) _mm_setzero_si128 (),
7598 __M);
7599}
7600
7601static __inline__ void __DEFAULT_FN_ATTRS256
7602_mm256_mask_cvtepi32_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A)
7603{
7604 __builtin_ia32_pmovdb256mem_mask ((__v16qi *) __P, (__v8si) __A, __M);
7605}
7606
7607static __inline__ __m128i __DEFAULT_FN_ATTRS128
7609{
7610 return (__m128i)__builtin_shufflevector(
7611 __builtin_convertvector((__v4si)__A, __v4hi), (__v4hi){0, 0, 0, 0}, 0, 1,
7612 2, 3, 4, 5, 6, 7);
7613}
7614
7615static __inline__ __m128i __DEFAULT_FN_ATTRS128
7616_mm_mask_cvtepi32_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
7617{
7618 return (__m128i) __builtin_ia32_pmovdw128_mask ((__v4si) __A,
7619 (__v8hi) __O, __M);
7620}
7621
7622static __inline__ __m128i __DEFAULT_FN_ATTRS128
7624{
7625 return (__m128i) __builtin_ia32_pmovdw128_mask ((__v4si) __A,
7626 (__v8hi) _mm_setzero_si128 (),
7627 __M);
7628}
7629
7630static __inline__ void __DEFAULT_FN_ATTRS128
7631_mm_mask_cvtepi32_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A)
7632{
7633 __builtin_ia32_pmovdw128mem_mask ((__v8hi *) __P, (__v4si) __A, __M);
7634}
7635
7636static __inline__ __m128i __DEFAULT_FN_ATTRS256
7638{
7639 return (__m128i)__builtin_convertvector((__v8si)__A, __v8hi);
7640}
7641
7642static __inline__ __m128i __DEFAULT_FN_ATTRS256
7643_mm256_mask_cvtepi32_epi16 (__m128i __O, __mmask8 __M, __m256i __A)
7644{
7645 return (__m128i) __builtin_ia32_pmovdw256_mask ((__v8si) __A,
7646 (__v8hi) __O, __M);
7647}
7648
7649static __inline__ __m128i __DEFAULT_FN_ATTRS256
7651{
7652 return (__m128i) __builtin_ia32_pmovdw256_mask ((__v8si) __A,
7653 (__v8hi) _mm_setzero_si128 (),
7654 __M);
7655}
7656
7657static __inline__ void __DEFAULT_FN_ATTRS256
7658_mm256_mask_cvtepi32_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A)
7659{
7660 __builtin_ia32_pmovdw256mem_mask ((__v8hi *) __P, (__v8si) __A, __M);
7661}
7662
7663static __inline__ __m128i __DEFAULT_FN_ATTRS128
7665{
7666 return (__m128i)__builtin_shufflevector(
7667 __builtin_convertvector((__v2di)__A, __v2qi), (__v2qi){0, 0}, 0, 1, 2, 3,
7668 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3);
7669}
7670
7671static __inline__ __m128i __DEFAULT_FN_ATTRS128
7672_mm_mask_cvtepi64_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
7673{
7674 return (__m128i) __builtin_ia32_pmovqb128_mask ((__v2di) __A,
7675 (__v16qi) __O, __M);
7676}
7677
7678static __inline__ __m128i __DEFAULT_FN_ATTRS128
7680{
7681 return (__m128i) __builtin_ia32_pmovqb128_mask ((__v2di) __A,
7682 (__v16qi) _mm_setzero_si128 (),
7683 __M);
7684}
7685
7686static __inline__ void __DEFAULT_FN_ATTRS128
7687_mm_mask_cvtepi64_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
7688{
7689 __builtin_ia32_pmovqb128mem_mask ((__v16qi *) __P, (__v2di) __A, __M);
7690}
7691
7692static __inline__ __m128i __DEFAULT_FN_ATTRS256
7694{
7695 return (__m128i)__builtin_shufflevector(
7696 __builtin_convertvector((__v4di)__A, __v4qi), (__v4qi){0, 0, 0, 0}, 0, 1,
7697 2, 3, 4, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7);
7698}
7699
7700static __inline__ __m128i __DEFAULT_FN_ATTRS256
7701_mm256_mask_cvtepi64_epi8 (__m128i __O, __mmask8 __M, __m256i __A)
7702{
7703 return (__m128i) __builtin_ia32_pmovqb256_mask ((__v4di) __A,
7704 (__v16qi) __O, __M);
7705}
7706
7707static __inline__ __m128i __DEFAULT_FN_ATTRS256
7709{
7710 return (__m128i) __builtin_ia32_pmovqb256_mask ((__v4di) __A,
7711 (__v16qi) _mm_setzero_si128 (),
7712 __M);
7713}
7714
7715static __inline__ void __DEFAULT_FN_ATTRS256
7716_mm256_mask_cvtepi64_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A)
7717{
7718 __builtin_ia32_pmovqb256mem_mask ((__v16qi *) __P, (__v4di) __A, __M);
7719}
7720
7721static __inline__ __m128i __DEFAULT_FN_ATTRS128
7723{
7724 return (__m128i)__builtin_shufflevector(
7725 __builtin_convertvector((__v2di)__A, __v2si), (__v2si){0, 0}, 0, 1, 2, 3);
7726}
7727
7728static __inline__ __m128i __DEFAULT_FN_ATTRS128
7729_mm_mask_cvtepi64_epi32 (__m128i __O, __mmask8 __M, __m128i __A)
7730{
7731 return (__m128i) __builtin_ia32_pmovqd128_mask ((__v2di) __A,
7732 (__v4si) __O, __M);
7733}
7734
7735static __inline__ __m128i __DEFAULT_FN_ATTRS128
7737{
7738 return (__m128i) __builtin_ia32_pmovqd128_mask ((__v2di) __A,
7739 (__v4si) _mm_setzero_si128 (),
7740 __M);
7741}
7742
7743static __inline__ void __DEFAULT_FN_ATTRS128
7744_mm_mask_cvtepi64_storeu_epi32 (void * __P, __mmask8 __M, __m128i __A)
7745{
7746 __builtin_ia32_pmovqd128mem_mask ((__v4si *) __P, (__v2di) __A, __M);
7747}
7748
7749static __inline__ __m128i __DEFAULT_FN_ATTRS256
7751{
7752 return (__m128i)__builtin_convertvector((__v4di)__A, __v4si);
7753}
7754
7755static __inline__ __m128i __DEFAULT_FN_ATTRS256
7756_mm256_mask_cvtepi64_epi32 (__m128i __O, __mmask8 __M, __m256i __A)
7757{
7758 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
7759 (__v4si)_mm256_cvtepi64_epi32(__A),
7760 (__v4si)__O);
7761}
7762
7763static __inline__ __m128i __DEFAULT_FN_ATTRS256
7765{
7766 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
7767 (__v4si)_mm256_cvtepi64_epi32(__A),
7768 (__v4si)_mm_setzero_si128());
7769}
7770
7771static __inline__ void __DEFAULT_FN_ATTRS256
7772_mm256_mask_cvtepi64_storeu_epi32 (void * __P, __mmask8 __M, __m256i __A)
7773{
7774 __builtin_ia32_pmovqd256mem_mask ((__v4si *) __P, (__v4di) __A, __M);
7775}
7776
7777static __inline__ __m128i __DEFAULT_FN_ATTRS128
7779{
7780 return (__m128i)__builtin_shufflevector(
7781 __builtin_convertvector((__v2di)__A, __v2hi), (__v2hi){0, 0}, 0, 1, 2, 3,
7782 3, 3, 3, 3);
7783}
7784
7785static __inline__ __m128i __DEFAULT_FN_ATTRS128
7786_mm_mask_cvtepi64_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
7787{
7788 return (__m128i) __builtin_ia32_pmovqw128_mask ((__v2di) __A,
7789 (__v8hi)__O,
7790 __M);
7791}
7792
7793static __inline__ __m128i __DEFAULT_FN_ATTRS128
7795{
7796 return (__m128i) __builtin_ia32_pmovqw128_mask ((__v2di) __A,
7797 (__v8hi) _mm_setzero_si128 (),
7798 __M);
7799}
7800
7801static __inline__ void __DEFAULT_FN_ATTRS128
7802_mm_mask_cvtepi64_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A)
7803{
7804 __builtin_ia32_pmovqw128mem_mask ((__v8hi *) __P, (__v2di) __A, __M);
7805}
7806
7807static __inline__ __m128i __DEFAULT_FN_ATTRS256
7809{
7810 return (__m128i)__builtin_shufflevector(
7811 __builtin_convertvector((__v4di)__A, __v4hi), (__v4hi){0, 0, 0, 0}, 0, 1,
7812 2, 3, 4, 5, 6, 7);
7813}
7814
7815static __inline__ __m128i __DEFAULT_FN_ATTRS256
7816_mm256_mask_cvtepi64_epi16 (__m128i __O, __mmask8 __M, __m256i __A)
7817{
7818 return (__m128i) __builtin_ia32_pmovqw256_mask ((__v4di) __A,
7819 (__v8hi) __O, __M);
7820}
7821
7822static __inline__ __m128i __DEFAULT_FN_ATTRS256
7824{
7825 return (__m128i) __builtin_ia32_pmovqw256_mask ((__v4di) __A,
7826 (__v8hi) _mm_setzero_si128 (),
7827 __M);
7828}
7829
7830static __inline__ void __DEFAULT_FN_ATTRS256
7831_mm256_mask_cvtepi64_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A)
7832{
7833 __builtin_ia32_pmovqw256mem_mask ((__v8hi *) __P, (__v4di) __A, __M);
7834}
7835
/* Extracts the 128-bit float lane of A selected by imm via the
   extractf32x4_256 builtin (unmasked: all-ones mask, undefined pass-through).
   The expansion is fully parenthesized so postfix operators at the use site
   apply to the cast result. */
#define _mm256_extractf32x4_ps(A, imm) \
  ((__m128)__builtin_ia32_extractf32x4_256_mask((__v8sf)(__m256)(A), \
                                                (int)(imm), \
                                                (__v4sf)_mm_undefined_ps(), \
                                                (__mmask8)-1))
7841
/* Merge-masked extractf32x4_256: lane imm of A, with W as pass-through and U
   as the write mask. Expansion is fully parenthesized. */
#define _mm256_mask_extractf32x4_ps(W, U, A, imm) \
  ((__m128)__builtin_ia32_extractf32x4_256_mask((__v8sf)(__m256)(A), \
                                                (int)(imm), \
                                                (__v4sf)(__m128)(W), \
                                                (__mmask8)(U)))
7847
/* Zero-masked extractf32x4_256: lane imm of A, with a zero vector as
   pass-through and U as the write mask. Expansion is fully parenthesized. */
#define _mm256_maskz_extractf32x4_ps(U, A, imm) \
  ((__m128)__builtin_ia32_extractf32x4_256_mask((__v8sf)(__m256)(A), \
                                                (int)(imm), \
                                                (__v4sf)_mm_setzero_ps(), \
                                                (__mmask8)(U)))
7853
/* Extracts the 128-bit integer lane of A selected by imm via the
   extracti32x4_256 builtin (unmasked: all-ones mask, undefined pass-through).
   Expansion is fully parenthesized. */
#define _mm256_extracti32x4_epi32(A, imm) \
  ((__m128i)__builtin_ia32_extracti32x4_256_mask((__v8si)(__m256i)(A), \
                                                 (int)(imm), \
                                                 (__v4si)_mm_undefined_si128(), \
                                                 (__mmask8)-1))
7859
/* Merge-masked extracti32x4_256: lane imm of A, with W as pass-through and U
   as the write mask. Expansion is fully parenthesized. */
#define _mm256_mask_extracti32x4_epi32(W, U, A, imm) \
  ((__m128i)__builtin_ia32_extracti32x4_256_mask((__v8si)(__m256i)(A), \
                                                 (int)(imm), \
                                                 (__v4si)(__m128i)(W), \
                                                 (__mmask8)(U)))
7865
/* Zero-masked extracti32x4_256: lane imm of A, with a zero vector as
   pass-through and U as the write mask. Expansion is fully parenthesized. */
#define _mm256_maskz_extracti32x4_epi32(U, A, imm) \
  ((__m128i)__builtin_ia32_extracti32x4_256_mask((__v8si)(__m256i)(A), \
                                                 (int)(imm), \
                                                 (__v4si)_mm_setzero_si128(), \
                                                 (__mmask8)(U)))
7871
/* Inserts the 128-bit float vector B into A at the lane selected by imm via
   the insertf32x4_256 builtin. Expansion is fully parenthesized. */
#define _mm256_insertf32x4(A, B, imm) \
  ((__m256)__builtin_ia32_insertf32x4_256((__v8sf)(__m256)(A), \
                                          (__v4sf)(__m128)(B), (int)(imm)))
7875
/* Merge-masked insert: selectps_256 picks, under mask U, between
   _mm256_insertf32x4(A, B, imm) and W. Expansion is fully parenthesized. */
#define _mm256_mask_insertf32x4(W, U, A, B, imm) \
  ((__m256)__builtin_ia32_selectps_256((__mmask8)(U), \
                                  (__v8sf)_mm256_insertf32x4((A), (B), (imm)), \
                                  (__v8sf)(__m256)(W)))
7880
/* Zero-masked insert: selectps_256 picks, under mask U, between
   _mm256_insertf32x4(A, B, imm) and a zero vector. Fully parenthesized. */
#define _mm256_maskz_insertf32x4(U, A, B, imm) \
  ((__m256)__builtin_ia32_selectps_256((__mmask8)(U), \
                                  (__v8sf)_mm256_insertf32x4((A), (B), (imm)), \
                                  (__v8sf)_mm256_setzero_ps()))
7885
/* Inserts the 128-bit integer vector B into A at the lane selected by imm via
   the inserti32x4_256 builtin. Expansion is fully parenthesized. */
#define _mm256_inserti32x4(A, B, imm) \
  ((__m256i)__builtin_ia32_inserti32x4_256((__v8si)(__m256i)(A), \
                                           (__v4si)(__m128i)(B), (int)(imm)))
7889
/* Merge-masked insert: selectd_256 picks, under mask U, between
   _mm256_inserti32x4(A, B, imm) and W. Expansion is fully parenthesized. */
#define _mm256_mask_inserti32x4(W, U, A, B, imm) \
  ((__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \
                                  (__v8si)_mm256_inserti32x4((A), (B), (imm)), \
                                  (__v8si)(__m256i)(W)))
7894
/* Zero-masked insert: selectd_256 picks, under mask U, between
   _mm256_inserti32x4(A, B, imm) and a zero vector. Fully parenthesized. */
#define _mm256_maskz_inserti32x4(U, A, B, imm) \
  ((__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \
                                  (__v8si)_mm256_inserti32x4((A), (B), (imm)), \
                                  (__v8si)_mm256_setzero_si256()))
7899
/* Unmasked getmantpd128 on A; the builtin's immediate is ((C) << 2) | (B).
   NOTE(review): B and C are presumably the interval and sign selectors
   (_MM_MANTISSA_NORM_ENUM / _MM_MANTISSA_SIGN_ENUM) -- confirm against Intel's
   guide. Mask is all-ones; expansion is fully parenthesized. */
#define _mm_getmant_pd(A, B, C) \
  ((__m128d)__builtin_ia32_getmantpd128_mask((__v2df)(__m128d)(A), \
                                             (int)(((C)<<2) | (B)), \
                                             (__v2df)_mm_setzero_pd(), \
                                             (__mmask8)-1))
7905
/* Merge-masked getmantpd128 on A with immediate ((C) << 2) | (B); W is the
   pass-through vector and U the write mask. Expansion is fully parenthesized. */
#define _mm_mask_getmant_pd(W, U, A, B, C) \
  ((__m128d)__builtin_ia32_getmantpd128_mask((__v2df)(__m128d)(A), \
                                             (int)(((C)<<2) | (B)), \
                                             (__v2df)(__m128d)(W), \
                                             (__mmask8)(U)))
7911
/* Zero-masked getmantpd128 on A with immediate ((C) << 2) | (B); a zero
   vector is the pass-through and U the write mask. Fully parenthesized. */
#define _mm_maskz_getmant_pd(U, A, B, C) \
  ((__m128d)__builtin_ia32_getmantpd128_mask((__v2df)(__m128d)(A), \
                                             (int)(((C)<<2) | (B)), \
                                             (__v2df)_mm_setzero_pd(), \
                                             (__mmask8)(U)))
7917
/* Unmasked getmantpd256 on A with immediate ((C) << 2) | (B); mask is
   all-ones. Expansion is fully parenthesized. */
#define _mm256_getmant_pd(A, B, C) \
  ((__m256d)__builtin_ia32_getmantpd256_mask((__v4df)(__m256d)(A), \
                                             (int)(((C)<<2) | (B)), \
                                             (__v4df)_mm256_setzero_pd(), \
                                             (__mmask8)-1))
7923
/* Merge-masked getmantpd256 on A with immediate ((C) << 2) | (B); W is the
   pass-through vector and U the write mask. Expansion is fully parenthesized. */
#define _mm256_mask_getmant_pd(W, U, A, B, C) \
  ((__m256d)__builtin_ia32_getmantpd256_mask((__v4df)(__m256d)(A), \
                                             (int)(((C)<<2) | (B)), \
                                             (__v4df)(__m256d)(W), \
                                             (__mmask8)(U)))
7929
/* Zero-masked getmantpd256 on A with immediate ((C) << 2) | (B); a zero
   vector is the pass-through and U the write mask. Fully parenthesized. */
#define _mm256_maskz_getmant_pd(U, A, B, C) \
  ((__m256d)__builtin_ia32_getmantpd256_mask((__v4df)(__m256d)(A), \
                                             (int)(((C)<<2) | (B)), \
                                             (__v4df)_mm256_setzero_pd(), \
                                             (__mmask8)(U)))
7935
/* 128-bit single-precision GETMANT family.
   The immediate is built as ((C) << 2) | (B).  The unmasked form uses an
   all-ones mask with a zero pass-through, the mask form passes W, and the
   maskz form passes zero with mask U.
   Expansions are fully parenthesized so postfix operators on the macro result
   apply to the cast value. */
#define _mm_getmant_ps(A, B, C) \
  ((__m128)__builtin_ia32_getmantps128_mask((__v4sf)(__m128)(A), \
      (int)(((C)<<2) | (B)), \
      (__v4sf)_mm_setzero_ps(), \
      (__mmask8)-1))

#define _mm_mask_getmant_ps(W, U, A, B, C) \
  ((__m128)__builtin_ia32_getmantps128_mask((__v4sf)(__m128)(A), \
      (int)(((C)<<2) | (B)), \
      (__v4sf)(__m128)(W), \
      (__mmask8)(U)))

#define _mm_maskz_getmant_ps(U, A, B, C) \
  ((__m128)__builtin_ia32_getmantps128_mask((__v4sf)(__m128)(A), \
      (int)(((C)<<2) | (B)), \
      (__v4sf)_mm_setzero_ps(), \
      (__mmask8)(U)))
7953
/* 256-bit single-precision GETMANT family.
   The immediate is built as ((C) << 2) | (B).  The unmasked form uses an
   all-ones mask with a zero pass-through, the mask form passes W, and the
   maskz form passes zero with mask U.
   Expansions are fully parenthesized so postfix operators on the macro result
   apply to the cast value. */
#define _mm256_getmant_ps(A, B, C) \
  ((__m256)__builtin_ia32_getmantps256_mask((__v8sf)(__m256)(A), \
      (int)(((C)<<2) | (B)), \
      (__v8sf)_mm256_setzero_ps(), \
      (__mmask8)-1))

#define _mm256_mask_getmant_ps(W, U, A, B, C) \
  ((__m256)__builtin_ia32_getmantps256_mask((__v8sf)(__m256)(A), \
      (int)(((C)<<2) | (B)), \
      (__v8sf)(__m256)(W), \
      (__mmask8)(U)))

#define _mm256_maskz_getmant_ps(U, A, B, C) \
  ((__m256)__builtin_ia32_getmantps256_mask((__v8sf)(__m256)(A), \
      (int)(((C)<<2) | (B)), \
      (__v8sf)_mm256_setzero_ps(), \
      (__mmask8)(U)))
7971
/* Masked gathers of 64-bit elements addressed by 64-bit indices.
   Each macro forwards, in this order: the previous value v1_old, the base
   address as `void const *`, the index vector, the mask as __mmask8 and the
   scale as an int immediate.
   Expansions are fully parenthesized so postfix operators on the macro result
   apply to the cast value. */
#define _mm_mmask_i64gather_pd(v1_old, mask, index, addr, scale) \
  ((__m128d)__builtin_ia32_gather3div2df((__v2df)(__m128d)(v1_old), \
      (void const *)(addr), \
      (__v2di)(__m128i)(index), \
      (__mmask8)(mask), (int)(scale)))

#define _mm_mmask_i64gather_epi64(v1_old, mask, index, addr, scale) \
  ((__m128i)__builtin_ia32_gather3div2di((__v2di)(__m128i)(v1_old), \
      (void const *)(addr), \
      (__v2di)(__m128i)(index), \
      (__mmask8)(mask), (int)(scale)))

#define _mm256_mmask_i64gather_pd(v1_old, mask, index, addr, scale) \
  ((__m256d)__builtin_ia32_gather3div4df((__v4df)(__m256d)(v1_old), \
      (void const *)(addr), \
      (__v4di)(__m256i)(index), \
      (__mmask8)(mask), (int)(scale)))

#define _mm256_mmask_i64gather_epi64(v1_old, mask, index, addr, scale) \
  ((__m256i)__builtin_ia32_gather3div4di((__v4di)(__m256i)(v1_old), \
      (void const *)(addr), \
      (__v4di)(__m256i)(index), \
      (__mmask8)(mask), (int)(scale)))
7995
/* Masked gathers of 32-bit elements addressed by 64-bit indices.
   The result and v1_old are 128-bit in every variant, including the _mm256_
   ones: a 256-bit index vector holds four 64-bit indices, which yields four
   32-bit elements.
   Expansions are fully parenthesized so postfix operators on the macro result
   apply to the cast value. */
#define _mm_mmask_i64gather_ps(v1_old, mask, index, addr, scale) \
  ((__m128)__builtin_ia32_gather3div4sf((__v4sf)(__m128)(v1_old), \
      (void const *)(addr), \
      (__v2di)(__m128i)(index), \
      (__mmask8)(mask), (int)(scale)))

#define _mm_mmask_i64gather_epi32(v1_old, mask, index, addr, scale) \
  ((__m128i)__builtin_ia32_gather3div4si((__v4si)(__m128i)(v1_old), \
      (void const *)(addr), \
      (__v2di)(__m128i)(index), \
      (__mmask8)(mask), (int)(scale)))

#define _mm256_mmask_i64gather_ps(v1_old, mask, index, addr, scale) \
  ((__m128)__builtin_ia32_gather3div8sf((__v4sf)(__m128)(v1_old), \
      (void const *)(addr), \
      (__v4di)(__m256i)(index), \
      (__mmask8)(mask), (int)(scale)))

#define _mm256_mmask_i64gather_epi32(v1_old, mask, index, addr, scale) \
  ((__m128i)__builtin_ia32_gather3div8si((__v4si)(__m128i)(v1_old), \
      (void const *)(addr), \
      (__v4di)(__m256i)(index), \
      (__mmask8)(mask), (int)(scale)))
8019
/* Masked gathers of 64-bit elements addressed by 32-bit indices.
   The index operand is a 128-bit vector of 32-bit lanes in every variant,
   including the 256-bit ones.
   Expansions are fully parenthesized so postfix operators on the macro result
   apply to the cast value. */
#define _mm_mmask_i32gather_pd(v1_old, mask, index, addr, scale) \
  ((__m128d)__builtin_ia32_gather3siv2df((__v2df)(__m128d)(v1_old), \
      (void const *)(addr), \
      (__v4si)(__m128i)(index), \
      (__mmask8)(mask), (int)(scale)))

#define _mm_mmask_i32gather_epi64(v1_old, mask, index, addr, scale) \
  ((__m128i)__builtin_ia32_gather3siv2di((__v2di)(__m128i)(v1_old), \
      (void const *)(addr), \
      (__v4si)(__m128i)(index), \
      (__mmask8)(mask), (int)(scale)))

#define _mm256_mmask_i32gather_pd(v1_old, mask, index, addr, scale) \
  ((__m256d)__builtin_ia32_gather3siv4df((__v4df)(__m256d)(v1_old), \
      (void const *)(addr), \
      (__v4si)(__m128i)(index), \
      (__mmask8)(mask), (int)(scale)))

#define _mm256_mmask_i32gather_epi64(v1_old, mask, index, addr, scale) \
  ((__m256i)__builtin_ia32_gather3siv4di((__v4di)(__m256i)(v1_old), \
      (void const *)(addr), \
      (__v4si)(__m128i)(index), \
      (__mmask8)(mask), (int)(scale)))
8043
/* Masked gathers of 32-bit elements addressed by 32-bit indices.
   Result, v1_old and index all have the same vector width (128 or 256 bit).
   Expansions are fully parenthesized so postfix operators on the macro result
   apply to the cast value. */
#define _mm_mmask_i32gather_ps(v1_old, mask, index, addr, scale) \
  ((__m128)__builtin_ia32_gather3siv4sf((__v4sf)(__m128)(v1_old), \
      (void const *)(addr), \
      (__v4si)(__m128i)(index), \
      (__mmask8)(mask), (int)(scale)))

#define _mm_mmask_i32gather_epi32(v1_old, mask, index, addr, scale) \
  ((__m128i)__builtin_ia32_gather3siv4si((__v4si)(__m128i)(v1_old), \
      (void const *)(addr), \
      (__v4si)(__m128i)(index), \
      (__mmask8)(mask), (int)(scale)))

#define _mm256_mmask_i32gather_ps(v1_old, mask, index, addr, scale) \
  ((__m256)__builtin_ia32_gather3siv8sf((__v8sf)(__m256)(v1_old), \
      (void const *)(addr), \
      (__v8si)(__m256i)(index), \
      (__mmask8)(mask), (int)(scale)))

#define _mm256_mmask_i32gather_epi32(v1_old, mask, index, addr, scale) \
  ((__m256i)__builtin_ia32_gather3siv8si((__v8si)(__m256i)(v1_old), \
      (void const *)(addr), \
      (__v8si)(__m256i)(index), \
      (__mmask8)(mask), (int)(scale)))
8067
/* Permute the four doubles of X under the int immediate C, plus the
   merge-masking (W as second select operand) and zero-masking variants.
   Expansions are fully parenthesized so postfix operators on the macro result
   apply to the cast value. */
#define _mm256_permutex_pd(X, C) \
  ((__m256d)__builtin_ia32_permdf256((__v4df)(__m256d)(X), (int)(C)))

#define _mm256_mask_permutex_pd(W, U, X, C) \
  ((__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \
      (__v4df)_mm256_permutex_pd((X), (C)), \
      (__v4df)(__m256d)(W)))

#define _mm256_maskz_permutex_pd(U, X, C) \
  ((__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \
      (__v4df)_mm256_permutex_pd((X), (C)), \
      (__v4df)_mm256_setzero_pd()))
8080
/* Permute the four 64-bit integers of X under the int immediate C, plus the
   merge-masking (W as second select operand) and zero-masking variants.
   Expansions are fully parenthesized so postfix operators on the macro result
   apply to the cast value. */
#define _mm256_permutex_epi64(X, C) \
  ((__m256i)__builtin_ia32_permdi256((__v4di)(__m256i)(X), (int)(C)))

#define _mm256_mask_permutex_epi64(W, U, X, C) \
  ((__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \
      (__v4di)_mm256_permutex_epi64((X), (C)), \
      (__v4di)(__m256i)(W)))

#define _mm256_maskz_permutex_epi64(U, X, C) \
  ((__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \
      (__v4di)_mm256_permutex_epi64((X), (C)), \
      (__v4di)_mm256_setzero_si256()))
8093
8094static __inline__ __m256d __DEFAULT_FN_ATTRS256
8095_mm256_permutexvar_pd (__m256i __X, __m256d __Y)
8096{
8097 return (__m256d)__builtin_ia32_permvardf256((__v4df)__Y, (__v4di)__X);
8098}
8099
8100static __inline__ __m256d __DEFAULT_FN_ATTRS256
8101_mm256_mask_permutexvar_pd (__m256d __W, __mmask8 __U, __m256i __X,
8102 __m256d __Y)
8103{
8104 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
8105 (__v4df)_mm256_permutexvar_pd(__X, __Y),
8106 (__v4df)__W);
8107}
8108
8109static __inline__ __m256d __DEFAULT_FN_ATTRS256
8110_mm256_maskz_permutexvar_pd (__mmask8 __U, __m256i __X, __m256d __Y)
8111{
8112 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
8113 (__v4df)_mm256_permutexvar_pd(__X, __Y),
8114 (__v4df)_mm256_setzero_pd());
8115}
8116
8117static __inline__ __m256i __DEFAULT_FN_ATTRS256
8118_mm256_permutexvar_epi64 ( __m256i __X, __m256i __Y)
8119{
8120 return (__m256i)__builtin_ia32_permvardi256((__v4di) __Y, (__v4di) __X);
8121}
8122
8123static __inline__ __m256i __DEFAULT_FN_ATTRS256
8124_mm256_maskz_permutexvar_epi64 (__mmask8 __M, __m256i __X, __m256i __Y)
8125{
8126 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
8127 (__v4di)_mm256_permutexvar_epi64(__X, __Y),
8128 (__v4di)_mm256_setzero_si256());
8129}
8130
8131static __inline__ __m256i __DEFAULT_FN_ATTRS256
8132_mm256_mask_permutexvar_epi64 (__m256i __W, __mmask8 __M, __m256i __X,
8133 __m256i __Y)
8134{
8135 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
8136 (__v4di)_mm256_permutexvar_epi64(__X, __Y),
8137 (__v4di)__W);
8138}
8139
/* Forwards to _mm256_permutevar8x32_ps with the two arguments swapped. */
#define _mm256_permutexvar_ps(IDX, SRC) \
  _mm256_permutevar8x32_ps((SRC), (IDX))
8141
8142static __inline__ __m256 __DEFAULT_FN_ATTRS256
8143_mm256_mask_permutexvar_ps(__m256 __W, __mmask8 __U, __m256i __X, __m256 __Y)
8144{
8145 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
8146 (__v8sf)_mm256_permutexvar_ps(__X, __Y),
8147 (__v8sf)__W);
8148}
8149
8150static __inline__ __m256 __DEFAULT_FN_ATTRS256
8151_mm256_maskz_permutexvar_ps(__mmask8 __U, __m256i __X, __m256 __Y)
8152{
8153 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
8154 (__v8sf)_mm256_permutexvar_ps(__X, __Y),
8155 (__v8sf)_mm256_setzero_ps());
8156}
8157
/* Forwards to _mm256_permutevar8x32_epi32 with the two arguments swapped. */
#define _mm256_permutexvar_epi32(IDX, SRC) \
  _mm256_permutevar8x32_epi32((SRC), (IDX))
8159
8160static __inline__ __m256i __DEFAULT_FN_ATTRS256
8161_mm256_mask_permutexvar_epi32(__m256i __W, __mmask8 __M, __m256i __X,
8162 __m256i __Y)
8163{
8164 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
8165 (__v8si)_mm256_permutexvar_epi32(__X, __Y),
8166 (__v8si)__W);
8167}
8168
8169static __inline__ __m256i __DEFAULT_FN_ATTRS256
8170_mm256_maskz_permutexvar_epi32(__mmask8 __M, __m256i __X, __m256i __Y)
8171{
8172 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
8173 (__v8si)_mm256_permutexvar_epi32(__X, __Y),
8174 (__v8si)_mm256_setzero_si256());
8175}
8176
/* 128-bit dword align-right of A and B by the int immediate imm, plus the
   merge-masking (W as second select operand) and zero-masking variants.
   Expansions are fully parenthesized so postfix operators on the macro result
   apply to the cast value. */
#define _mm_alignr_epi32(A, B, imm) \
  ((__m128i)__builtin_ia32_alignd128((__v4si)(__m128i)(A), \
      (__v4si)(__m128i)(B), (int)(imm)))

#define _mm_mask_alignr_epi32(W, U, A, B, imm) \
  ((__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \
      (__v4si)_mm_alignr_epi32((A), (B), (imm)), \
      (__v4si)(__m128i)(W)))

#define _mm_maskz_alignr_epi32(U, A, B, imm) \
  ((__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \
      (__v4si)_mm_alignr_epi32((A), (B), (imm)), \
      (__v4si)_mm_setzero_si128()))
8190
/* 256-bit dword align-right of A and B by the int immediate imm, plus the
   merge-masking (W as second select operand) and zero-masking variants.
   Expansions are fully parenthesized so postfix operators on the macro result
   apply to the cast value. */
#define _mm256_alignr_epi32(A, B, imm) \
  ((__m256i)__builtin_ia32_alignd256((__v8si)(__m256i)(A), \
      (__v8si)(__m256i)(B), (int)(imm)))

#define _mm256_mask_alignr_epi32(W, U, A, B, imm) \
  ((__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \
      (__v8si)_mm256_alignr_epi32((A), (B), (imm)), \
      (__v8si)(__m256i)(W)))

#define _mm256_maskz_alignr_epi32(U, A, B, imm) \
  ((__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \
      (__v8si)_mm256_alignr_epi32((A), (B), (imm)), \
      (__v8si)_mm256_setzero_si256()))
8204
/* 128-bit qword align-right of A and B by the int immediate imm, plus the
   merge-masking (W as second select operand) and zero-masking variants.
   Expansions are fully parenthesized so postfix operators on the macro result
   apply to the cast value. */
#define _mm_alignr_epi64(A, B, imm) \
  ((__m128i)__builtin_ia32_alignq128((__v2di)(__m128i)(A), \
      (__v2di)(__m128i)(B), (int)(imm)))

#define _mm_mask_alignr_epi64(W, U, A, B, imm) \
  ((__m128i)__builtin_ia32_selectq_128((__mmask8)(U), \
      (__v2di)_mm_alignr_epi64((A), (B), (imm)), \
      (__v2di)(__m128i)(W)))

#define _mm_maskz_alignr_epi64(U, A, B, imm) \
  ((__m128i)__builtin_ia32_selectq_128((__mmask8)(U), \
      (__v2di)_mm_alignr_epi64((A), (B), (imm)), \
      (__v2di)_mm_setzero_si128()))
8218
/* 256-bit qword align-right of A and B by the int immediate imm, plus the
   merge-masking (W as second select operand) and zero-masking variants.
   Expansions are fully parenthesized so postfix operators on the macro result
   apply to the cast value. */
#define _mm256_alignr_epi64(A, B, imm) \
  ((__m256i)__builtin_ia32_alignq256((__v4di)(__m256i)(A), \
      (__v4di)(__m256i)(B), (int)(imm)))

#define _mm256_mask_alignr_epi64(W, U, A, B, imm) \
  ((__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \
      (__v4di)_mm256_alignr_epi64((A), (B), (imm)), \
      (__v4di)(__m256i)(W)))

#define _mm256_maskz_alignr_epi64(U, A, B, imm) \
  ((__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \
      (__v4di)_mm256_alignr_epi64((A), (B), (imm)), \
      (__v4di)_mm256_setzero_si256()))
8232
8233static __inline__ __m128 __DEFAULT_FN_ATTRS128
8234_mm_mask_movehdup_ps (__m128 __W, __mmask8 __U, __m128 __A)
8235{
8236 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
8237 (__v4sf)_mm_movehdup_ps(__A),
8238 (__v4sf)__W);
8239}
8240
8241static __inline__ __m128 __DEFAULT_FN_ATTRS128
8243{
8244 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
8245 (__v4sf)_mm_movehdup_ps(__A),
8246 (__v4sf)_mm_setzero_ps());
8247}
8248
8249static __inline__ __m256 __DEFAULT_FN_ATTRS256
8250_mm256_mask_movehdup_ps (__m256 __W, __mmask8 __U, __m256 __A)
8251{
8252 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
8253 (__v8sf)_mm256_movehdup_ps(__A),
8254 (__v8sf)__W);
8255}
8256
8257static __inline__ __m256 __DEFAULT_FN_ATTRS256
8259{
8260 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
8261 (__v8sf)_mm256_movehdup_ps(__A),
8262 (__v8sf)_mm256_setzero_ps());
8263}
8264
8265static __inline__ __m128 __DEFAULT_FN_ATTRS128
8266_mm_mask_moveldup_ps (__m128 __W, __mmask8 __U, __m128 __A)
8267{
8268 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
8269 (__v4sf)_mm_moveldup_ps(__A),
8270 (__v4sf)__W);
8271}
8272
8273static __inline__ __m128 __DEFAULT_FN_ATTRS128
8275{
8276 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
8277 (__v4sf)_mm_moveldup_ps(__A),
8278 (__v4sf)_mm_setzero_ps());
8279}
8280
8281static __inline__ __m256 __DEFAULT_FN_ATTRS256
8282_mm256_mask_moveldup_ps (__m256 __W, __mmask8 __U, __m256 __A)
8283{
8284 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
8285 (__v8sf)_mm256_moveldup_ps(__A),
8286 (__v8sf)__W);
8287}
8288
8289static __inline__ __m256 __DEFAULT_FN_ATTRS256
8291{
8292 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
8293 (__v8sf)_mm256_moveldup_ps(__A),
8294 (__v8sf)_mm256_setzero_ps());
8295}
8296
/* Masked forms of _mm256_shuffle_epi32(A, I): the shuffled dwords go to the
   256-bit dword select builtin under mask U, with W (mask form) or an all-zero
   vector (maskz form) as the second operand.
   Expansions are fully parenthesized so postfix operators on the macro result
   apply to the cast value. */
#define _mm256_mask_shuffle_epi32(W, U, A, I) \
  ((__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \
      (__v8si)_mm256_shuffle_epi32((A), (I)), \
      (__v8si)(__m256i)(W)))

#define _mm256_maskz_shuffle_epi32(U, A, I) \
  ((__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \
      (__v8si)_mm256_shuffle_epi32((A), (I)), \
      (__v8si)_mm256_setzero_si256()))
8306
/* Masked forms of _mm_shuffle_epi32(A, I): the shuffled dwords go to the
   128-bit dword select builtin under mask U, with W (mask form) or an all-zero
   vector (maskz form) as the second operand.
   Expansions are fully parenthesized so postfix operators on the macro result
   apply to the cast value. */
#define _mm_mask_shuffle_epi32(W, U, A, I) \
  ((__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \
      (__v4si)_mm_shuffle_epi32((A), (I)), \
      (__v4si)(__m128i)(W)))

#define _mm_maskz_shuffle_epi32(U, A, I) \
  ((__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \
      (__v4si)_mm_shuffle_epi32((A), (I)), \
      (__v4si)_mm_setzero_si128()))
8316
8317static __inline__ __m128d __DEFAULT_FN_ATTRS128
8318_mm_mask_mov_pd (__m128d __W, __mmask8 __U, __m128d __A)
8319{
8320 return (__m128d) __builtin_ia32_selectpd_128 ((__mmask8) __U,
8321 (__v2df) __A,
8322 (__v2df) __W);
8323}
8324
8325static __inline__ __m128d __DEFAULT_FN_ATTRS128
8326_mm_maskz_mov_pd (__mmask8 __U, __m128d __A)
8327{
8328 return (__m128d) __builtin_ia32_selectpd_128 ((__mmask8) __U,
8329 (__v2df) __A,
8330 (__v2df) _mm_setzero_pd ());
8331}
8332
8333static __inline__ __m256d __DEFAULT_FN_ATTRS256
8334_mm256_mask_mov_pd (__m256d __W, __mmask8 __U, __m256d __A)
8335{
8336 return (__m256d) __builtin_ia32_selectpd_256 ((__mmask8) __U,
8337 (__v4df) __A,
8338 (__v4df) __W);
8339}
8340
8341static __inline__ __m256d __DEFAULT_FN_ATTRS256
8343{
8344 return (__m256d) __builtin_ia32_selectpd_256 ((__mmask8) __U,
8345 (__v4df) __A,
8346 (__v4df) _mm256_setzero_pd ());
8347}
8348
8349static __inline__ __m128 __DEFAULT_FN_ATTRS128
8350_mm_mask_mov_ps (__m128 __W, __mmask8 __U, __m128 __A)
8351{
8352 return (__m128) __builtin_ia32_selectps_128 ((__mmask8) __U,
8353 (__v4sf) __A,
8354 (__v4sf) __W);
8355}
8356
8357static __inline__ __m128 __DEFAULT_FN_ATTRS128
8359{
8360 return (__m128) __builtin_ia32_selectps_128 ((__mmask8) __U,
8361 (__v4sf) __A,
8362 (__v4sf) _mm_setzero_ps ());
8363}
8364
8365static __inline__ __m256 __DEFAULT_FN_ATTRS256
8366_mm256_mask_mov_ps (__m256 __W, __mmask8 __U, __m256 __A)
8367{
8368 return (__m256) __builtin_ia32_selectps_256 ((__mmask8) __U,
8369 (__v8sf) __A,
8370 (__v8sf) __W);
8371}
8372
8373static __inline__ __m256 __DEFAULT_FN_ATTRS256
8375{
8376 return (__m256) __builtin_ia32_selectps_256 ((__mmask8) __U,
8377 (__v8sf) __A,
8378 (__v8sf) _mm256_setzero_ps ());
8379}
8380
8381static __inline__ __m128 __DEFAULT_FN_ATTRS128
8382_mm_mask_cvtph_ps (__m128 __W, __mmask8 __U, __m128i __A)
8383{
8384 return (__m128) __builtin_ia32_vcvtph2ps_mask ((__v8hi) __A,
8385 (__v4sf) __W,
8386 (__mmask8) __U);
8387}
8388
8389static __inline__ __m128 __DEFAULT_FN_ATTRS128
8391{
8392 return (__m128) __builtin_ia32_vcvtph2ps_mask ((__v8hi) __A,
8393 (__v4sf)
8394 _mm_setzero_ps (),
8395 (__mmask8) __U);
8396}
8397
8398static __inline__ __m256 __DEFAULT_FN_ATTRS256
8399_mm256_mask_cvtph_ps (__m256 __W, __mmask8 __U, __m128i __A)
8400{
8401 return (__m256) __builtin_ia32_vcvtph2ps256_mask ((__v8hi) __A,
8402 (__v8sf) __W,
8403 (__mmask8) __U);
8404}
8405
8406static __inline__ __m256 __DEFAULT_FN_ATTRS256
8408{
8409 return (__m256) __builtin_ia32_vcvtph2ps256_mask ((__v8hi) __A,
8410 (__v8sf)
8412 (__mmask8) __U);
8413}
8414
/* Masked single-to-half conversion of the 128-bit float vector A.  I is
   forwarded as an int immediate; W (mask form) or an all-zero vector (maskz
   form) is the pass-through operand and U the mask.
   Expansions are fully parenthesized so postfix operators on the macro result
   apply to the cast value. */
#define _mm_mask_cvt_roundps_ph(W, U, A, I) \
  ((__m128i)__builtin_ia32_vcvtps2ph_mask((__v4sf)(__m128)(A), (int)(I), \
      (__v8hi)(__m128i)(W), \
      (__mmask8)(U)))

#define _mm_maskz_cvt_roundps_ph(U, A, I) \
  ((__m128i)__builtin_ia32_vcvtps2ph_mask((__v4sf)(__m128)(A), (int)(I), \
      (__v8hi)_mm_setzero_si128(), \
      (__mmask8)(U)))

/* The cvtps_ph names are plain aliases of the cvt_roundps_ph macros. */
#define _mm_mask_cvtps_ph _mm_mask_cvt_roundps_ph
#define _mm_maskz_cvtps_ph _mm_maskz_cvt_roundps_ph
8427
/* Masked single-to-half conversion of the 256-bit float vector A into a
   128-bit result.  I is forwarded as an int immediate; W (mask form) or an
   all-zero vector (maskz form) is the pass-through operand and U the mask.
   Expansions are fully parenthesized so postfix operators on the macro result
   apply to the cast value. */
#define _mm256_mask_cvt_roundps_ph(W, U, A, I) \
  ((__m128i)__builtin_ia32_vcvtps2ph256_mask((__v8sf)(__m256)(A), (int)(I), \
      (__v8hi)(__m128i)(W), \
      (__mmask8)(U)))

#define _mm256_maskz_cvt_roundps_ph(U, A, I) \
  ((__m128i)__builtin_ia32_vcvtps2ph256_mask((__v8sf)(__m256)(A), (int)(I), \
      (__v8hi)_mm_setzero_si128(), \
      (__mmask8)(U)))

/* The cvtps_ph names are plain aliases of the cvt_roundps_ph macros. */
#define _mm256_mask_cvtps_ph _mm256_mask_cvt_roundps_ph
#define _mm256_maskz_cvtps_ph _mm256_maskz_cvt_roundps_ph
8440
8441
8442#undef __DEFAULT_FN_ATTRS128
8443#undef __DEFAULT_FN_ATTRS256
8444
8445#endif /* __AVX512VLINTRIN_H */
static __inline__ vector float vector float __b
Definition altivec.h:520
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_srl_epi32(__m256i __a, __m128i __count)
Definition avx2intrin.h:587
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_srli_epi64(__m256i __a, int __count)
Definition avx2intrin.h:593
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cvtepu8_epi32(__m128i __V)
Definition avx2intrin.h:380
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_and_si256(__m256i __a, __m256i __b)
Definition avx2intrin.h:121
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cvtepi16_epi32(__m128i __V)
Definition avx2intrin.h:356
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_sll_epi32(__m256i __a, __m128i __count)
Definition avx2intrin.h:521
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_unpacklo_epi64(__m256i __a, __m256i __b)
Definition avx2intrin.h:695
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_broadcastd_epi32(__m128i __X)
Definition avx2intrin.h:791
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_sll_epi64(__m256i __a, __m128i __count)
Definition avx2intrin.h:533
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cvtepi8_epi32(__m128i __V)
Definition avx2intrin.h:340
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_slli_epi32(__m256i __a, int __count)
Definition avx2intrin.h:515
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mul_epi32(__m256i __a, __m256i __b)
Definition avx2intrin.h:410
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_srai_epi32(__m256i __a, int __count)
Definition avx2intrin.h:551
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mullo_epi32(__m256i __a, __m256i __b)
Definition avx2intrin.h:440
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_sub_epi64(__m256i __a, __m256i __b)
Definition avx2intrin.h:623
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_sllv_epi64(__m256i __X, __m256i __Y)
Definition avx2intrin.h:891
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_unpacklo_epi32(__m256i __a, __m256i __b)
Definition avx2intrin.h:689
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_max_epu32(__m256i __a, __m256i __b)
Definition avx2intrin.h:284
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_srav_epi32(__m256i __X, __m256i __Y)
Definition avx2intrin.h:903
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_broadcastsd_pd(__m128d __X)
Definition avx2intrin.h:732
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_srlv_epi32(__m128i __X, __m128i __Y)
Definition avx2intrin.h:921
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_srlv_epi64(__m128i __X, __m128i __Y)
Definition avx2intrin.h:933
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_add_epi64(__m256i __a, __m256i __b)
Definition avx2intrin.h:87
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_sllv_epi32(__m128i __X, __m128i __Y)
Definition avx2intrin.h:885
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_unpackhi_epi32(__m256i __a, __m256i __b)
Definition avx2intrin.h:665
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_max_epi32(__m256i __a, __m256i __b)
Definition avx2intrin.h:266
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_sllv_epi32(__m256i __X, __m256i __Y)
Definition avx2intrin.h:879
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_broadcastss_ps(__m128 __X)
Definition avx2intrin.h:714
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_srlv_epi64(__m256i __X, __m256i __Y)
Definition avx2intrin.h:927
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cvtepu16_epi32(__m128i __V)
Definition avx2intrin.h:392
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_sra_epi32(__m256i __a, __m128i __count)
Definition avx2intrin.h:557
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_srav_epi32(__m128i __X, __m128i __Y)
Definition avx2intrin.h:909
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_min_epi32(__m256i __a, __m256i __b)
Definition avx2intrin.h:302
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cvtepu8_epi64(__m128i __V)
Definition avx2intrin.h:386
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_srlv_epi32(__m256i __X, __m256i __Y)
Definition avx2intrin.h:915
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_min_epu32(__m256i __a, __m256i __b)
Definition avx2intrin.h:320
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_sub_epi32(__m256i __a, __m256i __b)
Definition avx2intrin.h:617
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cvtepu16_epi64(__m128i __V)
Definition avx2intrin.h:398
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cvtepi32_epi64(__m128i __V)
Definition avx2intrin.h:368
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_unpackhi_epi64(__m256i __a, __m256i __b)
Definition avx2intrin.h:671
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_broadcastq_epi64(__m128i __X)
Definition avx2intrin.h:797
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_srl_epi64(__m256i __a, __m128i __count)
Definition avx2intrin.h:599
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_srli_epi32(__m256i __a, int __count)
Definition avx2intrin.h:581
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_slli_epi64(__m256i __a, int __count)
Definition avx2intrin.h:527
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cvtepu32_epi64(__m128i __V)
Definition avx2intrin.h:404
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_broadcastd_epi32(__m128i __X)
Definition avx2intrin.h:766
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cvtepi16_epi64(__m128i __V)
Definition avx2intrin.h:362
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_abs_epi32(__m256i __a)
Definition avx2intrin.h:39
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cvtepi8_epi64(__m128i __V)
Definition avx2intrin.h:348
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mul_epu32(__m256i __a, __m256i __b)
Definition avx2intrin.h:446
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_sllv_epi64(__m128i __X, __m128i __Y)
Definition avx2intrin.h:897
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_broadcastss_ps(__m128 __X)
Definition avx2intrin.h:726
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_add_epi32(__m256i __a, __m256i __b)
Definition avx2intrin.h:81
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_broadcastq_epi64(__m128i __X)
Definition avx2intrin.h:772
unsigned char __mmask8
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_xor_epi32(__mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_expandloadu_epi32(__m128i __W, __mmask8 __U, void const *__P)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_srav_epi32(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask3_fnmsub_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_broadcastd_epi32(__m256i __O, __mmask8 __M, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtusepi64_epi16(__m128i __O, __mmask8 __M, __m128i __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_min_pd(__mmask8 __U, __m256d __A, __m256d __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvttpd_epu32(__m128d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_add_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_fmadd_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_store_epi32(void *__P, __mmask8 __U, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_mov_epi64(__m256i __W, __mmask8 __U, __m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvttps_epu32(__mmask8 __U, __m256 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_andnot_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_rcp14_ps(__m256 __W, __mmask8 __U, __m256 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_cvtepi64_epi32(__m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_sll_epi64(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtepi64_epi16(__m128i __O, __mmask8 __M, __m128i __A)
#define _mm_mask_cmpneq_epi32_mask(k, A, B)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtpd_epi32(__m128i __W, __mmask8 __U, __m256d __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_add_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_rorv_epi64(__m256i __A, __m256i __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask3_fmaddsub_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_fnmadd_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_rcp14_pd(__mmask8 __U, __m256d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_or_epi32(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_compress_pd(__m128d __W, __mmask8 __U, __m128d __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_broadcastss_ps(__m128 __O, __mmask8 __M, __m128 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtusepi32_epi8(__mmask8 __M, __m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_max_epu32(__mmask8 __M, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_rorv_epi32(__mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_max_epi32(__mmask8 __M, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtepi64_epi16(__mmask8 __M, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtepi32_epi16(__m128i __O, __mmask8 __M, __m128i __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_fnmsub_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C)
static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_cvtsepi64_storeu_epi32(void *__P, __mmask8 __M, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_and_epi32(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_cvtepi64_storeu_epi8(void *__P, __mmask8 __M, __m128i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_loadu_ps(__mmask8 __U, void const *__P)
static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_cvtsepi64_storeu_epi16(void *__P, __mmask8 __M, __m128i __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_min_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_sll_epi32(__mmask8 __U, __m256i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_min_epu32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_andnot_epi64(__m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_expand_epi64(__m128i __W, __mmask8 __U, __m128i __A)
static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_storeu_epi32(void *__P, __mmask8 __U, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_sllv_epi64(__mmask8 __U, __m128i __X, __m128i __Y)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepi8_epi32(__m256i __W, __mmask8 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtepi32_epi8(__mmask8 __M, __m128i __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask3_fmsubadd_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtusepi64_epi8(__m128i __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_fmadd_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtepu32_epi64(__mmask8 __U, __m128i __X)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_cvtusepi32_epi8(__m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_sra_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_mov_ps(__m128 __W, __mmask8 __U, __m128 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_srli_epi64(__m256i __W, __mmask8 __U, __m256i __A, unsigned int __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_load_epi64(__m128i __W, __mmask8 __U, void const *__P)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_permutex2var_epi64(__m256i __A, __m256i __I, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_rolv_epi32(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_movehdup_ps(__mmask8 __U, __m256 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtsepi64_epi8(__mmask8 __M, __m256i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_fmsub_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_expandloadu_ps(__mmask8 __U, void const *__P)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_slli_epi32(__mmask8 __U, __m256i __A, unsigned int __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_and_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_loadu_epi64(__mmask8 __U, void const *__P)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtsepi32_epi16(__mmask8 __M, __m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_max_epu32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_load_ps(__m128 __W, __mmask8 __U, void const *__P)
static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_cvtsepi32_storeu_epi16(void *__P, __mmask8 __M, __m128i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_scalef_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_rcp14_pd(__m256d __W, __mmask8 __U, __m256d __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_expand_pd(__m256d __W, __mmask8 __U, __m256d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtsepi64_epi16(__m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_sra_epi64(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtusepi64_epi16(__m128i __O, __mmask8 __M, __m256i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_rsqrt14_pd(__mmask8 __U, __m128d __A)
static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_cvtsepi32_storeu_epi8(void *__P, __mmask8 __M, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtepi64_epi32(__mmask8 __M, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_srlv_epi64(__mmask8 __U, __m256i __X, __m256i __Y)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_srl_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B)
static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_cvtepi32_storeu_epi8(void *__P, __mmask8 __M, __m128i __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_unpackhi_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_rsqrt14_pd(__m128d __W, __mmask8 __U, __m128d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_max_epi64(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_load_epi32(__m128i __W, __mmask8 __U, void const *__P)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_fmsubadd_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS128 _mm_test_epi32_mask(__m128i __A, __m128i __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_permutevar_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128i __C)
static __inline void __DEFAULT_FN_ATTRS256 _mm256_store_epi32(void *__P, __m256i __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_getexp_ps(__m256 __W, __mmask8 __U, __m256 __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_fnmsub_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_sub_epi64(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepi32_epi8(__m128i __O, __mmask8 __M, __m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_srlv_epi64(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_add_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
#define _mm256_mask_cmpneq_epi64_mask(k, A, B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_scalef_pd(__m128d __A, __m128d __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_xor_epi32(__m128i __a, __m128i __b)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_movedup_pd(__mmask8 __U, __m256d __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtps_epi32(__mmask8 __U, __m256 __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_sqrt_ps(__m256 __W, __mmask8 __U, __m256 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_srl_epi64(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_unpackhi_epi32(__mmask8 __U, __m256i __A, __m256i __B)
static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_store_pd(void *__P, __mmask8 __U, __m256d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtusepi32_epi8(__m128i __O, __mmask8 __M, __m256i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_load_pd(__m128d __W, __mmask8 __U, void const *__P)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_add_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_set1_epi64(__m128i __O, __mmask8 __M, long long __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_mul_pd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_div_ps(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_mov_pd(__m128d __W, __mmask8 __U, __m128d __A)
static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_cvtusepi32_storeu_epi8(void *__P, __mmask8 __M, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtusepi64_epi8(__mmask8 __M, __m128i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_min_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_rorv_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtsepi32_epi8(__mmask8 __M, __m128i __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_broadcastss_ps(__mmask8 __M, __m128 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_min_epu64(__mmask8 __M, __m256i __A, __m256i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_add_ps(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_mul_pd(__mmask8 __U, __m256d __A, __m256d __B)
static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_cvtepi64_storeu_epi16(void *__P, __mmask8 __M, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_sllv_epi32(__mmask8 __U, __m128i __X, __m128i __Y)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_permutex2var_ps(__m128 __A, __m128i __I, __m128 __B)
static __inline void __DEFAULT_FN_ATTRS128 _mm_storeu_epi64(void *__P, __m128i __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_permutexvar_ps(__m256 __W, __mmask8 __U, __m256i __X, __m256 __Y)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask3_fmaddsub_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_min_ps(__mmask8 __U, __m256 __A, __m256 __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask3_fmadd_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_compress_epi64(__m256i __W, __mmask8 __U, __m256i __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_fmsubadd_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_max_epi64(__m128i __A, __m128i __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_fmadd_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_max_epi32(__mmask8 __M, __m128i __A, __m128i __B)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS256 _mm256_test_epi32_mask(__m256i __A, __m256i __B)
static __inline __m128i __DEFAULT_FN_ATTRS128 _mm_loadu_epi64(void const *__P)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_min_epu64(__m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_sll_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_loadu_epi64(__mmask8 __U, void const *__P)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtepi64_epi8(__m128i __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_loadu_pd(__m256d __W, __mmask8 __U, void const *__P)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_abs_epi64(__mmask8 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_or_epi64(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtsepi32_epi8(__mmask8 __M, __m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_xor_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS256 _mm256_mask_testn_epi64_mask(__mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_load_epi64(__m256i __W, __mmask8 __U, void const *__P)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_andnot_epi64(__mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_permutevar_ps(__mmask8 __U, __m256 __A, __m256i __C)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_getexp_pd(__mmask8 __U, __m256d __A)
static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_cvtusepi32_storeu_epi8(void *__P, __mmask8 __M, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtusepi32_epi16(__m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_and_epi32(__m256i __a, __m256i __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtepi64_epi16(__m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepu16_epi64(__m256i __W, __mmask8 __U, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_or_epi64(__m256i __a, __m256i __b)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_cvtph_ps(__m128 __W, __mmask8 __U, __m128i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_sqrt_ps(__m128 __W, __mmask8 __U, __m128 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask2_permutex2var_epi64(__m128i __A, __m128i __I, __mmask8 __U, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_add_epi64(__mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_rsqrt14_ps(__m128 __W, __mmask8 __U, __m128 __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_loadu_pd(__mmask8 __U, void const *__P)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_srl_epi32(__mmask8 __U, __m256i __A, __m128i __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask3_fmsub_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_rsqrt14_ps(__m256 __W, __mmask8 __U, __m256 __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_blend_pd(__mmask8 __U, __m256d __A, __m256d __W)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_cvtepi32_pd(__mmask8 __U, __m128i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_fmsub_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_rcp14_pd(__m256d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtepi32_epi16(__mmask8 __M, __m128i __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_rsqrt14_ps(__mmask8 __U, __m256 __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_permutexvar_pd(__m256d __W, __mmask8 __U, __m256i __X, __m256d __Y)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvttpd_epu32(__m128i __W, __mmask8 __U, __m128d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_mul_epu32(__mmask8 __M, __m128i __X, __m128i __Y)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_srl_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_unpacklo_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_loadu_epi64(__m128i __W, __mmask8 __U, void const *__P)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtepu16_epi64(__mmask8 __U, __m128i __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_mov_pd(__m256d __W, __mmask8 __U, __m256d __A)
static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_compressstoreu_pd(void *__P, __mmask8 __U, __m128d __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_fmsub_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_permutexvar_epi32(__m256i __W, __mmask8 __M, __m256i __X, __m256i __Y)
static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_storeu_pd(void *__P, __mmask8 __U, __m128d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_xor_epi64(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_rcp14_ps(__mmask8 __U, __m128 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_sllv_epi64(__mmask8 __U, __m256i __X, __m256i __Y)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_moveldup_ps(__m256 __W, __mmask8 __U, __m256 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtepu8_epi32(__m128i __W, __mmask8 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_min_epu32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_and_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtusepi32_epi16(__m128i __O, __mmask8 __M, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtsepi64_epi16(__mmask8 __M, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtps_epu32(__mmask8 __U, __m256 __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask2_permutex2var_ps(__m128 __A, __m128i __I, __mmask8 __U, __m128 __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_unpacklo_pd(__mmask8 __U, __m256d __A, __m256d __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_getexp_ps(__mmask8 __U, __m128 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_srai_epi32(__mmask8 __U, __m256i __A, unsigned int __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_mul_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_unpackhi_epi64(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_permutexvar_epi64(__m256i __W, __mmask8 __M, __m256i __X, __m256i __Y)
#define _mm256_cmpneq_epi32_mask(A, B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_sub_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtepu16_epi32(__mmask8 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_srl_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_sra_epi32(__mmask8 __U, __m256i __A, __m128i __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_load_pd(__mmask8 __U, void const *__P)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_set1_epi32(__m256i __O, __mmask8 __M, int __A)
#define _mm256_mask_cmpneq_epi32_mask(k, A, B)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_cvtepi64_epi16(__m256i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_sqrt_pd(__mmask8 __U, __m128d __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_or_epi32(__mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_min_epu32(__mmask8 __M, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtusepi64_epi32(__m128i __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_mov_pd(__mmask8 __U, __m256d __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_fnmadd_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtsepi64_epi16(__mmask8 __M, __m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_rorv_epi64(__mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_moveldup_ps(__mmask8 __U, __m256 __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_permutex2var_pd(__m256d __A, __m256i __I, __m256d __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_broadcastss_ps(__m256 __O, __mmask8 __M, __m128 __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_div_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_div_pd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_sub_ps(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask3_fmaddsub_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtepi32_epi16(__mmask8 __M, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtsepi64_epi8(__mmask8 __M, __m128i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_unpacklo_ps(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_scalef_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_set1_epi32(__m128i __O, __mmask8 __M, int __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_permutex2var_epi32(__m256i __A, __mmask8 __U, __m256i __I, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_blend_epi32(__mmask8 __U, __m256i __A, __m256i __W)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_srav_epi64(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_add_epi64(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_fmsubadd_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtsepi64_epi8(__m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtepi32_epi64(__mmask8 __U, __m128i __X)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_max_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_sqrt_pd(__mmask8 __U, __m256d __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_cvtepu32_ps(__mmask8 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtps_epu32(__m128i __W, __mmask8 __U, __m128 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_expandloadu_epi32(__mmask8 __U, void const *__P)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtepu16_epi32(__m128i __W, __mmask8 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_or_epi64(__m128i __a, __m128i __b)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_cvtepu32_pd(__mmask8 __U, __m128i __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_rsqrt14_pd(__m256d __W, __mmask8 __U, __m256d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtepu16_epi64(__m128i __W, __mmask8 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtepi8_epi64(__mmask8 __U, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_sra_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_unpackhi_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtsepi64_epi32(__m128i __O, __mmask8 __M, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtsepi32_epi8(__m128i __O, __mmask8 __M, __m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_sub_epi32(__mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_max_epi64(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtsepi32_epi8(__m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtepu8_epi32(__mmask8 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_unpacklo_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS256 _mm256_mask_test_epi64_mask(__mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_min_pd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_sub_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_expand_pd(__m128d __W, __mmask8 __U, __m128d __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_blend_ps(__mmask8 __U, __m128 __A, __m128 __W)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_compress_ps(__mmask8 __U, __m128 __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_scalef_ps(__mmask8 __U, __m128 __A, __m128 __B)
#define _mm_cmpeq_epi64_mask(A, B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_loadu_epi64(__m256i __W, __mmask8 __U, void const *__P)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_getexp_ps(__mmask8 __U, __m256 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_srli_epi64(__m128i __W, __mmask8 __U, __m128i __A, unsigned int __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_div_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_min_epu64(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_rsqrt14_pd(__m128d __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_getexp_pd(__m128d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_sll_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_fmsubadd_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_permutex2var_epi64(__mmask8 __U, __m256i __A, __m256i __I, __m256i __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_div_pd(__mmask8 __U, __m256d __A, __m256d __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_movedup_pd(__m256d __W, __mmask8 __U, __m256d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtusepi64_epi16(__mmask8 __M, __m128i __A)
static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_compressstoreu_ps(void *__P, __mmask8 __U, __m256 __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_expandloadu_pd(__mmask8 __U, void const *__P)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_sra_epi64(__mmask8 __U, __m256i __A, __m128i __B)
static __inline __m128i __DEFAULT_FN_ATTRS128 _mm_load_epi32(void const *__P)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_add_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_max_epi64(__m256i __A, __m256i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_permutex2var_ps(__mmask8 __U, __m128 __A, __m128i __I, __m128 __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_cvtsepi64_epi32(__m256i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_div_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_sqrt_pd(__m256d __W, __mmask8 __U, __m256d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtsepi32_epi8(__m128i __O, __mmask8 __M, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvttpd_epu32(__m128i __W, __mmask8 __U, __m256d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtusepi32_epi8(__m128i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_max_pd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_fmsub_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_mov_epi64(__mmask8 __U, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtepi8_epi32(__mmask8 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_max_epu32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_max_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_getexp_ps(__m128 __W, __mmask8 __U, __m128 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_sllv_epi32(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_permutex2var_epi64(__m256i __A, __mmask8 __U, __m256i __I, __m256i __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_expandloadu_pd(__m128d __W, __mmask8 __U, void const *__P)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_srav_epi64(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtusepi64_epi8(__mmask8 __M, __m256i __A)
static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepi32_storeu_epi8(void *__P, __mmask8 __M, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtepi8_epi32(__m128i __W, __mmask8 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_sra_epi32(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_srav_epi64(__m128i __X, __m128i __Y)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_min_epi32(__mmask8 __M, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_broadcast_i32x4(__mmask8 __M, __m128i __A)
static __inline __m256i __DEFAULT_FN_ATTRS256 _mm256_load_epi64(void const *__P)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask3_fmsubadd_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_cvtph_ps(__mmask8 __U, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtepu16_epi32(__mmask8 __U, __m128i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtpd_ps(__mmask8 __U, __m256d __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_rcp14_pd(__mmask8 __U, __m128d __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_broadcastd_epi32(__mmask8 __M, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_and_epi32(__m128i __a, __m128i __b)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepi64_epi32(__m128i __O, __mmask8 __M, __m256i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_blend_pd(__mmask8 __U, __m128d __A, __m128d __W)
static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_cvtsepi32_storeu_epi16(void *__P, __mmask8 __M, __m256i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_sub_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_cvtpd_epu32(__m256d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_expand_epi32(__m128i __W, __mmask8 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_srai_epi64(__m128i __W, __mmask8 __U, __m128i __A, unsigned int __imm)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtusepi64_epi32(__m128i __O, __mmask8 __M, __m256i __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_getexp_pd(__m256d __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_mul_epu32(__mmask8 __M, __m256i __X, __m256i __Y)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_load_ps(__m256 __W, __mmask8 __U, void const *__P)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_mul_epi32(__m128i __W, __mmask8 __M, __m128i __X, __m128i __Y)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvttps_epi32(__m128i __W, __mmask8 __U, __m128 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtsepi32_epi16(__m128i __O, __mmask8 __M, __m128i __A)
static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_cvtusepi64_storeu_epi32(void *__P, __mmask8 __M, __m128i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_cvtepu32_ps(__m128 __W, __mmask8 __U, __m128i __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask3_fnmadd_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_add_ps(__mmask8 __U, __m256 __A, __m256 __B)
static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_compressstoreu_epi32(void *__P, __mmask8 __U, __m256i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask3_fmsubadd_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_andnot_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_fnmsub_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_cvttpd_epu32(__m256d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_min_epi64(__m128i __A, __m128i __B)
static __inline void __DEFAULT_FN_ATTRS128 _mm_store_epi32(void *__P, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtepi16_epi64(__mmask8 __U, __m128i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_max_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_blend_epi64(__mmask8 __U, __m256i __A, __m256i __W)
#define _mm256_permutexvar_epi32(A, B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtepi8_epi64(__m128i __W, __mmask8 __U, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_or_epi32(__m256i __a, __m256i __b)
static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_storeu_epi64(void *__P, __mmask8 __U, __m128i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_unpacklo_pd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_movedup_pd(__mmask8 __U, __m128d __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_fmaddsub_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepu8_epi32(__m256i __W, __mmask8 __U, __m128i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_loadu_pd(__m128d __W, __mmask8 __U, void const *__P)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_min_epu32(__mmask8 __M, __m256i __A, __m256i __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask3_fnmsub_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvttpd_epi32(__m128i __W, __mmask8 __U, __m128d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_set1_epi64(__mmask8 __M, long long __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS256 _mm256_mask_cvtpd_ps(__m128 __W, __mmask8 __U, __m256d __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_fnmsub_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS256 _mm256_testn_epi32_mask(__m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtepi64_epi32(__m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_expandloadu_epi32(__m256i __W, __mmask8 __U, void const *__P)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_unpackhi_ps(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_max_ps(__mmask8 __U, __m256 __A, __m256 __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_slli_epi64(__m128i __W, __mmask8 __U, __m128i __A, unsigned int __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask3_fmsub_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask2_permutex2var_pd(__m128d __A, __m128i __I, __mmask8 __U, __m128d __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_load_ps(__mmask8 __U, void const *__P)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_xor_epi64(__m256i __a, __m256i __b)
static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_cvtusepi64_storeu_epi8(void *__P, __mmask8 __M, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_load_epi32(__mmask8 __U, void const *__P)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_expandloadu_epi64(__mmask8 __U, void const *__P)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtusepi32_epi16(__mmask8 __M, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_mov_epi32(__mmask8 __U, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_sub_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_slli_epi32(__m256i __W, __mmask8 __U, __m256i __A, unsigned int __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_movehdup_ps(__m128 __W, __mmask8 __U, __m128 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_srai_epi64(__m128i __A, unsigned int __imm)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtepu8_epi32(__mmask8 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_permutex2var_epi64(__mmask8 __U, __m128i __A, __m128i __I, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_add_epi32(__mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_min_epi32(__mmask8 __M, __m128i __A, __m128i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_fnmsub_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtpd_epu32(__m128i __W, __mmask8 __U, __m256d __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_div_ps(__mmask8 __U, __m256 __A, __m256 __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_fmadd_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C)
static __inline __m256i __DEFAULT_FN_ATTRS256 _mm256_loadu_epi32(void const *__P)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_fmadd_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_abs_epi64(__mmask8 __U, __m256i __A)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS256 _mm256_mask_testn_epi32_mask(__mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_rorv_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_and_epi64(__m256i __a, __m256i __b)
static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_storeu_pd(void *__P, __mmask8 __U, __m256d __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_srav_epi32(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_abs_epi64(__m128i __W, __mmask8 __U, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_sllv_epi32(__mmask8 __U, __m256i __X, __m256i __Y)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepi16_epi64(__m256i __W, __mmask8 __U, __m128i __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_unpacklo_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_mov_epi64(__m128i __W, __mmask8 __U, __m128i __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_expand_pd(__mmask8 __U, __m256d __A)
#define _mm256_cmpeq_epi64_mask(A, B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_rolv_epi64(__m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtsepi32_epi16(__m128i __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_fmaddsub_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtepi16_epi32(__mmask8 __U, __m128i __A)
#define _mm256_mask_cmpeq_epi32_mask(k, A, B)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepu32_ps(__m256 __W, __mmask8 __U, __m256i __A)
static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_cvtusepi64_storeu_epi16(void *__P, __mmask8 __M, __m256i __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_loadu_ps(__m256 __W, __mmask8 __U, void const *__P)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_unpacklo_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_cvtepi32_epi8(__m256i __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_scalef_pd(__m256d __A, __m256d __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtepi32_ps(__mmask8 __U, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_max_epu64(__mmask8 __M, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_permutex2var_epi64(__m128i __A, __mmask8 __U, __m128i __I, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_srav_epi64(__mmask8 __U, __m256i __X, __m256i __Y)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_expandloadu_ps(__mmask8 __U, void const *__P)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_rorv_epi64(__m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtepu32_epi64(__m128i __W, __mmask8 __U, __m128i __X)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_mul_epi32(__m256i __W, __mmask8 __M, __m256i __X, __m256i __Y)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_cvtepu32_ps(__m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtps_epi32(__m128i __W, __mmask8 __U, __m128 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_srai_epi64(__mmask8 __U, __m256i __A, unsigned int __imm)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_slli_epi32(__m128i __W, __mmask8 __U, __m128i __A, unsigned int __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtusepi64_epi8(__m128i __O, __mmask8 __M, __m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_rolv_epi64(__mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_expand_ps(__mmask8 __U, __m256 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_or_epi64(__mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_sra_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_min_epi64(__m256i __A, __m256i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_expand_ps(__mmask8 __U, __m128 __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_max_ps(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_cvtsepi64_storeu_epi16(void *__P, __mmask8 __M, __m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_mullo_epi32(__mmask8 __M, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_min_epi64(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_expand_epi64(__mmask8 __U, __m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_unpackhi_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_permutevar_pd(__mmask8 __U, __m128d __A, __m128i __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_add_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_mov_ps(__mmask8 __U, __m128 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_andnot_epi64(__m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_broadcastd_epi32(__m128i __O, __mmask8 __M, __m128i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_permutex2var_pd(__mmask8 __U, __m128d __A, __m128i __I, __m128d __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtsepi64_epi32(__mmask8 __M, __m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_abs_epi64(__m256i __W, __mmask8 __U, __m256i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_compress_ps(__m128 __W, __mmask8 __U, __m128 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_expand_epi64(__m256i __W, __mmask8 __U, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvttps_epu32(__mmask8 __U, __m128 __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_load_pd(__mmask8 __U, void const *__P)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_permutevar_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256i __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_compress_epi32(__m128i __W, __mmask8 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtepi16_epi32(__m128i __W, __mmask8 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_cvtsepi32_epi8(__m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_sll_epi32(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_fmsub_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_fmsub_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_sra_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_srlv_epi32(__mmask8 __U, __m256i __X, __m256i __Y)
static __inline void __DEFAULT_FN_ATTRS256 _mm256_storeu_epi64(void *__P, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_cvtepi32_epi16(__m256i __A)
#define _mm_cmpneq_epi64_mask(A, B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_set1_epi32(__mmask8 __M, int __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtepu32_ps(__mmask8 __U, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_and_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_fnmsub_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS128 _mm_mask_test_epi32_mask(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_rolv_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_moveldup_ps(__m128 __W, __mmask8 __U, __m128 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_cvtusepi64_epi8(__m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvttps_epu32(__m256i __W, __mmask8 __U, __m256 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtsepi64_epi32(__m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_unpackhi_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtepi64_epi16(__mmask8 __M, __m128i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_cvtps_pd(__m128d __W, __mmask8 __U, __m128 __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_load_pd(__m256d __W, __mmask8 __U, void const *__P)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_movehdup_ps(__mmask8 __U, __m128 __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_expand_ps(__m128 __W, __mmask8 __U, __m128 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_abs_epi64(__m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtepi64_epi8(__m128i __O, __mmask8 __M, __m128i __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_scalef_ps(__mmask8 __U, __m256 __A, __m256 __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_expand_epi32(__mmask8 __U, __m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_permutexvar_epi32(__mmask8 __M, __m256i __X, __m256i __Y)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_set1_epi32(__mmask8 __M, int __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_sub_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepi64_storeu_epi8(void *__P, __mmask8 __M, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_srlv_epi64(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_getexp_pd(__m128d __W, __mmask8 __U, __m128d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_andnot_epi64(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_rolv_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_max_pd(__mmask8 __U, __m256d __A, __m256d __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_expandloadu_pd(__m256d __W, __mmask8 __U, void const *__P)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_min_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_broadcast_f32x4(__m256 __O, __mmask8 __M, __m128 __A)
#define _mm256_cmpeq_epi32_mask(A, B)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_permutexvar_pd(__m256i __X, __m256d __Y)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_broadcastss_ps(__mmask8 __M, __m128 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_set1_epi64(__m256i __O, __mmask8 __M, long long __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_mov_ps(__mmask8 __U, __m256 __A)
static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_cvtusepi64_storeu_epi8(void *__P, __mmask8 __M, __m128i __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_permutex2var_pd(__m256d __A, __mmask8 __U, __m256i __I, __m256d __B)
#define _mm_cmpeq_epi32_mask(A, B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_unpacklo_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_rolv_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtepi16_epi64(__mmask8 __U, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_srl_epi64(__mmask8 __U, __m256i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_mul_epi32(__mmask8 __M, __m128i __X, __m128i __Y)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_srli_epi32(__mmask8 __U, __m128i __A, unsigned int __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_scalef_pd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepi32_ps(__m256 __W, __mmask8 __U, __m256i __A)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS128 _mm_testn_epi32_mask(__m128i __A, __m128i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_fmaddsub_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask3_fmsubadd_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_and_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_slli_epi64(__m256i __W, __mmask8 __U, __m256i __A, unsigned int __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_permutex2var_epi32(__m256i __A, __m256i __I, __m256i __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_div_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_min_epi64(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask3_fnmsub_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS256 _mm256_test_epi64_mask(__m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtpd_epi32(__mmask8 __U, __m128d __A)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS128 _mm_mask_testn_epi32_mask(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_permutevar_pd(__mmask8 __U, __m256d __A, __m256i __C)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_xor_epi64(__mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_cvtepu32_pd(__m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_max_epi64(__mmask8 __M, __m128i __A, __m128i __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_fmsubadd_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_scalef_ps(__m128 __A, __m128 __B)
static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_store_ps(void *__P, __mmask8 __U, __m128 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_blend_epi32(__mmask8 __U, __m128i __A, __m128i __W)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_broadcastq_epi64(__mmask8 __M, __m128i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_moveldup_ps(__mmask8 __U, __m128 __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_permutex2var_ps(__m256 __A, __m256i __I, __m256 __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask2_permutex2var_ps(__m256 __A, __m256i __I, __mmask8 __U, __m256 __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_permutevar_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256i __C)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_permutexvar_ps(__mmask8 __U, __m256i __X, __m256 __Y)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask3_fmadd_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_permutevar_ps(__mmask8 __U, __m128 __A, __m128i __C)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepi32_pd(__m256d __W, __mmask8 __U, __m128i __A)
static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepi32_storeu_epi16(void *__P, __mmask8 __M, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_permutex2var_epi32(__m128i __A, __m128i __I, __m128i __B)
static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_storeu_epi64(void *__P, __mmask8 __U, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_blend_epi64(__mmask8 __U, __m128i __A, __m128i __W)
static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_cvtusepi64_storeu_epi32(void *__P, __mmask8 __M, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_compress_epi32(__mmask8 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_andnot_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_xor_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtsepi64_epi8(__m128i __O, __mmask8 __M, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepi8_epi64(__m256i __W, __mmask8 __U, __m128i __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_scalef_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtepu16_epi64(__mmask8 __U, __m128i __A)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS256 _mm256_mask_test_epi32_mask(__mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_rorv_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_or_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_rolv_epi64(__m256i __A, __m256i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask3_fnmadd_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_add_epi32(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_srav_epi32(__mmask8 __U, __m128i __X, __m128i __Y)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtusepi64_epi32(__mmask8 __M, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvttps_epi32(__mmask8 __U, __m256 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtepi64_epi8(__mmask8 __M, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_expandloadu_epi64(__mmask8 __U, void const *__P)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_compress_pd(__m256d __W, __mmask8 __U, __m256d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepi32_epi16(__m128i __O, __mmask8 __M, __m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_abs_epi64(__m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepi64_epi16(__m128i __O, __mmask8 __M, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_or_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvttps_epu32(__m128 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_cvtusepi32_epi16(__m256i __A)
static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_store_pd(void *__P, __mmask8 __U, __m128d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_max_epu64(__m128i __A, __m128i __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_add_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
short __v2hi __attribute__((__vector_size__(4)))
#define __DEFAULT_FN_ATTRS256
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtusepi64_epi32(__mmask8 __M, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_or_epi32(__m128i __a, __m128i __b)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_abs_epi32(__mmask8 __U, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_max_epu32(__mmask8 __M, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_srlv_epi32(__mmask8 __U, __m128i __X, __m128i __Y)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_permutex2var_ps(__m256 __A, __mmask8 __U, __m256i __I, __m256 __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_mullo_epi32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_and_epi32(__mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_min_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_abs_epi32(__mmask8 __U, __m128i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_fmadd_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_scalef_ps(__m256 __A, __m256 __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtepi32_epi16(__m128i __A)
#define _mm256_permutexvar_ps(A, B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_permutex2var_epi32(__mmask8 __U, __m256i __A, __m256i __I, __m256i __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_broadcastsd_pd(__m256d __O, __mmask8 __M, __m128d __A)
static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_compressstoreu_epi64(void *__P, __mmask8 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_cvtsepi64_epi16(__m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_min_epi64(__mmask8 __M, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_sllv_epi32(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_unpacklo_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtusepi64_epi16(__m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepi16_epi32(__m256i __W, __mmask8 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtepi16_epi64(__m128i __W, __mmask8 __U, __m128i __A)
static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_storeu_ps(void *__P, __mmask8 __U, __m128 __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_sub_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_compress_epi32(__m256i __W, __mmask8 __U, __m256i __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_cvtps_pd(__m256d __W, __mmask8 __U, __m128 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_rorv_epi64(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_sub_pd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_store_epi64(void *__P, __mmask8 __U, __m128i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_rcp14_ps(__m128 __W, __mmask8 __U, __m128 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_sllv_epi64(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_expandloadu_pd(__mmask8 __U, void const *__P)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_sll_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtusepi32_epi8(__m128i __O, __mmask8 __M, __m128i __A)
static __inline __m128i __DEFAULT_FN_ATTRS128 _mm_load_epi64(void const *__P)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_unpacklo_epi32(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtepu32_epi64(__mmask8 __U, __m128i __X)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_srli_epi64(__mmask8 __U, __m128i __A, unsigned int __B)
#define __DEFAULT_FN_ATTRS128
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_rcp14_ps(__m256 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_min_epi32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask2_permutex2var_pd(__m256d __A, __m256i __I, __mmask8 __U, __m256d __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_permutexvar_epi64(__m256i __X, __m256i __Y)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepu8_epi64(__m256i __W, __mmask8 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_and_epi64(__m128i __a, __m128i __b)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_load_ps(__mmask8 __U, void const *__P)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_compress_epi64(__mmask8 __U, __m256i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_rcp14_pd(__m128d __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_sll_epi64(__mmask8 __U, __m256i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtepu8_epi64(__mmask8 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_mul_epu32(__m128i __W, __mmask8 __M, __m128i __X, __m128i __Y)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvttps_epu32(__m128i __W, __mmask8 __U, __m128 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_rorv_epi32(__m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvttpd_epu32(__mmask8 __U, __m128d __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_srli_epi32(__m256i __W, __mmask8 __U, __m256i __A, unsigned int __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_unpackhi_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_fnmadd_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_mul_epi32(__mmask8 __M, __m256i __X, __m256i __Y)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_cvtepu32_ps(__m256i __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_compress_ps(__m256 __W, __mmask8 __U, __m256 __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_rsqrt14_ps(__m256 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_rolv_epi32(__mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_permutex2var_pd(__m128d __A, __m128i __I, __m128d __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_unpacklo_epi64(__mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_max_epi32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_fnmsub_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_movedup_pd(__m128d __W, __mmask8 __U, __m128d __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_max_epi64(__mmask8 __M, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtps_epi32(__m256i __W, __mmask8 __U, __m256 __A)
static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_store_ps(void *__P, __mmask8 __U, __m256 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_broadcastd_epi32(__mmask8 __M, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_srli_epi64(__mmask8 __U, __m256i __A, unsigned int __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_cvtepi32_pd(__m128d __W, __mmask8 __U, __m128i __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_add_pd(__mmask8 __U, __m256d __A, __m256d __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_sllv_epi64(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y)
#define _mm_mask_cmpneq_epi64_mask(k, A, B)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_fmsub_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_max_epi32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS128 _mm_mask_testn_epi64_mask(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_cvtsepi64_storeu_epi8(void *__P, __mmask8 __M, __m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_srai_epi64(__m256i __A, unsigned int __imm)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cvttps_epu32(__m256 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_compress_epi32(__mmask8 __U, __m256i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_fnmadd_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_cvtpd_ps(__mmask8 __U, __m128d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtpd_epi32(__mmask8 __U, __m256d __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_sub_ps(__mmask8 __U, __m256 __A, __m256 __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtepi64_epi32(__mmask8 __M, __m256i __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_compress_ps(__mmask8 __U, __m256 __A)
static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_store_epi32(void *__P, __mmask8 __U, __m256i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_permutex2var_ps(__m128 __A, __mmask8 __U, __m128i __I, __m128 __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvttps_epi32(__m256i __W, __mmask8 __U, __m256 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_loadu_epi32(__m128i __W, __mmask8 __U, void const *__P)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_mov_epi64(__mmask8 __U, __m256i __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_cvtph_ps(__m256 __W, __mmask8 __U, __m128i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_fmsubadd_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_sqrt_pd(__m128d __W, __mmask8 __U, __m128d __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_rolv_epi32(__m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_srav_epi64(__m256i __X, __m256i __Y)
static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_cvtusepi64_storeu_epi16(void *__P, __mmask8 __M, __m128i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_rcp14_pd(__m128d __W, __mmask8 __U, __m128d __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_and_epi64(__mmask8 __U, __m256i __A, __m256i __B)
static __inline void __DEFAULT_FN_ATTRS256 _mm256_storeu_epi32(void *__P, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_cvtusepi64_epi16(__m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepi32_epi64(__m256i __W, __mmask8 __U, __m128i __X)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtsepi64_epi16(__m128i __O, __mmask8 __M, __m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_abs_epi32(__m256i __W, __mmask8 __U, __m256i __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_max_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_fnmadd_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask3_fmadd_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtps_epu32(__mmask8 __U, __m128 __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_rsqrt14_ps(__mmask8 __U, __m128 __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_blend_ps(__mmask8 __U, __m256 __A, __m256 __W)
static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_compressstoreu_pd(void *__P, __mmask8 __U, __m256d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_min_epi32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B)
static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_cvtsepi64_storeu_epi8(void *__P, __mmask8 __M, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtsepi64_epi16(__m128i __O, __mmask8 __M, __m128i __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_movehdup_ps(__m256 __W, __mmask8 __U, __m256 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_srl_epi32(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_permutex2var_pd(__mmask8 __U, __m256d __A, __m256i __I, __m256d __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtusepi64_epi16(__mmask8 __M, __m256i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_cvtepu32_pd(__m128i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_loadu_ps(__m128 __W, __mmask8 __U, void const *__P)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtpd_epu32(__mmask8 __U, __m256d __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_xor_epi32(__m256i __a, __m256i __b)
#define _mm_mask_cmpeq_epi32_mask(k, A, B)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS128 _mm_testn_epi64_mask(__m128i __A, __m128i __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_add_pd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_fmsubadd_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_srlv_epi32(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_permutexvar_pd(__mmask8 __U, __m256i __X, __m256d __Y)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask2_permutex2var_epi64(__m256i __A, __m256i __I, __mmask8 __U, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_srai_epi32(__mmask8 __U, __m128i __A, unsigned int __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_unpacklo_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepi64_storeu_epi32(void *__P, __mmask8 __M, __m256i __A)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS256 _mm256_testn_epi64_mask(__m256i __A, __m256i __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_expand_pd(__mmask8 __U, __m128d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_sub_epi32(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_sub_pd(__mmask8 __U, __m256d __A, __m256d __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_scalef_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_sub_epi64(__mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_expandloadu_epi32(__mmask8 __U, void const *__P)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_cvtpd_ps(__m128 __W, __mmask8 __U, __m128d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_sra_epi64(__m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_expand_epi32(__m256i __W, __mmask8 __U, __m256i __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtepi32_pd(__mmask8 __U, __m128i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask3_fmsub_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtepi32_epi8(__m128i __O, __mmask8 __M, __m128i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_min_ps(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_rolv_epi64(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_unpacklo_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_fnmsub_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_andnot_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepi64_epi8(__m128i __O, __mmask8 __M, __m256i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_fmaddsub_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
#define _mm_mask_cmpeq_epi64_mask(k, A, B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_srlv_epi32(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_mul_ps(__mmask8 __U, __m256 __A, __m256 __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_mov_pd(__mmask8 __U, __m128d __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_loadu_ps(__mmask8 __U, void const *__P)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_getexp_pd(__m256d __W, __mmask8 __U, __m256d __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_max_epu64(__mmask8 __M, __m256i __A, __m256i __B)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS128 _mm_mask_test_epi64_mask(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_mov_epi32(__mmask8 __U, __m128i __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_expand_ps(__m256 __W, __mmask8 __U, __m256 __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_broadcast_f32x4(__m128 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_unpackhi_epi32(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtph_ps(__mmask8 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtepi8_epi32(__mmask8 __U, __m128i __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_mul_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_abs_epi32(__m128i __W, __mmask8 __U, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_min_epi64(__mmask8 __M, __m256i __A, __m256i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtepu8_epi64(__mmask8 __U, __m128i __A)
#define _mm256_mask_cmpeq_epi64_mask(k, A, B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_sra_epi64(__m256i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_unpackhi_epi64(__mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_scalef_pd(__mmask8 __U, __m256d __A, __m256d __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepu32_pd(__m256d __W, __mmask8 __U, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_max_epu64(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_unpackhi_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_andnot_epi32(__m128i __A, __m128i __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_unpackhi_pd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_permutex2var_epi32(__mmask8 __U, __m128i __A, __m128i __I, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_slli_epi32(__mmask8 __U, __m128i __A, unsigned int __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtepi32_epi8(__mmask8 __M, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_mullo_epi32(__mmask8 __M, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtsepi32_epi16(__mmask8 __M, __m128i __A)
static __inline void __DEFAULT_FN_ATTRS256 _mm256_store_epi64(void *__P, __m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_max_epu64(__m256i __A, __m256i __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_permutex2var_pd(__m128d __A, __mmask8 __U, __m128i __I, __m128d __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_permutex2var_epi32(__m128i __A, __mmask8 __U, __m128i __I, __m128i __B)
static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepi64_storeu_epi16(void *__P, __mmask8 __M, __m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_broadcastq_epi64(__m256i __O, __mmask8 __M, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_load_epi64(__mmask8 __U, void const *__P)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_rorv_epi32(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_andnot_epi32(__m256i __A, __m256i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_rsqrt14_ps(__m128 __A)
static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_storeu_ps(void *__P, __mmask8 __U, __m256 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_xor_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_expandloadu_ps(__m256 __W, __mmask8 __U, void const *__P)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask3_fmaddsub_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_fmaddsub_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_broadcast_i32x4(__m128i __A)
static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_cvtusepi32_storeu_epi16(void *__P, __mmask8 __M, __m256i __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_rsqrt14_pd(__m256d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_srav_epi64(__mmask8 __U, __m128i __X, __m128i __Y)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_fmaddsub_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_getexp_ps(__m128 __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_mul_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_unpacklo_epi64(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_min_epu64(__mmask8 __M, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_compress_epi64(__mmask8 __U, __m128i __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask3_fnmadd_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_unpackhi_ps(__mmask8 __U, __m256 __A, __m256 __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_cvtepi32_ps(__m128 __W, __mmask8 __U, __m128i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_fmsubadd_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_broadcastq_epi64(__mmask8 __M, __m128i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_fmsub_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtps_epu32(__m128 __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask3_fnmadd_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask3_fmsub_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_slli_epi64(__mmask8 __U, __m256i __A, unsigned int __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtusepi64_epi8(__m128i __O, __mmask8 __M, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtsepi64_epi32(__mmask8 __M, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_load_epi32(__mmask8 __U, void const *__P)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtusepi32_epi8(__mmask8 __M, __m128i __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_getexp_ps(__m256 __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_mul_ps(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_cvtps_pd(__mmask8 __U, __m128 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtepi8_epi64(__mmask8 __U, __m128i __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_broadcast_f32x4(__mmask8 __M, __m128 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtepi32_epi64(__m128i __W, __mmask8 __U, __m128i __X)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_mov_ps(__m256 __W, __mmask8 __U, __m256 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_expand_epi32(__mmask8 __U, __m128i __A)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS128 _mm_test_epi64_mask(__m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_sub_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_srai_epi32(__m128i __W, __mmask8 __U, __m128i __A, unsigned int __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtsepi64_epi32(__m128i __O, __mmask8 __M, __m256i __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_loadu_pd(__mmask8 __U, void const *__P)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_or_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_srlv_epi64(__mmask8 __U, __m128i __X, __m128i __Y)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_rcp14_ps(__mmask8 __U, __m256 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepu32_epi64(__m256i __W, __mmask8 __U, __m128i __X)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtpd_epu32(__m128d __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_fmadd_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtepi32_epi8(__m128i __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_unpacklo_ps(__mmask8 __U, __m256 __A, __m256 __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_compress_pd(__mmask8 __U, __m128d __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_load_epi32(__m256i __W, __mmask8 __U, void const *__P)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_fmaddsub_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtpd_epu32(__mmask8 __U, __m128d __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_unpacklo_epi32(__mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtepu8_epi64(__m128i __W, __mmask8 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtps_epi32(__mmask8 __U, __m128 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvttpd_epi32(__mmask8 __U, __m128d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_cvtusepi64_epi32(__m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_set1_epi64(__mmask8 __M, long long __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_rsqrt14_pd(__mmask8 __U, __m256d __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_fnmadd_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_store_epi64(void *__P, __mmask8 __U, __m256i __A)
static __inline __m256i __DEFAULT_FN_ATTRS256 _mm256_load_epi32(void const *__P)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_rolv_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_rorv_epi32(__m128i __A, __m128i __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_fmaddsub_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_broadcastq_epi64(__m128i __O, __mmask8 __M, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_srli_epi32(__mmask8 __U, __m256i __A, unsigned int __B)
static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_cvtepi32_storeu_epi16(void *__P, __mmask8 __M, __m128i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_fmadd_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_cvtepi64_epi8(__m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_srav_epi32(__mmask8 __U, __m256i __X, __m256i __Y)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_permutexvar_epi64(__mmask8 __M, __m256i __X, __m256i __Y)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtusepi32_epi16(__mmask8 __M, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_xor_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_add_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_expandloadu_epi64(__m128i __W, __mmask8 __U, void const *__P)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_compress_pd(__mmask8 __U, __m256d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_slli_epi64(__mmask8 __U, __m128i __A, unsigned int __B)
static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_cvtusepi32_storeu_epi16(void *__P, __mmask8 __M, __m128i __A)
static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_cvtsepi64_storeu_epi32(void *__P, __mmask8 __M, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtepi64_epi8(__mmask8 __M, __m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_mul_epu32(__m256i __W, __mmask8 __M, __m256i __X, __m256i __Y)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_cvtsepi64_epi8(__m256i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_fnmadd_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_srai_epi64(__m256i __W, __mmask8 __U, __m256i __A, unsigned int __imm)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_sqrt_ps(__mmask8 __U, __m128 __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_permutex2var_ps(__mmask8 __U, __m256 __A, __m256i __I, __m256 __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_andnot_epi32(__mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_sqrt_ps(__mmask8 __U, __m256 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtusepi32_epi16(__m128i __O, __mmask8 __M, __m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_loadu_epi32(__m256i __W, __mmask8 __U, void const *__P)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_mov_epi32(__m256i __W, __mmask8 __U, __m256i __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask3_fmadd_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U)
static __inline void __DEFAULT_FN_ATTRS128 _mm_storeu_epi32(void *__P, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask2_permutex2var_epi32(__m128i __A, __m128i __I, __mmask8 __U, __m128i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_rcp14_ps(__m128 __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_permutevar_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128i __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtsepi64_epi8(__m128i __O, __mmask8 __M, __m256i __A)
static __inline __m256i __DEFAULT_FN_ATTRS256 _mm256_loadu_epi64(void const *__P)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_sub_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_srai_epi64(__mmask8 __U, __m128i __A, unsigned int __imm)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_broadcastsd_pd(__mmask8 __M, __m128d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_srli_epi32(__m128i __W, __mmask8 __U, __m128i __A, unsigned int __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_loadu_epi32(__mmask8 __U, void const *__P)
static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_cvtepi64_storeu_epi32(void *__P, __mmask8 __M, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cvtps_epu32(__m256 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_load_epi64(__mmask8 __U, void const *__P)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_broadcast_i32x4(__m256i __O, __mmask8 __M, __m128i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_expandloadu_ps(__m128 __W, __mmask8 __U, void const *__P)
static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_compressstoreu_ps(void *__P, __mmask8 __U, __m128 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_mullo_epi32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_unpackhi_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline void __DEFAULT_FN_ATTRS128 _mm_store_epi64(void *__P, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtsepi32_epi16(__m128i __O, __mmask8 __M, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_and_epi64(__mmask8 __U, __m128i __A, __m128i __B)
#define _mm256_cmpneq_epi64_mask(A, B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtpd_epu32(__m128i __W, __mmask8 __U, __m128d __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_fnmadd_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvttpd_epi32(__m128i __W, __mmask8 __U, __m256d __A)
static __inline __m128i __DEFAULT_FN_ATTRS128 _mm_loadu_epi32(void const *__P)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepu16_epi32(__m256i __W, __mmask8 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_max_epu64(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B)
#define _mm_cmpneq_epi32_mask(A, B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_mov_epi32(__m128i __W, __mmask8 __U, __m128i __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_unpackhi_pd(__mmask8 __U, __m256d __A, __m256d __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask2_permutex2var_epi32(__m256i __A, __m256i __I, __mmask8 __U, __m256i __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_unpackhi_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_rolv_epi32(__m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_permutex2var_epi64(__m128i __A, __m128i __I, __m128i __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtps_pd(__mmask8 __U, __m128 __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtepu32_pd(__mmask8 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_expand_epi64(__mmask8 __U, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtps_epu32(__m256i __W, __mmask8 __U, __m256 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_expandloadu_epi64(__m256i __W, __mmask8 __U, void const *__P)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtepi16_epi32(__mmask8 __U, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_rorv_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtepi32_epi64(__mmask8 __U, __m128i __X)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_min_epu64(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_andnot_epi32(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_storeu_epi32(void *__P, __mmask8 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvttps_epi32(__mmask8 __U, __m128 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_sll_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_loadu_epi32(__mmask8 __U, void const *__P)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask3_fnmsub_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_cvtepi32_ps(__mmask8 __U, __m128i __A)
static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_compressstoreu_epi64(void *__P, __mmask8 __U, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_cvtsepi32_epi16(__m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_srl_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_mul_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_xor_epi32(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_compressstoreu_epi32(void *__P, __mmask8 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvttpd_epi32(__mmask8 __U, __m256d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_xor_epi64(__m128i __a, __m128i __b)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_getexp_pd(__mmask8 __U, __m128d __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_cvtepu32_pd(__m128d __W, __mmask8 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvttpd_epu32(__mmask8 __U, __m256d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtepi64_epi32(__m128i __O, __mmask8 __M, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_srai_epi32(__m256i __W, __mmask8 __U, __m256i __A, unsigned int __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtusepi64_epi32(__m128i __O, __mmask8 __M, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtpd_epi32(__m128i __W, __mmask8 __U, __m128d __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_min_epu64(__m256i __A, __m256i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_or_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_compress_epi64(__m128i __W, __mmask8 __U, __m128i __A)
static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_cvtsepi32_storeu_epi8(void *__P, __mmask8 __M, __m128i __A)
static __inline __m256 __DEFAULT_FN_ATTRS _mm256_add_ps(__m256 __a, __m256 __b)
Adds two 256-bit vectors of [8 x float].
Definition avxintrin.h:78
static __inline __m128i __DEFAULT_FN_ATTRS _mm256_cvtpd_epi32(__m256d __a)
Converts a 256-bit vector of [4 x double] into a 128-bit vector of [4 x i32].
Definition avxintrin.h:2225
static __inline __m256d __DEFAULT_FN_ATTRS _mm256_unpacklo_pd(__m256d __a, __m256d __b)
Unpacks the even-indexed vector elements from two 256-bit vectors of [4 x double] and interleaves them into a 256-bit vector of [4 x double].
Definition avxintrin.h:2408
static __inline __m256 __DEFAULT_FN_ATTRS _mm256_sqrt_ps(__m256 __a)
Calculates the square roots of the values in a 256-bit vector of [8 x float].
Definition avxintrin.h:334
static __inline __m256 __DEFAULT_FN_ATTRS _mm256_cvtepi32_ps(__m256i __a)
Converts a vector of [8 x i32] into a vector of [8 x float].
Definition avxintrin.h:2144
static __inline __m256 __DEFAULT_FN_ATTRS _mm256_moveldup_ps(__m256 __a)
Moves and duplicates even-indexed values from a 256-bit vector of [8 x float] to float values in a 256-bit vector of [8 x float].
Definition avxintrin.h:2341
static __inline __m128 __DEFAULT_FN_ATTRS _mm256_cvtpd_ps(__m256d __a)
Converts a 256-bit vector of [4 x double] into a 128-bit vector of [4 x float].
Definition avxintrin.h:2160
static __inline __m256 __DEFAULT_FN_ATTRS _mm256_setzero_ps(void)
Constructs a 256-bit floating-point vector of [8 x float] with all vector elements initialized to zero.
Definition avxintrin.h:4268
static __inline __m256 __DEFAULT_FN_ATTRS _mm256_permutevar_ps(__m256 __a, __m256i __c)
Copies the values stored in a 256-bit vector of [8 x float] as specified by the 256-bit integer vector operand.
Definition avxintrin.h:959
static __inline __m256d __DEFAULT_FN_ATTRS _mm256_cvtps_pd(__m128 __a)
Converts a 128-bit vector of [4 x float] into a 256-bit vector of [4 x double].
Definition avxintrin.h:2191
static __inline __m128 __DEFAULT_FN_ATTRS128 _mm_permutevar_ps(__m128 __a, __m128i __c)
Copies the values stored in a 128-bit vector of [4 x float] as specified by the 128-bit integer vector operand.
Definition avxintrin.h:868
static __inline __m128i __DEFAULT_FN_ATTRS _mm256_cvttpd_epi32(__m256d __a)
Converts a 256-bit vector of [4 x double] into a 128-bit vector of [4 x i32], truncating the result by rounding towards zero when it is inexact.
Definition avxintrin.h:2208
static __inline __m256i __DEFAULT_FN_ATTRS _mm256_cvtps_epi32(__m256 __a)
Converts a vector of [8 x float] into a vector of [8 x i32].
Definition avxintrin.h:2175
static __inline __m256i __DEFAULT_FN_ATTRS _mm256_set1_epi64x(long long __q)
Constructs a 256-bit integer vector of [4 x i64], with each of the 64-bit integral vector elements set to the specified 64-bit integral value.
Definition avxintrin.h:4239
static __inline __m256 __DEFAULT_FN_ATTRS _mm256_min_ps(__m256 __a, __m256 __b)
Compares two 256-bit vectors of [8 x float] and returns the lesser of each pair of values.
Definition avxintrin.h:264
static __inline __m256i __DEFAULT_FN_ATTRS _mm256_cvttps_epi32(__m256 __a)
Converts a vector of [8 x float] into a vector of [8 x i32], truncating the result by rounding towards zero when it is inexact.
Definition avxintrin.h:2241
static __inline __m256 __DEFAULT_FN_ATTRS _mm256_sub_ps(__m256 __a, __m256 __b)
Subtracts two 256-bit vectors of [8 x float].
Definition avxintrin.h:114
static __inline __m256 __DEFAULT_FN_ATTRS _mm256_max_ps(__m256 __a, __m256 __b)
Compares two 256-bit vectors of [8 x float] and returns the greater of each pair of values.
Definition avxintrin.h:226
static __inline __m256 __DEFAULT_FN_ATTRS _mm256_movehdup_ps(__m256 __a)
Moves and duplicates odd-indexed values from a 256-bit vector of [8 x float] to float values in a 256-bit vector of [8 x float].
Definition avxintrin.h:2316
static __inline __m256d __DEFAULT_FN_ATTRS _mm256_div_pd(__m256d __a, __m256d __b)
Divides two 256-bit vectors of [4 x double].
Definition avxintrin.h:170
static __inline __m256d __DEFAULT_FN_ATTRS _mm256_sqrt_pd(__m256d __a)
Calculates the square roots of the values in a 256-bit vector of [4 x double].
Definition avxintrin.h:317
static __inline __m256d __DEFAULT_FN_ATTRS _mm256_mul_pd(__m256d __a, __m256d __b)
Multiplies two 256-bit vectors of [4 x double].
Definition avxintrin.h:282
static __inline __m256d __DEFAULT_FN_ATTRS _mm256_cvtepi32_pd(__m128i __a)
Converts a vector of [4 x i32] into a vector of [4 x double].
Definition avxintrin.h:2129
static __inline __m256d __DEFAULT_FN_ATTRS _mm256_unpackhi_pd(__m256d __a, __m256d __b)
Unpacks the odd-indexed vector elements from two 256-bit vectors of [4 x double] and interleaves them...
Definition avxintrin.h:2386
static __inline __m256d __DEFAULT_FN_ATTRS _mm256_sub_pd(__m256d __a, __m256d __b)
Subtracts two 256-bit vectors of [4 x double].
Definition avxintrin.h:96
static __inline __m256d __DEFAULT_FN_ATTRS _mm256_setzero_pd(void)
Constructs a 256-bit floating-point vector of [4 x double] with all vector elements initialized to ze...
Definition avxintrin.h:4254
static __inline __m256d __DEFAULT_FN_ATTRS _mm256_movedup_pd(__m256d __a)
Moves and duplicates double-precision floating point values from a 256-bit vector of [4 x double] to ...
Definition avxintrin.h:2363
static __inline __m128d __DEFAULT_FN_ATTRS128 _mm_permutevar_pd(__m128d __a, __m128i __c)
Copies the values in a 128-bit vector of [2 x double] as specified by the 128-bit integer vector oper...
Definition avxintrin.h:775
static __inline __m256i __DEFAULT_FN_ATTRS _mm256_setzero_si256(void)
Constructs a 256-bit integer vector initialized to zero.
Definition avxintrin.h:4281
static __inline __m256 __DEFAULT_FN_ATTRS _mm256_div_ps(__m256 __a, __m256 __b)
Divides two 256-bit vectors of [8 x float].
Definition avxintrin.h:188
static __inline __m256d __DEFAULT_FN_ATTRS _mm256_min_pd(__m256d __a, __m256d __b)
Compares two 256-bit vectors of [4 x double] and returns the lesser of each pair of values.
Definition avxintrin.h:245
static __inline __m256 __DEFAULT_FN_ATTRS _mm256_unpackhi_ps(__m256 __a, __m256 __b)
Unpacks the 32-bit vector elements 2, 3, 6 and 7 from each of the two 256-bit vectors of [8 x float] ...
Definition avxintrin.h:2435
static __inline __m256 __DEFAULT_FN_ATTRS _mm256_mul_ps(__m256 __a, __m256 __b)
Multiplies two 256-bit vectors of [8 x float].
Definition avxintrin.h:300
static __inline __m256 __DEFAULT_FN_ATTRS _mm256_unpacklo_ps(__m256 __a, __m256 __b)
Unpacks the 32-bit vector elements 0, 1, 4 and 5 from each of the two 256-bit vectors of [8 x float] ...
Definition avxintrin.h:2462
static __inline __m256d __DEFAULT_FN_ATTRS _mm256_permutevar_pd(__m256d __a, __m256i __c)
Copies the values in a 256-bit vector of [4 x double] as specified by the 256-bit integer vector oper...
Definition avxintrin.h:814
static __inline __m256d __DEFAULT_FN_ATTRS _mm256_max_pd(__m256d __a, __m256d __b)
Compares two 256-bit vectors of [4 x double] and returns the greater of each pair of values.
Definition avxintrin.h:207
static __inline __m256i __DEFAULT_FN_ATTRS _mm256_set1_epi32(int __i)
Constructs a 256-bit integer vector of [8 x i32], with each of the 32-bit integral vector elements se...
Definition avxintrin.h:4182
static __inline __m256d __DEFAULT_FN_ATTRS _mm256_add_pd(__m256d __a, __m256d __b)
Adds two 256-bit vectors of [4 x double].
Definition avxintrin.h:60
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_div_pd(__m128d __a, __m128d __b)
Performs an element-by-element division of two 128-bit vectors of [2 x double].
Definition emmintrin.h:201
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_sub_pd(__m128d __a, __m128d __b)
Subtracts two 128-bit vectors of [2 x double].
Definition emmintrin.h:117
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sll_epi32(__m128i __a, __m128i __count)
Left-shifts each 32-bit value in the 128-bit integer vector operand by the specified number of bits.
Definition emmintrin.h:2897
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_and_si128(__m128i __a, __m128i __b)
Performs a bitwise AND of two 128-bit integer vectors.
Definition emmintrin.h:2743
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sub_epi64(__m128i __a, __m128i __b)
Subtracts the corresponding elements of two [2 x i64] vectors.
Definition emmintrin.h:2643
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cvtepi32_pd(__m128i __a)
Converts the lower two integer elements of a 128-bit vector of [4 x i32] into two double-precision fl...
Definition emmintrin.h:1338
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_srli_epi32(__m128i __a, int __count)
Right-shifts each of the 32-bit values in the 128-bit integer vector operand by the specified number of b...
Definition emmintrin.h:3095
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_min_pd(__m128d __a, __m128d __b)
Performs element-by-element comparison of the two 128-bit vectors of [2 x double] and returns the vec...
Definition emmintrin.h:288
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_unpackhi_pd(__m128d __a, __m128d __b)
Unpacks the high-order 64-bit elements from two 128-bit vectors of [2 x double] and interleaves them ...
Definition emmintrin.h:4776
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_srl_epi32(__m128i __a, __m128i __count)
Right-shifts each of the 32-bit values in the 128-bit integer vector operand by the specified number of b...
Definition emmintrin.h:3114
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_unpacklo_pd(__m128d __a, __m128d __b)
Unpacks the low-order 64-bit elements from two 128-bit vectors of [2 x double] and interleaves them i...
Definition emmintrin.h:4797
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_unpacklo_epi64(__m128i __a, __m128i __b)
Unpacks the low-order 64-bit elements from two 128-bit vectors of [2 x i64] and interleaves them into...
Definition emmintrin.h:4703
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_srai_epi32(__m128i __a, int __count)
Right-shifts each 32-bit value in the 128-bit integer vector operand by the specified number of bits.
Definition emmintrin.h:2995
static __inline__ void int __a
Definition emmintrin.h:4185
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_unpackhi_epi64(__m128i __a, __m128i __b)
Unpacks the high-order 64-bit elements from two 128-bit vectors of [2 x i64] and interleaves them int...
Definition emmintrin.h:4596
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_cvtepi32_ps(__m128i __a)
Converts a vector of [4 x i32] into a vector of [4 x float].
Definition emmintrin.h:3412
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_unpacklo_epi32(__m128i __a, __m128i __b)
Unpacks the low-order (index 0,1) values from two 128-bit vectors of [4 x i32] and interleaves them i...
Definition emmintrin.h:4682
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_sqrt_pd(__m128d __a)
Calculates the square root of each of the two values stored in a 128-bit vector of [2 x double].
Definition emmintrin.h:244
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_undefined_si128(void)
Generates a 128-bit vector of [4 x i32] with unspecified content.
Definition emmintrin.h:3587
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_srli_epi64(__m128i __a, int __count)
Right-shifts each of the 64-bit values in the 128-bit integer vector operand by the specified number of b...
Definition emmintrin.h:3133
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_unpackhi_epi32(__m128i __a, __m128i __b)
Unpacks the high-order (index 2,3) values from two 128-bit vectors of [4 x i32] and interleaves them ...
Definition emmintrin.h:4575
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_add_epi64(__m128i __a, __m128i __b)
Adds the corresponding elements of two 128-bit vectors of [2 x i64], saving the lower 64 bits of each...
Definition emmintrin.h:2201
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sll_epi64(__m128i __a, __m128i __count)
Left-shifts each 64-bit value in the 128-bit integer vector operand by the specified number of bits.
Definition emmintrin.h:2935
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set1_epi32(int __i)
Initializes all values in a 128-bit vector of [4 x i32] with the specified 32-bit value.
Definition emmintrin.h:3804
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_slli_epi32(__m128i __a, int __count)
Left-shifts each 32-bit value in the 128-bit integer vector operand by the specified number of bits.
Definition emmintrin.h:2878
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_add_epi32(__m128i __a, __m128i __b)
Adds the corresponding elements of two 128-bit vectors of [4 x i32], saving the lower 32 bits of each...
Definition emmintrin.h:2161
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_setzero_pd(void)
Constructs a 128-bit floating-point vector of [2 x double] initialized to zero.
Definition emmintrin.h:1911
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mul_epu32(__m128i __a, __m128i __b)
Multiplies 32-bit unsigned integer values contained in the lower bits of the corresponding elements o...
Definition emmintrin.h:2530
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_srl_epi64(__m128i __a, __m128i __count)
Right-shifts each of the 64-bit values in the 128-bit integer vector operand by the specified number of b...
Definition emmintrin.h:3152
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvttps_epi32(__m128 __a)
Converts a vector of [4 x float] into a vector of [4 x i32], truncating the result when it is inexact...
Definition emmintrin.h:3445
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_slli_epi64(__m128i __a, int __count)
Left-shifts each 64-bit value in the 128-bit integer vector operand by the specified number of bits.
Definition emmintrin.h:2916
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_add_pd(__m128d __a, __m128d __b)
Adds two 128-bit vectors of [2 x double].
Definition emmintrin.h:75
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_max_pd(__m128d __a, __m128d __b)
Performs element-by-element comparison of the two 128-bit vectors of [2 x double] and returns the vec...
Definition emmintrin.h:332
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_mul_pd(__m128d __a, __m128d __b)
Multiplies two 128-bit vectors of [2 x double].
Definition emmintrin.h:158
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set1_epi64x(long long __q)
Initializes both values in a 128-bit integer vector with the specified 64-bit integer value.
Definition emmintrin.h:3766
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sub_epi32(__m128i __a, __m128i __b)
Subtracts the corresponding 32-bit integer values in the operands.
Definition emmintrin.h:2606
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cvtps_pd(__m128 __a)
Converts the lower two single-precision floating-point elements of a 128-bit vector of [4 x float] in...
Definition emmintrin.h:1315
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_setzero_si128(void)
Creates a 128-bit integer vector initialized to zero.
Definition emmintrin.h:3977
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sra_epi32(__m128i __a, __m128i __count)
Right-shifts each 32-bit value in the 128-bit integer vector operand by the specified number of bits.
Definition emmintrin.h:3015
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtps_epi32(__m128 __a)
Converts a vector of [4 x float] into a vector of [4 x i32].
Definition emmintrin.h:3428
static __inline__ unsigned char int __C
Definition ia32intrin.h:373
struct __storeu_i16 *__P __v
Definition immintrin.h:348
static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_movedup_pd(__m128d __a)
Moves and duplicates the double-precision value in the lower bits of a 128-bit vector of [2 x double]...
Definition pmmintrin.h:243
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_movehdup_ps(__m128 __a)
Moves and duplicates odd-indexed values from a 128-bit vector of [4 x float] to float values stored i...
Definition pmmintrin.h:121
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_moveldup_ps(__m128 __a)
Duplicates even-indexed values from a 128-bit vector of [4 x float] to float values stored in a 128-b...
Definition pmmintrin.h:142
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtepu8_epi32(__m128i __V)
Zero-extends each of the lower four 8-bit integer elements of a 128-bit vector of [16 x i8] to 32-bit...
Definition smmintrin.h:1370
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mullo_epi32(__m128i __V1, __m128i __V2)
Multiplies corresponding elements of two 128-bit vectors of [4 x i32] and returns the lower 32 bits of...
Definition smmintrin.h:534
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_max_epu32(__m128i __V1, __m128i __V2)
Compares the corresponding elements of two 128-bit vectors of [4 x u32] and returns a 128-bit vector ...
Definition smmintrin.h:798
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtepi8_epi32(__m128i __V)
Sign-extends each of the lower four 8-bit integer elements of a 128-bit vector of [16 x i8] to 32-bit...
Definition smmintrin.h:1251
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtepu32_epi64(__m128i __V)
Zero-extends each of the lower two 32-bit integer elements of a 128-bit integer vector of [4 x i32] t...
Definition smmintrin.h:1446
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtepu16_epi64(__m128i __V)
Zero-extends each of the lower two 16-bit integer elements of a 128-bit integer vector of [8 x i16] t...
Definition smmintrin.h:1427
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_min_epi32(__m128i __V1, __m128i __V2)
Compares the corresponding elements of two 128-bit vectors of [4 x i32] and returns a 128-bit vector ...
Definition smmintrin.h:741
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtepi16_epi64(__m128i __V)
Sign-extends each of the lower two 16-bit integer elements of a 128-bit integer vector of [8 x i16] t...
Definition smmintrin.h:1312
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtepi8_epi64(__m128i __V)
Sign-extends each of the lower two 8-bit integer elements of a 128-bit integer vector of [16 x i8] to...
Definition smmintrin.h:1272
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtepi16_epi32(__m128i __V)
Sign-extends each of the lower four 16-bit integer elements of a 128-bit integer vector of [8 x i16] ...
Definition smmintrin.h:1293
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtepu16_epi32(__m128i __V)
Zero-extends each of the lower four 16-bit integer elements of a 128-bit integer vector of [8 x i16] ...
Definition smmintrin.h:1408
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_max_epi32(__m128i __V1, __m128i __V2)
Compares the corresponding elements of two 128-bit vectors of [4 x i32] and returns a 128-bit vector ...
Definition smmintrin.h:760
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtepu8_epi64(__m128i __V)
Zero-extends each of the lower two 8-bit integer elements of a 128-bit integer vector of [16 x i8] to...
Definition smmintrin.h:1389
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_min_epu32(__m128i __V1, __m128i __V2)
Compares the corresponding elements of two 128-bit vectors of [4 x u32] and returns a 128-bit vector ...
Definition smmintrin.h:779
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mul_epi32(__m128i __V1, __m128i __V2)
Multiplies corresponding even-indexed elements of two 128-bit vectors of [4 x i32] and returns a 128-...
Definition smmintrin.h:554
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtepi32_epi64(__m128i __V)
Sign-extends each of the lower two 32-bit integer elements of a 128-bit integer vector of [4 x i32] t...
Definition smmintrin.h:1331
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_abs_epi32(__m128i __a)
Computes the absolute value of each of the packed 32-bit signed integers in the source operand and st...
Definition tmmintrin.h:122
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_div_ps(__m128 __a, __m128 __b)
Divides two 128-bit vectors of [4 x float].
Definition xmmintrin.h:196
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_setzero_ps(void)
Constructs a 128-bit floating-point vector of [4 x float] initialized to zero.
Definition xmmintrin.h:1903
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_add_ps(__m128 __a, __m128 __b)
Adds two 128-bit vectors of [4 x float], and returns the results of the addition.
Definition xmmintrin.h:70
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_mul_ps(__m128 __a, __m128 __b)
Multiplies two 128-bit vectors of [4 x float] and returns the results of the multiplication.
Definition xmmintrin.h:155
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_max_ps(__m128 __a, __m128 __b)
Compares two 128-bit vectors of [4 x float] and returns the greater of each pair of values.
Definition xmmintrin.h:386
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_sub_ps(__m128 __a, __m128 __b)
Subtracts each of the values of the second operand from the first operand, both of which are 128-bit ...
Definition xmmintrin.h:113
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_min_ps(__m128 __a, __m128 __b)
Compares two 128-bit vectors of [4 x float] and returns the lesser of each pair of values.
Definition xmmintrin.h:344
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_unpackhi_ps(__m128 __a, __m128 __b)
Unpacks the high-order (index 2,3) values from two 128-bit vectors of [4 x float] and interleaves the...
Definition xmmintrin.h:2624
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_sqrt_ps(__m128 __a)
Calculates the square roots of the values stored in a 128-bit vector of [4 x float].
Definition xmmintrin.h:231
static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_unpacklo_ps(__m128 __a, __m128 __b)
Unpacks the low-order (index 0,1) values from two 128-bit vectors of [4 x float] and interleaves them...
Definition xmmintrin.h:2646