avx512erintrin.h
/*===---- avx512erintrin.h - AVX512ER intrinsics ---------------------------===
 *
 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 * See https://llvm.org/LICENSE.txt for license information.
 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 *
 *===-----------------------------------------------------------------------===
 */
#ifndef __IMMINTRIN_H
#error "Never use <avx512erintrin.h> directly; include <immintrin.h> instead."
#endif

#ifndef __AVX512ERINTRIN_H
#define __AVX512ERINTRIN_H

/* exp2a23 */
#define _mm512_exp2a23_round_pd(A, R) \
  (__m512d)__builtin_ia32_exp2pd_mask((__v8df)(__m512d)(A), \
                                      (__v8df)_mm512_setzero_pd(), \
                                      (__mmask8)-1, (int)(R))

#define _mm512_mask_exp2a23_round_pd(S, M, A, R) \
  (__m512d)__builtin_ia32_exp2pd_mask((__v8df)(__m512d)(A), \
                                      (__v8df)(__m512d)(S), (__mmask8)(M), \
                                      (int)(R))

#define _mm512_maskz_exp2a23_round_pd(M, A, R) \
  (__m512d)__builtin_ia32_exp2pd_mask((__v8df)(__m512d)(A), \
                                      (__v8df)_mm512_setzero_pd(), \
                                      (__mmask8)(M), (int)(R))

#define _mm512_exp2a23_pd(A) \
  _mm512_exp2a23_round_pd((A), _MM_FROUND_CUR_DIRECTION)

#define _mm512_mask_exp2a23_pd(S, M, A) \
  _mm512_mask_exp2a23_round_pd((S), (M), (A), _MM_FROUND_CUR_DIRECTION)

#define _mm512_maskz_exp2a23_pd(M, A) \
  _mm512_maskz_exp2a23_round_pd((M), (A), _MM_FROUND_CUR_DIRECTION)

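/* Usage sketch (editorial note, not part of the upstream LLVM header):
 * computing an approximation of 2^x, accurate to 23 bits, for eight packed
 * doubles. Assumes an AVX512ER-capable target (e.g. -mavx512er) and that
 * <immintrin.h>, not this header, is what user code includes.
 *
 *   __m512d x  = _mm512_set1_pd(3.0);
 *   __m512d y  = _mm512_exp2a23_pd(x);             // every lane ~ 2^3 = 8.0
 *   __m512d yz = _mm512_maskz_exp2a23_pd(0x0F, x); // lanes 0-3 computed,
 *                                                  // lanes 4-7 zeroed
 */
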
#define _mm512_exp2a23_round_ps(A, R) \
  (__m512)__builtin_ia32_exp2ps_mask((__v16sf)(__m512)(A), \
                                     (__v16sf)_mm512_setzero_ps(), \
                                     (__mmask16)-1, (int)(R))

#define _mm512_mask_exp2a23_round_ps(S, M, A, R) \
  (__m512)__builtin_ia32_exp2ps_mask((__v16sf)(__m512)(A), \
                                     (__v16sf)(__m512)(S), (__mmask16)(M), \
                                     (int)(R))

#define _mm512_maskz_exp2a23_round_ps(M, A, R) \
  (__m512)__builtin_ia32_exp2ps_mask((__v16sf)(__m512)(A), \
                                     (__v16sf)_mm512_setzero_ps(), \
                                     (__mmask16)(M), (int)(R))

#define _mm512_exp2a23_ps(A) \
  _mm512_exp2a23_round_ps((A), _MM_FROUND_CUR_DIRECTION)

#define _mm512_mask_exp2a23_ps(S, M, A) \
  _mm512_mask_exp2a23_round_ps((S), (M), (A), _MM_FROUND_CUR_DIRECTION)

#define _mm512_maskz_exp2a23_ps(M, A) \
  _mm512_maskz_exp2a23_round_ps((M), (A), _MM_FROUND_CUR_DIRECTION)

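/* Usage sketch (editorial): the _ps variants operate on sixteen float lanes
 * and take a 16-bit mask. The _round_ forms accept _MM_FROUND_CUR_DIRECTION
 * or, to suppress floating-point exceptions (SAE), _MM_FROUND_NO_EXC.
 *
 *   __m512 x = _mm512_set1_ps(2.0f);
 *   __m512 s = _mm512_set1_ps(-1.0f);
 *   __m512 m = _mm512_mask_exp2a23_ps(s, 0x00FF, x); // lanes 8-15 keep -1.0
 *   __m512 n = _mm512_exp2a23_round_ps(x, _MM_FROUND_NO_EXC);
 */
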
/* rsqrt28 */
#define _mm512_rsqrt28_round_pd(A, R) \
  (__m512d)__builtin_ia32_rsqrt28pd_mask((__v8df)(__m512d)(A), \
                                         (__v8df)_mm512_setzero_pd(), \
                                         (__mmask8)-1, (int)(R))

#define _mm512_mask_rsqrt28_round_pd(S, M, A, R) \
  (__m512d)__builtin_ia32_rsqrt28pd_mask((__v8df)(__m512d)(A), \
                                         (__v8df)(__m512d)(S), (__mmask8)(M), \
                                         (int)(R))

#define _mm512_maskz_rsqrt28_round_pd(M, A, R) \
  (__m512d)__builtin_ia32_rsqrt28pd_mask((__v8df)(__m512d)(A), \
                                         (__v8df)_mm512_setzero_pd(), \
                                         (__mmask8)(M), (int)(R))

#define _mm512_rsqrt28_pd(A) \
  _mm512_rsqrt28_round_pd((A), _MM_FROUND_CUR_DIRECTION)

#define _mm512_mask_rsqrt28_pd(S, M, A) \
  _mm512_mask_rsqrt28_round_pd((S), (M), (A), _MM_FROUND_CUR_DIRECTION)

#define _mm512_maskz_rsqrt28_pd(M, A) \
  _mm512_maskz_rsqrt28_round_pd((M), (A), _MM_FROUND_CUR_DIRECTION)

#define _mm512_rsqrt28_round_ps(A, R) \
  (__m512)__builtin_ia32_rsqrt28ps_mask((__v16sf)(__m512)(A), \
                                        (__v16sf)_mm512_setzero_ps(), \
                                        (__mmask16)-1, (int)(R))

#define _mm512_mask_rsqrt28_round_ps(S, M, A, R) \
  (__m512)__builtin_ia32_rsqrt28ps_mask((__v16sf)(__m512)(A), \
                                        (__v16sf)(__m512)(S), (__mmask16)(M), \
                                        (int)(R))

#define _mm512_maskz_rsqrt28_round_ps(M, A, R) \
  (__m512)__builtin_ia32_rsqrt28ps_mask((__v16sf)(__m512)(A), \
                                        (__v16sf)_mm512_setzero_ps(), \
                                        (__mmask16)(M), (int)(R))

#define _mm512_rsqrt28_ps(A) \
  _mm512_rsqrt28_round_ps((A), _MM_FROUND_CUR_DIRECTION)

#define _mm512_mask_rsqrt28_ps(S, M, A) \
  _mm512_mask_rsqrt28_round_ps((S), (M), (A), _MM_FROUND_CUR_DIRECTION)

#define _mm512_maskz_rsqrt28_ps(M, A) \
  _mm512_maskz_rsqrt28_round_ps((M), (A), _MM_FROUND_CUR_DIRECTION)

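/* Usage sketch (editorial): rsqrt28 approximates 1/sqrt(x) with a relative
 * error of at most 2^-28. For floats that already exceeds full precision,
 * so the usual Newton-Raphson refinement step can often be skipped.
 *
 *   __m512d v  = _mm512_set1_pd(4.0);
 *   __m512d r  = _mm512_rsqrt28_pd(v);    // every lane ~ 0.5
 *   __m512  vf = _mm512_set1_ps(16.0f);
 *   __m512  rf = _mm512_rsqrt28_ps(vf);   // every lane ~ 0.25
 */
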
#define _mm_rsqrt28_round_ss(A, B, R) \
  (__m128)__builtin_ia32_rsqrt28ss_round_mask((__v4sf)(__m128)(A), \
                                              (__v4sf)(__m128)(B), \
                                              (__v4sf)_mm_setzero_ps(), \
                                              (__mmask8)-1, (int)(R))

#define _mm_mask_rsqrt28_round_ss(S, M, A, B, R) \
  (__m128)__builtin_ia32_rsqrt28ss_round_mask((__v4sf)(__m128)(A), \
                                              (__v4sf)(__m128)(B), \
                                              (__v4sf)(__m128)(S), \
                                              (__mmask8)(M), (int)(R))

#define _mm_maskz_rsqrt28_round_ss(M, A, B, R) \
  (__m128)__builtin_ia32_rsqrt28ss_round_mask((__v4sf)(__m128)(A), \
                                              (__v4sf)(__m128)(B), \
                                              (__v4sf)_mm_setzero_ps(), \
                                              (__mmask8)(M), (int)(R))

#define _mm_rsqrt28_ss(A, B) \
  _mm_rsqrt28_round_ss((A), (B), _MM_FROUND_CUR_DIRECTION)

#define _mm_mask_rsqrt28_ss(S, M, A, B) \
  _mm_mask_rsqrt28_round_ss((S), (M), (A), (B), _MM_FROUND_CUR_DIRECTION)

#define _mm_maskz_rsqrt28_ss(M, A, B) \
  _mm_maskz_rsqrt28_round_ss((M), (A), (B), _MM_FROUND_CUR_DIRECTION)

#define _mm_rsqrt28_round_sd(A, B, R) \
  (__m128d)__builtin_ia32_rsqrt28sd_round_mask((__v2df)(__m128d)(A), \
                                               (__v2df)(__m128d)(B), \
                                               (__v2df)_mm_setzero_pd(), \
                                               (__mmask8)-1, (int)(R))

#define _mm_mask_rsqrt28_round_sd(S, M, A, B, R) \
  (__m128d)__builtin_ia32_rsqrt28sd_round_mask((__v2df)(__m128d)(A), \
                                               (__v2df)(__m128d)(B), \
                                               (__v2df)(__m128d)(S), \
                                               (__mmask8)(M), (int)(R))

#define _mm_maskz_rsqrt28_round_sd(M, A, B, R) \
  (__m128d)__builtin_ia32_rsqrt28sd_round_mask((__v2df)(__m128d)(A), \
                                               (__v2df)(__m128d)(B), \
                                               (__v2df)_mm_setzero_pd(), \
                                               (__mmask8)(M), (int)(R))

#define _mm_rsqrt28_sd(A, B) \
  _mm_rsqrt28_round_sd((A), (B), _MM_FROUND_CUR_DIRECTION)

#define _mm_mask_rsqrt28_sd(S, M, A, B) \
  _mm_mask_rsqrt28_round_sd((S), (M), (A), (B), _MM_FROUND_CUR_DIRECTION)

#define _mm_maskz_rsqrt28_sd(M, A, B) \
  _mm_maskz_rsqrt28_round_sd((M), (A), (B), _MM_FROUND_CUR_DIRECTION)

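/* Usage sketch (editorial): as with other scalar AVX-512 intrinsics, the
 * _ss/_sd forms compute the approximation for element 0 of B only; the
 * upper elements of the result are copied from A.
 *
 *   __m128d a = _mm_set_pd(7.0, 7.0);  // upper element passes through
 *   __m128d b = _mm_set_pd(0.0, 4.0);  // element 0 is the operand
 *   __m128d r = _mm_rsqrt28_sd(a, b);  // r = { ~0.5, 7.0 }
 */
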
/* rcp28 */
#define _mm512_rcp28_round_pd(A, R) \
  (__m512d)__builtin_ia32_rcp28pd_mask((__v8df)(__m512d)(A), \
                                       (__v8df)_mm512_setzero_pd(), \
                                       (__mmask8)-1, (int)(R))

#define _mm512_mask_rcp28_round_pd(S, M, A, R) \
  (__m512d)__builtin_ia32_rcp28pd_mask((__v8df)(__m512d)(A), \
                                       (__v8df)(__m512d)(S), (__mmask8)(M), \
                                       (int)(R))

#define _mm512_maskz_rcp28_round_pd(M, A, R) \
  (__m512d)__builtin_ia32_rcp28pd_mask((__v8df)(__m512d)(A), \
                                       (__v8df)_mm512_setzero_pd(), \
                                       (__mmask8)(M), (int)(R))

#define _mm512_rcp28_pd(A) \
  _mm512_rcp28_round_pd((A), _MM_FROUND_CUR_DIRECTION)

#define _mm512_mask_rcp28_pd(S, M, A) \
  _mm512_mask_rcp28_round_pd((S), (M), (A), _MM_FROUND_CUR_DIRECTION)

#define _mm512_maskz_rcp28_pd(M, A) \
  _mm512_maskz_rcp28_round_pd((M), (A), _MM_FROUND_CUR_DIRECTION)

#define _mm512_rcp28_round_ps(A, R) \
  (__m512)__builtin_ia32_rcp28ps_mask((__v16sf)(__m512)(A), \
                                      (__v16sf)_mm512_setzero_ps(), \
                                      (__mmask16)-1, (int)(R))

#define _mm512_mask_rcp28_round_ps(S, M, A, R) \
  (__m512)__builtin_ia32_rcp28ps_mask((__v16sf)(__m512)(A), \
                                      (__v16sf)(__m512)(S), (__mmask16)(M), \
                                      (int)(R))

#define _mm512_maskz_rcp28_round_ps(M, A, R) \
  (__m512)__builtin_ia32_rcp28ps_mask((__v16sf)(__m512)(A), \
                                      (__v16sf)_mm512_setzero_ps(), \
                                      (__mmask16)(M), (int)(R))

#define _mm512_rcp28_ps(A) \
  _mm512_rcp28_round_ps((A), _MM_FROUND_CUR_DIRECTION)

#define _mm512_mask_rcp28_ps(S, M, A) \
  _mm512_mask_rcp28_round_ps((S), (M), (A), _MM_FROUND_CUR_DIRECTION)

#define _mm512_maskz_rcp28_ps(M, A) \
  _mm512_maskz_rcp28_round_ps((M), (A), _MM_FROUND_CUR_DIRECTION)

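/* Usage sketch (editorial): rcp28 approximates 1/x, again with a relative
 * error of at most 2^-28; masking composes exactly as in the families above.
 *
 *   __m512d d  = _mm512_set1_pd(8.0);
 *   __m512d q  = _mm512_rcp28_pd(d);                // every lane ~ 0.125
 *   __m512d qm = _mm512_mask_rcp28_pd(d, 0xF0, d);  // lanes 0-3 keep 8.0
 */
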
#define _mm_rcp28_round_ss(A, B, R) \
  (__m128)__builtin_ia32_rcp28ss_round_mask((__v4sf)(__m128)(A), \
                                            (__v4sf)(__m128)(B), \
                                            (__v4sf)_mm_setzero_ps(), \
                                            (__mmask8)-1, (int)(R))

#define _mm_mask_rcp28_round_ss(S, M, A, B, R) \
  (__m128)__builtin_ia32_rcp28ss_round_mask((__v4sf)(__m128)(A), \
                                            (__v4sf)(__m128)(B), \
                                            (__v4sf)(__m128)(S), \
                                            (__mmask8)(M), (int)(R))

#define _mm_maskz_rcp28_round_ss(M, A, B, R) \
  (__m128)__builtin_ia32_rcp28ss_round_mask((__v4sf)(__m128)(A), \
                                            (__v4sf)(__m128)(B), \
                                            (__v4sf)_mm_setzero_ps(), \
                                            (__mmask8)(M), (int)(R))

#define _mm_rcp28_ss(A, B) \
  _mm_rcp28_round_ss((A), (B), _MM_FROUND_CUR_DIRECTION)

#define _mm_mask_rcp28_ss(S, M, A, B) \
  _mm_mask_rcp28_round_ss((S), (M), (A), (B), _MM_FROUND_CUR_DIRECTION)

#define _mm_maskz_rcp28_ss(M, A, B) \
  _mm_maskz_rcp28_round_ss((M), (A), (B), _MM_FROUND_CUR_DIRECTION)

#define _mm_rcp28_round_sd(A, B, R) \
  (__m128d)__builtin_ia32_rcp28sd_round_mask((__v2df)(__m128d)(A), \
                                             (__v2df)(__m128d)(B), \
                                             (__v2df)_mm_setzero_pd(), \
                                             (__mmask8)-1, (int)(R))

#define _mm_mask_rcp28_round_sd(S, M, A, B, R) \
  (__m128d)__builtin_ia32_rcp28sd_round_mask((__v2df)(__m128d)(A), \
                                             (__v2df)(__m128d)(B), \
                                             (__v2df)(__m128d)(S), \
                                             (__mmask8)(M), (int)(R))

#define _mm_maskz_rcp28_round_sd(M, A, B, R) \
  (__m128d)__builtin_ia32_rcp28sd_round_mask((__v2df)(__m128d)(A), \
                                             (__v2df)(__m128d)(B), \
                                             (__v2df)_mm_setzero_pd(), \
                                             (__mmask8)(M), (int)(R))

#define _mm_rcp28_sd(A, B) \
  _mm_rcp28_round_sd((A), (B), _MM_FROUND_CUR_DIRECTION)

#define _mm_mask_rcp28_sd(S, M, A, B) \
  _mm_mask_rcp28_round_sd((S), (M), (A), (B), _MM_FROUND_CUR_DIRECTION)

#define _mm_maskz_rcp28_sd(M, A, B) \
  _mm_maskz_rcp28_round_sd((M), (A), (B), _MM_FROUND_CUR_DIRECTION)

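/* Usage sketch (editorial): scalar rcp28, mirroring the rsqrt28 _ss/_sd
 * forms above: element 0 of B is inverted, the rest is copied from A.
 *
 *   __m128 a = _mm_set1_ps(1.0f);
 *   __m128 b = _mm_set_ss(4.0f);   // element 0 = 4.0f, others zeroed
 *   __m128 r = _mm_rcp28_ss(a, b); // r = { ~0.25, 1.0, 1.0, 1.0 }
 */
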
#endif /* __AVX512ERINTRIN_H */