11 #error "Never use <avx512vldqintrin.h> directly; include <immintrin.h> instead."
14 #ifndef __AVX512VLDQINTRIN_H
15 #define __AVX512VLDQINTRIN_H
18 #define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512dq"), __min_vector_width__(128)))
19 #define __DEFAULT_FN_ATTRS256 __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512dq"), __min_vector_width__(256)))
return (__m256i) ((__v4du) __A * (__v4du) __B);

return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,

return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,

return (__m128i) ((__v2du) __A * (__v2du) __B);

return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,

return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
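/* Usage sketch (illustrative, not part of the header proper): merge- and
   zero-masked 64-bit multiplies.  Assumes a translation unit compiled with
   AVX512VL+AVX512DQ enabled (e.g. -mavx512vl -mavx512dq) and <immintrin.h>
   included.

     __m256i a  = _mm256_set1_epi64x(3);
     __m256i b  = _mm256_set1_epi64x(-7);
     __m256i p  = _mm256_mullo_epi64(a, b);               // all lanes: -21
     __m256i pm = _mm256_mask_mullo_epi64(a, 0x5, a, b);  // lanes 0,2: -21; lanes 1,3 keep a
     __m256i pz = _mm256_maskz_mullo_epi64(0x5, a, b);    // lanes 0,2: -21; lanes 1,3: 0
*/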
return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,

return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,

return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,

return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,

return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,

return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,

return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,

return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,

return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,

return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,

return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,

return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,

return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,

return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,

return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,

return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,

return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,

return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,

return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,

return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,

return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,

return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,

return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,

return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,

return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,

return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,

return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,

return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,

return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,

return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,

return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,

return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
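/* Usage sketch (illustrative): AVX512DQ adds masked forms of the classic
   bitwise float ops.  For example, clearing the sign bit of selected lanes
   only:

     __m128d v    = _mm_set_pd(-2.0, -1.0);
     __m128d mask = _mm_castsi128_pd(_mm_set1_epi64x(0x7fffffffffffffffLL));
     __m128d r    = _mm_mask_and_pd(v, 0x1, v, mask);  // lane 0 -> fabs, lane 1 kept from v
     // r = { 1.0, -2.0 }
*/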
return (__m128i) __builtin_ia32_cvtpd2qq128_mask ((__v2df) __A,

return (__m128i) __builtin_ia32_cvtpd2qq128_mask ((__v2df) __A,

return (__m128i) __builtin_ia32_cvtpd2qq128_mask ((__v2df) __A,

return (__m256i) __builtin_ia32_cvtpd2qq256_mask ((__v4df) __A,

return (__m256i) __builtin_ia32_cvtpd2qq256_mask ((__v4df) __A,

return (__m256i) __builtin_ia32_cvtpd2qq256_mask ((__v4df) __A,

return (__m128i) __builtin_ia32_cvtpd2uqq128_mask ((__v2df) __A,

return (__m128i) __builtin_ia32_cvtpd2uqq128_mask ((__v2df) __A,

return (__m128i) __builtin_ia32_cvtpd2uqq128_mask ((__v2df) __A,

return (__m256i) __builtin_ia32_cvtpd2uqq256_mask ((__v4df) __A,

return (__m256i) __builtin_ia32_cvtpd2uqq256_mask ((__v4df) __A,

return (__m256i) __builtin_ia32_cvtpd2uqq256_mask ((__v4df) __A,

return (__m128i) __builtin_ia32_cvtps2qq128_mask ((__v4sf) __A,

return (__m128i) __builtin_ia32_cvtps2qq128_mask ((__v4sf) __A,

return (__m128i) __builtin_ia32_cvtps2qq128_mask ((__v4sf) __A,

return (__m256i) __builtin_ia32_cvtps2qq256_mask ((__v4sf) __A,

return (__m256i) __builtin_ia32_cvtps2qq256_mask ((__v4sf) __A,

return (__m256i) __builtin_ia32_cvtps2qq256_mask ((__v4sf) __A,

return (__m128i) __builtin_ia32_cvtps2uqq128_mask ((__v4sf) __A,

return (__m128i) __builtin_ia32_cvtps2uqq128_mask ((__v4sf) __A,

return (__m128i) __builtin_ia32_cvtps2uqq128_mask ((__v4sf) __A,

return (__m256i) __builtin_ia32_cvtps2uqq256_mask ((__v4sf) __A,

return (__m256i) __builtin_ia32_cvtps2uqq256_mask ((__v4sf) __A,

return (__m256i) __builtin_ia32_cvtps2uqq256_mask ((__v4sf) __A,
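/* Usage sketch (illustrative): DQ's direct float -> 64-bit integer
   conversions round according to the current rounding mode
   (round-to-nearest-even by default), unlike the _mm*_cvtt* forms further
   below, which truncate.

     __m128d d = _mm_set_pd(2.5, -1.5);
     __m128i q = _mm_cvtpd_epi64(d);   // { -2, 2 } under round-to-nearest-even
*/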
return (__m128d)__builtin_convertvector((__v2di)__A, __v2df);

return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,

return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,

return (__m256d)__builtin_convertvector((__v4di)__A, __v4df);

return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,

return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,

return (__m128) __builtin_ia32_cvtqq2ps128_mask ((__v2di) __A,

return (__m128) __builtin_ia32_cvtqq2ps128_mask ((__v2di) __A,

return (__m128) __builtin_ia32_cvtqq2ps128_mask ((__v2di) __A,

return (__m128)__builtin_convertvector((__v4di)__A, __v4sf);

return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,

return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
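/* Usage sketch (illustrative): signed 64-bit -> double conversion is exact
   for values whose magnitude fits in 53 bits.

     __m128i q = _mm_set_epi64x(2, -3);
     __m128d d = _mm_cvtepi64_pd(q);   // { -3.0, 2.0 }
*/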
return (__m128i) __builtin_ia32_cvttpd2qq128_mask ((__v2df) __A,

return (__m128i) __builtin_ia32_cvttpd2qq128_mask ((__v2df) __A,

return (__m128i) __builtin_ia32_cvttpd2qq128_mask ((__v2df) __A,

return (__m256i) __builtin_ia32_cvttpd2qq256_mask ((__v4df) __A,

return (__m256i) __builtin_ia32_cvttpd2qq256_mask ((__v4df) __A,

return (__m256i) __builtin_ia32_cvttpd2qq256_mask ((__v4df) __A,

return (__m128i) __builtin_ia32_cvttpd2uqq128_mask ((__v2df) __A,

return (__m128i) __builtin_ia32_cvttpd2uqq128_mask ((__v2df) __A,

return (__m128i) __builtin_ia32_cvttpd2uqq128_mask ((__v2df) __A,

return (__m256i) __builtin_ia32_cvttpd2uqq256_mask ((__v4df) __A,

return (__m256i) __builtin_ia32_cvttpd2uqq256_mask ((__v4df) __A,

return (__m256i) __builtin_ia32_cvttpd2uqq256_mask ((__v4df) __A,

return (__m128i) __builtin_ia32_cvttps2qq128_mask ((__v4sf) __A,

return (__m128i) __builtin_ia32_cvttps2qq128_mask ((__v4sf) __A,

return (__m128i) __builtin_ia32_cvttps2qq128_mask ((__v4sf) __A,

return (__m256i) __builtin_ia32_cvttps2qq256_mask ((__v4sf) __A,

return (__m256i) __builtin_ia32_cvttps2qq256_mask ((__v4sf) __A,

return (__m256i) __builtin_ia32_cvttps2qq256_mask ((__v4sf) __A,

return (__m128i) __builtin_ia32_cvttps2uqq128_mask ((__v4sf) __A,

return (__m128i) __builtin_ia32_cvttps2uqq128_mask ((__v4sf) __A,

return (__m128i) __builtin_ia32_cvttps2uqq128_mask ((__v4sf) __A,

return (__m256i) __builtin_ia32_cvttps2uqq256_mask ((__v4sf) __A,

return (__m256i) __builtin_ia32_cvttps2uqq256_mask ((__v4sf) __A,

return (__m256i) __builtin_ia32_cvttps2uqq256_mask ((__v4sf) __A,
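/* Usage sketch (illustrative): the cvtt forms truncate toward zero instead
   of honoring the current rounding mode.

     __m128d d = _mm_set_pd(2.9, -1.9);
     __m128i t = _mm_cvttpd_epi64(d);  // { -1, 2 }
*/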
return (__m128d)__builtin_convertvector((__v2du)__A, __v2df);

return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,

return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,

return (__m256d)__builtin_convertvector((__v4du)__A, __v4df);

return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,

return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,

return (__m128) __builtin_ia32_cvtuqq2ps128_mask ((__v2di) __A,

return (__m128) __builtin_ia32_cvtuqq2ps128_mask ((__v2di) __A,

return (__m128) __builtin_ia32_cvtuqq2ps128_mask ((__v2di) __A,

return (__m128)__builtin_convertvector((__v4du)__A, __v4sf);

return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,

return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
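/* Usage sketch (illustrative): the epu64 forms treat input lanes as
   unsigned, so an all-ones lane converts to (a rounding of) 2^64, not -1.0.

     __m128i q = _mm_set1_epi64x(-1);   // 0xFFFFFFFFFFFFFFFF per lane
     __m128d u = _mm_cvtepu64_pd(q);    // ~1.8446744e19 per lane
     __m128d s = _mm_cvtepi64_pd(q);    // -1.0 per lane
*/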
#define _mm_range_pd(A, B, C) \
  (__m128d)__builtin_ia32_rangepd128_mask((__v2df)(__m128d)(A), \
                                          (__v2df)(__m128d)(B), (int)(C), \
                                          (__v2df)_mm_setzero_pd(), \
                                          (__mmask8)-1)

#define _mm_mask_range_pd(W, U, A, B, C) \
  (__m128d)__builtin_ia32_rangepd128_mask((__v2df)(__m128d)(A), \
                                          (__v2df)(__m128d)(B), (int)(C), \
                                          (__v2df)(__m128d)(W), \
                                          (__mmask8)(U))

#define _mm_maskz_range_pd(U, A, B, C) \
  (__m128d)__builtin_ia32_rangepd128_mask((__v2df)(__m128d)(A), \
                                          (__v2df)(__m128d)(B), (int)(C), \
                                          (__v2df)_mm_setzero_pd(), \
                                          (__mmask8)(U))

#define _mm256_range_pd(A, B, C) \
  (__m256d)__builtin_ia32_rangepd256_mask((__v4df)(__m256d)(A), \
                                          (__v4df)(__m256d)(B), (int)(C), \
                                          (__v4df)_mm256_setzero_pd(), \
                                          (__mmask8)-1)

#define _mm256_mask_range_pd(W, U, A, B, C) \
  (__m256d)__builtin_ia32_rangepd256_mask((__v4df)(__m256d)(A), \
                                          (__v4df)(__m256d)(B), (int)(C), \
                                          (__v4df)(__m256d)(W), \
                                          (__mmask8)(U))

#define _mm256_maskz_range_pd(U, A, B, C) \
  (__m256d)__builtin_ia32_rangepd256_mask((__v4df)(__m256d)(A), \
                                          (__v4df)(__m256d)(B), (int)(C), \
                                          (__v4df)_mm256_setzero_pd(), \
                                          (__mmask8)(U))

#define _mm_range_ps(A, B, C) \
  (__m128)__builtin_ia32_rangeps128_mask((__v4sf)(__m128)(A), \
                                         (__v4sf)(__m128)(B), (int)(C), \
                                         (__v4sf)_mm_setzero_ps(), \
                                         (__mmask8)-1)

#define _mm_mask_range_ps(W, U, A, B, C) \
  (__m128)__builtin_ia32_rangeps128_mask((__v4sf)(__m128)(A), \
                                         (__v4sf)(__m128)(B), (int)(C), \
                                         (__v4sf)(__m128)(W), (__mmask8)(U))

#define _mm_maskz_range_ps(U, A, B, C) \
  (__m128)__builtin_ia32_rangeps128_mask((__v4sf)(__m128)(A), \
                                         (__v4sf)(__m128)(B), (int)(C), \
                                         (__v4sf)_mm_setzero_ps(), \
                                         (__mmask8)(U))

#define _mm256_range_ps(A, B, C) \
  (__m256)__builtin_ia32_rangeps256_mask((__v8sf)(__m256)(A), \
                                         (__v8sf)(__m256)(B), (int)(C), \
                                         (__v8sf)_mm256_setzero_ps(), \
                                         (__mmask8)-1)

#define _mm256_mask_range_ps(W, U, A, B, C) \
  (__m256)__builtin_ia32_rangeps256_mask((__v8sf)(__m256)(A), \
                                         (__v8sf)(__m256)(B), (int)(C), \
                                         (__v8sf)(__m256)(W), (__mmask8)(U))

#define _mm256_maskz_range_ps(U, A, B, C) \
  (__m256)__builtin_ia32_rangeps256_mask((__v8sf)(__m256)(A), \
                                         (__v8sf)(__m256)(B), (int)(C), \
                                         (__v8sf)_mm256_setzero_ps(), \
                                         (__mmask8)(U))
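/* Usage sketch (illustrative): the range intrinsics compute a min/max-style
   result per lane; the 4-bit imm selects the operation (min, max, absolute
   min, absolute max) and how the result's sign is chosen.  See the Intel SDM
   entry for VRANGEPD/VRANGEPS for the exact imm8 encoding; the value 5 below
   is only a placeholder.

     __m128d a = _mm_set_pd(4.0, -8.0);
     __m128d b = _mm_set_pd(2.0, 3.0);
     __m128d r = _mm_range_pd(a, b, 5);
*/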
#define _mm_reduce_pd(A, B) \
  (__m128d)__builtin_ia32_reducepd128_mask((__v2df)(__m128d)(A), (int)(B), \
                                           (__v2df)_mm_setzero_pd(), \
                                           (__mmask8)-1)

#define _mm_mask_reduce_pd(W, U, A, B) \
  (__m128d)__builtin_ia32_reducepd128_mask((__v2df)(__m128d)(A), (int)(B), \
                                           (__v2df)(__m128d)(W), \
                                           (__mmask8)(U))

#define _mm_maskz_reduce_pd(U, A, B) \
  (__m128d)__builtin_ia32_reducepd128_mask((__v2df)(__m128d)(A), (int)(B), \
                                           (__v2df)_mm_setzero_pd(), \
                                           (__mmask8)(U))

#define _mm256_reduce_pd(A, B) \
  (__m256d)__builtin_ia32_reducepd256_mask((__v4df)(__m256d)(A), (int)(B), \
                                           (__v4df)_mm256_setzero_pd(), \
                                           (__mmask8)-1)

#define _mm256_mask_reduce_pd(W, U, A, B) \
  (__m256d)__builtin_ia32_reducepd256_mask((__v4df)(__m256d)(A), (int)(B), \
                                           (__v4df)(__m256d)(W), \
                                           (__mmask8)(U))

#define _mm256_maskz_reduce_pd(U, A, B) \
  (__m256d)__builtin_ia32_reducepd256_mask((__v4df)(__m256d)(A), (int)(B), \
                                           (__v4df)_mm256_setzero_pd(), \
                                           (__mmask8)(U))

#define _mm_reduce_ps(A, B) \
  (__m128)__builtin_ia32_reduceps128_mask((__v4sf)(__m128)(A), (int)(B), \
                                          (__v4sf)_mm_setzero_ps(), \
                                          (__mmask8)-1)

#define _mm_mask_reduce_ps(W, U, A, B) \
  (__m128)__builtin_ia32_reduceps128_mask((__v4sf)(__m128)(A), (int)(B), \
                                          (__v4sf)(__m128)(W), \
                                          (__mmask8)(U))

#define _mm_maskz_reduce_ps(U, A, B) \
  (__m128)__builtin_ia32_reduceps128_mask((__v4sf)(__m128)(A), (int)(B), \
                                          (__v4sf)_mm_setzero_ps(), \
                                          (__mmask8)(U))

#define _mm256_reduce_ps(A, B) \
  (__m256)__builtin_ia32_reduceps256_mask((__v8sf)(__m256)(A), (int)(B), \
                                          (__v8sf)_mm256_setzero_ps(), \
                                          (__mmask8)-1)

#define _mm256_mask_reduce_ps(W, U, A, B) \
  (__m256)__builtin_ia32_reduceps256_mask((__v8sf)(__m256)(A), (int)(B), \
                                          (__v8sf)(__m256)(W), \
                                          (__mmask8)(U))

#define _mm256_maskz_reduce_ps(U, A, B) \
  (__m256)__builtin_ia32_reduceps256_mask((__v8sf)(__m256)(A), (int)(B), \
                                          (__v8sf)_mm256_setzero_ps(), \
                                          (__mmask8)(U))
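/* Usage sketch (illustrative): _mm*_reduce_* extracts the reduced argument,
   i.e. x minus x rounded to a limited number of fraction bits; imm8[7:4]
   gives the number of fraction bits kept and imm8[1:0] the rounding mode
   (see VREDUCEPD/VREDUCEPS in the Intel SDM for the exact encoding).

     __m128d x = _mm_set_pd(1.75, -2.25);
     __m128d f = _mm_reduce_pd(x, 0x01);  // keep 0 fraction bits, round down:
                                          // yields each lane's fractional part
*/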
return (__mmask8) __builtin_ia32_cvtd2mask128 ((__v4si) __A);

return (__mmask8) __builtin_ia32_cvtd2mask256 ((__v8si) __A);

return (__m128i) __builtin_ia32_cvtmask2d128 (__A);

return (__m256i) __builtin_ia32_cvtmask2d256 (__A);

return (__m128i) __builtin_ia32_cvtmask2q128 (__A);

return (__m256i) __builtin_ia32_cvtmask2q256 (__A);

return (__mmask8) __builtin_ia32_cvtq2mask128 ((__v2di) __A);

return (__mmask8) __builtin_ia32_cvtq2mask256 ((__v4di) __A);
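/* Usage sketch (illustrative): movepi*_mask packs the sign bit of each lane
   into a __mmask8, and movm_epi* expands mask bits back into all-ones /
   all-zeros lanes.

     __m128i v  = _mm_set_epi32(-1, 2, -3, 4);
     __mmask8 m = _mm_movepi32_mask(v);   // 0b1010: lanes 1 and 3 are negative
     __m128i e  = _mm_movm_epi32(m);      // { 0, -1, 0, -1 }
*/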
return (__m256)__builtin_shufflevector((__v4sf)__A, (__v4sf)__A,
                                       0, 1, 0, 1, 0, 1, 0, 1);

return (__m256)__builtin_ia32_selectps_256((__mmask8)__M,

return (__m256)__builtin_ia32_selectps_256((__mmask8)__M,

return (__m256d)__builtin_shufflevector((__v2df)__A, (__v2df)__A,
                                        0, 1, 0, 1);

return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__M,

return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__M,

return (__m128i)__builtin_shufflevector((__v4si)__A, (__v4si)__A,
                                        0, 1, 0, 1);

return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,

return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,

return (__m256i)__builtin_shufflevector((__v4si)__A, (__v4si)__A,
                                        0, 1, 0, 1, 0, 1, 0, 1);

return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,

return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,

return (__m256i)__builtin_shufflevector((__v2di)__A, (__v2di)__A,
                                        0, 1, 0, 1);

return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,

return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
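/* Usage sketch (illustrative): broadcast_f32x2 / broadcast_i32x2 /
   broadcast_f64x2 / broadcast_i64x2 repeat the low 64 or 128 bits of the
   source across the destination.

     __m128 lo = _mm_set_ps(0.0f, 0.0f, 2.0f, 1.0f);  // low two lanes: 1, 2
     __m256 r  = _mm256_broadcast_f32x2(lo);          // { 1, 2, 1, 2, 1, 2, 1, 2 }
*/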
#define _mm256_extractf64x2_pd(A, imm) \
  (__m128d)__builtin_ia32_extractf64x2_256_mask((__v4df)(__m256d)(A), \
                                                (int)(imm), \
                                                (__v2df)_mm_undefined_pd(), \
                                                (__mmask8)-1)

#define _mm256_mask_extractf64x2_pd(W, U, A, imm) \
  (__m128d)__builtin_ia32_extractf64x2_256_mask((__v4df)(__m256d)(A), \
                                                (int)(imm), \
                                                (__v2df)(__m128d)(W), \
                                                (__mmask8)(U))

#define _mm256_maskz_extractf64x2_pd(U, A, imm) \
  (__m128d)__builtin_ia32_extractf64x2_256_mask((__v4df)(__m256d)(A), \
                                                (int)(imm), \
                                                (__v2df)_mm_setzero_pd(), \
                                                (__mmask8)(U))

#define _mm256_extracti64x2_epi64(A, imm) \
  (__m128i)__builtin_ia32_extracti64x2_256_mask((__v4di)(__m256i)(A), \
                                                (int)(imm), \
                                                (__v2di)_mm_undefined_si128(), \
                                                (__mmask8)-1)

#define _mm256_mask_extracti64x2_epi64(W, U, A, imm) \
  (__m128i)__builtin_ia32_extracti64x2_256_mask((__v4di)(__m256i)(A), \
                                                (int)(imm), \
                                                (__v2di)(__m128i)(W), \
                                                (__mmask8)(U))

#define _mm256_maskz_extracti64x2_epi64(U, A, imm) \
  (__m128i)__builtin_ia32_extracti64x2_256_mask((__v4di)(__m256i)(A), \
                                                (int)(imm), \
                                                (__v2di)_mm_setzero_si128(), \
                                                (__mmask8)(U))
#define _mm256_insertf64x2(A, B, imm) \
  (__m256d)__builtin_ia32_insertf64x2_256((__v4df)(__m256d)(A), \
                                          (__v2df)(__m128d)(B), (int)(imm))

#define _mm256_mask_insertf64x2(W, U, A, B, imm) \
  (__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \
                                       (__v4df)_mm256_insertf64x2((A), (B), (imm)), \
                                       (__v4df)(__m256d)(W))

#define _mm256_maskz_insertf64x2(U, A, B, imm) \
  (__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \
                                       (__v4df)_mm256_insertf64x2((A), (B), (imm)), \
                                       (__v4df)_mm256_setzero_pd())

#define _mm256_inserti64x2(A, B, imm) \
  (__m256i)__builtin_ia32_inserti64x2_256((__v4di)(__m256i)(A), \
                                          (__v2di)(__m128i)(B), (int)(imm))

#define _mm256_mask_inserti64x2(W, U, A, B, imm) \
  (__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \
                                      (__v4di)_mm256_inserti64x2((A), (B), (imm)), \
                                      (__v4di)(__m256i)(W))

#define _mm256_maskz_inserti64x2(U, A, B, imm) \
  (__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \
                                      (__v4di)_mm256_inserti64x2((A), (B), (imm)), \
                                      (__v4di)_mm256_setzero_si256())
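/* Usage sketch (illustrative): the imm operand selects which 128-bit half is
   extracted from, or replaced in, the 256-bit vector (0 = low, 1 = high).

     __m256i v  = _mm256_set_epi64x(4, 3, 2, 1);
     __m128i hi = _mm256_extracti64x2_epi64(v, 1);               // { 3, 4 }
     __m256i w  = _mm256_inserti64x2(v, _mm_set1_epi64x(9), 0);  // { 9, 9, 3, 4 }
*/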
#define _mm_mask_fpclass_pd_mask(U, A, imm) \
  (__mmask8)__builtin_ia32_fpclasspd128_mask((__v2df)(__m128d)(A), (int)(imm), \
                                             (__mmask8)(U))

#define _mm_fpclass_pd_mask(A, imm) \
  (__mmask8)__builtin_ia32_fpclasspd128_mask((__v2df)(__m128d)(A), (int)(imm), \
                                             (__mmask8)-1)

#define _mm256_mask_fpclass_pd_mask(U, A, imm) \
  (__mmask8)__builtin_ia32_fpclasspd256_mask((__v4df)(__m256d)(A), (int)(imm), \
                                             (__mmask8)(U))

#define _mm256_fpclass_pd_mask(A, imm) \
  (__mmask8)__builtin_ia32_fpclasspd256_mask((__v4df)(__m256d)(A), (int)(imm), \
                                             (__mmask8)-1)

#define _mm_mask_fpclass_ps_mask(U, A, imm) \
  (__mmask8)__builtin_ia32_fpclassps128_mask((__v4sf)(__m128)(A), (int)(imm), \
                                             (__mmask8)(U))

#define _mm_fpclass_ps_mask(A, imm) \
  (__mmask8)__builtin_ia32_fpclassps128_mask((__v4sf)(__m128)(A), (int)(imm), \
                                             (__mmask8)-1)

#define _mm256_mask_fpclass_ps_mask(U, A, imm) \
  (__mmask8)__builtin_ia32_fpclassps256_mask((__v8sf)(__m256)(A), (int)(imm), \
                                             (__mmask8)(U))

#define _mm256_fpclass_ps_mask(A, imm) \
  (__mmask8)__builtin_ia32_fpclassps256_mask((__v8sf)(__m256)(A), (int)(imm), \
                                             (__mmask8)-1)
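/* Usage sketch (illustrative): fpclass tests each lane against the categories
   selected by imm8 (QNaN, +/-0, +/-inf, denormal, negative, SNaN, per the
   VFPCLASSPD/VFPCLASSPS encoding) and returns a mask of matching lanes.
   Assuming the SDM encoding where imm8 bits 3 and 4 select +inf and -inf:

     __m128d v  = _mm_set_pd(INFINITY, 2.0);    // INFINITY from <math.h>
     __mmask8 m = _mm_fpclass_pd_mask(v, 0x18); // lanes that are +/-inf -> 0x2
*/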
#undef __DEFAULT_FN_ATTRS128
#undef __DEFAULT_FN_ATTRS256
static __inline__ __mmask8 __DEFAULT_FN_ATTRS256 _mm256_movepi32_mask(__m256i __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_andnot_ps(__mmask8 __U, __m256 __A, __m256 __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cvtps_epu64(__m128 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvttps_epi64(__m128 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtps_epu64(__m256i __W, __mmask8 __U, __m128 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_movm_epi32(__mmask8 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtps_epi64(__mmask8 __U, __m128 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtps_epu64(__m128 __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_cvtepi64_pd(__mmask8 __U, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvttpd_epi64(__m128i __W, __mmask8 __U, __m128d __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_or_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_broadcast_i64x2(__m256i __O, __mmask8 __M, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtps_epu64(__mmask8 __U, __m128 __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS256 _mm256_cvtepi64_ps(__m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvttpd_epi64(__mmask8 __U, __m256d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvttpd_epi64(__m128d __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_broadcast_i32x2(__m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvttpd_epu64(__mmask8 __U, __m128d __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtpd_epi64(__mmask8 __U, __m256d __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_xor_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtepi64_ps(__mmask8 __U, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvttps_epu64(__m128i __W, __mmask8 __U, __m128 __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_xor_pd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_xor_pd(__mmask8 __U, __m256d __A, __m256d __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_xor_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_andnot_pd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mullo_epi64(__m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cvttps_epi64(__m128 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtps_epu64(__m128i __W, __mmask8 __U, __m128 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_broadcast_i32x2(__m256i __O, __mmask8 __M, __m128i __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_broadcast_f32x2(__m256 __O, __mmask8 __M, __m128 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvttpd_epi64(__mmask8 __U, __m128d __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvttpd_epu64(__m256i __W, __mmask8 __U, __m256d __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_or_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvttpd_epu64(__mmask8 __U, __m256d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_mullo_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtpd_epu64(__mmask8 __U, __m256d __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS256 _mm256_cvtepu64_ps(__m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtpd_epi64(__m256i __W, __mmask8 __U, __m256d __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_and_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_cvtepu64_pd(__m256i __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_andnot_pd(__mmask8 __U, __m256d __A, __m256d __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtpd_epu64(__m128i __W, __mmask8 __U, __m128d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtpd_epu64(__mmask8 __U, __m128d __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepi64_ps(__m128 __W, __mmask8 __U, __m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cvttpd_epi64(__m256d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtps_epi64(__m128 __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_cvtepu64_ps(__mmask8 __U, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_broadcast_i64x2(__m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_broadcast_i32x2(__mmask8 __M, __m128i __A)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS256 _mm256_movepi64_mask(__m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvttps_epi64(__mmask8 __U, __m128 __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_cvtepi64_pd(__m256i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_andnot_ps(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_cvtepu64_pd(__mmask8 __U, __m128i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_and_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtepu64_ps(__mmask8 __U, __m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_movm_epi64(__mmask8 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mullo_epi64(__m256i __A, __m256i __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_or_ps(__mmask8 __U, __m256 __A, __m256 __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvttps_epi64(__m256i __W, __mmask8 __U, __m128 __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_cvtepu64_ps(__m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtpd_epi64(__m128i __W, __mmask8 __U, __m128d __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepi64_pd(__m256d __W, __mmask8 __U, __m256i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_cvtepu64_pd(__m128i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_cvtepi64_pd(__m128i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_cvtepi64_ps(__m128 __W, __mmask8 __U, __m128i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_cvtepi64_ps(__mmask8 __U, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_mullo_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_and_ps(__mmask8 __U, __m256 __A, __m256 __B)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS128 _mm_movepi32_mask(__m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_mullo_epi64(__mmask8 __U, __m256i __A, __m256i __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_or_pd(__mmask8 __U, __m256d __A, __m256d __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_broadcast_f64x2(__mmask8 __M, __m128d __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_broadcast_f64x2(__m256d __O, __mmask8 __M, __m128d __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_xor_ps(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_broadcast_i64x2(__mmask8 __M, __m128i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_or_pd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_movm_epi32(__mmask8 __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepu64_ps(__m128 __W, __mmask8 __U, __m256i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_broadcast_i32x2(__m128i __O, __mmask8 __M, __m128i __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_cvtepi64_pd(__m128d __W, __mmask8 __U, __m128i __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_andnot_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvttps_epu64(__m128 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvttps_epu64(__m256i __W, __mmask8 __U, __m128 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_mullo_epi64(__mmask8 __U, __m128i __A, __m128i __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtps_epi64(__mmask8 __U, __m128 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtps_epi64(__m256i __W, __mmask8 __U, __m128 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cvtpd_epu64(__m256d __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_and_pd(__mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvttpd_epu64(__m128d __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_xor_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cvtps_epi64(__m128 __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_and_ps(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_cvtepi64_ps(__m128i __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_andnot_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtpd_epi64(__mmask8 __U, __m128d __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_or_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_broadcast_i32x2(__m128i __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_broadcast_f32x2(__mmask8 __M, __m128 __A)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_and_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvttps_epi64(__m128i __W, __mmask8 __U, __m128 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_movm_epi64(__mmask8 __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_cvtepu64_ps(__m128 __W, __mmask8 __U, __m128i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtps_epu64(__mmask8 __U, __m128 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cvtpd_epi64(__m256d __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvttpd_epi64(__m256i __W, __mmask8 __U, __m256d __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_broadcast_f64x2(__m128d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtpd_epi64(__m128d __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtepu64_pd(__mmask8 __U, __m256i __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_xor_ps(__mmask8 __U, __m256 __A, __m256 __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_andnot_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_xor_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvttpd_epu64(__m128i __W, __mmask8 __U, __m128d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvttps_epi64(__mmask8 __U, __m128 __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_and_pd(__mmask8 __U, __m256d __A, __m256d __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepu64_pd(__m256d __W, __mmask8 __U, __m256i __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvttps_epu64(__mmask8 __U, __m128 __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_broadcast_i32x2(__mmask8 __M, __m128i __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtps_epi64(__m128i __W, __mmask8 __U, __m128 __A)
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_broadcast_f32x2(__m128 __A)
static __inline__ __mmask8 __DEFAULT_FN_ATTRS128 _mm_movepi64_mask(__m128i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_or_ps(__mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_cvtepu64_pd(__m128d __W, __mmask8 __U, __m128i __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_or_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtepi64_pd(__mmask8 __U, __m256i __A)
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_andnot_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvttps_epu64(__mmask8 __U, __m128 __A)
static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_and_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cvttpd_epu64(__m256d __A)
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtpd_epu64(__m128d __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cvttps_epu64(__m128 __A)
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtpd_epu64(__m256i __W, __mmask8 __U, __m256d __A)
#endif /* __AVX512VLDQINTRIN_H */