13#ifndef NO_WARN_X86_INTRINSICS
32 "Please read comment above. Use -DNO_WARN_X86_INTRINSICS to disable this error."
35#ifndef _MMINTRIN_H_INCLUDED
36#define _MMINTRIN_H_INCLUDED
38#if defined(__linux__) && defined(__ppc64__)
43typedef __attribute__((__aligned__(8))) unsigned
long long __m64;
48 signed char as_signed_char[8];
51 long long as_long_long;
58 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
71 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
73 return (__m64)(
unsigned int)__i;
77 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
84 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
90 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
99 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
105 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
112 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
113 _mm_cvtsi64x_si64(
long long __i) {
118 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
119 _mm_set_pi64x(
long long __i) {
126extern __inline
long long
127 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
129 return (
long long)__i;
132extern __inline
long long
133 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
135 return (
long long)__i;
139extern __inline
long long
140 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
141 _mm_cvtsi64_si64x(__m64 __i) {
142 return (
long long)__i;
150 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
152 __vector
signed short vm1;
153 __vector
signed char vresult;
155 vm1 = (__vector
signed short)(__vector
unsigned long long)
156#ifdef __LITTLE_ENDIAN__
162 return (__m64)((__vector
long long)vresult)[0];
166 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
175 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
177 __vector
signed int vm1;
178 __vector
signed short vresult;
180 vm1 = (__vector
signed int)(__vector
unsigned long long)
181#ifdef __LITTLE_ENDIAN__
187 return (__m64)((__vector
long long)vresult)[0];
191 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
200 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
202 __vector
unsigned char r;
203 __vector
signed short vm1 = (__vector
signed short)(__vector
long long)
204#ifdef __LITTLE_ENDIAN__
209 const __vector
signed short __zero = {0};
210 __vector __bool
short __select =
vec_cmplt(vm1, __zero);
211 r =
vec_packs((__vector
unsigned short)vm1, (__vector
unsigned short)vm1);
212 __vector __bool
char packsel =
vec_pack(__select, __select);
213 r =
vec_sel(r, (
const __vector
unsigned char)__zero, packsel);
214 return (__m64)((__vector
long long)r)[0];
218 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
227 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
230 __vector
unsigned char a,
b,
c;
235 return (__m64)((__vector
long long)
c)[1];
237 __m64_union m1, m2, res;
242 res.as_char[0] = m1.as_char[4];
243 res.as_char[1] = m2.as_char[4];
244 res.as_char[2] = m1.as_char[5];
245 res.as_char[3] = m2.as_char[5];
246 res.as_char[4] = m1.as_char[6];
247 res.as_char[5] = m2.as_char[6];
248 res.as_char[6] = m1.as_char[7];
249 res.as_char[7] = m2.as_char[7];
251 return (__m64)res.as_m64;
256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
264 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
266 __m64_union m1, m2, res;
271 res.as_short[0] = m1.as_short[2];
272 res.as_short[1] = m2.as_short[2];
273 res.as_short[2] = m1.as_short[3];
274 res.as_short[3] = m2.as_short[3];
276 return (__m64)res.as_m64;
280 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
287 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
289 __m64_union m1, m2, res;
294 res.as_int[0] = m1.as_int[1];
295 res.as_int[1] = m2.as_int[1];
297 return (__m64)res.as_m64;
301 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
308 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
311 __vector
unsigned char a,
b,
c;
316 return (__m64)((__vector
long long)
c)[0];
318 __m64_union m1, m2, res;
323 res.as_char[0] = m1.as_char[0];
324 res.as_char[1] = m2.as_char[0];
325 res.as_char[2] = m1.as_char[1];
326 res.as_char[3] = m2.as_char[1];
327 res.as_char[4] = m1.as_char[2];
328 res.as_char[5] = m2.as_char[2];
329 res.as_char[6] = m1.as_char[3];
330 res.as_char[7] = m2.as_char[3];
332 return (__m64)res.as_m64;
337 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
344 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
346 __m64_union m1, m2, res;
351 res.as_short[0] = m1.as_short[0];
352 res.as_short[1] = m2.as_short[0];
353 res.as_short[2] = m1.as_short[1];
354 res.as_short[3] = m2.as_short[1];
356 return (__m64)res.as_m64;
360 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
368 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
370 __m64_union m1, m2, res;
375 res.as_int[0] = m1.as_int[0];
376 res.as_int[1] = m2.as_int[0];
378 return (__m64)res.as_m64;
382 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
389 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
392 __vector
signed char a,
b,
c;
397 return (__m64)((__vector
long long)
c)[0];
399 __m64_union m1, m2, res;
404 res.as_char[0] = m1.as_char[0] + m2.as_char[0];
405 res.as_char[1] = m1.as_char[1] + m2.as_char[1];
406 res.as_char[2] = m1.as_char[2] + m2.as_char[2];
407 res.as_char[3] = m1.as_char[3] + m2.as_char[3];
408 res.as_char[4] = m1.as_char[4] + m2.as_char[4];
409 res.as_char[5] = m1.as_char[5] + m2.as_char[5];
410 res.as_char[6] = m1.as_char[6] + m2.as_char[6];
411 res.as_char[7] = m1.as_char[7] + m2.as_char[7];
413 return (__m64)res.as_m64;
418 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
425 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
428 __vector
signed short a,
b,
c;
433 return (__m64)((__vector
long long)
c)[0];
435 __m64_union m1, m2, res;
440 res.as_short[0] = m1.as_short[0] + m2.as_short[0];
441 res.as_short[1] = m1.as_short[1] + m2.as_short[1];
442 res.as_short[2] = m1.as_short[2] + m2.as_short[2];
443 res.as_short[3] = m1.as_short[3] + m2.as_short[3];
445 return (__m64)res.as_m64;
450 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
457 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
460 __vector
signed int a,
b,
c;
465 return (__m64)((__vector
long long)
c)[0];
467 __m64_union m1, m2, res;
472 res.as_int[0] = m1.as_int[0] + m2.as_int[0];
473 res.as_int[1] = m1.as_int[1] + m2.as_int[1];
475 return (__m64)res.as_m64;
480 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
487 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
490 __vector
signed char a,
b,
c;
495 return (__m64)((__vector
long long)
c)[0];
497 __m64_union m1, m2, res;
502 res.as_char[0] = m1.as_char[0] - m2.as_char[0];
503 res.as_char[1] = m1.as_char[1] - m2.as_char[1];
504 res.as_char[2] = m1.as_char[2] - m2.as_char[2];
505 res.as_char[3] = m1.as_char[3] - m2.as_char[3];
506 res.as_char[4] = m1.as_char[4] - m2.as_char[4];
507 res.as_char[5] = m1.as_char[5] - m2.as_char[5];
508 res.as_char[6] = m1.as_char[6] - m2.as_char[6];
509 res.as_char[7] = m1.as_char[7] - m2.as_char[7];
511 return (__m64)res.as_m64;
516 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
523 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
526 __vector
signed short a,
b,
c;
531 return (__m64)((__vector
long long)
c)[0];
533 __m64_union m1, m2, res;
538 res.as_short[0] = m1.as_short[0] - m2.as_short[0];
539 res.as_short[1] = m1.as_short[1] - m2.as_short[1];
540 res.as_short[2] = m1.as_short[2] - m2.as_short[2];
541 res.as_short[3] = m1.as_short[3] - m2.as_short[3];
543 return (__m64)res.as_m64;
548 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
555 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
558 __vector
signed int a,
b,
c;
563 return (__m64)((__vector
long long)
c)[0];
565 __m64_union m1, m2, res;
570 res.as_int[0] = m1.as_int[0] - m2.as_int[0];
571 res.as_int[1] = m1.as_int[1] - m2.as_int[1];
573 return (__m64)res.as_m64;
578 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
584 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
586 return (__m1 + __m2);
590 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
592 return (__m1 - __m2);
597 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
599 return (__m << __count);
603 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
604 _m_psllq(__m64 __m, __m64 __count) {
609 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
611 return (__m << __count);
615 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
616 _m_psllqi(__m64 __m,
const int __count) {
622 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
624 return (__m >> __count);
628 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
629 _m_psrlq(__m64 __m, __m64 __count) {
634 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
636 return (__m >> __count);
640 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
641 _m_psrlqi(__m64 __m,
const int __count) {
647 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
649 return (__m1 & __m2);
653 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
654 _m_pand(__m64 __m1, __m64 __m2) {
661 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
663 return (~__m1 & __m2);
667 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
674 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
676 return (__m1 | __m2);
680 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
681 _m_por(__m64 __m1, __m64 __m2) {
687 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
689 return (__m1 ^ __m2);
693 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
694 _m_pxor(__m64 __m1, __m64 __m2) {
700 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
708 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
710#if defined(_ARCH_PWR6) && defined(__powerpc64__)
712 __asm__(
"cmpb %0,%1,%2;\n" :
"=r"(res) :
"r"(__m1),
"r"(__m2) :);
715 __m64_union m1, m2, res;
720 res.as_char[0] = (m1.as_char[0] == m2.as_char[0]) ? -1 : 0;
721 res.as_char[1] = (m1.as_char[1] == m2.as_char[1]) ? -1 : 0;
722 res.as_char[2] = (m1.as_char[2] == m2.as_char[2]) ? -1 : 0;
723 res.as_char[3] = (m1.as_char[3] == m2.as_char[3]) ? -1 : 0;
724 res.as_char[4] = (m1.as_char[4] == m2.as_char[4]) ? -1 : 0;
725 res.as_char[5] = (m1.as_char[5] == m2.as_char[5]) ? -1 : 0;
726 res.as_char[6] = (m1.as_char[6] == m2.as_char[6]) ? -1 : 0;
727 res.as_char[7] = (m1.as_char[7] == m2.as_char[7]) ? -1 : 0;
729 return (__m64)res.as_m64;
734 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
740 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
743 __vector
signed char a,
b,
c;
748 return (__m64)((__vector
long long)
c)[0];
750 __m64_union m1, m2, res;
755 res.as_char[0] = (m1.as_char[0] > m2.as_char[0]) ? -1 : 0;
756 res.as_char[1] = (m1.as_char[1] > m2.as_char[1]) ? -1 : 0;
757 res.as_char[2] = (m1.as_char[2] > m2.as_char[2]) ? -1 : 0;
758 res.as_char[3] = (m1.as_char[3] > m2.as_char[3]) ? -1 : 0;
759 res.as_char[4] = (m1.as_char[4] > m2.as_char[4]) ? -1 : 0;
760 res.as_char[5] = (m1.as_char[5] > m2.as_char[5]) ? -1 : 0;
761 res.as_char[6] = (m1.as_char[6] > m2.as_char[6]) ? -1 : 0;
762 res.as_char[7] = (m1.as_char[7] > m2.as_char[7]) ? -1 : 0;
764 return (__m64)res.as_m64;
769 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
777 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
780 __vector
signed short a,
b,
c;
785 return (__m64)((__vector
long long)
c)[0];
787 __m64_union m1, m2, res;
792 res.as_short[0] = (m1.as_short[0] == m2.as_short[0]) ? -1 : 0;
793 res.as_short[1] = (m1.as_short[1] == m2.as_short[1]) ? -1 : 0;
794 res.as_short[2] = (m1.as_short[2] == m2.as_short[2]) ? -1 : 0;
795 res.as_short[3] = (m1.as_short[3] == m2.as_short[3]) ? -1 : 0;
797 return (__m64)res.as_m64;
802 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
808 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
811 __vector
signed short a,
b,
c;
816 return (__m64)((__vector
long long)
c)[0];
818 __m64_union m1, m2, res;
823 res.as_short[0] = (m1.as_short[0] > m2.as_short[0]) ? -1 : 0;
824 res.as_short[1] = (m1.as_short[1] > m2.as_short[1]) ? -1 : 0;
825 res.as_short[2] = (m1.as_short[2] > m2.as_short[2]) ? -1 : 0;
826 res.as_short[3] = (m1.as_short[3] > m2.as_short[3]) ? -1 : 0;
828 return (__m64)res.as_m64;
833 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
841 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
844 __vector
signed int a,
b,
c;
849 return (__m64)((__vector
long long)
c)[0];
851 __m64_union m1, m2, res;
856 res.as_int[0] = (m1.as_int[0] == m2.as_int[0]) ? -1 : 0;
857 res.as_int[1] = (m1.as_int[1] == m2.as_int[1]) ? -1 : 0;
859 return (__m64)res.as_m64;
864 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
870 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
873 __vector
signed int a,
b,
c;
878 return (__m64)((__vector
long long)
c)[0];
880 __m64_union m1, m2, res;
885 res.as_int[0] = (m1.as_int[0] > m2.as_int[0]) ? -1 : 0;
886 res.as_int[1] = (m1.as_int[1] > m2.as_int[1]) ? -1 : 0;
888 return (__m64)res.as_m64;
893 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
902 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
904 __vector
signed char a,
b,
c;
909 return (__m64)((__vector
long long)
c)[0];
913 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
920 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
922 __vector
signed short a,
b,
c;
927 return (__m64)((__vector
long long)
c)[0];
931 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
938 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
940 __vector
unsigned char a,
b,
c;
945 return (__m64)((__vector
long long)
c)[0];
949 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
957 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
959 __vector
unsigned short a,
b,
c;
961 a = (__vector
unsigned short)
vec_splats(__m1);
964 return (__m64)((__vector
long long)
c)[0];
968 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
976 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
978 __vector
signed char a,
b,
c;
983 return (__m64)((__vector
long long)
c)[0];
987 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
995 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
997 __vector
signed short a,
b,
c;
1002 return (__m64)((__vector
long long)
c)[0];
1005extern __inline __m64
1006 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1013extern __inline __m64
1014 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1016 __vector
unsigned char a,
b,
c;
1018 a = (__vector
unsigned char)
vec_splats(__m1);
1021 return (__m64)((__vector
long long)
c)[0];
1024extern __inline __m64
1025 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1032extern __inline __m64
1033 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1035 __vector
unsigned short a,
b,
c;
1037 a = (__vector
unsigned short)
vec_splats(__m1);
1040 return (__m64)((__vector
long long)
c)[0];
1043extern __inline __m64
1044 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1052extern __inline __m64
1053 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1055 __vector
signed short a,
b;
1056 __vector
signed int c;
1057 __vector
signed int zero = {0, 0, 0, 0};
1061 c = vec_vmsumshm(a,
b, zero);
1062 return (__m64)((__vector
long long)
c)[0];
1065extern __inline __m64
1066 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1072extern __inline __m64
1073 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1075 __vector
signed short a,
b;
1076 __vector
signed short c;
1077 __vector
signed int w0, w1;
1078 __vector
unsigned char xform1 = {
1079#ifdef __LITTLE_ENDIAN__
1080 0x02, 0x03, 0x12, 0x13, 0x06, 0x07, 0x16, 0x17, 0x0A,
1081 0x0B, 0x1A, 0x1B, 0x0E, 0x0F, 0x1E, 0x1F
1083 0x00, 0x01, 0x10, 0x11, 0x04, 0x05, 0x14, 0x15, 0x00,
1084 0x01, 0x10, 0x11, 0x04, 0x05, 0x14, 0x15
1091 w0 = vec_vmulesh(a,
b);
1092 w1 = vec_vmulosh(a,
b);
1093 c = (__vector
signed short)
vec_perm(w0, w1, xform1);
1095 return (__m64)((__vector
long long)
c)[0];
1098extern __inline __m64
1099 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1106extern __inline __m64
1107 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1109 __vector
signed short a,
b,
c;
1114 return (__m64)((__vector
long long)
c)[0];
1117extern __inline __m64
1118 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1124extern __inline __m64
1125 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1127 __vector
signed short m, r;
1128 __vector
unsigned short c;
1130 if (__count <= 15) {
1132 c = (__vector
unsigned short)
vec_splats((
unsigned short)__count);
1133 r =
vec_sl(m, (__vector
unsigned short)
c);
1134 return (__m64)((__vector
long long)r)[0];
1139extern __inline __m64
1140 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1141 _m_psllw(__m64 __m, __m64 __count) {
1145extern __inline __m64
1146 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1152extern __inline __m64
1153 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1159extern __inline __m64
1160 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1166 res.as_int[0] = m.as_int[0] << __count;
1167 res.as_int[1] = m.as_int[1] << __count;
1168 return (res.as_m64);
1171extern __inline __m64
1172 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1173 _m_pslld(__m64 __m, __m64 __count) {
1177extern __inline __m64
1178 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1184extern __inline __m64
1185 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1191extern __inline __m64
1192 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1194 __vector
signed short m, r;
1195 __vector
unsigned short c;
1197 if (__count <= 15) {
1199 c = (__vector
unsigned short)
vec_splats((
unsigned short)__count);
1200 r =
vec_sra(m, (__vector
unsigned short)
c);
1201 return (__m64)((__vector
long long)r)[0];
1206extern __inline __m64
1207 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1208 _m_psraw(__m64 __m, __m64 __count) {
1212extern __inline __m64
1213 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1219extern __inline __m64
1220 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1226extern __inline __m64
1227 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1233 res.as_int[0] = m.as_int[0] >> __count;
1234 res.as_int[1] = m.as_int[1] >> __count;
1235 return (res.as_m64);
1238extern __inline __m64
1239 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1240 _m_psrad(__m64 __m, __m64 __count) {
1244extern __inline __m64
1245 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1251extern __inline __m64
1252 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1258extern __inline __m64
1259 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1261 __vector
unsigned short m, r;
1262 __vector
unsigned short c;
1264 if (__count <= 15) {
1265 m = (__vector
unsigned short)
vec_splats(__m);
1266 c = (__vector
unsigned short)
vec_splats((
unsigned short)__count);
1267 r =
vec_sr(m, (__vector
unsigned short)
c);
1268 return (__m64)((__vector
long long)r)[0];
1273extern __inline __m64
1274 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1275 _m_psrlw(__m64 __m, __m64 __count) {
1279extern __inline __m64
1280 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1286extern __inline __m64
1287 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1293extern __inline __m64
1294 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1300 res.as_int[0] = (
unsigned int)m.as_int[0] >> __count;
1301 res.as_int[1] = (
unsigned int)m.as_int[1] >> __count;
1302 return (res.as_m64);
1305extern __inline __m64
1306 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1307 _m_psrld(__m64 __m, __m64 __count) {
1311extern __inline __m64
1312 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1318extern __inline __m64
1319 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1326extern __inline __m64
1327 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1331 res.as_int[0] = __i0;
1332 res.as_int[1] = __i1;
1333 return (res.as_m64);
1337extern __inline __m64
1338 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1339 _mm_set_pi16(
short __w3,
short __w2,
short __w1,
short __w0) {
1342 res.as_short[0] = __w0;
1343 res.as_short[1] = __w1;
1344 res.as_short[2] = __w2;
1345 res.as_short[3] = __w3;
1346 return (res.as_m64);
1350extern __inline __m64
1351 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1352 _mm_set_pi8(
char __b7,
char __b6,
char __b5,
char __b4,
char __b3,
1353 char __b2,
char __b1,
char __b0) {
1356 res.as_char[0] = __b0;
1357 res.as_char[1] = __b1;
1358 res.as_char[2] = __b2;
1359 res.as_char[3] = __b3;
1360 res.as_char[4] = __b4;
1361 res.as_char[5] = __b5;
1362 res.as_char[6] = __b6;
1363 res.as_char[7] = __b7;
1364 return (res.as_m64);
1368extern __inline __m64
1369 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1373 res.as_int[0] = __i0;
1374 res.as_int[1] = __i1;
1375 return (res.as_m64);
1378extern __inline __m64
1379 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1380 _mm_setr_pi16(
short __w0,
short __w1,
short __w2,
short __w3) {
1384extern __inline __m64
1385 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1386 _mm_setr_pi8(
char __b0,
char __b1,
char __b2,
char __b3,
char __b4,
1387 char __b5,
char __b6,
char __b7) {
1388 return _mm_set_pi8(__b7, __b6, __b5, __b4, __b3, __b2, __b1, __b0);
1392extern __inline __m64
1393 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1397 res.as_int[0] = __i;
1398 res.as_int[1] = __i;
1399 return (res.as_m64);
1403extern __inline __m64
1404 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1407 __vector
signed short w;
1410 return (__m64)((__vector
long long)w)[0];
1414 res.as_short[0] = __w;
1415 res.as_short[1] = __w;
1416 res.as_short[2] = __w;
1417 res.as_short[3] = __w;
1418 return (res.as_m64);
1423extern __inline __m64
1424 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1427 __vector
signed char b;
1430 return (__m64)((__vector
long long)
b)[0];
1434 res.as_char[0] =
__b;
1435 res.as_char[1] =
__b;
1436 res.as_char[2] =
__b;
1437 res.as_char[3] =
__b;
1438 res.as_char[4] =
__b;
1439 res.as_char[5] =
__b;
1440 res.as_char[6] =
__b;
1441 res.as_char[7] =
__b;
1442 return (res.as_m64);
1447#include_next <mmintrin.h>
__device__ __2f16 float c
static __inline__ vector unsigned char __ATTRS_o_ai vec_sr(vector unsigned char __a, vector unsigned char __b)
static __inline__ vector bool char __ATTRS_o_ai vec_cmpeq(vector signed char __a, vector signed char __b)
static __inline__ vector signed char __ATTRS_o_ai vec_sra(vector signed char __a, vector unsigned char __b)
static __inline__ vector signed char __ATTRS_o_ai vec_splats(signed char __a)
static __inline__ vector signed char __ATTRS_o_ai vec_mergel(vector signed char __a, vector signed char __b)
static __inline__ vector signed char __ATTRS_o_ai vec_subs(vector signed char __a, vector signed char __b)
static __inline__ vector float vector float __b
static __inline__ vector signed char __ATTRS_o_ai vec_adds(vector signed char __a, vector signed char __b)
static __inline__ vector signed char __ATTRS_o_ai vec_perm(vector signed char __a, vector signed char __b, vector unsigned char __c)
static __inline__ vector signed char __ATTRS_o_ai vec_sel(vector signed char __a, vector signed char __b, vector unsigned char __c)
static __inline__ vector bool char __ATTRS_o_ai vec_cmplt(vector signed char __a, vector signed char __b)
static __inline__ vector signed char __ATTRS_o_ai vec_pack(vector signed short __a, vector signed short __b)
static __inline__ vector unsigned char __ATTRS_o_ai vec_sl(vector unsigned char __a, vector unsigned char __b)
static __inline__ vector signed char __ATTRS_o_ai vec_add(vector signed char __a, vector signed char __b)
static __inline__ vector bool char __ATTRS_o_ai vec_cmpgt(vector signed char __a, vector signed char __b)
static __inline__ vector signed char __ATTRS_o_ai vec_packs(vector short __a, vector short __b)
static __inline__ vector signed char __ATTRS_o_ai vec_sub(vector signed char __a, vector signed char __b)
static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX _mm_sub_si64(__m64 __a, __m64 __b)
Subtracts signed or unsigned 64-bit integer values and writes the difference to the corresponding bit...
static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX _mm_add_si64(__m64 __a, __m64 __b)
Adds two signed or unsigned 64-bit integer values, returning the lower 64 bits of the sum.
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_unpackhi_pi16(__m64 __m1, __m64 __m2)
Unpacks the upper 32 bits from two 64-bit integer vectors of [4 x i16] and interleaves them into a 64...
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_set1_pi16(short __w)
Constructs a 64-bit integer vector of [4 x i16], with each of the 16-bit integer vector elements set ...
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_add_pi32(__m64 __m1, __m64 __m2)
Adds each 32-bit integer element of the first 64-bit integer vector of [2 x i32] to the corresponding...
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_srl_pi16(__m64 __m, __m64 __count)
Right-shifts each 16-bit integer element of the first parameter, which is a 64-bit integer vector of ...
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_srai_pi32(__m64 __m, int __count)
Right-shifts each 32-bit integer element of a 64-bit integer vector of [2 x i32] by the number of bit...
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_setr_pi16(short __w0, short __w1, short __w2, short __w3)
Constructs a 64-bit integer vector, initialized in reverse order with the specified 16-bit integer va...
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_unpackhi_pi8(__m64 __m1, __m64 __m2)
Unpacks the upper 32 bits from two 64-bit integer vectors of [8 x i8] and interleaves them into a 64-...
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_setr_pi8(char __b0, char __b1, char __b2, char __b3, char __b4, char __b5, char __b6, char __b7)
Constructs a 64-bit integer vector, initialized in reverse order with the specified 8-bit integer val...
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_madd_pi16(__m64 __m1, __m64 __m2)
Multiplies each 16-bit signed integer element of the first 64-bit integer vector of [4 x i16] by the ...
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_srl_si64(__m64 __m, __m64 __count)
Right-shifts the first 64-bit integer parameter by the number of bits specified by the second 64-bit ...
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_subs_pu16(__m64 __m1, __m64 __m2)
Subtracts each 16-bit unsigned integer element of the second 64-bit integer vector of [4 x i16] from ...
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_adds_pi8(__m64 __m1, __m64 __m2)
Adds each 8-bit signed integer element of the first 64-bit integer vector of [8 x i8] to the correspo...
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_unpacklo_pi8(__m64 __m1, __m64 __m2)
Unpacks the lower 32 bits from two 64-bit integer vectors of [8 x i8] and interleaves them into a 64-...
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_subs_pi16(__m64 __m1, __m64 __m2)
Subtracts each 16-bit signed integer element of the second 64-bit integer vector of [4 x i16] from th...
long long __m64 __attribute__((__vector_size__(8), __aligned__(8)))
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_cmpgt_pi16(__m64 __m1, __m64 __m2)
Compares the 16-bit integer elements of two 64-bit integer vectors of [4 x i16] to determine if the e...
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_sll_si64(__m64 __m, __m64 __count)
Left-shifts the first 64-bit integer parameter by the number of bits specified by the second 64-bit i...
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_sub_pi8(__m64 __m1, __m64 __m2)
Subtracts each 8-bit integer element of the second 64-bit integer vector of [8 x i8] from the corresp...
static __inline__ long long __DEFAULT_FN_ATTRS _mm_cvtm64_si64(__m64 __m)
Casts a 64-bit integer vector into a 64-bit signed integer value.
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_subs_pi8(__m64 __m1, __m64 __m2)
Subtracts each 8-bit signed integer element of the second 64-bit integer vector of [8 x i8] from the ...
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_adds_pi16(__m64 __m1, __m64 __m2)
Adds each 16-bit signed integer element of the first 64-bit integer vector of [4 x i16] to the corres...
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_cmpgt_pi32(__m64 __m1, __m64 __m2)
Compares the 32-bit integer elements of two 64-bit integer vectors of [2 x i32] to determine if the e...
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_cmpgt_pi8(__m64 __m1, __m64 __m2)
Compares the 8-bit integer elements of two 64-bit integer vectors of [8 x i8] to determine if the ele...
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_packs_pu16(__m64 __m1, __m64 __m2)
Converts 16-bit signed integers from both 64-bit integer vector parameters of [4 x i16] into 8-bit un...
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_cvtsi32_si64(int __i)
Constructs a 64-bit integer vector, setting the lower 32 bits to the value of the 32-bit integer para...
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_set1_pi32(int __i)
Constructs a 64-bit integer vector of [2 x i32], with each of the 32-bit integer vector elements set ...
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_slli_pi16(__m64 __m, int __count)
Left-shifts each 16-bit signed integer element of a 64-bit integer vector of [4 x i16] by the number ...
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_set1_pi8(char __b)
Constructs a 64-bit integer vector of [8 x i8], with each of the 8-bit integer vector elements set to...
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_packs_pi32(__m64 __m1, __m64 __m2)
Converts 32-bit signed integers from both 64-bit integer vector parameters of [2 x i32] into 16-bit s...
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_add_pi16(__m64 __m1, __m64 __m2)
Adds each 16-bit integer element of the first 64-bit integer vector of [4 x i16] to the corresponding...
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_subs_pu8(__m64 __m1, __m64 __m2)
Subtracts each 8-bit unsigned integer element of the second 64-bit integer vector of [8 x i8] from th...
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_adds_pu8(__m64 __m1, __m64 __m2)
Adds each 8-bit unsigned integer element of the first 64-bit integer vector of [8 x i8] to the corres...
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_or_si64(__m64 __m1, __m64 __m2)
Performs a bitwise OR of two 64-bit integer vectors.
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_slli_si64(__m64 __m, int __count)
Left-shifts the first parameter, which is a 64-bit integer, by the number of bits specified by the se...
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_sra_pi32(__m64 __m, __m64 __count)
Right-shifts each 32-bit integer element of the first parameter, which is a 64-bit integer vector of ...
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_srl_pi32(__m64 __m, __m64 __count)
Right-shifts each 32-bit integer element of the first parameter, which is a 64-bit integer vector of ...
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_unpacklo_pi16(__m64 __m1, __m64 __m2)
Unpacks the lower 32 bits from two 64-bit integer vectors of [4 x i16] and interleaves them into a 64...
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_cmpeq_pi16(__m64 __m1, __m64 __m2)
Compares the 16-bit integer elements of two 64-bit integer vectors of [4 x i16] to determine if the e...
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_srli_pi16(__m64 __m, int __count)
Right-shifts each 16-bit integer element of a 64-bit integer vector of [4 x i16] by the number of bits specified by a 32-bit integer. High-order bits are cleared. The 16-bit results are packed into a 64-bit integer vector of [4 x i16].
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_srli_si64(__m64 __m, int __count)
Right-shifts the first parameter, which is a 64-bit integer, by the number of bits specified by the second parameter, which is a 32-bit integer. High-order bits are cleared.
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_set_pi16(short __s3, short __s2, short __s1, short __s0)
Constructs a 64-bit integer vector initialized with the specified 16-bit integer values.
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_cmpeq_pi32(__m64 __m1, __m64 __m2)
Compares the 32-bit integer elements of two 64-bit integer vectors of [2 x i32] to determine if the e...
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_sll_pi32(__m64 __m, __m64 __count)
Left-shifts each 32-bit signed integer element of the first parameter, which is a 64-bit integer vect...
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_setr_pi32(int __i0, int __i1)
Constructs a 64-bit integer vector, initialized in reverse order with the specified 32-bit integer values.
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_add_pi8(__m64 __m1, __m64 __m2)
Adds each 8-bit integer element of the first 64-bit integer vector of [8 x i8] to the corresponding 8-bit integer element of the second 64-bit integer vector of [8 x i8]. The lower 8 bits of the results are packed into a 64-bit integer vector of [8 x i8].
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_srai_pi16(__m64 __m, int __count)
Right-shifts each 16-bit integer element of a 64-bit integer vector of [4 x i16] by the number of bit...
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_set_pi32(int __i1, int __i0)
Constructs a 64-bit integer vector initialized with the specified 32-bit integer values.
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_adds_pu16(__m64 __m1, __m64 __m2)
Adds each 16-bit unsigned integer element of the first 64-bit integer vector of [4 x i16] to the corr...
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_xor_si64(__m64 __m1, __m64 __m2)
Performs a bitwise exclusive OR of two 64-bit integer vectors.
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_cvtsi64_m64(long long __i)
Casts a 64-bit signed integer value into a 64-bit integer vector.
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_unpackhi_pi32(__m64 __m1, __m64 __m2)
Unpacks the upper 32 bits from two 64-bit integer vectors of [2 x i32] and interleaves them into a 64...
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_mullo_pi16(__m64 __m1, __m64 __m2)
Multiplies each 16-bit signed integer element of the first 64-bit integer vector of [4 x i16] by the ...
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_slli_pi32(__m64 __m, int __count)
Left-shifts each 32-bit signed integer element of a 64-bit integer vector of [2 x i32] by the number of bits specified by a 32-bit integer. The lower 32 bits of each result are packed into a 64-bit integer vector of [2 x i32].
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_unpacklo_pi32(__m64 __m1, __m64 __m2)
Unpacks the lower 32 bits from two 64-bit integer vectors of [2 x i32] and interleaves them into a 64...
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_setzero_si64(void)
Constructs a 64-bit integer vector initialized to zero.
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_sll_pi16(__m64 __m, __m64 __count)
Left-shifts each 16-bit signed integer element of the first parameter, which is a 64-bit integer vect...
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_sra_pi16(__m64 __m, __m64 __count)
Right-shifts each 16-bit integer element of the first parameter, which is a 64-bit integer vector of ...
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_sub_pi32(__m64 __m1, __m64 __m2)
Subtracts each 32-bit integer element of the second 64-bit integer vector of [2 x i32] from the corre...
static __inline__ int __DEFAULT_FN_ATTRS _mm_cvtsi64_si32(__m64 __m)
Returns the lower 32 bits of a 64-bit integer vector as a 32-bit signed integer.
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_cmpeq_pi8(__m64 __m1, __m64 __m2)
Compares the 8-bit integer elements of two 64-bit integer vectors of [8 x i8] to determine if the ele...
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_set_pi8(char __b7, char __b6, char __b5, char __b4, char __b3, char __b2, char __b1, char __b0)
Constructs a 64-bit integer vector initialized with the specified 8-bit integer values.
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_packs_pi16(__m64 __m1, __m64 __m2)
Converts 16-bit signed integers from both 64-bit integer vector parameters of [4 x i16] into 8-bit signed integer values, and constructs a 64-bit integer vector of [8 x i8] as the result. Positive values greater than 0x7F are saturated to 0x7F; negative values less than 0x80 are saturated to 0x80.
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_srli_pi32(__m64 __m, int __count)
Right-shifts each 32-bit integer element of a 64-bit integer vector of [2 x i32] by the number of bits specified by a 32-bit integer. High-order bits are cleared. The 32-bit results are packed into a 64-bit integer vector of [2 x i32].
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_mulhi_pi16(__m64 __m1, __m64 __m2)
Multiplies each 16-bit signed integer element of the first 64-bit integer vector of [4 x i16] by the ...
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_andnot_si64(__m64 __m1, __m64 __m2)
Performs a bitwise NOT of the first 64-bit integer vector, and then performs a bitwise AND of the int...
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_sub_pi16(__m64 __m1, __m64 __m2)
Subtracts each 16-bit integer element of the second 64-bit integer vector of [4 x i16] from the corre...
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_and_si64(__m64 __m1, __m64 __m2)
Performs a bitwise AND of two 64-bit integer vectors.
#define as_char(x)
OpenCL v1.1/1.2/2.0 s6.2.4.2 - as_type operators Reinterprets a data type as another data type of the...