13 #ifndef NO_WARN_X86_INTRINSICS
32 "Please read comment above. Use -DNO_WARN_X86_INTRINSICS to disable this error."
35 #ifndef _MMINTRIN_H_INCLUDED
36 #define _MMINTRIN_H_INCLUDED
38 #if defined(__linux__) && defined(__ppc64__)
43 typedef __attribute__((__aligned__(8))) unsigned
long long __m64;
48 signed char as_signed_char[8];
51 long long as_long_long;
58 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
71 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
73 return (__m64)(
unsigned int)__i;
77 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
84 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
90 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
99 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
104 extern __inline __m64
105 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
111 extern __inline __m64
112 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
113 _mm_cvtsi64x_si64(
long long __i) {
117 extern __inline __m64
118 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
119 _mm_set_pi64x(
long long __i) {
126 extern __inline
long long
127 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
129 return (
long long)__i;
132 extern __inline
long long
133 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
135 return (
long long)__i;
139 extern __inline
long long
140 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
141 _mm_cvtsi64_si64x(__m64 __i) {
142 return (
long long)__i;
149 extern __inline __m64
150 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
152 __vector
signed short vm1;
153 __vector
signed char vresult;
155 vm1 = (__vector
signed short)(__vector
unsigned long long)
156 #ifdef __LITTLE_ENDIAN__
162 return (__m64)((__vector
long long)vresult)[0];
165 extern __inline __m64
166 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
174 extern __inline __m64
175 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
177 __vector
signed int vm1;
178 __vector
signed short vresult;
180 vm1 = (__vector
signed int)(__vector
unsigned long long)
181 #ifdef __LITTLE_ENDIAN__
187 return (__m64)((__vector
long long)vresult)[0];
190 extern __inline __m64
191 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
199 extern __inline __m64
200 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
202 __vector
unsigned char r;
203 __vector
signed short vm1 = (__vector
signed short)(__vector
long long)
204 #ifdef __LITTLE_ENDIAN__
209 const __vector
signed short __zero = {0};
210 __vector __bool
short __select =
vec_cmplt(vm1, __zero);
211 r =
vec_packs((__vector
unsigned short)vm1, (__vector
unsigned short)vm1);
212 __vector __bool
char packsel =
vec_pack(__select, __select);
213 r =
vec_sel(r, (
const __vector
unsigned char)__zero, packsel);
214 return (__m64)((__vector
long long)r)[0];
217 extern __inline __m64
218 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
226 extern __inline __m64
227 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
230 __vector
unsigned char a,
b,
c;
235 return (__m64)((__vector
long long)
c)[1];
237 __m64_union m1, m2, res;
242 res.as_char[0] = m1.as_char[4];
243 res.as_char[1] = m2.as_char[4];
244 res.as_char[2] = m1.as_char[5];
245 res.as_char[3] = m2.as_char[5];
246 res.as_char[4] = m1.as_char[6];
247 res.as_char[5] = m2.as_char[6];
248 res.as_char[6] = m1.as_char[7];
249 res.as_char[7] = m2.as_char[7];
251 return (__m64)res.as_m64;
255 extern __inline __m64
256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
263 extern __inline __m64
264 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
266 __m64_union m1, m2, res;
271 res.as_short[0] = m1.as_short[2];
272 res.as_short[1] = m2.as_short[2];
273 res.as_short[2] = m1.as_short[3];
274 res.as_short[3] = m2.as_short[3];
276 return (__m64)res.as_m64;
279 extern __inline __m64
280 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
286 extern __inline __m64
287 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
289 __m64_union m1, m2, res;
294 res.as_int[0] = m1.as_int[1];
295 res.as_int[1] = m2.as_int[1];
297 return (__m64)res.as_m64;
300 extern __inline __m64
301 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
307 extern __inline __m64
308 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
311 __vector
unsigned char a,
b,
c;
316 return (__m64)((__vector
long long)
c)[0];
318 __m64_union m1, m2, res;
323 res.as_char[0] = m1.as_char[0];
324 res.as_char[1] = m2.as_char[0];
325 res.as_char[2] = m1.as_char[1];
326 res.as_char[3] = m2.as_char[1];
327 res.as_char[4] = m1.as_char[2];
328 res.as_char[5] = m2.as_char[2];
329 res.as_char[6] = m1.as_char[3];
330 res.as_char[7] = m2.as_char[3];
332 return (__m64)res.as_m64;
336 extern __inline __m64
337 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
343 extern __inline __m64
344 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
346 __m64_union m1, m2, res;
351 res.as_short[0] = m1.as_short[0];
352 res.as_short[1] = m2.as_short[0];
353 res.as_short[2] = m1.as_short[1];
354 res.as_short[3] = m2.as_short[1];
356 return (__m64)res.as_m64;
359 extern __inline __m64
360 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
367 extern __inline __m64
368 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
370 __m64_union m1, m2, res;
375 res.as_int[0] = m1.as_int[0];
376 res.as_int[1] = m2.as_int[0];
378 return (__m64)res.as_m64;
381 extern __inline __m64
382 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
388 extern __inline __m64
389 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
392 __vector
signed char a,
b,
c;
397 return (__m64)((__vector
long long)
c)[0];
399 __m64_union m1, m2, res;
404 res.as_char[0] = m1.as_char[0] + m2.as_char[0];
405 res.as_char[1] = m1.as_char[1] + m2.as_char[1];
406 res.as_char[2] = m1.as_char[2] + m2.as_char[2];
407 res.as_char[3] = m1.as_char[3] + m2.as_char[3];
408 res.as_char[4] = m1.as_char[4] + m2.as_char[4];
409 res.as_char[5] = m1.as_char[5] + m2.as_char[5];
410 res.as_char[6] = m1.as_char[6] + m2.as_char[6];
411 res.as_char[7] = m1.as_char[7] + m2.as_char[7];
413 return (__m64)res.as_m64;
417 extern __inline __m64
418 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
424 extern __inline __m64
425 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
428 __vector
signed short a,
b,
c;
433 return (__m64)((__vector
long long)
c)[0];
435 __m64_union m1, m2, res;
440 res.as_short[0] = m1.as_short[0] + m2.as_short[0];
441 res.as_short[1] = m1.as_short[1] + m2.as_short[1];
442 res.as_short[2] = m1.as_short[2] + m2.as_short[2];
443 res.as_short[3] = m1.as_short[3] + m2.as_short[3];
445 return (__m64)res.as_m64;
449 extern __inline __m64
450 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
456 extern __inline __m64
457 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
460 __vector
signed int a,
b,
c;
465 return (__m64)((__vector
long long)
c)[0];
467 __m64_union m1, m2, res;
472 res.as_int[0] = m1.as_int[0] + m2.as_int[0];
473 res.as_int[1] = m1.as_int[1] + m2.as_int[1];
475 return (__m64)res.as_m64;
479 extern __inline __m64
480 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
486 extern __inline __m64
487 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
490 __vector
signed char a,
b,
c;
495 return (__m64)((__vector
long long)
c)[0];
497 __m64_union m1, m2, res;
502 res.as_char[0] = m1.as_char[0] - m2.as_char[0];
503 res.as_char[1] = m1.as_char[1] - m2.as_char[1];
504 res.as_char[2] = m1.as_char[2] - m2.as_char[2];
505 res.as_char[3] = m1.as_char[3] - m2.as_char[3];
506 res.as_char[4] = m1.as_char[4] - m2.as_char[4];
507 res.as_char[5] = m1.as_char[5] - m2.as_char[5];
508 res.as_char[6] = m1.as_char[6] - m2.as_char[6];
509 res.as_char[7] = m1.as_char[7] - m2.as_char[7];
511 return (__m64)res.as_m64;
515 extern __inline __m64
516 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
522 extern __inline __m64
523 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
526 __vector
signed short a,
b,
c;
531 return (__m64)((__vector
long long)
c)[0];
533 __m64_union m1, m2, res;
538 res.as_short[0] = m1.as_short[0] - m2.as_short[0];
539 res.as_short[1] = m1.as_short[1] - m2.as_short[1];
540 res.as_short[2] = m1.as_short[2] - m2.as_short[2];
541 res.as_short[3] = m1.as_short[3] - m2.as_short[3];
543 return (__m64)res.as_m64;
547 extern __inline __m64
548 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
554 extern __inline __m64
555 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
558 __vector
signed int a,
b,
c;
563 return (__m64)((__vector
long long)
c)[0];
565 __m64_union m1, m2, res;
570 res.as_int[0] = m1.as_int[0] - m2.as_int[0];
571 res.as_int[1] = m1.as_int[1] - m2.as_int[1];
573 return (__m64)res.as_m64;
577 extern __inline __m64
578 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
583 extern __inline __m64
584 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
586 return (__m1 + __m2);
589 extern __inline __m64
590 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
592 return (__m1 - __m2);
596 extern __inline __m64
597 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
599 return (__m << __count);
602 extern __inline __m64
603 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
604 _m_psllq(__m64 __m, __m64 __count) {
608 extern __inline __m64
609 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
611 return (__m << __count);
614 extern __inline __m64
615 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
616 _m_psllqi(__m64 __m,
const int __count) {
621 extern __inline __m64
622 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
624 return (__m >> __count);
627 extern __inline __m64
628 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
629 _m_psrlq(__m64 __m, __m64 __count) {
633 extern __inline __m64
634 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
636 return (__m >> __count);
639 extern __inline __m64
640 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
641 _m_psrlqi(__m64 __m,
const int __count) {
646 extern __inline __m64
647 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
649 return (__m1 & __m2);
652 extern __inline __m64
653 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
654 _m_pand(__m64 __m1, __m64 __m2) {
660 extern __inline __m64
661 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
663 return (~__m1 & __m2);
666 extern __inline __m64
667 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
673 extern __inline __m64
674 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
676 return (__m1 | __m2);
679 extern __inline __m64
680 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
681 _m_por(__m64 __m1, __m64 __m2) {
686 extern __inline __m64
687 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
689 return (__m1 ^ __m2);
692 extern __inline __m64
693 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
694 _m_pxor(__m64 __m1, __m64 __m2) {
699 extern __inline __m64
700 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
707 extern __inline __m64
708 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
710 #if defined(_ARCH_PWR6) && defined(__powerpc64__)
712 __asm__(
"cmpb %0,%1,%2;\n" :
"=r"(res) :
"r"(__m1),
"r"(__m2) :);
715 __m64_union m1, m2, res;
720 res.as_char[0] = (m1.as_char[0] == m2.as_char[0]) ? -1 : 0;
721 res.as_char[1] = (m1.as_char[1] == m2.as_char[1]) ? -1 : 0;
722 res.as_char[2] = (m1.as_char[2] == m2.as_char[2]) ? -1 : 0;
723 res.as_char[3] = (m1.as_char[3] == m2.as_char[3]) ? -1 : 0;
724 res.as_char[4] = (m1.as_char[4] == m2.as_char[4]) ? -1 : 0;
725 res.as_char[5] = (m1.as_char[5] == m2.as_char[5]) ? -1 : 0;
726 res.as_char[6] = (m1.as_char[6] == m2.as_char[6]) ? -1 : 0;
727 res.as_char[7] = (m1.as_char[7] == m2.as_char[7]) ? -1 : 0;
729 return (__m64)res.as_m64;
733 extern __inline __m64
734 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
739 extern __inline __m64
740 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
743 __vector
signed char a,
b,
c;
748 return (__m64)((__vector
long long)
c)[0];
750 __m64_union m1, m2, res;
755 res.as_char[0] = (m1.as_char[0] > m2.as_char[0]) ? -1 : 0;
756 res.as_char[1] = (m1.as_char[1] > m2.as_char[1]) ? -1 : 0;
757 res.as_char[2] = (m1.as_char[2] > m2.as_char[2]) ? -1 : 0;
758 res.as_char[3] = (m1.as_char[3] > m2.as_char[3]) ? -1 : 0;
759 res.as_char[4] = (m1.as_char[4] > m2.as_char[4]) ? -1 : 0;
760 res.as_char[5] = (m1.as_char[5] > m2.as_char[5]) ? -1 : 0;
761 res.as_char[6] = (m1.as_char[6] > m2.as_char[6]) ? -1 : 0;
762 res.as_char[7] = (m1.as_char[7] > m2.as_char[7]) ? -1 : 0;
764 return (__m64)res.as_m64;
768 extern __inline __m64
769 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
776 extern __inline __m64
777 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
780 __vector
signed short a,
b,
c;
785 return (__m64)((__vector
long long)
c)[0];
787 __m64_union m1, m2, res;
792 res.as_short[0] = (m1.as_short[0] == m2.as_short[0]) ? -1 : 0;
793 res.as_short[1] = (m1.as_short[1] == m2.as_short[1]) ? -1 : 0;
794 res.as_short[2] = (m1.as_short[2] == m2.as_short[2]) ? -1 : 0;
795 res.as_short[3] = (m1.as_short[3] == m2.as_short[3]) ? -1 : 0;
797 return (__m64)res.as_m64;
801 extern __inline __m64
802 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
807 extern __inline __m64
808 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
811 __vector
signed short a,
b,
c;
816 return (__m64)((__vector
long long)
c)[0];
818 __m64_union m1, m2, res;
823 res.as_short[0] = (m1.as_short[0] > m2.as_short[0]) ? -1 : 0;
824 res.as_short[1] = (m1.as_short[1] > m2.as_short[1]) ? -1 : 0;
825 res.as_short[2] = (m1.as_short[2] > m2.as_short[2]) ? -1 : 0;
826 res.as_short[3] = (m1.as_short[3] > m2.as_short[3]) ? -1 : 0;
828 return (__m64)res.as_m64;
832 extern __inline __m64
833 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
840 extern __inline __m64
841 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
844 __vector
signed int a,
b,
c;
849 return (__m64)((__vector
long long)
c)[0];
851 __m64_union m1, m2, res;
856 res.as_int[0] = (m1.as_int[0] == m2.as_int[0]) ? -1 : 0;
857 res.as_int[1] = (m1.as_int[1] == m2.as_int[1]) ? -1 : 0;
859 return (__m64)res.as_m64;
863 extern __inline __m64
864 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
869 extern __inline __m64
870 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
873 __vector
signed int a,
b,
c;
878 return (__m64)((__vector
long long)
c)[0];
880 __m64_union m1, m2, res;
885 res.as_int[0] = (m1.as_int[0] > m2.as_int[0]) ? -1 : 0;
886 res.as_int[1] = (m1.as_int[1] > m2.as_int[1]) ? -1 : 0;
888 return (__m64)res.as_m64;
892 extern __inline __m64
893 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
901 extern __inline __m64
902 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
904 __vector
signed char a,
b,
c;
909 return (__m64)((__vector
long long)
c)[0];
912 extern __inline __m64
913 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
919 extern __inline __m64
920 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
922 __vector
signed short a,
b,
c;
927 return (__m64)((__vector
long long)
c)[0];
930 extern __inline __m64
931 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
937 extern __inline __m64
938 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
940 __vector
unsigned char a,
b,
c;
945 return (__m64)((__vector
long long)
c)[0];
948 extern __inline __m64
949 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
956 extern __inline __m64
957 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
959 __vector
unsigned short a,
b,
c;
961 a = (__vector
unsigned short)
vec_splats(__m1);
964 return (__m64)((__vector
long long)
c)[0];
967 extern __inline __m64
968 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
975 extern __inline __m64
976 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
978 __vector
signed char a,
b,
c;
983 return (__m64)((__vector
long long)
c)[0];
986 extern __inline __m64
987 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
994 extern __inline __m64
995 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
997 __vector
signed short a,
b,
c;
1002 return (__m64)((__vector
long long)
c)[0];
1005 extern __inline __m64
1006 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1013 extern __inline __m64
1014 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1016 __vector
unsigned char a,
b,
c;
1018 a = (__vector
unsigned char)
vec_splats(__m1);
1021 return (__m64)((__vector
long long)
c)[0];
1024 extern __inline __m64
1025 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1032 extern __inline __m64
1033 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1035 __vector
unsigned short a,
b,
c;
1037 a = (__vector
unsigned short)
vec_splats(__m1);
1040 return (__m64)((__vector
long long)
c)[0];
1043 extern __inline __m64
1044 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1052 extern __inline __m64
1053 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1055 __vector
signed short a,
b;
1056 __vector
signed int c;
1057 __vector
signed int zero = {0, 0, 0, 0};
1061 c = vec_vmsumshm(a,
b, zero);
1062 return (__m64)((__vector
long long)
c)[0];
1065 extern __inline __m64
1066 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1072 extern __inline __m64
1073 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1075 __vector
signed short a,
b;
1076 __vector
signed short c;
1077 __vector
signed int w0, w1;
1078 __vector
unsigned char xform1 = {
1079 #ifdef __LITTLE_ENDIAN__
1080 0x02, 0x03, 0x12, 0x13, 0x06, 0x07, 0x16, 0x17, 0x0A,
1081 0x0B, 0x1A, 0x1B, 0x0E, 0x0F, 0x1E, 0x1F
1083 0x00, 0x01, 0x10, 0x11, 0x04, 0x05, 0x14, 0x15, 0x00,
1084 0x01, 0x10, 0x11, 0x04, 0x05, 0x14, 0x15
1091 w0 = vec_vmulesh(a,
b);
1092 w1 = vec_vmulosh(a,
b);
1093 c = (__vector
signed short)
vec_perm(w0, w1, xform1);
1095 return (__m64)((__vector
long long)
c)[0];
1098 extern __inline __m64
1099 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1106 extern __inline __m64
1107 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1109 __vector
signed short a,
b,
c;
1114 return (__m64)((__vector
long long)
c)[0];
1117 extern __inline __m64
1118 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1124 extern __inline __m64
1125 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1127 __vector
signed short m, r;
1128 __vector
unsigned short c;
1130 if (__count <= 15) {
1132 c = (__vector
unsigned short)
vec_splats((
unsigned short)__count);
1133 r =
vec_sl(m, (__vector
unsigned short)
c);
1134 return (__m64)((__vector
long long)r)[0];
1139 extern __inline __m64
1140 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1141 _m_psllw(__m64 __m, __m64 __count) {
1145 extern __inline __m64
1146 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1152 extern __inline __m64
1153 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1159 extern __inline __m64
1160 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1166 res.as_int[0] = m.as_int[0] << __count;
1167 res.as_int[1] = m.as_int[1] << __count;
1168 return (res.as_m64);
1171 extern __inline __m64
1172 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1173 _m_pslld(__m64 __m, __m64 __count) {
1177 extern __inline __m64
1178 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1184 extern __inline __m64
1185 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1191 extern __inline __m64
1192 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1194 __vector
signed short m, r;
1195 __vector
unsigned short c;
1197 if (__count <= 15) {
1199 c = (__vector
unsigned short)
vec_splats((
unsigned short)__count);
1200 r =
vec_sra(m, (__vector
unsigned short)
c);
1201 return (__m64)((__vector
long long)r)[0];
1206 extern __inline __m64
1207 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1208 _m_psraw(__m64 __m, __m64 __count) {
1212 extern __inline __m64
1213 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1219 extern __inline __m64
1220 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1226 extern __inline __m64
1227 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1233 res.as_int[0] = m.as_int[0] >> __count;
1234 res.as_int[1] = m.as_int[1] >> __count;
1235 return (res.as_m64);
1238 extern __inline __m64
1239 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1240 _m_psrad(__m64 __m, __m64 __count) {
1244 extern __inline __m64
1245 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1251 extern __inline __m64
1252 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1258 extern __inline __m64
1259 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1261 __vector
unsigned short m, r;
1262 __vector
unsigned short c;
1264 if (__count <= 15) {
1265 m = (__vector
unsigned short)
vec_splats(__m);
1266 c = (__vector
unsigned short)
vec_splats((
unsigned short)__count);
1267 r =
vec_sr(m, (__vector
unsigned short)
c);
1268 return (__m64)((__vector
long long)r)[0];
1273 extern __inline __m64
1274 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1275 _m_psrlw(__m64 __m, __m64 __count) {
1279 extern __inline __m64
1280 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1286 extern __inline __m64
1287 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1293 extern __inline __m64
1294 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1300 res.as_int[0] = (
unsigned int)m.as_int[0] >> __count;
1301 res.as_int[1] = (
unsigned int)m.as_int[1] >> __count;
1302 return (res.as_m64);
1305 extern __inline __m64
1306 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1307 _m_psrld(__m64 __m, __m64 __count) {
1311 extern __inline __m64
1312 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1318 extern __inline __m64
1319 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1326 extern __inline __m64
1327 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1331 res.as_int[0] = __i0;
1332 res.as_int[1] = __i1;
1333 return (res.as_m64);
1337 extern __inline __m64
1338 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1339 _mm_set_pi16(
short __w3,
short __w2,
short __w1,
short __w0) {
1342 res.as_short[0] = __w0;
1343 res.as_short[1] = __w1;
1344 res.as_short[2] = __w2;
1345 res.as_short[3] = __w3;
1346 return (res.as_m64);
1350 extern __inline __m64
1351 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1352 _mm_set_pi8(
char __b7,
char __b6,
char __b5,
char __b4,
char __b3,
1353 char __b2,
char __b1,
char __b0) {
1356 res.as_char[0] = __b0;
1357 res.as_char[1] = __b1;
1358 res.as_char[2] = __b2;
1359 res.as_char[3] = __b3;
1360 res.as_char[4] = __b4;
1361 res.as_char[5] = __b5;
1362 res.as_char[6] = __b6;
1363 res.as_char[7] = __b7;
1364 return (res.as_m64);
1368 extern __inline __m64
1369 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1373 res.as_int[0] = __i0;
1374 res.as_int[1] = __i1;
1375 return (res.as_m64);
1378 extern __inline __m64
1379 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1380 _mm_setr_pi16(
short __w0,
short __w1,
short __w2,
short __w3) {
1384 extern __inline __m64
1385 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1386 _mm_setr_pi8(
char __b0,
char __b1,
char __b2,
char __b3,
char __b4,
1387 char __b5,
char __b6,
char __b7) {
1388 return _mm_set_pi8(__b7, __b6, __b5, __b4, __b3, __b2, __b1, __b0);
1392 extern __inline __m64
1393 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1397 res.as_int[0] = __i;
1398 res.as_int[1] = __i;
1399 return (res.as_m64);
1403 extern __inline __m64
1404 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1407 __vector
signed short w;
1410 return (__m64)((__vector
long long)w)[0];
1414 res.as_short[0] = __w;
1415 res.as_short[1] = __w;
1416 res.as_short[2] = __w;
1417 res.as_short[3] = __w;
1418 return (res.as_m64);
1423 extern __inline __m64
1424 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
1427 __vector
signed char b;
1430 return (__m64)((__vector
long long)
b)[0];
1434 res.as_char[0] =
__b;
1435 res.as_char[1] =
__b;
1436 res.as_char[2] =
__b;
1437 res.as_char[3] =
__b;
1438 res.as_char[4] =
__b;
1439 res.as_char[5] =
__b;
1440 res.as_char[6] =
__b;
1441 res.as_char[7] =
__b;
1442 return (res.as_m64);
1447 #include_next <mmintrin.h>
__device__ __2f16 float c
static __inline__ vector unsigned char __ATTRS_o_ai vec_sr(vector unsigned char __a, vector unsigned char __b)
static __inline__ vector bool char __ATTRS_o_ai vec_cmpeq(vector signed char __a, vector signed char __b)
static __inline__ vector signed char __ATTRS_o_ai vec_sra(vector signed char __a, vector unsigned char __b)
static __inline__ vector signed char __ATTRS_o_ai vec_splats(signed char __a)
static __inline__ vector signed char __ATTRS_o_ai vec_mergel(vector signed char __a, vector signed char __b)
static __inline__ vector signed char __ATTRS_o_ai vec_subs(vector signed char __a, vector signed char __b)
static __inline__ vector float vector float __b
static __inline__ vector signed char __ATTRS_o_ai vec_adds(vector signed char __a, vector signed char __b)
static __inline__ vector signed char __ATTRS_o_ai vec_perm(vector signed char __a, vector signed char __b, vector unsigned char __c)
static __inline__ vector signed char __ATTRS_o_ai vec_sel(vector signed char __a, vector signed char __b, vector unsigned char __c)
static __inline__ vector bool char __ATTRS_o_ai vec_cmplt(vector signed char __a, vector signed char __b)
static __inline__ vector signed char __ATTRS_o_ai vec_pack(vector signed short __a, vector signed short __b)
static __inline__ vector unsigned char __ATTRS_o_ai vec_sl(vector unsigned char __a, vector unsigned char __b)
static __inline__ vector signed char __ATTRS_o_ai vec_add(vector signed char __a, vector signed char __b)
static __inline__ vector bool char __ATTRS_o_ai vec_cmpgt(vector signed char __a, vector signed char __b)
static __inline__ vector signed char __ATTRS_o_ai vec_packs(vector short __a, vector short __b)
static __inline__ vector signed char __ATTRS_o_ai vec_sub(vector signed char __a, vector signed char __b)
static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX _mm_sub_si64(__m64 __a, __m64 __b)
Subtracts signed or unsigned 64-bit integer values and writes the difference to the corresponding bit...
static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX _mm_add_si64(__m64 __a, __m64 __b)
Adds two signed or unsigned 64-bit integer values, returning the lower 64 bits of the sum.
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_unpackhi_pi16(__m64 __m1, __m64 __m2)
Unpacks the upper 32 bits from two 64-bit integer vectors of [4 x i16] and interleaves them into a 64...
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_set1_pi16(short __w)
Constructs a 64-bit integer vector of [4 x i16], with each of the 16-bit integer vector elements set ...
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_add_pi32(__m64 __m1, __m64 __m2)
Adds each 32-bit integer element of the first 64-bit integer vector of [2 x i32] to the corresponding...
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_srl_pi16(__m64 __m, __m64 __count)
Right-shifts each 16-bit integer element of the first parameter, which is a 64-bit integer vector of ...
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_srai_pi32(__m64 __m, int __count)
Right-shifts each 32-bit integer element of a 64-bit integer vector of [2 x i32] by the number of bit...
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_setr_pi16(short __w0, short __w1, short __w2, short __w3)
Constructs a 64-bit integer vector, initialized in reverse order with the specified 16-bit integer va...
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_unpackhi_pi8(__m64 __m1, __m64 __m2)
Unpacks the upper 32 bits from two 64-bit integer vectors of [8 x i8] and interleaves them into a 64-...
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_setr_pi8(char __b0, char __b1, char __b2, char __b3, char __b4, char __b5, char __b6, char __b7)
Constructs a 64-bit integer vector, initialized in reverse order with the specified 8-bit integer val...
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_madd_pi16(__m64 __m1, __m64 __m2)
Multiplies each 16-bit signed integer element of the first 64-bit integer vector of [4 x i16] by the ...
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_srl_si64(__m64 __m, __m64 __count)
Right-shifts the first 64-bit integer parameter by the number of bits specified by the second 64-bit ...
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_subs_pu16(__m64 __m1, __m64 __m2)
Subtracts each 16-bit unsigned integer element of the second 64-bit integer vector of [4 x i16] from ...
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_adds_pi8(__m64 __m1, __m64 __m2)
Adds each 8-bit signed integer element of the first 64-bit integer vector of [8 x i8] to the correspo...
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_unpacklo_pi8(__m64 __m1, __m64 __m2)
Unpacks the lower 32 bits from two 64-bit integer vectors of [8 x i8] and interleaves them into a 64-...
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_subs_pi16(__m64 __m1, __m64 __m2)
Subtracts each 16-bit signed integer element of the second 64-bit integer vector of [4 x i16] from th...
long long __m64 __attribute__((__vector_size__(8), __aligned__(8)))
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_cmpgt_pi16(__m64 __m1, __m64 __m2)
Compares the 16-bit integer elements of two 64-bit integer vectors of [4 x i16] to determine if the e...
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_sll_si64(__m64 __m, __m64 __count)
Left-shifts the first 64-bit integer parameter by the number of bits specified by the second 64-bit i...
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_sub_pi8(__m64 __m1, __m64 __m2)
Subtracts each 8-bit integer element of the second 64-bit integer vector of [8 x i8] from the corresp...
static __inline__ long long __DEFAULT_FN_ATTRS _mm_cvtm64_si64(__m64 __m)
Casts a 64-bit integer vector into a 64-bit signed integer value.
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_subs_pi8(__m64 __m1, __m64 __m2)
Subtracts each 8-bit signed integer element of the second 64-bit integer vector of [8 x i8] from the ...
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_adds_pi16(__m64 __m1, __m64 __m2)
Adds each 16-bit signed integer element of the first 64-bit integer vector of [4 x i16] to the corres...
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_cmpgt_pi32(__m64 __m1, __m64 __m2)
Compares the 32-bit integer elements of two 64-bit integer vectors of [2 x i32] to determine if the e...
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_cmpgt_pi8(__m64 __m1, __m64 __m2)
Compares the 8-bit integer elements of two 64-bit integer vectors of [8 x i8] to determine if the ele...
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_packs_pu16(__m64 __m1, __m64 __m2)
Converts 16-bit signed integers from both 64-bit integer vector parameters of [4 x i16] into 8-bit un...
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_cvtsi32_si64(int __i)
Constructs a 64-bit integer vector, setting the lower 32 bits to the value of the 32-bit integer para...
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_set1_pi32(int __i)
Constructs a 64-bit integer vector of [2 x i32], with each of the 32-bit integer vector elements set ...
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_slli_pi16(__m64 __m, int __count)
Left-shifts each 16-bit signed integer element of a 64-bit integer vector of [4 x i16] by the number ...
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_set1_pi8(char __b)
Constructs a 64-bit integer vector of [8 x i8], with each of the 8-bit integer vector elements set to...
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_packs_pi32(__m64 __m1, __m64 __m2)
Converts 32-bit signed integers from both 64-bit integer vector parameters of [2 x i32] into 16-bit s...
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_add_pi16(__m64 __m1, __m64 __m2)
Adds each 16-bit integer element of the first 64-bit integer vector of [4 x i16] to the corresponding...
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_subs_pu8(__m64 __m1, __m64 __m2)
Subtracts each 8-bit unsigned integer element of the second 64-bit integer vector of [8 x i8] from th...
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_adds_pu8(__m64 __m1, __m64 __m2)
Adds each 8-bit unsigned integer element of the first 64-bit integer vector of [8 x i8] to the corres...
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_or_si64(__m64 __m1, __m64 __m2)
Performs a bitwise OR of two 64-bit integer vectors.
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_slli_si64(__m64 __m, int __count)
Left-shifts the first parameter, which is a 64-bit integer, by the number of bits specified by the se...
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_sra_pi32(__m64 __m, __m64 __count)
Right-shifts each 32-bit integer element of the first parameter, which is a 64-bit integer vector of ...
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_srl_pi32(__m64 __m, __m64 __count)
Right-shifts each 32-bit integer element of the first parameter, which is a 64-bit integer vector of ...
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_unpacklo_pi16(__m64 __m1, __m64 __m2)
Unpacks the lower 32 bits from two 64-bit integer vectors of [4 x i16] and interleaves them into a 64...
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_cmpeq_pi16(__m64 __m1, __m64 __m2)
Compares the 16-bit integer elements of two 64-bit integer vectors of [4 x i16] to determine if the e...
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_srli_pi16(__m64 __m, int __count)
Right-shifts each 16-bit integer element of a 64-bit integer vector of [4 x i16] by the number of bit...
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_srli_si64(__m64 __m, int __count)
Right-shifts the first parameter, which is a 64-bit integer, by the number of bits specified by the s...
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_set_pi16(short __s3, short __s2, short __s1, short __s0)
Constructs a 64-bit integer vector initialized with the specified 16-bit integer values.
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_cmpeq_pi32(__m64 __m1, __m64 __m2)
Compares the 32-bit integer elements of two 64-bit integer vectors of [2 x i32] to determine if the e...
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_sll_pi32(__m64 __m, __m64 __count)
Left-shifts each 32-bit signed integer element of the first parameter, which is a 64-bit integer vect...
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_setr_pi32(int __i0, int __i1)
Constructs a 64-bit integer vector, initialized in reverse order with the specified 32-bit integer va...
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_add_pi8(__m64 __m1, __m64 __m2)
Adds each 8-bit integer element of the first 64-bit integer vector of [8 x i8] to the corresponding 8...
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_srai_pi16(__m64 __m, int __count)
Right-shifts each 16-bit integer element of a 64-bit integer vector of [4 x i16] by the number of bit...
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_set_pi32(int __i1, int __i0)
Constructs a 64-bit integer vector initialized with the specified 32-bit integer values.
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_adds_pu16(__m64 __m1, __m64 __m2)
Adds each 16-bit unsigned integer element of the first 64-bit integer vector of [4 x i16] to the corr...
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_xor_si64(__m64 __m1, __m64 __m2)
Performs a bitwise exclusive OR of two 64-bit integer vectors.
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_cvtsi64_m64(long long __i)
Casts a 64-bit signed integer value into a 64-bit integer vector.
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_unpackhi_pi32(__m64 __m1, __m64 __m2)
Unpacks the upper 32 bits from two 64-bit integer vectors of [2 x i32] and interleaves them into a 64...
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_mullo_pi16(__m64 __m1, __m64 __m2)
Multiplies each 16-bit signed integer element of the first 64-bit integer vector of [4 x i16] by the ...
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_slli_pi32(__m64 __m, int __count)
Left-shifts each 32-bit signed integer element of a 64-bit integer vector of [2 x i32] by the number ...
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_unpacklo_pi32(__m64 __m1, __m64 __m2)
Unpacks the lower 32 bits from two 64-bit integer vectors of [2 x i32] and interleaves them into a 64...
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_setzero_si64(void)
Constructs a 64-bit integer vector initialized to zero.
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_sll_pi16(__m64 __m, __m64 __count)
Left-shifts each 16-bit signed integer element of the first parameter, which is a 64-bit integer vect...
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_sra_pi16(__m64 __m, __m64 __count)
Right-shifts each 16-bit integer element of the first parameter, which is a 64-bit integer vector of ...
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_sub_pi32(__m64 __m1, __m64 __m2)
Subtracts each 32-bit integer element of the second 64-bit integer vector of [2 x i32] from the corre...
static __inline__ int __DEFAULT_FN_ATTRS _mm_cvtsi64_si32(__m64 __m)
Returns the lower 32 bits of a 64-bit integer vector as a 32-bit signed integer.
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_cmpeq_pi8(__m64 __m1, __m64 __m2)
Compares the 8-bit integer elements of two 64-bit integer vectors of [8 x i8] to determine if the ele...
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_set_pi8(char __b7, char __b6, char __b5, char __b4, char __b3, char __b2, char __b1, char __b0)
Constructs a 64-bit integer vector initialized with the specified 8-bit integer values.
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_packs_pi16(__m64 __m1, __m64 __m2)
Converts 16-bit signed integers from both 64-bit integer vector parameters of [4 x i16] into 8-bit si...
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_srli_pi32(__m64 __m, int __count)
Right-shifts each 32-bit integer element of a 64-bit integer vector of [2 x i32] by the number of bit...
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_mulhi_pi16(__m64 __m1, __m64 __m2)
Multiplies each 16-bit signed integer element of the first 64-bit integer vector of [4 x i16] by the ...
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_andnot_si64(__m64 __m1, __m64 __m2)
Performs a bitwise NOT of the first 64-bit integer vector, and then performs a bitwise AND of the int...
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_sub_pi16(__m64 __m1, __m64 __m2)
Subtracts each 16-bit integer element of the second 64-bit integer vector of [4 x i16] from the corre...
static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_and_si64(__m64 __m1, __m64 __m2)
Performs a bitwise AND of two 64-bit integer vectors.
#define as_char(x)
OpenCL v1.1/1.2/2.0 s6.2.4.2 - as_type operators. Reinterprets a data type as another data type of the...