LCOV - code coverage report
Current view: top level - usr/lib/gcc/x86_64-linux-gnu/7/include - emmintrin.h (source / functions) Hit Total Coverage
Test: Coverage inastemp Lines: 36 36 100.0 %
Date: 2022-03-17 09:48:28 Functions: 0 0 -

          Line data    Source code
       1             : /* Copyright (C) 2003-2017 Free Software Foundation, Inc.
       2             : 
       3             :    This file is part of GCC.
       4             : 
       5             :    GCC is free software; you can redistribute it and/or modify
       6             :    it under the terms of the GNU General Public License as published by
       7             :    the Free Software Foundation; either version 3, or (at your option)
       8             :    any later version.
       9             : 
      10             :    GCC is distributed in the hope that it will be useful,
      11             :    but WITHOUT ANY WARRANTY; without even the implied warranty of
      12             :    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
      13             :    GNU General Public License for more details.
      14             : 
      15             :    Under Section 7 of GPL version 3, you are granted additional
      16             :    permissions described in the GCC Runtime Library Exception, version
      17             :    3.1, as published by the Free Software Foundation.
      18             : 
      19             :    You should have received a copy of the GNU General Public License and
      20             :    a copy of the GCC Runtime Library Exception along with this program;
      21             :    see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
      22             :    <http://www.gnu.org/licenses/>.  */
      23             : 
      24             : /* Implemented from the specification included in the Intel C++ Compiler
      25             :    User Guide and Reference, version 9.0.  */
      26             : 
      27             : #ifndef _EMMINTRIN_H_INCLUDED
      28             : #define _EMMINTRIN_H_INCLUDED
      29             : 
      30             : /* We need definitions from the SSE header files*/
      31             : #include <xmmintrin.h>
      32             : 
      33             : #ifndef __SSE2__
      34             : #pragma GCC push_options
      35             : #pragma GCC target("sse2")
      36             : #define __DISABLE_SSE2__
      37             : #endif /* __SSE2__ */
      38             : 
      39             : /* SSE2 */
      40             : typedef double __v2df __attribute__ ((__vector_size__ (16)));
      41             : typedef long long __v2di __attribute__ ((__vector_size__ (16)));
      42             : typedef unsigned long long __v2du __attribute__ ((__vector_size__ (16)));
      43             : typedef int __v4si __attribute__ ((__vector_size__ (16)));
      44             : typedef unsigned int __v4su __attribute__ ((__vector_size__ (16)));
      45             : typedef short __v8hi __attribute__ ((__vector_size__ (16)));
      46             : typedef unsigned short __v8hu __attribute__ ((__vector_size__ (16)));
      47             : typedef char __v16qi __attribute__ ((__vector_size__ (16)));
      48             : typedef signed char __v16qs __attribute__ ((__vector_size__ (16)));
      49             : typedef unsigned char __v16qu __attribute__ ((__vector_size__ (16)));
      50             : 
      51             : /* The Intel API is flexible enough that we must allow aliasing with other
      52             :    vector types, and their scalar components.  */
      53             : typedef long long __m128i __attribute__ ((__vector_size__ (16), __may_alias__));
      54             : typedef double __m128d __attribute__ ((__vector_size__ (16), __may_alias__));
      55             : 
      56             : /* Unaligned version of the same types.  */
      57             : typedef long long __m128i_u __attribute__ ((__vector_size__ (16), __may_alias__, __aligned__ (1)));
      58             : typedef double __m128d_u __attribute__ ((__vector_size__ (16), __may_alias__, __aligned__ (1)));
      59             : 
      60             : /* Create a selector for use with the SHUFPD instruction.  */
      61             : #define _MM_SHUFFLE2(fp1,fp0) \
      62             :  (((fp1) << 1) | (fp0))
      63             : 
      64             : /* Create a vector with element 0 as F and the rest zero.  */
      65             : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
      66             : _mm_set_sd (double __F)
      67             : {
      68             :   return __extension__ (__m128d){ __F, 0.0 };
      69             : }
      70             : 
      71             : /* Create a vector with both elements equal to F.  */
      72             : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
      73             : _mm_set1_pd (double __F)
      74             : {
      75         200 :   return __extension__ (__m128d){ __F, __F };
      76             : }
      77             : 
      78             : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
      79             : _mm_set_pd1 (double __F)
      80             : {
      81             :   return _mm_set1_pd (__F);
      82             : }
      83             : 
      84             : /* Create a vector with the lower value X and upper value W.  */
      85             : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
      86             : _mm_set_pd (double __W, double __X)
      87             : {
      88         120 :   return __extension__ (__m128d){ __X, __W };
      89             : }
      90             : 
      91             : /* Create a vector with the lower value W and upper value X.  */
      92             : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
      93             : _mm_setr_pd (double __W, double __X)
      94             : {
      95             :   return __extension__ (__m128d){ __W, __X };
      96             : }
      97             : 
      98             : /* Create an undefined vector.  */
      99             : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     100             : _mm_undefined_pd (void)
     101             : {
     102             :   __m128d __Y = __Y;
     103             :   return __Y;
     104             : }
     105             : 
     106             : /* Create a vector of zeros.  */
     107             : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     108             : _mm_setzero_pd (void)
     109             : {
     110             :   return __extension__ (__m128d){ 0.0, 0.0 };
     111             : }
     112             : 
     113             : /* Sets the low DPFP value of A from the low value of B.  */
     114             : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     115             : _mm_move_sd (__m128d __A, __m128d __B)
     116             : {
     117             :   return (__m128d) __builtin_ia32_movsd ((__v2df)__A, (__v2df)__B);
     118             : }
     119             : 
     120             : /* Load two DPFP values from P.  The address must be 16-byte aligned.  */
     121             : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     122             : _mm_load_pd (double const *__P)
     123             : {
     124          40 :   return *(__m128d *)__P;
     125             : }
     126             : 
     127             : /* Load two DPFP values from P.  The address need not be 16-byte aligned.  */
     128             : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     129             : _mm_loadu_pd (double const *__P)
     130             : {
     131        6720 :   return *(__m128d_u *)__P;
     132             : }
     133             : 
     134             : /* Create a vector with all two elements equal to *P.  */
     135             : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     136             : _mm_load1_pd (double const *__P)
     137             : {
     138             :   return _mm_set1_pd (*__P);
     139             : }
     140             : 
     141             : /* Create a vector with element 0 as *P and the rest zero.  */
     142             : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     143             : _mm_load_sd (double const *__P)
     144             : {
     145             :   return _mm_set_sd (*__P);
     146             : }
     147             : 
     148             : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     149             : _mm_load_pd1 (double const *__P)
     150             : {
     151             :   return _mm_load1_pd (__P);
     152             : }
     153             : 
     154             : /* Load two DPFP values in reverse order.  The address must be aligned.  */
     155             : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     156             : _mm_loadr_pd (double const *__P)
     157             : {
     158             :   __m128d __tmp = _mm_load_pd (__P);
     159             :   return __builtin_ia32_shufpd (__tmp, __tmp, _MM_SHUFFLE2 (0,1));
     160             : }
     161             : 
     162             : /* Store two DPFP values.  The address must be 16-byte aligned.  */
     163             : extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     164             : _mm_store_pd (double *__P, __m128d __A)
     165             : {
     166       17768 :   *(__m128d *)__P = __A;
     167             : }
     168             : 
     169             : /* Store two DPFP values.  The address need not be 16-byte aligned.  */
     170             : extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     171             : _mm_storeu_pd (double *__P, __m128d __A)
     172             : {
     173       15908 :   *(__m128d_u *)__P = __A;
     174             : }
     175             : 
     176             : /* Stores the lower DPFP value.  */
     177             : extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     178             : _mm_store_sd (double *__P, __m128d __A)
     179             : {
     180             :   *__P = ((__v2df)__A)[0];
     181             : }
     182             : 
     183             : extern __inline double __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     184             : _mm_cvtsd_f64 (__m128d __A)
     185             : {
     186        6172 :   return ((__v2df)__A)[0];
     187             : }
     188             : 
     189             : extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     190             : _mm_storel_pd (double *__P, __m128d __A)
     191             : {
     192             :   _mm_store_sd (__P, __A);
     193             : }
     194             : 
     195             : /* Stores the upper DPFP value.  */
     196             : extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     197             : _mm_storeh_pd (double *__P, __m128d __A)
     198             : {
     199             :   *__P = ((__v2df)__A)[1];
     200             : }
     201             : 
     202             : /* Store the lower DPFP value across two words.
     203             :    The address must be 16-byte aligned.  */
     204             : extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     205             : _mm_store1_pd (double *__P, __m128d __A)
     206             : {
     207             :   _mm_store_pd (__P, __builtin_ia32_shufpd (__A, __A, _MM_SHUFFLE2 (0,0)));
     208             : }
     209             : 
     210             : extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     211             : _mm_store_pd1 (double *__P, __m128d __A)
     212             : {
     213             :   _mm_store1_pd (__P, __A);
     214             : }
     215             : 
     216             : /* Store two DPFP values in reverse order.  The address must be aligned.  */
     217             : extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     218             : _mm_storer_pd (double *__P, __m128d __A)
     219             : {
     220             :   _mm_store_pd (__P, __builtin_ia32_shufpd (__A, __A, _MM_SHUFFLE2 (0,1)));
     221             : }
     222             : 
     223             : extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     224             : _mm_cvtsi128_si32 (__m128i __A)
     225             : {
     226             :   return __builtin_ia32_vec_ext_v4si ((__v4si)__A, 0);
     227             : }
     228             : 
     229             : #ifdef __x86_64__
     230             : /* Intel intrinsic.  */
     231             : extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     232             : _mm_cvtsi128_si64 (__m128i __A)
     233             : {
     234             :   return ((__v2di)__A)[0];
     235             : }
     236             : 
     237             : /* Microsoft intrinsic.  */
     238             : extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     239             : _mm_cvtsi128_si64x (__m128i __A)
     240             : {
     241             :   return ((__v2di)__A)[0];
     242             : }
     243             : #endif
     244             : 
     245             : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     246             : _mm_add_pd (__m128d __A, __m128d __B)
     247             : {
     248        8424 :   return (__m128d) ((__v2df)__A + (__v2df)__B);
     249             : }
     250             : 
     251             : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     252             : _mm_add_sd (__m128d __A, __m128d __B)
     253             : {
     254             :   return (__m128d)__builtin_ia32_addsd ((__v2df)__A, (__v2df)__B);
     255             : }
     256             : 
     257             : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     258             : _mm_sub_pd (__m128d __A, __m128d __B)
     259             : {
     260         200 :   return (__m128d) ((__v2df)__A - (__v2df)__B);
     261             : }
     262             : 
     263             : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     264             : _mm_sub_sd (__m128d __A, __m128d __B)
     265             : {
     266             :   return (__m128d)__builtin_ia32_subsd ((__v2df)__A, (__v2df)__B);
     267             : }
     268             : 
     269             : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     270             : _mm_mul_pd (__m128d __A, __m128d __B)
     271             : {
     272        1280 :   return (__m128d) ((__v2df)__A * (__v2df)__B);
     273             : }
     274             : 
     275             : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     276             : _mm_mul_sd (__m128d __A, __m128d __B)
     277             : {
     278             :   return (__m128d)__builtin_ia32_mulsd ((__v2df)__A, (__v2df)__B);
     279             : }
     280             : 
     281             : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     282             : _mm_div_pd (__m128d __A, __m128d __B)
     283             : {
     284             :   return (__m128d) ((__v2df)__A / (__v2df)__B);
     285             : }
     286             : 
     287             : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     288             : _mm_div_sd (__m128d __A, __m128d __B)
     289             : {
     290             :   return (__m128d)__builtin_ia32_divsd ((__v2df)__A, (__v2df)__B);
     291             : }
     292             : 
     293             : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     294             : _mm_sqrt_pd (__m128d __A)
     295             : {
     296          80 :   return (__m128d)__builtin_ia32_sqrtpd ((__v2df)__A);
     297             : }
     298             : 
     299             : /* Return pair {sqrt (B[0]), A[1]}.  */
     300             : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     301             : _mm_sqrt_sd (__m128d __A, __m128d __B)
     302             : {
     303             :   __v2df __tmp = __builtin_ia32_movsd ((__v2df)__A, (__v2df)__B);
     304             :   return (__m128d)__builtin_ia32_sqrtsd ((__v2df)__tmp);
     305             : }
     306             : 
     307             : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     308             : _mm_min_pd (__m128d __A, __m128d __B)
     309             : {
     310         300 :   return (__m128d)__builtin_ia32_minpd ((__v2df)__A, (__v2df)__B);
     311             : }
     312             : 
     313             : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     314             : _mm_min_sd (__m128d __A, __m128d __B)
     315             : {
     316             :   return (__m128d)__builtin_ia32_minsd ((__v2df)__A, (__v2df)__B);
     317             : }
     318             : 
     319             : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     320             : _mm_max_pd (__m128d __A, __m128d __B)
     321             : {
     322         300 :   return (__m128d)__builtin_ia32_maxpd ((__v2df)__A, (__v2df)__B);
     323             : }
     324             : 
     325             : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     326             : _mm_max_sd (__m128d __A, __m128d __B)
     327             : {
     328             :   return (__m128d)__builtin_ia32_maxsd ((__v2df)__A, (__v2df)__B);
     329             : }
     330             : 
     331             : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     332             : _mm_and_pd (__m128d __A, __m128d __B)
     333             : {
     334        3228 :   return (__m128d)__builtin_ia32_andpd ((__v2df)__A, (__v2df)__B);
     335             : }
     336             : 
     337             : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     338             : _mm_andnot_pd (__m128d __A, __m128d __B)
     339             : {
     340         460 :   return (__m128d)__builtin_ia32_andnpd ((__v2df)__A, (__v2df)__B);
     341             : }
     342             : 
     343             : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     344             : _mm_or_pd (__m128d __A, __m128d __B)
     345             : {
     346         320 :   return (__m128d)__builtin_ia32_orpd ((__v2df)__A, (__v2df)__B);
     347             : }
     348             : 
     349             : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     350             : _mm_xor_pd (__m128d __A, __m128d __B)
     351             : {
     352         140 :   return (__m128d)__builtin_ia32_xorpd ((__v2df)__A, (__v2df)__B);
     353             : }
     354             : 
     355             : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     356             : _mm_cmpeq_pd (__m128d __A, __m128d __B)
     357             : {
     358         564 :   return (__m128d)__builtin_ia32_cmpeqpd ((__v2df)__A, (__v2df)__B);
     359             : }
     360             : 
     361             : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     362             : _mm_cmplt_pd (__m128d __A, __m128d __B)
     363             : {
     364         364 :   return (__m128d)__builtin_ia32_cmpltpd ((__v2df)__A, (__v2df)__B);
     365             : }
     366             : 
     367             : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     368             : _mm_cmple_pd (__m128d __A, __m128d __B)
     369             : {
     370         380 :   return (__m128d)__builtin_ia32_cmplepd ((__v2df)__A, (__v2df)__B);
     371             : }
     372             : 
     373             : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     374             : _mm_cmpgt_pd (__m128d __A, __m128d __B)
     375             : {
     376         360 :   return (__m128d)__builtin_ia32_cmpgtpd ((__v2df)__A, (__v2df)__B);
     377             : }
     378             : 
     379             : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     380             : _mm_cmpge_pd (__m128d __A, __m128d __B)
     381             : {
     382         376 :   return (__m128d)__builtin_ia32_cmpgepd ((__v2df)__A, (__v2df)__B);
     383             : }
     384             : 
     385             : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     386             : _mm_cmpneq_pd (__m128d __A, __m128d __B)
     387             : {
     388         276 :   return (__m128d)__builtin_ia32_cmpneqpd ((__v2df)__A, (__v2df)__B);
     389             : }
     390             : 
     391             : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     392             : _mm_cmpnlt_pd (__m128d __A, __m128d __B)
     393             : {
     394             :   return (__m128d)__builtin_ia32_cmpnltpd ((__v2df)__A, (__v2df)__B);
     395             : }
     396             : 
     397             : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     398             : _mm_cmpnle_pd (__m128d __A, __m128d __B)
     399             : {
     400             :   return (__m128d)__builtin_ia32_cmpnlepd ((__v2df)__A, (__v2df)__B);
     401             : }
     402             : 
     403             : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     404             : _mm_cmpngt_pd (__m128d __A, __m128d __B)
     405             : {
     406             :   return (__m128d)__builtin_ia32_cmpngtpd ((__v2df)__A, (__v2df)__B);
     407             : }
     408             : 
     409             : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     410             : _mm_cmpnge_pd (__m128d __A, __m128d __B)
     411             : {
     412             :   return (__m128d)__builtin_ia32_cmpngepd ((__v2df)__A, (__v2df)__B);
     413             : }
     414             : 
     415             : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     416             : _mm_cmpord_pd (__m128d __A, __m128d __B)
     417             : {
     418             :   return (__m128d)__builtin_ia32_cmpordpd ((__v2df)__A, (__v2df)__B);
     419             : }
     420             : 
     421             : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     422             : _mm_cmpunord_pd (__m128d __A, __m128d __B)
     423             : {
     424             :   return (__m128d)__builtin_ia32_cmpunordpd ((__v2df)__A, (__v2df)__B);
     425             : }
     426             : 
     427             : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     428             : _mm_cmpeq_sd (__m128d __A, __m128d __B)
     429             : {
     430             :   return (__m128d)__builtin_ia32_cmpeqsd ((__v2df)__A, (__v2df)__B);
     431             : }
     432             : 
     433             : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     434             : _mm_cmplt_sd (__m128d __A, __m128d __B)
     435             : {
     436             :   return (__m128d)__builtin_ia32_cmpltsd ((__v2df)__A, (__v2df)__B);
     437             : }
     438             : 
     439             : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     440             : _mm_cmple_sd (__m128d __A, __m128d __B)
     441             : {
     442             :   return (__m128d)__builtin_ia32_cmplesd ((__v2df)__A, (__v2df)__B);
     443             : }
     444             : 
     445             : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     446             : _mm_cmpgt_sd (__m128d __A, __m128d __B)
     447             : {
     448             :   return (__m128d) __builtin_ia32_movsd ((__v2df) __A,
     449             :                                          (__v2df)
     450             :                                          __builtin_ia32_cmpltsd ((__v2df) __B,
     451             :                                                                  (__v2df)
     452             :                                                                  __A));
     453             : }
     454             : 
     455             : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     456             : _mm_cmpge_sd (__m128d __A, __m128d __B)
     457             : {
     458             :   return (__m128d) __builtin_ia32_movsd ((__v2df) __A,
     459             :                                          (__v2df)
     460             :                                          __builtin_ia32_cmplesd ((__v2df) __B,
     461             :                                                                  (__v2df)
     462             :                                                                  __A));
     463             : }
     464             : 
     465             : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     466             : _mm_cmpneq_sd (__m128d __A, __m128d __B)
     467             : {
     468             :   return (__m128d)__builtin_ia32_cmpneqsd ((__v2df)__A, (__v2df)__B);
     469             : }
     470             : 
     471             : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     472             : _mm_cmpnlt_sd (__m128d __A, __m128d __B)
     473             : {
     474             :   return (__m128d)__builtin_ia32_cmpnltsd ((__v2df)__A, (__v2df)__B);
     475             : }
     476             : 
     477             : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     478             : _mm_cmpnle_sd (__m128d __A, __m128d __B)
     479             : {
     480             :   return (__m128d)__builtin_ia32_cmpnlesd ((__v2df)__A, (__v2df)__B);
     481             : }
     482             : 
     483             : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     484             : _mm_cmpngt_sd (__m128d __A, __m128d __B)
     485             : {
     486             :   return (__m128d) __builtin_ia32_movsd ((__v2df) __A,
     487             :                                          (__v2df)
     488             :                                          __builtin_ia32_cmpnltsd ((__v2df) __B,
     489             :                                                                   (__v2df)
     490             :                                                                   __A));
     491             : }
     492             : 
     493             : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     494             : _mm_cmpnge_sd (__m128d __A, __m128d __B)
     495             : {
     496             :   return (__m128d) __builtin_ia32_movsd ((__v2df) __A,
     497             :                                          (__v2df)
     498             :                                          __builtin_ia32_cmpnlesd ((__v2df) __B,
     499             :                                                                   (__v2df)
     500             :                                                                   __A));
     501             : }
     502             : 
     503             : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     504             : _mm_cmpord_sd (__m128d __A, __m128d __B)
     505             : {
     506             :   return (__m128d)__builtin_ia32_cmpordsd ((__v2df)__A, (__v2df)__B);
     507             : }
     508             : 
     509             : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     510             : _mm_cmpunord_sd (__m128d __A, __m128d __B)
     511             : {
     512             :   return (__m128d)__builtin_ia32_cmpunordsd ((__v2df)__A, (__v2df)__B);
     513             : }
     514             : 
     515             : extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     516             : _mm_comieq_sd (__m128d __A, __m128d __B)
     517             : {
     518             :   return __builtin_ia32_comisdeq ((__v2df)__A, (__v2df)__B);
     519             : }
     520             : 
     521             : extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     522             : _mm_comilt_sd (__m128d __A, __m128d __B)
     523             : {
     524             :   return __builtin_ia32_comisdlt ((__v2df)__A, (__v2df)__B);
     525             : }
     526             : 
     527             : extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     528             : _mm_comile_sd (__m128d __A, __m128d __B)
     529             : {
     530             :   return __builtin_ia32_comisdle ((__v2df)__A, (__v2df)__B);
     531             : }
     532             : 
     533             : extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     534             : _mm_comigt_sd (__m128d __A, __m128d __B)
     535             : {
     536             :   return __builtin_ia32_comisdgt ((__v2df)__A, (__v2df)__B);
     537             : }
     538             : 
     539             : extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     540             : _mm_comige_sd (__m128d __A, __m128d __B)
     541             : {
     542             :   return __builtin_ia32_comisdge ((__v2df)__A, (__v2df)__B);
     543             : }
     544             : 
     545             : extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     546             : _mm_comineq_sd (__m128d __A, __m128d __B)
     547             : {
     548             :   return __builtin_ia32_comisdneq ((__v2df)__A, (__v2df)__B);
     549             : }
     550             : 
     551             : extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     552             : _mm_ucomieq_sd (__m128d __A, __m128d __B)
     553             : {
     554             :   return __builtin_ia32_ucomisdeq ((__v2df)__A, (__v2df)__B);
     555             : }
     556             : 
     557             : extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     558             : _mm_ucomilt_sd (__m128d __A, __m128d __B)
     559             : {
     560             :   return __builtin_ia32_ucomisdlt ((__v2df)__A, (__v2df)__B);
     561             : }
     562             : 
     563             : extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     564             : _mm_ucomile_sd (__m128d __A, __m128d __B)
     565             : {
     566             :   return __builtin_ia32_ucomisdle ((__v2df)__A, (__v2df)__B);
     567             : }
     568             : 
     569             : extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     570             : _mm_ucomigt_sd (__m128d __A, __m128d __B)
     571             : {
     572             :   return __builtin_ia32_ucomisdgt ((__v2df)__A, (__v2df)__B);
     573             : }
     574             : 
     575             : extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     576             : _mm_ucomige_sd (__m128d __A, __m128d __B)
     577             : {
     578             :   return __builtin_ia32_ucomisdge ((__v2df)__A, (__v2df)__B);
     579             : }
     580             : 
     581             : extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     582             : _mm_ucomineq_sd (__m128d __A, __m128d __B)
     583             : {
     584             :   return __builtin_ia32_ucomisdneq ((__v2df)__A, (__v2df)__B);
     585             : }
     586             : 
     587             : /* Create a vector of Qi, where i is the element number.  */
     588             : 
     589             : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     590             : _mm_set_epi64x (long long __q1, long long __q0)
     591             : {
     592         100 :   return __extension__ (__m128i)(__v2di){ __q0, __q1 };
     593             : }
     594             : 
     595             : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     596             : _mm_set_epi64 (__m64 __q1,  __m64 __q0)
     597             : {
     598             :   return _mm_set_epi64x ((long long)__q1, (long long)__q0);
     599             : }
     600             : 
     601             : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     602             : _mm_set_epi32 (int __q3, int __q2, int __q1, int __q0)
     603             : {
     604             :   return __extension__ (__m128i)(__v4si){ __q0, __q1, __q2, __q3 };
     605             : }
     606             : 
     607             : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     608             : _mm_set_epi16 (short __q7, short __q6, short __q5, short __q4,
     609             :                short __q3, short __q2, short __q1, short __q0)
     610             : {
     611             :   return __extension__ (__m128i)(__v8hi){
     612             :     __q0, __q1, __q2, __q3, __q4, __q5, __q6, __q7 };
     613             : }
     614             : 
     615             : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     616             : _mm_set_epi8 (char __q15, char __q14, char __q13, char __q12,
     617             :               char __q11, char __q10, char __q09, char __q08,
     618             :               char __q07, char __q06, char __q05, char __q04,
     619             :               char __q03, char __q02, char __q01, char __q00)
     620             : {
     621             :   return __extension__ (__m128i)(__v16qi){
     622             :     __q00, __q01, __q02, __q03, __q04, __q05, __q06, __q07,
     623             :     __q08, __q09, __q10, __q11, __q12, __q13, __q14, __q15
     624             :   };
     625             : }
     626             : 
     627             : /* Set all of the elements of the vector to A.  */
     628             : 
     629             : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     630             : _mm_set1_epi64x (long long __A)
     631             : {
     632         260 :   return _mm_set_epi64x (__A, __A);
     633             : }
     634             : 
     635             : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     636             : _mm_set1_epi64 (__m64 __A)
     637             : {
     638             :   return _mm_set_epi64 (__A, __A);
     639             : }
     640             : 
     641             : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     642             : _mm_set1_epi32 (int __A)
     643             : {
     644         260 :   return _mm_set_epi32 (__A, __A, __A, __A);
     645             : }
     646             : 
     647             : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     648             : _mm_set1_epi16 (short __A)
     649             : {
     650             :   return _mm_set_epi16 (__A, __A, __A, __A, __A, __A, __A, __A);
     651             : }
     652             : 
     653             : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     654             : _mm_set1_epi8 (char __A)
     655             : {
     656             :   return _mm_set_epi8 (__A, __A, __A, __A, __A, __A, __A, __A,
     657             :                        __A, __A, __A, __A, __A, __A, __A, __A);
     658             : }
     659             : 
     660             : /* Create a vector of Qi, where i is the element number.
     661             :    The parameter order is reversed from the _mm_set_epi* functions.  */
     662             : 
     663             : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     664             : _mm_setr_epi64 (__m64 __q0, __m64 __q1)
     665             : {
     666             :   return _mm_set_epi64 (__q1, __q0);
     667             : }
     668             : 
     669             : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     670             : _mm_setr_epi32 (int __q0, int __q1, int __q2, int __q3)
     671             : {
     672             :   return _mm_set_epi32 (__q3, __q2, __q1, __q0);
     673             : }
     674             : 
     675             : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     676             : _mm_setr_epi16 (short __q0, short __q1, short __q2, short __q3,
     677             :                 short __q4, short __q5, short __q6, short __q7)
     678             : {
     679             :   return _mm_set_epi16 (__q7, __q6, __q5, __q4, __q3, __q2, __q1, __q0);
     680             : }
     681             : 
     682             : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     683             : _mm_setr_epi8 (char __q00, char __q01, char __q02, char __q03,
     684             :                char __q04, char __q05, char __q06, char __q07,
     685             :                char __q08, char __q09, char __q10, char __q11,
     686             :                char __q12, char __q13, char __q14, char __q15)
     687             : {
     688             :   return _mm_set_epi8 (__q15, __q14, __q13, __q12, __q11, __q10, __q09, __q08,
     689             :                        __q07, __q06, __q05, __q04, __q03, __q02, __q01, __q00);
     690             : }
     691             : 
     692             : /* Create a vector with element 0 as *P and the rest zero.  */
     693             : 
     694             : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     695             : _mm_load_si128 (__m128i const *__P)
     696             : {
     697             :   return *__P;
     698             : }
     699             : 
     700             : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     701             : _mm_loadu_si128 (__m128i_u const *__P)
     702             : {
     703             :   return *__P;
     704             : }
     705             : 
     706             : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     707             : _mm_loadl_epi64 (__m128i_u const *__P)
     708             : {
     709             :   return _mm_set_epi64 ((__m64)0LL, *(__m64_u *)__P);
     710             : }
     711             : 
     712             : extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     713             : _mm_store_si128 (__m128i *__P, __m128i __B)
     714             : {
     715             :   *__P = __B;
     716             : }
     717             : 
     718             : extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     719             : _mm_storeu_si128 (__m128i_u *__P, __m128i __B)
     720             : {
     721             :   *__P = __B;
     722             : }
     723             : 
     724             : extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     725             : _mm_storel_epi64 (__m128i_u *__P, __m128i __B)
     726             : {
     727             :   *(__m64_u *)__P = (__m64) ((__v2di)__B)[0];
     728             : }
     729             : 
     730             : extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     731             : _mm_movepi64_pi64 (__m128i __B)
     732             : {
     733             :   return (__m64) ((__v2di)__B)[0];
     734             : }
     735             : 
     736             : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     737             : _mm_movpi64_epi64 (__m64 __A)
     738             : {
     739             :   return _mm_set_epi64 ((__m64)0LL, __A);
     740             : }
     741             : 
     742             : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     743             : _mm_move_epi64 (__m128i __A)
     744             : {
     745             :   return (__m128i)__builtin_ia32_movq128 ((__v2di) __A);
     746             : }
     747             : 
     748             : /* Create an undefined vector.  */
     749             : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     750             : _mm_undefined_si128 (void)
     751             : {
     752             :   __m128i __Y = __Y;
     753             :   return __Y;
     754             : }
     755             : 
     756             : /* Create a vector of zeros.  */
     757             : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     758             : _mm_setzero_si128 (void)
     759             : {
     760             :   return __extension__ (__m128i)(__v4si){ 0, 0, 0, 0 };
     761             : }
     762             : 
     763             : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     764             : _mm_cvtepi32_pd (__m128i __A)
     765             : {
     766             :   return (__m128d)__builtin_ia32_cvtdq2pd ((__v4si) __A);
     767             : }
     768             : 
     769             : extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     770             : _mm_cvtepi32_ps (__m128i __A)
     771             : {
     772             :   return (__m128)__builtin_ia32_cvtdq2ps ((__v4si) __A);
     773             : }
     774             : 
     775             : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     776             : _mm_cvtpd_epi32 (__m128d __A)
     777             : {
     778             :   return (__m128i)__builtin_ia32_cvtpd2dq ((__v2df) __A);
     779             : }
     780             : 
     781             : extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     782             : _mm_cvtpd_pi32 (__m128d __A)
     783             : {
     784             :   return (__m64)__builtin_ia32_cvtpd2pi ((__v2df) __A);
     785             : }
     786             : 
     787             : extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     788             : _mm_cvtpd_ps (__m128d __A)
     789             : {
     790             :   return (__m128)__builtin_ia32_cvtpd2ps ((__v2df) __A);
     791             : }
     792             : 
     793             : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     794             : _mm_cvttpd_epi32 (__m128d __A)
     795             : {
     796             :   return (__m128i)__builtin_ia32_cvttpd2dq ((__v2df) __A);
     797             : }
     798             : 
     799             : extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     800             : _mm_cvttpd_pi32 (__m128d __A)
     801             : {
     802             :   return (__m64)__builtin_ia32_cvttpd2pi ((__v2df) __A);
     803             : }
     804             : 
     805             : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     806             : _mm_cvtpi32_pd (__m64 __A)
     807             : {
     808             :   return (__m128d)__builtin_ia32_cvtpi2pd ((__v2si) __A);
     809             : }
     810             : 
     811             : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     812             : _mm_cvtps_epi32 (__m128 __A)
     813             : {
     814         100 :   return (__m128i)__builtin_ia32_cvtps2dq ((__v4sf) __A);
     815             : }
     816             : 
     817             : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     818             : _mm_cvttps_epi32 (__m128 __A)
     819             : {
     820             :   return (__m128i)__builtin_ia32_cvttps2dq ((__v4sf) __A);
     821             : }
     822             : 
     823             : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     824             : _mm_cvtps_pd (__m128 __A)
     825             : {
     826             :   return (__m128d)__builtin_ia32_cvtps2pd ((__v4sf) __A);
     827             : }
     828             : 
     829             : extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     830             : _mm_cvtsd_si32 (__m128d __A)
     831             : {
     832             :   return __builtin_ia32_cvtsd2si ((__v2df) __A);
     833             : }
     834             : 
     835             : #ifdef __x86_64__
     836             : /* Intel intrinsic.  */
     837             : extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     838             : _mm_cvtsd_si64 (__m128d __A)
     839             : {
     840         400 :   return __builtin_ia32_cvtsd2si64 ((__v2df) __A);
     841             : }
     842             : 
     843             : /* Microsoft intrinsic.  */
     844             : extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     845             : _mm_cvtsd_si64x (__m128d __A)
     846             : {
     847             :   return __builtin_ia32_cvtsd2si64 ((__v2df) __A);
     848             : }
     849             : #endif
     850             : 
     851             : extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     852             : _mm_cvttsd_si32 (__m128d __A)
     853             : {
     854             :   return __builtin_ia32_cvttsd2si ((__v2df) __A);
     855             : }
     856             : 
     857             : #ifdef __x86_64__
     858             : /* Intel intrinsic.  */
     859             : extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     860             : _mm_cvttsd_si64 (__m128d __A)
     861             : {
     862             :   return __builtin_ia32_cvttsd2si64 ((__v2df) __A);
     863             : }
     864             : 
     865             : /* Microsoft intrinsic.  */
     866             : extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     867             : _mm_cvttsd_si64x (__m128d __A)
     868             : {
     869             :   return __builtin_ia32_cvttsd2si64 ((__v2df) __A);
     870             : }
     871             : #endif
     872             : 
     873             : extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     874             : _mm_cvtsd_ss (__m128 __A, __m128d __B)
     875             : {
     876             :   return (__m128)__builtin_ia32_cvtsd2ss ((__v4sf) __A, (__v2df) __B);
     877             : }
     878             : 
     879             : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     880             : _mm_cvtsi32_sd (__m128d __A, int __B)
     881             : {
     882             :   return (__m128d)__builtin_ia32_cvtsi2sd ((__v2df) __A, __B);
     883             : }
     884             : 
     885             : #ifdef __x86_64__
     886             : /* Intel intrinsic.  */
     887             : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     888             : _mm_cvtsi64_sd (__m128d __A, long long __B)
     889             : {
     890             :   return (__m128d)__builtin_ia32_cvtsi642sd ((__v2df) __A, __B);
     891             : }
     892             : 
     893             : /* Microsoft intrinsic.  */
     894             : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     895             : _mm_cvtsi64x_sd (__m128d __A, long long __B)
     896             : {
     897             :   return (__m128d)__builtin_ia32_cvtsi642sd ((__v2df) __A, __B);
     898             : }
     899             : #endif
     900             : 
     901             : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     902             : _mm_cvtss_sd (__m128d __A, __m128 __B)
     903             : {
     904             :   return (__m128d)__builtin_ia32_cvtss2sd ((__v2df) __A, (__v4sf)__B);
     905             : }
     906             : 
     907             : #ifdef __OPTIMIZE__
     908             : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     909             : _mm_shuffle_pd(__m128d __A, __m128d __B, const int __mask)
     910             : {
     911        4240 :   return (__m128d)__builtin_ia32_shufpd ((__v2df)__A, (__v2df)__B, __mask);
     912             : }
     913             : #else
     914             : #define _mm_shuffle_pd(A, B, N)                                         \
     915             :   ((__m128d)__builtin_ia32_shufpd ((__v2df)(__m128d)(A),                \
     916             :                                    (__v2df)(__m128d)(B), (int)(N)))
     917             : #endif
     918             : 
     919             : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     920             : _mm_unpackhi_pd (__m128d __A, __m128d __B)
     921             : {
     922             :   return (__m128d)__builtin_ia32_unpckhpd ((__v2df)__A, (__v2df)__B);
     923             : }
     924             : 
     925             : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     926             : _mm_unpacklo_pd (__m128d __A, __m128d __B)
     927             : {
     928             :   return (__m128d)__builtin_ia32_unpcklpd ((__v2df)__A, (__v2df)__B);
     929             : }
     930             : 
     931             : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     932             : _mm_loadh_pd (__m128d __A, double const *__B)
     933             : {
     934             :   return (__m128d)__builtin_ia32_loadhpd ((__v2df)__A, __B);
     935             : }
     936             : 
     937             : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     938             : _mm_loadl_pd (__m128d __A, double const *__B)
     939             : {
     940             :   return (__m128d)__builtin_ia32_loadlpd ((__v2df)__A, __B);
     941             : }
     942             : 
     943             : extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     944             : _mm_movemask_pd (__m128d __A)
     945             : {
     946             :   return __builtin_ia32_movmskpd ((__v2df)__A);
     947             : }
     948             : 
     949             : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     950             : _mm_packs_epi16 (__m128i __A, __m128i __B)
     951             : {
     952             :   return (__m128i)__builtin_ia32_packsswb128 ((__v8hi)__A, (__v8hi)__B);
     953             : }
     954             : 
     955             : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     956             : _mm_packs_epi32 (__m128i __A, __m128i __B)
     957             : {
     958             :   return (__m128i)__builtin_ia32_packssdw128 ((__v4si)__A, (__v4si)__B);
     959             : }
     960             : 
     961             : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     962             : _mm_packus_epi16 (__m128i __A, __m128i __B)
     963             : {
     964             :   return (__m128i)__builtin_ia32_packuswb128 ((__v8hi)__A, (__v8hi)__B);
     965             : }
     966             : 
     967             : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     968             : _mm_unpackhi_epi8 (__m128i __A, __m128i __B)
     969             : {
     970             :   return (__m128i)__builtin_ia32_punpckhbw128 ((__v16qi)__A, (__v16qi)__B);
     971             : }
     972             : 
     973             : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     974             : _mm_unpackhi_epi16 (__m128i __A, __m128i __B)
     975             : {
     976             :   return (__m128i)__builtin_ia32_punpckhwd128 ((__v8hi)__A, (__v8hi)__B);
     977             : }
     978             : 
     979             : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     980             : _mm_unpackhi_epi32 (__m128i __A, __m128i __B)
     981             : {
     982             :   return (__m128i)__builtin_ia32_punpckhdq128 ((__v4si)__A, (__v4si)__B);
     983             : }
     984             : 
     985             : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     986             : _mm_unpackhi_epi64 (__m128i __A, __m128i __B)
     987             : {
     988             :   return (__m128i)__builtin_ia32_punpckhqdq128 ((__v2di)__A, (__v2di)__B);
     989             : }
     990             : 
     991             : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     992             : _mm_unpacklo_epi8 (__m128i __A, __m128i __B)
     993             : {
     994             :   return (__m128i)__builtin_ia32_punpcklbw128 ((__v16qi)__A, (__v16qi)__B);
     995             : }
     996             : 
     997             : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
     998             : _mm_unpacklo_epi16 (__m128i __A, __m128i __B)
     999             : {
    1000             :   return (__m128i)__builtin_ia32_punpcklwd128 ((__v8hi)__A, (__v8hi)__B);
    1001             : }
    1002             : 
    1003             : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1004             : _mm_unpacklo_epi32 (__m128i __A, __m128i __B)
    1005             : {
    1006             :   return (__m128i)__builtin_ia32_punpckldq128 ((__v4si)__A, (__v4si)__B);
    1007             : }
    1008             : 
    1009             : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1010             : _mm_unpacklo_epi64 (__m128i __A, __m128i __B)
    1011             : {
    1012             :   return (__m128i)__builtin_ia32_punpcklqdq128 ((__v2di)__A, (__v2di)__B);
    1013             : }
    1014             : 
    1015             : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1016             : _mm_add_epi8 (__m128i __A, __m128i __B)
    1017             : {
    1018             :   return (__m128i) ((__v16qu)__A + (__v16qu)__B);
    1019             : }
    1020             : 
    1021             : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1022             : _mm_add_epi16 (__m128i __A, __m128i __B)
    1023             : {
    1024             :   return (__m128i) ((__v8hu)__A + (__v8hu)__B);
    1025             : }
    1026             : 
    1027             : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1028             : _mm_add_epi32 (__m128i __A, __m128i __B)
    1029             : {
    1030             :   return (__m128i) ((__v4su)__A + (__v4su)__B);
    1031             : }
    1032             : 
    1033             : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1034             : _mm_add_epi64 (__m128i __A, __m128i __B)
    1035             : {
    1036             :   return (__m128i) ((__v2du)__A + (__v2du)__B);
    1037             : }
    1038             : 
    1039             : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1040             : _mm_adds_epi8 (__m128i __A, __m128i __B)
    1041             : {
    1042             :   return (__m128i)__builtin_ia32_paddsb128 ((__v16qi)__A, (__v16qi)__B);
    1043             : }
    1044             : 
    1045             : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1046             : _mm_adds_epi16 (__m128i __A, __m128i __B)
    1047             : {
    1048             :   return (__m128i)__builtin_ia32_paddsw128 ((__v8hi)__A, (__v8hi)__B);
    1049             : }
    1050             : 
    1051             : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1052             : _mm_adds_epu8 (__m128i __A, __m128i __B)
    1053             : {
    1054             :   return (__m128i)__builtin_ia32_paddusb128 ((__v16qi)__A, (__v16qi)__B);
    1055             : }
    1056             : 
    1057             : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1058             : _mm_adds_epu16 (__m128i __A, __m128i __B)
    1059             : {
    1060             :   return (__m128i)__builtin_ia32_paddusw128 ((__v8hi)__A, (__v8hi)__B);
    1061             : }
    1062             : 
    1063             : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1064             : _mm_sub_epi8 (__m128i __A, __m128i __B)
    1065             : {
    1066             :   return (__m128i) ((__v16qu)__A - (__v16qu)__B);
    1067             : }
    1068             : 
    1069             : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1070             : _mm_sub_epi16 (__m128i __A, __m128i __B)
    1071             : {
    1072             :   return (__m128i) ((__v8hu)__A - (__v8hu)__B);
    1073             : }
    1074             : 
    1075             : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1076             : _mm_sub_epi32 (__m128i __A, __m128i __B)
    1077             : {
    1078             :   return (__m128i) ((__v4su)__A - (__v4su)__B);
    1079             : }
    1080             : 
    1081             : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1082             : _mm_sub_epi64 (__m128i __A, __m128i __B)
    1083             : {
    1084             :   return (__m128i) ((__v2du)__A - (__v2du)__B);
    1085             : }
    1086             : 
    1087             : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1088             : _mm_subs_epi8 (__m128i __A, __m128i __B)
    1089             : {
    1090             :   return (__m128i)__builtin_ia32_psubsb128 ((__v16qi)__A, (__v16qi)__B);
    1091             : }
    1092             : 
    1093             : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1094             : _mm_subs_epi16 (__m128i __A, __m128i __B)
    1095             : {
    1096             :   return (__m128i)__builtin_ia32_psubsw128 ((__v8hi)__A, (__v8hi)__B);
    1097             : }
    1098             : 
    1099             : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1100             : _mm_subs_epu8 (__m128i __A, __m128i __B)
    1101             : {
    1102             :   return (__m128i)__builtin_ia32_psubusb128 ((__v16qi)__A, (__v16qi)__B);
    1103             : }
    1104             : 
    1105             : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1106             : _mm_subs_epu16 (__m128i __A, __m128i __B)
    1107             : {
    1108             :   return (__m128i)__builtin_ia32_psubusw128 ((__v8hi)__A, (__v8hi)__B);
    1109             : }
    1110             : 
    1111             : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1112             : _mm_madd_epi16 (__m128i __A, __m128i __B)
    1113             : {
    1114             :   return (__m128i)__builtin_ia32_pmaddwd128 ((__v8hi)__A, (__v8hi)__B);
    1115             : }
    1116             : 
    1117             : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1118             : _mm_mulhi_epi16 (__m128i __A, __m128i __B)
    1119             : {
    1120             :   return (__m128i)__builtin_ia32_pmulhw128 ((__v8hi)__A, (__v8hi)__B);
    1121             : }
    1122             : 
    1123             : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1124             : _mm_mullo_epi16 (__m128i __A, __m128i __B)
    1125             : {
    1126             :   return (__m128i) ((__v8hu)__A * (__v8hu)__B);
    1127             : }
    1128             : 
    1129             : extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1130             : _mm_mul_su32 (__m64 __A, __m64 __B)
    1131             : {
    1132             :   return (__m64)__builtin_ia32_pmuludq ((__v2si)__A, (__v2si)__B);
    1133             : }
    1134             : 
    1135             : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1136             : _mm_mul_epu32 (__m128i __A, __m128i __B)
    1137             : {
    1138             :   return (__m128i)__builtin_ia32_pmuludq128 ((__v4si)__A, (__v4si)__B);
    1139             : }
    1140             : 
    1141             : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1142             : _mm_slli_epi16 (__m128i __A, int __B)
    1143             : {
    1144             :   return (__m128i)__builtin_ia32_psllwi128 ((__v8hi)__A, __B);
    1145             : }
    1146             : 
    1147             : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1148             : _mm_slli_epi32 (__m128i __A, int __B)
    1149             : {
    1150             :   return (__m128i)__builtin_ia32_pslldi128 ((__v4si)__A, __B);
    1151             : }
    1152             : 
    1153             : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1154             : _mm_slli_epi64 (__m128i __A, int __B)
    1155             : {
    1156             :   return (__m128i)__builtin_ia32_psllqi128 ((__v2di)__A, __B);
    1157             : }
    1158             : 
    1159             : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1160             : _mm_srai_epi16 (__m128i __A, int __B)
    1161             : {
    1162             :   return (__m128i)__builtin_ia32_psrawi128 ((__v8hi)__A, __B);
    1163             : }
    1164             : 
    1165             : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1166             : _mm_srai_epi32 (__m128i __A, int __B)
    1167             : {
    1168             :   return (__m128i)__builtin_ia32_psradi128 ((__v4si)__A, __B);
    1169             : }
    1170             : 
    1171             : #ifdef __OPTIMIZE__
    1172             : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1173             : _mm_bsrli_si128 (__m128i __A, const int __N)
    1174             : {
    1175             :   return (__m128i)__builtin_ia32_psrldqi128 (__A, __N * 8);
    1176             : }
    1177             : 
    1178             : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1179             : _mm_bslli_si128 (__m128i __A, const int __N)
    1180             : {
    1181             :   return (__m128i)__builtin_ia32_pslldqi128 (__A, __N * 8);
    1182             : }
    1183             : 
    1184             : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1185             : _mm_srli_si128 (__m128i __A, const int __N)
    1186             : {
    1187             :   return (__m128i)__builtin_ia32_psrldqi128 (__A, __N * 8);
    1188             : }
    1189             : 
    1190             : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1191             : _mm_slli_si128 (__m128i __A, const int __N)
    1192             : {
    1193             :   return (__m128i)__builtin_ia32_pslldqi128 (__A, __N * 8);
    1194             : }
    1195             : #else
    1196             : #define _mm_bsrli_si128(A, N) \
    1197             :   ((__m128i)__builtin_ia32_psrldqi128 ((__m128i)(A), (int)(N) * 8))
    1198             : #define _mm_bslli_si128(A, N) \
    1199             :   ((__m128i)__builtin_ia32_pslldqi128 ((__m128i)(A), (int)(N) * 8))
    1200             : #define _mm_srli_si128(A, N) \
    1201             :   ((__m128i)__builtin_ia32_psrldqi128 ((__m128i)(A), (int)(N) * 8))
    1202             : #define _mm_slli_si128(A, N) \
    1203             :   ((__m128i)__builtin_ia32_pslldqi128 ((__m128i)(A), (int)(N) * 8))
    1204             : #endif
    1205             : 
    1206             : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1207             : _mm_srli_epi16 (__m128i __A, int __B)
    1208             : {
    1209             :   return (__m128i)__builtin_ia32_psrlwi128 ((__v8hi)__A, __B);
    1210             : }
    1211             : 
    1212             : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1213             : _mm_srli_epi32 (__m128i __A, int __B)
    1214             : {
    1215             :   return (__m128i)__builtin_ia32_psrldi128 ((__v4si)__A, __B);
    1216             : }
    1217             : 
    1218             : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1219             : _mm_srli_epi64 (__m128i __A, int __B)
    1220             : {
    1221             :   return (__m128i)__builtin_ia32_psrlqi128 ((__v2di)__A, __B);
    1222             : }
    1223             : 
    1224             : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1225             : _mm_sll_epi16 (__m128i __A, __m128i __B)
    1226             : {
    1227             :   return (__m128i)__builtin_ia32_psllw128((__v8hi)__A, (__v8hi)__B);
    1228             : }
    1229             : 
    1230             : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1231             : _mm_sll_epi32 (__m128i __A, __m128i __B)
    1232             : {
    1233             :   return (__m128i)__builtin_ia32_pslld128((__v4si)__A, (__v4si)__B);
    1234             : }
    1235             : 
    1236             : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1237             : _mm_sll_epi64 (__m128i __A, __m128i __B)
    1238             : {
    1239             :   return (__m128i)__builtin_ia32_psllq128((__v2di)__A, (__v2di)__B);
    1240             : }
    1241             : 
    1242             : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1243             : _mm_sra_epi16 (__m128i __A, __m128i __B)
    1244             : {
    1245             :   return (__m128i)__builtin_ia32_psraw128 ((__v8hi)__A, (__v8hi)__B);
    1246             : }
    1247             : 
    1248             : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1249             : _mm_sra_epi32 (__m128i __A, __m128i __B)
    1250             : {
    1251             :   return (__m128i)__builtin_ia32_psrad128 ((__v4si)__A, (__v4si)__B);
    1252             : }
    1253             : 
    1254             : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1255             : _mm_srl_epi16 (__m128i __A, __m128i __B)
    1256             : {
    1257             :   return (__m128i)__builtin_ia32_psrlw128 ((__v8hi)__A, (__v8hi)__B);
    1258             : }
    1259             : 
    1260             : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1261             : _mm_srl_epi32 (__m128i __A, __m128i __B)
    1262             : {
    1263             :   return (__m128i)__builtin_ia32_psrld128 ((__v4si)__A, (__v4si)__B);
    1264             : }
    1265             : 
    1266             : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1267             : _mm_srl_epi64 (__m128i __A, __m128i __B)
    1268             : {
    1269             :   return (__m128i)__builtin_ia32_psrlq128 ((__v2di)__A, (__v2di)__B);
    1270             : }
    1271             : 
    1272             : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1273             : _mm_and_si128 (__m128i __A, __m128i __B)
    1274             : {
    1275             :   return (__m128i) ((__v2du)__A & (__v2du)__B);
    1276             : }
    1277             : 
    1278             : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1279             : _mm_andnot_si128 (__m128i __A, __m128i __B)
    1280             : {
    1281             :   return (__m128i)__builtin_ia32_pandn128 ((__v2di)__A, (__v2di)__B);
    1282             : }
    1283             : 
    1284             : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1285             : _mm_or_si128 (__m128i __A, __m128i __B)
    1286             : {
    1287         144 :   return (__m128i) ((__v2du)__A | (__v2du)__B);
    1288             : }
    1289             : 
    1290             : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1291             : _mm_xor_si128 (__m128i __A, __m128i __B)
    1292             : {
    1293             :   return (__m128i) ((__v2du)__A ^ (__v2du)__B);
    1294             : }
    1295             : 
    1296             : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1297             : _mm_cmpeq_epi8 (__m128i __A, __m128i __B)
    1298             : {
    1299             :   return (__m128i) ((__v16qs)__A == (__v16qs)__B);
    1300             : }
    1301             : 
    1302             : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1303             : _mm_cmpeq_epi16 (__m128i __A, __m128i __B)
    1304             : {
    1305             :   return (__m128i) ((__v8hi)__A == (__v8hi)__B);
    1306             : }
    1307             : 
    1308             : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1309             : _mm_cmpeq_epi32 (__m128i __A, __m128i __B)
    1310             : {
    1311         128 :   return (__m128i) ((__v4si)__A == (__v4si)__B);
    1312             : }
    1313             : 
    1314             : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1315             : _mm_cmplt_epi8 (__m128i __A, __m128i __B)
    1316             : {
    1317             :   return (__m128i) ((__v16qs)__A < (__v16qs)__B);
    1318             : }
    1319             : 
    1320             : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1321             : _mm_cmplt_epi16 (__m128i __A, __m128i __B)
    1322             : {
    1323             :   return (__m128i) ((__v8hi)__A < (__v8hi)__B);
    1324             : }
    1325             : 
    1326             : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1327             : _mm_cmplt_epi32 (__m128i __A, __m128i __B)
    1328             : {
    1329             :   return (__m128i) ((__v4si)__A < (__v4si)__B);
    1330             : }
    1331             : 
    1332             : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1333             : _mm_cmpgt_epi8 (__m128i __A, __m128i __B)
    1334             : {
    1335             :   return (__m128i) ((__v16qs)__A > (__v16qs)__B);
    1336             : }
    1337             : 
    1338             : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1339             : _mm_cmpgt_epi16 (__m128i __A, __m128i __B)
    1340             : {
    1341             :   return (__m128i) ((__v8hi)__A > (__v8hi)__B);
    1342             : }
    1343             : 
    1344             : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1345             : _mm_cmpgt_epi32 (__m128i __A, __m128i __B)
    1346             : {
    1347             :   return (__m128i) ((__v4si)__A > (__v4si)__B);
    1348             : }
    1349             : 
    1350             : #ifdef __OPTIMIZE__
    1351             : extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1352             : _mm_extract_epi16 (__m128i const __A, int const __N)
    1353             : {
    1354             :   return (unsigned short) __builtin_ia32_vec_ext_v8hi ((__v8hi)__A, __N);
    1355             : }
    1356             : 
    1357             : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1358             : _mm_insert_epi16 (__m128i const __A, int const __D, int const __N)
    1359             : {
    1360             :   return (__m128i) __builtin_ia32_vec_set_v8hi ((__v8hi)__A, __D, __N);
    1361             : }
    1362             : #else
    1363             : #define _mm_extract_epi16(A, N) \
    1364             :   ((int) (unsigned short) __builtin_ia32_vec_ext_v8hi ((__v8hi)(__m128i)(A), (int)(N)))
    1365             : #define _mm_insert_epi16(A, D, N)                               \
    1366             :   ((__m128i) __builtin_ia32_vec_set_v8hi ((__v8hi)(__m128i)(A), \
    1367             :                                           (int)(D), (int)(N)))
    1368             : #endif
    1369             : 
    1370             : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1371             : _mm_max_epi16 (__m128i __A, __m128i __B)
    1372             : {
    1373             :   return (__m128i)__builtin_ia32_pmaxsw128 ((__v8hi)__A, (__v8hi)__B);
    1374             : }
    1375             : 
    1376             : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1377             : _mm_max_epu8 (__m128i __A, __m128i __B)
    1378             : {
    1379             :   return (__m128i)__builtin_ia32_pmaxub128 ((__v16qi)__A, (__v16qi)__B);
    1380             : }
    1381             : 
    1382             : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1383             : _mm_min_epi16 (__m128i __A, __m128i __B)
    1384             : {
    1385             :   return (__m128i)__builtin_ia32_pminsw128 ((__v8hi)__A, (__v8hi)__B);
    1386             : }
    1387             : 
    1388             : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1389             : _mm_min_epu8 (__m128i __A, __m128i __B)
    1390             : {
    1391             :   return (__m128i)__builtin_ia32_pminub128 ((__v16qi)__A, (__v16qi)__B);
    1392             : }
    1393             : 
    1394             : extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1395             : _mm_movemask_epi8 (__m128i __A)
    1396             : {
    1397         128 :   return __builtin_ia32_pmovmskb128 ((__v16qi)__A);
    1398             : }
    1399             : 
    1400             : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1401             : _mm_mulhi_epu16 (__m128i __A, __m128i __B)
    1402             : {
    1403             :   return (__m128i)__builtin_ia32_pmulhuw128 ((__v8hi)__A, (__v8hi)__B);
    1404             : }
    1405             : 
    1406             : #ifdef __OPTIMIZE__
    1407             : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1408             : _mm_shufflehi_epi16 (__m128i __A, const int __mask)
    1409             : {
    1410             :   return (__m128i)__builtin_ia32_pshufhw ((__v8hi)__A, __mask);
    1411             : }
    1412             : 
    1413             : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1414             : _mm_shufflelo_epi16 (__m128i __A, const int __mask)
    1415             : {
    1416             :   return (__m128i)__builtin_ia32_pshuflw ((__v8hi)__A, __mask);
    1417             : }
    1418             : 
    1419             : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1420             : _mm_shuffle_epi32 (__m128i __A, const int __mask)
    1421             : {
    1422             :   return (__m128i)__builtin_ia32_pshufd ((__v4si)__A, __mask);
    1423             : }
    1424             : #else
    1425             : #define _mm_shufflehi_epi16(A, N) \
    1426             :   ((__m128i)__builtin_ia32_pshufhw ((__v8hi)(__m128i)(A), (int)(N)))
    1427             : #define _mm_shufflelo_epi16(A, N) \
    1428             :   ((__m128i)__builtin_ia32_pshuflw ((__v8hi)(__m128i)(A), (int)(N)))
    1429             : #define _mm_shuffle_epi32(A, N) \
    1430             :   ((__m128i)__builtin_ia32_pshufd ((__v4si)(__m128i)(A), (int)(N)))
    1431             : #endif
    1432             : 
    1433             : extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1434             : _mm_maskmoveu_si128 (__m128i __A, __m128i __B, char *__C)
    1435             : {
    1436             :   __builtin_ia32_maskmovdqu ((__v16qi)__A, (__v16qi)__B, __C);
    1437             : }
    1438             : 
    1439             : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1440             : _mm_avg_epu8 (__m128i __A, __m128i __B)
    1441             : {
    1442             :   return (__m128i)__builtin_ia32_pavgb128 ((__v16qi)__A, (__v16qi)__B);
    1443             : }
    1444             : 
    1445             : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1446             : _mm_avg_epu16 (__m128i __A, __m128i __B)
    1447             : {
    1448             :   return (__m128i)__builtin_ia32_pavgw128 ((__v8hi)__A, (__v8hi)__B);
    1449             : }
    1450             : 
    1451             : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1452             : _mm_sad_epu8 (__m128i __A, __m128i __B)
    1453             : {
    1454             :   return (__m128i)__builtin_ia32_psadbw128 ((__v16qi)__A, (__v16qi)__B);
    1455             : }
    1456             : 
    1457             : extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1458             : _mm_stream_si32 (int *__A, int __B)
    1459             : {
    1460             :   __builtin_ia32_movnti (__A, __B);
    1461             : }
    1462             : 
    1463             : #ifdef __x86_64__
    1464             : extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1465             : _mm_stream_si64 (long long int *__A, long long int __B)
    1466             : {
    1467             :   __builtin_ia32_movnti64 (__A, __B);
    1468             : }
    1469             : #endif
    1470             : 
    1471             : extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1472             : _mm_stream_si128 (__m128i *__A, __m128i __B)
    1473             : {
    1474             :   __builtin_ia32_movntdq ((__v2di *)__A, (__v2di)__B);
    1475             : }
    1476             : 
    1477             : extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1478             : _mm_stream_pd (double *__A, __m128d __B)
    1479             : {
    1480             :   __builtin_ia32_movntpd (__A, (__v2df)__B);
    1481             : }
    1482             : 
    1483             : extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1484             : _mm_clflush (void const *__A)
    1485             : {
    1486             :   __builtin_ia32_clflush (__A);
    1487             : }
    1488             : 
    1489             : extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1490             : _mm_lfence (void)
    1491             : {
    1492             :   __builtin_ia32_lfence ();
    1493             : }
    1494             : 
    1495             : extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1496             : _mm_mfence (void)
    1497             : {
    1498             :   __builtin_ia32_mfence ();
    1499             : }
    1500             : 
    1501             : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1502             : _mm_cvtsi32_si128 (int __A)
    1503             : {
    1504             :   return _mm_set_epi32 (0, 0, 0, __A);
    1505             : }
    1506             : 
    1507             : #ifdef __x86_64__
    1508             : /* Intel intrinsic.  */
    1509             : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1510             : _mm_cvtsi64_si128 (long long __A)
    1511             : {
    1512             :   return _mm_set_epi64x (0, __A);
    1513             : }
    1514             : 
    1515             : /* Microsoft intrinsic.  */
    1516             : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1517             : _mm_cvtsi64x_si128 (long long __A)
    1518             : {
    1519             :   return _mm_set_epi64x (0, __A);
    1520             : }
    1521             : #endif
    1522             : 
    1523             : /* Casts between various SP, DP, INT vector types.  Note that these do no
    1524             :    conversion of values, they just change the type.  */
    1525             : extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1526             : _mm_castpd_ps(__m128d __A)
    1527             : {
    1528             :   return (__m128) __A;
    1529             : }
    1530             : 
    1531             : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1532             : _mm_castpd_si128(__m128d __A)
    1533             : {
    1534        1220 :   return (__m128i) __A;
    1535             : }
    1536             : 
    1537             : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1538             : _mm_castps_pd(__m128 __A)
    1539             : {
    1540             :   return (__m128d) __A;
    1541             : }
    1542             : 
    1543             : extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1544             : _mm_castps_si128(__m128 __A)
    1545             : {
    1546        1220 :   return (__m128i) __A;
    1547             : }
    1548             : 
    1549             : extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1550             : _mm_castsi128_ps(__m128i __A)
    1551             : {
    1552         136 :   return (__m128) __A;
    1553             : }
    1554             : 
    1555             : extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    1556             : _mm_castsi128_pd(__m128i __A)
    1557             : {
    1558         136 :   return (__m128d) __A;
    1559             : }
    1560             : 
    1561             : #ifdef __DISABLE_SSE2__
    1562             : #undef __DISABLE_SSE2__
    1563             : #pragma GCC pop_options
    1564             : #endif /* __DISABLE_SSE2__ */
    1565             : 
    1566             : #endif /* _EMMINTRIN_H_INCLUDED */

Generated by: LCOV version 1.13