LCOV - code coverage report
Current view: top level - Src/SSE41 - InaVecSSE41Float.hpp (source / functions) Hit Total Coverage
Test: Coverage inastemp Lines: 21 21 100.0 %
Date: 2022-03-17 09:48:28 Functions: 1 1 100.0 %

          Line data    Source code
       1             : ///////////////////////////////////////////////////////////////////////////
       2             : // Inastemp - Berenger Bramas MPCDF - 2016
       3             : // Under MIT Licence; please read the LICENCE file.
       4             : ///////////////////////////////////////////////////////////////////////////
       5             : #ifndef INAVECSSE41FLOAT_HPP
       6             : #define INAVECSSE41FLOAT_HPP
       7             : 
       8             : #include "InastempGlobal.h"
       9             : #include "SSSE3/InaVecSSSE3Float.hpp"
      10             : 
      11             : #ifndef INASTEMP_USE_SSE41
      12             : #error InaVecSSE41<float> is included but SSE41 is not enable in the configuration
      13             : #endif
      14             : 
      15             : #include <tmmintrin.h>
      16             : #include <emmintrin.h>
      17             : #include <smmintrin.h>
      18             : 
      19             : template <class RealType>
      20             : class InaVecSSE41; // primary template: only declared here, specialised for float just after
      21             : 
      22             : template <>
      23             : class alignas(16) InaVecSSE41<float> : public InaVecSSSE3<float> { // float specialisation built on InaVecSSSE3<float>; defines exp(), ExpLowAcc(), floor(), GetName() and If()
      24             :     using Parent = InaVecSSSE3<float>;
      25             : 
      26             : public:
      27             :     using Parent::GetVecLength;
      28             : 
      29       15818 :     using InaVecSSSE3<float>::InaVecSSSE3; // inherit the parent's constructors
      30             : 
      31         288 :     inline InaVecSSE41(){} // default constructor with an empty body
      32             : 
      33             :     inline InaVecSSE41(const InaVecSSSE3<float>& other) // non-explicit: allows implicit conversion from the parent type
      34             :         : Parent(other){}
      35             : 
      36             :     // exp() is defined again in this class so that it goes through this class's floor() (_mm_floor_ps)
      37          26 :     inline InaVecSSE41<float> exp() const { // returns _mm_exp_ps(vec) with the Intel compiler, otherwise the polynomial-based computation in the #else branch
      38             : #ifdef __INTEL_COMPILER
      39             :         return _mm_exp_ps(Parent::vec);
      40             : #else
      41          26 :         const __m128 COEFF_LOG2E = _mm_set1_ps(float(InaFastExp::CoeffLog2E())); // each InaFastExp constant is narrowed to float and broadcast to every lane
      42          26 :         const __m128 COEFF_A     = _mm_set1_ps(float(InaFastExp::CoeffA32()));
      43          26 :         const __m128 COEFF_B     = _mm_set1_ps(float(InaFastExp::CoeffB32()));
      44          26 :         const __m128 COEFF_P5_A  = _mm_set1_ps(float(InaFastExp::GetCoefficient6_5())); // six polynomial coefficients, highest index (6_5) first
      45          26 :         const __m128 COEFF_P5_B  = _mm_set1_ps(float(InaFastExp::GetCoefficient6_4()));
      46          26 :         const __m128 COEFF_P5_C  = _mm_set1_ps(float(InaFastExp::GetCoefficient6_3()));
      47          26 :         const __m128 COEFF_P5_D  = _mm_set1_ps(float(InaFastExp::GetCoefficient6_2()));
      48          26 :         const __m128 COEFF_P5_E  = _mm_set1_ps(float(InaFastExp::GetCoefficient6_1()));
      49          26 :         const __m128 COEFF_P5_F  = _mm_set1_ps(float(InaFastExp::GetCoefficient6_0()));
      50             : 
      51          52 :         __m128 x = _mm_mul_ps( Parent::vec , COEFF_LOG2E); // x = vec * COEFF_LOG2E
      52             : 
      53          78 :         const __m128 fractional_part = _mm_sub_ps(x, InaVecSSE41(x).floor().vec); // x - floor(x), using this class's floor()
      54             : 
      55         234 :         __m128 factor = _mm_add_ps(_mm_mul_ps(_mm_add_ps( _mm_mul_ps(_mm_add_ps( // polynomial in fractional_part, nested as ((((A*f+B)*f+C)*f+D)*f+E)*f+F
      56             :                          _mm_mul_ps(_mm_add_ps( _mm_mul_ps(_mm_add_ps(_mm_mul_ps(
      57             :                          COEFF_P5_A, fractional_part), COEFF_P5_B), fractional_part), COEFF_P5_C),fractional_part),
      58          26 :                          COEFF_P5_D), fractional_part), COEFF_P5_E),fractional_part), COEFF_P5_F);
      59             : 
      60          26 :         x = _mm_sub_ps(x,factor); // subtract the polynomial value from x
      61             : 
      62          78 :         __m128i castedInteger = _mm_cvtps_epi32(_mm_add_ps(_mm_mul_ps(COEFF_A, x), COEFF_B)); // convert COEFF_A * x + COEFF_B to packed 32-bit integers
      63             : 
      64          52 :         return _mm_castsi128_ps(castedInteger); // the integer bit patterns are reinterpreted as floats, not numerically converted
      65             : #endif
      66             :     }
      67             : 
      68             :     inline InaVecSSE41<float> ExpLowAcc() const { // same sequence of steps as the non-Intel branch of exp(), with a shorter polynomial
      69             :         const __m128 COEFF_LOG2E = _mm_set1_ps(float(InaFastExp::CoeffLog2E()));
      70             :         const __m128 COEFF_A     = _mm_set1_ps(float(InaFastExp::CoeffA32()));
      71             :         const __m128 COEFF_B     = _mm_set1_ps(float(InaFastExp::CoeffB32()));
      72             :         const __m128 COEFF_P5_D  = _mm_set1_ps(float(InaFastExp::GetCoefficient3_2())); // only three coefficients (GetCoefficient3_*) here, versus six in exp(); the P5_ names are kept as-is
      73             :         const __m128 COEFF_P5_E  = _mm_set1_ps(float(InaFastExp::GetCoefficient3_1()));
      74             :         const __m128 COEFF_P5_F  = _mm_set1_ps(float(InaFastExp::GetCoefficient3_0()));
      75             : 
      76             :         __m128 x = _mm_mul_ps( Parent::vec , COEFF_LOG2E); // x = vec * COEFF_LOG2E
      77             : 
      78             :         const __m128 fractional_part = _mm_sub_ps(x, InaVecSSE41(x).floor().vec); // x - floor(x)
      79             : 
      80             :         __m128 factor = _mm_add_ps(_mm_mul_ps( // (D*f + E)*f + F with f = fractional_part
      81             :                          _mm_add_ps(_mm_mul_ps(
      82             :                                          COEFF_P5_D, fractional_part),
      83             :                                          COEFF_P5_E), fractional_part),
      84             :                                          COEFF_P5_F);
      85             : 
      86             :         x = _mm_sub_ps(x,factor); // subtract the polynomial value from x
      87             : 
      88             :         __m128i castedInteger = _mm_cvtps_epi32(_mm_add_ps(_mm_mul_ps(COEFF_A, x), COEFF_B)); // convert COEFF_A * x + COEFF_B to packed 32-bit integers
      89             : 
      90             :         return _mm_castsi128_ps(castedInteger); // bit reinterpretation of the integers as floats
      91             :     }
      92             : 
      93             :     inline InaVecSSE41<float> floor() const { // returns _mm_floor_ps applied to the stored vector
      94         228 :         return _mm_floor_ps(Parent::vec);
      95             :     }
      96             : 
      97             :     inline static const char* GetName() { // returns the literal name of this vector type
      98             :         return "InaVecSSE41<float>";
      99             :     }
     100             : 
     101             :     inline static InaIfElse< InaVecSSE41<float> >::ThenClass If(const typename Parent::MaskType inTest) { // forwards inTest to InaIfElse<InaVecSSE41<float>>::IfClass().If and returns its ThenClass
     102          30 :         return InaIfElse< InaVecSSE41<float> >::IfClass().If(inTest);
     103             :     }
     104             : };
     105             : 
     106             : #endif

Generated by: LCOV version 1.13