itkMathDetail.h Source File

Go to the documentation of this file.
00001 /*=========================================================================
00002  *
00003  *  Copyright Insight Software Consortium
00004  *
00005  *  Licensed under the Apache License, Version 2.0 (the "License");
00006  *  you may not use this file except in compliance with the License.
00007  *  You may obtain a copy of the License at
00008  *
00009  *         http://www.apache.org/licenses/LICENSE-2.0.txt
00010  *
00011  *  Unless required by applicable law or agreed to in writing, software
00012  *  distributed under the License is distributed on an "AS IS" BASIS,
00013  *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00014  *  See the License for the specific language governing permissions and
00015  *  limitations under the License.
00016  *
00017  *=========================================================================*/
00018 /*=========================================================================
00019  *
00020  *  Portions of this file are subject to the VTK Toolkit Version 3 copyright.
00021  *
00022  *  Copyright (c) Ken Martin, Will Schroeder, Bill Lorensen
00023  *
00024  *  For complete copyright, license and disclaimer of warranty information
00025  *  please refer to the NOTICE file at the top of the ITK source tree.
00026  *
00027  *=========================================================================*/
00028 #ifndef __itkMathDetail_h
00029 #define __itkMathDetail_h
00030 
00031 #include "vnl/vnl_math.h"
00032 #include "itkIntTypes.h"
00033 #include "itkNumericTraits.h"
00034 
00035 #ifdef ITK_HAVE_FENV_H
00036 // The Sun Studio CC compiler seems to have a bug where if cstdio is
00037 // included stdio.h must also be included before fenv.h
00038 #include <stdio.h>
00039 #include <fenv.h> // should this be cfenv?
00040 #endif /* ITK_HAVE_FENV_H */
00041 
00042 #ifdef ITK_HAVE_EMMINTRIN_H
00043 #include <emmintrin.h> // sse 2 intrinsics
00044 #endif /* ITK_HAVE_EMMINTRIN_H */
00045 
00046 // assume no SSE2:
00047 #define USE_SSE2_64IMPL 0
00048 #define USE_SSE2_32IMPL 0
00049 
00050 // For apple assume sse2 is on for all intel builds, check for 64 and 32
00051 // bit versions
00052 #if defined(__APPLE__) && defined( __SSE2__ )
00053 
00054 #  if defined( __i386__ )
00055 #    undef  USE_SSE2_32IMPL
00056 #    define USE_SSE2_32IMPL 1
00057 #  endif
00058 
00059 #  if defined(  __x86_64 )
00060 //   Turn on the 64 bits implementation
00061 #    undef  USE_SSE2_64IMPL
00062 #    define USE_SSE2_64IMPL 1
00063 //   Turn on also the 32 bits implementation
00064 //   since it is available in 64 bits versions.
00065 #    undef  USE_SSE2_32IMPL
00066 #    define USE_SSE2_32IMPL 1
00067 #  endif
00068 
00069 #else
00070 
00071 // For non-apple (no universal binary possible) just use the
00072 // try-compile set ITK_COMPILER_SUPPORTS_SSE2_32 and
00073 // ITK_COMPILER_SUPPORTS_SSE2_64 to set values:
00074 
00075 #  if defined(ITK_COMPILER_SUPPORTS_SSE2_32) && !defined( __GCCXML__ )
00076 #    undef  USE_SSE2_32IMPL
00077 #    define USE_SSE2_32IMPL 1
00078 #  endif
00079 #  if defined(ITK_COMPILER_SUPPORTS_SSE2_64) && !defined( __GCCXML__ )
00080 #    undef  USE_SSE2_64IMPL
00081 #    define USE_SSE2_64IMPL 1
00082 #  endif
00083 
00084 #endif
00085 
00086 
00087 // Turn on 32-bit and 64-bit asm impl when using GCC on x86 platform with the
00088 // following exception:
00089 //   GCCXML
00090 #if defined( __GNUC__ ) && ( !defined( __GCCXML__ ) ) &&  ( defined( __i386__ ) || defined( __i386 ) \
00091   || defined( __x86_64__ ) || defined( __x86_64 ) )
00092 #define GCC_USE_ASM_32IMPL 1
00093 #define GCC_USE_ASM_64IMPL 1
00094 #else
00095 #define GCC_USE_ASM_32IMPL 0
00096 #define GCC_USE_ASM_64IMPL 0
00097 #endif
00098 // Turn on 32-bit and 64-bit asm impl when using msvc on 32 bits windows
00099 #if defined( VCL_VC ) && ( !defined( __GCCXML__ ) ) && !defined( _WIN64 )
00100 #define VC_USE_ASM_32IMPL 1
00101 #define VC_USE_ASM_64IMPL 1
00102 #else
00103 #define VC_USE_ASM_32IMPL 0
00104 #define VC_USE_ASM_64IMPL 0
00105 #endif
00106 
00107 namespace itk
00108 {
00109 namespace Math
00110 {
00111 namespace Detail
00112 {
00113 // The functions defined in this namespace are not meant to be used directly
00114 // and thus do not adhere to the standard backward-compatibility
00115 // policy of ITK, as any Detail namespace should be considered private.
00116 // Please use the functions from the itk::Math namespace instead
00117 
00119 // Base versions
00120 
00121 template< typename TReturn, typename TInput >
00122 inline TReturn RoundHalfIntegerToEven_base(TInput x)
00123 {
00124   if ( NumericTraits< TInput >::IsNonnegative(x) )
00125     {
00126     x += static_cast< TInput >( 0.5 );
00127     }
00128   else
00129     {
00130     x -= static_cast< TInput >( 0.5 );
00131     }
00132 
00133   const TReturn r = static_cast< TReturn >( x );
00134   return ( x != static_cast< TInput >( r ) ) ? r : static_cast< TReturn >( 2 * ( r / 2 ) );
00135 }
00136 
00137 template< typename TReturn, typename TInput >
00138 inline TReturn RoundHalfIntegerUp_base(TInput x)
00139 {
00140   x += static_cast< TInput >( 0.5 );
00141   const TReturn r = static_cast< TReturn >( x );
00142   return ( NumericTraits< TInput >::IsNonnegative(x) ) ?
00143          r :
00144          ( x == static_cast< TInput >( r ) ? r : r - static_cast< TReturn >( 1 ) );
00145 }
00146 
00147 template< typename TReturn, typename TInput >
00148 inline TReturn Floor_base(TInput x)
00149 {
00150   const TReturn r = static_cast< TReturn >( x );
00151 
00152   return ( NumericTraits< TInput >::IsNonnegative(x) ) ?
00153          r :
00154          ( x == static_cast< TInput >( r ) ? r : r - static_cast< TReturn >( 1 ) );
00155 }
00156 
00157 template< typename TReturn, typename TInput >
00158 inline TReturn Ceil_base(TInput x)
00159 {
00160   const TReturn r = static_cast< TReturn >( x );
00161 
00162   return ( NumericTraits< TInput >::IsNegative(x) ) ?
00163          r :
00164          ( x == static_cast< TInput >( r ) ? r : r + static_cast< TReturn >( 1 ) );
00165 }
00166 
00168 // 32 bits versions
00169 
00170 #if USE_SSE2_32IMPL // sse2 implementation
00171 
00172 inline int32_t RoundHalfIntegerToEven_32(double x)
00173 {
00174   #if defined( ITK_CHECK_FPU_ROUNDING_MODE ) && defined( HAVE_FENV_H )
00175   itkAssertInDebugAndIgnoreInReleaseMacro(fegetround() == FE_TONEAREST);
00176   #endif
00177   return _mm_cvtsd_si32( _mm_set_sd(x) );
00178 }
00179 
00180 inline int32_t RoundHalfIntegerToEven_32(float x)
00181 {
00182   #if defined( ITK_CHECK_FPU_ROUNDING_MODE ) && defined( HAVE_FENV_H )
00183   itkAssertInDebugAndIgnoreInReleaseMacro(fegetround() == FE_TONEAREST);
00184   #endif
00185   return _mm_cvtss_si32( _mm_set_ss(x) );
00186 }
00187 
00188 #elif GCC_USE_ASM_32IMPL // gcc asm implementation
00189 
00190 inline int32_t RoundHalfIntegerToEven_32(double x)
00191 {
00192   #if defined( ITK_CHECK_FPU_ROUNDING_MODE ) && defined( HAVE_FENV_H )
00193   itkAssertInDebugAndIgnoreInReleaseMacro(fegetround() == FE_TONEAREST);
00194   #endif
00195   int32_t r;
00196   __asm__ __volatile__ ( "fistpl %0" : "=m" ( r ) : "t" ( x ) : "st" );
00197   return r;
00198 }
00199 
00200 inline int32_t RoundHalfIntegerToEven_32(float x)
00201 {
00202   #if defined( ITK_CHECK_FPU_ROUNDING_MODE ) && defined( HAVE_FENV_H )
00203   itkAssertInDebugAndIgnoreInReleaseMacro(fegetround() == FE_TONEAREST);
00204   #endif
00205   int32_t r;
00206   __asm__ __volatile__ ( "fistpl %0" : "=m" ( r ) : "t" ( x ) : "st" );
00207   return r;
00208 }
00209 
00210 #elif VC_USE_ASM_32IMPL // msvc asm implementation
00211 
00212 inline int32_t RoundHalfIntegerToEven_32(double x)
00213 {
00214   #if defined( ITK_CHECK_FPU_ROUNDING_MODE ) && defined( HAVE_FENV_H )
00215   itkAssertInDebugAndIgnoreInReleaseMacro(fegetround() == FE_TONEAREST);
00216   #endif
00217   int32_t r;
00218   __asm
00219     {
00220     fld x
00221     fistp r
00222     }
00223   return r;
00224 }
00225 
00226 inline int32_t RoundHalfIntegerToEven_32(float x)
00227 {
00228   #if defined( ITK_CHECK_FPU_ROUNDING_MODE ) && defined( HAVE_FENV_H )
00229   itkAssertInDebugAndIgnoreInReleaseMacro(fegetround() == FE_TONEAREST);
00230   #endif
00231   int32_t r;
00232   __asm
00233     {
00234     fld x
00235     fistp r
00236     }
00237   return r;
00238 }
00239 
00240 #else // Base implementation
00241 
00242 inline int32_t RoundHalfIntegerToEven_32(double x) { return RoundHalfIntegerToEven_base< int32_t, double >(x); }
00243 inline int32_t RoundHalfIntegerToEven_32(float x) { return RoundHalfIntegerToEven_base< int32_t, float >(x); }
00244 
00245 #endif
00246 
00247 #if USE_SSE2_32IMPL || GCC_USE_ASM_32IMPL || VC_USE_ASM_32IMPL
00248 
00249 inline int32_t RoundHalfIntegerUp_32(double x) { return RoundHalfIntegerToEven_32(2 * x + 0.5) >> 1; }
00250 inline int32_t RoundHalfIntegerUp_32(float x) { return RoundHalfIntegerToEven_32(2 * x + 0.5f) >> 1; }
00251 
00252 inline int32_t Floor_32(double x) { return RoundHalfIntegerToEven_32(2 * x - 0.5) >> 1; }
00253 inline int32_t Floor_32(float x) { return RoundHalfIntegerToEven_32(2 * x - 0.5f) >> 1; }
00254 
00255 inline int32_t Ceil_32(double x) { return -( RoundHalfIntegerToEven_32(-0.5 - 2 * x) >> 1 ); }
00256 inline int32_t Ceil_32(float x) { return -( RoundHalfIntegerToEven_32(-0.5f - 2 * x) >> 1 ); }
00257 
00258 #else // Base implementation
00259 
00260 inline int32_t RoundHalfIntegerUp_32(double x) { return RoundHalfIntegerUp_base< int32_t, double >(x); }
00261 inline int32_t RoundHalfIntegerUp_32(float x) { return RoundHalfIntegerUp_base< int32_t, float >(x); }
00262 
00263 inline int32_t Floor_32(double x) { return Floor_base< int32_t, double >(x); }
00264 inline int32_t Floor_32(float x) { return Floor_base< int32_t, float >(x); }
00265 
00266 inline int32_t Ceil_32(double x) { return Ceil_base< int32_t, double >(x); }
00267 inline int32_t Ceil_32(float x) { return Ceil_base< int32_t, float >(x); }
00268 
00269 #endif // USE_SSE2_32IMPL || GCC_USE_ASM_32IMPL || VC_USE_ASM_32IMPL
00270 
00272 // 64 bits versions
00273 
00274 #if USE_SSE2_64IMPL // sse2 implementation
00275 
00276 inline int64_t RoundHalfIntegerToEven_64(double x)
00277 {
00278   #if defined( ITK_CHECK_FPU_ROUNDING_MODE )  && defined( HAVE_FENV_H )
00279   itkAssertInDebugAndIgnoreInReleaseMacro(fegetround() == FE_TONEAREST);
00280   #endif
00281   return _mm_cvtsd_si64( _mm_set_sd(x) );
00282 }
00283 
00284 inline int64_t RoundHalfIntegerToEven_64(float x)
00285 {
00286   #if defined( ITK_CHECK_FPU_ROUNDING_MODE ) && defined( HAVE_FENV_H )
00287   itkAssertInDebugAndIgnoreInReleaseMacro(fegetround() == FE_TONEAREST);
00288   #endif
00289   return _mm_cvtss_si64( _mm_set_ss(x) );
00290 }
00291 
00292 #elif GCC_USE_ASM_64IMPL // gcc asm implementation
00293 
00294 inline int64_t RoundHalfIntegerToEven_64(double x)
00295 {
00296   #if defined( ITK_CHECK_FPU_ROUNDING_MODE ) && defined( HAVE_FENV_H )
00297   itkAssertInDebugAndIgnoreInReleaseMacro(fegetround() == FE_TONEAREST);
00298   #endif
00299   int64_t r;
00300   __asm__ __volatile__ ( "fistpll %0" : "=m" ( r ) : "t" ( x ) : "st" );
00301   return r;
00302 }
00303 
00304 inline int64_t RoundHalfIntegerToEven_64(float x)
00305 {
00306   #if defined( ITK_CHECK_FPU_ROUNDING_MODE ) && defined( HAVE_FENV_H )
00307   itkAssertInDebugAndIgnoreInReleaseMacro(fegetround() == FE_TONEAREST);
00308   #endif
00309   int64_t r;
00310   __asm__ __volatile__ ( "fistpll %0" : "=m" ( r ) : "t" ( x ) : "st" );
00311   return r;
00312 }
00313 
00314 #elif VC_USE_ASM_64IMPL // msvc asm implementation
00315 
00316 inline int64_t RoundHalfIntegerToEven_64(double x)
00317 {
00318   #if defined( ITK_CHECK_FPU_ROUNDING_MODE ) && defined( HAVE_FENV_H )
00319   itkAssertInDebugAndIgnoreInReleaseMacro(fegetround() == FE_TONEAREST);
00320   #endif
00321   int64_t r;
00322   __asm
00323     {
00324     fld x
00325     fistp r
00326     }
00327   return r;
00328 }
00329 
00330 inline int64_t RoundHalfIntegerToEven_64(float x)
00331 {
00332   #if defined( ITK_CHECK_FPU_ROUNDING_MODE ) && defined( HAVE_FENV_H )
00333   itkAssertInDebugAndIgnoreInReleaseMacro(fegetround() == FE_TONEAREST);
00334   #endif
00335   int64_t r;
00336   __asm
00337     {
00338     fld x
00339     fistp r
00340     }
00341   return r;
00342 }
00343 
00344 #else // Base implementation
00345 
00346 inline int64_t RoundHalfIntegerToEven_64(double x) { return RoundHalfIntegerToEven_base< int64_t, double >(x); }
00347 inline int64_t RoundHalfIntegerToEven_64(float x) { return RoundHalfIntegerToEven_base< int64_t, float >(x); }
00348 
00349 #endif
00350 
00351 #if USE_SSE2_64IMPL || GCC_USE_ASM_64IMPL || VC_USE_ASM_64IMPL
00352 
00353 inline int64_t RoundHalfIntegerUp_64(double x) { return RoundHalfIntegerToEven_64(2 * x + 0.5) >> 1; }
00354 inline int64_t RoundHalfIntegerUp_64(float x) { return RoundHalfIntegerToEven_64(2 * x + 0.5f) >> 1; }
00355 
00356 inline int64_t Floor_64(double x) { return RoundHalfIntegerToEven_64(2 * x - 0.5) >> 1; }
00357 inline int64_t Floor_64(float x) { return RoundHalfIntegerToEven_64(2 * x - 0.5f) >> 1; }
00358 
00359 inline int64_t Ceil_64(double x) { return -( RoundHalfIntegerToEven_64(-0.5 - 2 * x) >> 1 ); }
00360 inline int64_t Ceil_64(float x) { return -( RoundHalfIntegerToEven_64(-0.5f - 2 * x) >> 1 ); }
00361 
00362 #else // Base implementation
00363 
00364 inline int64_t RoundHalfIntegerUp_64(double x) { return RoundHalfIntegerUp_base< int64_t, double >(x); }
00365 inline int64_t RoundHalfIntegerUp_64(float x) { return RoundHalfIntegerUp_base< int64_t, float >(x); }
00366 
00367 inline int64_t Floor_64(double x) { return Floor_base< int64_t, double >(x); }
00368 inline int64_t Floor_64(float x) { return Floor_base< int64_t, float >(x); }
00369 
00370 inline int64_t Ceil_64(double x) { return Ceil_base< int64_t, double >(x); }
00371 inline int64_t Ceil_64(float x) { return Ceil_base< int64_t, float >(x); }
00372 
00373 #endif // USE_SSE2_64IMPL || GCC_USE_ASM_64IMPL || VC_USE_ASM_64IMPL
00374 } // end namespace Detail
00375 } // end namespace Math
00376 
00377 // move to itkConceptChecking?
00378 namespace Concept
00379 {
00380 template< typename T >
00381 struct FloatOrDouble;
00382 template< >
00383 struct FloatOrDouble< float > {};
00384 template< >
00385 struct FloatOrDouble< double > {};
00386 } // end namespace Concept
00387 } // end namespace itk
00388 
00389 #undef USE_SSE2_32IMPL
00390 #undef GCC_USE_ASM_32IMPL
00391 #undef VC_USE_ASM_32IMPL
00392 
00393 #undef USE_SSE2_64IMPL
00394 #undef GCC_USE_ASM_64IMPL
00395 #undef VC_USE_ASM_64IMPL
00396 
00397 #endif // end of itkMathDetail.h
00398