ITK
4.1.0
Insight Segmentation and Registration Toolkit
|
00001 /*========================================================================= 00002 * 00003 * Copyright Insight Software Consortium 00004 * 00005 * Licensed under the Apache License, Version 2.0 (the "License"); 00006 * you may not use this file except in compliance with the License. 00007 * You may obtain a copy of the License at 00008 * 00009 * http://www.apache.org/licenses/LICENSE-2.0.txt 00010 * 00011 * Unless required by applicable law or agreed to in writing, software 00012 * distributed under the License is distributed on an "AS IS" BASIS, 00013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 00014 * See the License for the specific language governing permissions and 00015 * limitations under the License. 00016 * 00017 *=========================================================================*/ 00018 /*========================================================================= 00019 * 00020 * Portions of this file are subject to the VTK Toolkit Version 3 copyright. 00021 * 00022 * Copyright (c) Ken Martin, Will Schroeder, Bill Lorensen 00023 * 00024 * For complete copyright, license and disclaimer of warranty information 00025 * please refer to the NOTICE file at the top of the ITK source tree. 00026 * 00027 *=========================================================================*/ 00028 #ifndef __itkMathDetail_h 00029 #define __itkMathDetail_h 00030 00031 #include "vnl/vnl_math.h" 00032 #include "itkIntTypes.h" 00033 #include "itkNumericTraits.h" 00034 00035 #ifdef ITK_HAVE_FENV_H 00036 // The Sun Studio CC compiler seems to have a bug where if cstdio is 00037 // included stdio.h must also be included before fenv.h 00038 #include <stdio.h> 00039 #include <fenv.h> // should this be cfenv? 00040 #endif /* ITK_HAVE_FENV_H */ 00041 00042 #ifdef ITK_HAVE_EMMINTRIN_H 00043 #include <emmintrin.h> // sse 2 intrinsics 00044 #endif /* ITK_HAVE_EMMINTRIN_H */ 00045 00046 // assume no SSE2: 00047 #define USE_SSE2_64IMPL 0 00048 #define USE_SSE2_32IMPL 0 00049 00050 // For apple assume sse2 is on for all intel builds, check for 64 and 32 00051 // bit versions 00052 #if defined(__APPLE__) && defined( __SSE2__ ) 00053 00054 # if defined( __i386__ ) 00055 # undef USE_SSE2_32IMPL 00056 # define USE_SSE2_32IMPL 1 00057 # endif 00058 00059 # if defined( __x86_64 ) 00060 // Turn on the 64 bits implementation 00061 # undef USE_SSE2_64IMPL 00062 # define USE_SSE2_64IMPL 1 00063 // Turn on also the 32 bits implementation 00064 // since it is available in 64 bits versions. 00065 # undef USE_SSE2_32IMPL 00066 # define USE_SSE2_32IMPL 1 00067 # endif 00068 00069 #else 00070 00071 // For non-apple (no universal binary possible) just use the 00072 // try-compile set ITK_COMPILER_SUPPORTS_SSE2_32 and 00073 // ITK_COMPILER_SUPPORTS_SSE2_64 to set values: 00074 00075 # if defined(ITK_COMPILER_SUPPORTS_SSE2_32) && !defined( __GCCXML__ ) 00076 # undef USE_SSE2_32IMPL 00077 # define USE_SSE2_32IMPL 1 00078 # endif 00079 # if defined(ITK_COMPILER_SUPPORTS_SSE2_64) && !defined( __GCCXML__ ) 00080 # undef USE_SSE2_64IMPL 00081 # define USE_SSE2_64IMPL 1 00082 # endif 00083 00084 #endif 00085 00086 00087 // Turn on 32-bit and 64-bit asm impl when using GCC on x86 platform with the 00088 // following exception: 00089 // GCCXML 00090 #if defined( __GNUC__ ) && ( !defined( __GCCXML__ ) ) && ( defined( __i386__ ) || defined( __i386 ) \ 00091 || defined( __x86_64__ ) || defined( __x86_64 ) ) 00092 #define GCC_USE_ASM_32IMPL 1 00093 #define GCC_USE_ASM_64IMPL 1 00094 #else 00095 #define GCC_USE_ASM_32IMPL 0 00096 #define GCC_USE_ASM_64IMPL 0 00097 #endif 00098 // Turn on 32-bit and 64-bit asm impl when using msvc on 32 bits windows 00099 #if defined( VCL_VC ) && ( !defined( __GCCXML__ ) ) && !defined( _WIN64 ) 00100 #define VC_USE_ASM_32IMPL 1 00101 #define VC_USE_ASM_64IMPL 1 00102 #else 00103 #define VC_USE_ASM_32IMPL 0 00104 #define VC_USE_ASM_64IMPL 0 00105 #endif 00106 00107 namespace itk 00108 { 00109 namespace Math 00110 { 00111 namespace Detail 00112 { 00113 // The functions defined in this namespace are not meant to be used directly 00114 // and thus do not adhere to the standard backward-compatibility 00115 // policy of ITK, as any Detail namespace should be considered private. 00116 // Please use the functions from the itk::Math namespace instead 00117 00119 // Base versions 00120 00121 template< typename TReturn, typename TInput > 00122 inline TReturn RoundHalfIntegerToEven_base(TInput x) 00123 { 00124 if ( NumericTraits< TInput >::IsNonnegative(x) ) 00125 { 00126 x += static_cast< TInput >( 0.5 ); 00127 } 00128 else 00129 { 00130 x -= static_cast< TInput >( 0.5 ); 00131 } 00132 00133 const TReturn r = static_cast< TReturn >( x ); 00134 return ( x != static_cast< TInput >( r ) ) ? r : static_cast< TReturn >( 2 * ( r / 2 ) ); 00135 } 00136 00137 template< typename TReturn, typename TInput > 00138 inline TReturn RoundHalfIntegerUp_base(TInput x) 00139 { 00140 x += static_cast< TInput >( 0.5 ); 00141 const TReturn r = static_cast< TReturn >( x ); 00142 return ( NumericTraits< TInput >::IsNonnegative(x) ) ? 00143 r : 00144 ( x == static_cast< TInput >( r ) ? r : r - static_cast< TReturn >( 1 ) ); 00145 } 00146 00147 template< typename TReturn, typename TInput > 00148 inline TReturn Floor_base(TInput x) 00149 { 00150 const TReturn r = static_cast< TReturn >( x ); 00151 00152 return ( NumericTraits< TInput >::IsNonnegative(x) ) ? 00153 r : 00154 ( x == static_cast< TInput >( r ) ? r : r - static_cast< TReturn >( 1 ) ); 00155 } 00156 00157 template< typename TReturn, typename TInput > 00158 inline TReturn Ceil_base(TInput x) 00159 { 00160 const TReturn r = static_cast< TReturn >( x ); 00161 00162 return ( NumericTraits< TInput >::IsNegative(x) ) ? 00163 r : 00164 ( x == static_cast< TInput >( r ) ? r : r + static_cast< TReturn >( 1 ) ); 00165 } 00166 00168 // 32 bits versions 00169 00170 #if USE_SSE2_32IMPL // sse2 implementation 00171 00172 inline int32_t RoundHalfIntegerToEven_32(double x) 00173 { 00174 #if defined( ITK_CHECK_FPU_ROUNDING_MODE ) && defined( HAVE_FENV_H ) 00175 itkAssertInDebugAndIgnoreInReleaseMacro(fegetround() == FE_TONEAREST); 00176 #endif 00177 return _mm_cvtsd_si32( _mm_set_sd(x) ); 00178 } 00179 00180 inline int32_t RoundHalfIntegerToEven_32(float x) 00181 { 00182 #if defined( ITK_CHECK_FPU_ROUNDING_MODE ) && defined( HAVE_FENV_H ) 00183 itkAssertInDebugAndIgnoreInReleaseMacro(fegetround() == FE_TONEAREST); 00184 #endif 00185 return _mm_cvtss_si32( _mm_set_ss(x) ); 00186 } 00187 00188 #elif GCC_USE_ASM_32IMPL // gcc asm implementation 00189 00190 inline int32_t RoundHalfIntegerToEven_32(double x) 00191 { 00192 #if defined( ITK_CHECK_FPU_ROUNDING_MODE ) && defined( HAVE_FENV_H ) 00193 itkAssertInDebugAndIgnoreInReleaseMacro(fegetround() == FE_TONEAREST); 00194 #endif 00195 int32_t r; 00196 __asm__ __volatile__ ( "fistpl %0" : "=m" ( r ) : "t" ( x ) : "st" ); 00197 return r; 00198 } 00199 00200 inline int32_t RoundHalfIntegerToEven_32(float x) 00201 { 00202 #if defined( ITK_CHECK_FPU_ROUNDING_MODE ) && defined( HAVE_FENV_H ) 00203 itkAssertInDebugAndIgnoreInReleaseMacro(fegetround() == FE_TONEAREST); 00204 #endif 00205 int32_t r; 00206 __asm__ __volatile__ ( "fistpl %0" : "=m" ( r ) : "t" ( x ) : "st" ); 00207 return r; 00208 } 00209 00210 #elif VC_USE_ASM_32IMPL // msvc asm implementation 00211 00212 inline int32_t RoundHalfIntegerToEven_32(double x) 00213 { 00214 #if defined( ITK_CHECK_FPU_ROUNDING_MODE ) && defined( HAVE_FENV_H ) 00215 itkAssertInDebugAndIgnoreInReleaseMacro(fegetround() == FE_TONEAREST); 00216 #endif 00217 int32_t r; 00218 __asm 00219 { 00220 fld x 00221 fistp r 00222 } 00223 return r; 00224 } 00225 00226 inline int32_t RoundHalfIntegerToEven_32(float x) 00227 { 00228 #if defined( ITK_CHECK_FPU_ROUNDING_MODE ) && defined( HAVE_FENV_H ) 00229 itkAssertInDebugAndIgnoreInReleaseMacro(fegetround() == FE_TONEAREST); 00230 #endif 00231 int32_t r; 00232 __asm 00233 { 00234 fld x 00235 fistp r 00236 } 00237 return r; 00238 } 00239 00240 #else // Base implementation 00241 00242 inline int32_t RoundHalfIntegerToEven_32(double x) { return RoundHalfIntegerToEven_base< int32_t, double >(x); } 00243 inline int32_t RoundHalfIntegerToEven_32(float x) { return RoundHalfIntegerToEven_base< int32_t, float >(x); } 00244 00245 #endif 00246 00247 #if USE_SSE2_32IMPL || GCC_USE_ASM_32IMPL || VC_USE_ASM_32IMPL 00248 00249 inline int32_t RoundHalfIntegerUp_32(double x) { return RoundHalfIntegerToEven_32(2 * x + 0.5) >> 1; } 00250 inline int32_t RoundHalfIntegerUp_32(float x) { return RoundHalfIntegerToEven_32(2 * x + 0.5f) >> 1; } 00251 00252 inline int32_t Floor_32(double x) { return RoundHalfIntegerToEven_32(2 * x - 0.5) >> 1; } 00253 inline int32_t Floor_32(float x) { return RoundHalfIntegerToEven_32(2 * x - 0.5f) >> 1; } 00254 00255 inline int32_t Ceil_32(double x) { return -( RoundHalfIntegerToEven_32(-0.5 - 2 * x) >> 1 ); } 00256 inline int32_t Ceil_32(float x) { return -( RoundHalfIntegerToEven_32(-0.5f - 2 * x) >> 1 ); } 00257 00258 #else // Base implementation 00259 00260 inline int32_t RoundHalfIntegerUp_32(double x) { return RoundHalfIntegerUp_base< int32_t, double >(x); } 00261 inline int32_t RoundHalfIntegerUp_32(float x) { return RoundHalfIntegerUp_base< int32_t, float >(x); } 00262 00263 inline int32_t Floor_32(double x) { return Floor_base< int32_t, double >(x); } 00264 inline int32_t Floor_32(float x) { return Floor_base< int32_t, float >(x); } 00265 00266 inline int32_t Ceil_32(double x) { return Ceil_base< int32_t, double >(x); } 00267 inline int32_t Ceil_32(float x) { return Ceil_base< int32_t, float >(x); } 00268 00269 #endif // USE_SSE2_32IMPL || GCC_USE_ASM_32IMPL || VC_USE_ASM_32IMPL 00270 00272 // 64 bits versions 00273 00274 #if USE_SSE2_64IMPL // sse2 implementation 00275 00276 inline int64_t RoundHalfIntegerToEven_64(double x) 00277 { 00278 #if defined( ITK_CHECK_FPU_ROUNDING_MODE ) && defined( HAVE_FENV_H ) 00279 itkAssertInDebugAndIgnoreInReleaseMacro(fegetround() == FE_TONEAREST); 00280 #endif 00281 return _mm_cvtsd_si64( _mm_set_sd(x) ); 00282 } 00283 00284 inline int64_t RoundHalfIntegerToEven_64(float x) 00285 { 00286 #if defined( ITK_CHECK_FPU_ROUNDING_MODE ) && defined( HAVE_FENV_H ) 00287 itkAssertInDebugAndIgnoreInReleaseMacro(fegetround() == FE_TONEAREST); 00288 #endif 00289 return _mm_cvtss_si64( _mm_set_ss(x) ); 00290 } 00291 00292 #elif GCC_USE_ASM_64IMPL // gcc asm implementation 00293 00294 inline int64_t RoundHalfIntegerToEven_64(double x) 00295 { 00296 #if defined( ITK_CHECK_FPU_ROUNDING_MODE ) && defined( HAVE_FENV_H ) 00297 itkAssertInDebugAndIgnoreInReleaseMacro(fegetround() == FE_TONEAREST); 00298 #endif 00299 int64_t r; 00300 __asm__ __volatile__ ( "fistpll %0" : "=m" ( r ) : "t" ( x ) : "st" ); 00301 return r; 00302 } 00303 00304 inline int64_t RoundHalfIntegerToEven_64(float x) 00305 { 00306 #if defined( ITK_CHECK_FPU_ROUNDING_MODE ) && defined( HAVE_FENV_H ) 00307 itkAssertInDebugAndIgnoreInReleaseMacro(fegetround() == FE_TONEAREST); 00308 #endif 00309 int64_t r; 00310 __asm__ __volatile__ ( "fistpll %0" : "=m" ( r ) : "t" ( x ) : "st" ); 00311 return r; 00312 } 00313 00314 #elif VC_USE_ASM_64IMPL // msvc asm implementation 00315 00316 inline int64_t RoundHalfIntegerToEven_64(double x) 00317 { 00318 #if defined( ITK_CHECK_FPU_ROUNDING_MODE ) && defined( HAVE_FENV_H ) 00319 itkAssertInDebugAndIgnoreInReleaseMacro(fegetround() == FE_TONEAREST); 00320 #endif 00321 int64_t r; 00322 __asm 00323 { 00324 fld x 00325 fistp r 00326 } 00327 return r; 00328 } 00329 00330 inline int64_t RoundHalfIntegerToEven_64(float x) 00331 { 00332 #if defined( ITK_CHECK_FPU_ROUNDING_MODE ) && defined( HAVE_FENV_H ) 00333 itkAssertInDebugAndIgnoreInReleaseMacro(fegetround() == FE_TONEAREST); 00334 #endif 00335 int64_t r; 00336 __asm 00337 { 00338 fld x 00339 fistp r 00340 } 00341 return r; 00342 } 00343 00344 #else // Base implementation 00345 00346 inline int64_t RoundHalfIntegerToEven_64(double x) { return RoundHalfIntegerToEven_base< int64_t, double >(x); } 00347 inline int64_t RoundHalfIntegerToEven_64(float x) { return RoundHalfIntegerToEven_base< int64_t, float >(x); } 00348 00349 #endif 00350 00351 #if USE_SSE2_64IMPL || GCC_USE_ASM_64IMPL || VC_USE_ASM_64IMPL 00352 00353 inline int64_t RoundHalfIntegerUp_64(double x) { return RoundHalfIntegerToEven_64(2 * x + 0.5) >> 1; } 00354 inline int64_t RoundHalfIntegerUp_64(float x) { return RoundHalfIntegerToEven_64(2 * x + 0.5f) >> 1; } 00355 00356 inline int64_t Floor_64(double x) { return RoundHalfIntegerToEven_64(2 * x - 0.5) >> 1; } 00357 inline int64_t Floor_64(float x) { return RoundHalfIntegerToEven_64(2 * x - 0.5f) >> 1; } 00358 00359 inline int64_t Ceil_64(double x) { return -( RoundHalfIntegerToEven_64(-0.5 - 2 * x) >> 1 ); } 00360 inline int64_t Ceil_64(float x) { return -( RoundHalfIntegerToEven_64(-0.5f - 2 * x) >> 1 ); } 00361 00362 #else // Base implementation 00363 00364 inline int64_t RoundHalfIntegerUp_64(double x) { return RoundHalfIntegerUp_base< int64_t, double >(x); } 00365 inline int64_t RoundHalfIntegerUp_64(float x) { return RoundHalfIntegerUp_base< int64_t, float >(x); } 00366 00367 inline int64_t Floor_64(double x) { return Floor_base< int64_t, double >(x); } 00368 inline int64_t Floor_64(float x) { return Floor_base< int64_t, float >(x); } 00369 00370 inline int64_t Ceil_64(double x) { return Ceil_base< int64_t, double >(x); } 00371 inline int64_t Ceil_64(float x) { return Ceil_base< int64_t, float >(x); } 00372 00373 #endif // USE_SSE2_64IMPL || GCC_USE_ASM_64IMPL || VC_USE_ASM_64IMPL 00374 } // end namespace Detail 00375 } // end namespace Math 00376 00377 // move to itkConceptChecking? 00378 namespace Concept 00379 { 00380 template< typename T > 00381 struct FloatOrDouble; 00382 template< > 00383 struct FloatOrDouble< float > {}; 00384 template< > 00385 struct FloatOrDouble< double > {}; 00386 } // end namespace Concept 00387 } // end namespace itk 00388 00389 #undef USE_SSE2_32IMPL 00390 #undef GCC_USE_ASM_32IMPL 00391 #undef VC_USE_ASM_32IMPL 00392 00393 #undef USE_SSE2_64IMPL 00394 #undef GCC_USE_ASM_64IMPL 00395 #undef VC_USE_ASM_64IMPL 00396 00397 #endif // end of itkMathDetail.h 00398