[ITK-users] GPUDiscreteGaussian not working
Jose Ignacio Prieto
joseignacio.prieto at gmail.com
Tue Apr 15 11:18:34 EDT 2014
Hi all, I am having trouble using GPUDiscreteGaussianImageFilter. The CPU
version works for me, but the GPU version produces an all-zero output. I
tried running the ITK test code, but that did not help. The GPU mean filter
(GPUMeanImageFilter) does run correctly. My card is an AMD W7000, and I am
using OpenCL 1.2 and ITK 4.6.
Here is the code and the output. The images are VTK files of size
320x320x231 with unsigned short (ushort) pixels.
/*=========================================================================
*
* Copyright Insight Software Consortium
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0.txt
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*=========================================================================*/
#include <cmath>
#include <cstdlib>
#include <iostream>
#include <string>

#include "itkImageFileReader.h"
#include "itkImageFileWriter.h"
#include "itkImageRegionIterator.h"
#include "itkGPUImage.h"
#include "itkGPUKernelManager.h"
#include "itkGPUContextManager.h"
#include "itkGPUImageToImageFilter.h"
#include "itkGPUNeighborhoodOperatorImageFilter.h"
#include "itkTimeProbe.h"
#include "itkGaussianOperator.h"
#include "itkDiscreteGaussianImageFilter.h"
#include "itkGPUDiscreteGaussianImageFilter.h"
#include "itkMeanImageFilter.h"
#include "itkGPUMeanImageFilter.h"
#include "vnl/vnl_math.h"
// Pixel types used by both the CPU and the GPU pipeline.
//
// Floating-point pixels are used on purpose. With integer pixels the GPU
// kernels are compiled with "#define OPTYPE short" (see the kernel-build log
// of a run with short pixels). NOTE(review): presumably the Gaussian
// coefficients, all of which are smaller than 1, are then stored in that
// integer type and truncate to 0, which would explain an all-zero GPU
// output -- confirm against the ITK GPU filter implementation.
// The reader converts the on-disk pixel type to the requested one.
//
// Integer alternative, kept for reference:
// typedef short InputPixelType;
// typedef short OutputPixelType;
typedef float InputPixelType;
typedef float OutputPixelType;

// 3-D GPU-capable images; the same image types feed the CPU filter as well.
typedef itk::GPUImage< InputPixelType, 3 > InputImageType;
typedef itk::GPUImage< OutputPixelType, 3 > OutputImageType;

// File I/O for the input volume and the filtered result.
typedef itk::ImageFileReader< InputImageType > ReaderType;
typedef itk::ImageFileWriter< OutputImageType > WriterType;
int main(int argc, char *argv[])
{
if(!itk::IsGPUAvailable())
{
std::cerr << "OpenCL-enabled GPU is not present." << std::endl;
return EXIT_FAILURE;
}
if( argc < 3 )
{
std::cerr << "Error: missing arguments" << std::endl;
std::cerr << "inputfile outputfile [num_dimensions]" << std::endl;
return EXIT_FAILURE;
}
std::string inFile( argv[1] );
std::string outFile( argv[2] );
unsigned int dim = 3;
ReaderType::Pointer reader;
WriterType::Pointer writer;
reader = ReaderType::New();
writer = WriterType::New();
reader->SetFileName( inFile );
writer->SetFileName( outFile );
float variance = 4.0;
// test 1~8 threads for CPU
int nThreads = 8;
typedef itk::DiscreteGaussianImageFilter< InputImageType,
OutputImageType> CPUFilterType;
CPUFilterType::Pointer CPUFilter = CPUFilterType::New();
itk::TimeProbe cputimer;
cputimer.Start();
CPUFilter->SetNumberOfThreads( nThreads );
CPUFilter->SetInput( reader->GetOutput() );
CPUFilter->SetMaximumKernelWidth(10);
CPUFilter->SetUseImageSpacingOff();
CPUFilter->SetVariance( variance );
CPUFilter->Update();
cputimer.Stop();
// typedef itk::MeanImageFilter< InputImageType, OutputImageType>
CPUFilterType;
// CPUFilterType::Pointer CPUFilter = CPUFilterType::New();
// itk::TimeProbe cputimer;
// cputimer.Start();
// CPUFilter->SetNumberOfThreads( nThreads );
// CPUFilter->SetInput( reader->GetOutput() );
//// CPUFilter->SetMaximumKernelWidth(10);
//// CPUFilter->SetUseImageSpacingOff();
// CPUFilter->SetRadius( variance );
// CPUFilter->Update();
// cputimer.Stop();
std::cout << "CPU Gaussian Filter took " << cputimer.GetMean() <<
" seconds with "
<< CPUFilter->GetNumberOfThreads() << " threads.\n" << std::endl;
// -------
typedef itk::GPUDiscreteGaussianImageFilter< InputImageType,
OutputImageType> GPUFilterType;
GPUFilterType::Pointer GPUFilter = GPUFilterType::New();
itk::TimeProbe gputimer;
gputimer.Start();
GPUFilter->SetInput( reader->GetOutput() );
GPUFilter->SetVariance( variance );
GPUFilter->SetMaximumKernelWidth(10);
GPUFilter->SetUseImageSpacingOff();
// GPUFilter->DebugOn();
// GPUFilter->GPUEnabledOff();
GPUFilter->Print(std::cout);
GPUFilter->Update();
GPUFilter->GetOutput()->UpdateBuffers(); // synchronization point
(GPU->CPU memcpy)
gputimer.Stop();
std::cout << "GPU Gaussian Filter took " << gputimer.GetMean() <<
" seconds.\n" << std::endl;
// typedef itk::GPUMeanImageFilter< InputImageType,
OutputImageType> GPUFilterType;
// GPUFilterType::Pointer GPUFilter = GPUFilterType::New();
// itk::TimeProbe gputimer;
// gputimer.Start();
// GPUFilter->SetInput( reader->GetOutput() );
//// GPUFilter->SetVariance( variance );
//// GPUFilter->SetMaximumKernelWidth(10);
//// GPUFilter->SetUseImageSpacingOff();
//// GPUFilter->DebugOn();
//// GPUFilter->Print(std::cout);
// GPUFilter->SetRadius( variance );
// GPUFilter->Update();
// GPUFilter->GetOutput()->UpdateBuffers(); // synchronization
point (GPU->CPU memcpy)
// gputimer.Stop();
// std::cout << "GPU Gaussian Filter took " << gputimer.GetMean()
<< " seconds.\n" << std::endl;
// ---------------
// RMS Error check
// ---------------
double diff = 0;
unsigned int nPix = 0;
itk::ImageRegionIterator<OutputImageType>
cit(CPUFilter->GetOutput(),
CPUFilter->GetOutput()->GetLargestPossibleRegion());
itk::ImageRegionIterator<OutputImageType>
git(GPUFilter->GetOutput(),
GPUFilter->GetOutput()->GetLargestPossibleRegion());
for(cit.GoToBegin(), git.GoToBegin(); !cit.IsAtEnd(); ++cit, ++git)
{
double err = (double)(cit.Get()) - (double)(git.Get());
// if(err > 0.1 || (double)cit.Get() < 0.1) std::cout
<< "CPU : " << (double)(cit.Get()) << ", GPU : " <<
(double)(git.Get()) << std::endl;
diff += err*err;
nPix++;
}
writer->SetInput( GPUFilter->GetOutput() );
// writer->SetInput( CPUFilter->GetOutput() );
writer->Update();
if (nPix > 0)
{
double RMSError = sqrt( diff / (double)nPix );
std::cout << "RMS Error : " << RMSError << std::endl;
// the CPU filter operator has type double
// but the double precision is not well-supported on most GPUs
// and by most drivers at this time. Therefore, the GPU filter
// operator has type float
// relax the RMS threshold here to allow for errors due to
// differences in precision
// NOTE:
// a threshold of 1.2e-5 worked on linux and Mac, but not Windows
// why?
double RMSThreshold = 1.7e-5;
if (vnl_math_isnan(RMSError))
{
std::cout << "RMS Error is NaN! nPix: " << nPix << std::endl;
return EXIT_FAILURE;
}
if (RMSError > RMSThreshold)
{
std::cout << "RMS Error exceeds threshold (" <<
RMSThreshold << ")" << std::endl;
return EXIT_FAILURE;
}
}
else
{
std::cout << "No pixels in output!" << std::endl;
return EXIT_FAILURE;
}
}
OUTPUT
Starting C:\DocsMaracuya\Build\Ejemplos\Gpu\GPUTest.exe...
Platform : AMD Accelerated Parallel Processing
Platform : AMD Accelerated Parallel Processing
Pitcairn
Maximum Work Item Sizes : { 256, 256, 256 }
Maximum Work Group Size : 256
Alignment in bits of the base address : 2048
Smallest alignment in bytes for any data type : 128
cl_khr_fp64 cl_amd_fp64 cl_khr_global_int32_base_atomics
cl_khr_global_int32_extended_atomics cl_khr_local_int32_base_atomics
cl_khr_local_int32_extended_atomics cl_khr_int64_base_atomics
cl_khr_int64_extended_atomics cl_khr_3d_image_writes
cl_khr_byte_addressable_store cl_khr_gl_sharing cl_ext_atomic_counters_32
cl_amd_device_attribute_query cl_amd_vec3 cl_amd_printf cl_amd_media_ops
cl_amd_media_ops2 cl_amd_popcnt cl_khr_d3d10_sharing
cl_amd_bus_addressable_memory cl_amd_c1x_atomics
CPU Gaussian Filter took 1.70355 seconds with 8 threads.
Defines: #define DIM_3
#define INTYPE short
#define OUTTYPE short
#define OPTYPE short
Defines: #define DIM_3
#define INTYPE short
#define OUTTYPE short
#define OPTYPE short
Defines: #define DIM_3
#define INTYPE short
#define OUTTYPE short
#define OPTYPE short
GPUDiscreteGaussianImageFilter (0000000002205DF0)
RTTI typeinfo: class itk::GPUDiscreteGaussianImageFilter<class
itk::GPUImage<short,3>,class itk::GPUImage<short,3> >
Reference Count: 1
Modified Time: 560
Debug: Off
Object Name:
Observers:
none
Inputs:
Primary: (000000000216E560) *
Indexed Inputs:
0: Primary (000000000216E560)
Required Input Names: Primary
NumberOfRequiredInputs: 1
Outputs:
Primary: (000000000218A070)
Indexed Outputs:
0: Primary (000000000218A070)
NumberOfRequiredOutputs: 1
Number Of Threads: 8
ReleaseDataFlag: Off
ReleaseDataBeforeUpdateFlag: Off
AbortGenerateData: Off
Progress: 0
Multithreader:
RTTI typeinfo: class itk::MultiThreader
Reference Count: 1
Modified Time: 499
Debug: Off
Object Name:
Observers:
none
Thread Count: 8
Global Maximum Number Of Threads: 128
Global Default Number Of Threads: 8
CoordinateTolerance: 1e-006
DirectionTolerance: 1e-006
Variance: [4, 4, 4]
MaximumError: [0.01, 0.01, 0.01]
MaximumKernelWidth: 10
FilterDimensionality: 3
UseImageSpacing: 0
InternalNumberOfStreamDivisions: 9
GPU: Enabled
GPU Gaussian Filter took 0.111351 seconds.
RMS Error : 26.4279
RMS Error exceeds threshold (1.7e-005)
C:\DocsMaracuya\Build\Ejemplos\Gpu\GPUTest.exe exited with code 1
--
José Ignacio Prieto
celular(nuevo): 94348182
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://www.itk.org/pipermail/insight-users/attachments/20140415/9ca04190/attachment-0001.html>
More information about the Insight-users
mailing list