[ITK-users] GPUDiscreteGaussian not working

Jose Ignacio Prieto joseignacio.prieto at gmail.com
Tue Apr 15 11:18:34 EDT 2014


Hi all, I am having trouble using GPUDiscreteGaussianImageFilter. The CPU
version works for me, but the GPU version gives an output of 0. I tried
running the test code, but it did not help. I can run the GPU mean filter,
though. My card is an AMD W7000, and I am using OpenCL 1.2 and ITK 4.6.

Here are the code and the output. The images are VTK files of size
320x320x231 with ushort pixels.

/*=========================================================================

*

*  Copyright Insight Software Consortium

*

*  Licensed under the Apache License, Version 2.0 (the "License");

*  you may not use this file except in compliance with the License.

*  You may obtain a copy of the License at

*

*         http://www.apache.org/licenses/LICENSE-2.0.txt

*

*  Unless required by applicable law or agreed to in writing, software

*  distributed under the License is distributed on an "AS IS" BASIS,

*  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

*  See the License for the specific language governing permissions and

*  limitations under the License.

*

*=========================================================================*/


#include "itkImageFileReader.h"

#include "itkImageFileWriter.h"


#include "itkGPUImage.h"

#include "itkGPUKernelManager.h"

#include "itkGPUContextManager.h"

#include "itkGPUImageToImageFilter.h"

#include "itkGPUNeighborhoodOperatorImageFilter.h"


#include "itkTimeProbe.h"

#include "itkGaussianOperator.h"


#include "itkDiscreteGaussianImageFilter.h"

#include "itkGPUDiscreteGaussianImageFilter.h"

#include "itkMeanImageFilter.h"

#include "itkGPUMeanImageFilter.h"


// Pixel types shared by the CPU and GPU pipelines below. The float variants
// are kept (commented out) as an alternative configuration.
// NOTE(review): with these integer pixel types the run log shows the GPU
// kernels being built with "#define OPTYPE short". If the Gaussian weights
// are stored in that type they would truncate to 0, which would presumably
// explain the zero GPU output -- TODO confirm by switching to the float
// typedefs.
//  typedef float InputPixelType;

//  typedef float OutputPixelType;

typedef  short InputPixelType;

typedef  short OutputPixelType;


// 3-D GPU-capable image types; the same types are fed to both the CPU and
// the GPU filter in main().
typedef itk::GPUImage< InputPixelType,  3 >   InputImageType;

typedef itk::GPUImage< OutputPixelType, 3 >   OutputImageType;




// File reader producing the input image and file writer consuming the
// filtered output image.
typedef itk::ImageFileReader< InputImageType  >  ReaderType;

typedef itk::ImageFileWriter< OutputImageType >  WriterType;




/**
 * Runs itk::DiscreteGaussianImageFilter (CPU) and
 * itk::GPUDiscreteGaussianImageFilter on the same input image, prints the
 * time each one took, writes the GPU result to disk and reports the RMS
 * difference between the two outputs.
 *
 * Command line: inputfile outputfile
 *   argv[1]  image file to read
 *   argv[2]  file the GPU-filtered image is written to
 *
 * @return EXIT_SUCCESS when the RMS difference is within the threshold;
 *         EXIT_FAILURE when no GPU is available, arguments are missing,
 *         the RMS error is NaN or above the threshold, or no pixels were
 *         compared.
 */
int main(int argc, char *argv[])
{
    if(!itk::IsGPUAvailable())
    {
        std::cerr << "OpenCL-enabled GPU is not present." << std::endl;
        return EXIT_FAILURE;
    }

    if( argc <  3 )
    {
        // NOTE(review): the usage text advertises [num_dimensions], but no
        // such argument is parsed; the dimension comes from the image
        // typedefs.
        std::cerr << "Error: missing arguments" << std::endl;
        std::cerr << "inputfile outputfile [num_dimensions]" << std::endl;
        return EXIT_FAILURE;
    }

    std::string inFile( argv[1] );
    std::string outFile( argv[2] );

    ReaderType::Pointer reader = ReaderType::New();
    WriterType::Pointer writer = WriterType::New();

    reader->SetFileName( inFile );
    writer->SetFileName( outFile );

    // Read the image up front. Otherwise the first filter to be updated
    // pulls the reader and its timing silently includes the disk read,
    // while the second filter's timing does not.
    reader->Update();

    float variance = 4.0;

    // test 1~8 threads for CPU
    int nThreads = 8;

    // ---------------
    // CPU reference
    // ---------------
    typedef itk::DiscreteGaussianImageFilter< InputImageType, OutputImageType> CPUFilterType;

    CPUFilterType::Pointer CPUFilter = CPUFilterType::New();
    itk::TimeProbe cputimer;
    cputimer.Start();
    CPUFilter->SetNumberOfThreads( nThreads );
    CPUFilter->SetInput( reader->GetOutput() );
    CPUFilter->SetMaximumKernelWidth(10);
    CPUFilter->SetUseImageSpacingOff();
    CPUFilter->SetVariance( variance );
    CPUFilter->Update();
    cputimer.Stop();

    // Alternative CPU reference using the mean filter:
//    typedef itk::MeanImageFilter< InputImageType, OutputImageType> CPUFilterType;
//    CPUFilterType::Pointer CPUFilter = CPUFilterType::New();
//    itk::TimeProbe cputimer;
//    cputimer.Start();
//    CPUFilter->SetNumberOfThreads( nThreads );
//    CPUFilter->SetInput( reader->GetOutput() );
////    CPUFilter->SetMaximumKernelWidth(10);
////    CPUFilter->SetUseImageSpacingOff();
//    CPUFilter->SetRadius( variance );
//    CPUFilter->Update();
//    cputimer.Stop();

    std::cout << "CPU Gaussian Filter took " << cputimer.GetMean() << " seconds with "
              << CPUFilter->GetNumberOfThreads() << " threads.\n" << std::endl;

    // ---------------
    // GPU filter
    // ---------------
    typedef itk::GPUDiscreteGaussianImageFilter< InputImageType, OutputImageType> GPUFilterType;

    GPUFilterType::Pointer GPUFilter = GPUFilterType::New();
    GPUFilter->SetInput( reader->GetOutput() );
    GPUFilter->SetVariance( variance );
    GPUFilter->SetMaximumKernelWidth(10);
    GPUFilter->SetUseImageSpacingOff();
//    GPUFilter->DebugOn();
//    GPUFilter->GPUEnabledOff();

    // Dump the filter configuration before the timer starts so console
    // output is not counted as GPU filtering time.
    GPUFilter->Print(std::cout);

    itk::TimeProbe gputimer;
    gputimer.Start();
    GPUFilter->Update();
    GPUFilter->GetOutput()->UpdateBuffers(); // synchronization point (GPU->CPU memcpy)
    gputimer.Stop();

    std::cout << "GPU Gaussian Filter took " << gputimer.GetMean() << " seconds.\n" << std::endl;

    // Alternative GPU run using the mean filter:
//    typedef itk::GPUMeanImageFilter< InputImageType, OutputImageType> GPUFilterType;
//    GPUFilterType::Pointer GPUFilter = GPUFilterType::New();
//    itk::TimeProbe gputimer;
//    gputimer.Start();
//    GPUFilter->SetInput( reader->GetOutput() );
////    GPUFilter->SetVariance( variance );
////    GPUFilter->SetMaximumKernelWidth(10);
////    GPUFilter->SetUseImageSpacingOff();
////    GPUFilter->DebugOn();
////    GPUFilter->Print(std::cout);
//    GPUFilter->SetRadius( variance );
//    GPUFilter->Update();
//    GPUFilter->GetOutput()->UpdateBuffers(); // synchronization point (GPU->CPU memcpy)
//    gputimer.Stop();
//    std::cout << "GPU Gaussian Filter took " << gputimer.GetMean() << " seconds.\n" << std::endl;

    // ---------------
    // RMS Error check
    // ---------------
    double diff = 0;
    unsigned int nPix = 0;

    itk::ImageRegionIterator<OutputImageType> cit(
        CPUFilter->GetOutput(),
        CPUFilter->GetOutput()->GetLargestPossibleRegion());
    itk::ImageRegionIterator<OutputImageType> git(
        GPUFilter->GetOutput(),
        GPUFilter->GetOutput()->GetLargestPossibleRegion());

    // Walk both outputs in lock-step; stop as soon as either one is
    // exhausted so a region mismatch cannot run an iterator past its end.
    for(cit.GoToBegin(), git.GoToBegin(); !cit.IsAtEnd() && !git.IsAtEnd(); ++cit, ++git)
    {
        const double cpuValue = static_cast<double>(cit.Get());
        const double gpuValue = static_cast<double>(git.Get());
        const double err = cpuValue - gpuValue;
        // if(err > 0.1 || cpuValue < 0.1) std::cout << "CPU : " << cpuValue << ", GPU : " << gpuValue << std::endl;
        diff += err*err;
        nPix++;
    }

    // The GPU result is written regardless of how the comparison turns out.
    writer->SetInput( GPUFilter->GetOutput() );
//    writer->SetInput( CPUFilter->GetOutput() );
    writer->Update();

    if (nPix == 0)
    {
        std::cout << "No pixels in output!" << std::endl;
        return EXIT_FAILURE;
    }

    double RMSError = sqrt( diff / static_cast<double>(nPix) );
    std::cout << "RMS Error : " << RMSError << std::endl;

    // the CPU filter operator has type double
    // but the double precision is not well-supported on most GPUs
    // and by most drivers at this time.  Therefore, the GPU filter
    // operator has type float
    // relax the RMS threshold here to allow for errors due to
    // differences in precision
    // NOTE:
    //   a threshold of 1.2e-5 worked on linux and Mac, but not Windows
    //   why?
    double RMSThreshold = 1.7e-5;

    if (vnl_math_isnan(RMSError))
    {
        std::cout << "RMS Error is NaN! nPix: " << nPix << std::endl;
        return EXIT_FAILURE;
    }

    if (RMSError > RMSThreshold)
    {
        std::cout << "RMS Error exceeds threshold (" << RMSThreshold << ")" << std::endl;
        return EXIT_FAILURE;
    }

    return EXIT_SUCCESS;
}



OUTPUT


Starting C:\DocsMaracuya\Build\Ejemplos\Gpu\GPUTest.exe...

Platform : AMD Accelerated Parallel Processing

Platform : AMD Accelerated Parallel Processing

Pitcairn

Maximum Work Item Sizes : { 256, 256, 256 }

Maximum Work Group Size : 256

Alignment in bits of the base address : 2048

Smallest alignment in bytes for any data type : 128

cl_khr_fp64 cl_amd_fp64 cl_khr_global_int32_base_atomics
cl_khr_global_int32_extended_atomics cl_khr_local_int32_base_atomics
cl_khr_local_int32_extended_atomics cl_khr_int64_base_atomics
cl_khr_int64_extended_atomics cl_khr_3d_image_writes
cl_khr_byte_addressable_store cl_khr_gl_sharing cl_ext_atomic_counters_32
cl_amd_device_attribute_query cl_amd_vec3 cl_amd_printf cl_amd_media_ops
cl_amd_media_ops2 cl_amd_popcnt cl_khr_d3d10_sharing
cl_amd_bus_addressable_memory cl_amd_c1x_atomics

CPU Gaussian Filter took 1.70355 seconds with 8 threads.


Defines: #define DIM_3

#define INTYPE short

#define OUTTYPE short

#define OPTYPE short


Defines: #define DIM_3

#define INTYPE short

#define OUTTYPE short

#define OPTYPE short


Defines: #define DIM_3

#define INTYPE short

#define OUTTYPE short

#define OPTYPE short


GPUDiscreteGaussianImageFilter (0000000002205DF0)

RTTI typeinfo: class itk::GPUDiscreteGaussianImageFilter<class
itk::GPUImage<short,3>,class itk::GPUImage<short,3> >

Reference Count: 1

Modified Time: 560

Debug: Off

Object Name:

Observers:

none

Inputs:

Primary: (000000000216E560) *

Indexed Inputs:

0: Primary (000000000216E560)

Required Input Names: Primary

NumberOfRequiredInputs: 1

Outputs:

Primary: (000000000218A070)

Indexed Outputs:

0: Primary (000000000218A070)

NumberOfRequiredOutputs: 1

Number Of Threads: 8

ReleaseDataFlag: Off

ReleaseDataBeforeUpdateFlag: Off

AbortGenerateData: Off

Progress: 0

Multithreader:

RTTI typeinfo: class itk::MultiThreader

Reference Count: 1

Modified Time: 499

Debug: Off

Object Name:

Observers:

none

Thread Count: 8

Global Maximum Number Of Threads: 128

Global Default Number Of Threads: 8

CoordinateTolerance: 1e-006

DirectionTolerance: 1e-006

Variance: [4, 4, 4]

MaximumError: [0.01, 0.01, 0.01]

MaximumKernelWidth: 10

FilterDimensionality: 3

UseImageSpacing: 0

InternalNumberOfStreamDivisions: 9

GPU: Enabled

GPU Gaussian Filter took 0.111351 seconds.


RMS Error : 26.4279

RMS Error exceeds threshold (1.7e-005)

C:\DocsMaracuya\Build\Ejemplos\Gpu\GPUTest.exe exited with code 1


-- 
José Ignacio Prieto
celular(nuevo): 94348182
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://www.itk.org/pipermail/insight-users/attachments/20140415/9ca04190/attachment-0001.html>


More information about the Insight-users mailing list