[ITK-users] Writing from an external buffer to VectorImage

Dženan Zukić dzenanz at gmail.com
Mon Jul 3 08:27:36 EDT 2017


Thanks for sharing your code Gordian, it will probably help somebody in the
future!

On Mon, Jul 3, 2017 at 7:22 AM, Kabelitz, Gordian <
Gordian.Kabelitz at medma.uni-heidelberg.de> wrote:

> Hello,
>
>
>
> I want to provide a solution to the question I had. In the case that
> someone else need an code example.
>
> Just to give an idea how the implementation can look like.
>
> The main reads an image and passes this image to the function that calls
> the kernel.
>
> After the kernel is done the result is copied to the new vector image [1].
>
>
>
> int main(int argc, char* argv[])
>
> {
>
>
>
>        createImage();
>
>        auto nrrdIO = NRRDIOType::New();
>
>        auto reader = ReaderType::New();
>
>        reader->SetFileName("newImage.nrrd");
>
>        reader->SetImageIO(nrrdIO);
>
>
>
>        try
>
>        {
>
>               reader->Update();
>
>        }
>
>        catch (itk::ExceptionObject e)
>
>        {
>
>               std::cerr << "Exception caught!\n";
>
>               std::cerr << e << std::endl;
>
>        }
>
>
>
>        auto image = reader->GetOutput();
>
>
>
>        auto size = image->GetLargestPossibleRegion().GetSize();
>
>        int sz[3] = {size[0],size[1],size[2]};
>
>        auto totalVoxel = sz[0] * sz[1] * sz[2];
>
>
>
>        auto cuda = CudaTest();
>
>        auto out = new float4[totalVoxel];
>
>        cuda.runKernel(out, image->GetBufferPointer(), sz);
>
>
>
>        auto vecImage = VectorImageType::New();
>
>        VectorImageType::IndexType index;
>
>        index[0] = 0;
>
>        index[1] = 0;
>
>        index[2] = 0;
>
>        VectorImageType::RegionType region(index,size);
>
>        vecImage->SetRegions(region);
>
>        vecImage->SetVectorLength(3);
>
>        vecImage->Allocate();
>
>
>
>        // copy image buffer to vecImage, component by component
>
>        auto vecBuffer = vecImage->GetBufferPointer();
>
>        auto j = 0;
>
>        for (auto i = 0; i < totalVoxel; ++i)
>
>        {
>
>               vecBuffer[j] = out[i].x; j++;
>
>               vecBuffer[j] = out[i].y; j++;
>
>               vecBuffer[j] = out[i].z; j++;
>
>        }
>
>
>
>        auto writer = WriterType::New();
>
>        writer->SetInput(vecImage);
>
>        writer->SetFileName("out.nrrd");
>
>        writer->SetImageIO(nrrdIO);
>
>        try
>
>        {
>
>               writer->Update();
>
>        }
>
>        catch (itk::ExceptionObject e)
>
>        {
>
>               std::cerr << "Exception caught!\n";
>
>               std::cerr << e << std::endl;
>
>        }
>
>
>
>        delete[] out;
>
>
>
> }
>
>
>
> The function that starts the kernel looks like this:
>
>
>
> void CudaTest::runKernel(float4* out, float* data, int* size)
>
> {
>
>        auto totalVoxel = size[0] * size[1] * size[2];
>
>        // copy image to texture memory
>
>        auto channelDesc = cudaCreateChannelDesc<float>();
>
>        cudaArray *cuArray;
>
>        auto extent = make_cudaExtent(size[0], size[1], size[2]);
>
>        gpuErrChk(cudaMalloc3DArray(&cuArray, &channelDesc, extent));
>
>        cudaMemcpy3DParms copyparms = { 0 };
>
>        copyparms.extent = extent;
>
>        copyparms.dstArray = cuArray;
>
>        copyparms.kind = cudaMemcpyHostToDevice;
>
>        copyparms.srcPtr = make_cudaPitchedPtr((void*)data, size[0] *
> sizeof(float), size[0], size[1]);
>
>        gpuErrChk(cudaMemcpy3D(&copyparms));
>
>
>
>        // set texture configuration and bind array to texture
>
>        tex_volume.addressMode[0] = cudaAddressModeClamp;
>
>        tex_volume.addressMode[1] = cudaAddressModeClamp;
>
>        tex_volume.addressMode[2] = cudaAddressModeClamp;
>
>        tex_volume.filterMode = cudaFilterModeLinear;
>
>        tex_volume.normalized = false;
>
>        gpuErrChk(cudaBindTextureToArray(tex_volume, cuArray,
> channelDesc));
>
>
>
>        // allocate memory for gradient image data on the device
>
>        float4 *dev_gradientImage;
>
>        gpuErrChk(cudaMalloc(&dev_gradientImage, totalVoxel * sizeof(float4
> )));
>
>
>
>        // copy image dimension to device
>
>        int* dev_dimension;
>
>        gpuErrChk(cudaMalloc(&dev_dimension, 3 * sizeof(int)));
>
>        gpuErrChk(cudaMemcpy(dev_dimension, size, 3 * sizeof(int),
> cudaMemcpyHostToDevice));
>
>
>
>        // start computation for the gradient image
>
>        dim3 blockDim(16, 8, 8); //<- threads in block, change according
> to used GPU, max 1024 threads in a single block
>
>        dim3 grid((size[0] / blockDim.x) + 1, (size[1] / blockDim.y) + 1, (
> size[2] / blockDim.z) + 1);
>
>
>
>        computeGradientImage << <grid, blockDim >> >(dev_gradientImage,
> dev_dimension);
>
>        gpuErrChk(cudaGetLastError());
>
>        gpuErrChk(cudaDeviceSynchronize());
>
>
>
>        // unbind texture
>
>        gpuErrChk(cudaUnbindTexture(tex_volume));
>
>        gpuErrChk(cudaFreeArray(cuArray));
>
>
>
>        gpuErrChk(cudaMemcpy(out, dev_gradientImage, totalVoxel * sizeof(
> float4), cudaMemcpyDeviceToHost));
>
>        printf("Kernel is done.\n");
>
> }
>
>
>
> The kernel used in this example.
>
>
>
> __global__ void computeGradientImage(float4* gradientImage, int* dimension
> )
>
> {
>
>        // every thread computes the float4 voxel with theta,phi,magnitude
> from gradient image
>
>        int idx = blockIdx.x * blockDim.x + threadIdx.x;
>
>        int idy = blockIdx.y * blockDim.y + threadIdx.y;
>
>        int idz = blockIdx.z * blockDim.z + threadIdx.z;
>
>
>
>        if (idx < dimension[0] && idy < dimension[1] && idz < dimension[2])
>
>        {
>
>               // define sobel filter for each direction
>
>               float x_000 = tex3D(tex_volume, idx - 1, idy - 1, idz - 1);
>
>               float x_001 = tex3D(tex_volume, idx - 1, idy - 1, idz    );
>
>               float x_002 = tex3D(tex_volume, idx - 1, idy - 1, idz + 1);
>
>               float x_010 = tex3D(tex_volume, idx - 1, idy,     idz - 1);
>
>               float x_011 = tex3D(tex_volume, idx - 1, idy,     idz    );
>
>               float x_012 = tex3D(tex_volume, idx - 1, idy,     idz + 1);
>
>               float x_020 = tex3D(tex_volume, idx - 1, idy + 1, idz - 1);
>
>               float x_021 = tex3D(tex_volume, idx - 1, idy + 1, idz    );
>
>               float x_022 = tex3D(tex_volume, idx - 1, idy + 1, idz + 1);
>
>
>
>               float x_100 = tex3D(tex_volume, idx,     idy - 1, idz - 1);
>
>               float x_101 = tex3D(tex_volume, idx,     idy - 1, idz    );
>
>               float x_102 = tex3D(tex_volume, idx,     idy - 1, idz + 1);
>
>               float x_110 = tex3D(tex_volume, idx,     idy,     idz - 1);
>
> //            float x_111 = tex3D(tex_volume, idx,     idy,     idz    );
>
>               float x_112 = tex3D(tex_volume, idx,     idy,     idz + 1);
>
>               float x_120 = tex3D(tex_volume, idx,     idy + 1, idz - 1);
>
>               float x_121 = tex3D(tex_volume, idx,     idy + 1, idz    );
>
>               float x_122 = tex3D(tex_volume, idx,     idy + 1, idz + 1);
>
>
>
>               float x_200 = tex3D(tex_volume, idx + 1, idy - 1, idz - 1);
>
>               float x_201 = tex3D(tex_volume, idx + 1, idy - 1, idz    );
>
>               float x_202 = tex3D(tex_volume, idx + 1, idy - 1, idz + 1);
>
>               float x_210 = tex3D(tex_volume, idx + 1, idy,     idz - 1);
>
>               float x_211 = tex3D(tex_volume, idx + 1, idy,     idz    );
>
>               float x_212 = tex3D(tex_volume, idx + 1, idy,     idz + 1);
>
>               float x_220 = tex3D(tex_volume, idx + 1, idy + 1, idz - 1);
>
>               float x_221 = tex3D(tex_volume, idx + 1, idy + 1, idz    );
>
>               float x_222 = tex3D(tex_volume, idx + 1, idy + 1, idz + 1);
>
>
>
>               // derivative in x direction
>
>               auto centerX = - 1.f*x_000 - 3.f*x_010 - 1.f*x_020
>
>                              - 3.f*x_001 - 6.f*x_011 - 3.f*x_021
>
>                              - 1.f*x_002 - 3.f*x_012 - 1.f*x_022
>
>                              + 1.f*x_200 + 3.f*x_210 + 1.f*x_220
>
>                              + 3.f*x_201 + 6.f*x_211 + 3.f*x_221
>
>                              + 1.f*x_202 + 3.f*x_212 + 1.f*x_222;
>
>
>
>               // derivative in y direction
>
>               auto centerY = - 1.f*x_000 - 3.f*x_100 - 1.f*x_200
>
>                              - 3.f*x_001 - 6.f*x_101 - 3.f*x_201
>
>                              - 1.f*x_002 - 3.f*x_102 - 1.f*x_202
>
>                              + 1.f*x_020 + 3.f*x_120 + 1.f*x_220
>
>                              + 3.f*x_021 + 6.f*x_121 + 3.f*x_221
>
>                              + 1.f*x_022 + 3.f*x_122 + 1.f*x_222;
>
>
>
>               // derivative in z direction
>
>               auto centerZ = - 1.f*x_000 - 3.f*x_100 - 1.f*x_200
>
>                              - 3.f*x_010 - 6.f*x_110 - 3.f*x_210
>
>                              - 1.f*x_020 - 3.f*x_120 - 1.f*x_220
>
>                              + 1.f*x_002 + 3.f*x_102 + 1.f*x_202
>
>                              + 3.f*x_012 + 6.f*x_112 + 3.f*x_212
>
>                              + 1.f*x_022 + 3.f*x_122 + 1.f*x_222;
>
>
>
>               // magnitude of each voxels gradient
>
>               auto magn = sqrtf(centerX*centerX + centerY*centerY +
> centerZ*centerZ);
>
>
>
>               gradientImage[idx + dimension[0] * (idy + idz * dimension[1])]
> = make_float4(centerX, centerY, centerZ, magn);
>
>        }
>
> }
>
>
>
> with kind regards,
>
> Gordian
>
>
>
> [1]: https://itk.org/Doxygen/html/classitk_1_1VectorImage.html
>
>
>
>
>
> *Von:* Dženan Zukić [mailto:dzenanz at gmail.com]
> *Gesendet:* Mittwoch, 28. Juni 2017 17:17
> *An:* Kabelitz, Gordian
> *Cc:* insight-users at itk.org
> *Betreff:* Re: [ITK-users] Writing from an external buffer to VectorImage
>
>
>
> Hi Gordian,
>
>
>
> this approach looks like it should work. What is wrong with it?
>
>
>
> Regards,
>
> Dženan Zukić, PhD, Senior R&D Engineer, Kitware (Carrboro, N.C.)
>
>
>
> On Wed, Jun 28, 2017 at 9:51 AM, Kabelitz, Gordian <
> Gordian.Kabelitz at medma.uni-heidelberg.de> wrote:
>
> Hello,
>
> i computed a gradient with my own function and as a result a pointer to an
> image buffer is provided. I know the size, origin and spacing of the
> gradient component image.
> I want to copy the gradient image into an itk::VectorImage with the
> components for the x,y,z gradients.
>
> The way I copied the image to the GPU is that I retrieved the buffer
> pointer from my input image and use the pointer to copy the image data to
> the GPU.
> I used the way proposed in [1]. The computeGradientImage method is listed
> at the end of this mail.
>
> [...]
> // get float pointer to image data
> ImageType::Pointer image = reader->GetOutput();
> Image->Update();
>
> float* data = image.GetBufferPointer();
> // copy image data to GPU texture memory (this works)
> gpu_dev->setVoxels(dimension, voxelSize, data);
> [...]
> computeGradientImage<<<n,m>>> (dev_gradientImage, dimension);
>
> // copy resulting gradientImage to host variable
> float4* host_gradientImage;
> cudaMemcpy(host_gradient, dev_gradientImage, numberOfVoxels*sizeof(float4))
> ;
>
> --> Pseudo Code <--
> // Now I want to reverse the copy process. I have a float4 image and want
> to copy this into a itk::VectorImage with VariableVectorLength of 3
> (skipping the magnitude value).
> [...] -> size, spacing, origin, region definition
> Itk::VectorImageType vecImage = VectorImageType::New();
> vecImage->setRegion(region);
> vecImage ->SetVectorLength(3);
> vecImage->Allocate();
>
> // copy image buffer to vecImage, component by component
> auto vecBuffer = vecImage->getBufferPointer();
> auto j = 0;
> for (i=0; i<totalVoxel; ++i)
> {
>         vecbuffer[j] = host_gradient[i].x; j++;
>         vecbuffer[j] = host_gradient[i].y; j++;
>         vecbuffer[j] = host_gradient[i].z; j++;
> }
>
> // save vecImage as nrrd image
> [...]
>
> I haven't found a way to achieve my idea.
> Are there any suggestions or examples?
> As far I can see I cannot use the itk::ImportImageFilter.
>
> Thank you for any suggestions.
> With kind regards,
> Gordian
>
> [1]: https://itk.org/CourseWare/Training/GettingStarted-V.pdf
>
> void computeGradientImage(float4* gradientImage, int* dimension)
> {
>         // every thread computes the float4 voxel with theta,phi,magnitude
> from gradient image
>         int idx = blockIdx.x * blockDim.x + threadIdx.x;
>         int idy = blockIdx.y * blockDim.y + threadIdx.y;
>         int idz = blockIdx.z * blockDim.z + threadIdx.z;
>
>         if (idx < dimension[0] && idy < dimension[1] && idz < dimension[2])
>         {
>                 // define sobel filter for each direction
>                 [...]
>
>                 // run sobel on image in texture memory for each direction
> and put result into a float4 image
>                 gradientImage[idx + dimension[0] * (idy + idz *
> dimension[1])] = make_float4(sobelX, sobelY, sobelZ, magn);
>         }
> }
>
>
> _____________________________________
> Powered by www.kitware.com
>
> Visit other Kitware open-source projects at
> http://www.kitware.com/opensource/opensource.html
>
> Kitware offers ITK Training Courses, for more information visit:
> http://www.kitware.com/products/protraining.php
>
> Please keep messages on-topic and check the ITK FAQ at:
> http://www.itk.org/Wiki/ITK_FAQ
>
> Follow this link to subscribe/unsubscribe:
> http://public.kitware.com/mailman/listinfo/insight-users
>
>
>
> _____________________________________
> Powered by www.kitware.com
>
> Visit other Kitware open-source projects at
> http://www.kitware.com/opensource/opensource.html
>
> Kitware offers ITK Training Courses, for more information visit:
> http://www.kitware.com/products/protraining.php
>
> Please keep messages on-topic and check the ITK FAQ at:
> http://www.itk.org/Wiki/ITK_FAQ
>
> Follow this link to subscribe/unsubscribe:
> http://public.kitware.com/mailman/listinfo/insight-users
>
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://public.kitware.com/pipermail/insight-users/attachments/20170703/7614ad6d/attachment-0001.html>


More information about the Insight-users mailing list