形态学操作图像处理

问题描述

我为一个简单的图像处理任务编写了一个 OpenCL 内核:进行开运算(腐蚀 + 膨胀),然后根据开运算的结果执行闭运算(膨胀 + 腐蚀)。

问题是我得到的输出如下图所示:

enter image description here

我试过只做开运算,好像还挺稳定的,但是当我排队开合的时候,出现了那些黑色的横线。即使是开场变换也不是那么好,因为我们已经可以看到一些水平延伸。

这是 OpenCL 内核:

kernel void computeSmooth(
        const int width,const int height,const int gridStep,global const unsigned char* img,global unsigned char* erodeOpen,global unsigned char* dilateOpen,global unsigned char* dilateClose,global unsigned char* erodeClose)
{
    size_t id = get_global_id(0);
    const int size = 2;//(gridStep * gridStep) / 16;

    areaopening(id,size,width,height,img,erodeOpen,dilateOpen);
    areaClosing(id,dilateOpen,dilateClose,erodeClose);
}

bool validindex(int index,int width,int height)
{
    return index > 0 && index < (width * height * 3);
}

unsigned char* areaopening(
        const int id,const int size,const int width,global unsigned char* dilateOpen)
{
    int radius = size / 2;
    int baseIndex = id*3;
    int index;

    int red = 255;
    int green = 255;
    int blue = 255;

    // erode
    for(int l = -radius; l < radius; ++l)
    {
        for(int c = -radius; c < radius; ++c)
        {
            if(l == 0 && c == 0)
                continue;
            index = baseIndex + (l * width * 3) + (c * 3);
            if(validindex(index,height))
            {
                if(img[index] < red)
                    red = img[index];
                if(img[index+1] < green)
                    green = img[index+1];
                if(img[index+2] < blue)
                    blue = img[index+2];
            }
        }
    }
    erodeOpen[baseIndex] = red;
    erodeOpen[baseIndex+1] = green;
    erodeOpen[baseIndex+2] = blue;

    red = 0;
    green = 0;
    blue = 0;

    // dilate
    for(int l = -radius; l < radius; ++l)
    {
        for(int c = -radius; c < radius; ++c)
        {
            index = baseIndex + (l * width * 3) + (c * 3);
            if(validindex(index,height))
            {
                if(erodeOpen[index] > red)
                    red = erodeOpen[index];
                if(erodeOpen[index+1] > green)
                    green = erodeOpen[index+1];
                if(erodeOpen[index+2] > blue)
                    blue = erodeOpen[index+2];
            }
        }
    }
    dilateOpen[baseIndex] = red;
    dilateOpen[baseIndex+1] = green;
    dilateOpen[baseIndex+2] = blue;
}

unsigned char* areaClosing(
        const int id,global unsigned char* img,global unsigned char* erodeClose)
{
    int radius = size / 2;
    int baseIndex = id*3;
    int index;

    int red = 0;
    int green = 0;
    int blue = 0;

    // dilate
    for(int l = -radius; l < radius; ++l)
    {
        for(int c = -radius; c < radius; ++c)
        {
            if(l == 0 && c == 0)
                continue;
            index = baseIndex + (l * width * 3) + (c * 3);
            if(validindex(index,height))
            {
                if(img[index] > red)
                    red = img[index];
                if(img[index+1] > green)
                    green = img[index+1];
                if(img[index+2] > blue)
                    blue = img[index+2];
            }
        }
    }
    dilateClose[baseIndex] = red;
    dilateClose[baseIndex+1] = green;
    dilateClose[baseIndex+2] = blue;
    
    red = 255;
    green = 255;
    blue = 255;

    // erode
    for(int l = -radius; l < radius; ++l)
    {
        for(int c = -radius; c < radius; ++c)
        {
            index = baseIndex + (l * width * 3) + (c * 3);
            if(validindex(index,height))
            {
                if(dilateClose[index] < red)
                    red = dilateClose[index];
                if(dilateClose[index+1] < green)
                    green = dilateClose[index+1];
                if(dilateClose[index+2] < blue)
                    blue = dilateClose[index+2];
            }
        }
    }
    erodeClose[baseIndex] = red;
    erodeClose[baseIndex+1] = green;
    erodeClose[baseIndex+2] = blue;
}

原始图像是一个无符号字符数组,3 个 RGB 通道,每个通道 8 位。 宿主代码使用 C++。

我已经尝试过 OpenCV,它可以正确地做我想做的事情,但我确实想让我的实现工作,并了解它是如何制作的。

这是显示两种方法的主机代码,一种使用我的 GPU 内核,另一种使用 OpenCV(现在已评论):

void Window::computeSmooth()
{

    cl::Context context = program.getContext();
    cl::CommandQueue queue = program.getCommandQueue();
    cl::Kernel smoothKernel = program.getSmoothKernel();
    
    // reset smooth image data
    img.smooth.fill(QColor(0,255));

    // prepare data
    const int nbElems{img.width * img.height * 3};
    cl::Buffer originalImage(context,CL_MEM_READ_ONLY | CL_MEM_copY_HOST_PTR,nbElems * sizeof(unsigned char),img.originalRAW);
    cl::Buffer erodeOpen(context,CL_MEM_READ_WRITE,nbElems * sizeof(unsigned char));
    cl::Buffer dilateOpen(context,nbElems * sizeof(unsigned char));
    cl::Buffer dilateClose(context,nbElems * sizeof(unsigned char));
    cl::Buffer erodeClose(context,nbElems * sizeof(unsigned char));

    // set kernel parameters
    smoothKernel.setArg(0,img.width);
    smoothKernel.setArg(1,img.height);
    smoothKernel.setArg(2,grid.step);
    smoothKernel.setArg(3,originalImage);
    smoothKernel.setArg(4,erodeOpen);
    smoothKernel.setArg(5,dilateOpen);
    smoothKernel.setArg(6,dilateClose);
    smoothKernel.setArg(7,erodeClose);

    // launch kernel on the compute device
    queue.enqueueNDRangeKernel(smoothKernel,cl::NullRange,img.width * img.height,cl::NullRange);

    // get result back to host
    queue.enqueueReadBuffer(erodeClose,CL_TRUE,img.smoothRAW.get());

/*
    // OpenCV
    cv::Mat image = cv::Mat(img.height,img.width,CV_8UC3,img.originalRAW);
    int morph_size = 1;
    cv::Mat element = cv::getStructuringElement(cv::MORPH_RECT,cv::Size(2 * morph_size + 1,2 * morph_size + 1),cv::Point(morph_size,morph_size)); 
    cv::Mat opening;
    cv::Mat closing;

    // opening
    cv::morphologyEx(image,opening,cv::MORPH_OPEN,element,cv::Point(-1,-1),1);

    // Closing
    cv::morphologyEx(opening,closing,cv::MORPH_CLOSE,1);

    unsigned char* data = closing.data;
    for(int i{0}; i < (img.height * img.width * 3); i += 3)
    {
        img.smoothRAW[i] = *(closing.data + i);
        img.smoothRAW[i+1] = *(closing.data + i + 1);
        img.smoothRAW[i+2] = *(closing.data + i + 2);
    }
*/

将不胜感激。 提前致谢。

解决方法

工作组内的线程由 32 个(NVIDIA 扭曲)或 64 个(AMD 波前)的组执行。

erodeOpendilateClose 很可能在填充数据之前被使用。为确保在使用前先填充它们,请添加屏障:

erodeOpen[baseIndex+2] = blue;
barrier(CLK_GLOBAL_MEM_FENCE);

dilateClose[baseIndex+2] = blue;
barrier(CLK_GLOBAL_MEM_FENCE);

====== 更新 ========

我没有注意到 dilateOpen 也被传递给了 areaClosing(),它在函数内部被命名为 img。 然后在调用 areaOpening()areaClosing() 之间添加屏障:

areaOpening(id,size,width,height,img,erodeOpen,dilateOpen);
barrier(CLK_GLOBAL_MEM_FENCE);
areaClosing(id,dilateOpen,dilateClose,erodeClose);

这应该可以解决问题,尤其是当你分成 2 个内核时,它会起作用。