如何销毁CUDA图形数据类型

问题描述

我使用一个函数,从一些RGB数据创建一对OpenGL纹理/CUDA表面。cudaSurfaceObject_t可以在CUDA内核中用于GPU加速的图像处理,而GLuint可以用于呈现CUDA内核的结果。该函数包含在以下程序中:



#include <glad/glad.h>
#include <GLFW/glfw3.h>

#include <cudaGL.h>
#include <cuda_gl_interop.h>

#include <iostream>



// Checks the CUDA runtime's last recorded error. On failure it prints the
// source file, line number and error string, then terminates the process with
// exit code 1. cudaGetLastError() also resets the recorded error, so each use
// of the macro only reports errors raised since the previous check.
//
// The body is wrapped in do { ... } while (0) so that `cudaCheckerror();`
// expands to exactly one statement and stays safe inside unbraced if/else.
#define cudaCheckerror() \
  do { \
    cudaError_t err = cudaGetLastError(); \
    if(err != cudaSuccess) { \
      printf("Cuda error: %s:%d: %s\n",__FILE__,__LINE__,cudaGetErrorString(err)); \
      exit(1); \
    } \
  } while (0)

void createTextureSurfacePair(int width,int height,uint8_t* const data,gluint& textureOut,cudaGraphicsResource_t& graphicsResourceOut,cudaSurfaceObject_t& surfaceOut) {

    // Create the OpenGL texture that will be displayed with GLAD and GLFW
    glGenTextures(1,&textureOut);
    // Bind to our texture handle
    glBindTexture(GL_TEXTURE_2D,textureOut);
    // Set texture interpolation methods for minification and magnification
    glTexParameteri(GL_TEXTURE_2D,GL_TEXTURE_MIN_FILTER,GL_NEAREST);
    glTexParameteri(GL_TEXTURE_2D,GL_TEXTURE_MAG_FILTER,GL_NEAREST);
    // Set texture clamping method
    glTexParameteri(GL_TEXTURE_2D,GL_TEXTURE_WRAP_S,GL_CLAMP);
    glTexParameteri(GL_TEXTURE_2D,GL_TEXTURE_WRAP_T,GL_CLAMP);
    // Create the texture and its attributes
    glTexImage2D(GL_TEXTURE_2D,// Type of texture
        0,// Pyramid level (for mip-mapping) - 0 is the top level
        GL_RGBA,// Internal color format to convert to
        width,// Image width  i.e. 640 for Kinect in standard mode
        height,// Image height i.e. 480 for Kinect in standard mode
        0,// Border width in pixels (can either be 1 or 0)
        GL_BGR,// Input image format (i.e. GL_RGB,GL_RGBA,GL_BGR etc.)
        GL_UNSIGNED_BYTE,// Image data type.
        data);            // The actual image data itself
    //Note that the type of this texture is an RGBA UNSIGNED_BYTE type. When CUDA surfaces
    //are synchronized with OpenGL textures,the surfaces will be of the same type.
    //They won't kNow or care about their data types though,for they are all just byte arrays
    //at heart. So be careful to ensure that any CUDA kernel that handles a CUDA surface
    //uses it as an appropriate type. You will see that the update_surface kernel (defined 
    //above) treats each pixel as four unsigned bytes along the X-axis: one for red,green,blue,//and alpha respectively.

    //Create the CUDA array and texture reference
    cudaArray* bitmap_d;
    //Register the GL texture with the CUDA graphics library. A new cudaGraphicsResource is created,and its address is placed in cudaTextureID.
    //Documentation: https://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__OPENGL.html#group__CUDART__OPENGL_1g80d12187ae7590807c7676697d9fe03d
    cudaGraphicsglregisterImage(&graphicsResourceOut,textureOut,GL_TEXTURE_2D,cudaGraphicsRegisterFlagsNone);
    cudaCheckerror();
    //Map graphics resources for access by CUDA.
    //Documentation: https://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__INTEROP.html#group__CUDART__INTEROP_1gad8fbe74d02adefb8e7efb4971ee6322
    cudaGraphicsMapResources(1,&graphicsResourceOut,0);
    cudaCheckerror();
    //Get the location of the array of pixels that was mapped by the prevIoUs function and place that address in bitmap_d
    //Documentation: https://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__INTEROP.html#group__CUDART__INTEROP_1g0dd6b5f024dfdcff5c28a08ef9958031
    cudaGraphicsSubResourceGetMappedArray(&bitmap_d,graphicsResourceOut,0);
    cudaCheckerror();
    //Create a CUDA resource descriptor. This is used to get and set attributes of CUDA resources.
    //This one will tell CUDA how we want the bitmap_surface to be configured.
    //Documentation for the struct: https://docs.nvidia.com/cuda/cuda-runtime-api/structcudaResourceDesc.html#structcudaResourceDesc
    struct cudaResourceDesc resDesc;
    //Clear it with 0s so that some flags aren't arbitrarily left at 1s
    memset(&resDesc,sizeof(resDesc));
    //Set the resource type to be an array for convenient processing in the CUDA kernel.
    //List of resTypes: https://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__TYPES.html#group__CUDART__TYPES_1g067b774c0e639817a00a972c8e2c203c
    resDesc.resType = cudaResourceTypeArray;
    //Bind the new descriptor with the bitmap created earlier.
    resDesc.res.array.array = bitmap_d;
    //Create a new CUDA surface ID reference.
    //This is really just an unsigned long long.
    //Docuentation: https://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__TYPES.html#group__CUDART__TYPES_1gbe57cf2ccbe7f9d696f18808dd634c0a
    surfaceOut = 0;
    //Create the surface with the given description. That surface ID is placed in bitmap_surface.
    //Documentation: https://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__SURFACE__OBJECT.html#group__CUDART__SURFACE__OBJECT_1g958899474ab2c5f40d233b524d6c5a01
    cudaCreateSurfaceObject(&surfaceOut,&resDesc);
    cudaCheckerror();
}


void initGL() {

    // Setup window
    if (!glfwInit())
        return;

    // Decide GL+GLSL versions
#if __APPLE__
    // GL 3.2 + GLSL 150
    const char* glsl_version = "#version 150";
    glfwWindowHint(GLFW_CONTEXT_VERSION_MAJOR,3);
    glfwWindowHint(GLFW_CONTEXT_VERSION_MInor,2);
    glfwWindowHint(GLFW_OPENGL_PROFILE,GLFW_OPENGL_CORE_PROFILE);  // 3.2+ only
    glfwWindowHint(GLFW_OPENGL_FORWARD_COMPAT,GL_TRUE);            // required on Mac
#else
    // GL 3.0 + GLSL 130
    const char* glsl_version = "#version 130";
    glfwWindowHint(GLFW_CONTEXT_VERSION_MAJOR,0);
    //glfwWindowHint(GLFW_OPENGL_PROFILE,GLFW_OPENGL_CORE_PROFILE);  // 3.2+ only
    //glfwWindowHint(GLFW_OPENGL_FORWARD_COMPAT,GL_TRUE);            // 3.0+ only
#endif

    // Create window with graphics context
    GLFWwindow* currentGLFWWindow = glfwCreateWindow(1280,720,"Raytracing in One Weekend",NULL,NULL);
    if (currentGLFWWindow == NULL)
        return;
    glfwMakeContextCurrent(currentGLFWWindow);
    glfwSwapInterval(3); // Enable vsync

    if (!gladLoadGL()) {
        // GLAD Failed
        printf( "GLAD Failed to initialize :(" );
        return;
    }
}


int main() {
    initGL();

    int size = 500;

    uint8_t* data = new uint8_t[size * size * 3]; //dummy 100x100 RGB image

    cudaSurfaceObject_t a;
    cudaGraphicsResource_t b;
    gluint c;

    for (int i = 0; i < 10000; i++) {
/*------ATTEMPT TO CREATE CUDA SURFACE AND OPENGL TEXTURE------------*/
        createTextureSurfacePair(size,size,data,c,b,a);

/*------ATTEMPT TO DESTROY CUDA SURFACE AND OPENGL TEXTURE------------*/
        //Destroy surface
        cudaDestroySurfaceObject(a);
        //Destroy graphics resource
        cudaGraphicsUnmapResources(1,&b);
        //Destroy texture
        glDeleteTextures(1,&c);

        if (i % 100 == 0) printf("Iteration %d\n",i);
    }
}

此程序似乎存在内存泄漏,因为它会导致专用GPU内存迅速增加,直到程序崩溃为止。我在main函数中漏掉了对哪个对象的销毁?

解决方法

当我添加以下行时:

cudaGraphicsUnregisterResource(b);
并把它放在代码中下面这一行之后:

cudaGraphicsUnmapResources(1,&b);

您的程序在我这里就可以运行到结束(即跑完指定的10000次循环),并且没有引发任何错误。在cuda-memcheck下运行时,它同样没有报告任何错误。

这个函数(cudaGraphicsUnregisterResource)就是cudaGraphicsGLRegisterImage对应的“析构函数”,即用于撤销注册、释放资源的函数。您可以通过以下方式了解到这一点:

  1. 研究使用CUDA / OpenGL互操作的各种CUDA示例代码(例如simpleGLES,postProcessGL,imageDenoisingGL,bilateralFilter等)。

  2. 查阅cudaGraphicsGLRegisterImage的运行时API文档(runtime API docs),并注意到在该函数说明的底部列出了:

另请参阅: cudaGraphicsUnregisterResource ,cudaGraphicsMapResources,cudaGraphicsSubResourceGetMappedArray,cuGraphicsGLRegisterImage