OpenCL光线跟踪程序在CPU上工作正常,但在GPU上始终不正常

问题描述

[请查看下面的编辑,问题的解决方案可能就在这里]

我正在尝试通过研究小型光线跟踪器来学习OpenCL(请参见以下link的代码)。

我没有“真正的” GPU,我目前正在装有Intel®Iris™Graphics 6100图形卡的macosx笔记本电脑上。

代码在CPU上运行良好,但在GPU上的行为却很奇怪。它的工作(或不工作)取决于每个像素的样本数量(在场景中传播光线后,通过像素拍摄以得到其颜色的光线的数量)。如果我采样少量(64),则可以得到1280x720的图片,但是如果采样128个样本,则只能得到较小的图片。据我了解,样本数量不应有任何变化(当然,图片质量除外)。我错过了与OpenCL / GPU纯粹相关的东西吗?

此外,似乎是从崩溃的GPU内存中提取了结果:

queue.enqueueReadBuffer(cl_output,CL_TRUE,image_width * image_height * sizeof(cl_float4),cpu_output);

在此阶段,我得到一个“中止陷阱:6”。

我想念一些东西。


[EDIT] 经过一番研究,我发现了一条有趣的线索:图形卡可能会因时间过多而自愿中止该任务。可以采用这种方式来避免“冻结”屏幕。 This话题对此进行讨论。

您对此有何看法?

我找不到关闭此行为的方法。你知道怎么办吗?


以下是文件:

main.cpp:

// OpenCL based simple sphere path tracer by Sam Lapere,2016
// based on smallpt by Kevin Beason 
// http://raytracey.blogspot.com 

#include <iostream>
#include <fstream>
#include <vector>
#include <CL\cl.hpp>

using namespace std;
using namespace cl;

const int image_width = 1280;
const int image_height = 720;

cl_float4* cpu_output;
CommandQueue queue;
Device device;
Kernel kernel;
Context context;
Program program;
Buffer cl_output;
Buffer cl_spheres;

// dummy variables are required for memory alignment
// float3 is considered as float4 by OpenCL
struct Sphere
{
    cl_float radius;
    cl_float dummy1;   
    cl_float dummy2;
    cl_float dummy3;
    cl_float3 position;
    cl_float3 color;
    cl_float3 emission;
};

void pickPlatform(Platform& platform,const vector<Platform>& platforms){

    if (platforms.size() == 1) platform = platforms[0];
    else{
        int input = 0;
        cout << "\nChoose an OpenCL platform: ";
        cin >> input;

        // handle incorrect user input
        while (input < 1 || input > platforms.size()){
            cin.clear(); //clear errors/bad flags on cin
            cin.ignore(cin.rdbuf()->in_avail(),'\n'); // ignores exact number of chars in cin buffer
            cout << "No such option. Choose an OpenCL platform: ";
            cin >> input;
        }
        platform = platforms[input - 1];
    }
}

void pickDevice(Device& device,const vector<Device>& devices){

    if (devices.size() == 1) device = devices[0];
    else{
        int input = 0;
        cout << "\nChoose an OpenCL device: ";
        cin >> input;

        // handle incorrect user input
        while (input < 1 || input > devices.size()){
            cin.clear(); //clear errors/bad flags on cin
            cin.ignore(cin.rdbuf()->in_avail(),'\n'); // ignores exact number of chars in cin buffer
            cout << "No such option. Choose an OpenCL device: ";
            cin >> input;
        }
        device = devices[input - 1];
    }
}

void printErrorLog(const Program& program,const Device& device){

    // Get the error log and print to console
    string buildlog = program.getBuildInfo<CL_PROGRAM_BUILD_LOG>(device);
    cerr << "Build log:" << std::endl << buildlog << std::endl;

    // Print the error log to a file
    FILE *log = fopen("errorlog.txt","w");
    fprintf(log,"%s\n",buildlog);
    cout << "Error log saved in 'errorlog.txt'" << endl;
    system("PAUSE");
    exit(1);
}

void initOpenCL()
{
    // Get all available OpenCL platforms (e.g. AMD OpenCL,Nvidia CUDA,Intel OpenCL)
    vector<Platform> platforms;
    Platform::get(&platforms);
    cout << "Available OpenCL platforms : " << endl << endl;
    for (int i = 0; i < platforms.size(); i++)
        cout << "\t" << i + 1 << ": " << platforms[i].getInfo<CL_PLATFORM_NAME>() << endl;

    // Pick one platform
    Platform platform;
    pickPlatform(platform,platforms);
    cout << "\nUsing OpenCL platform: \t" << platform.getInfo<CL_PLATFORM_NAME>() << endl;

    // Get available OpenCL devices on platform
    vector<Device> devices;
    platform.getDevices(CL_DEVICE_TYPE_ALL,&devices);

    cout << "Available OpenCL devices on this platform: " << endl << endl;
    for (int i = 0; i < devices.size(); i++){
        cout << "\t" << i + 1 << ": " << devices[i].getInfo<CL_DEVICE_NAME>() << endl;
        cout << "\t\tMax compute units: " << devices[i].getInfo<CL_DEVICE_MAX_COMPUTE_UNITS>() << endl;
        cout << "\t\tMax work group size: " << devices[i].getInfo<CL_DEVICE_MAX_WORK_GROUP_SIZE>() << endl << endl;
    }

    // Pick one device
    pickDevice(device,devices);
    cout << "\nUsing OpenCL device: \t" << device.getInfo<CL_DEVICE_NAME>() << endl;
    cout << "\t\t\tMax compute units: " << device.getInfo<CL_DEVICE_MAX_COMPUTE_UNITS>() << endl;
    cout << "\t\t\tMax work group size: " << device.getInfo<CL_DEVICE_MAX_WORK_GROUP_SIZE>() << endl;

    // Create an OpenCL context and command queue on that device.
    context = Context(device);
    queue = CommandQueue(context,device);

    // Convert the OpenCL source code to a string
    string source;
    ifstream file("opencl_kernel.cl");
    if (!file){
        cout << "\nNo OpenCL file found!" << endl << "Exiting..." << endl;
        system("PAUSE");
        exit(1);
    }
    while (!file.eof()){
        char line[256];
        file.getline(line,255);
        source += line;
    }

    const char* kernel_source = source.c_str();

    // Create an OpenCL program by performing runtime source compilation for the chosen device
    program = Program(context,kernel_source);
    cl_int result = program.build({ device });
    if (result) cout << "Error during compilation OpenCL code!!!\n (" << result << ")" << endl;
    if (result == CL_BUILD_PROGRAM_FAILURE) printErrorLog(program,device);

    // Create a kernel (entry point in the OpenCL source program)
    kernel = Kernel(program,"render_kernel");
}

void cleanUp(){
    delete cpu_output;
}

inline float clamp(float x){ return x < 0.0f ? 0.0f : x > 1.0f ? 1.0f : x; }

// convert RGB float in range [0,1] to int in range [0,255] and perform gamma correction
inline int toInt(float x){ return int(clamp(x) * 255 + .5); }

void saveImage(){
    // write image to PPM file,a very simple image file format
    // PPM files can be opened with IrfanView (download at www.irfanview.com) or GIMP
    FILE *f = fopen("opencl_raytracer.ppm","w");
    fprintf(f,"P3\n%d %d\n%d\n",image_width,image_height,255);

    // loop over all pixels,write RGB values
    for (int i = 0; i < image_width * image_height; i++)
        fprintf(f,"%d %d %d ",toInt(cpu_output[i].s[0]),toInt(cpu_output[i].s[1]),toInt(cpu_output[i].s[2]));
}

#define float3(x,y,z) {{x,z}}  // macro to replace ugly initializer braces

void initScene(Sphere* cpu_spheres){

    // left wall
    cpu_spheres[0].radius   = 200.0f;
    cpu_spheres[0].position = float3(-200.6f,0.0f,0.0f);
    cpu_spheres[0].color    = float3(0.75f,0.25f,0.25f);
    cpu_spheres[0].emission = float3(0.0f,0.0f);

    // right wall
    cpu_spheres[1].radius   = 200.0f;
    cpu_spheres[1].position = float3(200.6f,0.0f);
    cpu_spheres[1].color    = float3(0.25f,0.75f);
    cpu_spheres[1].emission = float3(0.0f,0.0f);

    // floor
    cpu_spheres[2].radius   = 200.0f;
    cpu_spheres[2].position = float3(0.0f,-200.4f,0.0f);
    cpu_spheres[2].color    = float3(0.9f,0.8f,0.7f);
    cpu_spheres[2].emission = float3(0.0f,0.0f);

    // ceiling
    cpu_spheres[3].radius   = 200.0f;
    cpu_spheres[3].position = float3(0.0f,200.4f,0.0f);
    cpu_spheres[3].color    = float3(0.9f,0.7f);
    cpu_spheres[3].emission = float3(0.0f,0.0f);

    // back wall
    cpu_spheres[4].radius   = 200.0f;
    cpu_spheres[4].position = float3(0.0f,-200.4f);
    cpu_spheres[4].color    = float3(0.9f,0.7f);
    cpu_spheres[4].emission = float3(0.0f,0.0f);

    // front wall 
    cpu_spheres[5].radius   = 200.0f;
    cpu_spheres[5].position = float3(0.0f,202.0f);
    cpu_spheres[5].color    = float3(0.9f,0.7f);
    cpu_spheres[5].emission = float3(0.0f,0.0f);

    // left sphere
    cpu_spheres[6].radius   = 0.16f;
    cpu_spheres[6].position = float3(-0.25f,-0.24f,-0.1f);
    cpu_spheres[6].color    = float3(0.9f,0.7f);
    cpu_spheres[6].emission = float3(0.0f,0.0f);

    // right sphere
    cpu_spheres[7].radius   = 0.16f;
    cpu_spheres[7].position = float3(0.25f,0.1f);
    cpu_spheres[7].color    = float3(0.9f,0.7f);
    cpu_spheres[7].emission = float3(0.0f,0.0f);

    // lightsource
    cpu_spheres[8].radius   = 1.0f;
    cpu_spheres[8].position = float3(0.0f,1.36f,0.0f);
    cpu_spheres[8].color    = float3(0.0f,0.0f);
    cpu_spheres[8].emission = float3(9.0f,8.0f,6.0f);

}

void main(){

    // initialise OpenCL
    initOpenCL();

    // allocate memory on CPU to hold the rendered image
    cpu_output = new cl_float3[image_width * image_height];

    // initialise scene
    const int sphere_count = 9;
    Sphere cpu_spheres[sphere_count];
    initScene(cpu_spheres);

    // Create buffers on the OpenCL device for the image and the scene
    cl_output = Buffer(context,CL_MEM_WRITE_ONLY,image_width * image_height * sizeof(cl_float3));
    cl_spheres = Buffer(context,CL_MEM_READ_ONLY,sphere_count * sizeof(Sphere));
    queue.enqueueWriteBuffer(cl_spheres,sphere_count * sizeof(Sphere),cpu_spheres);

    // specify OpenCL kernel arguments
    kernel.setArg(0,cl_spheres);
    kernel.setArg(1,image_width);
    kernel.setArg(2,image_height);
    kernel.setArg(3,sphere_count);
    kernel.setArg(4,cl_output);

    // every pixel in the image has its own thread or "work item",// so the total amount of work items equals the number of pixels
    std::size_t global_work_size = image_width * image_height;
    std::size_t local_work_size = kernel.getWorkGroupInfo<CL_KERNEL_WORK_GROUP_SIZE>(device);

    cout << "Kernel work group size: " << local_work_size << endl;

    // Ensure the global work size is a multiple of local work size
     if (global_work_size % local_work_size != 0)
        global_work_size = (global_work_size / local_work_size + 1) * local_work_size;

    cout << "Rendering started..." << endl;

    // launch the kernel
    queue.enqueueNDRangeKernel(kernel,NULL,global_work_size,local_work_size);
    queue.finish();

    cout << "Rendering done! \nCopying output from device to host" << endl;

    // read and copy OpenCL output to CPU
    queue.enqueueReadBuffer(cl_output,image_width * image_height * sizeof(cl_float3),cpu_output);

    // save image
    saveImage();
    cout << "Saved image to 'opencl_raytracer.ppm'" << endl;

    // release memory
    cleanUp();

    system("PAUSE");
}

opencl_kernel.cl:

/* OpenCL based simple sphere path tracer by Sam Lapere,2016*/
/* based on smallpt by Kevin Beason */
/* http://raytracey.blogspot.com */

__constant float EPSILON = 0.00003f; /* required to compensate for limited float precision */
__constant float PI = 3.14159265359f;
__constant int SAMPLES = 128;

typedef struct Ray{
    float3 origin;
    float3 dir;
} Ray;

typedef struct Sphere{
    float radius;
    float3 pos;
    float3 color;
    float3 emission;
} Sphere;

static float get_random(unsigned int *seed0,unsigned int *seed1) {

    /* hash the seeds using bitwise AND operations and bitshifts */
    *seed0 = 36969 * ((*seed0) & 65535) + ((*seed0) >> 16);  
    *seed1 = 18000 * ((*seed1) & 65535) + ((*seed1) >> 16);

    unsigned int ires = ((*seed0) << 16) + (*seed1);

    /* use union struct to convert int to float */
    union {
        float f;
        unsigned int ui;
    } res;

    res.ui = (ires & 0x007fffff) | 0x40000000;  /* bitwise AND,bitwise OR */
    return (res.f - 2.0f) / 2.0f;
}

Ray createCamRay(const int x_coord,const int y_coord,const int width,const int height){

    float fx = (float)x_coord / (float)width;  /* convert int in range [0 - width] to float in range [0-1] */
    float fy = (float)y_coord / (float)height; /* convert int in range [0 - height] to float in range [0-1] */

    /* calculate aspect ratio */
    float aspect_ratio = (float)(width) / (float)(height);
    float fx2 = (fx - 0.5f) * aspect_ratio;
    float fy2 = fy - 0.5f;

    /* determine position of pixel on screen */
    float3 pixel_pos = (float3)(fx2,-fy2,0.0f);

    /* create camera ray*/
    Ray ray;
    ray.origin = (float3)(0.0f,0.1f,2.0f); /* fixed camera position */
    ray.dir = normalize(pixel_pos - ray.origin); /* vector from camera to pixel on screen */

    return ray;
}

            /* (__global Sphere* sphere,const Ray* ray) */
float intersect_sphere(const Sphere* sphere,const Ray* ray) /* version using local copy of sphere */
{
    float3 rayToCenter = sphere->pos - ray->origin;
    float b = dot(rayToCenter,ray->dir);
    float c = dot(rayToCenter,rayToCenter) - sphere->radius*sphere->radius;
    float disc = b * b - c;

    if (disc < 0.0f) return 0.0f;
    else disc = sqrt(disc);

    if ((b - disc) > EPSILON) return b - disc;
    if ((b + disc) > EPSILON) return b + disc;

    return 0.0f;
}

bool intersect_scene(__constant Sphere* spheres,const Ray* ray,float* t,int* sphere_id,const int sphere_count)
{
    /* initialise t to a very large number,so t will be guaranteed to be smaller
    when a hit with the scene occurs */

    float inf = 1e20f;
    *t = inf;

    /* check if the ray intersects each sphere in the scene */
    for (int i = 0; i < sphere_count; i++)  {
    
        Sphere sphere = spheres[i]; /* create local copy of sphere */
    
        /* float hitdistance = intersect_sphere(&spheres[i],ray); */
        float hitdistance = intersect_sphere(&sphere,ray);
        /* keep track of the closest intersection and hitobject found so far */
        if (hitdistance != 0.0f && hitdistance < *t) {
            *t = hitdistance;
            *sphere_id = i;
        }
    }
    return *t < inf; /* true when ray interesects the scene */
}


/* the path tracing function */
/* computes a path (starting from the camera) with a defined number of bounces,accumulates light/color at each bounce */
/* each ray hitting a surface will be reflected in a random direction (by randomly sampling the hemisphere above the hitpoint) */
/* small optimisation: diffuse ray directions are calculated using cosine weighted importance sampling */

float3 trace(__constant Sphere* spheres,const Ray* camray,const int sphere_count,const int* seed0,const int* seed1){

    Ray ray = *camray;

    float3 accum_color = (float3)(0.0f,0.0f);
    float3 mask = (float3)(1.0f,1.0f,1.0f);

    for (int bounces = 0; bounces < 8; bounces++){

        float t;   /* distance to intersection */
        int hitsphere_id = 0; /* index of intersected sphere */

        /* if ray misses scene,return background colour */
        if (!intersect_scene(spheres,&ray,&t,&hitsphere_id,sphere_count))
            return accum_color += mask * (float3)(0.15f,0.15f,0.25f);

        /* else,we've got a hit! Fetch the closest hit sphere */
        Sphere hitsphere = spheres[hitsphere_id]; /* version with local copy of sphere */

        /* compute the hitpoint using the ray equation */
        float3 hitpoint = ray.origin + ray.dir * t;
    
        /* compute the surface normal and flip it if necessary to face the incoming ray */
        float3 normal = normalize(hitpoint - hitsphere.pos); 
        float3 normal_facing = dot(normal,ray.dir) < 0.0f ? normal : normal * (-1.0f);

        /* compute two random numbers to pick a random point on the hemisphere above the hitpoint*/
        float rand1 = 2.0f * PI * get_random(seed0,seed1);
        float rand2 = get_random(seed0,seed1);
        float rand2s = sqrt(rand2);

        /* create a local orthogonal coordinate frame centered at the hitpoint */
        float3 w = normal_facing;
        float3 axis = fabs(w.x) > 0.1f ? (float3)(0.0f,0.0f) : (float3)(1.0f,0.0f);
        float3 u = normalize(cross(axis,w));
        float3 v = cross(w,u);

        /* use the coordinte frame and random numbers to compute the next ray direction */
        float3 newdir = normalize(u * cos(rand1)*rand2s + v*sin(rand1)*rand2s + w*sqrt(1.0f - rand2));

        /* add a very small offset to the hitpoint to prevent self intersection */
        ray.origin = hitpoint + normal_facing * EPSILON;
        ray.dir = newdir;

        /* add the colour and light contributions to the accumulated colour */
        accum_color += mask * hitsphere.emission; 

        /* the mask colour picks up surface colours at each bounce */
        mask *= hitsphere.color; 
    
        /* perform cosine-weighted importance sampling for diffuse surfaces*/
        mask *= dot(newdir,normal_facing); 
    }

    return accum_color;
}

__kernel void render_kernel(__constant Sphere* spheres,const int height,__global float3* output){
    unsigned int work_item_id = get_global_id(0);   /* the unique global id of the work item for the current pixel */
    unsigned int x_coord = work_item_id % width;            /* x-coordinate of the pixel */
    unsigned int y_coord = work_item_id / width;            /* y-coordinate of the pixel */

    /* seeds for random number generator */
    unsigned int seed0 = x_coord;
    unsigned int seed1 = y_coord;

    Ray camray = createCamRay(x_coord,y_coord,width,height);

    /* add the light contribution of each sample and average over all samples*/
    float3 finalcolor = (float3)(0.0f,0.0f);
    float invSamples = 1.0f / SAMPLES;

    for (int i = 0; i < SAMPLES; i++)
        finalcolor += trace(spheres,&camray,sphere_count,&seed0,&seed1) * invSamples;

    /* store the pixelcolour in the output buffer */
    output[work_item_id] = finalcolor;
}

解决方法

暂无找到可以解决该程序问题的有效方法,小编努力寻找整理中!

如果你已经找到好的解决方法,欢迎将解决方案带上本链接一起发送给小编。

小编邮箱:dio#foxmail.com (将#修改为@)