问题描述
[请查看下面的编辑,问题的解决方案可能就在这里]
我正在尝试通过研究小型光线跟踪器来学习OpenCL(请参见以下link的代码)。
我没有“真正的” GPU,我目前正在装有Intel®Iris™Graphics 6100图形卡的macosx笔记本电脑上。
代码在CPU上运行良好,但在GPU上的行为却很奇怪。它的工作(或不工作)取决于每个像素的样本数量(在场景中传播光线后,通过像素拍摄以得到其颜色的光线的数量)。如果我采样少量(64),则可以得到1280x720的图片,但是如果采样128个样本,则只能得到较小的图片。据我了解,样本数量不应有任何变化(当然,图片质量除外)。我错过了与OpenCL / GPU纯粹相关的东西吗?
此外,似乎是从崩溃的GPU内存中提取了结果:
queue.enqueueReadBuffer(cl_output,CL_TRUE,image_width * image_height * sizeof(cl_float4),cpu_output);
在此阶段,我得到一个“中止陷阱:6”。
我想念一些东西。
[EDIT] 经过一番研究,我发现了一条有趣的线索:图形卡可能会因时间过多而自愿中止该任务。可以采用这种方式来避免“冻结”屏幕。 This话题对此进行讨论。
您对此有何看法?
我找不到关闭此行为的方法。你知道怎么办吗?
以下是文件:
main.cpp:
// OpenCL based simple sphere path tracer by Sam Lapere,2016
// based on smallpt by Kevin Beason
// http://raytracey.blogspot.com
#include <iostream>
#include <fstream>
#include <vector>
#include <CL\cl.hpp>
using namespace std;
using namespace cl;
const int image_width = 1280;
const int image_height = 720;
cl_float4* cpu_output;
CommandQueue queue;
Device device;
Kernel kernel;
Context context;
Program program;
Buffer cl_output;
Buffer cl_spheres;
// dummy variables are required for memory alignment
// float3 is considered as float4 by OpenCL
struct Sphere
{
cl_float radius;
cl_float dummy1;
cl_float dummy2;
cl_float dummy3;
cl_float3 position;
cl_float3 color;
cl_float3 emission;
};
void pickPlatform(Platform& platform,const vector<Platform>& platforms){
if (platforms.size() == 1) platform = platforms[0];
else{
int input = 0;
cout << "\nChoose an OpenCL platform: ";
cin >> input;
// handle incorrect user input
while (input < 1 || input > platforms.size()){
cin.clear(); //clear errors/bad flags on cin
cin.ignore(cin.rdbuf()->in_avail(),'\n'); // ignores exact number of chars in cin buffer
cout << "No such option. Choose an OpenCL platform: ";
cin >> input;
}
platform = platforms[input - 1];
}
}
void pickDevice(Device& device,const vector<Device>& devices){
if (devices.size() == 1) device = devices[0];
else{
int input = 0;
cout << "\nChoose an OpenCL device: ";
cin >> input;
// handle incorrect user input
while (input < 1 || input > devices.size()){
cin.clear(); //clear errors/bad flags on cin
cin.ignore(cin.rdbuf()->in_avail(),'\n'); // ignores exact number of chars in cin buffer
cout << "No such option. Choose an OpenCL device: ";
cin >> input;
}
device = devices[input - 1];
}
}
void printErrorLog(const Program& program,const Device& device){
// Get the error log and print to console
string buildlog = program.getBuildInfo<CL_PROGRAM_BUILD_LOG>(device);
cerr << "Build log:" << std::endl << buildlog << std::endl;
// Print the error log to a file
FILE *log = fopen("errorlog.txt","w");
fprintf(log,"%s\n",buildlog);
cout << "Error log saved in 'errorlog.txt'" << endl;
system("PAUSE");
exit(1);
}
void initOpenCL()
{
// Get all available OpenCL platforms (e.g. AMD OpenCL,Nvidia CUDA,Intel OpenCL)
vector<Platform> platforms;
Platform::get(&platforms);
cout << "Available OpenCL platforms : " << endl << endl;
for (int i = 0; i < platforms.size(); i++)
cout << "\t" << i + 1 << ": " << platforms[i].getInfo<CL_PLATFORM_NAME>() << endl;
// Pick one platform
Platform platform;
pickPlatform(platform,platforms);
cout << "\nUsing OpenCL platform: \t" << platform.getInfo<CL_PLATFORM_NAME>() << endl;
// Get available OpenCL devices on platform
vector<Device> devices;
platform.getDevices(CL_DEVICE_TYPE_ALL,&devices);
cout << "Available OpenCL devices on this platform: " << endl << endl;
for (int i = 0; i < devices.size(); i++){
cout << "\t" << i + 1 << ": " << devices[i].getInfo<CL_DEVICE_NAME>() << endl;
cout << "\t\tMax compute units: " << devices[i].getInfo<CL_DEVICE_MAX_COMPUTE_UNITS>() << endl;
cout << "\t\tMax work group size: " << devices[i].getInfo<CL_DEVICE_MAX_WORK_GROUP_SIZE>() << endl << endl;
}
// Pick one device
pickDevice(device,devices);
cout << "\nUsing OpenCL device: \t" << device.getInfo<CL_DEVICE_NAME>() << endl;
cout << "\t\t\tMax compute units: " << device.getInfo<CL_DEVICE_MAX_COMPUTE_UNITS>() << endl;
cout << "\t\t\tMax work group size: " << device.getInfo<CL_DEVICE_MAX_WORK_GROUP_SIZE>() << endl;
// Create an OpenCL context and command queue on that device.
context = Context(device);
queue = CommandQueue(context,device);
// Convert the OpenCL source code to a string
string source;
ifstream file("opencl_kernel.cl");
if (!file){
cout << "\nNo OpenCL file found!" << endl << "Exiting..." << endl;
system("PAUSE");
exit(1);
}
while (!file.eof()){
char line[256];
file.getline(line,255);
source += line;
}
const char* kernel_source = source.c_str();
// Create an OpenCL program by performing runtime source compilation for the chosen device
program = Program(context,kernel_source);
cl_int result = program.build({ device });
if (result) cout << "Error during compilation OpenCL code!!!\n (" << result << ")" << endl;
if (result == CL_BUILD_PROGRAM_FAILURE) printErrorLog(program,device);
// Create a kernel (entry point in the OpenCL source program)
kernel = Kernel(program,"render_kernel");
}
void cleanUp(){
delete cpu_output;
}
inline float clamp(float x){ return x < 0.0f ? 0.0f : x > 1.0f ? 1.0f : x; }
// convert RGB float in range [0,1] to int in range [0,255] and perform gamma correction
inline int toInt(float x){ return int(clamp(x) * 255 + .5); }
void saveImage(){
// write image to PPM file,a very simple image file format
// PPM files can be opened with IrfanView (download at www.irfanview.com) or GIMP
FILE *f = fopen("opencl_raytracer.ppm","w");
fprintf(f,"P3\n%d %d\n%d\n",image_width,image_height,255);
// loop over all pixels,write RGB values
for (int i = 0; i < image_width * image_height; i++)
fprintf(f,"%d %d %d ",toInt(cpu_output[i].s[0]),toInt(cpu_output[i].s[1]),toInt(cpu_output[i].s[2]));
}
#define float3(x,y,z) {{x,z}} // macro to replace ugly initializer braces
void initScene(Sphere* cpu_spheres){
// left wall
cpu_spheres[0].radius = 200.0f;
cpu_spheres[0].position = float3(-200.6f,0.0f,0.0f);
cpu_spheres[0].color = float3(0.75f,0.25f,0.25f);
cpu_spheres[0].emission = float3(0.0f,0.0f);
// right wall
cpu_spheres[1].radius = 200.0f;
cpu_spheres[1].position = float3(200.6f,0.0f);
cpu_spheres[1].color = float3(0.25f,0.75f);
cpu_spheres[1].emission = float3(0.0f,0.0f);
// floor
cpu_spheres[2].radius = 200.0f;
cpu_spheres[2].position = float3(0.0f,-200.4f,0.0f);
cpu_spheres[2].color = float3(0.9f,0.8f,0.7f);
cpu_spheres[2].emission = float3(0.0f,0.0f);
// ceiling
cpu_spheres[3].radius = 200.0f;
cpu_spheres[3].position = float3(0.0f,200.4f,0.0f);
cpu_spheres[3].color = float3(0.9f,0.7f);
cpu_spheres[3].emission = float3(0.0f,0.0f);
// back wall
cpu_spheres[4].radius = 200.0f;
cpu_spheres[4].position = float3(0.0f,-200.4f);
cpu_spheres[4].color = float3(0.9f,0.7f);
cpu_spheres[4].emission = float3(0.0f,0.0f);
// front wall
cpu_spheres[5].radius = 200.0f;
cpu_spheres[5].position = float3(0.0f,202.0f);
cpu_spheres[5].color = float3(0.9f,0.7f);
cpu_spheres[5].emission = float3(0.0f,0.0f);
// left sphere
cpu_spheres[6].radius = 0.16f;
cpu_spheres[6].position = float3(-0.25f,-0.24f,-0.1f);
cpu_spheres[6].color = float3(0.9f,0.7f);
cpu_spheres[6].emission = float3(0.0f,0.0f);
// right sphere
cpu_spheres[7].radius = 0.16f;
cpu_spheres[7].position = float3(0.25f,0.1f);
cpu_spheres[7].color = float3(0.9f,0.7f);
cpu_spheres[7].emission = float3(0.0f,0.0f);
// lightsource
cpu_spheres[8].radius = 1.0f;
cpu_spheres[8].position = float3(0.0f,1.36f,0.0f);
cpu_spheres[8].color = float3(0.0f,0.0f);
cpu_spheres[8].emission = float3(9.0f,8.0f,6.0f);
}
void main(){
// initialise OpenCL
initOpenCL();
// allocate memory on CPU to hold the rendered image
cpu_output = new cl_float3[image_width * image_height];
// initialise scene
const int sphere_count = 9;
Sphere cpu_spheres[sphere_count];
initScene(cpu_spheres);
// Create buffers on the OpenCL device for the image and the scene
cl_output = Buffer(context,CL_MEM_WRITE_ONLY,image_width * image_height * sizeof(cl_float3));
cl_spheres = Buffer(context,CL_MEM_READ_ONLY,sphere_count * sizeof(Sphere));
queue.enqueueWriteBuffer(cl_spheres,sphere_count * sizeof(Sphere),cpu_spheres);
// specify OpenCL kernel arguments
kernel.setArg(0,cl_spheres);
kernel.setArg(1,image_width);
kernel.setArg(2,image_height);
kernel.setArg(3,sphere_count);
kernel.setArg(4,cl_output);
// every pixel in the image has its own thread or "work item",// so the total amount of work items equals the number of pixels
std::size_t global_work_size = image_width * image_height;
std::size_t local_work_size = kernel.getWorkGroupInfo<CL_KERNEL_WORK_GROUP_SIZE>(device);
cout << "Kernel work group size: " << local_work_size << endl;
// Ensure the global work size is a multiple of local work size
if (global_work_size % local_work_size != 0)
global_work_size = (global_work_size / local_work_size + 1) * local_work_size;
cout << "Rendering started..." << endl;
// launch the kernel
queue.enqueueNDRangeKernel(kernel,NULL,global_work_size,local_work_size);
queue.finish();
cout << "Rendering done! \nCopying output from device to host" << endl;
// read and copy OpenCL output to CPU
queue.enqueueReadBuffer(cl_output,image_width * image_height * sizeof(cl_float3),cpu_output);
// save image
saveImage();
cout << "Saved image to 'opencl_raytracer.ppm'" << endl;
// release memory
cleanUp();
system("PAUSE");
}
opencl_kernel.cl:
/* OpenCL based simple sphere path tracer by Sam Lapere,2016*/
/* based on smallpt by Kevin Beason */
/* http://raytracey.blogspot.com */
__constant float EPSILON = 0.00003f; /* required to compensate for limited float precision */
__constant float PI = 3.14159265359f;
__constant int SAMPLES = 128;
typedef struct Ray{
float3 origin;
float3 dir;
} Ray;
typedef struct Sphere{
float radius;
float3 pos;
float3 color;
float3 emission;
} Sphere;
static float get_random(unsigned int *seed0,unsigned int *seed1) {
/* hash the seeds using bitwise AND operations and bitshifts */
*seed0 = 36969 * ((*seed0) & 65535) + ((*seed0) >> 16);
*seed1 = 18000 * ((*seed1) & 65535) + ((*seed1) >> 16);
unsigned int ires = ((*seed0) << 16) + (*seed1);
/* use union struct to convert int to float */
union {
float f;
unsigned int ui;
} res;
res.ui = (ires & 0x007fffff) | 0x40000000; /* bitwise AND,bitwise OR */
return (res.f - 2.0f) / 2.0f;
}
Ray createCamRay(const int x_coord,const int y_coord,const int width,const int height){
float fx = (float)x_coord / (float)width; /* convert int in range [0 - width] to float in range [0-1] */
float fy = (float)y_coord / (float)height; /* convert int in range [0 - height] to float in range [0-1] */
/* calculate aspect ratio */
float aspect_ratio = (float)(width) / (float)(height);
float fx2 = (fx - 0.5f) * aspect_ratio;
float fy2 = fy - 0.5f;
/* determine position of pixel on screen */
float3 pixel_pos = (float3)(fx2,-fy2,0.0f);
/* create camera ray*/
Ray ray;
ray.origin = (float3)(0.0f,0.1f,2.0f); /* fixed camera position */
ray.dir = normalize(pixel_pos - ray.origin); /* vector from camera to pixel on screen */
return ray;
}
/* (__global Sphere* sphere,const Ray* ray) */
float intersect_sphere(const Sphere* sphere,const Ray* ray) /* version using local copy of sphere */
{
float3 rayToCenter = sphere->pos - ray->origin;
float b = dot(rayToCenter,ray->dir);
float c = dot(rayToCenter,rayToCenter) - sphere->radius*sphere->radius;
float disc = b * b - c;
if (disc < 0.0f) return 0.0f;
else disc = sqrt(disc);
if ((b - disc) > EPSILON) return b - disc;
if ((b + disc) > EPSILON) return b + disc;
return 0.0f;
}
bool intersect_scene(__constant Sphere* spheres,const Ray* ray,float* t,int* sphere_id,const int sphere_count)
{
/* initialise t to a very large number,so t will be guaranteed to be smaller
when a hit with the scene occurs */
float inf = 1e20f;
*t = inf;
/* check if the ray intersects each sphere in the scene */
for (int i = 0; i < sphere_count; i++) {
Sphere sphere = spheres[i]; /* create local copy of sphere */
/* float hitdistance = intersect_sphere(&spheres[i],ray); */
float hitdistance = intersect_sphere(&sphere,ray);
/* keep track of the closest intersection and hitobject found so far */
if (hitdistance != 0.0f && hitdistance < *t) {
*t = hitdistance;
*sphere_id = i;
}
}
return *t < inf; /* true when ray interesects the scene */
}
/* the path tracing function */
/* computes a path (starting from the camera) with a defined number of bounces,accumulates light/color at each bounce */
/* each ray hitting a surface will be reflected in a random direction (by randomly sampling the hemisphere above the hitpoint) */
/* small optimisation: diffuse ray directions are calculated using cosine weighted importance sampling */
float3 trace(__constant Sphere* spheres,const Ray* camray,const int sphere_count,const int* seed0,const int* seed1){
Ray ray = *camray;
float3 accum_color = (float3)(0.0f,0.0f);
float3 mask = (float3)(1.0f,1.0f,1.0f);
for (int bounces = 0; bounces < 8; bounces++){
float t; /* distance to intersection */
int hitsphere_id = 0; /* index of intersected sphere */
/* if ray misses scene,return background colour */
if (!intersect_scene(spheres,&ray,&t,&hitsphere_id,sphere_count))
return accum_color += mask * (float3)(0.15f,0.15f,0.25f);
/* else,we've got a hit! Fetch the closest hit sphere */
Sphere hitsphere = spheres[hitsphere_id]; /* version with local copy of sphere */
/* compute the hitpoint using the ray equation */
float3 hitpoint = ray.origin + ray.dir * t;
/* compute the surface normal and flip it if necessary to face the incoming ray */
float3 normal = normalize(hitpoint - hitsphere.pos);
float3 normal_facing = dot(normal,ray.dir) < 0.0f ? normal : normal * (-1.0f);
/* compute two random numbers to pick a random point on the hemisphere above the hitpoint*/
float rand1 = 2.0f * PI * get_random(seed0,seed1);
float rand2 = get_random(seed0,seed1);
float rand2s = sqrt(rand2);
/* create a local orthogonal coordinate frame centered at the hitpoint */
float3 w = normal_facing;
float3 axis = fabs(w.x) > 0.1f ? (float3)(0.0f,0.0f) : (float3)(1.0f,0.0f);
float3 u = normalize(cross(axis,w));
float3 v = cross(w,u);
/* use the coordinte frame and random numbers to compute the next ray direction */
float3 newdir = normalize(u * cos(rand1)*rand2s + v*sin(rand1)*rand2s + w*sqrt(1.0f - rand2));
/* add a very small offset to the hitpoint to prevent self intersection */
ray.origin = hitpoint + normal_facing * EPSILON;
ray.dir = newdir;
/* add the colour and light contributions to the accumulated colour */
accum_color += mask * hitsphere.emission;
/* the mask colour picks up surface colours at each bounce */
mask *= hitsphere.color;
/* perform cosine-weighted importance sampling for diffuse surfaces*/
mask *= dot(newdir,normal_facing);
}
return accum_color;
}
__kernel void render_kernel(__constant Sphere* spheres,const int height,__global float3* output){
unsigned int work_item_id = get_global_id(0); /* the unique global id of the work item for the current pixel */
unsigned int x_coord = work_item_id % width; /* x-coordinate of the pixel */
unsigned int y_coord = work_item_id / width; /* y-coordinate of the pixel */
/* seeds for random number generator */
unsigned int seed0 = x_coord;
unsigned int seed1 = y_coord;
Ray camray = createCamRay(x_coord,y_coord,width,height);
/* add the light contribution of each sample and average over all samples*/
float3 finalcolor = (float3)(0.0f,0.0f);
float invSamples = 1.0f / SAMPLES;
for (int i = 0; i < SAMPLES; i++)
finalcolor += trace(spheres,&camray,sphere_count,&seed0,&seed1) * invSamples;
/* store the pixelcolour in the output buffer */
output[work_item_id] = finalcolor;
}
解决方法
暂无找到可以解决该程序问题的有效方法,小编努力寻找整理中!
如果你已经找到好的解决方法,欢迎将解决方案带上本链接一起发送给小编。
小编邮箱:dio#foxmail.com (将#修改为@)