问题描述
找到解决方案后的补充说明:问题完全出在代码上,与硬件无关。以下是原始帖子:
我正在尝试编写一个基本的 OpenCL 程序。
该程序只是创建一个缓冲区,由内核将 42 写入该缓冲区,然后把缓冲区读回主机并输出。
这是用 C 编写的代码:
// Zero-based index into the platform list filled in by clGetPlatformIDs;
// selects which OpenCL platform main() uses.
#define MY_PLATFORM 2 // Adjustable
// Zero-based index into the device list filled in by clGetDeviceIDs for the
// selected platform; selects which device main() uses.
#define MY_DEVICE 0 // Adjustable
#include <stdio.h>
#include <stdlib.h>
#define CL_TARGET_OPENCL_VERSION 200
#include <CL/cl.h>
/*
 * OpenCL C source text of the `fourtyTwo` kernel: each work-item stores 42
 * into the element of `output` selected by its global id in dimension 0.
 * Built from adjacent string literals, one per source line of the kernel.
 */
const char *myCode =
    " "
    "__kernel void fourtyTwo(__global int *output) { \n "
    "int i = get_global_id(0); \n "
    "output[i] = 42; \n "
    "} \n "
    "\0";
int main(void) {
cl_platform_id *myPlatforms = (cl_platform_id*)malloc(sizeof(cl_platform_id));
cl_uint *myPlatformCount = (cl_uint*)malloc(sizeof(cl_uint));
cl_device_id *myDevices = (cl_device_id*)malloc(sizeof(cl_device_id));
cl_uint *myDeviceCount = (cl_uint*)malloc(sizeof(cl_uint));
int err;
/* Reference:
cl_int clGetPlatformIDs(
cl_uint num_entries,// I want just enough to reach MY_PLATFORM.
cl_platform_id *platforms,// myPlatforms
cl_uint *num_platforms // myPlatformCount
)
*/
err = clGetPlatformIDs(MY_PLATFORM + 1,myPlatforms,myPlatformCount);
if(err != 0) fprintf(stderr,"\nCould not query platforms.\nOpenCL failed with exit code %d\n\n",err);
/* Reference:
cl_int clGetDeviceIDs(
cl_platform_id num_entries,// I want the MY_PLATFORM'th entry of myPlatforms.
cl_device_type device_type,// Any device will do.
cl_uint num_entries,// I want just enough to reach MY_DEVICE.
cl_device_id *devices,// myDevices
cl_uint *num_devices // myDeviceCount
)
*/
err = clGetDeviceIDs(myPlatforms[MY_PLATFORM],CL_DEVICE_TYPE_ALL,MY_DEVICE + 1,myDevices,myDeviceCount);
if(err != 0) fprintf(stderr,"\nCould not query devices.\nOpenCL failed with exit code %d\n\n",err);
/* Reference:
cl_context clCreateContext(
cl_context_properties *properties,// The default properties should suffice.
cl_uint num_devices,// I want just enough to reach MY_DEVICE.
const cl_device_id *devices,// myDevices
void *pfn_notify,// No need for this.
void *user_data,// No need for this.
cl_int *errcode_ret // err
)
*/
cl_context myContext = clCreateContext(NULL,NULL,&err);
if(err != 0) fprintf(stderr,"\nCould not open context.\nOpenCL failed with exit code %d\n\n",err);
/* Reference:
cl_command_queue clCreateCommandQueueWithProperties(
cl_context context,// myContext
cl_device_id device,// I want the MY_DEVICE'th device of myDevices.
const cl_queue_properties properties,// The default properties should suffice.
cl_int *errcode_ret // err
)
*/
cl_command_queue myCommandQueue = clCreateCommandQueueWithProperties(myContext,myDevices[MY_DEVICE],"\nCould not open command queue.\nOpenCL failed with exit code %d\n\n",err);
/* Reference:
cl_program clCreateProgramWithSource(
cl_context context,// myContext
cl_uint count,// There's only 1 source code.
const char **strings,// myCode,passed as a length-1 array.
const size_t *lengths,// Passing NULL indicates that all the strings are null-terminated.
cl_int *errcode_ret // err
)
*/
cl_program myProgram = clCreateProgramWithSource(myContext,1,(const char**)&myCode,"\nCould not create program.\nOpenCL failed with exit code %d\n\n",err);
/* Reference:
cl_int clBuildProgram(
cl_program program,// myProgram
const cl_device_id *device_list,// myDevices
const char *options,// This can be NULL,right? The docs didn't specify.
void *pfn_notify,// No need for this.
void *user_data // No need for this.
)
*/
err = clBuildProgram(myProgram,NULL);
if(err != 0) {
fprintf(stderr,"\nCould not build program.\nOpenCL failed with exit code %d\n\n",err);
char *errLog;
size_t errLen;
clGetProgramBuildInfo(myProgram,CL_PROGRAM_BUILD_LOG,&errLen);
errLog = (char*)malloc((errLen + 1) * sizeof(char));
clGetProgramBuildInfo(myProgram,errLen,errLog,NULL);
errLog[errLen] = 0;
fprintf(stderr,"\nFull Build Log:\n%s\n\n",errLog);
}
/* Reference:
cl_kernel clCreateKernel(
cl_program program,// myProgram
const char *kernel_name,// "fourtyTwo" (See above,definition of myCode)
cl_int *errcode_ret // err
)
*/
cl_kernel myKernel = clCreateKernel(myProgram,"fourtyTwo","\nCould not create kernel.\nOpenCL failed with exit code %d\n\n",err);
size_t *globalSize = (size_t*)malloc(sizeof(size_t));
*globalSize = 1; // There is only 1 item.
size_t *localSize = (size_t*)malloc(sizeof(size_t));
*localSize = 1; // There can only be 1 out of 1 item.
int *outputArr = (int*)malloc(1 * sizeof(int));
/* Reference:
cl_mem clCreateBuffer(
cl_context context,// myContext
cl_mem_flags flags,// I'm only writing to the output array.
size_t size,// Its only 1 integer.
void *host_ptr,// I haven't allocated this space yet.
cl_int *errcode_ret // err
)
*/
cl_mem outputBuffer = clCreateBuffer(myContext,CL_MEM_WRITE_ONLY,1 * sizeof(int),"\nCould not create buffer.\nOpenCL failed with exit code %d\n\n",err);
/* Reference:
cl_int clSetKernelArg(
cl_kernel kernel,// myKernel
cl_uint arg_index,// Setting the 1st argument.
size_t arg_size,// Passing 1 cl_mem object.
const void *arg_value // The argument shall be the output buffer for the kernel to write to.
)
*/
err = clSetKernelArg(myKernel,sizeof(cl_mem),(void*)outputBuffer);
if(err != 0) fprintf(stderr,"\nCould not set kernel argument.\nOpenCL failed with exit code %d\n\n",err);
/* Reference:
cl_int clEnqueueNDRangeKernel(
cl_command_queue command_queue,// myCommandQueue
cl_kernel kernel,// myKernel
cl_uint work_dim,// 1 dimensional.
const size_t *global_work_offset,// Don't offset anything.
const size_t *global_work_size,// globalSize
const size_t *local_work_size,// localSize
cl_uint num_events_in_wait_list,// I don't have a wait list (whatever that is).
const cl_event *event_wait_list,// I don't have a wait list (whatever that is).
cl_event *event // Don't create an event (whatever this means).
)
*/
err = clEnqueueNDRangeKernel(myCommandQueue,myKernel,globalSize,localSize,NULL);
if(err != 0) fprintf(stderr,"\nCould not run kernel.\nOpenCL failed with exit code %d\n\n",err);
/* Reference:
cl_int clEnqueueReadBuffer(
cl_command_queue command_queue,// myCommandQueue
cl_mem buffer,// outputBuffer
cl_bool blocking_read,// I'm doing everything synchronously.
size_t offset,// Don't offset anything.
size_t cb,// Reading in 1 integer.
void *ptr,// Putting the data into outputArr.
cl_uint num_events_in_wait_list,// I don't have a wait list (whatever that is).
cl_event *event // Don't create an event (whatever this means).
)
*/
clEnqueueReadBuffer(myCommandQueue,outputBuffer,CL_TRUE,1 * sizeof(cl_int),outputArr,"\nCould not read from buffer.\nOpenCL failed with exit code %d\n\n",err);
// Print the result
printf("%d\n",outputArr[0]);
return 0;
}
该程序无法正常工作:缓冲区的内容保持不变,并且在 Oclgrind 下运行时会报错。
输出如下:
gcc main.c -o main -lOpenCL
./main
Invalid write of size 4 at global memory address 0x560d0b2cefc0
Kernel: fourtyTwo
Entity: Global(0,0) Local(0,0) Group(0,0)
store i32 42,i32 addrspace(1)* %arrayidx,align 4,!dbg !23,!tbaa !24
At line 3 (column 15) of input.cl:
output[i] = 42;
0
Press ENTER or type command to continue
如果这是系统方面的问题,这里是 clinfo 的输出。
我使用的处理器是 AMD Ryzen 7 PRO 4750U with Radeon Graphics (16) @ 1.700GHz。
我正在运行 Arch Linux。
有没有人知道问题可能是什么?谢谢!
P.S. 任何对我的代码或我的问题措辞的批评,无论是否文明,我都欢迎!
解决方法
我真是犯傻了。错误就出在设置内核参数的那一行:
err = clSetKernelArg(myKernel,sizeof(cl_mem),(void*)outputBuffer);
我本应写 (void*)&outputBuffer,却写成了 (void*)outputBuffer。clSetKernelArg 的最后一个参数必须是指向参数值的指针,也就是指向 cl_mem 句柄的指针,而不是句柄本身。由于这个错误,内核试图写入某个任意位置,而不是缓冲区在内存中的实际位置。