OpenCL:在全局内存地址 0x### 处发生大小为 # 的无效写入

问题描述

找到解决方案后的补充说明:问题仅出在代码上,与硬件无关。以下是原始帖子:

我正在尝试使用基本的 OpenCL 程序。

程序简单地创建一个缓冲区,将 42 写入缓冲区,读取并输出。

这是用 C 编写的代码:

#define MY_PLATFORM 2 // Adjustable: index into the platform list filled in by clGetPlatformIDs
#define MY_DEVICE 0 // Adjustable: index into the device list filled in by clGetDeviceIDs

#include <stdio.h>
#include <stdlib.h>

#define CL_TARGET_OPENCL_VERSION 200
#include <CL/cl.h>

/*
 * OpenCL C source of the "fourtyTwo" kernel: every work-item stores 42 into
 * the output buffer at the index given by its global id.
 * Built from adjacent string literals; the final "\0" is the explicit
 * terminator the original literal carried.
 */
const char *myCode =
    " "
    "    __kernel void fourtyTwo(__global int *output) { \n "
    "        int i = get_global_id(0); \n "
    "        output[i] = 42; \n "
    "    } \n "
    "\0";

int main(void) {
    cl_platform_id *myPlatforms = (cl_platform_id*)malloc(sizeof(cl_platform_id));
    cl_uint *myPlatformCount = (cl_uint*)malloc(sizeof(cl_uint));

    cl_device_id *myDevices = (cl_device_id*)malloc(sizeof(cl_device_id));
    cl_uint *myDeviceCount = (cl_uint*)malloc(sizeof(cl_uint));

    int err;

    /* Reference:
        cl_int clGetPlatformIDs(
            cl_uint num_entries,// I want just enough to reach MY_PLATFORM.
            cl_platform_id *platforms,// myPlatforms
            cl_uint *num_platforms     // myPlatformCount
        )
    */
    err = clGetPlatformIDs(MY_PLATFORM + 1,myPlatforms,myPlatformCount);
    if(err != 0) fprintf(stderr,"\nCould not query platforms.\nOpenCL failed with exit code %d\n\n",err);

    /* Reference:
        cl_int clGetDeviceIDs(
            cl_platform_id num_entries,// I want the MY_PLATFORM'th entry of myPlatforms.
            cl_device_type device_type,// Any device will do.
            cl_uint num_entries,// I want just enough to reach MY_DEVICE.
            cl_device_id *devices,// myDevices
            cl_uint *num_devices        // myDeviceCount
        )
    */
    err = clGetDeviceIDs(myPlatforms[MY_PLATFORM],CL_DEVICE_TYPE_ALL,MY_DEVICE + 1,myDevices,myDeviceCount);
    if(err != 0) fprintf(stderr,"\nCould not query devices.\nOpenCL failed with exit code %d\n\n",err);

    /* Reference:
        cl_context clCreateContext(
            cl_context_properties *properties,// The default properties should suffice.
            cl_uint num_devices,// I want just enough to reach MY_DEVICE.
            const cl_device_id *devices,// myDevices
            void *pfn_notify,// No need for this.
            void *user_data,// No need for this.
            cl_int *errcode_ret                // err
        )
    */
    cl_context myContext = clCreateContext(NULL,NULL,&err);
    if(err != 0) fprintf(stderr,"\nCould not open context.\nOpenCL failed with exit code %d\n\n",err);

    /* Reference:
        cl_command_queue clCreateCommandQueueWithProperties(
            cl_context context,// myContext
            cl_device_id device,// I want the MY_DEVICE'th device of myDevices.
            const cl_queue_properties properties,// The default properties should suffice.
            cl_int *errcode_ret                   // err
        )
    */
    cl_command_queue myCommandQueue = clCreateCommandQueueWithProperties(myContext,myDevices[MY_DEVICE],"\nCould not open command queue.\nOpenCL failed with exit code %d\n\n",err);

    /* Reference:
        cl_program clCreateProgramWithSource(
            cl_context context,// myContext
            cl_uint count,// There's only 1 source code.
            const char **strings,// myCode,passed as a length-1 array.
            const size_t *lengths,// Passing NULL indicates that all the strings are null-terminated.
            cl_int *errcode_ret    // err
        )
    */
    cl_program myProgram = clCreateProgramWithSource(myContext,1,(const char**)&myCode,"\nCould not create program.\nOpenCL failed with exit code %d\n\n",err);

    /* Reference:
        cl_int clBuildProgram(
            cl_program program,// myProgram
            const cl_device_id *device_list,// myDevices
            const char *options,// This can be NULL,right? The docs didn't specify.
            void *pfn_notify,// No need for this.
            void *user_data                  // No need for this.
        )
    */
    err = clBuildProgram(myProgram,NULL);
    if(err != 0) {
        fprintf(stderr,"\nCould not build program.\nOpenCL failed with exit code %d\n\n",err);
        char *errLog;
        size_t errLen;
        clGetProgramBuildInfo(myProgram,CL_PROGRAM_BUILD_LOG,&errLen);
        errLog = (char*)malloc((errLen + 1) * sizeof(char));
        clGetProgramBuildInfo(myProgram,errLen,errLog,NULL);
        errLog[errLen] = 0;
        fprintf(stderr,"\nFull Build Log:\n%s\n\n",errLog);
    }

    /* Reference:
        cl_kernel clCreateKernel(
            cl_program program,// myProgram
            const char *kernel_name,// "fourtyTwo" (See above,definition of myCode)
            cl_int *errcode_ret      // err
        )
    */
    cl_kernel myKernel = clCreateKernel(myProgram,"fourtyTwo","\nCould not create kernel.\nOpenCL failed with exit code %d\n\n",err);

    size_t *globalSize = (size_t*)malloc(sizeof(size_t));
    *globalSize = 1; // There is only 1 item.
    size_t *localSize = (size_t*)malloc(sizeof(size_t));
    *localSize = 1; // There can only be 1 out of 1 item.

    int *outputArr = (int*)malloc(1 * sizeof(int));

    /* Reference:
        cl_mem clCreateBuffer(
            cl_context context,// myContext
            cl_mem_flags flags,// I'm only writing to the output array.
            size_t size,// Its only 1 integer.
            void *host_ptr,// I haven't allocated this space yet.
            cl_int *errcode_ret // err
        )
    */
    cl_mem outputBuffer = clCreateBuffer(myContext,CL_MEM_WRITE_ONLY,1 * sizeof(int),"\nCould not create buffer.\nOpenCL failed with exit code %d\n\n",err);

    /* Reference:
        cl_int clSetKernelArg(
            cl_kernel kernel,// myKernel
            cl_uint arg_index,// Setting the 1st argument.
            size_t arg_size,// Passing 1 cl_mem object.
            const void *arg_value // The argument shall be the output buffer for the kernel to write to.
        )
    */
    err = clSetKernelArg(myKernel,sizeof(cl_mem),(void*)outputBuffer);
    if(err != 0) fprintf(stderr,"\nCould not set kernel argument.\nOpenCL failed with exit code %d\n\n",err);

    /* Reference:
        cl_int clEnqueueNDRangeKernel(
            cl_command_queue command_queue,// myCommandQueue
            cl_kernel kernel,// myKernel
            cl_uint work_dim,// 1 dimensional.
            const size_t *global_work_offset,// Don't offset anything.
            const size_t *global_work_size,// globalSize
            const size_t *local_work_size,// localSize
            cl_uint num_events_in_wait_list,// I don't have a wait list (whatever that is).
            const cl_event *event_wait_list,// I don't have a wait list (whatever that is).
            cl_event *event                   // Don't create an event (whatever this means).
        )
    */
    err = clEnqueueNDRangeKernel(myCommandQueue,myKernel,globalSize,localSize,NULL);
    if(err != 0) fprintf(stderr,"\nCould not run kernel.\nOpenCL failed with exit code %d\n\n",err);

    /* Reference:
        cl_int clEnqueueReadBuffer(
            cl_command_queue command_queue,// myCommandQueue
            cl_mem buffer,// outputBuffer
            cl_bool blocking_read,// I'm doing everything synchronously.
            size_t offset,// Don't offset anything.
            size_t cb,// Reading in 1 integer.
            void *ptr,// Putting the data into outputArr.
            cl_uint num_events_in_wait_list,// I don't have a wait list (whatever that is).
            cl_event *event                  // Don't create an event (whatever this means).
        )
    */
    clEnqueueReadBuffer(myCommandQueue,outputBuffer,CL_TRUE,1 * sizeof(cl_int),outputArr,"\nCould not read from buffer.\nOpenCL failed with exit code %d\n\n",err);

    // Print the result
    printf("%d\n",outputArr[0]);
    return 0;
}

该程序不起作用。它没有改变缓冲区的内容,并且在 oclgrind 下运行时报出错误。

输出如下:

gcc main.c -o main -lOpenCL
./main

Invalid write of size 4 at global memory address 0x560d0b2cefc0
        Kernel: fourtyTwo
        Entity: Global(0,0) Local(0,0) Group(0,0)
          store i32 42,i32 addrspace(1)* %arrayidx,align 4,!dbg !23,!tbaa !24
        At line 3 (column 15) of input.cl:
          output[i] = 42;

0

Press ENTER or type command to continue

如果这是系统问题,这里是 clinfo 的输出。

我使用的是配备 Radeon Graphics (16) @ 1.7000Ghz 的 AMD Ryzen 7 PRO 4750U。

我正在运行 Arch Linux。

有没有人知道问题可能是什么?谢谢!

P.S. 任何对我的代码或我的问题措辞的批评,无论是否文明,我都欢迎!

解决方法

我是个小丑。错误就在我去设置内核参数的行上:

    err = clSetKernelArg(myKernel,sizeof(cl_mem),(void*)outputBuffer);

我在应该写 (void*)&outputBuffer 的时候写成了 (void*)outputBuffer。clSetKernelArg 的最后一个参数必须是指向参数值(即 cl_mem 句柄)的指针,而不是句柄本身。由于这个错误,内核试图写入某个任意位置,而不是缓冲区内存中的实际位置。

相关问答

错误1:Request method ‘DELETE‘ not supported 错误还原:...
错误1:启动docker镜像时报错:Error response from daemon:...
错误1:private field ‘xxx‘ is never assigned 按Alt...
报错如下,通过源不能下载,最后警告pip需升级版本 Requirem...