问题描述
我编写了一段 Python 代码来生成 Mandelbrot 集并将其保存为 ppm 文件,现在我正尝试用 pyopencl 来加速计算并比较运行时间。但我对 pyopencl 在某些方面的工作方式很不了解,我查到的资料在这个问题上也没有帮到我。我的内核函数如下:
// Mandelbrot escape-time kernel as posted in the question (kept verbatim).
// All four arguments, including max_iterations, are declared as __global
// pointers, so the host has to supply a buffer object for each of them.
__kernel void mandelbrot(__global const float* real,__global const float* imaginary,__global const float* max_iterations,__global int* output)
{
// NOTE(review): gid is computed but never used below.
int gid = get_global_id(0);
// Only the first element of each input buffer is read; rx and iy are not
// referenced again after these two lines.
float rx = *real;
float iy = *imaginary;
float x = 0.0f;
float y = 0.0f;
int iterations = 0;
// NOTE(review): max_iterations is a pointer here, so this compares an int
// against a pointer (this is the line the quoted build warning points at).
while( (iterations < max_iterations) && ((x*x) + (y*y) < 4.0f)) {
// NOTE(review): `real` and `imaginary` are the pointer arguments, not the
// loaded values rx / iy — presumably rx and iy were intended; confirm.
float temp = x*x - y*y + real;
// 2.0 has no `f` suffix, unlike the other literals in this kernel.
y = 2.0 * x * y + imaginary;
x = temp;
iterations++;
}
// NOTE(review): the kernel ends without storing anything into `output`.
}
我的输入变量如下所示:
# Host-side setup: upload the inputs, build the program and launch the kernel.

# Read-only device buffers initialised by copying the host data.
real_gpu = cl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=np.float32(realVal))
# cl.Buffer requires the flags argument; use the same flags as real_gpu.
imag_gpu = cl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=np.float32(imagVal))
# Write-only result buffer sized for width * height float64 items.
# NOTE(review): the kernel declares `__global int* output`; np.int32 would
# presumably be the matching element type — confirm before changing.
output = cl.Buffer(ctx, mf.WRITE_ONLY, width * height * np.dtype(np.float64).itemsize)

prg = cl.Program(ctx, string_parallelism).build()
mandelbrot = prg.mandelbrot
# NOTE(review): this list has three entries for a four-argument kernel and
# declares the first two (buffer) arguments as float64 scalars. This is the
# part the question is about, so it is intentionally left as posted.
mandelbrot.set_scalar_arg_dtypes([np.float64, np.float64, None])

globalrange = (width, height)
localrange = None  # let the OpenCL implementation choose the work-group size
mandelbrot(queue, globalrange, localrange, real_gpu, imag_gpu, maxN, output)
CompilerWarning: From-source build succeeded,but resulted in non-empty logs:
Build on <pyopencl.Device 'Pitcairn' on 'AMD Accelerated Parallel Processing' at 0x56229da25400> succeeded,but said:
"/tmp/OCL3291941T1.cl",line 13: warning: operand types are incompatible ("int"
and "const __global float *")
while( (iterations < max_iterations) && ((x*x) + (y*y) < 4.0f)) {
^
warn(text,CompilerWarning)
Traceback (most recent call last):
File "/home/tei/tei2020/rodrigues17193tei/hpc2/pyopencl_mandelbrot/paralell_mandelbrot.py",line 93,in <module>
main()
File "/home/tei/tei2020/rodrigues17193tei/hpc2/pyopencl_mandelbrot/paralell_mandelbrot.py",line 71,in main
mandelbrot(queue,output)
File "<generated code>",line 12,in enqueue_knl_mandelbrot
RuntimeError: when processing arg#1 (1-based): Unable to cast Python instance to C++ type (compile in debug mode for details)
我需要对变量进行哪些更改,以便我的内核可以正确执行?
解决方法
问题中没有给出 `maxN` 的类型信息,但我假设它是 `int`,因为用 `float` 表示迭代次数没有意义。
问题在于内核参数 `max_iterations` 的类型是 `__global const float*`,这要求在主机端为它创建一个缓冲区;而把迭代次数上限作为缓冲区传递也没有任何意义。
所以我建议将 `max_iterations` 的类型更改为 `int`,如下所示:
// Suggested signature: max_iterations is passed by value as an int, while
// real, imaginary and output remain __global pointers.
kernel void mandelbrot(__global const float* real,__global const float* imaginary,int max_iterations,__global int* output)
{
// (kernel body elided in the answer)
.....
}
然后像这样将它传递给内核:
# Wrap maxN in np.int32 so it is passed as a sized scalar for the kernel's
# by-value `int max_iterations` parameter.
# NOTE(review): presumably the earlier set_scalar_arg_dtypes([...]) call must
# also be removed or changed to [None, None, np.int32, None] — confirm.
mandelbrot(queue,globalrange,localrange,real_gpu,imag_gpu,np.int32(maxN),output)