问题描述
from numba import cuda
import time
import numpy as np
@cuda.jit()
def test1(a, b, c, d, e, f, g, h, i):
    """Empty CUDA kernel that accepts nine arguments and does nothing.

    None of the arguments are read or written. The kernel exists only so
    that the cost of launching a kernel *with* arguments can be timed.
    """
    pass
@cuda.jit()
def test2():
    """Empty CUDA kernel that takes no arguments and does nothing.

    It exists only so that the cost of launching a kernel *without*
    arguments can be timed.
    """
    pass
# Nine 10000-element host arrays filled with -1, each copied to the device.
# Only the first one pins its dtype to int32; the remaining eight leave the
# dtype to NumPy's inference from the fill value.
ca = cuda.to_device(np.full(shape=10000, dtype=np.int32, fill_value=-1))
cb, cc, cd, ce, cf, cg, ch, ci = (
    cuda.to_device(np.full(shape=10000, fill_value=-1)) for _ in range(8)
)
# Time four rounds of launches: first the nine-argument kernel, then the
# zero-argument kernel. Each measurement brackets the launch plus a
# cuda.synchronize() call, so the clock is read only after the device work
# has been waited on.
#
# NOTE(review): the first round is expected to be far slower than the rest,
# presumably because Numba compiles each kernel lazily on its first launch.
# The remaining gap between the two kernels is presumably host-side work
# needed to prepare every argument for the launch - confirm against the
# Numba documentation.
# NOTE(review): the original indentation was lost; the trailing blank-line
# print() is assumed to belong to the loop body (one separator per round).
for _ in range(4):
    # perf_counter() is a monotonic, high-resolution clock meant for
    # measuring short intervals; time.time() can jump and is coarser.
    stime = time.perf_counter()
    test1[1, 1](ca, cb, cc, cd, ce, cf, cg, ch, ci)
    cuda.synchronize()
    etime = time.perf_counter()
    print(etime - stime)

    stime = time.perf_counter()
    test2[1, 1]()
    cuda.synchronize()
    etime = time.perf_counter()
    print(etime - stime)

    print()
打印时间
0.21714425086975098
0.1640312671661377
0.0014150142669677734
0.0002522468566894531
0.0009179115295410156
0.00022268295288085938
0.0008695125579833984
0.000217437744140625
这些变量已放置在GPU上。为什么将参数传递给内核时时间仍然增加？
解决方法
暂未找到可以解决该程序问题的有效方法，小编正在努力寻找整理中！
如果你已经找到好的解决方法,欢迎将解决方案带上本链接一起发送给小编。
小编邮箱:dio#foxmail.com (将#修改为@)