问题描述
我正在用cupy用pytorch运行cuda代码。
我的环境是 ubuntu 20,anaconda-python 3.7.6,nvidia-driver 440,cuda 10.2,cupy-cuda102,torch 1.4.0
import data_load_test
from tqdm import tqdm
import torch
from torch.utils.data import DataLoader
def main():
dataset = data_load_test.DataLoadtest()
training_loader = DataLoader(dataset,batch_size=1)
with torch.cuda.device(0):
pbar = tqdm(training_loader)
for epoch in range(3):
for i,img in enumerate(pbar):
print("see the message")
if __name__ == "__main__":
main()
和这样的数据加载器。
from torch.utils.data import Dataset
import cv2
import cupy as cp
def read_cuda_file(cuda_path):
f = open(cuda_path,'r')
source_line = ""
while True:
line = f.readline()
if not line: break
source_line = source_line + line
f.close()
return source_line
class DataLoadTest(Dataset):
def __init__(self):
source = read_cuda_file("cuda/cuda_code.cu")
cuda_source = '''{}'''.format(source)
module = cp.RawModule(code=cuda_source)
self.myfunc = module.get_function('myfunc')
self.input = cp.asarray(cv2.imread("hi.png",-1),cp.uint8)
h,w,c = self.input.shape
self.h = h
self.w = w
self.output = cp.zeros((w,h,3),dtype=cp.uint8)
self.block_size = (32,32)
self.grid_size = (h // self.block_size[1],w // self.block_size[0])
def __len__(self):
return 1
def __getitem__(self,idx):
self.myfunc(self.grid_size,self.block_size,(self.input,self.output,self.h,self.w))
return cp.asnumpy(self.output)
我的cuda代码是
#define PI 3.14159265358979323846f
extern "C"{
__global__ void myfunc(const unsigned char* refImg,unsigned char* warpImg,const long long cols,const long long rows)
{
long long x = blockDim.x * blockIdx.x + threadIdx.x;
long long y = blockDim.y * blockIdx.y + threadIdx.y;
long long indexImg = y * cols + x;
warpImg[indexImg * 3] = 0;
warpImg[indexImg * 3 + 1] = 1;
warpImg[indexImg * 3 + 2] = 2;
}
}
我有两个GPU TITAN V(设备0)和TITAN RTX(设备1)
with torch.cuda.device(0):
工作正常,但是
使用TITAN RTX,
with torch.cuda.device(1):
它给出了这样的错误消息。
File "cupy/core/raw.pyx",line 66,in cupy.core.raw.RawKernel.__call__
File "cupy/cuda/function.pyx",line 162,in cupy.cuda.function.Function.__call__
File "cupy/cuda/function.pyx",line 144,in cupy.cuda.function._launch
File "cupy/cuda/driver.pyx",line 293,in cupy.cuda.driver.launchKernel
File "cupy/cuda/driver.pyx",line 118,in cupy.cuda.driver.check_status
cupy.cuda.driver.CUDADriverError: CUDA_ERROR_CONTEXT_IS_DESTROYED: context is destroyed
请帮助。
解决方法
在main()中,当实例化dataLoadTest()类时,它发生在默认设备0上,因此cuPy在此处编译myFunc()。
下一行“ with torch.cuda.device(0):”是您切换到失败版本的设备1的地方吗?
打电话给我会发生什么
cuPy.cuda.Device(1).use()
作为main()中的第一行,以确保myFunc()在设备1上实例化吗?