问题描述
我最近在 Coursera 上完成了“使用自动编码器实现图像超分辨率”的课程项目。当我尝试在自己的笔记本电脑上用 Spyder 和 Jupyter Notebook 运行相同的代码时,总是收到下面的错误。我使用的是 Nvidia GeForce GTX 1650 Ti,环境为 tensorflow-gpu = 2.3.0、CUDA = 10.1、cuDNN = 7.6.5、python = 3.8.5。我用相同的配置运行过许多深度神经网络程序,都没有出现过这个错误。
代码:
# Image Super Resolution using Autoencoder
#
# Trains a small convolutional autoencoder with skip connections that maps
# blurred 64x64 RGB images back to their sharp originals, then saves the
# trained model (HDF5) and its architecture (JSON).
import os

import numpy as np
import tensorflow as tf
# NOTE(review): `rescale(..., multichannel=True)` and `image.load_img` are used
# below but were never imported in this listing. The two imports marked
# "assumed" are inferred from the call signatures -- confirm against the
# original script.
from skimage.transform import rescale  # assumed
from tensorflow.keras import regularizers
from tensorflow.keras.callbacks import ModelCheckpoint, ReduceLROnPlateau
from tensorflow.keras.layers import (
    Input, Dense, Conv2D, MaxPooling2D, Dropout, Conv2DTranspose,
    UpSampling2D, add,
)
from tensorflow.keras.models import Model
from tensorflow.keras.preprocessing import image  # assumed

# GPU memory configuration exactly as used in the failing run.
# NOTE(review): this session is created but never handed to Keras (there is no
# set_session call), so the option may not affect autoencoder.fit -- confirm.
gpu_options = tf.compat.v1.GPUOptions(per_process_gpu_memory_fraction = 0.95)
session = tf.compat.v1.Session(config=tf.compat.v1.ConfigProto(gpu_options=gpu_options))

# Loading the Images
path = 'D:/GPU testing/Image Super Resolution/data/cars_train/'
x_train_n = []      # sharp target images, scaled to [0, 1]
x_train_down = []   # blurred inputs (downscaled by 2, then upscaled by 2)
images = os.listdir(path)
size = 0            # number of files attempted, successful or not
for a in images:
    try:
        # load_img expects target_size as (height, width); channels are not part of it.
        img = image.load_img(os.path.join(path, a), target_size=(64, 64))
        img_1 = image.img_to_array(img) / 255.
        dwn2 = rescale(rescale(img_1, 0.5, multichannel=True), 2.0, multichannel=True)
        img_2 = image.img_to_array(dwn2)
    except (OSError, ValueError) as exc:
        print("Error loading image", a, exc)
    else:
        # Append both arrays only after every step succeeded, so the input
        # and target lists always stay the same length.
        x_train_n.append(img_1)
        x_train_down.append(img_2)
    size += 1
    if size >= 64:
        break
x_train_n2 = np.array(x_train_n)
print(x_train_n2.shape)
x_train_down2 = np.array(x_train_down)
print(x_train_down2.shape)


# Building a Model
def _conv3x3(filters):
    """Return a 3x3 'same'-padded ReLU convolution with a tiny L1 activity penalty."""
    return Conv2D(filters, (3, 3), padding='same', activation='relu',
                  activity_regularizer=regularizers.l1(10e-10))


# Building the Encoder
# The input shape must match the loaded data: 64x64 pixels, 3 channels.
input_img = Input(shape=(64, 64, 3))
l1 = _conv3x3(64)(input_img)
l2 = _conv3x3(64)(l1)
l3 = MaxPooling2D(padding='same')(l2)
l3 = Dropout(0.3)(l3)
l4 = _conv3x3(128)(l3)
l5 = _conv3x3(128)(l4)
l6 = MaxPooling2D(padding='same')(l5)
l7 = _conv3x3(256)(l6)

# Building the Decoder
l8 = UpSampling2D()(l7)
l9 = _conv3x3(128)(l8)
l10 = _conv3x3(128)(l9)
# Skip connection: 'same' padding keeps l10 the same shape as l5.
l11 = add([l5, l10])
l12 = UpSampling2D()(l11)
l13 = _conv3x3(64)(l12)
l14 = _conv3x3(64)(l13)
l15 = add([l14, l2])
# chan = 3, for RGB
decoded = _conv3x3(3)(l15)

# Create our network
autoencoder = Model(input_img, decoded)
autoencoder_hfenn = Model(input_img, decoded)
autoencoder.compile(optimizer='adadelta', loss='mean_squared_error')
autoencoder.summary()

# Training the Model
# validation_steps is dropped: it only applies to dataset-style validation_data,
# not to a validation_split taken from NumPy arrays.
history = autoencoder.fit(x_train_down2, x_train_n2, epochs=20, batch_size=16,
                          shuffle=True, validation_split=0.15)

# Saving the Model
autoencoder.save('ISR_model_weight.h5')

# Representing Model as JSON String
autoencoder_json = autoencoder.to_json()
with open('ISR_model.json', 'w') as json_file:
    json_file.write(autoencoder_json)
错误:
2020-09-18 20:44:23.655077: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1402] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 3891 MB memory) -> physical GPU (device: 0,name: GeForce GTX 1650 Ti,pci bus id: 0000:01:00.0,compute capability: 7.5)
2020-09-18 20:44:23.658359: I tensorflow/stream_executor/cuda/cuda_driver.cc:775] Failed to allocate 3.80G (4080218880 bytes) from device: CUDA_ERROR_OUT_OF_MEMORY: out of memory
2020-09-18 20:44:23.659070: I tensorflow/stream_executor/cuda/cuda_driver.cc:775] Failed to allocate 3.42G (3672196864 bytes) from device: CUDA_ERROR_OUT_OF_MEMORY: out of memory
2020-09-18 20:44:25.560185: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library cudnn64_7.dll
Traceback (most recent call last):
File "D:\GPU testing\Image Super Resolution\Image Super Resolution using Autoencoders.py",line 126,in <module>
history = autoencoder.fit(x_train_down2,
File "D:\anaconda3\envs\tensorflow_gpu\lib\site-packages\tensorflow\python\keras\engine\training.py",line 108,in _method_wrapper
return method(self,*args,**kwargs)
File "D:\anaconda3\envs\tensorflow_gpu\lib\site-packages\tensorflow\python\keras\engine\training.py",line 1098,in fit
tmp_logs = train_function(iterator)
File "D:\anaconda3\envs\tensorflow_gpu\lib\site-packages\tensorflow\python\eager\def_function.py",line 780,in __call__
result = self._call(*args,**kwds)
File "D:\anaconda3\envs\tensorflow_gpu\lib\site-packages\tensorflow\python\eager\def_function.py",line 840,in _call
return self._stateless_fn(*args,**kwds)
File "D:\anaconda3\envs\tensorflow_gpu\lib\site-packages\tensorflow\python\eager\function.py",line 2829,in __call__
return graph_function._filtered_call(args,kwargs) # pylint: disable=protected-access
File "D:\anaconda3\envs\tensorflow_gpu\lib\site-packages\tensorflow\python\eager\function.py",line 1843,in _filtered_call
return self._call_flat(
File "D:\anaconda3\envs\tensorflow_gpu\lib\site-packages\tensorflow\python\eager\function.py",line 1923,in _call_flat
return self._build_call_outputs(self._inference_function.call(
File "D:\anaconda3\envs\tensorflow_gpu\lib\site-packages\tensorflow\python\eager\function.py",line 545,in call
outputs = execute.execute(
File "D:\anaconda3\envs\tensorflow_gpu\lib\site-packages\tensorflow\python\eager\execute.py",line 59,in quick_execute
tensors = pywrap_tfe.TFE_Py_Execute(ctx._handle,device_name,op_name,
UnknownError: Failed to get convolution algorithm. This is probably because cuDNN failed to initialize, so try looking to see if a warning log message was printed above.
[[node functional_1/conv2d/Relu (defined at D:\GPU testing\Image Super Resolution\Image Super Resolution using Autoencoders.py:126) ]] [Op:__inference_train_function_2246]
Function call stack:
train_function
2020-09-18 20:44:19.489732: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library cudart64_101.dll
2020-09-18 20:44:21.291233: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2
To enable them in other operations,rebuild TensorFlow with the appropriate compiler flags.
2020-09-18 20:44:21.306618: I tensorflow/compiler/xla/service/service.cc:168] XLA service 0x22a29eaa6b0 initialized for platform Host (this does not guarantee that XLA will be used). Devices:
2020-09-18 20:44:21.308804: I tensorflow/compiler/xla/service/service.cc:176] StreamExecutor device (0): Host,Default Version
2020-09-18 20:44:21.310433: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library nvcuda.dll
2020-09-18 20:44:22.424648: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1716] Found device 0 with properties:
pciBusID: 0000:01:00.0 name: GeForce GTX 1650 Ti computeCapability: 7.5
coreClock: 1.485GHz coreCount: 16 deviceMemorySize: 4.00GiB deviceMemoryBandwidth: 178.84GiB/s
2020-09-18 20:44:22.425736: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library cudart64_101.dll
2020-09-18 20:44:22.468696: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1858] Adding visible gpu devices: 0
2020-09-18 20:44:23.161235: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1257] Device interconnect StreamExecutor with strength 1 edge matrix:
2020-09-18 20:44:23.161847: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1263] 0
2020-09-18 20:44:23.162188: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1276] 0: N
2020-09-18 20:44:23.162708: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1402] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 3891 MB memory) -> physical GPU (device: 0,compute capability: 7.5)
2020-09-18 20:44:23.167626: I tensorflow/compiler/xla/service/service.cc:168] XLA service 0x22a52959fb0 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
2020-09-18 20:44:23.168513: I tensorflow/compiler/xla/service/service.cc:176] StreamExecutor device (0): GeForce GTX 1650 Ti,Compute Capability 7.5
2020-09-18 20:44:23.642458: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1716] Found device 0 with properties:
pciBusID: 0000:01:00.0 name: GeForce GTX 1650 Ti computeCapability: 7.5
coreClock: 1.485GHz coreCount: 16 deviceMemorySize: 4.00GiB deviceMemoryBandwidth: 178.84GiB/s
2020-09-18 20:44:23.643553: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library cudart64_101.dll
2020-09-18 20:44:23.647378: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1858] Adding visible gpu devices: 0
2020-09-18 20:44:23.648372: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1716] Found device 0 with properties:
pciBusID: 0000:01:00.0 name: GeForce GTX 1650 Ti computeCapability: 7.5
coreClock: 1.485GHz coreCount: 16 deviceMemorySize: 4.00GiB deviceMemoryBandwidth: 178.84GiB/s
2020-09-18 20:44:23.649458: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library cudart64_101.dll
2020-09-18 20:44:23.653267: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1858] Adding visible gpu devices: 0
2020-09-18 20:44:23.653735: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1257] Device interconnect StreamExecutor with strength 1 edge matrix:
2020-09-18 20:44:23.654291: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1263] 0
2020-09-18 20:44:23.654631: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1276] 0: N
2020-09-18 20:44:23.655077: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1402] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 3891 MB memory) -> physical GPU (device: 0,compute capability: 7.5)
2020-09-18 20:44:23.658359: I tensorflow/stream_executor/cuda/cuda_driver.cc:775] Failed to allocate 3.80G (4080218880 bytes) from device: CUDA_ERROR_OUT_OF_MEMORY: out of memory
2020-09-18 20:44:23.659070: I tensorflow/stream_executor/cuda/cuda_driver.cc:775] Failed to allocate 3.42G (3672196864 bytes) from device: CUDA_ERROR_OUT_OF_MEMORY: out of memory
2020-09-18 20:44:25.560185: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library cudnn64_7.dll
2020-09-18 20:44:26.855418: E tensorflow/stream_executor/cuda/cuda_dnn.cc:328] Could not create cudnn handle: CUDNN_STATUS_ALLOC_FAILED
2020-09-18 20:44:26.856558: E tensorflow/stream_executor/cuda/cuda_dnn.cc:328] Could not create cudnn handle: CUDNN_STATUS_ALLOC_FAILED
2020-09-18 20:44:26.857303: W tensorflow/core/framework/op_kernel.cc:1767] OP_REQUIRES failed at conv_ops_fused_impl.h:642 : Unknown: Failed to get convolution algorithm. This is probably because cuDNN failed to initialize, so try looking to see if a warning log message was printed above.
我尝试过启用 GPU 显存按需增长(allow_growth):
# Attempted workaround: let TensorFlow grow GPU memory on demand rather than
# pre-allocating it. The session is only created here, not registered with Keras.
config = tf.compat.v1.ConfigProto(
    gpu_options=tf.compat.v1.GPUOptions(allow_growth=True)
)
sess = tf.compat.v1.Session(config=config)
并且还尝试过限制 GPU 显存的使用比例:
# Attempted workaround: cap this process at 95% of the GPU's memory.
gpu_options = tf.compat.v1.GPUOptions(per_process_gpu_memory_fraction=0.95)
session_config = tf.compat.v1.ConfigProto(gpu_options=gpu_options)
session = tf.compat.v1.Session(config=session_config)
但是它们都没有解决问题。
我最近看到了 Analytics Vidhya 的这篇文章:《什么是自动编码器?使用自动编码器增强模糊的图像》,并尝试运行其中提供的代码,但遇到了相同的错误。
有人可以帮助我解决此问题吗?
解决方法
conv2d op引发错误消息:
无法获得卷积算法。这可能是因为cuDNN无法初始化,所以请尝试查看上面是否打印了警告日志消息。
从上面看,我们看到
已创建 TensorFlow 设备(/job:localhost/replica:0/task:0/device:GPU:0,具有 3891 MB 内存)-> 物理 GPU(设备:0,名称:GeForce GTX 1650 Ti,PCI 总线 ID:0000:01:00.0,计算能力:7.5)
无法从设备分配3.80G(4080218880字节):
CUDA_ERROR_OUT_OF_MEMORY:内存不足
无法从设备分配3.42G(3672196864字节):
CUDA_ERROR_OUT_OF_MEMORY:内存不足
因此,该计算图所需的显存超过了 GeForce GTX 1650 Ti 上可用的显存(3891 MB)。请尝试使用更小的输入图像尺寸和/或更小的批大小(batch size)。
实际上,问题在于没有为 TensorFlow 2.3.0 正确设置 GPU 显存按需增长。按下面的方式正确设置后,错误就消失了。
import tensorflow as tf
from tensorflow.compat.v1.keras.backend import set_session

# Build a session config that grows GPU memory on demand and logs which
# device each op is placed on.
config = tf.compat.v1.ConfigProto(
    gpu_options=tf.compat.v1.GPUOptions(allow_growth=True),
    log_device_placement=True,
)
sess = tf.compat.v1.Session(config=config)
# Hand the configured session to the Keras backend. This call is what the
# earlier allow_growth attempt in this post lacked.
set_session(sess)