问题描述
我将打印品添加到“ discriminator_loss”功能中以查看发生了什么。首先,它告诉我两者的形状均为16。后来,它告诉我“ real_loss”的形状仅为15,而其他形式则保持为16。到目前为止,我仅尝试降低batchsize并将其增加1 ect。我提供了代码中最相关的部分。如果需要,我可以提供其余代码。我不知道为什么会这样,并且破坏了代码。
with strategy.scope():
BATCH_SIZE = 16
GLOBAL_BATCH_SIZE = 32#batchsize*# of gpus
im_size = 256
latent_size = 512
with strategy.scope():
cross_entropy = tf.keras.losses.BinaryCrossentropy(
from_logits=True,\
reduction = tf.keras.losses.Reduction.NONE)
#this is used to evaluate discriminators ability to discriminate
def discriminator_loss(real_output,fake_output):
real_loss = cross_entropy(tf.ones_like(real_output),real_output)#compares prediction to actual value of 1
fake_loss = cross_entropy(tf.zeros_like(fake_output),fake_output)#compares rediction to actual value of 0
print(real_loss)
print(fake_loss)
total_loss = real_loss + fake_loss
total_loss = total_loss/GLOBAL_BATCH_SIZE
return total_loss
#how well was generator able to trick discriminator
def generator_loss(fake_output):
gen_loss = cross_entropy(tf.ones_like(fake_output),fake_output)#compares predictions to the expected value 1 of a real image
gen_loss = gen_loss / GLOBAL_BATCH_SIZE
return gen_loss
with strategy.scope():
EPOCHS = 80
noise_dim = 512
num_examples_to_generate = 32
# We will reuse this seed overtime (so it's easier)
# to visualize progress in the animated GIF)
with strategy.scope():
def noise(n):
return tf.random.normal([n,latent_size])
def noiseImage(n):
return tf.random.uniform([n,im_size,1])
#seed = tf.random.normal([num_examples_to_generate,noise_dim])
#seed used to generate image>the discriminator than classifies real images from training set and a set of generated images>loss is calculated and gradients are used to update the model
# Notice the use of `tf.function`
# This annotation causes the function to be "compiled".
with strategy.scope():
#@tf.function
def train_step(images):
with tf.GradientTape() as gen_tape,tf.GradientTape() as disc_tape:
generated_images = generator((noise(BATCH_SIZE),noiseImage(BATCH_SIZE),np.ones([BATCH_SIZE,1])),training=True)
real_output = discriminator(images,training=True)
fake_output = discriminator(generated_images,training=True)
g_loss = generator_loss(fake_output)#runs generator loss
d_loss = discriminator_loss(real_output,fake_output)#runs disc loss
G_grads = gen_tape.gradient(g_loss,generator.trainable_variables)
D_grads = disc_tape.gradient(d_loss,discriminator.trainable_variables)
generator_optimizer.apply_gradients(zip(G_grads,generator.trainable_variables))
discriminator_optimizer.apply_gradients(zip(D_grads,discriminator.trainable_variables))
#run g_optim twice to make sure d_loss doesn't go to zero
with tf.GradientTape() as gen_tape:
generated_imgs = generator((noise(BATCH_SIZE),training=True)
fake_output = discriminator(generated_imgs,training=True)
g_loss = generator_loss(fake_output)
G_grads = gen_tape.gradient(g_loss,generator.trainable_variables)
generator_optimizer.apply_gradients(zip(G_grads,generator.trainable_variables))
return g_loss,d_loss
@tf.function
def distributed_train_step(dist_dataset):
per_replica_g_losses,per_replica_d_losses = strategy.run(train_step,args=(dist_dataset,))
total_g_loss = strategy.reduce(tf.distribute.ReduceOp.SUM,per_replica_g_losses,axis=0)
total_d_loss = strategy.reduce(tf.distribute.ReduceOp.SUM,per_replica_d_losses,axis=0)
return total_g_loss,total_d_loss
with strategy.scope():
def train(dist_dataset,epochs):
for epoch in range(epochs):
start = time.time()
for image_batch in dist_dataset:
total_g_loss,total_d_loss = distributed_train_step(image_batch)#runs train_step function
with strategy.scope():
train(dist_dataset,EPOCHS)#in some cases can take up to 20000 epochs to train well
错误和追溯
Traceback (most recent call last):
File "C:\image generator\pixiv\#image generator.py",line 507,in <module>
train(dist_dataset,EPOCHS)#in some cases can take up to 20000 epochs to train well
File "C:\image generator\pixiv\#image generator.py",line 441,in train
total_g_loss,total_d_loss = distributed_train_step(image_batch)#runs train_step function
File "C:\Users\will\miniconda3\lib\site-packages\tensorflow\python\eager\def_function.py",line 580,in __call__
result = self._call(*args,**kwds)
File "C:\Users\will\miniconda3\lib\site-packages\tensorflow\python\eager\def_function.py",line 611,in _call
return self._stateless_fn(*args,**kwds) # pylint: disable=not-callable
File "C:\Users\will\miniconda3\lib\site-packages\tensorflow\python\eager\function.py",line 2419,in __call__
graph_function,args,kwargs = self._maybe_define_function(args,kwargs)
File "C:\Users\will\miniconda3\lib\site-packages\tensorflow\python\eager\function.py",line 2777,in _maybe_define_function
graph_function = self._create_graph_function(args,line 2667,in _create_graph_function
capture_by_value=self._capture_by_value),File "C:\Users\will\miniconda3\lib\site-packages\tensorflow\python\framework\func_graph.py",line 981,in func_graph_from_py_func
func_outputs = python_func(*func_args,**func_kwargs)
File "C:\Users\will\miniconda3\lib\site-packages\tensorflow\python\eager\def_function.py",in wrapped_fn
return weak_wrapped_fn().__wrapped__(*args,**kwds)
File "C:\Users\will\miniconda3\lib\site-packages\tensorflow\python\framework\func_graph.py",line 968,in wrapper
raise e.ag_error_Metadata.to_exception(e)
ValueError: in user code:
C:\image generator\pixiv\#image generator.py:419 distributed_train_step *
per_replica_g_losses,))
C:\image generator\pixiv\#image generator.py:393 train_step *
d_loss = discriminator_loss(real_output,fake_output)#runs disc loss
C:\image generator\pixiv\#image generator.py:328 discriminator_loss *
total_loss = real_loss + fake_loss
C:\Users\will\miniconda3\lib\site-packages\tensorflow\python\ops\math_ops.py:984 binary_op_wrapper
return func(x,y,name=name)
C:\Users\will\miniconda3\lib\site-packages\tensorflow\python\ops\math_ops.py:1276 _add_dispatch
return gen_math_ops.add_v2(x,name=name)
C:\Users\will\miniconda3\lib\site-packages\tensorflow\python\ops\gen_math_ops.py:483 add_v2
"AddV2",x=x,y=y,name=name)
C:\Users\will\miniconda3\lib\site-packages\tensorflow\python\framework\op_def_library.py:744 _apply_op_helper
attrs=attr_protos,op_def=op_def)
C:\Users\will\miniconda3\lib\site-packages\tensorflow\python\framework\func_graph.py:595 _create_op_internal
compute_device)
C:\Users\will\miniconda3\lib\site-packages\tensorflow\python\framework\ops.py:3327 _create_op_internal
op_def=op_def)
C:\Users\will\miniconda3\lib\site-packages\tensorflow\python\framework\ops.py:1817 __init__
control_input_ops,op_def)
C:\Users\will\miniconda3\lib\site-packages\tensorflow\python\framework\ops.py:1657 _create_c_op
raise ValueError(str(e))
ValueError: Dimensions must be equal,but are 0 and 2 for '{{node replica_1/add}} = AddV2[T=DT_FLOAT](replica_1/binary_crossentropy_1/weighted_loss/Mul,replica_1/binary_crossentropy_2/weighted_loss/Mul)' with input shapes: [0],[2].
解决方法
因此,根据comments,问题在于批次大小不相等,这是因为最终批次小于指定的批次大小。我相信这是由于以下原因:
generated_images = generator((noise(BATCH_SIZE),noiseImage(BATCH_SIZE),np.ones([BATCH_SIZE,1])),training=True)
使用恒定大小BATCH_SIZE
而不是批次的实际输入形状,因此generated_images
的形状与images
的形状不同。
因此,上述解决方案之一就是在drop_remainder=True
中使用batch()
。但是,最好让生成器输出与输入形状相同的图像,因此,应该使用输入批处理的实际大小,而不是将BATCH_SIZE
作为参数传递给噪声生成函数。因此,也许使用tf.shape(images)[0]
会有所帮助。或者,您可以使用BATCH_SIZE
生成一批固定的图像,然后直接丢弃任何多余的图像,例如
num_images = tf.shape(images)[0]
generated_images = generated_images[:num_images]