Problem description
I recently upgraded my rig and added a 1070 Ti next to my 1080 Ti to speed up training. For regular models this does let me train faster. However, when I try to train a GAN that works fine on a single GPU, I can't get it to work on the multi-GPU setup. I subclass tf.keras.Model with a custom train_step and some other functionality. For the life of me, I can't get it to run without hitting the following error:
tensorflow.python.framework.errors_impl.InvalidArgumentError: 3 root error(s) found.
(0) Invalid argument: Incompatible shapes: [8] vs. [16]
[[node add (defined at Users\<User>\OneDrive\Documenten\HKU\Year 4\PDP_and_SN\Supportive Narrative\Research\Alpha_2\lib\NN.py:120) ]]
[[replica_1/sequential_1/batch_normalization_10/Greater/_96]]
(1) Invalid argument: Incompatible shapes: [8] vs. [16]
[[node add (defined at Users\<User>\OneDrive\Documenten\HKU\Year 4\PDP_and_SN\Supportive Narrative\Research\Alpha_2\lib\NN.py:120) ]]
[[Adam_1/AddN/_140]]
(2) Invalid argument: Incompatible shapes: [8] vs. [16]
[[node add (defined at Users\<User>\OneDrive\Documenten\HKU\Year 4\PDP_and_SN\Supportive Narrative\Research\Alpha_2\lib\NN.py:120) ]]
0 successful operations.
0 derived errors ignored. [Op:__inference_train_function_18178]
I use the following to create my model:
import tensorflow as tf

class GAN_Model(tf.keras.Model):
    def __init__(self, generator, discriminator, latent_dim, batch_size):
        super(GAN_Model, self).__init__()
        self.discriminator = discriminator
        self.generator = generator
        self.latent_dim = latent_dim
        self.batch_size = batch_size

    def compile(self, discriminator_optimizer, generator_optimizer, loss_function):
        super(GAN_Model, self).compile()
        self.discriminator_optimizer = discriminator_optimizer
        self.generator_optimizer = generator_optimizer
        self.loss_function = loss_function

    def generator_loss(self, cross_entropy, fake_output):
        # The generator tries to make the discriminator label fakes as real.
        return cross_entropy(tf.ones_like(fake_output), fake_output)

    def discriminator_loss(self, real_output, fake_output):
        # Real samples should score as ones, generated samples as zeros.
        real_loss = self.loss_function(tf.ones_like(real_output), real_output)
        fake_loss = self.loss_function(tf.zeros_like(fake_output), fake_output)
        total_loss = real_loss + fake_loss
        return total_loss

    def train_step(self, real_audio):
        random_latent_vectors = tf.random.normal(shape=(self.batch_size, self.latent_dim))
        with tf.GradientTape() as gen_tape, tf.GradientTape() as disc_tape:
            generated_images = self.generator(random_latent_vectors, training=True)
            real_output = self.discriminator(real_audio[0], training=True)
            fake_output = self.discriminator(generated_images, training=True)
            g_loss = self.generator_loss(self.loss_function, fake_output)
            d_loss = self.discriminator_loss(real_output, fake_output)
        gradients_of_generator = gen_tape.gradient(g_loss, self.generator.trainable_variables)
        gradients_of_discriminator = disc_tape.gradient(d_loss, self.discriminator.trainable_variables)
        self.generator_optimizer.apply_gradients(zip(gradients_of_generator, self.generator.trainable_variables))
        self.discriminator_optimizer.apply_gradients(zip(gradients_of_discriminator, self.discriminator.trainable_variables))
        return {"d_loss": d_loss, "g_loss": g_loss, "prediction": generated_images}
mirrored_strategy = tf.distribute.MirroredStrategy(devices=["/gpu:0", "/gpu:1"], cross_device_ops=tf.distribute.HierarchicalCopyAllReduce())
with mirrored_strategy.scope():
    generator = make_generator(latent_dim)
    discriminator = make_discriminator(spectral_size)
    g_opt = tf.keras.optimizers.Adam(0.0001, beta_1=0.5)
    d_opt = tf.keras.optimizers.Adam(0.00012, beta_1=0.5)
    loss_fn = tf.keras.losses.BinaryCrossentropy(from_logits=True, reduction=tf.keras.losses.Reduction.NONE)
    gan = GAN_Model(generator, discriminator, latent_dim, batch_size)
    gan.compile(d_opt, g_opt, loss_fn)

ckpt = tf.train.Checkpoint(generator_optimizer=g_opt, discriminator_optimizer=d_opt, generator=generator, discriminator=discriminator)
manager = tf.train.CheckpointManager(ckpt, ".\\data\\checkpoints\\" + str(model_name), max_to_keep=15)
if restore_model:
    ckpt.restore(manager.latest_checkpoint)

dataset = tf.data.experimental.load(dataset_dir, (tf.TensorSpec(shape=(spectral_size[0], spectral_size[1], spectral_size[2]), dtype=tf.double), tf.TensorSpec(shape=(2), dtype=tf.double)), compression="GZIP").batch(batch_size)
print(dataset)
history = gan.fit(dataset, epochs=epochs, callbacks=[generate_and_save_audio(manager, model_name)])
The full code is more extensive than this, but the essence of the problem should be somewhere in here. Thanks!
Solution
Got it! MirroredStrategy divides each batch across the GPUs (so in my case, with two GPUs, it divides the batch size by 2). I have to divide the batch size the model uses internally by 2, but when creating the dataset I can keep batch_size the same.
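A minimal sketch of the fix, assuming two GPUs as in my setup (the standalone strategy and dummy dataset below are illustrative scaffolding, not my actual training script): the latent vectors sampled inside train_step have to use the per-replica batch size, while the dataset keeps the global one.

import tensorflow as tf

strategy = tf.distribute.MirroredStrategy()  # two replicas in my case
global_batch_size = 16
latent_dim = 100

# fit() (and strategy.run) hand each replica
# global_batch_size / num_replicas_in_sync examples, so the noise sampled
# inside train_step must match that per-replica size, not the global one.
per_replica_batch = global_batch_size // strategy.num_replicas_in_sync

@tf.function
def train_step(real_batch):
    # Sample as many latent vectors as this replica has real examples, so
    # real_output and fake_output get matching leading dimensions and the
    # "Incompatible shapes: [8] vs. [16]" add no longer occurs.
    noise = tf.random.normal(shape=(per_replica_batch, latent_dim))
    return tf.shape(noise)[0], tf.shape(real_batch)[0]

# The dataset itself is still batched with the *global* batch size.
dataset = tf.data.Dataset.from_tensor_slices(tf.zeros((64, 8))).batch(global_batch_size)
for batch in strategy.experimental_distribute_dataset(dataset):
    print(strategy.run(train_step, args=(batch,)))  # both sizes: 8 per replica

An alternative (my assumption, not something I tested above) is to read the per-replica size dynamically with tf.shape(real_batch)[0] instead of dividing self.batch_size, which also handles a final partial batch when the dataset size isn't divisible by the global batch size.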