TensorFlow 2: Model training runs indefinitely in graph mode

Problem description

I am training the following model in both eager execution mode and graph mode. The model trains fine in eager execution mode, but in graph mode it runs indefinitely. I have tried debugging this in several ways, without success.
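
For context, a minimal sketch of how the two execution modes are typically toggled in TF 2 (the question does not show which mechanism was used, so this is an assumption):

import tensorflow as tf

# Force all tf.function-decorated code to run eagerly (a debugging aid):
tf.config.run_functions_eagerly(True)

# Or fall back to TF 1-style graph execution, which matches the
# "Train on None steps" progress format shown in the graph-mode output below:
# tf.compat.v1.disable_eager_execution()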

class CustomModelV2(tf.keras.Model):
    def __init__(self):
        super(CustomModelV2, self).__init__()
        self.encoder = Encoder(32)
        self.encoder.build(input_shape=(None, 32))
        self.loss_tracker = tf.keras.metrics.Mean(name="loss")

    def call(self, inputs, training):
        return self.encoder(inputs, training)

    @property
    def metrics(self):
        # We list our `Metric` objects here so that `reset_states()` can be
        # called automatically at the start of each epoch
        # or at the start of `evaluate()`.
        # If you don't implement this property, you have to call
        # `reset_states()` yourself at the time of your choosing.
        return [self.loss_tracker]

    @tf.function
    def train_step(self, data):
        # Unpack the data. Its structure depends on your model and
        # on what you pass to `fit()`.
        x, y = data

        with tf.GradientTape() as tape:
            y_pred = self.call(x, training=True)  # Forward pass

            # Compute the loss value
            # (the loss function is configured in `compile()`)
            r_loss = tf.keras.losses.mean_squared_error(y, y_pred)
            loss = r_loss

        # Compute gradients
        trainable_vars = self.trainable_variables
        gradients = tape.gradient(loss, trainable_vars)

        # Update weights
        self.optimizer.apply_gradients(zip(gradients, trainable_vars))

        # Update metrics (includes the metric that tracks the loss)
        self.loss_tracker.update_state(loss)

        # Return a dict mapping metric names to current value
        return {"loss": self.loss_tracker.result()}

class Encoder(tf.keras.Model):
    def __init__(self, input_size):
        super(Encoder, self).__init__(name='Encoder')
        # DenseLayer(units, input_dim, weights_regularizer, d_type)
        self.input_layer   = DenseLayer(128, input_size, 0.0, 'float32')
        self.hidden_layer1 = DenseLayer(128, 128, 0.001, 'float32')
        self.dropout_laye1 = tf.keras.layers.Dropout(0.2)
        self.hidden_layer2 = DenseLayer(64, 128, 0.001, 'float32')
        self.dropout_laye2 = tf.keras.layers.Dropout(0.2)
        self.hidden_layer3 = DenseLayer(64, 64, 0.001, 'float32')
        self.dropout_laye3 = tf.keras.layers.Dropout(0.2)
        self.output_layer  = LinearLayer(1, 64, 0.0, 0.0, 'float32')

    def call(self, input_data, training):
        fx = self.input_layer(input_data)
        fx = self.hidden_layer1(fx)
        if training:
            fx = self.dropout_laye1(fx)
        fx = self.hidden_layer2(fx)
        if training:
            fx = self.dropout_laye2(fx)
        fx = self.hidden_layer3(fx)
        if training:
            fx = self.dropout_laye3(fx)
        return self.output_layer(fx)

class LinearLayer(tf.keras.layers.Layer):

    def __init__(self, units, input_dim, weights_regularizer, bias_regularizer, d_type):
        super(LinearLayer, self).__init__()
        # Glorot-style uniform initialization bound
        limit = tf.cast(tf.math.sqrt(6.0 / (input_dim + units)), dtype=d_type)
        self.w = self.add_weight(name='w_linear', shape=(input_dim, units),
                                 initializer=tf.keras.initializers.RandomUniform(minval=-limit, maxval=limit, seed=16751),
                                 regularizer=tf.keras.regularizers.l1(weights_regularizer),
                                 trainable=True)
        self.b = self.add_weight(name='b_linear', shape=(units,),
                                 initializer=tf.zeros_initializer(),
                                 regularizer=tf.keras.regularizers.l1(bias_regularizer),
                                 trainable=True)

    def call(self, inputs):
        return tf.matmul(inputs, self.w) + self.b

class DenseLayer(tf.keras.layers.Layer):

    def __init__(self, units, input_dim, weights_regularizer, d_type):
        super(DenseLayer, self).__init__()
        # Glorot-style uniform initialization bound
        limit = tf.cast(tf.math.sqrt(6.0 / (input_dim + units)), dtype=d_type)
        self.w = self.add_weight(name='w_dense', shape=(input_dim, units),
                                 initializer=tf.keras.initializers.RandomUniform(minval=-limit, maxval=limit, seed=16751),
                                 regularizer=tf.keras.regularizers.l1(weights_regularizer),
                                 trainable=True)
        self.b = self.add_weight(name='b_dense', shape=(units,),
                                 initializer=tf.zeros_initializer(),
                                 trainable=True)

    def call(self, inputs):
        x = tf.matmul(inputs, self.w) + self.b
        return tf.nn.elu(x)
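
As a quick eager-mode sanity check of the layer stack (this snippet is not part of the question; the expected (4, 1) output shape assumes the constructor arguments shown above):

import numpy as np
import tensorflow as tf

enc = Encoder(32)
x_batch = tf.constant(np.random.random((4, 32)), dtype='float32')
print(enc(x_batch, training=False).shape)  # expected: (4, 1)
print(enc(x_batch, training=True).shape)   # same shape; the dropout branches run on this path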

Here is the script that trains the model:

# Just use `fit` as usual
x = tf.data.Dataset.from_tensor_slices(np.random.random((5000, 32)))

y_numpy = np.random.random((5000, 1))
y_numpy[:, 3:] = None
y = tf.data.Dataset.from_tensor_slices(y_numpy)

x_window = x.window(30, shift=10, stride=1)
flat_x = x_window.flat_map(lambda t: t)
flat_x_scaled = flat_x.map(lambda t: t * 2)

y_window = y.window(30, stride=1)
flat_y = y_window.flat_map(lambda t: t)
flat_y_scaled = flat_y.map(lambda t: t * 2)

z = tf.data.Dataset.zip((flat_x_scaled, flat_y_scaled)).batch(32).cache().shuffle(buffer_size=32).prefetch(buffer_size=tf.data.experimental.AUTOTUNE)

# Stopping criterion: stop if the training loss does not improve by at least 1e-3
early_stop_cb = tf.keras.callbacks.EarlyStopping(
    monitor='loss', min_delta=1e-3, verbose=1, mode='min', patience=3, baseline=None, restore_best_weights=True)

# Construct and compile an instance of CustomModelV2
model = CustomModelV2()
model.compile(optimizer=tf.optimizers.Adagrad(0.01))

history = model.fit(z, epochs=3, callbacks=[early_stop_cb])
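
One detail worth checking in this pipeline: x.window(30, shift=10) produces overlapping windows, while y.window(30, stride=1) leaves shift at its default (the window size, 30), so the two flattened streams are not element-aligned. A small diagnostic, not part of the original script, to see what z actually yields:

# Peek at the element structure and one batch of the zipped pipeline
print(z.element_spec)
for bx, by in z.take(1):
    print(bx.shape, by.shape)  # expected: roughly (32, 32) and (32, 1)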

Here is the output in graph mode:

WARNING:tensorflow:Output output_1 missing from loss dictionary. We assume this was done on purpose. The fit and evaluate APIs will not be expecting any data to be passed to output_1.
WARNING:tensorflow:From C:\Users\jain432\Anaconda3\envs\tf\lib\site-packages\tensorflow\python\keras\optimizer_v2\adagrad.py:87: calling Constant.__init__ (from tensorflow.python.ops.init_ops) with dtype is deprecated and will be removed in a future version.
Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
Train on None steps
Epoch 1/3
 479916/Unknown - 667s 1ms/step - batch: 239957.5000 - size: 1.0000 - loss: 2.1716e-04

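The telling lines are "Train on None steps" and the "Unknown" step counter: after window/flat_map, tf.data can no longer report the dataset's cardinality, so graph-mode fit has no step bound for the epoch. A possible workaround sketch, assuming the pipeline is finite (the batch count must be obtained by eager iteration):

# flat_map erases the dataset length as far as tf.data is concerned
print(tf.data.experimental.cardinality(z))  # typically UNKNOWN_CARDINALITY (-2)

# Count the batches once, then give fit() an explicit epoch bound
steps = sum(1 for _ in z)
history = model.fit(z, epochs=3, steps_per_epoch=steps, callbacks=[early_stop_cb])
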
As I said, it works fine in eager execution mode:

Epoch 1/3
468/468 [==============================] - 2s 3ms/step - loss: 0.4173
Epoch 2/3
468/468 [==============================] - 1s 3ms/step - loss: 0.3695
Epoch 3/3
468/468 [==============================] - 1s 3ms/step - loss: 0.3608

Can someone help me understand what is happening here and where I am going wrong?

Solution

No working solution for this problem has been found yet.
