RMSprop optimizer doesn't change accuracy and loss

Problem description

The dataset is CIFAR10. I built a VGG-like network:

import torch
import torch.nn as nn

class FirstModel(nn.Module):
    def __init__(self):
        super(FirstModel,self).__init__()
        self.vgg1 = nn.Sequential(
            nn.Conv2d(3,16,3,padding=1),nn.BatchNorm2d(16),nn.ReLU(),
            nn.Conv2d(16,16,3,padding=1),nn.BatchNorm2d(16),nn.ReLU(),
            nn.MaxPool2d(2,2),nn.Dropout(0.2)
        )

        self.vgg2 = nn.Sequential(
            nn.Conv2d(16,32,3,padding=1),nn.BatchNorm2d(32),nn.ReLU(),
            nn.Conv2d(32,32,3,padding=1),nn.BatchNorm2d(32),nn.ReLU(),
            nn.MaxPool2d(2,2),nn.Dropout(0.2)
        )

        self.vgg3 = nn.Sequential(
            nn.Conv2d(32,64,3,padding=1),nn.BatchNorm2d(64),nn.ReLU(),
            nn.Conv2d(64,64,3,padding=1),nn.BatchNorm2d(64),nn.ReLU(),
            nn.MaxPool2d(2,2),nn.Dropout(0.2)
        )

        self.fc1 = nn.Linear(4 * 4 * 64,4096)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(4096,4096)
        self.fc3 = nn.Linear(4096,10)
        self.softmax = nn.Softmax(dim=1)
        self.dropout = nn.Dropout(0.5)


    def forward(self,x):
        x = self.vgg3(self.vgg2(self.vgg1(x)))
        x = nn.Flatten()(x)
        x = self.relu(self.fc1(x))
        x = self.dropout(x)
        x = self.relu(self.fc2(x))
        x = self.dropout(x)
        x = self.softmax(self.fc3(x))

        return x

Then I train it and visualize the loss and accuracy:

import numpy as np
import matplotlib.pyplot as plt
from IPython.display import clear_output

def plot_history(train_history,val_history,title='loss'):
    plt.figure()
    plt.title('{}'.format(title))
    plt.plot(train_history,label='train',zorder=1)
    
    points = np.array(val_history)
    steps = list(range(0,len(train_history) + 1,int(len(train_history) / len(val_history))))[1:]
    
    plt.scatter(steps,points,marker='*',s=180,c='red',label='val',zorder=2)
    plt.xlabel('train steps')
    
    plt.legend(loc='best')
    plt.grid()

    plt.show()

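# NUM_EPOCH and device are assumed to be defined elsewhere in the notebook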
def train_model(model,optimizer,train_DataLoader,test_DataLoader):
      criterion = nn.CrossEntropyLoss() 
      
      train_loss_log = []
      train_acc_log = []
      val_loss_log = []
      val_acc_log = []

      for epoch in range(NUM_EPOCH):
        
        model.train()
        
        train_loss = 0.
        train_size = 0
        train_acc = 0.


        for inputs,labels in train_DataLoader:
          
          inputs,labels = inputs.to(device),labels.to(device)

          optimizer.zero_grad()
          y_pred = model(inputs) 

          loss = criterion(y_pred,labels) 
          loss.backward()
          optimizer.step()

          train_loss += loss.item()
          train_size += y_pred.size(0)
          train_loss_log.append(loss.item() / y_pred.size(0))

          _,pred_classes = torch.max(y_pred,1)
          train_acc += (pred_classes == labels).sum().item()
          train_acc_log.append(np.mean((pred_classes == labels).cpu().numpy()))

        # validation block
        val_loss = 0.
        val_size = 0
        val_acc = 0.
        
        model.eval()
        with torch.no_grad():
            for inputs,labels in test_DataLoader:
                inputs,labels = inputs.to(device),labels.to(device)
                y_pred = model(inputs)
                loss = criterion(y_pred,labels)
                val_loss += loss.item()
                val_size += y_pred.size(0)

                _,pred_classes = torch.max(y_pred,1)
                val_acc += (pred_classes == labels).sum().item()
        
        val_loss_log.append(val_loss/val_size)
        val_acc_log.append(val_acc/val_size)

        clear_output()
        plot_history(train_loss_log,val_loss_log,'loss')
        plot_history(train_acc_log,val_acc_log,'accuracy')

        print('Train loss:',train_loss / train_size)
        print('Train acc:',train_acc / train_size)
        print('Val loss:',val_loss / val_size)
        print('Val acc:',val_acc / val_size)

Then I train the model:

import torch.optim as optim

first_model = FirstModel()
first_model.to(device)

optimizer = optim.RMSprop(first_model.parameters(),lr=0.001,momentum=0.9)

train_model(first_model,optimizer,train_DataLoader,test_DataLoader)

The loss and accuracy do not change (accuracy stays at 0.1). However, if the optimizer is SGD with momentum, everything works fine (the loss and accuracy change). I have already tried changing the momentum and lr, but it doesn't help.

What should be fixed? Any suggestions would be appreciated!

Solution

Try lowering the learning rate further. If that still has no effect on the accuracy and loss, switch the optimizer to Adam or another one and experiment with different learning rates.
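A minimal sketch of that suggestion, reusing the FirstModel, train_model, data loaders, and device defined above; the learning rate 1e-4 is only an illustrative starting point:

import torch.optim as optim

first_model = FirstModel()
first_model.to(device)

# illustrative: switch from RMSprop to Adam with a smaller learning rate
optimizer = optim.Adam(first_model.parameters(),lr=1e-4)

train_model(first_model,optimizer,train_DataLoader,test_DataLoader)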


So, first of all, you should not apply softmax inside the model, because nn.CrossEntropyLoss already applies log-softmax internally; also, I don't think RMSprop works well with momentum.
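A minimal sketch of that fix, keeping the rest of FirstModel as above: forward returns raw logits (nn.CrossEntropyLoss applies log-softmax itself), and RMSprop is created without the momentum argument:

    # inside FirstModel
    def forward(self,x):
        x = self.vgg3(self.vgg2(self.vgg1(x)))
        x = nn.Flatten()(x)
        x = self.relu(self.fc1(x))
        x = self.dropout(x)
        x = self.relu(self.fc2(x))
        x = self.dropout(x)
        return self.fc3(x)  # raw logits, no softmax

optimizer = optim.RMSprop(first_model.parameters(),lr=0.001)  # momentum left at its default of 0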