Problem description
Good evening,

The code shown below is the training function for an actor network. The idea is to use it inside a loop to control a system. All control actions are produced by this neural network, and they should converge to the values proposed by the cross-entropy method, thereby minimizing the cost. The cost itself is approximated by a critic neural network that appears to work well.

When the system runs, the cross-entropy-method function returns convincing values, but the actor keeps increasing the control action, which increases the error.

What could be the problem?
Code
'''
import numpy as np
import torch
import torch.nn as nn
import matplotlib.pyplot as plt
from torch.autograd import Variable

# input data are normalized
def Actor_train(state_train, action_train, force_train, Actor_NN, Critic_NN, Model_NN,
                xdh_intro, Kh, xy_norm, Nh, learning_rate):
    # action_mean_, action_std_ = action_norm
    # fh_mean_, fh_std_ = fh_norm
    Actor_NN_ = Actor_NN
    Critic_NN_ = Critic_NN
    Model_NN_ = Model_NN
    Nh_ = Nh
    learning_rate_ = learning_rate
    xy_norm_ = xy_norm
    x_mean_v_, x_std_v_, y_mean_v_, y_std_v_ = xy_norm_  # normalization variables
    state_not_norm_ = state_train*x_std_v_[0:2] + x_mean_v_[0:2]  # denormalization
    # normalized quantities
    state_train_ = np.copy(state_train[0:Nh_, :])
    action_train_ = Variable(torch.from_numpy(action_train[0:Nh_].squeeze(1)), requires_grad=True)
    # action_train comes from a neural network
    force_train_ = np.copy(force_train[0:Nh_])
    Actor_NN_.to("cpu")  # keeping the model on the CPU (GPU not available for now)
    optimizer_A = torch.optim.Adam(Actor_NN_.get_parameters(), lr=learning_rate_)
    errore_f = nn.MSELoss()
    cache_loss_ = []
    resultant_loss_ = []
    iteration_ = 0
    for j in range(0, Nh_-1):
        Actor_NN.train()
        optimizer_A.zero_grad()
        # cross-entropy method to estimate u minimizing the output of the critic network
        U_npiu1 = CEM_critic(state_not_norm_[j:j+1], 1, 5, 64, xy_norm_, Cost_norm, critic, num_ensembles_cem_=5)[0]
        action_from_NN = torch.DoubleTensor(action_train_[j+1:j+2])
        action_from_CEM = torch.DoubleTensor(U_npiu1)[0]
        print("NN ", action_from_NN[0])
        print("CEM ", action_from_CEM)
        # cost function
        Error_a = errore_f.forward(action_from_NN[0], action_from_CEM)
        Error_a.backward(retain_graph=True)  # backprop
        optimizer_A.step()  # updating the parameters
        Nh_loss_ = Error_a
        # torch.cuda.empty_cache()
        cache_loss_.append(Nh_loss_.item())
        iteration_ =+1
        if (j+1) % 2 == 0:
            resultant_ = np.sum(cache_loss_)/iteration_
            resultant_loss_.append(resultant_)
            iteration_ = 0
            cache_loss_ = []
            print("epoch:", j+1, " Actor Training_loss:", resultant_)
    plt.plot(resultant_loss_, 'r')
    plt.ylabel('Loss')
    plt.xlabel('epochs')
    plt.title('actor')
    plt.grid(True)
    plt.show()
    print("Final training loss: ", resultant_loss_[-1])
    return
'''
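For reference, here is a minimal, self-contained sketch of the step the question describes: pushing an actor network's output toward a target action with MSELoss. All names here are hypothetical, and the target is a fixed stand-in for whatever the cross-entropy method would propose.

'''
# Minimal sketch, hypothetical names: train an actor's output toward a
# fixed target action (a stand-in for the CEM proposal) with MSELoss.
import torch
import torch.nn as nn

actor = nn.Sequential(nn.Linear(2, 16), nn.Tanh(), nn.Linear(16, 1)).double()
optimizer = torch.optim.Adam(actor.parameters(), lr=1e-2)
loss_fn = nn.MSELoss()

state = torch.randn(1, 2, dtype=torch.double)            # one (normalized) state
target_action = torch.tensor([0.5], dtype=torch.double)  # stand-in for the CEM value

for step in range(200):
    optimizer.zero_grad()
    predicted = actor(state).squeeze(0)  # recomputed through the actor each step
    loss = loss_fn(predicted, target_action)
    loss.backward()
    optimizer.step()

print(actor(state).item())  # should end up close to 0.5
'''

Note that the sketch recomputes the action through the actor inside the loop, so the gradient of the loss reaches the actor's parameters; it only illustrates the mechanics, not the question's full setup (CEM_critic, normalization, etc.).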
Solution
Perhaps you want iteration_ += 1 instead of what you have: iteration_ =+1. The latter is parsed as iteration_ = (+1), i.e. a plain assignment of 1, so iteration_ never counts up; as a side effect, the averaged loss np.sum(cache_loss_)/iteration_ divides the sum of two cached losses by 1 instead of 2.
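A quick way to see the difference:

'''
# "=+ 1" is parsed as "= (+1)": a plain assignment of positive one
iteration_ = 0
iteration_ =+ 1
iteration_ =+ 1
print(iteration_)  # 1, not 2

# "+= 1" actually increments
iteration_ = 0
iteration_ += 1
iteration_ += 1
print(iteration_)  # 2
'''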