[Theano] TypeError: cost must be a scalar

Problem description

I am working on a research project that requires me to write a regularizer for a DNN.

import lasagne
from lasagne.nonlinearities import leaky_rectify,softmax
import theano,theano.tensor as T
import numpy as np
import sklearn.datasets,sklearn.preprocessing,sklearn.model_selection
import matplotlib.pyplot as plt
from tabulate import tabulate
import time
import math



#psi function that will be used in the penalty function
def psi(g,l): 
    m = g.shape[1]
    C = (1/T.pow(2,m))*(1/T.pow(math.pi,((m-1)/2))) / (T.gamma((m+1)/2))
    logDens = T.log(C) + m*T.log(l) - l*T.sqrt(T.sum(g**2))
    dens = T.exp(logDens)
    return(dens)

#pstar function that will be used in the penalty function
def pStar(g,lambda1,lambda0,theta):
    psi1 = psi(g,lambda1)
    psi0 = psi(g,lambda0)
    ## if a coefficient is really large then both these will numerically be zero 
    if theta*psi1 ==0 and (1-theta)*psi0==0:
        p = 1
    else:
        p = (theta*psi1) / (theta*psi1 + (1 - theta)*psi0)           
    return p
    
#Separable
def pen_S(l):
    theta = 0.5
    lambda1 = 1
    lambda0 = 12
    for j in range(len(l)):
        t = l[j]
        m = t.shape[1]
        n = t.shape[0].eval()
        cost = T.zeros((1,1))            
        for i in range(n):
            g = t[i]
            temp = -lambda1*T.sum(g**2) + T.log(pStar(T.zeros((1,m)),lambda1,lambda0,theta)/pStar(g,lambda1,lambda0,theta))
            cost = cost + temp
    return cost 
    
# Number of simulations
N_runs = 1

# Maximum number of epochs
max_epochs = 1500

# Define number of layers and number of neurons
H_layers = np.asarray([40,20])

# Minibatch size
batch_size = 300

# Lasagne Regularizers to be tested
regularizers = [pen_S]

# Define the regularization factors for each algorithm
reg_factors = [10**-3.5]

# Define the names (for display purposes)
names = ['SSGL_Sep']

# Load the dataset (DIGITS)
digits = sklearn.datasets.load_digits()
X = digits.data
y = digits.target

# MNIST
#mnist = sklearn.datasets.fetch_mldata('MNIST original',data_home='C:/Users/ISPAMM/Downloads')
#X = mnist.data
#y = mnist.target

# Preprocessing (input)
scaler = sklearn.preprocessing.MinMaxScaler()
X = scaler.fit_transform(X)

# Output structures
tr_errors = np.zeros((len(regularizers),N_runs))
tst_errors = np.zeros((len(regularizers),N_runs))
tr_times = np.zeros((len(regularizers),N_runs))
tr_obj = np.zeros((len(regularizers),N_runs,max_epochs))
sparsity_weights = np.zeros((len(regularizers),len(H_layers)+1))
sparsity_neurons = np.zeros((len(regularizers),len(H_layers)+1))

# Define the input and output symbolic variables
input_var = T.matrix(name='X')
target_var = T.ivector(name='y')

# Utility function for minibatches
def iterate_minibatches(inputs,targets,batchsize,shuffle=False):
    assert len(inputs) == len(targets)
    if shuffle:
        indices = np.arange(len(inputs))
        np.random.shuffle(indices)
    for start_idx in range(0,len(inputs) - batchsize + 1,batchsize):
        if shuffle:
            excerpt = indices[start_idx:start_idx + batchsize]
        else:
            excerpt = slice(start_idx,start_idx + batchsize)
        yield inputs[excerpt],targets[excerpt]

for k in np.arange(0,N_runs):
    
    print("Run ",k+1," of ","...\n",end="")

    # Split the data
    X_train,X_test,y_train,y_test = sklearn.model_selection.train_test_split(X,y,test_size=0.25)

    # Define the network structure
    network = lasagne.layers.InputLayer((None,X.shape[1]),input_var)
    for h in H_layers:
        network = lasagne.layers.DenseLayer(network,h,nonlinearity=leaky_rectify,W=lasagne.init.GlorotNormal())
    network = lasagne.layers.DenseLayer(network,len(np.unique(y)),nonlinearity=softmax,W=lasagne.init.GlorotNormal())
    params_original = lasagne.layers.get_all_param_values(network)    
    params = lasagne.layers.get_all_params(network,trainable=True)
    
    # Define the loss function
    prediction = lasagne.layers.get_output(network)
    loss = lasagne.objectives.categorical_crossentropy(prediction,target_var)

    # Define the test function
    test_prediction = lasagne.layers.get_output(network,deterministic=True)
    test_acc = T.mean(T.eq(T.argmax(test_prediction,axis=1),target_var),dtype=theano.config.floatX)
    test_fn = theano.function([input_var,target_var],test_acc,allow_input_downcast=True)    
    
     
    for r in np.arange(0,len(regularizers)):
        
        # Set to original parameters
        lasagne.layers.set_all_param_values(network,params_original)        
        
        # Define the regularized loss function
        loss_reg = loss.mean() + reg_factors[r] * lasagne.regularization.regularize_network_params(network,regularizers[r])
    
        # Update function
        # updates_reg = lasagne.updates.nesterov_momentum(loss_reg,params,learning_rate=0.01)
        updates_reg = lasagne.updates.adam(loss_reg,params)
        
        # Training function
        train_fn = theano.function([input_var,target_var],loss_reg,updates=updates_reg,allow_input_downcast=True)
    
        # Train network
        print("\tTraining with ",names[r]," regularization,epoch: ",end="")
        start = time.time()
        for epoch in range(max_epochs):
            loss_epoch = 0
            batches = 0
            if np.mod(epoch,10) == 0:
                print(epoch,"... ",end="")
            for batch in iterate_minibatches(X_train,y_train,batch_size,shuffle=True):
                input_batch,target_batch = batch
                loss_epoch += train_fn(input_batch,target_batch)
                batches += 1
            tr_obj[r,k,epoch] = loss_epoch/batches
        end = time.time()
        tr_times[r,k] = end - start
        print(epoch,".")
        
        # Final test with accuracy
        print("\tTesting the network with "," regularization...")
        tr_errors[r,k] = test_fn(X_train,y_train)
        tst_errors[r,k] = test_fn(X_test,y_test)
        
        # Check sparsity
        params_trained = lasagne.layers.get_all_param_values(network,trainable=True)
        sparsity_weights[r,:] = [1-(x.round(decimals=3).ravel().nonzero()[0].shape[0]/x.size) for x in params_trained[0::2]]
        sparsity_neurons[r,:] = [x.round(decimals=3).sum(axis=1).nonzero()[0].shape[0] for x in params_trained[0::2]]

tr_obj_mean = np.mean(tr_obj,axis=1)

# Plot the average loss
plt.figure()
plt.title('Training objective')
for r in np.arange(0,len(regularizers)):
    plt.semilogy(tr_obj_mean[r,:],label=names[r])
plt.legend()

# Print the results
print(tabulate([['Tr. accuracy [%]'] + np.mean(tr_errors,axis=1).round(decimals=4).tolist(),
                ['Test. accuracy [%]'] + np.mean(tst_errors,axis=1).round(decimals=4).tolist(),
                ['Tr. times [secs.]'] + np.mean(tr_times,axis=1).round(decimals=4).tolist(),
                ['Sparsity [%]'] + np.mean(sparsity_weights,axis=1).round(decimals=4).tolist(),
                ['Neurons'] + np.mean(sparsity_neurons,axis=1).round(decimals=4).tolist()],
               headers=['']+names))

pen_S(l) above is the regularizer I defined, but when I run the code to train the network I get "TypeError: cost must be a scalar". As far as I can tell, though, the output of pen_S is already a scalar.

Can anyone help me solve this problem?
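(Editor's note: a quick way to test that assumption is to ask Theano for the dimensionality of the expression that pen_S builds. The check below is a minimal sketch reusing only imports already present in the script; theano.gradient.grad, which lasagne.updates.adam calls internally, raises exactly this TypeError whenever the cost expression has ndim != 0.)

import theano.tensor as T

cost = T.zeros((1,1))      # the accumulator pen_S starts from
print(cost.ndim)           # 2 -> a (1,1) matrix, not a scalar
print(T.sum(cost).ndim)    # 0 -> a true 0-d Theano scalar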

Solution

No confirmed solution to this problem has been submitted yet.

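Editor's sketch of a possible fix: the error originates in theano.gradient.grad, which rejects any cost whose ndim is not 0, and pen_S accumulates into T.zeros((1,1)), a (1,1) matrix (the accumulator is also re-created inside the loop over layers, so every layer's penalty except the last is discarded). The rewrite below is a minimal, unverified sketch under those assumptions: it accumulates into a plain Python scalar, passes the lambda arguments through to pStar, slices each row as t[i:i+1] so that g.shape[1] inside psi stays valid for a 1 x m row, and replaces the Python if in pStar with T.switch, since a Python branch cannot inspect the value of a symbolic expression.

# Sketch: graph-friendly pStar (no Python "if" on symbolic values)
def pStar(g,lambda1,lambda0,theta):
    psi1 = psi(g,lambda1)
    psi0 = psi(g,lambda0)
    denom = theta*psi1 + (1 - theta)*psi0
    # if both densities underflow to zero, fall back to 1 as intended above
    return T.switch(T.eq(denom,0),1.0,(theta*psi1)/denom)

# Sketch: pen_S that returns a true 0-d scalar
def pen_S(l):
    theta = 0.5
    lambda1 = 1
    lambda0 = 12
    cost = 0.0                   # plain scalar accumulator, not T.zeros((1,1))
    for t in l:                  # initialized once, so no layer is discarded
        m = t.shape[1]
        n = t.shape[0].eval()    # shared variables have concrete shapes
        for i in range(n):
            g = t[i:i+1]         # slicing keeps g as a 1 x m row
            cost = cost - lambda1*T.sum(g**2) \
                   + T.log(pStar(T.zeros((1,m)),lambda1,lambda0,theta)
                           / pStar(g,lambda1,lambda0,theta))
    return cost                  # ndim == 0, so T.grad accepts it

With a 0-d cost, loss.mean() + reg_factors[r]*lasagne.regularization.regularize_network_params(network,pen_S) is itself 0-dimensional, which is what lasagne.updates.adam (via T.grad) requires.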
