Problem description
While learning about neural networks I have written my own class.
import numpy as np
import random

def sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))

def sigmoid_prime(x):
    return sigmoid(x) * (1 - sigmoid(x))

def linear(x):
    return x

def linear_prime(x):
    return 1

def tanh(x):
    return (np.exp(x) - np.exp(-x)) / (np.exp(x) + np.exp(-x))

def tanh_prime(x):
    return 1 - tanh(x) * tanh(x)
class Network:
    def __init__(self, sizes, activation_func=sigmoid, activation_prime=sigmoid_prime):
        self.biases = [np.random.randn(x, 1) for x in sizes[1:]]
        self.weights = [np.random.randn(y, x) for x, y in zip(sizes, sizes[1:])]
        self.num_layers = len(sizes)
        self.sizes = sizes
        self.activation_function = activation_func
        self.activation_prime = activation_prime

    def forward_prop(self, a):
        for w, b in zip(self.weights, self.biases):
            a = self.activation_function(np.dot(w, a) + b)
        return a
    def cost_derivative(self, output_activations, y):
        return (output_activations - y)

    def backprop(self, x, y):
        nabla_b = [np.zeros(b.shape) for b in self.biases]
        nabla_w = [np.zeros(w.shape) for w in self.weights]
        # forward pass
        activation = x  # the first activation is the input itself
        a_mas = [x]     # list of activations, layer by layer
        z_mas = []      # list of weighted inputs z, layer by layer
        for b, w in zip(self.biases, self.weights):
            z = np.dot(w, activation) + b
            activation = self.activation_function(z)
            z_mas.append(z)
            a_mas.append(activation)
        # backward pass
        delta = self.cost_derivative(a_mas[-1], y) * self.activation_prime(z_mas[-1])
        nabla_b[-1] = delta
        nabla_w[-1] = np.dot(delta, a_mas[-2].T)
        for l in range(2, self.num_layers):  # start at 2 because the last layer was handled above
            delta = np.dot(self.weights[-l + 1].transpose(), delta) * self.activation_prime(z_mas[-l])
            nabla_b[-l] = delta
            nabla_w[-l] = np.dot(delta, a_mas[-l - 1].T)
        return nabla_b, nabla_w
    def update_mini_batch(self, mini_batch, eta):
        nabla_b = [np.zeros(b.shape) for b in self.biases]
        nabla_w = [np.zeros(w.shape) for w in self.weights]
        for x, y in mini_batch:
            delta_nabla_b, delta_nabla_w = self.backprop(x, y)
            nabla_b = [nb + dnb for nb, dnb in zip(nabla_b, delta_nabla_b)]
            nabla_w = [nw + dnw for nw, dnw in zip(nabla_w, delta_nabla_w)]
        eps = eta / len(mini_batch)
        self.weights = [w - eps * nw for w, nw in zip(self.weights, nabla_w)]
        self.biases = [b - eps * nb for b, nb in zip(self.biases, nabla_b)]

    def SGD(self, training_data, epochs, mini_batch_size, eta):
        n = len(training_data)
        for j in range(epochs):
            random.shuffle(training_data)
            mini_batches = [training_data[k:k + mini_batch_size]
                            for k in range(0, n, mini_batch_size)]
            for mini_batch in mini_batches:
                self.update_mini_batch(mini_batch, eta)
Now I am trying to approximate sin() with this network, but the code below does not work correctly.
%matplotlib inline
import matplotlib.pyplot as plt
net2 = Network([1,100,1],tanh,tanh_prime)
x = np.linspace(0,10,1000)
y = np.sin(x)
train = [(np.array(x[i]).reshape(1,1),np.array(y[i]).reshape(1,1)) for i in range(len(x))]
net2.SGD(train, 30, 10, 0.1)  # NOTE: epochs and mini-batch size are assumed here; the original call passed only the learning rate and does not match SGD's signature
y_pred = []
for i in range(len(x)):
    y_pred.append(float(net2.forward_prop(train[i][0])))
plt.plot(x, y, 'r', x, y_pred)
plt.grid()
I have already tried this network on digit recognition with the MNIST dataset, and there everything worked. I could not get an accuracy higher than 70%, but that is not the issue. Here, however, I do not know what is going wrong. The activation function is tanh().
Solution
As far as I can tell, you are minimizing f(x) - y. You may want to change it to the mean squared error ((f(x) - y)^2) or the mean absolute error (|f(x) - y|), either of which is sufficient for a regression problem like yours. For classification problems such as MNIST, cross-entropy is a good choice.
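For concreteness, here is a minimal sketch (my own, not part of the original answer) of what those two costs and their derivatives with respect to the network output could look like; the names mse_cost, mse_cost_prime, mae_cost and mae_cost_prime are hypothetical and would have to be wired into cost_derivative() by hand:

import numpy as np

# Quadratic (mean squared error) cost for one sample: 0.5 * (a - y)^2, summed over
# the output units; the 0.5 factor makes the derivative simply (a - y).
def mse_cost(a, y):
    return 0.5 * np.sum((a - y) ** 2)

def mse_cost_prime(a, y):
    return a - y

# Mean absolute error cost: |a - y|, whose derivative is the sign of (a - y).
def mae_cost(a, y):
    return np.sum(np.abs(a - y))

def mae_cost_prime(a, y):
    return np.sign(a - y)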
Also, you might try removing the tanh in the output layer. I do not think tanh is the problem here, since the output is between -1 and 1, but in general we use a linear activation for regression problems and reserve squashing functions such as sigmoid and tanh for classification problems.
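As a rough illustration of that suggestion (a sketch under my own assumptions, not the original author's code), the forward pass could keep tanh on the hidden layers and apply the identity only to the last layer; the helper below takes an existing Network instance and is purely hypothetical:

# Hypothetical variant of Network.forward_prop with a linear (identity) output layer:
# tanh is applied to every hidden layer, while the last layer returns z unchanged.
def forward_prop_linear_output(net, a):
    last = len(net.weights) - 1
    for i, (w, b) in enumerate(zip(net.weights, net.biases)):
        z = np.dot(w, a) + b
        a = z if i == last else net.activation_function(z)
    return a

The backward pass would need the matching change: the output-layer delta would use linear_prime (a constant 1) instead of tanh_prime.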