问题描述
我已经用 Java 搭建了一个反向传播神经网络,并尝试用批量梯度下降法拟合一批数据,但训练约 10000 轮后,网络对每条数据都输出几乎相同的值。而只用单条数据训练时,结果已经足够好。网络有 9 个输入、1 个输出和 2 个隐藏层,每个隐藏层有 25 个神经元和 1 个偏置。
///////////////////////////////////////////// ////////
一个数据的信息:
- 输入数据:
2.7734725 1.55890012 1.03137298 1.31096406 91.74919483 38.41713338 124.7455604 14.7469924 -12215
- 网络生成的结果和真实的结果
计算结果 = [1.1033203607365611] 实际结果 = [1.12]
///////////////////////////////////////////// //////////
批量梯度下降信息
输入数据
1:2.7734725 1.55890012 1.03137298 1.31096406 91.74919483 38.41713338 124.7455604 14.7469911111515
2:3.09290183 1.07195943 0.95975968 1.2292225 179.5196048 151.317491 123.8542234 -104.46997481 -113
3: 3.07936524 1.44102341 0.92088287 1.20511177 91.45398761 19.9612519 33.39778958 174.4891676717675
4:1.72748681 1.40982529 0.94609285 1.15367887 102.10286 140.4601186 154.9999046 -123.15089755 8959
5: 2.21764552 1.54368486 1.02631772 1.19154162 79.89443452 68.40163644 149.0862576 -152.3800.734>-152.3800.7342
真实数据和计算结果:
计算结果 = [122.91412579592004] 实际结果 = [1.12]
计算结果 = [122.91412579592004] 实际结果 = [0.11]
计算结果 = [122.91412579592004] 实际结果 = [15.1]
计算结果 = [122.91412579592004] 实际结果 = [20931.15]
计算结果 = [122.91412490543053] 实际结果 = [1003.56]
///////////////////////////////////////////// ///
public static void main(String[] args) {
    // Train the 9-25-1 network (inner class Function) on a single sample
    // with plain steepest descent and a fixed learning rate.
    int[] layerNum = new int[]{9, 25, 1};
    int[] activeFunction = new int[]{0, 1, 2};
    Function function = new Function(layerNum, activeFunction);
    double[] params = new double[function.getParameterList().length];
    double[] grads = new double[function.getParameterList().length];
    double[][] table = GetArrayFromFile.magic("C:\\Users\\74719\\Desktop\\test\\database_compress.out");
    double[] features = new double[9];
    double[] target = new double[1];
    for (int row = 0; row < 1; row++) {
        // Column 0 of each data row is the label; columns 1..9 are the inputs.
        for (int col = 1; col < table[row].length; col++) {
            features[col - 1] = table[row][col];
            System.out.print(features[col - 1] + " ");
        }
        target[0] = table[row][0];
        System.out.println(target[0]);
        double[] prediction = new double[target.length];
        // 6000 descent steps of size 1e-4 on the full flat parameter vector.
        for (int round = 0; round < 6000; round++) {
            params = function.getParameterList();
            grads = function.getWeightDelta(features, target);
            for (int i = 0; i < params.length; i++) {
                params[i] -= 0.0001 * grads[i];
            }
            function.setParameter(params);
        }
        prediction = function.compute(features);
        System.out.println("calculate result = " + Arrays.toString(prediction) + " real result = " + Arrays.toString(target));
    }
}
批量梯度下降的主函数
public static void main(String[] args) {
//initialize the NN-function in inner class
int[] layerNum = new int[]{9,activeFunction);
double[] parameterList = new double[function.getParameterList().length];
double[] parameterListDelta = new double[function.getParameterList().length];
double[][] array = GetArrayFromFile.magic("C:\\Users\\74719\\Desktop\\test\\database_compress.out");
int batch = 5;
double[][] input = new double[batch][9];
double[][] output = new double[batch][1];
for (int row = 0; row < batch; row++) {
for (int i = 1; i < array[row].length; i++) {
input[row][i - 1] = array[row][i];
}
output[row][0] = array[row][0];
}
double[][] calcOutput = new double[batch][output.length];
for (int roundCount = 0; roundCount < 2; roundCount++) {
parameterList = function.getParameterList();
parameterListDelta = function.getDerivative(input[0]);
for (int row = 1; row < batch; row++) {
parameterListDelta = ArrayProcess.matrixPlus(parameterListDelta,function.getWeightDelta(input[row],output[row]));
}
for (int i = 0; i < parameterList.length; i++) {
parameterList[i] -= 0.0001 * parameterListDelta[i];
}
function.setParameter(parameterList);
}
for (int row = 0; row < batch; row++) {
calcOutput[row] = function.compute(input[row]);
System.out.println("calculate result = " + Arrays.toString(calcOutput[row]) + " real result = " + Arrays.toString(output[row]));
}
}
Function 类(实现有缺陷,需要修正和优化)
/**
 * A small fully-connected feed-forward neural network with backpropagation.
 *
 * <p>Layout: {@code layerNum[i]} neurons in layer i (layer 0 = input). The weight
 * matrix {@code layerWeight[l][j][k]} connects neuron k of layer l to neuron j of
 * layer l+1; the extra last column ({@code k == layerNum[l]}) holds neuron j's bias.
 * Activation codes per non-input layer: 0 = sigmoid, 1 = tansig, 2 = pureLine.
 *
 * <p>Not thread-safe: forward/backward passes share mutable scratch state.
 */
class Function {
    private int[] layerNum;                // neuron count per layer, input .. output
    private int[] activeFunction;          // activation id of layer i+1 at index i
    private double[][] layer;              // forward activations, filled by compute()
    private double[][] layerErr;           // per-neuron deltas of the last backward pass
    private double[][][] layerWeight;      // weights, trailing column = bias
    private double[][][] layerWeightDelta; // per-weight deltas, same shape as layerWeight
    private double[][][] errorTranspose;   // weight*delta products (written, never read here)
    private double[] parameterList;        // flat view of layerWeight
    private double[] parameterListDelta;   // flat gradient returned by getWeightDelta
    private double[] parameterListPartial; // flat jacobian row returned by getDerivative

    /**
     * Builds the network with uniform-random weights in [0,1) and prints its structure.
     *
     * @param layerNum       neurons per layer, input layer first
     * @param activeFunction activation id for each layer after the input layer
     */
    public Function(int[] layerNum, int[] activeFunction) {
        System.out.println("/////////////////////////////////////////////////////");
        this.layerNum = layerNum;
        this.activeFunction = activeFunction;
        layer = new double[layerNum.length][];
        layerErr = new double[layerNum.length][];
        layerWeight = new double[layerNum.length - 1][][];
        layerWeightDelta = new double[layerNum.length - 1][][];
        errorTranspose = new double[layerNum.length - 1][][];
        for (int i = 0; i < layer.length; i++) {
            layer[i] = new double[layerNum[i]];
            layerErr[i] = new double[layerNum[i]];
        }
        Random random = new Random();
        int count = 0;
        for (int i = 0; i < layerWeight.length; i++) {
            // +1 column per row: the bias of each neuron of layer i+1.
            layerWeight[i] = new double[layerNum[i + 1]][layerNum[i] + 1];
            // Delta buffers are fully overwritten by every backward pass, so they
            // need no random seeding (the original seeded them, which masked a bug).
            layerWeightDelta[i] = new double[layerNum[i + 1]][layerNum[i] + 1];
            errorTranspose[i] = new double[layerNum[i + 1]][layerNum[i]];
            for (int j = 0; j < layerWeight[i].length; j++) {
                for (int k = 0; k < layerWeight[i][j].length; k++) {
                    layerWeight[i][j][k] = random.nextDouble();
                    count++;
                }
            }
        }
        parameterList = new double[count];
        count = 0;
        for (int i = 0; i < layerWeight.length; i++) {
            for (int j = 0; j < layerWeight[i].length; j++) {
                for (int k = 0; k < layerWeight[i][j].length; k++) {
                    parameterList[count++] = layerWeight[i][j][k];
                }
            }
        }
        parameterListDelta = new double[parameterList.length];
        parameterListPartial = new double[parameterList.length];
        System.out.println("initial finished");
        System.out.println("structure : ");
        System.out.print("layer : ");
        for (int i = 0; i < layerNum.length - 1; i++) {
            System.out.print(layerNum[i] + " - ");
        }
        System.out.println(layerNum[layerNum.length - 1]);
        System.out.print("activeFunction : ");
        for (int i = 0; i < activeFunction.length - 1; i++) {
            System.out.print(functionName(activeFunction[i]) + " - ");
        }
        System.out.println(functionName(activeFunction[activeFunction.length - 1]));
        System.out.println("/////////////////////////////////////////////////////");
    }

    /** Maps an activation id to its display name. */
    private static String functionName(int id) {
        return switch (id) {
            case 1 -> "tansig";
            case 2 -> "pureLine";
            default -> "sigmoid";
        };
    }

    /** Applies the activation function with the given id to the pre-activation z. */
    private static double activate(double z, int id) {
        return switch (id) {
            case 1 -> 2 / (1 + Math.exp(-2 * z)) - 1; // tansig
            case 2 -> z;                              // pureLine
            default -> 1 / (1 + Math.exp(-z));        // sigmoid
        };
    }

    /** Derivative f'(z) expressed through the activation value a = f(z). */
    private static double activationDerivative(double a, int id) {
        return switch (id) {
            case 1 -> 1 - a * a;    // tansig'
            case 2 -> 1;            // pureLine'
            default -> a * (1 - a); // sigmoid'
        };
    }

    /**
     * Runs a forward pass and returns a copy of the output layer.
     *
     * @param input activations for layer 0 (at least layerNum[0] entries)
     * @return the activations of the last layer
     */
    public double[] compute(double[] input) {
        System.arraycopy(input, 0, layer[0], 0, input.length);
        for (int i = 1; i < layer.length; i++) {
            for (int j = 0; j < layer[i].length; j++) {
                double z = layerWeight[i - 1][j][layerWeight[i - 1][j].length - 1]; // bias
                for (int k = 0; k < layer[i - 1].length; k++) {
                    z += layerWeight[i - 1][j][k] * layer[i - 1][k];
                }
                layer[i][j] = activate(z, activeFunction[i - 1]);
            }
        }
        double[] output = new double[layer[layer.length - 1].length];
        System.arraycopy(layer[layer.length - 1], 0, output, 0, output.length);
        return output;
    }

    /**
     * Error gradient dE/dw for one sample, E = 1/2 * (calc - real)^2 per output.
     *
     * @param input      sample features
     * @param realOutput sample targets, one per output neuron
     * @return flat gradient in parameterList order (a reused internal buffer —
     *         callers must copy it before the next backward pass)
     */
    public double[] getWeightDelta(double[] input, double[] realOutput) {
        compute(input);
        int last = layer.length - 1;
        double[] seed = new double[layer[last].length];
        for (int i = 0; i < seed.length; i++) {
            seed[i] = layer[last][i] - realOutput[i]; // dE/d(calc)
        }
        backPropagate(seed);
        return flattenDeltas(parameterListDelta);
    }

    /**
     * Jacobian row d(real - calc)/dw of the residual for one sample.
     *
     * @param input sample features
     * @return flat derivatives in parameterList order (reused internal buffer —
     *         callers must copy it before the next backward pass)
     */
    public double[] getDerivative(double[] input) {
        compute(input);
        int last = layer.length - 1;
        double[] seed = new double[layer[last].length];
        for (int i = 0; i < seed.length; i++) {
            seed[i] = -1; // d(real - calc)/d(calc)
        }
        backPropagate(seed);
        return flattenDeltas(parameterListPartial);
    }

    /**
     * Fills layerErr / layerWeightDelta / errorTranspose from an output-layer
     * seed; seed[i] is output neuron i's error term before applying f'.
     */
    private void backPropagate(double[] seed) {
        int last = layer.length - 1;
        for (int i = 0; i < layer[last].length; i++) {
            double delta = seed[i] * activationDerivative(layer[last][i], activeFunction[last - 1]);
            layerErr[last][i] = delta;
            fillDeltaRow(last - 1, i, delta);
        }
        for (int l = last - 1; l > 0; l--) {
            for (int i = 0; i < layer[l].length; i++) {
                double delta = 0;
                for (int j = 0; j < layer[l + 1].length; j++) {
                    delta += layerWeight[l][j][i] * layerErr[l + 1][j];
                }
                delta *= activationDerivative(layer[l][i], activeFunction[l - 1]);
                layerErr[l][i] = delta;
                fillDeltaRow(l - 1, i, delta);
            }
        }
    }

    /** Writes the per-weight deltas for neuron i of layer l+1 (all weights + bias). */
    private void fillDeltaRow(int l, int i, double delta) {
        layerWeightDelta[l][i][layerWeightDelta[l][i].length - 1] = delta; // bias slot
        // BUG FIX: the original output-layer loop ran j < layer[l].length - 1 and
        // never wrote the delta of the LAST incoming weight, leaving the random
        // value seeded in the constructor there — every gradient step corrupted
        // that weight, which matches the reported non-convergence.
        for (int j = 0; j < layer[l].length; j++) {
            layerWeightDelta[l][i][j] = layer[l][j] * delta;
            errorTranspose[l][i][j] = layerWeight[l][i][j] * delta;
        }
    }

    /** Flattens layerWeightDelta into dest in parameterList order and returns dest. */
    private double[] flattenDeltas(double[] dest) {
        int count = 0;
        for (double[][] block : layerWeightDelta) {
            for (double[] row : block) {
                for (double v : row) {
                    dest[count++] = v;
                }
            }
        }
        return dest;
    }

    /**
     * Jacobian of the residuals wrt all parameters, one row per sample.
     *
     * <p>BUG FIX: each row is cloned. The original stored the same reused scratch
     * array in every row, so all rows aliased the LAST sample's derivatives.
     *
     * @param inputBatch one feature vector per sample
     * @return rows of getDerivative(inputBatch[i]), independently owned
     */
    public double[][] getJacobi(double[][] inputBatch) {
        double[][] jacobi = new double[inputBatch.length][];
        for (int i = 0; i < inputBatch.length; i++) {
            jacobi[i] = getDerivative(inputBatch[i]).clone();
        }
        return jacobi;
    }

    /** Returns the live flat parameter array (not a copy; mutations are visible). */
    public double[] getParameterList() {
        return parameterList;
    }

    /**
     * Installs a flat parameter vector and unpacks it back into layerWeight.
     *
     * @param parameter flat weights in the same order as getParameterList()
     */
    public void setParameter(double[] parameter) {
        parameterList = parameter;
        int count = 0;
        for (int i = 0; i < layerWeight.length; i++) {
            for (int j = 0; j < layerWeight[i].length; j++) {
                for (int k = 0; k < layerWeight[i][j].length; k++) {
                    layerWeight[i][j][k] = parameter[count++];
                }
            }
        }
    }
}
我也用一些方法来实现矩阵运算,比如矩阵加法和矩阵乘法,虽然简单但是代码很多,这里就不复制了。而且我这里提到的代码没有问题。
解决方法
暂无找到可以解决该程序问题的有效方法,小编努力寻找整理中!
如果你已经找到好的解决方法,欢迎将解决方案带上本链接一起发送给小编。
小编邮箱:dio#foxmail.com (将#修改为@)