One-dimensional classification with TensorFlow 2.0

Problem description

I'm a complete TensorFlow newbie and I'm trying to implement a binary classification sample with TensorFlow 2.0. Here is my source code:

import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt

def sigmoid(x):
    return 1. / (1. + np.exp(-x))

learning_rate = 0.02
training_epochs = 600
display_step = 2

# two Gaussian clusters: class 0 centred around -4, class 1 around +4
x1 = np.random.normal(-4, 2, 1000)
x2 = np.random.normal(4, 2, 1000)

x_train = np.append(x1, x2)
y_train = np.asarray([0.] * len(x1) + [1.] * len(x2))

n_samples = x_train.shape[0]

w = tf.Variable([0.0,0.0],name="parameters",trainable=True)

def model(x):
    y = tf.sigmoid(w[1] * x + w[0])
    return y

def cost(y_pred,y_true):
    return tf.reduce_mean(-y_pred * np.log(y_true) - (1-y_pred) * np.log(1-y_true)) / (2 * n_samples)

optimizer = tf.optimizers.SGD(learning_rate)

def run_optimization():
    with tf.GradientTape() as g:
        pred = model(x_train)
        loss = cost(pred,y_train)

    gradients = g.gradient(loss,[w])

    optimizer.apply_gradients(zip(gradients,[w]))

for step in range(1,training_epochs + 1):
    run_optimization()

    if step % display_step == 0:
        pred = model(x_train)
        loss = cost(pred, y_train)
        print(f'step {step}, loss {loss}, w {w.numpy()}')

plt.plot(x_train,y_train,'ro',label='original_data')

all_xs = np.linspace(-10,10,100)
ys = sigmoid(w[1] * all_xs + w[0])

plt.plot(all_xs,ys,label='fitted line')
plt.legend()
plt.show()

But it doesn't work. It warns about division by zero and keeps returning NaN weights.
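For reference, the warning itself is easy to reproduce in isolation; it is NumPy's standard behaviour whenever log is applied to an array that contains zeros (a minimal sketch, not part of my training script):

import numpy as np

# emits "RuntimeWarning: divide by zero encountered in log" and yields -inf;
# a later 0 * -inf elsewhere in the computation then turns into NaN
print(np.log(np.array([0.0, 0.5, 1.0])))   # [-inf, -0.6931...,  0.]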

I got this code by modifying code for polynomial regression, which works as expected:

import sys
from time import sleep

import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt

learning_rate = 0.02
training_epochs = 600
display_step = 2

x_train = np.linspace(-1,1,101)
num_coeffs = 6
trY_coeffs = [1, 2, 3, 4, 5, 6]
y_train = 0

for i in range(num_coeffs):
    y_train += trY_coeffs[i] * np.power(x_train,i)
y_train += np.random.randn(*x_train.shape) * 1.5

# plt.scatter(x_train,y_train)
# plt.show()


n_samples = x_train.shape[0]

w = tf.Variable([0.0] * num_coeffs,name="parameters")
# b = tf.Variable(1.0,name="weights")

def polynomial_regression(x):
    y = 0.
    for i in range(num_coeffs):
        y += w[i] * np.power(x,i)
    return y

def mean_square(y_pred,y_true):
    return tf.pow(y_pred - y_true,2) / (2 * n_samples)
    # return tf.reduce_sum(tf.pow(y_pred - y_true,2)) / (2 * n_samples)

optimizer = tf.optimizers.SGD(learning_rate)

def run_optimization():
    with tf.GradientTape() as g:
        pred = polynomial_regression(x_train)
        loss = mean_square(pred, y_train)

    gradients = g.gradient(loss, [w])

    optimizer.apply_gradients(zip(gradients, [w]))

for step in range(1, training_epochs + 1):
    run_optimization()

    if step % display_step == 0:
        pred = polynomial_regression(x_train)
        loss = mean_square(pred, y_train)
        print(f'step {step}, loss {loss}, w {w.numpy()}')

plt.plot(x_train, y_train, 'ro', label='original_data')
y2 = 0
for i in range(num_coeffs):
    y2 += w[i] * np.power(x_train,i)

plt.plot(x_train,y2,label='fitted line')
plt.legend()
plt.show()

Any hints on what I'm missing?

EDIT Many thanks to @David Kaftan. It works now, I think. But is this algorithm highly dependent on the initial weights? What I mean is that the weights change very, very slowly:

step 1000,w [1.2105826e-07 4.8849639e-03]
step 2000,w [4.8233795e-07 9.7108791e-03]
step 3000,w [1.0805354e-06 1.4478483e-02]
step 4000,w [1.9122353e-06 1.9188546e-02]
step 5000,w [2.9739347e-06 2.3841826e-02]
step 6000,w [4.2620072e-06 2.8439121e-02]
step 7000,w [5.7727916e-06 3.2981172e-02]
step 8000,w [7.502555e-06 3.746886e-02]
step 9000,w [9.4475008e-06 4.1902874e-02]
step 10000,w [1.1603816e-05 4.6284076e-02]
step 11000,w [1.3967622e-05 5.0613251e-02]
step 12000,w [1.6535043e-05 5.4891203e-02]
step 13000,w [1.9302177e-05 5.9118662e-02]
step 14000,w [2.2265122e-05 6.3296579e-02]
step 15000,w [2.5419984e-05 6.7425437e-02]
step 16000,w [2.8762855e-05 7.1506448e-02]
step 17000,w [3.2289867e-05 7.5539932e-02]
step 18000,w [3.599714e-05 7.952695e-02]
step 19000,w [3.9880848e-05 8.3468251e-02]
step 20000,w [4.393720e-05 8.736454e-02]
step 21000,w [4.8162416e-05 9.1216564e-02]
step 22000,w [5.2552758e-05 9.5025137e-02]
step 23000,w [5.710456e-05 9.879094e-02]
step 24000,w [6.1814208e-05 1.0251452e-01]
step 25000,w [6.6678018e-05 1.0619703e-01]
step 26000,w [7.1692499e-05 1.0983858e-01]
step 27000,w [7.68541649e-05 1.13440394e-01]
step 28000,w [8.21595750e-05 1.17003016e-01]
step 29000,w [8.76053527e-05 1.20526925e-01]
step 30000,w [9.3188115e-05 1.2401290e-01]
step 31000,w [9.8904748e-05 1.2746166e-01]
step 32000,w [1.0475191e-04 1.3087378e-01]
step 33000,w [1.1072658e-04 1.3425015e-01]
step 34000,w [1.1682553e-04 1.3759044e-01]
step 35000,w [1.2304573e-04 1.4089666e-01]
step 36000,w [1.2938443e-04 1.4416878e-01]
step 37000,w [1.3583856e-04 1.4740647e-01]
step 38000,w [1.4240552e-04 1.5061150e-01]
step 39000,w [1.4908194e-04 1.5378430e-01]
step 40000,w [1.5586588e-04 1.5692532e-01]
step 41000,w [0.00016275 0.16003501]
step 42000,w [0.00016974 0.16311383]
step 43000,w [0.00017683 0.16616225]
step 44000,w [0.00018402 0.1691808 ]
step 45000,w [0.0001913  0.17216995]
step 46000,w [0.00019867 0.17513026]
step 47000,w [0.00020614 0.17806228]
step 48000,w [0.00021369 0.18096656]
step 49000,w [0.00022132 0.18384369]
step 50000,w [0.00022904 0.18669426]
step 51000,w [0.00023684 0.18951795]
step 52000,w [0.00024472 0.19231497]
step 53000,w [0.00025267 0.19508705]
step 54000,w [0.0002607  0.19783483]
step 55000,w [0.00026881 0.20055585]
step 56000,w [0.00027698 0.20325364]
step 57000,w [0.00028523 0.20592771]
step 58000,w [0.00029354 0.20857717]
step 59000,w [0.00030192 0.21120515]
step 60000,w [0.00031036 0.21380803]
step 61000,w [0.00031887 0.21639079]
step 62000,w [0.00032743 0.21894895]
step 63000,w [0.00033606 0.22148749]
step 64000,w [0.00034474 0.22400282]
step 65000,w [0.00035348 0.22649813]
step 66000,w [0.00036228 0.22897272]
step 67000,w [0.00037113 0.23142584]
step 68000,w [0.00038003 0.23386018]
step 69000,w [0.00038898 0.23627393]
step 70000,w [0.00039799 0.23866759]
step 71000,w [0.00040704 0.24104299]
step 72000,w [0.00041613 0.24340007]
step 73000,w [0.00042528 0.2457364 ]
step 74000,w [0.00043446 0.24805506]
step 75000,w [0.0004437  0.25035614]
step 76000,w [0.00045297 0.25264123]
step 77000,w [0.00046229 0.2549062 ]
step 78000,w [0.00047165 0.25715274]
step 79000,w [0.00048104 0.2593879 ]
step 80000,w [0.00049047 0.26159722]
step 81000,w [0.00049995 0.2638004 ]
step 82000,w [0.00050945 0.26597598]
step 83000,w [0.000519   0.26814473]
step 84000,w [0.00052858 0.2702905 ]
step 85000,w [0.00053818 0.27242622]
step 86000,w [0.00054784 0.27454218]
step 87000,w [0.0005575  0.27664647]
step 88000,w [0.00056722 0.27873263]
step 89000,w [0.00057695 0.28080705]
step 90000,w [0.00058673 0.2828634 ]
step 91000,w [0.00059651 0.28490967]
step 92000,w [0.00060635 0.28693622]
step 93000,w [0.00061618 0.28895605]
step 94000,w [0.00062608 0.2909528 ]
step 95000,w [0.00063597 0.29294807]
step 96000,w [0.00064591 0.29491502]
step 97000,w [0.00065586 0.29688197]
step 98000,w [0.00066583 0.2988248 ]
step 99000,w [0.00067584 0.30076194]
step 100000,w [0.00068585 0.30268413]

EDIT 2. After modifying cost as below, I get a good training speed:

def cost(y_pred,y_true):
    return tf.reduce_mean(-y_true * tf.math.log( y_pred ) - (1-y_true) * tf.math.log(1-y_pred)) # / (2 * n_samples)
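A rough back-of-the-envelope check of why the old version crawled (a sketch assuming n_samples = 2000, i.e. 1000 points per class as generated above): the extra `/ (2 * n_samples)` factor scales every gradient down by 4000, so the effective step size was tiny.

# hypothetical sanity check, assuming n_samples = 2000 (1000 per class)
learning_rate = 0.02
n_samples = 2000
print(learning_rate / (2 * n_samples))   # 5e-06 -- the effective step size with the old cost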

Solution

Your cost function looks like it should be binary cross-entropy. What you have:

def cost(y_pred,y_true):
    return tf.reduce_mean(-y_pred * np.log(y_true) - (1-y_pred) * np.log(1-y_true)) / (2 * n_samples)

is very close, but pred and true are mixed up. You are getting the error because log(0) is undefined. Obviously a large part of y_true is zero, so you can't take its log! The correct cost function should be:

def cost(y_pred,y_true):
    return tf.reduce_mean(-y_true* tf.math.log( y_pred ) - (1-y_true) * tf.math.log(1-y_pred)) / (2 * n_samples)

EDIT: You also have to change np.log to tf.math.log.
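To see why that matters, here is a minimal sketch (my own illustration): inside a GradientTape, tf.math.log stays a TensorFlow op that the tape can differentiate, whereas np.log silently converts the tensor to a NumPy value, so the tape loses track of it and gradients come back as None.

import numpy as np
import tensorflow as tf

x = tf.Variable(2.0)

with tf.GradientTape() as g:
    y = tf.math.log(x)               # recorded on the tape
print(g.gradient(y, x).numpy())      # 0.5, i.e. d(log x)/dx = 1/x

# np.log(x) would first convert x to a NumPy value, so anything computed
# from it is invisible to the tape and its gradient would be None.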

EDIT 3 (the mistake behind EDIT 2): You don't need to divide by the number of samples when you call tf.reduce_mean. That is what made your training progress so slowly. tf.reduce_mean already divides by the number of elements implicitly, so you were effectively dividing twice.
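A quick illustration of that point (a standalone sketch with made-up numbers):

import tensorflow as tf

losses = tf.constant([1.0, 2.0, 3.0, 4.0])
print(tf.reduce_sum(losses).numpy())    # 10.0 -- this is what would still need / n
print(tf.reduce_mean(losses).numpy())   # 2.5  -- already divided by n = 4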

def cost(y_pred,y_true):
    return tf.reduce_mean(-y_true* tf.math.log( y_pred ) - (1-y_true) * tf.math.log(1-y_pred))
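As a quick cross-check, the same numbers come out of Keras' built-in binary cross-entropy, tf.keras.losses.BinaryCrossentropy, which likewise averages over the batch (a minimal sketch with made-up probabilities):

import tensorflow as tf

y_true = tf.constant([0., 0., 1., 1.])
y_pred = tf.constant([0.1, 0.3, 0.7, 0.9])

manual = tf.reduce_mean(-y_true * tf.math.log(y_pred)
                        - (1 - y_true) * tf.math.log(1 - y_pred))
keras_bce = tf.keras.losses.BinaryCrossentropy()(y_true, y_pred)
print(manual.numpy(), keras_bce.numpy())   # both ≈ 0.231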