(5) Fully Stochastic Gradient Descent

2020-04-06  羽天驿

I. Code

import numpy as np
import matplotlib.pyplot as plt

# 40 evenly spaced x values, shaped as a column vector
X = np.linspace(-2,12,40).reshape(-1,1)

# random true slope and intercept
w = np.random.randint(2,12,size = 1)
b = np.random.randint(-10,10,size = 1)

# linear data with Gaussian noise added
y = X*w + b + np.random.randn(40,1)*4.5

# flatten y to one dimension
y = y.reshape(-1)

plt.scatter(X,y,color = 'red')
[output_1_1.png: scatter plot of the noisy linear data]
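Since w and b are drawn at random each run, it can help to print them as a reference for the fitted values below:

# the true parameters that gradient descent should roughly recover
print('true w:', w[0], 'true b:', b[0])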

Implement gradient descent using the matrix form of the gradient, where m is the number of samples:

\nabla_{\theta}J(\theta) = \frac{2}{m}X^T(X\theta - y)
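This gradient comes from the mean-squared-error loss; differentiating

J(\theta) = \frac{1}{m}(X\theta - y)^T(X\theta - y)

with respect to \theta yields exactly the expression above.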

Rewriting the model so the intercept becomes an ordinary weight:

f(x) = b + w_1x + w_2x^2 + w_3x^3

f(x) = bx^0 + w_1x + w_2x^2 + w_3x^3

f(x) = w_0x^0 + w_1x + w_2x^2 + w_3x^3

Append a column of ones to the data X; this column corresponds to the intercept, so b = w_0 can be learned together with the other weights.

# training data with an extra column of ones for the intercept
X_train = np.concatenate([X,np.ones(shape = (40,1))],axis = 1)
X_train
array([[-2.        ,  1.        ],
       [-1.64102564,  1.        ],
       [-1.28205128,  1.        ],
       ...,
       [11.64102564,  1.        ],
       [12.        ,  1.        ]])
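The same trick extends to the cubic f(x) written above: the design matrix simply gains a column for each power of x. A minimal sketch using np.vander (this notebook only needs the linear case):

# design matrix for a degree-3 polynomial: columns x^0, x, x^2, x^3
X_poly = np.vander(X[:,0], 4, increasing=True)
X_poly.shape # (40, 4)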

Perform gradient descent using the gradient derived in matrix form above.

When generating the initial coefficients, their shape must match the design matrix (here, two entries: slope and intercept).

def gradient_descent(X,y):
    m = 1 # pick 1 of the 40 samples at random each step (pure SGD)
    theta = np.random.randn(2) # theta holds both the slope and the intercept
    last_theta = theta + 0.1 # previous theta, used to measure the change per update
    precision = 1e-4 # convergence tolerance
    epsilon = 0.01 # step size (learning rate)
    count = 0
    while True:
        # stop when slope and intercept both change by less than 1e-4
        if (np.abs(theta - last_theta) < precision).all():
            break
        if count > 50000: # safety cap so the loop cannot run forever
            break
        # remember the current theta before updating
        last_theta = theta.copy()
        # stochastic gradient: the matrix formula applied to the randomly drawn sample
        index = np.random.choice(np.arange(40),size = m) # random index into the data
        grad = 2/m*X[index].T.dot(X[index].dot(theta) - y[index])
        theta -= epsilon*grad
        count += 1
    return theta
w_,b_ = gradient_descent(X_train,y)
j = lambda x : w_*x + b_ # the fitted line
plt.scatter(X[:,0],y,color = 'red')
x_test = np.linspace(-2,12,1024)
y_ = j(x_test)
plt.plot(x_test,y_,color = 'green')
[output_10_1.png: fitted line (green) over the scatter data, m = 1]
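As a sanity check, the SGD estimate can be compared with the closed-form least-squares solution (a sketch using np.linalg.lstsq; the exact numbers depend on the random data above):

# closed-form least-squares fit on the same design matrix
theta_exact = np.linalg.lstsq(X_train, y, rcond = None)[0]
print('SGD:   w = %.3f, b = %.3f' % (w_, b_))
print('lstsq: w = %.3f, b = %.3f' % (theta_exact[0], theta_exact[1]))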
The same loop, now drawing a mini-batch of m = 5 samples per step:

def gradient_descent(X,y):
    m = 5 # pick 5 of the 40 samples at random each step (mini-batch SGD)
    theta = np.random.randn(2) # theta holds both the slope and the intercept
    last_theta = theta + 0.1 # previous theta, used to measure the change per update
    precision = 1e-4 # convergence tolerance
    epsilon = 0.01 # step size (learning rate)
    count = 0
    while True:
        # stop when slope and intercept both change by less than 1e-4
        if (np.abs(theta - last_theta) < precision).all():
            break
        if count > 10000: # safety cap so the loop cannot run forever
            break
        # remember the current theta before updating
        last_theta = theta.copy()
        # stochastic gradient: the matrix formula applied to the random mini-batch
        index = np.random.choice(np.arange(40),size = m) # random indices into the data
        grad = 2/m*X[index].T.dot(X[index].dot(theta) - y[index])
        theta -= epsilon*grad
        count += 1
    return theta
w_,b_ = gradient_descent(X_train,y)
j = lambda x : w_*x + b_ # the fitted line
plt.scatter(X[:,0],y,color = 'red')
x_test = np.linspace(-2,12,1024)
y_ = j(x_test)
plt.plot(x_test,y_,color = 'green')
[output_11_1.png: fitted line (green) over the scatter data, m = 5]
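For comparison, textbook SGD is often written epoch by epoch: shuffle the samples, visit each one exactly once, and decay the step size so the noisy updates settle down. A minimal sketch under those assumptions (the name sgd_epochs and the schedule constants t0, t1 are illustrative):

def sgd_epochs(X, y, n_epochs = 50, t0 = 5, t1 = 500):
    # t0, t1: illustrative learning-rate schedule constants
    n = len(X)
    theta = np.random.randn(X.shape[1])
    for epoch in range(n_epochs):
        # visit every sample exactly once per epoch, in random order
        for i, idx in enumerate(np.random.permutation(n)):
            xi, yi = X[idx], y[idx]
            grad = 2*xi*(xi.dot(theta) - yi) # single-sample gradient
            eta = t0/(epoch*n + i + t1) # decaying step size
            theta -= eta*grad
    return theta

w2_, b2_ = sgd_epochs(X_train, y)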