ML:自己动手实现逻辑回归算法(1)

2018-09-05  本文已影响47人  ACphart

介绍

描述

import numpy as np
import matplotlib.pyplot as plt
from IPython.core.interactiveshell import InteractiveShell

# Configure IPython to echo the value of every top-level expression statement
# in a cell, not only the last one (documented effect of the 'all' setting).
InteractiveShell.ast_node_interactivity = 'all'

生成模拟数据

# Draw the logistic (sigmoid) curve on the interval [-10, 10].
# Plot results are bound to `_`, presumably so the notebook does not echo the
# returned matplotlib objects.
z = np.linspace(-10, 10, num=100)
sigm = 1. / (1. + np.exp(-z))
_ = plt.plot(z, sigm)
_ = plt.ylim(bottom=0, top=1)
# Fix the seed so the synthetic training set is reproducible.
np.random.seed(20180823)

m = 100   # number of samples
xlim = 4  # each feature is sampled uniformly from [-xlim, xlim)

# Columns of the design matrix: a constant bias term and two random features.
x0 = np.ones((m, 1))
x1 = np.random.rand(m, 1)*xlim*2 - xlim
x2 = np.random.rand(m, 1)*xlim*2 - xlim

# Noisy score for the positive class: sigmoid(x1 + x2) plus Gaussian noise
# with standard deviation 0.2.
ty = 1./(1. + np.exp(-(x1 + x2))) + np.random.randn(m, 1)*0.2
# The noise can push ty below -0.5 or above 1.5, where plain rounding would
# produce labels of -1 or 2; clip so every label is 0 or 1.
y = np.clip(np.round(ty), 0., 1.)

# Scatter the samples coloured by label and draw the line x2 = -x1, which is
# the noise-free decision boundary of the generating model.
_ = plt.scatter(x1, x2, c=y, cmap='PiYG')
_ = plt.plot(x1, -x1, 'r')
_ = plt.ylim(-4.5, 3.5)

假设函数

损失函数和梯度

梯度和梯度下降

向量化

根据向量化之后的数学式构建我们的函数

# Hypothesis function
def h_theta(X, theta):
    """Return the logistic sigmoid of the linear score ``np.dot(X, theta)``.

    Args:
        X: Feature matrix (or anything ``np.dot`` accepts as left operand).
        theta: Parameter vector (right operand of ``np.dot``).

    Returns:
        ``1 / (1 + exp(-np.dot(X, theta)))`` evaluated element-wise.
    """
    scores = np.dot(X, theta)
    return 1. / (1. + np.exp(-scores))

# Loss function
def loss_func(X, theta, y):
    """Return the mean binary cross-entropy of the logistic model.

    Args:
        X: Design matrix with one sample per row.
        theta: Parameter vector forwarded to ``h_theta``.
        y: Labels aligned with the rows of ``X`` (expected to be 0 or 1).

    Returns:
        ``-(y.T . log(h) + (1 - y).T . log(1 - h)) / n`` where ``h`` is
        ``h_theta(X, theta)`` and ``n`` is the number of rows of ``X``.
    """
    # Normalise by the size of the data actually passed in rather than by a
    # module-level sample count, so the loss is correct for any data set.
    n_samples = X.shape[0]
    h = h_theta(X, theta)
    # Keep predictions strictly inside (0, 1): a saturated sigmoid would
    # otherwise give log(0) = -inf and 0 * -inf = nan in the dot products.
    eps = np.finfo(h.dtype).eps
    h = np.clip(h, eps, 1. - eps)
    y1 = np.log(h)
    y0 = np.log(1. - h)
    return -1./n_samples * (np.dot(y.T, y1) + np.dot((1. - y.T), y0))
    
# Gradient function
def grad_func(X, theta, y):
    """Return the gradient of the mean cross-entropy loss w.r.t. ``theta``.

    Args:
        X: Design matrix with one sample per row.
        theta: Parameter vector forwarded to ``h_theta``.
        y: Labels aligned with the rows of ``X``.

    Returns:
        ``X.T . (h_theta(X, theta) - y) / n`` where ``n`` is the number of
        rows of ``X``.
    """
    # Use the row count of X instead of a module-level sample count so the
    # gradient is scaled correctly for any data set.
    n_samples = X.shape[0]
    return 1./n_samples * np.dot(X.T, h_theta(X, theta) - y)

使用梯度下降算法进行训练

# Fix the seed so the random initial parameters are reproducible.
np.random.seed(20180823)
# Learning rate and convergence threshold
alpha = 0.1
stop = 1e-6

i = 1       # iteration counter
index = 1   # a snapshot is drawn when i is a multiple of this; grows x4 each time
c = np.array([0.8, 0.8, 0.8])   # snapshot line colour (grey), darkened step by step

theta = np.random.randn(3, 1)
X = np.hstack((x0, x1, x2))
grad = grad_func(X, theta, y)
# Batch gradient descent: repeat until every gradient component has an
# absolute value of at most `stop`.
while not np.all(np.abs(grad) <= stop):
    theta = theta - alpha*grad
    grad = grad_func(X, theta, y)

    # Plot the learning progress (snapshots at i = 2, 4, 16, 64, ...)
    i = i+1
    if i%index == 0:
        # Decision boundary theta0 + theta1*x1 + theta2*x2 = 0, solved for x2.
        yline = -theta[0]/theta[2] - theta[1]/theta[2]*x1
        _ = plt.plot(x1, yline, color=c)
        # Darken the next snapshot, but never leave the valid [0, 1] range:
        # matplotlib rejects RGB components below 0, which the unclamped
        # subtraction reaches after nine snapshots.
        c = np.clip(c - 0.1, 0., 1.)
        index = index*4

# Overlay the training samples and the noise-free boundary x2 = -x1.
_ = plt.scatter(x1, x2, c=y, cmap='PiYG')
_ = plt.plot(x1, -x1, 'r')
_ = plt.ylim(-4.5, 3.5)

测试预测性能

# Test data
np.random.seed(2018082302)  # different seed from the training set

# Generate a fresh sample with the same procedure as the training data.
test_x0 = np.ones((m, 1))
test_x1 = np.random.rand(m, 1)*xlim*2 - xlim
test_x2 = np.random.rand(m, 1)*xlim*2 - xlim
test_ty = 1./(1. + np.exp(-(test_x1 + test_x2))) + np.random.randn(m, 1)*0.2
# Noise can push test_ty outside [-0.5, 1.5]; clip so labels are only 0 or 1.
test_y = np.clip(np.round(test_ty), 0., 1.)
test_X = np.hstack((test_x0, test_x1, test_x2))

# Predicted probabilities, rounded to hard 0/1 predictions.
y_ = h_theta(test_X, theta)
pre_y = np.round(y_)

# Fraction of test samples whose prediction equals the label. The vectorised
# comparison avoids calling int() on one-element arrays, which NumPy >= 1.25
# deprecates.
acc = float(np.mean(pre_y == test_y))
acc
0.95
上一篇 下一篇

猜你喜欢

热点阅读