可视化三维样本数据点的PCA过程
2020-07-01 本文已影响0人
__method__
可视化三维样本数据点的PCA过程
import numpy as np
from mpl_toolkits import mplot3d
import matplotlib.pyplot as plt
# 在三维空间随机生成100个样本点
np.random.seed(6666)
X = np.random.random(size=(100, 3))
绘制
ax = plt.axes(projection='3d')
ax.scatter3D(X[:,0], X[:,1], X[:,2])
plt.show()
def f(w, X):
return np.sum((X.dot(w)**2)) / len(X)
def df(w, X):
return X.T.dot(X.dot(w)) * 2. / len(X)
def direction(w):
return w / np.linalg.norm(w)
def first_component(X, initial_w, eta, n_iters = 1e4, epsilon=1e-8):
w = direction(initial_w)
cur_iter = 0
while cur_iter < n_iters:
gradient = df(w, X)
last_w = w
w = w + eta * gradient
w = direction(w)
if(abs(f(w, X) - f(last_w, X)) < epsilon):
break
cur_iter += 1
return w
initial_w = np.random.random(X.shape[1])
eta = 0.01
w1 = first_component(X, initial_w, eta)
w1 # 求出第一主成分
array([0.61952027, 0.55653544, 0.55359095])
# 将原来的所有样本点X去除在第一主成分上的分量,得到X2
X2 = np.empty(X.shape)
for i in range(len(X)):
X2[i] = X[i] - X[i].dot(w1) * w1
# 绘制X2,此时,X2的所有样本点分布在一个平面上
ax = plt.axes(projection='3d')
ax.scatter3D(X2[:,0], X2[:,1], X2[:,2])
plt.show()
w2 = first_component(X2, initial_w, eta)
w2 # 求出第二主成分
array([-0.71119474, 0.69644447, 0.0957452 ])
# 将X2去除在第二主成分上的分量,得到X3
X3 = np.empty(X2.shape)
for i in range(len(X)):
X3[i] = X2[i] - X2[i].dot(w2) * w2
# 绘制X3,此时,X3的所有样本点分布在一条直线上
ax = plt.axes(projection='3d')
ax.scatter3D(X3[:,0], X3[:,1], X3[:,2])
plt.show()