分别用矩阵的方法和神经网络的方法实现PCA

2021-09-13  本文已影响0人  酌泠
# File name:
# Description:
# Author: zeng
# Time: 2019/11/21
# Change Activity:

import numpy as np
import matplotlib.pyplot as plt

seed = 1


def zero_mean(data):  # 零均值化函数
    mean = np.mean(data, axis=0)
    return data - mean


def normalize(data):  # 归一化
    m = np.max(data)
    return data / m


def norm_2(vector):  # 计算2范数
    return np.sqrt(np.sum(vector**2))


def theta(vector1, vector2):  # 计算两向量夹角余弦值
    modulus1 = norm_2(vector1)
    modulus2 = norm_2(vector2)
    return np.dot(vector1, vector2) / (modulus1*modulus2)


class PcaByNet:
    """
    A neural network based PCA (Oja PCA), it is a Online Learning Model.
    """

    def __init__(self, n_features):

        self.y = None  # 初始化网络输出
        self.weight = np.random.randn(n_features)  # 初始化网络权重

        self.norm_recorder = []  # 记录每次迭代的范数
        self.theta_recorder = []  # 记录每次迭代的夹角余弦

    def fit(self, data, v, max_epochs=1000, lr_rate=1e-3, e=1e-5):  # 学习函数

        n_sample = len(data)

        for epoch in range(max_epochs):
            for i in range(n_sample):
                x = data[i]
                w = self.weight
                y = np.matmul(x, w)

                change = lr_rate * (y*x - y*y*w)  # 权值改变量
                self.weight = w + change  # 更新权值
                self.y = np.matmul(x, self.weight)  # 更新输出

                error = np.abs(norm_2(w) - 1)

                self.norm_recorder.append(norm_2(w))  # 记录范数
                self.theta_recorder.append(theta(w, v))  # 记录夹角余弦
                if error < e:
                    print('迭代停止:', epoch * n_sample + i, epoch, i)
                    return epoch * n_sample + i  # 返回迭代次数
        print('迭代停止,未收敛')
        return max_epochs * n_sample  # 返回迭代次数


class PCA:
    def __init__(self, n_components=1):
        self.n_components = n_components
        self.cov = None
        self.e_values = None
        self.e_vectors = None

    def fit(self, data):
        # data = self.zero_mean(data)
        cov = np.cov(data, rowvar=False)  # rowvar=0,说明传入的数据一行代表一个样本,若非0,说明传入的数据一列代表一个样本
        self.cov = cov

        e_values, e_vectors = np.linalg.eig(cov)
        e_vectors = np.transpose(e_vectors)  # 注意特征向量是列,不是行

        # 排序
        index = sorted(range(len(e_values)), key=lambda k: e_values[k])
        e_values = np.array(sorted(e_values))
        e_vectors = np.array([e_vectors[k] for k in index])

        self.e_values = e_values
        self.e_vectors = e_vectors

        return e_values[self.n_components::], e_vectors[self.n_components::]


def data_generator(n_samples, limit=20):
    np.random.seed(seed)
    x = np.random.randint(limit, size=(n_samples, 1))
    y = 2*x
    noise1 = np.random.randn(n_samples, 1)
    noise2 = np.random.randn(n_samples, 1)
    x = x+noise1
    y = y+noise2
    data = np.hstack((x, y))
    return data


if __name__ == '__main__':
    ds = data_generator(20)

    # 初始化模型
    pca = PcaByNet(2)
    ds = normalize(ds)
    ds_pro = zero_mean(ds)

    # 用矩阵计算主成分
    model = PCA()
    e, v = model.fit(ds_pro)
    print('PCA VIA Matrix-Method')
    print(f'特征值\t{e}')
    print(f'特征向量\t{v}')

    # 用网络计算主成分
    pca.fit(ds_pro, v[0], 1000, 0.01, 1e-4)  # 归一的参数
    # pca.fit(data_pro, v[0], 1000, 0.001, 1e-4)  # 不归一的参数

    print(f'PCA VIA NET-Method')
    print(f'特征向量\t{pca.weight}')

    # 绘数据图
    # 画数据散点
    x1 = ds_pro[:, 0]
    x2 = ds_pro[:, 1]
    plt.subplot(131)
    plt.scatter(x1, x2)

    # 画斜线和坐标轴
    plt.plot([-0.5, 0.5], [-1, 1])  # 画斜线
    plt.plot([-1, 1], [0, 0], c='black')  # 画横轴
    plt.plot([0, 0], [-1, 1], c='black')  # 画竖轴

    # 画特征向量
    plt.plot([0, pca.weight[0]], [0, pca.weight[1]], c='red', label='neural net')
    plt.plot([0, v[0, 0]], [0, v[0, 1]], c='green', label='matrix')
    plt.legend()

    # 画范数变化
    plt.subplot(132)
    plt.plot(pca.norm_recorder)

    # 画余弦变化
    plt.subplot(133)
    plt.plot(pca.theta_recorder)

    plt.show()

上一篇 下一篇

猜你喜欢

热点阅读