主成分分析PCA

2023-01-10  本文已影响0人  y_7539
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import accuracy_score
from sklearn.neighbors import KNeighborsClassifier
%matplotlib inline

# Load the iris dataset from a local CSV file and preview its first rows.
df = pd.read_csv("datas/iris_data.csv")
df.head()
image.png
# Feature matrix (every column except "target" and "label") and label vector.
x = df.drop(columns=["target", "label"])
y = df["label"]

# Fit a 3-nearest-neighbour classifier and score it on the same data it was
# trained on (the article reports an accuracy of 0.96).
knn = KNeighborsClassifier(n_neighbors=3).fit(x, y)
y_predict = knn.predict(x)
accuracy_score(y, y_predict)

# Standardize the features.
from sklearn.preprocessing import StandardScaler
x_stand = StandardScaler().fit_transform(x)

# Take one feature and compare its mean and standard deviation before and
# after standardization.
# ddof=0 is passed explicitly on the pandas side: Series.std defaults to the
# sample estimator (ddof=1) whereas ndarray.std defaults to the population
# estimator (ddof=0), so without it the two printed standard deviations would
# be computed with different formulas.
# NOTE(review): assumes "sepal length" is the first column of x — confirm.
x1_mean = df["sepal length"].mean()
x1_stand_mean = x_stand[:, 0].mean()
x1_std = df["sepal length"].std(ddof=0)
x1_stand_std = x_stand[:, 0].std()
print(x1_mean, x1_stand_mean, x1_std, x1_stand_std)
image.png
from sklearn.decomposition import PCA
# PCA keeping the same number of dimensions as the input
pca = PCA(n_components=4)
x_pca = pca.fit_transform(x_stand)
# Explained-variance ratio of each principal component
var_radio = pca.explained_variance_ratio_  #array([0.72770452, 0.23030523, 0.03683832, 0.00515193])
image.png
# PCA dimensionality reduction: keep only the two components with the largest variance
pca = PCA(n_components=2)
x_pca = pca.fit_transform(x_stand)
image.png
# Evaluate the dimension-reduced data with KNN, scoring on the training data.
# NOTE(review): this classifier uses the library-default neighbour count,
# while the baseline earlier in the file uses n_neighbors=3 — confirm the
# two accuracies are meant to be compared directly.
knn = KNeighborsClassifier().fit(x_pca, y)
knn_predict = knn.predict(x_pca)
accuracy_score(y, knn_predict)  # reported in the article: 0.9466666666666667
``





上一篇 下一篇

猜你喜欢

热点阅读