scipy层次聚类对种子特性数据的分析
2021-06-10 本文已影响0人
一路向后
1.数据文件seeds-less-rows.csv
area,perimeter,compactness,length,width,asymmetry_coefficient,groove_length,grain_variety
14.88,14.57,0.8811,5.5539999999999985,3.333,1.018,4.956,Kama
14.69,14.49,0.8799,5.563,3.259,3.5860000000000003,5.2189999999999985,Kama
14.03,14.16,0.8796,5.438,3.201,1.717,5.001,Kama
13.99,13.83,0.9183,5.119,3.383,5.234,4.7810000000000015,Kama
14.11,14.26,0.8722,5.52,3.168,2.688,5.2189999999999985,Kama
13.02,13.76,0.8641,5.395,3.026,3.373,4.825,Kama
15.49,14.94,0.8724,5.7570000000000014,3.3710000000000004,3.412,5.228,Kama
16.2,15.27,0.8734,5.8260000000000005,3.464,2.823,5.527,Kama
13.5,13.85,0.8852,5.351,3.158,2.249,5.176,Kama
15.36,14.76,0.8861,5.7010000000000005,3.393,1.367,5.1320000000000014,Kama
15.78,14.91,0.8923,5.674,3.434,5.593,5.136,Kama
14.46,14.35,0.8818,5.388,3.377,2.802,5.044,Kama
11.23,12.63,0.884,4.902,2.879,2.269,4.703,Kama
14.34,14.37,0.8726,5.63,3.19,1.3130000000000002,5.15,Kama
16.84,15.67,0.8623,5.997999999999998,3.484,4.675,5.877000000000002,Rosa
17.32,15.91,0.8599,6.064,3.403,3.824,5.9220000000000015,Rosa
18.72,16.19,0.8977,6.006,3.857,5.324,5.879,Rosa
18.88,16.26,0.8969,6.084,3.764,1.649,6.109,Rosa
18.76,16.2,0.8984,6.1720000000000015,3.796,3.12,6.053,Rosa
19.31,16.59,0.8815,6.341,3.81,3.477,6.238,Rosa
17.99,15.86,0.8992,5.89,3.694,2.068,5.8370000000000015,Rosa
18.85,16.17,0.9056,6.152,3.806,2.843,6.2,Rosa
19.38,16.72,0.8716,6.303,3.791,3.678,5.965,Rosa
18.96,16.2,0.9077,6.051,3.897,4.334,5.75,Rosa
18.14,16.12,0.8772,6.059,3.563,3.619,6.011,Rosa
18.65,16.41,0.8698,6.285,3.594,4.391,6.102,Rosa
18.94,16.32,0.8942,6.144,3.825,2.908,5.949,Rosa
17.36,15.76,0.8785,6.145,3.574,3.526,5.971,Rosa
13.32,13.94,0.8613,5.541,3.073,7.035,5.44,Canadian
11.43,13.13,0.8335,5.176,2.719,2.221,5.1320000000000014,Canadian
12.01,13.52,0.8249,5.405,2.776,6.992000000000001,5.27,Canadian
11.34,12.87,0.8596,5.053,2.849,3.347,5.003,Canadian
12.02,13.33,0.8503,5.35,2.81,4.271,5.308,Canadian
12.44,13.59,0.8462,5.319,2.897,4.924,5.27,Canadian
11.55,13.1,0.8455,5.167000000000002,2.845,6.715,4.956,Canadian
11.26,13.01,0.8355,5.186,2.71,5.335,5.092,Canadian
12.46,13.41,0.8706,5.2360000000000015,3.017,4.987,5.147,Canadian
11.81,13.45,0.8198,5.4129999999999985,2.716,4.898,5.352,Canadian
11.27,12.86,0.8563,5.091,2.804,3.985,5.001,Canadian
12.79,13.53,0.8786,5.224,3.054,5.483,4.958,Canadian
12.67,13.32,0.8977,4.984,3.135,2.3,4.745,Canadian
11.23,12.88,0.8511,5.14,2.795,4.325,5.003,Canadian
2.源码实现
import pandas as pd
from scipy.cluster.hierarchy import linkage, dendrogram
import matplotlib.pyplot as plt
from matplotlib.ticker import MultipleLocator
# Load the seed measurements and print a quick summary of the data.
seeds_df = pd.read_csv('./seeds-less-rows.csv')
print(seeds_df.head())
print(seeds_df['grain_variety'].value_counts())

# Split off the class-label column (used only to label the leaves);
# the remaining columns form the feature matrix.
varieties = seeds_df.pop('grain_variety').tolist()
samples = seeds_df.to_numpy()

# Hierarchical (agglomerative) clustering using complete linkage.
mergings = linkage(samples, method='complete')

# Draw the dendrogram on a fresh figure; the new axes become the current
# axes, which is where dendrogram() plots when no axes is passed.
fig = plt.figure(figsize=(10, 8), dpi=80)
ax = fig.add_subplot(111)
dendrogram(
    mergings,
    labels=varieties,
    leaf_rotation=90,
    leaf_font_size=10,
)

# Minor ticks every 0.1 on the y (merge distance) axis, then write the image.
yminorLocator = MultipleLocator(0.1)
ax.yaxis.set_minor_locator(yminorLocator)
fig.savefig("1.png")
3.运行及其结果
$ python3 example.py
    area  perimeter  compactness  length  width  asymmetry_coefficient  \
0  14.88      14.57       0.8811   5.554  3.333                  1.018
1  14.69      14.49       0.8799   5.563  3.259                  3.586
2  14.03      14.16       0.8796   5.438  3.201                  1.717
3  13.99      13.83       0.9183   5.119  3.383                  5.234
4  14.11      14.26       0.8722   5.520  3.168                  2.688
   groove_length grain_variety
0          4.956          Kama
1          5.219          Kama
2          5.001          Kama
3          4.781          Kama
4          5.219          Kama
Rosa        14
Canadian    14
Kama        14
Name: grain_variety, dtype: int64
