matplotlib可视化练习 -- 鸢尾花数据集

2017-09-10  本文已影响485人  b485c88ab697

matplotlib可视化练习

%matplotlib inline
import matplotlib as mpl
from matplotlib import pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np
# Load the iris dataset through seaborn and preview its first rows.
data = sns.load_dataset("iris")
data.head()
# Columns: sepal length, sepal width, petal length, petal width, species
Paste_Image.png

萼片(sepal)和花瓣(petal)的大小关系(散点图),这里用“长度 × 宽度”近似表示大小

# Approximate the "size" of each flower part as length * width and store it
# as a new column (sepal_size first, then petal_size).
for part in ('sepal', 'petal'):
    data[part + '_size'] = data[part + '_length'] * data[part + '_width']
# Scatter of sepal size (x) against petal size (y) over all samples.
plt.scatter(data['sepal_size'],data['petal_size'])
output_14_1.png

不同种类(species)鸢尾花萼片和花瓣的大小关系(分类散点子图)

先来看下有多少种类

# Count the rows per species; the index of the result lists the distinct
# species names.
t = data.groupby(['species']).size()  # 3 species
t.index
Index(['setosa', 'versicolor', 'virginica'], dtype='object', name='species')
# Select the sepal_size column for the rows whose species is 'setosa'.
data.loc[data['species'] == 'setosa', 'sepal_size']
0     17.85
1     14.70
2     15.04
3     14.26
4     18.00
5     21.06
6     15.64
7     17.00
8     12.76
9     15.19
10    19.98
11    16.32
12    14.40
13    12.90
14    23.20
15    25.08
16    21.06
17    17.85
18    21.66
19    19.38
20    18.36
21    18.87
22    16.56
23    16.83
24    16.32
25    15.00
26    17.00
27    18.20
28    17.68
29    15.04
30    14.88
31    18.36
32    21.32
33    23.10
34    15.19
35    16.00
36    19.25
37    17.64
38    13.20
39    17.34
40    17.50
41    10.35
42    14.08
43    17.50
44    19.38
45    14.40
46    19.38
47    14.72
48    19.61
49    16.50
Name: sepal_size, dtype: float64
# One scatter panel per species: sepal_size on x, petal_size on y.
plt.figure()
# Iterating the groupby yields (species name, rows of that species) pairs in
# sorted key order, so each group is filtered once instead of building the
# same boolean mask twice per species.
for flag, (name, group) in enumerate(data.groupby('species'), start=1):
    # Subplot indices are 1-based; a 2x2 grid has room for the 3 species.
    plt.subplot(2,2,flag)
    plt.scatter(group['sepal_size'].values, group['petal_size'].values)
    # Title each panel with its species, otherwise the panels cannot be
    # told apart.
    plt.title(name)
plt.show()
output_19_0.png

不同种类鸢尾花萼片和花瓣大小的分布情况(柱状图或者箱线图)

柱状图

# One bar-chart panel per species: a bar at x = sepal_size with
# height = petal_size for every sample.
# NOTE(review): the bars sit at continuous x positions with the default bar
# width, so neighbouring bars overlap; a histogram may match the stated goal
# ("distribution") better -- confirm the intent before changing.
plt.figure(figsize=(20,20))
species_names = data.groupby(['species']).size().index
for flag, name in enumerate(species_names, start=1):
    # Rows belonging to the current species.
    rows = data[data['species'].values == name]
    plt.subplot(2,2,flag)
    plt.bar(rows['sepal_size'].values, rows['petal_size'].values)
    plt.title(name)
plt.show()
output_22_0.png

箱线图

# Box plots of sepal_size and petal_size: one row per species, sepal in the
# left column and petal in the right column.
plt.figure(figsize=(20,20))
# Styling shared by every box so both columns look the same.
box_style = {
    'patch_artist': True,
    # median line colour
    'medianprops': {'color': 'b'},
    # box colours: 'color' is the edge, 'facecolor' is the fill
    'boxprops': {'color': 'b', 'facecolor': 'r'},
    # whisker colour
    'whiskerprops': {'color': 'r'},
    # whisker cap colour
    'capprops': {'color': 'b'},
}
# (species name, rows of that species) pairs, in sorted species order.
species_groups = list(data.groupby('species'))
n_rows = len(species_groups)
for row, (name, group) in enumerate(species_groups):
    # Subplot indices are 1-based and run row by row, so on an n_rows x 2
    # grid row r owns cells 2r+1 and 2r+2. The previous indexing
    # (flag and flag*2 on a 3x3 grid) made panels of different species
    # land in the same cell.
    plt.subplot(n_rows, 2, 2 * row + 1)
    plt.boxplot(group['sepal_size'].values, **box_style)
    plt.title(name +'+sepal_size')
    plt.subplot(n_rows, 2, 2 * row + 2)
    # The panel titled petal_size must draw petal_size (it used to draw
    # sepal_size a second time).
    plt.boxplot(group['petal_size'].values, **box_style)
    plt.title(name +'+petal_size')
plt.show()
output_24_0.png
上一篇下一篇

猜你喜欢

热点阅读