matplotlib可视化练习 -- 鸢尾花数据集
2017-09-10 本文已影响485人
b485c88ab697
matplotlib可视化练习
%matplotlib inline
import matplotlib as mpl
from matplotlib import pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np
- 萼片(sepal)和花瓣(petal)的大小关系(散点图)
- 不同种类(species)鸢尾花萼片和花瓣的大小关系(分类散点子图)
- 不同种类鸢尾花萼片和花瓣大小的分布情况(柱状图或者箱式图)
# Load the "iris" dataset through seaborn and preview its first rows.
data = sns.load_dataset("iris")
data.head()
# Columns: sepal length, sepal width, petal length, petal width, species
Paste_Image.png
萼片(sepal)和花瓣(petal)的大小关系(散点图)
# Use length * width as a simple "size" measure and store it as new columns.
data['sepal_size'] = data['sepal_length'].mul(data['sepal_width'])
data['petal_size'] = data['petal_length'].mul(data['petal_width'])
# Scatter sepal size (x axis) against petal size (y axis) for every row.
plt.scatter(x=data['sepal_size'], y=data['petal_size'])
output_14_1.png
不同种类(species)鸢尾花萼片和花瓣的大小关系(分类散点子图)
先来看一下数据中一共有多少个种类:
# Count the rows per species; the index of the result lists the distinct species.
t = data.groupby(['species']).size()  # 3 species
t.index
Index(['setosa', 'versicolor', 'virginica'], dtype='object', name='species')
# Select the sepal_size values of the rows whose species is 'setosa'.
data.loc[data['species'] == 'setosa', 'sepal_size']
0 17.85
1 14.70
2 15.04
3 14.26
4 18.00
5 21.06
6 15.64
7 17.00
8 12.76
9 15.19
10 19.98
11 16.32
12 14.40
13 12.90
14 23.20
15 25.08
16 21.06
17 17.85
18 21.66
19 19.38
20 18.36
21 18.87
22 16.56
23 16.83
24 16.32
25 15.00
26 17.00
27 18.20
28 17.68
29 15.04
30 14.88
31 18.36
32 21.32
33 23.10
34 15.19
35 16.00
36 19.25
37 17.64
38 13.20
39 17.34
40 17.50
41 10.35
42 14.08
43 17.50
44 19.38
45 14.40
46 19.38
47 14.72
48 19.61
49 16.50
Name: sepal_size, dtype: float64
# Draw one scatter subplot per species on a 2x2 grid:
# sepal size on the x axis, petal size on the y axis.
plt.figure()
for position, (name, group) in enumerate(data.groupby('species'), start=1):
    plt.subplot(2, 2, position)
    plt.scatter(group['sepal_size'].values, group['petal_size'].values)
plt.show()
output_19_0.png
不同种类鸢尾花萼片和花瓣大小的分布情况(柱状图或者箱式图)
柱状图
# Draw one bar subplot per species on a 2x2 grid. Each bar is placed at the
# sample's sepal size (x) with a height equal to its petal size.
fig = plt.figure(figsize=(20, 20))
for position, (name, group) in enumerate(data.groupby('species'), start=1):
    ax = fig.add_subplot(2, 2, position)
    ax.bar(group['sepal_size'].values, group['petal_size'].values)
    ax.set_title(name)
plt.show()
output_22_0.png
箱式图
# Box plots of sepal_size and petal_size for every species, laid out on a
# grid with one row per species: sepal size on the left, petal size on the right.
plt.figure(figsize=(20, 20))

# Styling shared by every box plot.
box_style = {
    'patch_artist': True,
    # Median line color
    'medianprops': {'color': 'b'},
    # Box colors: 'color' is the outline, 'facecolor' is the fill
    'boxprops': {'color': 'b', 'facecolor': 'r'},
    # Whisker color
    'whiskerprops': {'color': 'r'},
    # Whisker cap color
    'capprops': {'color': 'b'},
}

species_names = data.groupby(['species']).size().index
for row, name in enumerate(species_names):
    subset = data[data['species'].values == name]
    for col, column in enumerate(('sepal_size', 'petal_size'), start=1):
        # Every (species, measurement) pair needs its own slot. Indexing the
        # slots with flag and flag * 2 reuses slot 2 for two different plots,
        # so the slot is derived from the row and column instead.
        plt.subplot(len(species_names), 2, 2 * row + col)
        # Plot the column named in the title (the petal plot must use
        # petal_size, not sepal_size).
        plt.boxplot(subset[column].values, **box_style)
        plt.title(name + '+' + column)
plt.show()
output_24_0.png