Pandas 数据可视化
2019-07-29 本文已影响0人
我是李小胖
基本图形
柱状图
reviews['points'].value_counts().sort_index().plot.bar()
散点图
reviews[reviews['price'] < 100].sample(100).plot.scatter(x='price', y='points')
data:image/s3,"s3://crabby-images/e7967/e7967b67906dab610c5636218952e678f437566f" alt=""
蜂窝图
reviews[reviews['price'] < 100].plot.hexbin(x='price', y='points', gridsize=15)
data:image/s3,"s3://crabby-images/cc3d7/cc3d7863636f523e6260a7c33f9a8946efea94a6" alt=""
大量重复的点可以用这种图表示
柱状图-叠加模式
data:image/s3,"s3://crabby-images/2318f/2318f467cfe466201dde7b7b1f404ba506ce5e74" alt=""
wine_counts.plot.bar(stacked=True)
data:image/s3,"s3://crabby-images/8683f/8683f778ac09555b3a125c091ce12f00cf595366" alt=""
面积模式
wine_counts.plot.area()
折线模式
wine_counts.plot.line()
美化
设置图的大小,字体大小,颜色,标题
reviews['points'].value_counts().sort_index().plot.bar(
figsize=(12, 6),
color='mediumvioletred',
fontsize=16,
title='Rankings Given by Wine Magazine',
)
借助Matplotlib
import matplotlib.pyplot as plt
ax = reviews['points'].value_counts().sort_index().plot.bar(
figsize=(12, 6),
color='mediumvioletred',
fontsize=16
)
ax.set_title("Rankings Given by Wine Magazine", fontsize=20)
data:image/s3,"s3://crabby-images/dc819/dc819a93c7a33d22658f8427f672954a5d763773" alt=""
借助Seaborn-去除边框
import matplotlib.pyplot as plt
import seaborn as sns
ax = reviews['points'].value_counts().sort_index().plot.bar(
figsize=(12, 6),
color='mediumvioletred',
fontsize=16
)
ax.set_title("Rankings Given by Wine Magazine", fontsize=20)
sns.despine(bottom=True, left=True)
data:image/s3,"s3://crabby-images/a3576/a35761b306ab61e789a917cba7ac310e1fbc551f" alt=""
多图表
matplotlib
fig, axarr = plt.subplots(2, 2, figsize=(12, 8))
reviews['points'].value_counts().sort_index().plot.bar(
ax=axarr[0][0]
)
reviews['province'].value_counts().head(20).plot.bar(
ax=axarr[1][1]
)
data:image/s3,"s3://crabby-images/be214/be214fdc107d2af7a03d845c7b8ee63237b297c9" alt=""
Seaborn
df = footballers[footballers['Position'].isin(['ST', 'GK'])]
g = sns.FacetGrid(df, col="Position", col_wrap=2)
g.map(sns.kdeplot, "Overall")
data:image/s3,"s3://crabby-images/617a4/617a495409b5664a8ada6df8671086d540606411" alt=""
df = footballers[footballers['Position'].isin(['ST', 'GK'])]
df = df[df['Club'].isin(['Real Madrid CF', 'FC Barcelona', 'Atlético Madrid'])]
g = sns.FacetGrid(df, row="Position", col="Club")
g.map(sns.violinplot, "Overall")
data:image/s3,"s3://crabby-images/aad20/aad20e4f3632b56e883f11c61dd42bf268efe0d9" alt=""
df = footballers[footballers['Position'].isin(['ST', 'GK'])]
df = df[df['Club'].isin(['Real Madrid CF', 'FC Barcelona', 'Atlético Madrid'])]
g = sns.FacetGrid(df, row="Position", col="Club",
row_order=['GK', 'ST'],
col_order=['Atlético Madrid', 'FC Barcelona', 'Real Madrid CF'])
g.map(sns.violinplot, "Overall")
控制显示顺序
pairplot-多变量的相互关系
sns.pairplot(footballers[['Overall', 'Potential', 'Value']])
data:image/s3,"s3://crabby-images/51318/51318225ee4966584517b420d0b6ae8c84683eb8" alt=""
颜色,图标参数
sns.lmplot(
x='Value', y='Overall',
markers=['o', 'x', '*'],
hue='Position',
data=footballers.loc[footballers['Position'].isin(
['ST', 'RW', 'LW'])],
fit_reg=False
)
data:image/s3,"s3://crabby-images/f4779/f47799915e334cecb87a40c3632b696633dde31d" alt=""
分组
f = (footballers
.loc[footballers['Position'].isin(['ST', 'GK'])]
.loc[:, ['Value', 'Overall', 'Aggression', 'Position']]
)
f = f[f["Overall"] >= 80]
f = f[f["Overall"] < 85]
f['Aggression'] = f['Aggression'].astype(float)
sns.boxplot(x="Overall", y="Aggression", hue='Position', data=f)
data:image/s3,"s3://crabby-images/5a941/5a941a802dfee8df6503c7ce5bff805b451c9432" alt=""
总结图
热力图
f = (
footballers.loc[:, ['Acceleration', 'Aggression', 'Agility', 'Balance', 'Ball control']]
.applymap(lambda v: int(v) if str.isdecimal(v) else np.nan)
.dropna()
).corr()
sns.heatmap(f, annot=True)
data:image/s3,"s3://crabby-images/44b27/44b2784c826b42ed9bc36fb8448bb770d7efdeb0" alt=""
平行线图
from pandas.plotting import parallel_coordinates
f = (
footballers.iloc[:, 12:17]
.loc[footballers['Position'].isin(['ST', 'GK'])]
.applymap(lambda v: int(v) if str.isdecimal(v) else np.nan)
.dropna()
)
f['Position'] = footballers['Position']
f = f.sample(200)
parallel_coordinates(f, 'Position')
data:image/s3,"s3://crabby-images/98f54/98f54e9bf35cbcd9b8176a1178e0272358857186" alt=""
Seaborn使用
基本图形
柱状图-值统计
countplot == value_counts
sns.countplot(reviews['points'])
data:image/s3,"s3://crabby-images/5c6f1/5c6f132c869666b2d2f34bc18d3b706b97cb7cdb" alt=""
折线图-密度图
sns.kdeplot(reviews.query('price < 200').price)
data:image/s3,"s3://crabby-images/46791/4679101105afbb92f5014bad9b0647f87d02f8ac" alt=""
二维密度图--类似蜂窝图作用
样本多,重复点多的时候用
sns.kdeplot(reviews[reviews['price'] < 200].loc[:, ['price', 'points']].dropna().sample(5000))
data:image/s3,"s3://crabby-images/a2ca1/a2ca1c447963d159065ecff545abc1edd78fc518" alt=""
直方图
类似pandas.hist
sns.distplot(reviews['points'], bins=10, kde=False)
data:image/s3,"s3://crabby-images/956f7/956f795bf5c73cc1f9333958fa48ed81389da02c" alt=""
散点图和直方图复合
sns.jointplot(x='price', y='points', data=reviews[reviews['price'] < 100])
data:image/s3,"s3://crabby-images/0c93e/0c93e25f8a8990b904ecfab2b8f96ea988ed16c1" alt=""
蜂窝图和直方图复合
sns.jointplot(x='price', y='points', data=reviews[reviews['price'] < 100], kind='hex',gridsize=20)
data:image/s3,"s3://crabby-images/b9457/b94572bd8ec634372d4738b869b1650354add01c" alt=""
箱线图
df = reviews[reviews.variety.isin(reviews.variety.value_counts().head(5).index)]
sns.boxplot(
x='variety',
y='points',
data=df
)
data:image/s3,"s3://crabby-images/9ae23/9ae23e1fe48ede81d9993b917041a5bbd0201d73" alt=""
小提琴图
sns.violinplot(
x='variety',
y='points',
data=reviews[reviews.variety.isin(reviews.variety.value_counts()[:5].index)]
)
data:image/s3,"s3://crabby-images/a6f27/a6f27d244c8e80be4f83970779ece083b66f6b6a" alt=""
网络动态图表-plotly
from plotly.offline import init_notebook_mode, iplot
init_notebook_mode(connected=True)
散点图
import plotly.graph_objs as go
iplot([go.Scatter(x=reviews.head(1000)['points'], y=reviews.head(1000)['price'], mode='markers')])
data:image/s3,"s3://crabby-images/29d95/29d957aee79c77174a7af70f65a961824bb2fb93" alt=""
热力图
iplot([go.Histogram2dContour(x=reviews.head(500)['points'],
y=reviews.head(500)['price'],
contours=go.Contours(coloring='heatmap')),
go.Scatter(x=reviews.head(1000)['points'], y=reviews.head(1000)['price'], mode='markers')])
data:image/s3,"s3://crabby-images/3e795/3e795e454c1ea7e7d8eaca85af978188eb6af9b9" alt=""
图形语法的可视化库plotnine
from plotnine import *
top_wines = reviews[reviews['variety'].isin(reviews['variety'].value_counts().head(5).index)]
df = top_wines.head(1000).dropna()
(ggplot(df)
+ aes('points', 'price')
+ geom_point())
# 其他表达形式
(ggplot(df)
+ geom_point(aes('points', 'price'))
)
(ggplot(df, aes('points', 'price'))
+ geom_point())
一层层添加图形参数
data:image/s3,"s3://crabby-images/eddde/eddde627bc8ba7109857aed378f9f3b705ec565b" alt=""
df = top_wines.head(1000).dropna()
(
ggplot(df)
+ aes('points', 'price')
+ geom_point()
+ stat_smooth()
)
data:image/s3,"s3://crabby-images/ed633/ed6330393656369a7afe00bb15bbff1c92e28587" alt=""
添加颜色
df = top_wines.head(1000).dropna()
(
ggplot(df)
+ geom_point()
+ aes(color='points')
+ aes('points', 'price')
+ stat_smooth()
)
一图多表
df = top_wines.head(1000).dropna()
(ggplot(df)
+ aes('points', 'price')
+ aes(color='points')
+ geom_point()
+ stat_smooth()
+ facet_wrap('~variety')
)
data:image/s3,"s3://crabby-images/b639e/b639e231b5ac99bbda072de64726c02f75582bdc" alt=""
柱状图
(ggplot(top_wines)
+ aes('points')
+ geom_bar()
)
data:image/s3,"s3://crabby-images/7fe38/7fe3816514faac9e77a32c4591488082f9a2ee6e" alt=""
二维热力图
(ggplot(top_wines)
+ aes('points', 'variety')
+ geom_bin2d(bins=20)
)
data:image/s3,"s3://crabby-images/b1ac5/b1ac5b103dc3372b8a684e4928cb6144d37100a2" alt=""
更多API文档 API Reference.
处理时间序列
一般折线图
shelter_outcomes['date_of_birth'].value_counts().sort_values().plot.line()
data:image/s3,"s3://crabby-images/792a6/792a6e4a2ed2c76e13394218b807f45b9f845168" alt=""
按年份重新取样
shelter_outcomes['date_of_birth'].value_counts().resample('Y').sum().plot.line()
data:image/s3,"s3://crabby-images/39e59/39e59ab6d51f653df2f54b7808c0c5708e820afb" alt=""
stocks['volume'].resample('Y').mean().plot.bar()
data:image/s3,"s3://crabby-images/9f94e/9f94eb2f448ddd372451556d88fd2f711d4079f0" alt=""
同期对比
如今年12月和去年12月比较
from pandas.plotting import lag_plot
lag_plot(stocks['volume'].tail(250))
data:image/s3,"s3://crabby-images/f3a71/f3a710a022f84cf49d33ce05ec1f0a5693b617b8" alt=""
自相关图
from pandas.plotting import autocorrelation_plot
autocorrelation_plot(stocks['volume'])
data:image/s3,"s3://crabby-images/11bc9/11bc9d3fdafec33c1ca6780a3c8942d8040f57b7" alt=""