Python数据可视化(八):小提琴图绘制
2021-05-05 本文已影响0人
Davey1220
使用seaborn包绘制小提琴图
# libraries & dataset
import seaborn as sns
import matplotlib.pyplot as plt
# Set a grey background (use sns.set_theme() if seaborn version 0.11.0 or above).
# The comment above originally had no statement under it; the styling call is
# restored here so the plots actually get the grey grid background.
sns.set(style="darkgrid")
# Load the example dataset
df = sns.load_dataset('iris')
# Preview the first rows (only rendered when run interactively, e.g. in a notebook)
df.head()
| | sepal_length | sepal_width | petal_length | petal_width | species |
---|---|---|---|---|---|
0 | 5.1 | 3.5 | 1.4 | 0.2 | setosa |
1 | 4.9 | 3.0 | 1.4 | 0.2 | setosa |
2 | 4.7 | 3.2 | 1.3 | 0.2 | setosa |
3 | 4.6 | 3.1 | 1.5 | 0.2 | setosa |
4 | 5.0 | 3.6 | 1.4 | 0.2 | setosa |
# Draw a basic violin plot
# A single violin showing the distribution of one numeric column
sns.violinplot(data=df, y="sepal_length")
plt.show()

# Draw violins for several variables at once
# Wide-form input: one violin for each of the first two columns
first_two_columns = df.iloc[:, :2]
sns.violinplot(data=first_two_columns)
plt.show()

# Draw a grouped violin plot
# One violin per species, with sepal length on the value axis
sns.violinplot(data=df, x="species", y="sepal_length")
plt.show()

# Lay the violins out horizontally
# Same plot as the grouped one, with the x and y variables swapped
sns.violinplot(data=df, x="sepal_length", y="species")
plt.show()

# Use the linewidth parameter to change the width of the outline
# A larger value draws thicker border lines
sns.violinplot(data=df, x="species", y="sepal_length", linewidth=5)
plt.show()

# Use the width parameter to change the width of each violin
# A smaller value draws narrower violins
sns.violinplot(data=df, x="species", y="sepal_length", width=0.3)
plt.show()

# Customise the violin colours
# Option 1: colour the groups from a named palette
sns.violinplot(data=df, x="species", y="sepal_length", palette="Reds")
plt.show()

# Option 2: one uniform colour for every violin
sns.violinplot(data=df, x="species", y="sepal_length", color="skyblue")
plt.show()

# Option 3: map each group name to its own colour
my_pal = dict(versicolor="g", setosa="b", virginica="m")
# Hand the mapping to seaborn as the palette
sns.violinplot(data=df, x="species", y="sepal_length", palette=my_pal)
plt.show()

# Customise the order of the groups
# Pass the desired group names as the 'order' parameter
sns.violinplot(x='species', y='sepal_length', data=df, order=["versicolor", "virginica", "setosa"])
plt.show()

# Determine the order by decreasing median.
# The medians are sorted explicitly: merely reversing the groupby result would
# reverse the alphabetical group order, which matches decreasing median only
# when the medians happen to increase alphabetically.
my_order = (
    df.groupby("species")["sepal_length"]
    .median()
    .sort_values(ascending=False)
    .index
)
# Specify the 'order' parameter with my_order and plot
sns.violinplot(x='species', y='sepal_length', data=df, order=my_order)
plt.show()

# Add text annotations
# Fix the group order up front (order of first appearance in the data) so that
# the violins, the medians and the counts are all indexed the same way.
groups = list(df["species"].dropna().unique())
# Basic violinplot stored in a matplotlib.axes object
ax = sns.violinplot(x="species", y="sepal_length", data=df, order=groups)
# Per-group median (label height) and number of observations (label text),
# both taken from one groupby and re-indexed to the plotted order. Counting
# with value_counts() would sort by frequency and could mislabel the violins.
stats = df.groupby("species")["sepal_length"].agg(["median", "size"])
medians = stats.loc[groups, "median"].values
nobs = [f"n: {count}" for count in stats.loc[groups, "size"]]
# Add text to the figure: violin i is drawn at x position i
for tick, (median, label) in enumerate(zip(medians, nobs)):
    ax.text(tick, median + 0.03, label,
            horizontalalignment='center',
            size='small',
            color='w',
            weight='semibold')
plt.show()
