使用seaborn包绘制箱线图
# libraries & dataset
import seaborn as sns
import matplotlib.pyplot as plt
# Apply seaborn's grey "darkgrid" look to every following figure
# (sns.set_theme() is the preferred spelling on seaborn 0.11.0 or above).
sns.set(style="darkgrid")
# Load the iris example dataset and preview its first five rows.
df = sns.load_dataset("iris")
df.head(n=5)
|   | sepal_length | sepal_width | petal_length | petal_width | species |
|---|--------------|-------------|--------------|-------------|---------|
| 0 | 5.1 | 3.5 | 1.4 | 0.2 | setosa |
| 1 | 4.9 | 3.0 | 1.4 | 0.2 | setosa |
| 2 | 4.7 | 3.2 | 1.3 | 0.2 | setosa |
| 3 | 4.6 | 3.1 | 1.5 | 0.2 | setosa |
| 4 | 5.0 | 3.6 | 1.4 | 0.2 | setosa |
# Draw a basic boxplot of a single numeric column.
sns.boxplot(data=df, y="sepal_length")
plt.show()
data:image/s3,"s3://crabby-images/0c9c8/0c9c87a9a05d5facde8076ef769eac0438e16737" alt=""
image.png
# Draw one box per variable by passing a wide-form frame of the chosen columns.
sns.boxplot(data=df[["sepal_length", "sepal_width"]])
plt.show()
data:image/s3,"s3://crabby-images/0c207/0c2074a25c1df4bbf552f7aabd87191464d2641a" alt=""
image.png
# Draw a grouped boxplot: one box of sepal_length per species.
sns.boxplot(data=df, x="species", y="sepal_length")
plt.show()
data:image/s3,"s3://crabby-images/9b106/9b106a3dde04320787194e7f812dfb22e96b856c" alt=""
image.png
# Overlay jittered observations on top of the boxes.
# Boxes first; keep the Axes so the points land on the same plot.
ax = sns.boxplot(data=df, x="species", y="sepal_length")
# Individual observations, spread horizontally so they do not overlap.
sns.stripplot(
    data=df,
    x="species",
    y="sepal_length",
    color="orange",
    jitter=0.2,
    size=4,
    ax=ax,
)
# Left-aligned title on the shared Axes.
ax.set_title("Boxplot with jitter", loc="left")
# Render the figure.
plt.show()
data:image/s3,"s3://crabby-images/2db67/2db6788487e993b6b30a67dbbf52eafcd1fa425d" alt=""
image.png
# Use the linewidth parameter to customise the thickness of the box outlines.
sns.boxplot(data=df, x="species", y="sepal_length", linewidth=5)
plt.show()
data:image/s3,"s3://crabby-images/915b0/915b0facecc1ee87eca8b59a35d3e0e2e3215c66" alt=""
image.png
# Pass notch=True to draw a notch around each median.
sns.boxplot(data=df, x="species", y="sepal_length", notch=True)
plt.show()
data:image/s3,"s3://crabby-images/3b1fe/3b1fe6f1c3d69d0d430e93e8567ff3013b4e5f91" alt=""
image.png
# Use the width parameter to customise how wide each box is drawn.
sns.boxplot(data=df, x="species", y="sepal_length", width=0.3)
plt.show()
data:image/s3,"s3://crabby-images/bd73a/bd73afd8a8285ef0a2cb2e6584624a4fc11d4cc6" alt=""
image.png
# Customising colours
# Use the palette parameter to pick a named colour palette for the boxes.
sns.boxplot(data=df, x="species", y="sepal_length", palette="Blues")
plt.show()
data:image/s3,"s3://crabby-images/7d336/7d336913958d6c4a0731e9ac7409af2f818dea5d" alt=""
image.png
# Use the color parameter to give every box the same colour.
sns.boxplot(data=df, x="species", y="sepal_length", color="skyblue")
plt.show()
data:image/s3,"s3://crabby-images/6ce36/6ce3627d4283d1581c51d8a35f306093c75f87ae" alt=""
image.png
# Assign a specific colour to each group via a {category: colour} mapping.
my_pal = dict(versicolor="g", setosa="b", virginica="m")
sns.boxplot(data=df, x="species", y="sepal_length", palette=my_pal)
plt.show()
data:image/s3,"s3://crabby-images/ac0fe/ac0fececc628bd6b908b26dc164cfdbcdaa1327f" alt=""
image.png
# Use the order parameter to control the left-to-right order of the groups.
sns.boxplot(
    data=df,
    x="species",
    y="sepal_length",
    order=["versicolor", "virginica", "setosa"],
)
plt.show()
data:image/s3,"s3://crabby-images/57662/576629a251a397d3f8062fb8d1d7a84e89ca9df7" alt=""
image.png
# Order the groups by their median, largest first.
# Sort the per-species medians explicitly. Merely reversing the groupby
# result yields reverse-alphabetical order, which matches the median ranking
# only by coincidence on this dataset.
my_order = (
    df.groupby("species")["sepal_length"]
    .median()
    .sort_values(ascending=False)
    .index
)
# Hand the computed order to the boxplot.
sns.boxplot(x='species', y='sepal_length', data=df, order=my_order)
plt.show()
data:image/s3,"s3://crabby-images/0dbf6/0dbf6319cb21179a7f3b9af41021ef0a79a445bb" alt=""
image.png
# Add text annotation labels (number of observations per group).
# Fix the category order up front so boxes, medians and counts all share it.
group_order = list(df["species"].dropna().unique())
ax = sns.boxplot(x="species", y="sepal_length", data=df, order=group_order)
# Per-group median (vertical anchor of the label) and row count (label text),
# computed in one aggregate and aligned to the plotted order. Taking the
# counts from value_counts() would order them by frequency instead, putting
# labels on the wrong boxes whenever the groups differ in size.
stats = (
    df.groupby("species")["sepal_length"]
    .agg(["median", "size"])
    .reindex(group_order)
)
medians = stats["median"].values
nobs = ["n: " + str(count) for count in stats["size"]]
# Write each label just above the median line of its own box.
for tick, (median, label) in enumerate(zip(medians, nobs)):
    ax.text(
        tick,
        median + 0.03,
        label,
        horizontalalignment='center',
        size='x-small',
        color='w',
        weight='semibold',
    )
plt.show()
data:image/s3,"s3://crabby-images/2e0e2/2e0e23c4c1f36e901208e49c82d89a04934bb055" alt=""
image.png
# Colour the boxes by a second grouping variable.
# Switch to the tips example dataset (rebinds df) and preview five rows.
df = sns.load_dataset("tips")
df.head(n=5)
|   | total_bill | tip | sex | smoker | day | time | size |
|---|------------|-----|-----|--------|-----|------|------|
| 0 | 16.99 | 1.01 | Female | No | Sun | Dinner | 2 |
| 1 | 10.34 | 1.66 | Male | No | Sun | Dinner | 3 |
| 2 | 21.01 | 3.50 | Male | No | Sun | Dinner | 3 |
| 3 | 23.68 | 3.31 | Male | No | Sun | Dinner | 2 |
| 4 | 24.59 | 3.61 | Female | No | Sun | Dinner | 4 |
# One box per (day, smoker) pair; hue splits and colours each day's boxes.
sns.boxplot(
    data=df,
    x="day",
    y="total_bill",
    hue="smoker",
    palette="Set1",
    width=0.5,
)
plt.show()
data:image/s3,"s3://crabby-images/80f34/80f349c235b22f3477fa82b84d0ca721c77181ab" alt=""
image.png
# Same grouping drawn as violins: one violin per (day, smoker) pair.
sns.violinplot(
    data=df,
    x="day",
    y="total_bill",
    hue="smoker",
    palette="Pastel1",
)
plt.show()
data:image/s3,"s3://crabby-images/df36c/df36c735f65b15bb8ba0cd78898b31334af38049" alt=""
image.png