Python数据可视化(四):频率直方图绘制
2021-04-29 本文已影响0人
Davey1220
使用seaborn包绘制直方图
# 导入所需的python包
# libraries & dataset
import seaborn as sns
import matplotlib.pyplot as plt
# Use the grey "darkgrid" background for every figure
# (sns.set_theme() is the suggested spelling on seaborn 0.11.0 or above).
sns.set(style="darkgrid")
# Load the example "iris" dataset
df = sns.load_dataset("iris")
# Preview the first rows (the table below shows the rendered result)
df.head()
| | sepal_length | sepal_width | petal_length | petal_width | species |
|---|---|---|---|---|---|
| 0 | 5.1 | 3.5 | 1.4 | 0.2 | setosa |
| 1 | 4.9 | 3.0 | 1.4 | 0.2 | setosa |
| 2 | 4.7 | 3.2 | 1.3 | 0.2 | setosa |
| 3 | 4.6 | 3.1 | 1.5 | 0.2 | setosa |
| 4 | 5.0 | 3.6 | 1.4 | 0.2 | setosa |
# Draw a basic histogram with distplot: bars only, no density curve
sepal_length = df["sepal_length"]
sns.distplot(sepal_length, hist=True, kde=False)
plt.show()
image.png
# Set kde=True to overlay a density curve on the histogram
sns.distplot(
    df["sepal_length"],
    hist=True,
    kde=True,
)
plt.show()
image.png
# Use the bins parameter to change the number of bins
n_bins = 30
sns.distplot(df["sepal_length"], hist=True, kde=True, bins=n_bins)
plt.show()
image.png
# Set rug=True to add rug ticks along the axis
sepal_length = df["sepal_length"]
sns.distplot(sepal_length, kde=True, rug=True)
# NOTE(review): the keyword form with data=/x= is presumably meant for
# sns.displot (whose 'kind' is 'hist' by default), not sns.distplot, e.g.:
# sns.displot(data=df, x="sepal_length", kde=True, rug=True)
plt.show()
image.png
# Customise the colour and width of the rug ticks
rug_style = {"color": "r", "alpha": 0.3, "linewidth": 2, "height": 0.2}
sns.distplot(df["sepal_length"], kde=True, rug=True, rug_kws=rug_style)
# NOTE(review): the keyword form with data=/x= is presumably meant for
# sns.displot, not sns.distplot, e.g.:
# sns.displot(data=df, x="sepal_length", kde=True, rug=True, rug_kws=rug_style)
plt.show()
image.png
# Customise the colour and width of the density curve
curve_style = {"color": "g", "alpha": 0.3, "linewidth": 5, "shade": True}
sns.distplot(df["sepal_length"], kde=True, kde_kws=curve_style)
# NOTE(review): the keyword form with data=/x= is presumably meant for
# sns.displot, not sns.distplot. The styling keys in kde_kws above may not
# carry over unchanged to the newer functions - verify against the seaborn
# documentation before porting this call.
plt.show()
image.png
# Draw the histograms of several variables on the same axes
overlays = (
    ("sepal_length", "skyblue", "Sepal Length"),
    ("sepal_width", "red", "Sepal Width"),
)
for column, colour, legend_text in overlays:
    sns.distplot(df[column], color=colour, label=legend_text, kde=True)
# The legend entries come from the label= passed to each call above
plt.legend()
plt.show()
image.png
# Show several histograms as facets
# Set up a 2x2 grid of axes
fig, axs = plt.subplots(2, 2, figsize=(7, 7))
# One histogram per panel; axs.flat walks the grid row by row, so the
# panels are filled top-left, top-right, bottom-left, bottom-right.
panels = (
    ("sepal_length", "skyblue"),
    ("sepal_width", "olive"),
    ("petal_length", "gold"),
    ("petal_width", "teal"),
)
for (column, colour), panel_ax in zip(panels, axs.flat):
    sns.distplot(df[column], kde=True, color=colour, ax=panel_ax)
plt.show()
image.png
# Use jointplot to draw marginal histograms around a bivariate plot
# The inner plot is selected with kind; options are:
# "scatter" | "reg" | "resid" | "kde" | "hex"
for inner_kind in ("scatter", "hex", "kde"):
    sns.jointplot(x=df["sepal_length"], y=df["sepal_width"], kind=inner_kind)
plt.show()
image.png
image.png
image.png
# Customise the colours
# Both figures plot the same pair of columns
sepal_xy = {"x": df["sepal_length"], "y": df["sepal_width"]}
# Extra keyword arguments can then be passed for each plot type:
sns.jointplot(**sepal_xy, kind="scatter", s=200, color="m", edgecolor="skyblue", linewidth=2)
# Switch to the white style before drawing the recoloured kde version
sns.set(style="white", color_codes=True)
sns.jointplot(**sepal_xy, kind="kde", color="skyblue")
plt.show()
image.png
image.png
# Customise the number of bins of the marginal histograms
# marginal_kws carries the options for the marginal plots
marginal_options = {"bins": 30, "color": "r"}
sns.jointplot(
    x=df["sepal_length"],
    y=df["sepal_width"],
    kind="hex",
    marginal_kws=marginal_options,
)
plt.show()
image.png
# Add a box plot above the histogram
# Create a figure with two stacked axes (ax_box and ax_hist) that share the
# x axis; the box plot gets 15% of the height and the histogram 85%.
f, (ax_box, ax_hist) = plt.subplots(2, sharex=True, gridspec_kw={"height_ratios": (.15, .85)})
# Assign a graph to each axes. The data is passed explicitly as x= so the box
# is drawn horizontally along the shared x axis; a bare positional argument is
# interpreted differently depending on the seaborn version.
sns.boxplot(x=df["sepal_length"], ax=ax_box)
sns.distplot(df["sepal_length"], ax=ax_hist)
# Remove the x axis name for the box plot (the histogram below keeps its own)
ax_box.set(xlabel='')
plt.show()
image.png