使用matplotlib包绘制密度分布图
# Import libraries
import matplotlib.pyplot as plt
import numpy as np
from scipy.stats import gaussian_kde
# Build dummy data: each (value, count) pair contributes `count` copies of `value`
value_counts = [(1.5, 7), (2.5, 2), (3.5, 8), (4.5, 3), (5.5, 1), (6.5, 8)]
data = [value for value, count in value_counts for _ in range(count)]
# Peek at elements 1 through 9
data[1:10]
[1.5, 1.5, 1.5, 1.5, 1.5, 1.5, 2.5, 2.5, 3.5]
# Build a kernel density estimate (KDE) of the dataset.
# Calling density(x) with values from the X axis returns the estimated density
# (the corresponding values on the Y axis).
# bw_method=0.25 sets the bandwidth factor through the public API instead of
# overriding covariance_factor and calling the private _compute_covariance().
density = gaussian_kde(data, bw_method=0.25)
# Create a vector of 200 evenly spaced values going from 0 to 8
xs = np.linspace(0, 8, 200)
# Set the figure size
plt.figure(figsize=(14, 8))
# Make the chart: this is really a line chart whose x values are spread all
# along the axis and whose y values come from the density function
plt.plot(xs, density(xs))
plt.show()
image.png
# Create the figure at the desired size and keep a handle on its axes
_fig, ax = plt.subplots(figsize=(14, 8))
# Evaluate the density on the grid and draw it as a filled area
ys = density(xs)
ax.fill_between(xs, ys, color="#69b3a2", alpha=0.4)
# Main title on the left, credit line on the right
ax.set_title("How probable something is when someone says 'We believe'", loc='left', fontsize=18)
ax.set_title("python graph gallery", loc='right', fontsize=13, color='grey', style='italic')
# Name the X axis
ax.set_xlabel("probability (%)")
# Hide the Y axis ticks
ax.set_yticks([])
plt.show()
image.png
使用seaborn包绘制密度分布图
# libraries & dataset
import seaborn as sns
import matplotlib.pyplot as plt
# Use seaborn's grey "darkgrid" background.
# set_theme() (seaborn >= 0.11.0) is the preferred interface; sns.set() is only
# a legacy alias for it that may be removed in the future.
sns.set_theme(style="darkgrid")
# Load the example iris dataset
df = sns.load_dataset('iris')
df.head()
|   | sepal_length | sepal_width | petal_length | petal_width | species |
|---|--------------|-------------|--------------|-------------|---------|
| 0 | 5.1 | 3.5 | 1.4 | 0.2 | setosa |
| 1 | 4.9 | 3.0 | 1.4 | 0.2 | setosa |
| 2 | 4.7 | 3.2 | 1.3 | 0.2 | setosa |
| 3 | 4.6 | 3.1 | 1.5 | 0.2 | setosa |
| 4 | 5.0 | 3.6 | 1.4 | 0.2 | setosa |
# Draw a density plot with the kdeplot function, using its default settings
sepal_width = df['sepal_width']
sns.kdeplot(sepal_width)
plt.show()
image.png
# Shade the area under the density curve.
# `fill=True` replaces the `shade=True` parameter, which has been deprecated
# since seaborn 0.11 and is slated to become an error.
sns.kdeplot(df['sepal_width'], fill=True)
plt.show()
image.png
# Rotate the plot: assigning the variable to `y` puts the values on the Y axis
# and the density on the X axis. This replaces the deprecated `vertical=True`
# parameter; `fill=True` likewise replaces the deprecated `shade=True`.
sns.kdeplot(y=df['sepal_width'], fill=True, color="skyblue")
plt.show()
image.png
# Change the smoothing bandwidth.
# A small bandwidth (0.05) gives a narrow kernel and therefore a wiggly curve.
# `bw_method` replaces the deprecated `bw` parameter, and `fill` replaces the
# deprecated `shade` parameter.
sns.kdeplot(df['sepal_width'], fill=True, bw_method=0.05, color="olive")
plt.show()
image.png
# Draw the density plots of several variables.
# Both distributions are plotted on the same figure: the second kdeplot call
# draws onto the current axes created by the first one.
# `fill=True` replaces the deprecated `shade=True` parameter.
# NOTE: kdeplot returns a matplotlib Axes, so `fig` holds an Axes object,
# not a Figure; the name is kept so the script's existing variable is unchanged.
fig = sns.kdeplot(df['sepal_width'], fill=True, color="r")
fig = sns.kdeplot(df['sepal_length'], fill=True, color="b")
plt.show()
image.png
# 绘制镜像对象密度分布图
import numpy as np
from numpy import linspace
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from scipy.stats import gaussian_kde
# Build a two-column dataframe of 1000 random draws each:
# var1 comes from a standard normal, var2 from a normal centred on 2 and then negated
n_samples = 1000
var1_samples = np.random.normal(size=n_samples)
var2_samples = np.random.normal(loc=2, size=n_samples) * -1
df = pd.DataFrame({'var1': var1_samples, 'var2': var2_samples})
df.head()
|   | var1 | var2 |
|---|------|------|
| 0 | 1.188681 | -0.885048 |
| 1 | -0.481081 | 0.464484 |
| 2 | -0.216836 | -1.629883 |
| 3 | -1.329591 | -3.137911 |
| 4 | -0.390510 | -1.281925 |
# Fig size (applies to every figure created from here on)
plt.rcParams["figure.figsize"] = 12, 8
# Plot the density chart for var1 above the axis.
# `fill=True` replaces the deprecated `shade=True` parameter.
sns.kdeplot(df["var1"], fill=True, alpha=1)
# Estimate the density of var2 on an evenly spaced grid covering its range
kde = gaussian_kde(df.var2)
x_range = linspace(df.var2.min(), df.var2.max(), len(df.var2))
# Multiply by -1 to reverse the axes (mirror plot).
# The KDE is evaluated once and reused for both the outline and the filled area.
mirrored_x = x_range * -1
mirrored_density = kde(x_range) * -1
sns.lineplot(x=mirrored_x, y=mirrored_density, color='orange')
plt.fill_between(mirrored_x, mirrored_density, color='orange')
# Add the axis name and a horizontal line at y=0 separating the two densities
plt.xlabel("value of x")
plt.axhline(y=0, linestyle='-', linewidth=1, color='black')
# Show the graph
plt.show()
image.png