[雪峰磁针石博客]数据科学入门4-概率简介
泛泛地讲,如果 E 发生意味着 F 发生(或者 F 发生意味着 E 发生),我们就称事件 E 与事件 F 为不相互独立(dependent)。反之,E 与 F 就相互独立(independent)。
如果事件 E 与事件 F 独立,那么定义式如下:P(E, F)=P(E)P(F)
如果两者不一定独立(并且 F 的概率不为零),那么 E 关于 F 的条件概率式如下:P(E|F)=P(E, F)/P(F)
如果我们假设:
(1) 每个孩子是男孩和是女孩的概率相同
(2) 第二个孩子的性别概率与第一个孩子的性别概率独立
那么,事件“没有女孩”的概率是 1/4,事件“一个男孩,一个女孩”的概率为 1/2,事件“两个女孩”的概率为 1/4
事件 B“两个孩子都是女孩”关于事件 G“大孩子是女孩”的条件概率是多少?用条件概率的定义式进行计算如下:
P(B|G)=P(B, G)/P(G)=P(B)/P(G)=1/2
事件 B 与 G 的交集(“两个孩子都是女孩并且大孩子是女孩”)刚好是事件 B 本身。(一旦你知道两个孩子都是女孩,那大孩子必然是女孩。)
from collections import Counter
import math, random
import matplotlib.pyplot as plt
def random_kid():
return random.choice(["boy", "girl"])
def uniform_pdf(x):
return 1 if x >= 0 and x < 1 else 0
def uniform_cdf(x):
"returns the probability that a uniform random variable is less than x"
if x < 0: return 0 # uniform random is never less than 0
elif x < 1: return x # e.g. P(X < 0.4) = 0.4
else: return 1 # uniform random is always less than 1
def normal_pdf(x, mu=0, sigma=1):
sqrt_two_pi = math.sqrt(2 * math.pi)
return (math.exp(-(x-mu) ** 2 / 2 / sigma ** 2) / (sqrt_two_pi * sigma))
def plot_normal_pdfs(plt):
xs = [x / 10.0 for x in range(-50, 50)]
plt.plot(xs,[normal_pdf(x,sigma=1) for x in xs],'-',label='mu=0,sigma=1')
plt.plot(xs,[normal_pdf(x,sigma=2) for x in xs],'--',label='mu=0,sigma=2')
plt.plot(xs,[normal_pdf(x,sigma=0.5) for x in xs],':',label='mu=0,sigma=0.5')
plt.plot(xs,[normal_pdf(x,mu=-1) for x in xs],'-.',label='mu=-1,sigma=1')
plt.legend()
plt.show()
def normal_cdf(x, mu=0,sigma=1):
return (1 + math.erf((x - mu) / math.sqrt(2) / sigma)) / 2
def plot_normal_cdfs(plt):
xs = [x / 10.0 for x in range(-50, 50)]
plt.plot(xs,[normal_cdf(x,sigma=1) for x in xs],'-',label='mu=0,sigma=1')
plt.plot(xs,[normal_cdf(x,sigma=2) for x in xs],'--',label='mu=0,sigma=2')
plt.plot(xs,[normal_cdf(x,sigma=0.5) for x in xs],':',label='mu=0,sigma=0.5')
plt.plot(xs,[normal_cdf(x,mu=-1) for x in xs],'-.',label='mu=-1,sigma=1')
plt.legend(loc=4) # bottom right
plt.show()
def inverse_normal_cdf(p, mu=0, sigma=1, tolerance=0.00001):
"""find approximate inverse using binary search"""
# if not standard, compute standard and rescale
if mu != 0 or sigma != 1:
return mu + sigma * inverse_normal_cdf(p, tolerance=tolerance)
low_z, low_p = -10.0, 0 # normal_cdf(-10) is (very close to) 0
hi_z, hi_p = 10.0, 1 # normal_cdf(10) is (very close to) 1
while hi_z - low_z > tolerance:
mid_z = (low_z + hi_z) / 2 # consider the midpoint
mid_p = normal_cdf(mid_z) # and the cdf's value there
if mid_p < p:
# midpoint is still too low, search above it
low_z, low_p = mid_z, mid_p
elif mid_p > p:
# midpoint is still too high, search below it
hi_z, hi_p = mid_z, mid_p
else:
break
return mid_z
def bernoulli_trial(p):
return 1 if random.random() < p else 0
def binomial(p, n):
return sum(bernoulli_trial(p) for _ in range(n))
def make_hist(p, n, num_points):
data = [binomial(p, n) for _ in range(num_points)]
# use a bar chart to show the actual binomial samples
histogram = Counter(data)
plt.bar([x - 0.4 for x in histogram.keys()],
[v / num_points for v in histogram.values()],
0.8,
color='0.75')
mu = p * n
sigma = math.sqrt(n * p * (1 - p))
# use a line chart to show the normal approximation
xs = range(min(data), max(data) + 1)
ys = [normal_cdf(i + 0.5, mu, sigma) - normal_cdf(i - 0.5, mu, sigma)
for i in xs]
plt.plot(xs,ys)
plt.show()
if __name__ == "__main__":
#
# CONDITIONAL PROBABILITY
#
both_girls = 0
older_girl = 0
either_girl = 0
random.seed(0)
for _ in range(10000):
younger = random_kid()
older = random_kid()
if older == "girl":
older_girl += 1
if older == "girl" and younger == "girl":
both_girls += 1
if older == "girl" or younger == "girl":
either_girl += 1
print("P(both | older):", both_girls / older_girl) # 0.514 ~ 1/2
print("P(both | either): ", both_girls / either_girl) # 0.342 ~ 1/3
plot_normal_pdfs(plt)
plot_normal_cdfs(plt)
make_hist(0.75, 100, 10000)
执行结果:
P(both | older): 0.5007089325501317
P(both | either): 0.3311897106109325
可爱的python测试开发库 请在github上点赞,谢谢!
python中文库文档汇总
[雪峰磁针石博客]python3标准库-中文版
[雪峰磁针石博客]python3快速入门教程
接口自动化性能测试线上培训大纲
python测试开发自动化测试数据分析人工智能自学每周一练
更多内容请关注 雪峰磁针石:简书
-
技术支持qq群: 144081101(后期会录制视频存在该群群文件) 591302926 567351477 钉钉免费群:21745728
-
道家技术-手相手诊看相中医等钉钉群21734177 qq群:391441566 184175668 338228106 看手相、面相、舌相、抽签、体质识别。服务费50元每人次起。请联系钉钉或者微信pythontesting
图片.png
图片.png