学习计划:第01周(20190715-20190721)

2019-07-16  本文已影响0人  木圭金夆

知识点:数据的描述性统计

import numpy as np
import stats as sts
scores = [31, 24, 23, 25, 14, 25, 13, 12, 14, 23,
          32, 34, 43, 41, 21, 23, 26, 26, 34, 42,
          43, 25, 24, 23, 24, 44, 23, 14, 52,32,
          42, 44, 35, 28, 17, 21, 32, 42, 12, 34]
#集中趋势的度量
print('求和:',np.sum(scores))
print('个数:',len(scores))
print('平均值:',np.mean(scores))
print('中位数:',np.median(scores))
print('众数:',sts.mode(scores))
print('上四分位数',sts.quantile(scores,p=0.25))
print('下四分位数',sts.quantile(scores,p=0.75))
#离散趋势的度量
print('最大值:',np.max(scores))
print('最小值:',np.min(scores))
print('极差:',np.max(scores)-np.min(scores))
print('四分位差',sts.quantile(scores,p=0.75)-sts.quantile(scores,p=0.25))
print('标准差:',np.std(scores))
print('方差:',np.var(scores))
print('离散系数:',np.std(scores)/np.mean(scores))
#偏度与峰度的度量
print('偏度:',sts.skewness(scores))
print('峰度:',sts.kurtosis(scores))
#几何平均数
def GeometricMean(data):
    len_d = len(data)
    product = 1.0
    for i in range(len_d):
        product = product * data[i]
    # the next line is equal to calculate the n-root
    result = product ** (1.0/len_d)
    return result

#偏度峰度实现
import matplotlib.pyplot as plt
import math
import numpy as np

def calc(data):
    n = len(data)
    niu = 0.0
    niu2 = 0.0
    niu3 = 0.0
    for a in data:
        niu += a
        niu2 += a**2
        niu3 += a**3
    niu/= n   #这是求E(X)
    niu2 /= n #这是E(X^2)
    niu3 /= n #这是E(X^3)
    sigma = math.sqrt(niu2 - niu*niu) #这是D(X)的开方,标准差
    return [niu,sigma,niu3] #返回[E(X),标准差,E(X^3)]

def calc_stat(data):
    [niu,sigma,niu3] = calc(data)
    n = len(data)
    niu4 = 0.0
    for a in data:
        a -= niu
        niu4 += a ** 4
    niu4 /= n   
    skew = (niu3 - 3*niu*sigma**2 - niu**3)/(sigma**3)
    kurt =  niu4/(sigma**2)
    return [niu,sigma,skew,kurt] #返回了均值,标准差,偏度,峰度

if __name__== "__main__":
    data = list(np.random.randn(10000))#关于此处的数组与列表
    data2 = list(2*np.random.randn(10000))
    data3 = [x for x in data if x> -0.5]
    data4 = list(np.random.uniform(0,4,10000))
    [niu,sigma,skew,kurt] = calc_stat(data)
    [niu2,sigma2,skew2,kurt2] = calc_stat(data2)
    [niu3,sigma3,skew3,kurt3] = calc_stat(data3)
    [niu4,sigma4,skew4,kurt4] = calc_stat(data4)
    print niu,sigma,skew,kurt
    print niu2,sigma2,skew2,kurt2
    print niu3,sigma3,skew3,kurt3
    print niu4,sigma4,skew4,kurt4

    info = r'$\mu=%.2f,\ \sigma=%.2f,\ skew=%.2f,\ kurt=%.2f$'%(niu,sigma,skew,kurt)
    info2 =r'$\mu=%.2f,\ \sigma=%.2f,\ skew=%.2f,\ kurt=%.2f$'%(niu2,sigma2,skew2,kurt2)
    plt.text(1,0.38,info,bbox=dict(facecolor='red',alpha=0.25))
    plt.text(1,0.35,info2,bbox=dict(facecolor='green',alpha=0.25))
    #plt.text(x的位置,y的位置,面板内写的信息,标签框的属性=dict(facecolor='面板颜色',alpha='深浅度'))
    plt.hist(data,50,normed=True,facecolor='r',alpha=0.9)
    #hist直方图/箱式图(
    #将data中的元素分到50个等间隔的范围内,返回每个范围内元素的个数作为一个行向量,
    #50代表要分的元素的个数
    #
    #facecolor,alpha都是代表颜色的)
    plt.hist(data2,80,normed=True,facecolor='g',alpha = 0.8)
    plt.grid(True)
    plt.show()

上一篇下一篇

猜你喜欢

热点阅读