计算最小样本量

2023-04-13  本文已影响0人  Colleen_oh

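要计算出样本容量的最小值,需要确定以下几个因素:置信水平(confidence level)、总体标准差(由数据估计)以及允许的误差范围(margin of error)。确定之后按公式 n = z²σ²/E² 计算并向上取整,其中 z 为置信水平对应的标准正态分位数,σ 为总体标准差,E 为允许误差。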

import math
import statistics

import numpy as np

def calculate_sample_size(data, confidence_level, margin_of_error):
    """Return the minimum sample size needed to estimate a mean.

    Applies the classic formula ``n = (z * sigma / E) ** 2`` and rounds the
    result up to the next integer, where ``z`` is the two-sided
    standard-normal critical value for the confidence level, ``sigma`` is
    the population standard deviation of ``data`` and ``E`` is the margin
    of error.

    Args:
        data: Non-empty sequence of numeric observations used to compute
            the population standard deviation.
        confidence_level: Confidence level as a percentage, strictly
            between 0 and 100 (e.g. ``95``).
        margin_of_error: Allowed error as a percentage, greater than 0.
            It is divided by 100 and the result is used directly as ``E``.

    Returns:
        The required sample size as an ``int`` (0 when ``data`` has no
        spread).

    Raises:
        ValueError: If ``confidence_level`` is not in the open interval
            (0, 100) or ``margin_of_error`` is not positive.
        statistics.StatisticsError: If ``data`` is empty.
    """
    if not 0 < confidence_level < 100:
        raise ValueError("confidence_level must be between 0 and 100 (exclusive)")
    if margin_of_error <= 0:
        raise ValueError("margin_of_error must be greater than 0")

    # Convert both percentages to fractions.
    confidence = confidence_level / 100
    # NOTE(review): the fraction is used as an absolute error in the same
    # units as ``data``, not relative to the mean -- confirm this is intended.
    error = margin_of_error / 100

    population_std_dev = statistics.pstdev(data)

    # Two-sided critical value: leave (1 - confidence) / 2 in each tail.
    z_score = statistics.NormalDist().inv_cdf(1 - (1 - confidence) / 2)

    sample_size = (z_score ** 2) * (population_std_dev ** 2) / (error ** 2)
    return math.ceil(sample_size)

# Example usage: the integers 1..10, 95% confidence, 5% margin of error.
data = list(range(1, 11))
confidence_level, margin_of_error = 95, 5

sample_size = calculate_sample_size(
    data,
    confidence_level=confidence_level,
    margin_of_error=margin_of_error,
)

print("样本容量的最小值为:", sample_size)

方法二:二分法(对样本量做二分搜索,每次从数据中无放回抽样,并用秩和检验比较样本与全部数据)

from scipy.stats import ranksums

def calculate_sample_size(data, confidence_level, margin_of_error):
    """Estimate a minimum sample size by bisection over subsample sizes.

    The search interval starts at ``[1, len(data)]``. At each step a
    subsample of the midpoint size is drawn from ``data`` without
    replacement and compared with the full data using the Wilcoxon
    rank-sum test. If the p-value is at least ``confidence_level / 100``
    the upper bound moves down to the midpoint; otherwise the lower bound
    moves up. The search stops when the bounds are at most 1 apart and the
    upper bound is returned.

    The subsamples come from NumPy's global random state, so results can
    differ between calls unless the caller seeds ``np.random``.

    Args:
        data: Sequence of numeric observations.
        confidence_level: Confidence level as a percentage (e.g. ``95``).
        margin_of_error: Accepted for interface compatibility; it does not
            influence the result.

    Returns:
        The final upper bound of the search as an ``int``. For inputs with
        fewer than three elements this is simply ``len(data)``.
    """
    # Convert the confidence level from a percentage to a fraction.
    threshold = confidence_level / 100
    # NOTE(review): the p-value is compared against the confidence level
    # itself (e.g. 0.95), not the significance level 1 - confidence
    # (e.g. 0.05). Kept as originally written -- confirm which is intended.

    lower_bound = 1
    upper_bound = len(data)
    while upper_bound - lower_bound > 1:
        mid = (lower_bound + upper_bound) // 2
        sample = np.random.choice(data, size=mid, replace=False)
        _, p_value = ranksums(data, sample)
        if p_value >= threshold:
            upper_bound = mid
        else:
            lower_bound = mid

    return int(upper_bound)

# Example usage: five evenly spaced values, 95% confidence, 5% margin of error.
data = list(range(10, 60, 10))
confidence_level, margin_of_error = 95, 5

sample_size = calculate_sample_size(
    data,
    confidence_level=confidence_level,
    margin_of_error=margin_of_error,
)

print("样本容量的最小值为:", sample_size)
上一篇 下一篇

猜你喜欢

热点阅读