问题描述
当我要从N(1,2)依次获取100个样本并依次估算均值和标准差时,我可以这样做:
sample = np.random.normal(1,2,(100,1))
sample_mean = []
for i,_ in enumerate(sample):
sample_mean.append(sample[:i+1,:].ravel().mean())
sample_std.append(sample[:i+1,:].ravel().std())
但是,如果我想计算这些连续样本的置信区间,该怎么办?
解决方法
一个人可以将样本均值的CI计算为sampleMean +/- tstat * std/sqrt(n)
和样本标准差的CI作为( (sampleSize-1)*std^2/chisq(a/2),(sampleSize-1)*std^2/chisq(1-a/2) )的平方根。
以下是示例:
import numpy as np
import scipy.stats as stats
from scipy.stats import chi2
n_series = 100
sample = np.random.normal(0,1,n_series)
sample_mean,sample_std = [],[]
sample_mean_CI,sample_std_CI = [],[]
alpha = 0.1 # e.g. alpha = 0.1 for 90-percent CI,alpha=0.05 for 95-percent CI
def mean_div(std,n,alpha): return stats.t.isf(alpha/2,n) * (std/np.sqrt(n))
def mean_ci(xbar,std,alpha):
div = mean_div(std,alpha)
return (xbar - div,xbar + div)
def std_lower(std,alpha): return np.sqrt(((n-1)*std**2)/chi2.isf(alpha/2,n-1))
def std_upper(std,alpha): return np.sqrt(((n-1)*std**2)/chi2.isf(1-alpha/2,n-1))
def std_ci(std,alpha): return (std_lower(std,alpha),std_upper(std,alpha))
for i,_ in enumerate(sample):
x = sample[:i+1]
xbar = x.mean()
std = x.std()
sample_mean.append(xbar)
sample_std.append(std)
sample_mean_CI.append(mean_ci(xbar,i+1,alpha))
sample_std_CI.append(std_ci(std,alpha))
# check for convergence for the whole series
print(sample_mean_CI[-1])
print(sample_std_CI[-1])
一个人可以更改数据序列的数量和/或正态分布的均值和std参数进行试验。
,您可以使用OpenTURNS做到这一点。下面的代码计算置信度为0.9的参数(均值和标准差)的渐近双边置信区间。
import numpy as np
import openturns as ot
sample = np.random.normal(1,2,(100,1))
confidence_level = 0.9
sample_mean_ci = []
sample_std_ci = []
nor = ot.NormalFactory() # class that fits a Normal distribution on the data
for i in range(1,len(sample)):
estimated_params = nor.buildEstimator(sample[:i+1,:]).getParameterDistribution()
ci = estimated_params.computeBilateralConfidenceInterval(confidence_level)
sample_mean_ci.append([ci.getLowerBound()[0],ci.getUpperBound()[0]])
sample_std_ci.append([ci.getLowerBound()[1],ci.getUpperBound()[1]])