问题描述
我对模型预测的偏差校正自举置信区间感兴趣。我已经编写了一些代码来执行引导,如下所示:
library(tidyverse)
#> Warning: package 'ggplot2' was built under R version 4.0.2
#> Warning: package 'tibble' was built under R version 4.0.2
#> Warning: package 'tidyr' was built under R version 4.0.2
#> Warning: package 'dplyr' was built under R version 4.0.2
library(boot)
x = rnorm(25)
eta = -0.5 + 0.2*x
y = rpois(25,exp(eta))
d = tibble(x,y)
predictions<-function(data,ind){
model = glm(y ~ x,data = data[ind,])
newdata = tibble(x = seq(-2,2,0.01))
eta = predict(model,newdata = newdata)
exp(eta)
}
bt = boot(d,predictions,1000)
boot.ci(bt,type = 'bca')
#> BOOTSTRAP CONFIDENCE INTERVAL CALculaTIONS
#> Based on 1000 bootstrap replicates
#>
#> CALL :
#> boot.ci(boot.out = bt,type = "bca")
#>
#> Intervals :
#> Level BCa
#> 95% ( 0.654,6.095 )
#> Calculations and Intervals on Original Scale
#> Some BCa intervals may be unstable
引导程序按预期工作,但是boot.ci
调用仅计算单个置信区间。
- 用于引导程序预测的一列(本质上是引导程序的平均值)
- 可信度下限
100*(1-a)
的一列, - 可信度上限
100*(1-a)
的一列。
如何使用boot
获得所需的输出?
解决方法
您可以使用replicate
作为引导程序。
predictions <- function(X) {
model <- glm(y ~ x,data=X)
newdata <- data.frame(x=seq(-2,2,0.01))
eta <- predict(model,newdata=newdata)
exp(eta)
}
set.seed(42)
b <- replicate(200,predictions(d[sample(1:nrow(d),nrow(d),replace=T),]))
res <- cbind(estimate=colMeans(b),matrixStats::colQuantiles(b,probs=c(.025,.975)))
head(res)
# estimate 2.5% 97.5%
# [1,] 2.913660 1.324121 5.363724
# [2,] 2.432935 1.327170 3.990526
# [3,] 2.608683 1.221961 4.715235
# [4,] 2.681802 1.395430 4.537477
# [5,] 2.592316 1.947494 3.357226
# [6,] 3.705157 1.606632 7.012669
数据:
d <- structure(list(x = c(-0.733089270649064,0.843270552215953,-0.860309462865639,1.57992224055388,0.606460750363077,-1.60987545170912,0.116723009723847,-1.35220761859743,-0.721654356711471,-0.614137997795221,-0.0900374904672829,1.20655915421524,1.46948958927721,-0.210496187862193,-1.21763978187878,-0.566774409165728,2.49263604822452,1.37169581528637,-0.407181458046919,-0.247944038851957,-0.703358091631417,-0.311797310571164,-0.990496073627748,0.515047211912022,0.355009786755435),y = c(0L,1L,0L,2L,3L,2L)),class = "data.frame",row.names = c(NA,-25L))
,
您可以传递index=
参数以获得感兴趣的列:
res = lapply(1:ncol(bt$t),function(i){
ci = boot.ci(bt,index=i,type = 'bca')
data.frame(
var = i,mean = mean(bt$t[,i]),lower = ci$bca[4],upper = ci$bca[5]
)
})
res = do.call(rbind,res)
head(res)
var mean lower upper
1 1 2.736563 0.9131014 5.768826
2 2 2.729717 0.9155394 5.743819
3 3 2.722903 0.9174191 5.709087
4 4 2.716120 0.9215527 5.706216
在您的示例中不太容易看到,我们可以看到它与虹膜兼容:
bo = boot(iris,function(d,i)colMeans(d[i,-5]),1000)
data.frame(
var=colnames(iris)[1:4],mean = colMeans(bo$t),t(sapply(1:4,function(i)boot.ci(bo,type="bca")$bca[4:5]))
)
var mean X1 X2
1 Sepal.Length 5.847009 5.712667 5.969333
2 Sepal.Width 3.055873 2.982667 3.128428
3 Petal.Length 3.767644 3.487473 4.029977
4 Petal.Width 1.203107 1.083937 1.313888