问题描述
如果以下是我的数据,我如何才能获得截距和 beta 估计值的 95% 上下置信区间?
data_long <- structure(list(ID = c("00721-2-C","00724-1-C","00733-1-C","00735-2-C","00738-1-C","00721-2-C","00738-1-C"),study_group = structure(c(2L,1L,2L,1L),.Label = c("Intervention group","Control group"),class = "factor"),timepoint = structure(c(1L,3L,4L,.Label = c("Baseline","6 months","24 months","48 months"),variable = structure(c(1L,3L),.Label = c("Energy_kcal","Protein_g","MUFA_g"),value = c(1268.406744,997.5694419165,1402.338548,2228.254514,1969.496113,1429.53627,1847.53429,1390.102819,1484.28248,2005.2722320005,58.485134107,35.8161015647956,74.273355041,59.7441761,60.049667397,57.924105381,73.246959056,51.814332349,56.547926964,60.8630037867644,9.683201376,11.0929851920735,12.213572797,24.875988218,25.29578509,20.701295571,26.720893066,18.564045616,18.833261276,30.9237008583813)),row.names = c(1L,5L,17L,22L,23L,24L,25L,28L,29L,3608L,3611L,3612L,3624L,3629L,3630L,3631L,3632L,3635L,3636L,25250L,25253L,25254L,25266L,25271L,25272L,25273L,25274L,25277L,25278L
),class = "data.frame")
我重复线性回归的代码是:
library(dplyr)
library(broom)
library(tidyr)
data_long %>%
group_by(variable) %>%
do({
my_mdl <- lm(value ~ timepoint,.)
my_tidy <- tidy(my_mdl)
my_tidy_wide <- pivot_wider(my_tidy,names_from = "term",values_from = everything())
my_glance <- glance(my_mdl)
bind_cols(my_tidy_wide,my_glance)
}) %>%
select(variable,starts_with("estimate"),starts_with("std.error"),starts_with("p.value"),AIC,adj.r.squared)
# A tibble: 5 x 16
# Groups: variable [5]
variable `estimate_(Inte~ estimate_timepo~ estimate_timepo~ estimate_timepo~ `std.error_(Int~ std.error_timep~ std.error_timep~ std.error_timep~ `p.value_(Inter~ p.value_timepoi~
<fct> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 Energy_~ 1624. 45.9 -198. 346. 168. 215. 241. 343. 0.0000713 0.838
2 Protein~ 59.1 -1.49 1.76 0.929 5.18 6.61 7.42 10.6 0.0000270 0.829
3 MUFA_g 20.8 -0.139 -4.26 4.50 3.25 4.15 4.65 6.62 0.000685 0.974
4 Sugar_g 82.3 -1.29 -18.5 46.2 10.6 13.5 15.2 21.6 0.000241 0.927
5 Sweets_g 34.7 -2.32 -23.0 52.8 11.7 14.9 16.7 23.8 0.0249 0.882
# ... with 5 more variables: p.value_timepoint2 <dbl>,p.value_timepoint3 <dbl>,p.value <dbl>,AIC <dbl>,adj.r.squared <dbl>
谢谢!
解决方法
这有效:
data_long %>%
group_by(variable) %>%
do({
my_mdl <- lm(value ~ timepoint,.)
my_tidy <- tidy(my_mdl,conf.int = TRUE)
my_tidy_wide <- pivot_wider(my_tidy,names_from = "term",values_from = everything())
my_glance <- glance(my_mdl)
bind_cols(my_tidy_wide,my_glance)
}) %>%
select(variable,starts_with("estimate"),starts_with("std.error"),starts_with("p.value"),starts_with("conf.low"),starts_with("conf.high"),AIC,BIC,adj.r.squared,df.residual)