R按顺序运行回归

问题描述

 # Reproducible demo data: one row per student, sixteen uniform(0, 1) columns
 # named after the NATO alphabet, and a binary GROUP indicator.
 set.seed(1)
 data <- data.frame(
   student = 1:5000,
   alfa = runif(5000),
   bravo = runif(5000),
   charlie = runif(5000),
   delta = runif(5000),
   echo = runif(5000),
   foxtrot = runif(5000),
   golf = runif(5000),
   hotel = runif(5000),
   india = runif(5000),
   juliett = runif(5000),
   kilo = runif(5000),
   lima = runif(5000),
   mike = runif(5000),
   november = runif(5000),
   oscar = runif(5000),
   papa = runif(5000),
   # Draw one 0/1 label per row. Without an explicit size, sample() returns
   # only length(0:1) == 2 values, which data.frame() would silently recycle
   # into a fixed repeating pattern instead of a random assignment.
   GROUP = sample(0:1, 5000, replace = TRUE)
 )

##########################

# Covariate sets, stored as column names (character vectors) so they can be
# used to build formulas; bare symbols such as `golf` are not objects in the
# workspace and would raise "object not found".
COVS1 <- c("golf", "hotel", "india")
COVS2 <- c("juliett", "kilo")
COVS3 <- c("lima", "mike", "november", "oscar", "papa")


## OVERALL
# Each response (alfa, bravo, charlie) is regressed on one main predictor
# (delta, echo, foxtrot) plus cumulative covariate sets:
# COVS1, then COVS1 + COVS2, then COVS1 + COVS2 + COVS3.
# The response is spelled `alfa` to match the column name in `data`.
lm(alfa ~ delta + golf + hotel + india, data = data)
lm(alfa ~ delta + golf + hotel + india + juliett + kilo, data = data)
lm(alfa ~ delta + golf + hotel + india + juliett + kilo + lima + mike + november + oscar + papa, data = data)
lm(alfa ~ echo + golf + hotel + india, data = data)
lm(alfa ~ echo + golf + hotel + india + juliett + kilo, data = data)
lm(alfa ~ echo + golf + hotel + india + juliett + kilo + lima + mike + november + oscar + papa, data = data)
lm(alfa ~ foxtrot + golf + hotel + india, data = data)
lm(alfa ~ foxtrot + golf + hotel + india + juliett + kilo, data = data)
lm(alfa ~ foxtrot + golf + hotel + india + juliett + kilo + lima + mike + november + oscar + papa, data = data)
lm(bravo ~ delta + golf + hotel + india, data = data)
lm(bravo ~ delta + golf + hotel + india + juliett + kilo, data = data)
lm(bravo ~ delta + golf + hotel + india + juliett + kilo + lima + mike + november + oscar + papa, data = data)
lm(bravo ~ echo + golf + hotel + india, data = data)
lm(bravo ~ echo + golf + hotel + india + juliett + kilo, data = data)
lm(bravo ~ echo + golf + hotel + india + juliett + kilo + lima + mike + november + oscar + papa, data = data)
lm(bravo ~ foxtrot + golf + hotel + india, data = data)
lm(bravo ~ foxtrot + golf + hotel + india + juliett + kilo, data = data)
lm(bravo ~ foxtrot + golf + hotel + india + juliett + kilo + lima + mike + november + oscar + papa, data = data)
lm(charlie ~ delta + golf + hotel + india, data = data)
lm(charlie ~ delta + golf + hotel + india + juliett + kilo, data = data)
lm(charlie ~ delta + golf + hotel + india + juliett + kilo + lima + mike + november + oscar + papa, data = data)
lm(charlie ~ echo + golf + hotel + india, data = data)
lm(charlie ~ echo + golf + hotel + india + juliett + kilo, data = data)
lm(charlie ~ echo + golf + hotel + india + juliett + kilo + lima + mike + november + oscar + papa, data = data)
lm(charlie ~ foxtrot + golf + hotel + india, data = data)
lm(charlie ~ foxtrot + golf + hotel + india + juliett + kilo, data = data)
lm(charlie ~ foxtrot + golf + hotel + india + juliett + kilo + lima + mike + november + oscar + papa, data = data)

## GROUP == 0
# Same 27 models as the overall fit, restricted to rows where GROUP == 0.
# The restriction goes through lm()'s `subset` argument, which is evaluated
# inside `data`; passing `GROUP == 0` positionally would bind it to `data`.
# The response is spelled `alfa` to match the column name in `data`.
lm(alfa ~ delta + golf + hotel + india, data = data, subset = GROUP == 0)
lm(alfa ~ delta + golf + hotel + india + juliett + kilo, data = data, subset = GROUP == 0)
lm(alfa ~ delta + golf + hotel + india + juliett + kilo + lima + mike + november + oscar + papa, data = data, subset = GROUP == 0)
lm(alfa ~ echo + golf + hotel + india, data = data, subset = GROUP == 0)
lm(alfa ~ echo + golf + hotel + india + juliett + kilo, data = data, subset = GROUP == 0)
lm(alfa ~ echo + golf + hotel + india + juliett + kilo + lima + mike + november + oscar + papa, data = data, subset = GROUP == 0)
lm(alfa ~ foxtrot + golf + hotel + india, data = data, subset = GROUP == 0)
lm(alfa ~ foxtrot + golf + hotel + india + juliett + kilo, data = data, subset = GROUP == 0)
lm(alfa ~ foxtrot + golf + hotel + india + juliett + kilo + lima + mike + november + oscar + papa, data = data, subset = GROUP == 0)
lm(bravo ~ delta + golf + hotel + india, data = data, subset = GROUP == 0)
lm(bravo ~ delta + golf + hotel + india + juliett + kilo, data = data, subset = GROUP == 0)
lm(bravo ~ delta + golf + hotel + india + juliett + kilo + lima + mike + november + oscar + papa, data = data, subset = GROUP == 0)
lm(bravo ~ echo + golf + hotel + india, data = data, subset = GROUP == 0)
lm(bravo ~ echo + golf + hotel + india + juliett + kilo, data = data, subset = GROUP == 0)
lm(bravo ~ echo + golf + hotel + india + juliett + kilo + lima + mike + november + oscar + papa, data = data, subset = GROUP == 0)
lm(bravo ~ foxtrot + golf + hotel + india, data = data, subset = GROUP == 0)
lm(bravo ~ foxtrot + golf + hotel + india + juliett + kilo, data = data, subset = GROUP == 0)
lm(bravo ~ foxtrot + golf + hotel + india + juliett + kilo + lima + mike + november + oscar + papa, data = data, subset = GROUP == 0)
lm(charlie ~ delta + golf + hotel + india, data = data, subset = GROUP == 0)
lm(charlie ~ delta + golf + hotel + india + juliett + kilo, data = data, subset = GROUP == 0)
lm(charlie ~ delta + golf + hotel + india + juliett + kilo + lima + mike + november + oscar + papa, data = data, subset = GROUP == 0)
lm(charlie ~ echo + golf + hotel + india, data = data, subset = GROUP == 0)
lm(charlie ~ echo + golf + hotel + india + juliett + kilo, data = data, subset = GROUP == 0)
lm(charlie ~ echo + golf + hotel + india + juliett + kilo + lima + mike + november + oscar + papa, data = data, subset = GROUP == 0)
lm(charlie ~ foxtrot + golf + hotel + india, data = data, subset = GROUP == 0)
lm(charlie ~ foxtrot + golf + hotel + india + juliett + kilo, data = data, subset = GROUP == 0)
lm(charlie ~ foxtrot + golf + hotel + india + juliett + kilo + lima + mike + november + oscar + papa, data = data, subset = GROUP == 0)

## GROUP == 1
# Same 27 models as the overall fit, restricted to rows where GROUP == 1.
# The restriction goes through lm()'s `subset` argument, which is evaluated
# inside `data`; passing `GROUP == 1` positionally would bind it to `data`.
# The response is spelled `alfa` to match the column name in `data`.
lm(alfa ~ delta + golf + hotel + india, data = data, subset = GROUP == 1)
lm(alfa ~ delta + golf + hotel + india + juliett + kilo, data = data, subset = GROUP == 1)
lm(alfa ~ delta + golf + hotel + india + juliett + kilo + lima + mike + november + oscar + papa, data = data, subset = GROUP == 1)
lm(alfa ~ echo + golf + hotel + india, data = data, subset = GROUP == 1)
lm(alfa ~ echo + golf + hotel + india + juliett + kilo, data = data, subset = GROUP == 1)
lm(alfa ~ echo + golf + hotel + india + juliett + kilo + lima + mike + november + oscar + papa, data = data, subset = GROUP == 1)
lm(alfa ~ foxtrot + golf + hotel + india, data = data, subset = GROUP == 1)
lm(alfa ~ foxtrot + golf + hotel + india + juliett + kilo, data = data, subset = GROUP == 1)
lm(alfa ~ foxtrot + golf + hotel + india + juliett + kilo + lima + mike + november + oscar + papa, data = data, subset = GROUP == 1)
lm(bravo ~ delta + golf + hotel + india, data = data, subset = GROUP == 1)
lm(bravo ~ delta + golf + hotel + india + juliett + kilo, data = data, subset = GROUP == 1)
lm(bravo ~ delta + golf + hotel + india + juliett + kilo + lima + mike + november + oscar + papa, data = data, subset = GROUP == 1)
lm(bravo ~ echo + golf + hotel + india, data = data, subset = GROUP == 1)
lm(bravo ~ echo + golf + hotel + india + juliett + kilo, data = data, subset = GROUP == 1)
lm(bravo ~ echo + golf + hotel + india + juliett + kilo + lima + mike + november + oscar + papa, data = data, subset = GROUP == 1)
lm(bravo ~ foxtrot + golf + hotel + india, data = data, subset = GROUP == 1)
lm(bravo ~ foxtrot + golf + hotel + india + juliett + kilo, data = data, subset = GROUP == 1)
lm(bravo ~ foxtrot + golf + hotel + india + juliett + kilo + lima + mike + november + oscar + papa, data = data, subset = GROUP == 1)
lm(charlie ~ delta + golf + hotel + india, data = data, subset = GROUP == 1)
lm(charlie ~ delta + golf + hotel + india + juliett + kilo, data = data, subset = GROUP == 1)
lm(charlie ~ delta + golf + hotel + india + juliett + kilo + lima + mike + november + oscar + papa, data = data, subset = GROUP == 1)
lm(charlie ~ echo + golf + hotel + india, data = data, subset = GROUP == 1)
lm(charlie ~ echo + golf + hotel + india + juliett + kilo, data = data, subset = GROUP == 1)
lm(charlie ~ echo + golf + hotel + india + juliett + kilo + lima + mike + november + oscar + papa, data = data, subset = GROUP == 1)
lm(charlie ~ foxtrot + golf + hotel + india, data = data, subset = GROUP == 1)
lm(charlie ~ foxtrot + golf + hotel + india + juliett + kilo, data = data, subset = GROUP == 1)
lm(charlie ~ foxtrot + golf + hotel + india + juliett + kilo + lima + mike + november + oscar + papa, data = data, subset = GROUP == 1)

我想知道,如何使用某种函数来运行所有这些回归?基本上,先把“alfa”对一个主要预测变量(delta、echo 或 foxtrot)加 COVS1 做回归,然后对该预测变量加 COVS1 和 COVS2 做回归,最后对该预测变量加 COVS1、COVS2 和 COVS3 做回归。这些回归先对整个数据完成,然后分别对 GROUP == 0 和 GROUP == 1 的子集完成。接着把“alfa”替换为“bravo”,再替换为“charlie”,重复相同的过程。我希望一次运行所有这些回归,并存储所有输出的模型、系数估计值和标准误差。

解决方法

我们可以创建一个循环来实现

# Covariate column names, grouped into the three sets that are added to the
# model cumulatively.
COVS1 <- c("golf", "hotel", "india")
COVS2 <- c("juliett", "kilo")
COVS3 <- c("lima", "mike", "november", "oscar", "papa")

# Keep the sets in a list so the first j of them can be picked by position.
COVS <- list(COVS1, COVS2, COVS3)

# One row per (response, main predictor) pairing, kept as plain strings.
df1 <- expand.grid(
  resp = c("alfa", "bravo", "charlie"),
  pred = c("delta", "echo", "foxtrot"),
  stringsAsFactors = FALSE
)

# Sort the pairings so that all rows for one response sit together.
df1 <- df1[order(df1[["resp"]]), ]



library(broom)

# Fit one model per (row of df1, cumulative covariate set) and keep the tidied
# coefficient table of each fit.
# Result: a list with one element per row of df1; each element is a list with
# one tidy() data frame per covariate depth (COVS1, COVS1 + COVS2, ...).
# The outer index must run over the rows of df1, not over COVS: iterating over
# seq_along(COVS) would only ever reach the first three response/predictor
# pairs.
lst_mod <- lapply(seq_len(nrow(df1)), function(i) {
  lapply(seq_along(COVS), function(j) {
    # Main predictor first, then the first j covariate sets flattened.
    fmla <- reformulate(
      c(df1$pred[i], unlist(COVS[seq_len(j)])),
      response = df1$resp[i]
    )
    tidy(lm(fmla, data = data))
  })
})

或者,如果我们需要把所有结果合并成一个数据框

library(dplyr)
library(purrr)

# Same fits as the nested-list version, but stacked into a single data frame:
# one row per coefficient, tagged with the response name and the formula used.
# The outer index runs over every row of df1 so that all response/predictor
# pairs are fitted, not just the first length(COVS) of them.
out_dat <- lapply(seq_len(nrow(df1)), function(i) {
  map_dfr(seq_along(COVS), ~ {
    # Main predictor first, then the first .x covariate sets flattened.
    fmla <- reformulate(
      c(df1$pred[i], unlist(COVS[seq_len(.x)])),
      response = df1$resp[i]
    )
    tidy(lm(fmla, data = data)) %>%
      # list(fmla) stores the formula object itself in a list-column.
      mutate(response_variable = df1$resp[i], formula = list(fmla))
  })
}) %>%
  bind_rows()

如果需要对数据的子集运行 lm,请先对数据取子集

# Split the data by GROUP. which() drops rows where the comparison is NA,
# which is also what subset() does.
dat0 <- data[which(data$GROUP == 0), , drop = FALSE]
dat1 <- data[which(data$GROUP == 1), , drop = FALSE]

然后将 lm 调用中的 data 参数改为 'dat0' 或 'dat1'