循环遍历多对物种模型以增强 R 中的相关系数

问题描述

这个问题类似于使用 cor.test() 来获取多对变量（例如 Calculate correlation coefficient by bootstrapping 和 for loop with cor.test over many categories）的相关系数，但在这里我试图遍历多个模型和仅获取我关心的 1 个变量的模型之间的相关系数。我是引导程序和循环的新手，我在尝试同时执行这两项操作时遇到了困难。

我有 16 个渔业捕捞物种的模型。我只关心一个变量（渔具），我想引导每个渔具的回归系数以获得每个回归系数的稳健估计，然后计算不同物种之间的相关系数。我有一个循环可以为每个物种对手动执行此操作，但我想遍历所有模型并将结果输出到数据框，并用一列标记每个物种对。

library(mgcv) # for GAMs and GAM outputs

## generate random data for species models
num.caught <- sample(x=0:1000,size =50,replace = TRUE) 
year <- sample(x =2000:2010,size=50,replace=TRUE)
gear <- sample(c('net','line','trawl'),50,replace=TRUE)
species1.dat <- data.frame(num.caught,year,gear)
species1.gam <- gam(num.caught ~ year + gear,data= species1.dat)
#summary(species1.gam)
#species1.gam$coefficients

num.caught <- sample(x=0:100,size =25,size=25,25,replace=TRUE)
species2.dat <- data.frame(num.caught,gear)
species2.gam <- gam(num.caught ~ year + gear,data= species2.dat)

num.caught <- sample(x=0:500,size =30) 
year <- sample(x =2000:2005,size=30,30,replace=TRUE)
species3.dat <- data.frame(num.caught,gear)
species3.gam <- gam(num.caught ~ year + gear,data= species3.dat)

# Make list of all models in environment
spp.names <- grep(".gam",names(.GlobalEnv),value=TRUE)

mod.1 <- species1.gam
mod.2 <- species2.gam
mod.3 <- species3.gam
# etc...

NoSamples <- 1000
CC <- rep(NA,NoSamples)
for(i in 1:NoSamples) {
#get data from a random draw for species 1
Index1 <- grep("gear",names(summary(mod.1)$p.coeff))
Sp1 <- rnorm(summary(mod.1)$p.coeff[Index1],summary(mod.1)$se[Index1])
#Now species 2 
Index2 <- grep("gear",names(summary(mod.2)$p.coeff))
Sp2 <- rnorm(summary(mod.2)$p.coeff[Index2],summary(mod.2)$se[Index2])
#Now get the correlation coefficient and store it
CC[i] <- cor(Sp1,Sp2)
} 
## the loop works to here,but I would have to manually re-run with every combination of the 16 species

## This should go inside the loop
quants <- tibble::rownames_to_column(data.frame(quantile(CC)),"quantile") %>% rename(quant_val="quantile.CC.") 
df.CC <- data.frame(mean(CC),quants)
# paste names for each species 
df.CC$spp_pair <- paste0(names(mod.1),"_",names(mod.2)) # This is wrong,it pastes all the col names for each model,not the name of the model itself
df.CC.wide <- pivot_wider(data=df.CC,id_cols = c(spp_pair,mean.CC.),names_from = quantile,names_prefix = "quant",values_from = quant_val)
names(df.CC.wide) <- gsub(pattern = "%",replacement="",x=names(df.CC.wide))

在这里我可以手动重命名和绑定每个结果数据框，但是应该有一种方法可以遍历所有模型吗？我认为这也可以用 lapply 来完成？

# The desired output would be a dataframe with 1 row for each species pair,e.g:

spp_pair              mean.CC. quant0 quant25 quant50 quant75 quant100
1 species1_species2    0.940  0.940   0.940   0.940   0.940    0.940
2 species1_species3    0.200  0.180   0.190   0.200   0.210    0.220
3 species2_species3    0.750  0.600   0.700   0.720   0.800    0.810

解决方法

运行上述代码后，summary(mod.1)$p.coeff 返回 NULL。我不确定在运行代码时是否遗漏了任何内容，但总的来说，您可以通过使用 mget + lapply 来实现您想要的。

使用 mget 我们可以获取列表中的所有模型，并使用 lapply 从中提取所需的统计信息。所以这样的事情应该适合你。

lapply(mget(spp.names),function(x) {
  Index1 <- grep("gear",names(summary(x)$p.coeff))
  rnorm(summary(x)$p.coeff[Index1],summary(x)$se[Index1])
}) -> result

result

for-loop r r regression