问题描述
我有 20 个变量,并在 R 中对它们执行了多个 LASSO 回归:每次取其中一个变量作为响应变量,并使用以下代码将其对模型中其余所有变量进行回归:
# Packages used below: readxl reads the workbook, glmnet provides the
# penalised regressions (coefplot is loaded as in the original script).
library(readxl)
library(glmnet)
library(coefplot)

# Read the data and convert it to a matrix for glmnet.
data <- read_excel("data.xlsx")
A <- as.matrix(data)

# For every column i, regress A[, i] on all remaining columns: once along the
# whole regularisation path and once with 5-fold cross-validation (MSE).
results <- lapply(seq_len(ncol(A)), function(i) {
  list(
    fit_lasso = glmnet(A[, -i], A[, i], standardize = TRUE, alpha = 0.9),
    cvfit = cv.glmnet(
      A[, -i], A[, i],
      standardize = TRUE, type.measure = "mse", nfolds = 5, alpha = 0.9
    )
  )
})

# Display only the non-zero coefficients (taken at lambda.min) of each CV fit.
coefficients <- lapply(results, function(x) {
  beta <- coef(x$cvfit, s = "lambda.min")
  beta[beta[, 1L] != 0L, 1L, drop = FALSE]
})
这会得到一个列表,其中包含 ncol(data) 个不同的 sparse Matrix of class "dgCMatrix"。对于全部 ncol(data) 个变量,它们显示如下:
> coefficients
[[1]]
10 x 1 sparse Matrix of class "dgCMatrix"
1
(Intercept) -2.214861e+03
X3 2.812453e-05
X5 5.841003e-01
X6 5.428515e+00
X7 1.080925e+01
X8 2.454695e+01
X10 3.917866e-01
X12 2.488678e+00
X13 5.441626e+00
X14 2.400565e-01
[[2]]
6 x 1 sparse Matrix of class "dgCMatrix"
1
(Intercept) -7.179757e-01
X3 6.563784e-09
X6 1.867302e-02
X8 1.854556e-01
X10 -2.601140e-03
X13 9.105201e-01
我希望能够把这些变量提取到数据框中,以便以后使用它们进行回归。对于其中一个 sparse Matrix of class "dgCMatrix"(以第一个,即 X1 为例),我设法用以下代码创建了数据框:
# Inspect the cross-validated lambda selected for the first model (response X1).
results[[1L]]$cvfit$lambda.min
# Coefficients of that same first model at lambda.min (sparse column matrix).
# The index must match the line above and the ssVarX1 name; the original
# mixed model 1 and model 2.
coeffs <- coef(results[[1L]]$cvfit, s = "lambda.min")
# summary() of the sparse matrix is used here for its triplet form: `i` holds
# the row index and `x` the value of each stored (non-zero) entry.
summs <- summary(coeffs)
ssVarX1 <- data.frame(
  variables = rownames(coeffs)[summs$i],
  coefficient = summs$x
)
结果为:
variables coefficient
1 (Intercept) -2.214861e+03
2 X3 2.812453e-05
3 X5 5.841003e-01
4 X6 5.428515e+00
5 X7 1.080925e+01
6 X8 2.454695e+01
7 X10 3.917866e-01
8 X12 2.488678e+00
9 X13 5.441626e+00
10 X14 2.400565e-01
不过,在某些情况下 ssVarX 可能不包含任何变量(只有截距项),此时结果具有如下形式:
variable coefficient
1 (Intercept) 106.0629
我如何才能同时为所有现有的 sparse Matrix of class "dgCMatrix" 创建数据框,并使每个数据框的名称为 ssVarX[i],i = 1, ..., ncol(data)?
library(readxl)
library(glmnet)
library(coefplot)

# Read the workbook and turn it into a matrix for glmnet.
data <- read_excel("data.xlsx")
A <- as.matrix(data)

# One pair of fits per column: column i is the response, all other columns
# are the predictors.
results <- lapply(seq_len(ncol(A)), function(i) {
  list(
    fit_lasso = glmnet(A[, -i], A[, i], standardize = TRUE, alpha = 0.9),
    cvfit = cv.glmnet(
      A[, -i], A[, i],
      standardize = TRUE, type.measure = "mse", nfolds = 5, alpha = 0.9
    )
  )
})

# Keep only the non-zero coefficients (at lambda.min) of every CV fit, each as
# a one-column sparse matrix.
coefficients <- lapply(results, function(x) {
  beta <- coef(x$cvfit, s = "lambda.min")
  beta[beta[, 1L] != 0L, 1L, drop = FALSE]
})
# Turn every sparse coefficient matrix into a two-column data frame and
# publish the data frames in the global environment as ssVarX1, ssVarX2, ...
list2env(
  setNames(
    lapply(coefficients, function(m) {
      data.frame(variable = row.names(m), coefficient = unname(m[, 1L]))
    }),
    paste0("ssVarX", seq_along(coefficients))
  ),
  envir = .GlobalEnv
)

# For each ssVarX* object: take the response index from the object name and
# the predictor indices from its `variable` column (intercept row dropped),
# then fit lm() on those columns of `data`.
# NOTE(review): no formula is passed to lm(); presumably the first selected
# column is used as the response - confirm.
setNames(
  lapply(ls.str(pattern = "ssVarX"), function(nm) {
    selected <- get(nm, envir = .GlobalEnv)$variable[-1]
    idx <- as.integer(sub("(ssVar)?X", "", c(nm, selected)))
    # Intercept-only case: use every other column as a predictor instead.
    if (length(idx) == 1) {
      idx <- c(idx, seq_along(data)[-idx])
    }
    as.matrix(coef(lm(data = data[, idx])))
  }),
  ls.str(pattern = "ssVarX")
)
但是,尽管在所有情况下所选解释变量的数量都是正确的,相应的模型却没有使用 ssVarX 数据框中列出的正确变量。我希望它能将每个 Xi 作为响应变量,对 ssVarX 中指示的预测变量(从 data 中提取)进行回归。为什么会这样呢?另外,我该如何显示每个回归的汇总结果?
解决方法
这是您想要的吗?
# Convert each sparse coefficient matrix into a plain two-column data frame:
# the row names become `variable`, the single column becomes `coefficient`.
lapply(coefficients, function(m) {
  data.frame(
    variable = row.names(m),
    coefficient = unname(m[, 1L])
  )
})
更新
# Build one data frame per coefficient matrix and assign them in the global
# environment under the names ssVarX1, ssVarX2, ..., ssVarX<n>.
list2env(
  setNames(
    lapply(coefficients, function(x) {
      data.frame(
        variable = row.names(x),
        coefficient = unname(x[, 1L]),
        # Keep `variable` as character on every R version, so later code can
        # combine it with other strings without hitting factor codes.
        stringsAsFactors = FALSE
      )
    }),
    paste0("ssVarX", seq_along(coefficients))
  ),
  envir = .GlobalEnv
)
更新2
# Refit an ordinary linear model for every ssVarX<i> data frame.
# Columns are selected by NAME rather than by position, and `variable` is
# converted with as.character() first: if it is a factor, c() would combine
# its integer codes instead of its labels, which yields the right number of
# predictors but the wrong columns.
local({
  # Match only ssVarX<number>, so an unrelated object such as a plain
  # `ssVarX` list is not picked up.
  ss_names <- ls(pattern = "^ssVarX[0-9]+$", envir = .GlobalEnv)
  fits <- lapply(ss_names, function(x) {
    # "ssVarX3" -> "X3": name of the response column.
    response <- sub("^ssVar", "", x)
    # Selected predictors; the first row is the intercept and is dropped.
    predictors <- as.character(get(x, envir = .GlobalEnv)$variable[-1L])
    # Intercept-only LASSO fit: fall back to every other column.
    if (length(predictors) == 0L) {
      predictors <- setdiff(names(data), response)
    }
    # The response is placed first because no formula is passed to lm(), as
    # in the original code.
    as.matrix(coef(lm(data = data[, c(response, predictors)])))
  })
  setNames(fits, ss_names)
})
更新3
这种方式怎么样?
# One data frame (variable, coefficient) per sparse coefficient matrix.
ssVarX <- lapply(coefficients, function(x) {
  data.frame(variable = row.names(x), coefficient = unname(x[, 1L]))
})

# Fit lm() for every response X<i> on the predictors listed for it in
# ssVarX[[i]], building the model formula explicitly from the variable names.
lm_results <- lapply(seq_along(ssVarX), function(i, df) {
  # Drop the intercept row; as.character() guards against factor columns.
  x_vars <- as.character(df[[i]]$variable[-1L])
  # No predictor selected: regress on all other columns via ".".
  if (length(x_vars) == 0) x_vars <- "."
  fml <- as.formula(paste0("X", i, " ~ ", paste(x_vars, collapse = " + ")))
  lm(fml, data = data)
}, ssVarX)
要从 lm 结果中提取系数(coefficients):
# One-column coefficient matrix for each fitted model.
lapply(lm_results, function(fit) {
  as.matrix(coef(fit))
})
要从 lm 结果中提取公式(formula):
# Model formula of each fitted model.
lapply(lm_results, formula)
要将所有内容打印到控制台:
# Print every fitted model to the console.
print(lm_results)
如果这次仍然有错误,请告诉我错误的公式。