将 "dgCMatrix" 类的稀疏矩阵转换为数据框,并用于 R 中的多个同时回归

问题描述

我有20个变量,并在 R 中执行了多个 LASSO 回归:使用以下代码,依次将每个变量作为响应变量,对其余所有变量进行回归

library(readxl)
data <-read_excel("data.xlsx")
library(glmnet)
library(coefplot)

# Numeric matrix of all variables; column i is the response of the i-th model.
A <- as.matrix(data)

# For every column i, regress it on all remaining columns with a penalised
# regression (alpha = 0.9): one plain glmnet fit plus a 5-fold cross-validated
# fit, whose lambda.min is used below.
results <- lapply(seq_len(ncol(A)), function(i) {
  list(
    fit_lasso = glmnet(A[, -i], A[, i], standardize = TRUE, alpha = 0.9),
    cvfit = cv.glmnet(
      A[, -i], A[, i],
      standardize = TRUE, type.measure = "mse", nfolds = 5, alpha = 0.9
    )
  )
})

# Display only the non-zero coefficients at lambda.min. drop = FALSE keeps
# each result a one-column matrix so the row names (variable names) survive.
coefficients <- lapply(results, function(x) {
  beta <- coef(x$cvfit, s = "lambda.min")
  beta[beta[, 1L] != 0L, 1L, drop = FALSE]
})

这会得到一个列表,其中包含 ncol(data) 个不同的 sparse Matrix of class "dgCMatrix"。对每个变量,结果显示如下(此处只列出前两个):

> coefficients 
[[1]]
10 x 1 sparse Matrix of class "dgCMatrix"
                        1
(Intercept) -2.214861e+03
X3           2.812453e-05
X5           5.841003e-01
X6           5.428515e+00
X7           1.080925e+01
X8           2.454695e+01
X10          3.917866e-01
X12          2.488678e+00
X13          5.441626e+00
X14          2.400565e-01

[[2]]
6 x 1 sparse Matrix of class "dgCMatrix"
                        1
(Intercept) -7.179757e-01
X3           6.563784e-09
X6           1.867302e-02
X8           1.854556e-01
X10         -2.601140e-03
X13          9.105201e-01

我希望能够把这些变量提取到数据框中,以便以后用它们做回归。对于其中一个 sparse Matrix of class "dgCMatrix"(以第一个变量 X1 为例),我设法用下面的代码创建了数据框:

# Work with the first model (response X1) throughout: the lambda that is
# inspected and the coefficients that are extracted must come from the same
# cross-validated fit.
results[[1L]]$cvfit$lambda.min
coeffs <- coef(results[[1L]]$cvfit, s = "lambda.min")

# summary() of the sparse coefficient matrix lists its stored (non-zero)
# entries: `i` is the row index and `x` the value.
summs <- summary(coeffs)

# Pair each non-zero coefficient with its variable name.
ssVarX1 <- data.frame(
  variables = rownames(coeffs)[summs$i],
  coefficient = summs$x
)

结果为:

    variables   coefficient
1  (Intercept) -2.214861e+03
2           X3  2.812453e-05
3           X5  5.841003e-01
4           X6  5.428515e+00
5           X7  1.080925e+01
6           X8  2.454695e+01
7          X10  3.917866e-01
8          X12  2.488678e+00
9          X13  5.441626e+00
10         X14  2.400565e-01

不过,在某些情况下 ssVarX 中可能没有任何被选中的变量(只剩截距),此时结果是这种形式:

     variable coefficient
1 (Intercept)    106.0629

我如何同时为所有现有的 sparse Matrix of class "dgCMatrix" 创建数据框,并让每个数据框的名称为 ssVarX[i](i = 1, ..., ncol(data))?

根据评论,大部分内容是通过以下代码完成的

library(readxl)
data <-read_excel("data.xlsx")
library(glmnet)
library(coefplot)

# Numeric matrix of all variables; column i is the response of the i-th model.
A <- as.matrix(data)

# For every column i, regress it on all remaining columns (alpha = 0.9):
# one plain glmnet fit plus a 5-fold cross-validated fit.
results <- lapply(seq_len(ncol(A)), function(i) {
  list(
    fit_lasso = glmnet(A[, -i], A[, i], standardize = TRUE, alpha = 0.9),
    cvfit = cv.glmnet(
      A[, -i], A[, i],
      standardize = TRUE, type.measure = "mse", nfolds = 5, alpha = 0.9
    )
  )
})

# Keep only the non-zero coefficients at lambda.min; drop = FALSE preserves
# the row names (variable names) of each one-column result.
coefficients <- lapply(results, function(x) {
  beta <- coef(x$cvfit, s = "lambda.min")
  beta[beta[, 1L] != 0L, 1L, drop = FALSE]
})
# Turn every coefficient matrix into a (variable, coefficient) data frame and
# publish the frames in the global environment as ssVarX1, ssVarX2, ...
list2env(
  setNames(
    lapply(coefficients, function(m) {
      data.frame(variable = row.names(m), coefficient = unname(m[, 1L]))
    }),
    paste0("ssVarX", seq_along(coefficients))
  ),
  envir = .GlobalEnv
)

# For every object whose name matches "ssVarX", fit a linear model on a
# subset of the columns of `data` and return its coefficients as a matrix.
# The resulting list is named after the matched objects.
`names<-`(lapply(ls.str(pattern = "ssVarX"),function(x) {
  # Strip the "ssVarX" / "X" prefixes to obtain integers: first the number in
  # the object name, then the numbers of the variables listed in the data
  # frame after its first row (the first row is dropped by [-1]).
  is <- as.integer(sub("(ssVar)?X","",c(x,get(x,envir = .GlobalEnv)$variable[-1])))
  # A single index means no variables were listed: fall back to all other
  # column positions of `data`.
  if (length(is) == 1) is <- c(is,seq_along(data)[-is])
  # NOTE(review): lm() receives no formula, only columns selected by position;
  # presumably the first selected column becomes the response — confirm.
  # This also assumes column k of `data` is the variable Xk — verify.
  as.matrix(coef(lm(data = data[,is])))
}),ls.str(pattern = "ssVarX"))

但是,尽管每种情况下所选解释变量的数量都是正确的,相应的模型却没有使用 ssVarX 数据框中列出的正确变量。我希望把每个 Xi 作为响应变量,对其 ssVarX 数据框中列出的变量(取自 data)进行回归。为什么会这样呢?另外,我该如何显示每个回归的汇总结果?

解决方法

这是您想要的吗?

# One data frame per model: variable names alongside their coefficients.
lapply(coefficients, function(m) {
  var_names <- row.names(m)
  coef_values <- unname(m[, 1L])
  data.frame(variable = var_names, coefficient = coef_values)
})

更新

# Turn every coefficient matrix into a (variable, coefficient) data frame and
# publish the frames in the global environment as ssVarX1, ssVarX2, ...
list2env(
  setNames(
    lapply(coefficients, function(x) {
      data.frame(variable = row.names(x), coefficient = unname(x[, 1L]))
    }),
    paste0("ssVarX", seq_along(coefficients))
  ),
  envir = .GlobalEnv
)

更新2

# For every object whose name matches "ssVarX", fit a linear model on a
# subset of the columns of `data` and return its coefficients as a matrix.
# The resulting list is named after the matched objects.
`names<-`(lapply(ls.str(pattern = "ssVarX"),function(x) {
  # Strip the "ssVarX" / "X" prefixes to obtain integers: first the number in
  # the object name, then the numbers of the variables listed in the data
  # frame after its first row (the first row is dropped by [-1]).
  is <- as.integer(sub("(ssVar)?X","",c(x,get(x,envir = .GlobalEnv)$variable[-1])))
  # A single index means no variables were listed: fall back to all other
  # column positions of `data`.
  if (length(is) == 1) is <- c(is,seq_along(data)[-is])
  # NOTE(review): lm() receives no formula, only columns selected by position;
  # presumably the first selected column becomes the response — confirm.
  # This also assumes column k of `data` is the variable Xk — verify.
  as.matrix(coef(lm(data = data[,is])))
}),ls.str(pattern = "ssVarX"))

更新3

这种方式怎么样?

# Keep the per-model data frames together in one list (element i belongs to
# response Xi) instead of scattering them over the global environment.
ssVarX <- lapply(coefficients, function(x) {
  data.frame(variable = row.names(x), coefficient = unname(x[, 1L]))
})

# Refit each response Xi by ordinary least squares on the variables listed
# for it in ssVarX[[i]] (first row dropped). When no variable is listed, the
# right-hand side becomes ".", i.e. every other column of `data`.
lm_results <- lapply(seq_along(ssVarX), function(i) {
  predictors <- ssVarX[[i]]$variable[-1L]
  rhs <- if (length(predictors) == 0) {
    "."
  } else {
    paste(predictors, collapse = " + ")
  }
  fml <- as.formula(paste0("X", i, " ~ ", rhs))
  lm(fml, data = data)
})

要从 lm 结果中提取 coefficients:

# Coefficients of every fitted model, each as a matrix.
lapply(lapply(lm_results, coef), as.matrix)

要从 lm 结果中提取 formula:

# Model formula of every fitted model.
lapply(lm_results, formula)

要将所有内容打印到控制台:

# Evaluating the list at top level prints every stored lm result.
lm_results

如果这次仍然有错误,请告诉我错误的公式。