如何根据上游目标的参数,将三个值之一映射到最终目标?

问题描述

我正在研究一个定义如下的 Drake 工作流程:

# Parallel settings vectors: each holds three entries, one per pipeline variant.
projectName <- c(
  "lake_2018_CER_lib_norm_log2",
  "lake_2018_CER_lib_norm",
  "lake_2018_CER_raw_counts"
)
normalize <- c(TRUE, TRUE, FALSE)
logTransform <- c(TRUE, FALSE, FALSE)

# Divide every column of `datExpr` by that column's sum, so each column of the
# result sums to 1.
normalize_fxn <- function(datExpr) {
  col_totals <- colSums(datExpr)
  sweep(datExpr, MARGIN = 2, STATS = col_totals, FUN = "/")
}

plan <- drake_plan(
  # Read the tab-separated input file and drop its first column.
  datExpr = target(
    fread(file_in(filePath), sep = "\t") %>% select(-1),
    transform = map(filePath = !!filePath, .id = FALSE)
  ),
  # One target per `normalize` value: scaled column fractions, or the data
  # unchanged.
  datExprnorm = target(
    if (normalize == TRUE) {
      normalize_fxn(datExpr) * 1e6 + 1
    } else {
      datExpr
    },
    transform = map(datExpr, normalize = !!normalize)
  ),
  # One target per `logTransform` value, paired with the datExprnorm targets.
  datExprLog = target(
    if (logTransform == TRUE) {
      log2(datExprnorm * 1e6 + 1)
    } else {
      datExprnorm
    },
    transform = map(datExprnorm, logTransform = !!logTransform)
  ),
  # Rounded product of the column count and percentCells; NULL when
  # percentCells is not numeric.
  filterGenesMinCells = target(
    if (is.numeric(percentCells)) {
      round(ncol(datExprLog) * percentCells)
    } else {
      NULL
    },
    transform = cross(datExprLog, percentCells = !!percentCells)
  ),
  # Final targets: every filterGenesMinCells target crossed with featureType.
  makePlots = target(
    realVsPermCor(
      datExpr = datExprLog,
      projectName = projectName,
      featureType = featureType,
      nPerms = 100,
      subsampleReal = NULL,
      resampleReal = NULL,
      subsamplePerm,
      filterGenesMinCells = filterGenesMinCells,
      filterCellsMinGenes = NULL,
      fdrSubsample,
      futureThreads = NULL,
      openBlasThreads = 10,
      outDir
    ),
    transform = cross(
      filterGenesMinCells,
      featureType = !!featureType,
      .id = c(featureType, percentCells)
    )
  )
)

目标输出如下所示:

> plan$target
 [1] "datExpr"                                                              "datExprLog_TRUE_datExprnorm_TRUE_datExpr"                            
 [3] "datExprLog_FALSE_datExprnorm_TRUE_datExpr_2"                          "datExprLog_FALSE_datExprnorm_FALSE_datExpr"                          
 [5] "datExprnorm_TRUE_datExpr"                                             "datExprnorm_TRUE_datExpr_2"                                          
 [7] "datExprnorm_FALSE_datExpr"                                            "filterGenesMinCells_NULL_datExprLog_TRUE_datExprnorm_TRUE_datExpr"   
 [9] "filterGenesMinCells_0.01_datExprLog_TRUE_datExprnorm_TRUE_datExpr"    "filterGenesMinCells_0.02_datExprLog_TRUE_datExprnorm_TRUE_datExpr"   
[11] "filterGenesMinCells_NULL_datExprLog_FALSE_datExprnorm_TRUE_datExpr_2" "filterGenesMinCells_0.01_datExprLog_FALSE_datExprnorm_TRUE_datExpr_2"
[13] "filterGenesMinCells_0.02_datExprLog_FALSE_datExprnorm_TRUE_datExpr_2" "filterGenesMinCells_NULL_datExprLog_FALSE_datExprnorm_FALSE_datExpr" 
[15] "filterGenesMinCells_0.01_datExprLog_FALSE_datExprnorm_FALSE_datExpr"  "filterGenesMinCells_0.02_datExprLog_FALSE_datExprnorm_FALSE_datExpr" 
[17] "makePlots_gene_NULL"                                                  "makePlots_cell_NULL"                                                 
[19] "makePlots_gene_0.01"                                                  "makePlots_cell_0.01"                                                 
[21] "makePlots_gene_0.02"                                                  "makePlots_cell_0.02"                                                 
[23] "makePlots_gene_NULL_2"                                                "makePlots_cell_NULL_2"                                               
[25] "makePlots_gene_0.01_2"                                                "makePlots_cell_0.01_2"                                               
[27] "makePlots_gene_0.02_2"                                                "makePlots_cell_0.02_2"                                               
[29] "makePlots_gene_NULL_3"                                                "makePlots_cell_NULL_3"                                               
[31] "makePlots_gene_0.01_3"                                                "makePlots_cell_0.01_3"                                               
[33] "makePlots_gene_0.02_3"                                                "makePlots_cell_0.02_3"                                               

这已经非常接近我想要的结果,但我卡在了 projectName 上:我希望最终目标使用三个项目名称中的一个,具体取决于其输入在前面的步骤中是否进行了标准化和/或对数转换。

目前,我生成了 18 个目标,因此我希望每个项目名称都映射到 6 个目标。

有什么办法可以做到这一点吗?

解决方法

看起来您可以编写一个函数,接受标准化和对数转换这两个设置,并返回对应的项目名称。示意代码如下。

drake 中的静态分支很难用。在 drake 的继任者 targets 中,我尝试让两种分支(静态分支和动态分支)都更容易使用。(不过,在项目进行到一半时切换工具可能并不可行。)

library(drake)

# Inputs referenced by the plan below. filePath, percentCells and featureType
# are single strings here; normalize and logTransform are parallel logical
# vectors with one entry per pipeline variant.
filePath <- "file_path.txt"
percentCells <- "percent_cells"
featureType <- "feature_type"
normalize <- c(TRUE, TRUE, FALSE)
logTransform <- c(TRUE, FALSE, FALSE)
# Scale each column of `datExpr` by its own total (column-wise fractions).
normalize_fxn <- function(datExpr) {
  column_sums <- colSums(datExpr)
  sweep(datExpr, MARGIN = 2, STATS = column_sums, FUN = "/")
}

# Map a (normalize, log_transform) pair to its project name.
#
# Arguments:
#   normalize      A single logical, or the string "TRUE"/"FALSE".
#   log_transform  A single logical, or the string "TRUE"/"FALSE".
#
# Returns the project name as a length-one character vector.
#
# Raises an error for any combination without a defined name (for example
# normalize = FALSE with log_transform = TRUE). Without the fallthrough,
# switch() would silently return invisible NULL in that case.
name_project <- function(normalize, log_transform) {
  # paste0() renders logicals as "TRUE"/"FALSE", so logical and character
  # inputs produce the same lookup key.
  key <- paste0(normalize, "_", log_transform)
  switch(
    key,
    TRUE_TRUE = "lake_2018_CER_lib_norm_log2",
    TRUE_FALSE = "lake_2018_CER_lib_norm",
    FALSE_FALSE = "lake_2018_CER_raw_counts",
    stop(
      "No project name defined for normalize = ", normalize,
      ", log_transform = ", log_transform,
      call. = FALSE
    )
  )
}

plan <- drake_plan(
  # Read the tab-separated input file and drop its first column.
  datExpr = target(
    fread(file_in(filePath), sep = "\t") %>% select(-1),
    transform = map(filePath = !!filePath, .id = FALSE)
  ),
  # One target per `normalize` value: scaled column fractions, or the data
  # unchanged.
  datExprNorm = target(
    if (normalize == TRUE) {
      normalize_fxn(datExpr) * 1e6 + 1
    } else {
      datExpr
    },
    transform = map(datExpr, normalize = !!normalize)
  ),
  # One target per `logTransform` value, paired with the datExprNorm targets.
  datExprLog = target(
    if (logTransform == TRUE) {
      log2(datExprNorm * 1e6 + 1)
    } else {
      datExprNorm
    },
    transform = map(datExprNorm, logTransform = !!logTransform)
  ),
  # Rounded product of the column count and percentCells; NULL when
  # percentCells is not numeric.
  filterGenesMinCells = target(
    if (is.numeric(percentCells)) {
      round(ncol(datExprLog) * percentCells)
    } else {
      NULL
    },
    transform = cross(datExprLog, percentCells = !!percentCells)
  ),
  makePlots = target(
    realVsPermCor(
      datExpr = datExprLog,
      # The project name is a function of normalization and log transform.
      projectName = !!name_project(
        deparse(substitute(normalize)),
        deparse(substitute(logTransform))
      ),
      featureType = featureType,
      nPerms = 100,
      subsampleReal = NULL,
      resampleReal = NULL,
      subsamplePerm,
      filterGenesMinCells = filterGenesMinCells,
      filterCellsMinGenes = NULL,
      fdrSubsample,
      futureThreads = NULL,
      openBlasThreads = 10,
      outDir
    ),
    transform = cross(
      filterGenesMinCells,
      featureType = !!featureType,
      .id = c(featureType, percentCells)
    )
  )
)

# Show the generated commands of the makePlots targets only.
plan$command[grepl("makePlots", plan$target)]
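#> [[1]]
#> realVsPermCor(datExpr = datExprLog_TRUE_datExprNorm_TRUE_datExpr,
#>     projectName = "lake_2018_CER_lib_norm_log2", featureType = "feature_type",
#>     nPerms = 100, subsampleReal = NULL, resampleReal = NULL,
#>     subsamplePerm, filterGenesMinCells = filterGenesMinCells_percent_cells_datExprLog_TRUE_datExprNorm_TRUE_datExpr,
#>     filterCellsMinGenes = NULL, fdrSubsample, futureThreads = NULL,
#>     openBlasThreads = 10, outDir)
#> 
#> [[2]]
#> realVsPermCor(datExpr = datExprLog_FALSE_datExprNorm_TRUE_datExpr_2,
#>     projectName = "lake_2018_CER_lib_norm", featureType = "feature_type",
#>     nPerms = 100, subsampleReal = NULL, resampleReal = NULL,
#>     subsamplePerm, filterGenesMinCells = filterGenesMinCells_percent_cells_datExprLog_FALSE_datExprNorm_TRUE_datExpr_2,
#>     filterCellsMinGenes = NULL, fdrSubsample, futureThreads = NULL,
#>     openBlasThreads = 10, outDir)
#> 
#> [[3]]
#> realVsPermCor(datExpr = datExprLog_FALSE_datExprNorm_FALSE_datExpr,
#>     projectName = "lake_2018_CER_raw_counts", featureType = "feature_type",
#>     nPerms = 100, subsampleReal = NULL, resampleReal = NULL,
#>     subsamplePerm, filterGenesMinCells = filterGenesMinCells_percent_cells_datExprLog_FALSE_datExprNorm_FALSE_datExpr,
#>     filterCellsMinGenes = NULL, fdrSubsample, futureThreads = NULL,
#>     openBlasThreads = 10, outDir)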

由 reprex 包 (v0.3.0) 于 2021 年 1 月 12 日创建

相关问答

Selenium Web驱动程序和Java。元素在(x,y)点处不可单击。其...
Python-如何使用点“。” 访问字典成员?
Java 字符串是不可变的。到底是什么意思?
Java中的“ final”关键字如何工作?(我仍然可以修改对象。...
“loop:”在Java代码中。这是什么,为什么要编译?
java.lang.ClassNotFoundException:sun.jdbc.odbc.JdbcOdbc...