当因素分层时,在RStudio中使用ggforest绘制Cox PH模型吗?

问题描述

一个模型变量需要分层时,我正在尝试找到一种方法来从Cox-PH模型中绘制危险比的森林图。对于非分层模型,-1函数非常出色。运行一些示例代码

ggforest()

产生此图

non-stratified figure

但是,如果我必须通过分层纠正非比例性

library(survival)
library(survminer)

model <- coxph(Surv(time,status) ~ sex + rx + adhere,data = colon )
ggforest(model)

colon <- within(colon,{
  sex <- factor(sex,labels = c("female","male"))
  differ <- factor(differ,labels = c("well","moderate","poor"))
  extent <- factor(extent,labels = c("submuc.","muscle","serosa","contig."))
})
bigmodel <-
  coxph(Surv(time,status) ~ sex + rx + adhere + differ + extent + node4,data = colon )
ggforest(bigmodel)

出现以下错误消息:

stratamodel <- coxph(Surv(time,status) ~ sex + strata(rx) + adhere + differ + extent + node4,data = colon ) ggforest(stratamodel) (数据,,var)中的错误:未定义的列已选中

此外:警告消息:

在.get_data(model,data = data)中:[.data.frame参数不是 提供。数据将从模型拟合中提取。”

关于如何从地层模型中获取ggforest所需信息的任何建议,以便可以生成绘图?感谢您的帮助!

解决方法

我收集到所需的森林图是一个简单地跳过模型公式中分层RX变量的图。如果是这样,我们可以简单地在其中插入if子句,以忽略与数据中的列名不完全对应的公式部分(例如,“ strata(rx)”不是列名)。

方法1

如果您对R足够满意,可以修改函数,请运行trace(ggforest,edit = TRUE)并用以下版本替换弹出窗口中的allTerms <- lapply(...)(第10-25行):

allTerms <- lapply(seq_along(terms),function(i) {
  var <- names(terms)[i]
  if(var %in% colnames(data)) {
    if (terms[i] %in% c("factor","character")) {
      adf <- as.data.frame(table(data[,var]))
      cbind(var = var,adf,pos = 1:nrow(adf))
    }
    else if (terms[i] == "numeric") {
      data.frame(var = var,Var1 = "",Freq = nrow(data),pos = 1)
    }
    else {
      vars = grep(paste0("^",var,"*."),coef$term,value = TRUE)
      data.frame(var = vars,pos = seq_along(vars))
    }
  } else {
    message(var,"is not found in data columns,and will be skipped.")
  }
  
})
ggforest(stratamodel) # this should work after the modification

plot

该修改将不会保留到后续的R会话中。如果要在当前会话中撤消修改,只需运行untrace(ggforest)

方法2

如果您希望永久性地修改该函数的版本以备将来使用/不想混用某个库的函数,请在下面保存该函数:

ggforest2 <- function (model,data = NULL,main = "Hazard ratio",cpositions = c(0.02,0.22,0.4),fontsize = 0.7,refLabel = "reference",noDigits = 2) {
  conf.high <- conf.low <- estimate <- NULL
  stopifnot(class(model) == "coxph")
  data <- survminer:::.get_data(model,data = data)
  terms <- attr(model$terms,"dataClasses")[-1]
  coef <- as.data.frame(broom::tidy(model))
  gmodel <- broom::glance(model)
  allTerms <- lapply(seq_along(terms),function(i) {
    var <- names(terms)[i]
    if(var %in% colnames(data)) {
      if (terms[i] %in% c("factor","character")) {
        adf <- as.data.frame(table(data[,var]))
        cbind(var = var,pos = 1:nrow(adf))
      }
      else if (terms[i] == "numeric") {
        data.frame(var = var,pos = 1)
      }
      else {
        vars = grep(paste0("^",value = TRUE)
        data.frame(var = vars,pos = seq_along(vars))
      }
    } else {
      message(var,and will be skipped.")
    }    
  })
  allTermsDF <- do.call(rbind,allTerms)
  colnames(allTermsDF) <- c("var","level","N","pos")
  inds <- apply(allTermsDF[,1:2],1,paste0,collapse = "")
  rownames(coef) <- gsub(coef$term,pattern = "`",replacement = "")
  toShow <- cbind(allTermsDF,coef[inds,])[,c("var","p.value","estimate","conf.low","conf.high","pos")]
  toShowExp <- toShow[,5:7]
  toShowExp[is.na(toShowExp)] <- 0
  toShowExp <- format(exp(toShowExp),digits = noDigits)
  toShowExpClean <- data.frame(toShow,pvalue = signif(toShow[,4],noDigits + 1),toShowExp)
  toShowExpClean$stars <- paste0(round(toShowExpClean$p.value," ",ifelse(toShowExpClean$p.value < 0.05,"*",""),ifelse(toShowExpClean$p.value < 0.01,ifelse(toShowExpClean$p.value < 0.001,""))
  toShowExpClean$ci <- paste0("(",toShowExpClean[,"conf.low.1"]," - ","conf.high.1"],")")
  toShowExpClean$estimate.1[is.na(toShowExpClean$estimate)] = refLabel
  toShowExpClean$stars[which(toShowExpClean$p.value < 0.001)] = "<0.001 ***"
  toShowExpClean$stars[is.na(toShowExpClean$estimate)] = ""
  toShowExpClean$ci[is.na(toShowExpClean$estimate)] = ""
  toShowExpClean$estimate[is.na(toShowExpClean$estimate)] = 0
  toShowExpClean$var = as.character(toShowExpClean$var)
  toShowExpClean$var[duplicated(toShowExpClean$var)] = ""
  toShowExpClean$N <- paste0("(N=",toShowExpClean$N,")")
  toShowExpClean <- toShowExpClean[nrow(toShowExpClean):1,]
  rangeb <- range(toShowExpClean$conf.low,toShowExpClean$conf.high,na.rm = TRUE)
  breaks <- axisTicks(rangeb/2,log = TRUE,nint = 7)
  rangeplot <- rangeb
  rangeplot[1] <- rangeplot[1] - diff(rangeb)
  rangeplot[2] <- rangeplot[2] + 0.15 * diff(rangeb)
  width <- diff(rangeplot)
  y_variable <- rangeplot[1] + cpositions[1] * width
  y_nlevel <- rangeplot[1] + cpositions[2] * width
  y_cistring <- rangeplot[1] + cpositions[3] * width
  y_stars <- rangeb[2]
  x_annotate <- seq_len(nrow(toShowExpClean))
  annot_size_mm <- fontsize * 
    as.numeric(grid::convertX(unit(theme_get()$text$size,"pt"),"mm"))
  p <- ggplot(toShowExpClean,aes(seq_along(var),exp(estimate))) + 
    geom_rect(aes(xmin = seq_along(var) - 0.5,xmax = seq_along(var) + 0.5,ymin = exp(rangeplot[1]),ymax = exp(rangeplot[2]),fill = ordered(seq_along(var)%%2 + 1))) +
    scale_fill_manual(values = c("#FFFFFF33","#00000033"),guide = "none") + 
    geom_point(pch = 15,size = 4) + 
    geom_errorbar(aes(ymin = exp(conf.low),ymax = exp(conf.high)),width = 0.15) + 
    geom_hline(yintercept = 1,linetype = 3) + 
    coord_flip(ylim = exp(rangeplot)) + 
    ggtitle(main) + 
    scale_y_log10(name = "",labels = sprintf("%g",breaks),expand = c(0.02,0.02),breaks = breaks) + 
    theme_light() +
    theme(panel.grid.minor.y = element_blank(),panel.grid.minor.x = element_blank(),panel.grid.major.y = element_blank(),legend.position = "none",panel.border = element_blank(),axis.title.y = element_blank(),axis.text.y = element_blank(),axis.ticks.y = element_blank(),plot.title = element_text(hjust = 0.5)) + 
    xlab("") + 
    annotate(geom = "text",x = x_annotate,y = exp(y_variable),label = toShowExpClean$var,fontface = "bold",hjust = 0,size = annot_size_mm) + 
    annotate(geom = "text",y = exp(y_nlevel),label = toShowExpClean$level,vjust = -0.1,label = toShowExpClean$N,fontface = "italic",vjust = ifelse(toShowExpClean$level == "",0.5,1.1),y = exp(y_cistring),label = toShowExpClean$estimate.1,size = annot_size_mm,vjust = ifelse(toShowExpClean$estimate.1 == "reference",-0.1)) + 
    annotate(geom = "text",label = toShowExpClean$ci,vjust = 1.1,fontface = "italic") + 
    annotate(geom = "text",y = exp(y_stars),label = toShowExpClean$stars,hjust = -0.2,fontface = "italic") +
    annotate(geom = "text",x = 0.5,label = paste0("# Events: ",gmodel$nevent,"; Global p-value (Log-Rank): ",format.pval(gmodel$p.value.log,eps = ".001")," \nAIC: ",round(gmodel$AIC,2),"; Concordance Index: ",round(gmodel$concordance,2)),vjust = 1.2,fontface = "italic")
  gt <- ggplot_gtable(ggplot_build(p))
  gt$layout$clip[gt$layout$name == "panel"] <- "off"
  ggpubr::as_ggplot(gt)
}

这是ggforest函数当前位置的快照,具有与上面相同的修改。如果程序包的创建者将来对程序包进行了修改,则这可能会中断或过时。但目前,ggforest2(stratamodel)将产生与方法1相同的结果。