如何将数据框中的 Tukey HSD p 值括号添加到列表列表中的 ggplots？

问题描述

我有一个循环，可以为任何给定数量的分析物（在本例中为 3）生成 ggplots 列表。执行方差分析并为每个 ggplot 生成和映射 p 值。

但是，我还想用 Tukey HSD p 值括号注释这些图。如果可能，我只想可视化低于调整后的 p

我编写了两个脚本来分别执行这些操作；我的循环的输出是一个列表列表（ggplot 元素），而我的 Tukey HSD 脚本的输出是一个数据框。

我所指的括号在本示例 ggplot 中看起来类似于：

以下是我的代码。一、数据：

set.seed(10)
Label<-as.data.frame(c("Baseline","Baseline","A","B","C","D"))
Label<-do.call("rbind",replicate(10,Label,simplify = FALSE))
colnames(Label)<-"Label"
Analyte1<- rnorm(90,mean = 1,sd = 1)
Analyte2<- rnorm(90,sd = 1)
Analyte3<- rnorm(90,mean = .2,sd = 1)
df<-cbind(Label,Analyte1,Analyte2,Analyte3)

以下是我的方差分析循环：

library(dplyr)
library(ggplot2)
library(ggpubr)
library(rstatix)
library(rlist)

# Select numeric columns to obtain df length.
sample_list <-
  colnames(select_if(df,is.numeric))
sample_list

# Create a list where the plots will be saved.
ANOVA.plots <- list()

# The for loop.
for (i in 1:length(sample_list)) {
  ANOVA.ggplot <-
    ggplot(
      df,aes_string(
        x = "Label",y = sample_list[i],fill = "Label",title = sample_list[i],outlier.shape = NA
      )
    ) +
    border(color = "black",size = 2.5) +
    geom_jitter(
      aes(fill = Label),shape = 21,size = 4,color = "black",stroke = 1,position = position_jitter()
    ) +
    geom_Boxplot(alpha = 0.7,linetype = 1,size = 1.1) +
    theme(legend.position = "none") +
    scale_fill_brewer(palette = "Set2") +
    stat_Boxplot(geom = "errorbar",size = 1.1,width = 0.4) +
    theme (axis.title.y = element_blank()) +
    theme (axis.title.x = element_blank()) +
    stat_compare_means(
      inherit.aes = TRUE,data = df,method =  "anova",paired = FALSE,method.args = list(var.equal = TRUE),fontface = "bold.italic",size = 5,vjust = 0,hjust = 0
    )
  ANOVA.plots[[i]] <-  ANOVA.ggplot
}

# Print each ggplot element from the list of lists.
ANOVA.plots

目前，此块生成的图看起来与分析物 2 中的图相似。

最后，这个 chunk 用于获取 Tukey HSD 数据帧。

TUKEY.length<-length(sample_list)

TUKEY.list <-
  vector(mode = "list",length = TUKEY.length) # Empty list for looping,where Tukey HSD results are stored.

for (i in 1:TUKEY.length + 1) {
  TUKEY.list[[i]] <-
    aov(df[[i]] ~ df[[1]]) %>% tukey_hsd() # Obtain Tukey HSD p-values from comparing groups.
}

TUKEY.list <-
  TUKEY.list[lengths(TUKEY.list) > 0L] # Remove empty lists,artifacts from piping.

p.value.threshold <- 0.05

TUKEY.df<-list.rbind(TUKEY.list)
Analytes <-
  colnames(df[,sapply(df,class) %in% c('integer','numeric')]) 
# Stores the analyte names for Tukey. Only pulls numeric colnames.

# The following was done to properly bind the analyte IDs to the Tukey data frame.
Analytes.df<-as.data.frame(Analytes)
Analytes.df$Value<-c(1:nrow(Analytes.df))
Analytes.df<-do.call("rbind",Analytes.df,simplify = FALSE))

Analytes.df <-
  Analytes.df[order(Analytes.df$Value),] 
# Orders the dataframe so that the rownames can be assigned to the TUKEY HSD.
Analytes.df<-as.data.frame(Analytes.df[,-2])

toDrop <- c("^term","^null") # Columns to drop,left over from Tukey loop.

TUKEY.df<-TUKEY.df[,!grepl(paste(toDrop,collapse = "|"),names(TUKEY.df))]
TUKEY.df<-cbind(Analytes.df,TUKEY.df)
TUKEY.df<-filter(TUKEY.df,p.adj <= p.value.threshold)

所需的输出仍会显示方差分析 p 值，并且在 Tukey HSD 之后调整 p 值低于 0.05 的那些组的括号。

在本例中，只有分析物 2，A 组和 B 组，调整后的 p 值低于 0.05（即 p = 0.000754），因此这两个图上方应出现一个括号。但是，映射这些 Tukey-HSD 括号的代码应该能够适用于更多变量（即 20 多种分析物）并捕获对每个图都重要的所有成对比较，而不仅仅是我给出的。

解决方法

您可以使用 ggsignif 进行自定义注释。示例使用 p

library(ggplot2)
library(ggsignif)
library(cowplot)
library(data.table)

set.seed(10)
df <- data.table(Label=rep(c("Baseline","Baseline","A","B","C","D"),10),`Analyte 1` = rnorm(90,mean = 1,sd = 1),`Analyte 2` = rnorm(90,`Analyte 3` = rnorm(90,mean = .2,sd = 1))
df[,Label := factor(Label,unique(Label))]
df <- melt(df,id.vars = "Label",variable.name = "Analyte")
dfl <- split(df,df$Analyte,drop=TRUE)

doPlots <- function(x,signif.cutoff=.05){
    set.seed(123)
    p1 <- ggplot(dfl[[x]],aes(x=Label,y=value,fill=Label)) +
        geom_boxplot(alpha = 0.7,linetype = 1,size = 1.1) +
        theme(legend.position = "none") +
        scale_fill_brewer(palette = "Set2") +
        stat_boxplot(geom = "errorbar",size = 1.1,width = 0.4) +
        geom_jitter(aes(fill = Label),shape = 21,size = 4,stroke = 1) +
        ggtitle(x)
    a <- stats::TukeyHSD(stats::aov(value ~ Label,data = dfl[[x]]))[[1]]
    a <- stats::setNames(
        data.frame(
            do.call(rbind,strsplit(rownames(a),"-")),a[,"p adj"]
        ),c("Var1","Var2","p") )
    a <- subset(a[stats::complete.cases(a),],p < signif.cutoff)
    if (nrow(a) == 0) return(p1) else {
        a$p <- formatC(signif(a$p,digits = 3),digits = 3,format = "g",flag = "#")
        keep.tests <- unname(t(apply(a[,-3],1,sort)))
        keep.tests <- unname(split(keep.tests,seq(dim(keep.tests)[1])))
    ts <- list(a = a,keep.tests = keep.tests)
    p1 + ggsignif::geom_signif(
        comparisons=ts$keep.tests,test="TukeyHSD",annotations=ts$a$p,step_increase=0.1)
    }
}

plot_grid(plotlist=lapply(names(dfl),doPlots,signif.cutoff=.2),ncol=3)

^{由 reprex package (v1.0.0) 于 2021 年 1 月 29 日创建}

anova ggplot2 r r tukey