问题描述
考虑以下数据框:
df1
# bacteria sample Number_x Number_y
#1 A HM_001 100 30
#2 B HM_001 50 60
#3 C HM_001 300 10
#4 D A2_HM_001 400 20
#5 E A2_HM_001 22 11
#6 F HM_002 23 35
#7 G HM_002 120 46
#8 H HM_003 50 51
# … with 1,342 more rows
按样本分组,我希望对每种细菌进行行式两侧 Fisher 精确检验。 (例如 HM_001 如下所示)。
HM_001 | Number_x | Number_y |
---|---|---|
A | 100 | 30 |
其他(在本例中为 B 和 C) | 350 | 70 |
HM_001 | Number_x | Number_y |
---|---|---|
B | 50 | 60 |
其他(在本例中为 A 和 C) | 400 | 40 |
等等,基本上为数据帧中的 1350 行中的每一行生成一个 p 值。
以下是我的尝试:
Fisher_result <- df1 %>%
group_by(sample) %>%
row_wise_fisher_test(as.matrix(df1[,c(3,4)]),p.adjust.method = "BH")
Error in row_wise_fisher_test(.,as.matrix(df1[,:
A cross-tabulation with two columns required
任何指针将不胜感激!
解决方法
您可以 group_by
每个 sample
并将 row_wise_fisher_test
应用到每个组并使用 unnest
将它们放在单独的列中。
library(dplyr)
library(tidyr)
library(rstatix)
df1 %>%
group_by(sample) %>%
summarise(data = list(row_wise_fisher_test(as.matrix(select(cur_data(),starts_with('Number'))),p.adjust.method = "BH"))) %>%
unnest_wider(data) %>%
unnest(c(group:p.adj.signif)) -> Fisher_result
Fisher_result
# sample group n p p.adj p.adj.signif
# <chr> <chr> <int> <dbl> <dbl> <chr>
#1 A2_HM_001 1 453 1.73e- 6 1.73e- 6 ****
#2 A2_HM_001 2 453 1.73e- 6 1.73e- 6 ****
#3 HM_001 1 550 1.18e- 1 1.18e- 1 ns
#4 HM_001 2 550 9.31e-24 1.40e-23 ****
#5 HM_001 3 550 1.57e-26 4.71e-26 ****
#6 HM_002 1 224 1.44e- 5 1.44e- 5 ****
#7 HM_002 2 224 1.44e- 5 1.44e- 5 ****
#8 HM_003 1 101 1.00e+ 0 1.00e+ 0 ns
数据
df1 <- structure(list(bacteria = c("A","B","C","D","E","F","G","H"),sample = c("HM_001","HM_001","A2_HM_001","HM_002","HM_003"),Number_x = c(100L,50L,300L,400L,22L,23L,120L,50L),Number_y = c(30L,60L,10L,20L,11L,35L,46L,51L)),class = "data.frame",row.names = c(NA,-8L))