如何按R和dplyr中的分组变量集进行汇总?

问题描述

我想使用不同的分组变量集对数据框进行分组。对于每个组,我想计算观察数(或以其他任何方式汇总),然后将所有结果收集在一个数据框中。

重要:我想以编程方式定义分组变量集,例如列表。

我如何在tidyverse中实现这一目标?

这是我的尝试:

library(tidyverse)

#' Count `mtcars` rows for one set of grouping variables.
#'
#' @param ... Unquoted grouping columns of `mtcars`, e.g. `cyl, gear`.
#' @return A data frame with `grouping_variable` (the grouping column names
#'   joined by "."), `group` (the group's values joined by ".") and `n`.
count_by_group <- function(...) {
  tallied <- count(mtcars, ...)

  labelled <- mutate(
    tallied,
    # Names of the grouping columns, e.g. "cyl.gear".
    grouping_variable = paste(ensyms(...), collapse = "."),
    # Values of the grouping columns for each row, e.g. "4.3".
    group = paste(!!!enquos(...), sep = ".")
  )

  select(labelled, grouping_variable, group, n)
}

# I want this ...
bind_rows(
  count_by_group(cyl),
  count_by_group(gear),
  count_by_group(cyl, gear)
)
#>    grouping_variable group  n
#> 1                cyl     4 11
#> 2                cyl     6  7
#> 3                cyl     8 14
#> 4               gear     3 15
#> 5               gear     4 12
#> 6               gear     5  5
#> 7           cyl.gear   4.3  1
#> 8           cyl.gear   4.4  8
#> 9           cyl.gear   4.5  2
#> 10          cyl.gear   6.3  2
#> 11          cyl.gear   6.4  4
#> 12          cyl.gear   6.5  1
#> 13          cyl.gear   8.3 12
#> 14          cyl.gear   8.5  2

# ... but without the repetition of "count_by_group(var)".
# The following does not work:
# `list()` evaluates its arguments immediately, so `cyl` is looked up as an
# ordinary object (and not found) before `count_by_group()` can capture it.
map_dfr(
  list(
    cyl,gear,c(cyl,gear)
  ),count_by_group
)
#> Error in map(.x,.f,...): object 'cyl' not found

reprex package(v0.3.0)于2020-09-17创建

解决方法

更新(2020-10-12):更透明的解决方案(感谢@LionelHenry)

library(tidyverse)

#' Count `mtcars` rows for one set of grouping variables.
#'
#' @param ... Unquoted grouping columns of `mtcars` (e.g. `cyl, gear`), or a
#'   list of quosures spliced in with `!!!`.
#' @return A data frame with `grouping_variable` (the grouping column names
#'   joined by "."), `group` (the group's values joined by ".") and `n`
#'   (the number of rows in the group).
count_by_group <- function(...) {
  # `.named = TRUE` gives every captured expression a name, so the names can
  # be used to label the grouping set.
  dots <- enquos(..., .named = TRUE)
  group_names <- names(dots)

  counted <- count(mtcars, !!!dots)

  # One label per row, built from the values of all grouping columns.
  # The columns are spelled out with `everything()` rather than relying on
  # `c_across()`'s implicit default, which newer dplyr releases deprecate.
  group <- counted %>%
    select(-n) %>%
    rowwise() %>%
    mutate(paste(c_across(everything()), collapse = ".")) %>%
    pull()

  # # Equivalently:
  # group <- counted %>%
  #   select(-n) %>%
  #   pmap_chr(paste, sep = ".")

  counted %>%
    mutate(
      grouping_variable = paste(group_names, collapse = "."),
      group = group
    ) %>%
    select(grouping_variable, group, n)
}

# Each element is one set of grouping variables, quoted with `vars()` so the
# column names are not evaluated when the list is built.
grouping_variables <- list(
  vars(cyl),
  vars(gear),
  vars(cyl, gear)
)

# Splice each quoted set into `count_by_group()` and row-bind the results.
map_dfr(grouping_variables, function(var_set) count_by_group(!!!var_set))
#>    grouping_variable group  n
#> 1                cyl     4 11
#> 2                cyl     6  7
#> 3                cyl     8 14
#> 4               gear     3 15
#> 5               gear     4 12
#> 6               gear     5  5
#> 7           cyl.gear   4.3  1
#> 8           cyl.gear   4.4  8
#> 9           cyl.gear   4.5  2
#> 10          cyl.gear   6.3  2
#> 11          cyl.gear   6.4  4
#> 12          cyl.gear   6.5  1
#> 13          cyl.gear   8.3 12
#> 14          cyl.gear   8.5  2

reprex package(v0.3.0)于2020-10-12创建


我刚刚发现这可行!

library(tidyverse)

#' Count `mtcars` rows for one set of grouping variables.
#'
#' @param ... Unquoted grouping columns of `mtcars` (e.g. `cyl, gear`), or a
#'   list of quosures spliced in with `!!!`.
#' @return A data frame with `grouping_variable` (the grouping column names
#'   joined by "."), `group` (the group's values joined by ".") and `n`.
count_by_group <- function(...) {
  mtcars %>%
    count(...) %>%
    mutate(
      grouping_variable = paste(ensyms(...), collapse = "."),
      group = paste(!!!enquos(...), sep = ".")
    ) %>%
    select(grouping_variable, group, n)
}

# Quote each set of grouping variables with `vars()`, then splice it into
# `count_by_group()` with `!!!` and row-bind the results.
list(
  vars(cyl),
  vars(gear),
  vars(cyl, gear)
) %>%
  map_dfr(~count_by_group(!!! .))
#>    grouping_variable group  n
#> 1                cyl     4 11
#> 2                cyl     6  7
#> 3                cyl     8 14
#> 4               gear     3 15
#> 5               gear     4 12
#> 6               gear     5  5
#> 7           cyl.gear   4.3  1
#> 8           cyl.gear   4.4  8
#> 9           cyl.gear   4.5  2
#> 10          cyl.gear   6.3  2
#> 11          cyl.gear   6.4  4
#> 12          cyl.gear   6.5  1
#> 13          cyl.gear   8.3 12
#> 14          cyl.gear   8.5  2

reprex package(v0.3.0)于2020-10-12创建