如何在R中基于group by将单独的列值添加到另一列?

问题描述

我有一个名为“ DF1”的数据表(如下所示):

# A tibble: 10 x 4
# Groups:   Month [1]
   Response.Status UNSUBSCRIBE   Month Year 
   <fct>           <fct>         <fct> <chr>
 1 SURVEY_OPENED   NA            Nov   2020 
 2 NOT_RESPONDED   TRUE          Nov   2020 
 3 EMAIL_OPENED    NA            Nov   2020 
 4 NOT_RESPONDED   NA            Nov   2020 
 5 NOT_RESPONDED   TRUE          Nov   2020 
 6 EMAIL_OPENED    NA            Nov   2020 
 7 NOT_RESPONDED   TRUE          Nov   2020 
 8 EMAIL_OPENED    NA            Nov   2020 
 9 SURVEY_OPENED   TRUE          Nov   2020 
10 EMAIL_OPENED    NA            Nov   2020 

# dput() dump of the DF1 sample: a grouped_df grouped by Month.
# NOTE(review): this dump looks truncated and will not evaluate as written —
# the outer structure() call is never closed, the column vectors hold 4 and 3
# elements while row.names and .rows describe 6 rows, and UNSUBSCRIBE carries
# a .Label but no class = "factor". Regenerate it with dput(DF1) before use.
structure(list(Response.Status = structure(c(5L,2L,1L,1L),.Label = c("EMAIL_OPENED","NOT_RESPONDED","PARTIALLY_SAVED","SUBMITTED","SURVEY_OPENED","UNDELIVERED_OR_BOUNCED"),class = "factor"),UNSUBSCRIBE = structure(c(NA_integer_,TRUE,NA_integer_,TRUE),.Label = "TRUE",Month = c("Nov","Nov","Nov"),Year = c("2020","2020","2020")),row.names = c(NA,-6L),groups = structure(list(Month = "Nov",.rows = structure(list(
    1:6),ptype = integer(0),class = c("vctrs_list_of","vctrs_vctr","list"))),row.names = 1L,class = c("tbl_df","tbl","data.frame"
),.drop = TRUE),class = c("grouped_df","tbl_df","data.frame"
))

我对此表做了转换,得到下面的表“Updated_DF”,用来统计每个月各“Response.Status”的数量:

# Build a Response.Status x month table of row counts.
# DF1 arrives grouped by Month, so the grouping is dropped first: the Month
# label is recomputed below and nothing should be evaluated per old group.
# NOTE(review): assumes DF1 has a `date` column in "%Y/%m/%d" form; that column
# is not part of the DF1 sample shown earlier — confirm.
Updated_DF <- DF1 %>%
  ungroup() %>%
  mutate(Month = format(as.Date(date, format = "%Y/%m/%d"), "%m/%Y")) %>%
  # count() replaces group_by() + summarise(n()): it returns an ungrouped
  # tibble and avoids naming a column `sum`, which shadows base::sum().
  count(Month, Response.Status) %>%
  pivot_wider(names_from = Month, values_from = n)

# A tibble: 6 x 16
  Response.Status        `01/2020` `02/2020` `03/2020` `04/2020` `05/2020` `06/2020` `07/2020` `08/2020` `09/2019` `09/2020` `10/2019` `10/2020` `11/2019` `11/2020` `12/2019`
  <fct>                      <int>     <int>     <int>     <int>     <int>     <int>     <int>     <int>     <int>     <int>     <int>     <int>     <int>     <int>     <int>
1 EMAIL_OPENED                1090      3123      4087      5005      2098      1876      4278      3665       901      4162       883      2597      1187       690       883
2 NOT_RESPONDED               3205      9733     13177     15255      5470      4788     12708     10731      2835     15088      2846      8094      3665      1938      2667
3 PARTIALLY_SAVED                5        34        56         8        28        22        73        86        11        14         7        23         8         8         2
4 SUBMITTED                    216       557       838       828       357       310       654       621       214      1001       233       497       264       122       194
5 SURVEY_OPENED                164       395       597      1016       245       212       513       625       110       588       123       349       202        94       120
6 UNDELIVERED_OR_BOUNCED        92       280       318       260       109       127       319       321        63       445        69       192        93        39        74

“ Updated_DF”的数据结构:

# dput()-style definition of Updated_DF: one row per Response.Status level and
# one integer count column per "mm/YYYY" month. Factor labels, the sixth value
# of the last three columns, row.names and class were restored so that the
# object matches the printed 6 x 16 tibble.
structure(list(
  Response.Status = structure(
    1:6,
    .Label = c(
      "EMAIL_OPENED", "NOT_RESPONDED", "PARTIALLY_SAVED",
      "SUBMITTED", "SURVEY_OPENED", "UNDELIVERED_OR_BOUNCED"
    ),
    class = "factor"
  ),
  `01/2020` = c(1090L, 3205L, 5L, 216L, 164L, 92L),
  `02/2020` = c(3123L, 9733L, 34L, 557L, 395L, 280L),
  `03/2020` = c(4087L, 13177L, 56L, 838L, 597L, 318L),
  `04/2020` = c(5005L, 15255L, 8L, 828L, 1016L, 260L),
  `05/2020` = c(2098L, 5470L, 28L, 357L, 245L, 109L),
  `06/2020` = c(1876L, 4788L, 22L, 310L, 212L, 127L),
  `07/2020` = c(4278L, 12708L, 73L, 654L, 513L, 319L),
  `08/2020` = c(3665L, 10731L, 86L, 621L, 625L, 321L),
  `09/2019` = c(901L, 2835L, 11L, 214L, 110L, 63L),
  `09/2020` = c(4162L, 15088L, 14L, 1001L, 588L, 445L),
  `10/2019` = c(883L, 2846L, 7L, 233L, 123L, 69L),
  `10/2020` = c(2597L, 8094L, 23L, 497L, 349L, 192L),
  `11/2019` = c(1187L, 3665L, 8L, 264L, 202L, 93L),
  `11/2020` = c(690L, 1938L, 8L, 122L, 94L, 39L),
  `12/2019` = c(883L, 2667L, 2L, 194L, 120L, 74L)
), row.names = c(NA, -6L), class = c("tbl_df", "tbl", "data.frame"))

我想做的是把DF1中的“UNSUBSCRIBE”字段也纳入这个转换过程,使结果的Response.Status列多出一行“UNSUBSCRIBE”,给出每个月“UNSUBSCRIBE”为TRUE的记录总数——尽管它在DF1中是单独的一列。

我完全不知道该如何实现——我最终打算用formattable展示这张表,并为其添加颜色主题。

期望的输出

  Response.Status        `01/2020` `02/2020` `03/2020` `04/2020` `05/2020` `06/2020` `07/2020` `08/2020` `09/2019` `09/2020` `10/2019` `10/2020` `11/2019` `11/2020` `12/2019`
  <fct>                      <int>     <int>     <int>     <int>     <int>     <int>     <int>     <int>     <int>     <int>     <int>     <int>     <int>     <int>     <int>
1 EMAIL_OPENED                1090      3123      4087      5005      2098      1876      4278      3665       901      4162       883      2597      1187       690       883
2 NOT_RESPONDED               3205      9733     13177     15255      5470      4788     12708     10731      2835     15088      2846      8094      3665      1938      2667
3 PARTIALLY_SAVED                5        34        56         8        28        22        73        86        11        14         7        23         8         8         2
4 SUBMITTED                    216       557       838       828       357       310       654       621       214      1001       233       497       264       122       194
5 SURVEY_OPENED                164       395       597      1016       245       212       513       625       110       588       123       349       202        94       120
6 UNDELIVERED_OR_BOUNCED        92       280       318       260       109       127       319       321        63       445        69       192        93        39        74
7 UNSUBSCRIBE                  92       280       318       260       109       127       319       321        63       445        69       192        93        39        74

解决方法

这是您要寻找的吗?

library(dplyr)
library(tidyr)

# Count every Response.Status per Month/Year and add an extra "UNSUBSCRIBE"
# row holding the number of TRUE values found in the UNSUBSCRIBE column.
DF1 %>%
  # Relabel TRUE as the text "UNSUBSCRIBE"; every other value, NA included,
  # becomes NA so that it is not counted.
  mutate(UNSUBSCRIBE = if_else(UNSUBSCRIBE == "TRUE", "UNSUBSCRIBE", NA_character_)) %>%
  # Stack both columns into one Response.Status column. values_drop_na removes
  # only rows whose stacked value is NA; a bare drop_na() would also discard
  # rows that have an NA in any unrelated column.
  pivot_longer(
    c(Response.Status, UNSUBSCRIBE),
    values_to = "Response.Status",
    values_drop_na = TRUE
  ) %>%
  count(Month, Year, Response.Status) %>%
  pivot_wider(names_from = c("Month", "Year"), names_sep = "/", values_from = n)

输出

# A tibble: 4 x 2
  Response.Status `Nov/2020`
  <chr>                <int>
1 EMAIL_OPENED             2
2 NOT_RESPONDED            3
3 SURVEY_OPENED            1
4 UNSUBSCRIBE              3

相关问答

Selenium Web驱动程序和Java。元素在(x,y)点处不可单击。其...
Python-如何使用点“。” 访问字典成员?
Java 字符串是不可变的。到底是什么意思?
Java中的“ final”关键字如何工作?(我仍然可以修改对象。...
“loop:”在Java代码中。这是什么,为什么要编译?
java.lang.ClassNotFoundException:sun.jdbc.odbc.JdbcOdbc...