问题描述
# A tibble: 10 x 4
# Groups: Month [1]
Response.Status UNSUBSCRIBE Month Year
<fct> <fct> <fct> <chr>
1 SURVEY_OPENED NA Nov 2020
2 NOT_RESPONDED TRUE Nov 2020
3 EMAIL_OPENED NA Nov 2020
4 NOT_RESPONDED NA Nov 2020
5 NOT_RESPONDED TRUE Nov 2020
6 EMAIL_OPENED NA Nov 2020
7 NOT_RESPONDED TRUE Nov 2020
8 EMAIL_OPENED NA Nov 2020
9 SURVEY_OPENED TRUE Nov 2020
10 EMAIL_OPENED NA Nov 2020
structure(list(Response.Status = structure(c(5L,2L,1L,1L),.Label = c("EMAIL_OPENED","NOT_RESPONDED","PARTIALLY_SAVED","SUBMITTED","SURVEY_OPENED","UNDELIVERED_OR_BOUNCED"),class = "factor"),UNSUBSCRIBE = structure(c(NA_integer_,TRUE,NA_integer_,TRUE),.Label = "TRUE",Month = c("Nov","Nov","Nov"),Year = c("2020","2020","2020")),row.names = c(NA,-6L),groups = structure(list(Month = "Nov",.rows = structure(list(
1:6),ptype = integer(0),class = c("vctrs_list_of","vctrs_vctr","list"))),row.names = 1L,class = c("tbl_df","tbl","data.frame"
),.drop = TRUE),class = c("grouped_df","tbl_df","data.frame"
))
我对该表做了转换,得到下面的“Updated_DF”表,用来统计每个月各“Response.Status”的总数:
# Count rows per month and Response.Status, then spread the months into
# columns (one row per status, one column per "%m/%Y" month label).
# NOTE(review): assumes DF1 has a `date` column formatted like "2020/11/30" --
# confirm against the real data.
# The month labels are strings, so the resulting columns sort as text
# (e.g. "09/2019" lands after "08/2020"), not chronologically.
Updated_DF <- DF1 %>%
  # Drop any existing grouping first: the DF1 printout shows it grouped by
  # Month, and mutate() on a grouped tibble cannot overwrite a grouping column.
  ungroup() %>%
  mutate(Month = format(as.Date(date, format = "%Y/%m/%d"), "%m/%Y")) %>%
  # count() is group_by() + summarise(n = n()) and returns an ungrouped result.
  count(Month, Response.Status) %>%
  pivot_wider(names_from = Month, values_from = n)
# A tibble: 6 x 16
Response.Status `01/2020` `02/2020` `03/2020` `04/2020` `05/2020` `06/2020` `07/2020` `08/2020` `09/2019` `09/2020` `10/2019` `10/2020` `11/2019` `11/2020` `12/2019`
<fct> <int> <int> <int> <int> <int> <int> <int> <int> <int> <int> <int> <int> <int> <int> <int>
1 EMAIL_OPENED 1090 3123 4087 5005 2098 1876 4278 3665 901 4162 883 2597 1187 690 883
2 NOT_RESPONDED 3205 9733 13177 15255 5470 4788 12708 10731 2835 15088 2846 8094 3665 1938 2667
3 PARTIALLY_SAVED 5 34 56 8 28 22 73 86 11 14 7 23 8 8 2
4 SUBMITTED 216 557 838 828 357 310 654 621 214 1001 233 497 264 122 194
5 SURVEY_OPENED 164 395 597 1016 245 212 513 625 110 588 123 349 202 94 120
6 UNDELIVERED_OR_BOUNCED 92 280 318 260 109 127 319 321 63 445 69 192 93 39 74
“Updated_DF”的数据结构:
structure(list(Response.Status = structure(1:6,`01/2020` = c(1090L,3205L,5L,216L,164L,92L),`02/2020` = c(3123L,9733L,34L,557L,395L,280L),`03/2020` = c(4087L,13177L,56L,838L,597L,318L),`04/2020` = c(5005L,15255L,8L,828L,1016L,260L),`05/2020` = c(2098L,5470L,28L,357L,245L,109L),`06/2020` = c(1876L,4788L,22L,310L,212L,127L),`07/2020` = c(4278L,12708L,73L,654L,513L,319L),`08/2020` = c(3665L,10731L,86L,621L,625L,321L),`09/2019` = c(901L,2835L,11L,214L,110L,63L),`09/2020` = c(4162L,15088L,14L,1001L,588L,445L),`10/2019` = c(883L,2846L,7L,233L,123L,69L),`10/2020` = c(2597L,8094L,23L,497L,349L,192L),`11/2019` = c(1187L,3665L,264L,202L,93L),`11/2020` = c(690L,1938L,122L,94L,39L),`12/2019` = c(883L,2667L,194L,120L,74L)),"data.frame"))
我想把 DF1 中的“UNSUBSCRIBE”字段也纳入这个转置过程,使 Response.Status 中多出一行,显示每个月“UNSUBSCRIBE”为 TRUE 的总数——尽管它在原数据中是单独的一列。
我完全不知道该如何实现——我最终打算用 formattable 来展示这张表,并为其添加颜色主题。
期望的输出:
Response.Status `01/2020` `02/2020` `03/2020` `04/2020` `05/2020` `06/2020` `07/2020` `08/2020` `09/2019` `09/2020` `10/2019` `10/2020` `11/2019` `11/2020` `12/2019`
<fct> <int> <int> <int> <int> <int> <int> <int> <int> <int> <int> <int> <int> <int> <int> <int>
1 EMAIL_OPENED 1090 3123 4087 5005 2098 1876 4278 3665 901 4162 883 2597 1187 690 883
2 NOT_RESPONDED 3205 9733 13177 15255 5470 4788 12708 10731 2835 15088 2846 8094 3665 1938 2667
3 PARTIALLY_SAVED 5 34 56 8 28 22 73 86 11 14 7 23 8 8 2
4 SUBMITTED 216 557 838 828 357 310 654 621 214 1001 233 497 264 122 194
5 SURVEY_OPENED 164 395 597 1016 245 212 513 625 110 588 123 349 202 94 120
6 UNDELIVERED_OR_BOUNCED 92 280 318 260 109 127 319 321 63 445 69 192 93 39 74
7 UNSUBSCRIBE 92 280 318 260 109 127 319 321 63 445 69 192 93 39 74
解决方法
这是您要寻找的吗?
library(dplyr)
library(tidyr)
# Count each Response.Status per Month/Year and add the number of
# UNSUBSCRIBE == "TRUE" rows as an extra "UNSUBSCRIBE" status row.
# The result has one row per status and one "Month/Year" column per period.
DF1 %>%
  # Turn the flag into a status label; anything other than "TRUE" becomes NA.
  mutate(
    UNSUBSCRIBE = if_else(UNSUBSCRIBE == "TRUE", "UNSUBSCRIBE", NA_character_)
  ) %>%
  # Stack both columns into a single Response.Status column so that one
  # count() covers the regular statuses and the unsubscribe flag alike.
  pivot_longer(
    c(Response.Status, UNSUBSCRIBE),
    values_to = "Response.Status",
    # Drop only the missing status values. A bare drop_na() would also discard
    # rows that have NA in any unrelated column of DF1 and undercount silently.
    values_drop_na = TRUE
  ) %>%
  count(Month, Year, Response.Status) %>%
  pivot_wider(names_from = c("Month", "Year"), names_sep = "/", values_from = n)
输出
# A tibble: 4 x 2
Response.Status `Nov/2020`
<chr> <int>
1 EMAIL_OPENED 2
2 NOT_RESPONDED 3
3 SURVEY_OPENED 1
4 UNSUBSCRIBE 3