根据两个数据框中的列以及 R 中 mutate 得到的值计算平均值

问题描述

我有一个数据框，通过 mutate 管道（即本文后面给出的 DF1 那段 R 代码）可以统计每个月各 Response.Status 的数量：

' Win32 API declarations used by DeleteZipSubFolder / TimerProc.
' NOTE: a VBA line continuation must be written as a space followed by an
' underscore (" _"); a bare ",_" does not compile.

' Destroys the timer identified by nIDEvent that belongs to window hwnd.
Private Declare PtrSafe Function KillTimer Lib "user32" (ByVal hwnd As LongPtr, _
                                                         ByVal nIDEvent As LongPtr) As Long

' Creates a timer that fires every uElapse milliseconds and calls lpTimerFunc.
' Returns the identifier of the new timer.
Private Declare PtrSafe Function SetTimer Lib "user32" (ByVal hwnd As LongPtr, _
                                                        ByVal nIDEvent As LongPtr, _
                                                        ByVal uElapse As Long, _
                                                        ByVal lpTimerFunc As LongPtr) As LongPtr

' Suspends the calling thread for dwMilliseconds milliseconds.
Private Declare PtrSafe Sub Sleep Lib "kernel32" (ByVal dwMilliseconds As Long)

' Delete the specified sub folder from a Zip file.
'
' A timer is started before the Shell "Delete" verb is invoked so that
' TimerProc can answer the confirmation dialog while InvokeVerb is running.
' Errors are deliberately ignored (best effort).
'
' Parameters:
'   PathToZipFolder - full path to the folder inside the zip archive.
'
' Example:
' Call DeleteZipSubFolder("E:\first.zip\first\second")
Sub DeleteZipSubFolder(PathToZipFolder)
  Dim oShell As Object ' Reference: Microsoft Shell Controls And Automation
  Dim TimerID As LongPtr
  
  On Error Resume Next
  
  Set oShell = CreateObject("Shell.Application")
  
  ' SetTimer takes four arguments: hwnd, nIDEvent, uElapse, lpTimerFunc.
  ' With hwnd = 0 the system picks the timer id and returns it.
  TimerID = SetTimer(0, 0, 100, AddressOf TimerProc)
  
  oShell.NameSpace(PathToZipFolder).Self.InvokeVerb "Delete"
  
  ' Only kill the timer if it was actually created.
  If TimerID <> 0 Then KillTimer 0, TimerID
  
  Set oShell = Nothing
  
  On Error GoTo 0
End Sub

' Timer callback registered through SetTimer in DeleteZipSubFolder.
' Waits briefly, then sends Alt+Y to confirm the Shell delete dialog.
' The four parameters are the ones a SetTimer callback receives; none of
' them is used here.
Public Sub TimerProc(ByVal hwnd As LongPtr, _
                     ByVal wMsg As Long, _
                     ByVal idEvent As LongPtr, _
                     ByVal dwTime As Long)
                         
  ' Wait for the Shell confirmation dialog to appear
  Sleep 100
  ' Use the Alt+Y shortcut to click the Yes button
  SendKeys "%Y"
  
End Sub

我想做的就是使用表中创建的那些值，根据每个Response.Status组中的人数计算平均值。

# Count rows per month and response status, then spread months into columns.
DF1 <- complete_df %>%
  mutate(
    # Re-express each date as a "mm/YYYY" month label.
    Month = format(as.Date(date, format = "%Y/%m/%d"), "%m/%Y"),
    # Turn the unsubscribe flag into a status label; all other values become NA.
    UNSUBSCRIBE = if_else(UNSUBSCRIBE == "TRUE", "UNSUBSCRIBE", NA_character_)
  ) %>%
  # Stack the status and unsubscribe columns into one Response.Status column.
  pivot_longer(
    c(Response.Status, UNSUBSCRIBE),
    values_to = "Response.Status"
  ) %>%
  # NOTE(review): drop_na() with no arguments drops rows with NA in ANY column.
  drop_na() %>%
  count(Month, Response.Status) %>%
  pivot_wider(names_from = Month, names_sep = "/", values_from = n)




# A tibble: 7 x 16
  Response.Status        `01/2020` `02/2020` `03/2020` `04/2020` `05/2020` `06/2020` `07/2020` `08/2020` `09/2019` `09/2020` `10/2019` `10/2020` `11/2019` `11/2020` `12/2019`
  <chr>                      <int>     <int>     <int>     <int>     <int>     <int>     <int>     <int>     <int>     <int>     <int>     <int>     <int>     <int>     <int>
1 EMAIL_OPENED                1068      3105      4063      4976      2079      1856      4249      3638       882      4140       865      2573      1167       684       862
2 NOT_RESPONDED               3187      9715     13164     15239      5458      4773     12679     10709      2798     15066      2814      8068      3641      1931      2647
3 PARTIALLY_SAVED                5        34        56         8        28        22        73        86        11        14         7        23         8         8         2
4 SUBMITTED                    216       557       838       828       357       310       654       621       214      1001       233       497       264       122       194
5 SURVEY_OPENED                164       395       597      1016       245       212       513       625       110       588       123       349       202        94       120
6 UNDELIVERED_OR_BOUNCED        92       280       318       260       109       127       319       321        63       445        69       192        93        39        74
7 UNSUBSCRIBE                  397      1011      1472      1568       727       737      1745      2189       372      1451       378       941       429       254       355

我制作了一个单独的表，其中包含基于这些组名的总和值：

# Monthly counts per response status (dput of the summary table).
# Bound to `df`, the name used by the solution pipeline further down.
# The `11/2019` and `11/2020` columns were missing the PARTIALLY_SAVED value
# (8 in both months, per the printed table), which left them one element
# short of the 6 declared rows.
df <- structure(
  list(
    Response.Status = c(
      "EMAIL_OPENED", "NOT_RESPONDED", "PARTIALLY_SAVED",
      "SUBMITTED", "SURVEY_OPENED", "UNDELIVERED_OR_BOUNCED"
    ),
    `01/2020` = c(1068L, 3187L, 5L, 216L, 164L, 92L),
    `02/2020` = c(3105L, 9715L, 34L, 557L, 395L, 280L),
    `03/2020` = c(4063L, 13164L, 56L, 838L, 597L, 318L),
    `04/2020` = c(4976L, 15239L, 8L, 828L, 1016L, 260L),
    `05/2020` = c(2079L, 5458L, 28L, 357L, 245L, 109L),
    `06/2020` = c(1856L, 4773L, 22L, 310L, 212L, 127L),
    `07/2020` = c(4249L, 12679L, 73L, 654L, 513L, 319L),
    `08/2020` = c(3638L, 10709L, 86L, 621L, 625L, 321L),
    `09/2019` = c(882L, 2798L, 11L, 214L, 110L, 63L),
    `09/2020` = c(4140L, 15066L, 14L, 1001L, 588L, 445L),
    `10/2019` = c(865L, 2814L, 7L, 233L, 123L, 69L),
    `10/2020` = c(2573L, 8068L, 23L, 497L, 349L, 192L),
    `11/2019` = c(1167L, 3641L, 8L, 264L, 202L, 93L),
    `11/2020` = c(684L, 1931L, 8L, 122L, 94L, 39L),
    `12/2019` = c(862L, 2647L, 2L, 194L, 120L, 74L)
  ),
  row.names = c(NA, -6L),
  class = c("tbl_df", "tbl", "data.frame")
)

解决方法

如果我正确理解了您的问题，您有两个 data.frame / tibble：一个是“structure”部分中显示的那个，另一个给出每种响应状态的人数。现在您想得到每种状态的人均值。如果是这样，下面是一种可能的解决方案：

# people/users data set: number of people in each response-status group
df2 <- data.frame(
  Response.Status = c(
    "EMAIL_OPENED", "NOT_RESPONDED", "PARTIALLY_SAVED", "SUBMITTED",
    "SURVEY_OPENED", "UNDELIVERED_OR_BOUNCED", "UNSUBSCRIBE"
  ),
  PEOPLE = c(451, 1563, 4, 71, 53, 47, 135)
)

# Sum the monthly counts per status, attach the head count, and divide.
df %>% # this is your "structure"
  tidyr::pivot_longer(-Response.Status, names_to = "DATE", values_to = "nmbr") %>%
  dplyr::group_by(Response.Status) %>%
  dplyr::summarise(SUM = sum(nmbr)) %>%
  # Explicit key: avoids the "Joining, by = ..." message and accidental
  # matches on any other shared column name. Statuses absent from either
  # table are dropped by the inner join.
  dplyr::inner_join(df2, by = "Response.Status") %>%
  dplyr::mutate(MEAN_PP = SUM / PEOPLE)

  Response.Status           SUM PEOPLE MEAN_PP
  <chr>                   <int>  <dbl>   <dbl>
1 EMAIL_OPENED            36207    451    80.3
2 NOT_RESPONDED          111889   1563    71.6
3 PARTIALLY_SAVED           385      4    96.2
4 SUBMITTED                6906     71    97.3
5 SURVEY_OPENED            5353     53   101  
6 UNDELIVERED_OR_BOUNCED   2801     47    59.6

average dplyr dplyr formattable r r

根据两个数据框中的列以及 R 中 mutate 得到的值计算平均值

问题描述

解决方法

相关问答