在 R 的数据集中查找所有配对

问题描述

我有一个包含 3 列的数据集,如下所示。

# Example data: one row per (event, participant) attendance record.
# NOTE(review): the vectors as originally posted had unequal lengths (5, 7, 4)
# and unquoted W / M symbols, so the snippet could not be run. The values below
# are reconstructed to be consistent with the outputs printed in the answers
# further down -- confirm against the real data.
id_evt <- c(1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3)
id_participant <- c(1, 2, 3, 4, 5, 1, 3, 5, 6, 8, 2, 3, 4, 9, 10)
sex <- c(
  "W", "M", "W", "M", "W",
  "W", "W", "W", "M", "M",
  "M", "W", "M", "W", "M"
)

# cbind() builds a character matrix because `sex` is character, so every
# column of df (including the ids) ends up as character.
df <- data.frame(cbind(id_evt, id_participant, sex))




id_evt = id of a specific event
id_participant = id of one participant
sex = sex of the participant 

我想找到所有参加同一活动的男女组合。

这是我尝试过的方法。它可以运行,但我想得到每一对共同参加的所有活动的列表。

library(dplyr)

# Female attendance records: one row per (participant, event).
females <- df %>%
  filter(sex == "W") %>%
  select(f_id = id_participant, f_group = id_evt)

# Male attendance records: one row per (participant, event).
males <- df %>%
  filter(sex == "M") %>%
  select(m_id = id_participant, m_group = id_evt)

# All possible pairings of females and males.
# unique() matters here: a participant who attended several events appears
# several times in females / males, and repeating an id in the grid would
# duplicate every matching row produced by the joins below.
pairs <- expand.grid(
  f_id = unique(females$f_id),
  m_id = unique(males$m_id),
  stringsAsFactors = FALSE
)

# Merge in information about each individual. Each join adds one row per
# event the participant attended, so the result holds every combination of
# (female event, male event) for a pair.
pairs <- pairs %>%
  left_join(females, by = "f_id") %>%
  left_join(males, by = "m_id") %>%
  # eliminate any pairings that are in different groups, leaving one row per
  # (pair, shared event)
  filter(f_group == m_group)

非常感谢,

解决方法

也许你可以试试这个 -

library(dplyr)

# For every event, build all "M-W" id combinations, then count in how many
# events each combination occurs (most frequent first).
# NOTE(review): newer dplyr releases may warn about summarise() returning
# several rows per group and suggest reframe() -- confirm against the
# installed version.
df %>%
  group_by(id_evt) %>%
  summarise(
    pair = as.vector(
      outer(
        sort(id_participant[sex == "M"]),
        sort(id_participant[sex == "W"]),
        FUN = paste,
        sep = "-"
      )
    )
  ) %>%
  ungroup() %>%
  count(pair, sort = TRUE, name = "number_of_events")

#   pair  number_of_events
#   <chr>            <int>
# 1 2-3                  2
# 2 4-3                  2
# 3 10-3                 1
# 4 10-9                 1
# 5 2-1                  1
# 6 2-5                  1
# 7 2-9                  1
# 8 4-1                  1
# 9 4-5                  1
#10 4-9                  1
#11 6-1                  1
#12 6-3                  1
#13 6-5                  1
#14 8-1                  1
#15 8-3                  1
#16 8-5                  1
,

也许是这样的?

library(data.table)
# Split the data by event and, within each event, cross-join the male ids
# with the female ids. The result is a list with one data.table of (M, W)
# pairs per event, named by id_evt.
# setDT() converts df to a data.table by reference.
ans <- lapply(
  split(setDT(df), by = "id_evt"),
  function(evt) {
    men <- evt[sex == "M", id_participant]
    women <- evt[sex == "W", id_participant]
    CJ(M = men, W = women)
  }
)

# $`1`
#    M W
# 1: 2 1
# 2: 2 3
# 3: 2 5
# 4: 4 1
# 5: 4 3
# 6: 4 5
# 
# $`2`
#    M W
# 1: 6 1
# 2: 6 3
# 3: 6 5
# 4: 8 1
# 5: 8 3
# 6: 8 5
# 
# $`3`
#     M W
# 1: 10 3
# 2: 10 9
# 3:  2 3
# 4:  2 9
# 5:  4 3
# 6:  4 9

这是基础结果……如果您想知道同一对配对出现了多少次(以及出现在哪些活动中),可以执行如下操作:

# Stack the per-event pair tables (keeping the event id in `id_evt`), then
# for each (M, W) pair report how many events it occurs in and which ones.
rbindlist(ans, idcol = "id_evt")[
  ,
  .(N = .N, events = paste(id_evt, collapse = ";")),
  by = c("M", "W")
]
#     M W N events
# 1:  2 1 1      1
# 2:  2 3 2    1;3
# 3:  2 5 1      1
# 4:  4 1 1      1
# 5:  4 3 2    1;3
# 6:  4 5 1      1
# 7:  6 1 1      2
# 8:  6 3 1      2
# 9:  6 5 1      2
#10:  8 1 1      2
#11:  8 3 1      2
#12:  8 5 1      2
#13: 10 3 1      3
#14: 10 9 1      3
#15:  2 9 1      3
#16:  4 9 1      3