在 R 中为分组列的连续段分配 ID

问题描述

我想在 data.frame 中生成一列,为分组列(示例数据 dummy_df 中的 s 列)的各个连续段分配 ID。

# Example data. `s` is the grouping column; `desired_output` numbers the
# consecutive runs of each value of `s` separately (1st run of "a", 2nd run
# of "a", ...). The vectors match the 13-row table printed below.
dummy_df <- data.frame(
  s = c("a", "a", "b", "b", "b", "c", "c", "a", "a", "c", "c", "a", "a"),
  desired_output = c(1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3)
)
# rleid() is provided by the data.table package. It numbers consecutive runs
# across the whole vector, which is why it differs from `desired_output`.
dummy_df$rleid_output <- data.table::rleid(dummy_df$s)
dummy_df
   s desired_output rleid_output
1  a              1            1
2  a              1            1
3  b              1            2
4  b              1            2
5  b              1            2
6  c              1            3
7  c              1            3
8  a              2            4
9  a              2            4
10 c              2            5
11 c              2            5
12 a              3            6
13 a              3            6

我想要的结果与 rleid() 类似,但每遇到一个新的组时重新开始计数。但是,我找不到一种直接的方法来实现。谢谢。

解决方法

您可以这样做:

# Run-length encode `s`, number the runs separately within each distinct value
# (ave() + seq_along), then repeat each run's number once per row of that run.
# as.character() keeps this working when `s` is a factor, which rle() rejects;
# it is a no-op when `s` is already a character vector.
dummy_df$out <- with(
  rle(as.character(dummy_df$s)),
  rep(ave(lengths, values, FUN = seq_along), times = lengths)
)

结果:

   s desired_output out
1  a              1   1
2  a              1   1
3  b              1   1
4  b              1   1
5  b              1   1
6  c              1   1
7  c              1   1
8  a              2   2
9  a              2   2
10 c              2   2
11 c              2   2
12 a              3   3
13 a              3   3
,

如果您愿意使用 data.table(rleid 是该软件包的一部分),则可以按照以下两个步骤进行操作:

library(data.table)
# Example data; matches the 13-row result printed below.
dummy_df <- data.frame(
  s = c("a", "a", "b", "b", "b", "c", "c", "a", "a", "c", "c", "a", "a")
)
# Convert the data.frame to a data.table in place.
setDT(dummy_df)
# Auxiliary column: one id per consecutive run of `s`, numbered across the
# whole table.
dummy_df[, rleid_output := rleid(s)]
# Within each value of `s`, renumber that value's run ids as 1, 2, 3, ...
dummy_df[, desired_output := rleid(rleid_output), by = "s"]
# Print the end result.
dummy_df
#>     s rleid_output desired_output
#>  1: a            1              1
#>  2: a            1              1
#>  3: b            2              1
#>  4: b            2              1
#>  5: b            2              1
#>  6: c            3              1
#>  7: c            3              1
#>  8: a            4              2
#>  9: a            4              2
#> 10: c            5              2
#> 11: c            5              2
#> 12: a            6              3
#> 13: a            6              3

由 reprex 包(v0.3.0)于 2020-10-16 创建

,

您可以将 tidyverse 与基础 R 的 rle 函数结合使用:

library(tidyverse)
rle(dummy_df$s) %>%
  # One row per consecutive run: its length (a) and its value (b). The rle
  # components are spelled out in full instead of relying on `$` partial
  # matching (`.$length`, `.$value`).
  with(data.frame(a = lengths, b = values)) %>%
  group_by(b) %>%
  # Number the runs of each value in order of appearance.
  mutate(n = row_number()) %>%
  ungroup() %>%
  # Repeat each run's number once per row belonging to that run.
  with(rep(n, times = a)) %>%
  # Attach the expanded ids to the original data as column `res`.
  bind_cols(dummy_df, res = .)
   s desired_output res
1  a              1   1
2  a              1   1
3  b              1   1
4  b              1   1
5  b              1   1
6  c              1   1
7  c              1   1
8  a              2   2
9  a              2   2
10 c              2   2
11 c              2   2
12 a              3   3
13 a              3   3