问题描述
# Example input: one row per rule condition (4 for Rule 1, 3 for Rule 2).
# Every column must supply one value per row (7 in total); otherwise
# data.frame() stops with "arguments imply differing number of rows".
# Lift and Coverage are per-rule metrics, so they repeat on every row of
# the rule they belong to.
Rule <- rep(c("Rule 1", "Rule 2"), times = c(4, 3))
Condition <- c(
  "1 of 4", "2 of 4", "3 of 4", "4 of 4",
  "1 of 3", "2 of 3", "3 of 3"
)
Clause <- c(
  "Temperature > 60", "Temperature < 90", "Rain = 0", "Wind < 20",
  "Temperature > 55", "Temperature < 85", "Rain <= 2"
)
Lift <- rep(c("1.30", "1.60"), times = c(4, 3))
Coverage <- rep(c("20%", "35%"), times = c(4, 3))
DF <- data.frame(Rule, Condition, Clause, Lift, Coverage)
希望将上面的数据框转换为如下数据框:
# Desired output, written out by hand: the rows of DF with one blank row
# separating the two rules, and Lift / Coverage kept only on the last row of
# each rule.
# NOTE(review): the original vectors here were truncated (lengths 3, 2, 2),
# so data.frame() could not build them. The values below are reconstructed
# from the description that follows and from the printed output of the
# solution further down; confirm against the original question.
Rule <- c(
  "Rule 1", "Rule 1", "Rule 1", "Rule 1",
  "",
  "Rule 2", "Rule 2", "Rule 2"
)
Lift <- c("", "", "", "1.30", "", "", "", "1.60")
Coverage <- c("", "", "", "20%", "", "", "", "35%")
Result <- data.frame(
  Rule,
  Condition = c(
    "1 of 4", "2 of 4", "3 of 4", "4 of 4",
    "",
    "1 of 3", "2 of 3", "3 of 3"
  ),
  Clause = c(
    "Temperature > 60", "Temperature < 90", "Rain = 0", "Wind < 20",
    "",
    "Temperature > 55", "Temperature < 85", "Rain <= 2"
  ),
  Lift,
  Coverage
)
请注意,结果中新增了用于分隔各条规则的空白行,并删除了重复的 Lift 和 Coverage 指标;只有每条规则的最后一行保留 Lift 和 Coverage。
解决方法
您可以先创建一个空白行,用于插入到每个 Rule 之后:
# One-row data frame of empty strings with the same column names as DF;
# it is appended after each rule's rows as a visual separator.
blank_cells <- matrix(
  "",
  nrow = 1,
  ncol = length(DF),
  dimnames = list(NULL, names(DF))
)
empty_df <- data.frame(blank_cells)
按每个唯一的 Rule 拆分数据,把 Lift 和 Coverage 列中的重复值替换为空白,在每组末尾追加 empty_df,然后合并结果。
library(dplyr)

# Split DF into one piece per Rule. Within each piece, blank out every
# Lift / Coverage value that occurs again further down (so only the last
# occurrence survives), then append the blank separator row. The pieces are
# row-bound back together by map_df().
DF %>%
  group_split(Rule) %>%
  purrr::map_df(function(rule_rows) {
    deduplicated <- mutate(
      rule_rows,
      across(
        c(Lift, Coverage),
        function(values) {
          replace(values, duplicated(values, fromLast = TRUE), "")
        }
      )
    )
    bind_rows(deduplicated, empty_df)
  }) %>%
  # The last rule needs no separator after it, so drop the final row.
  slice(-n())
# Rule Condition Clause Lift Coverage
# <chr> <chr> <chr> <chr> <chr>
#1 "Rule 1" "1 of 4" "Temperature > 60" "" ""
#2 "Rule 1" "2 of 4" "Temperature < 90" "" ""
#3 "Rule 1" "3 of 4" "Rain = 0" "" ""
#4 "Rule 1" "4 of 4" "Wind < 20" "1.30" "20%"
#5 "" "" "" "" ""
#6 "Rule 2" "1 of 3" "Temperature > 55" "" ""
#7 "Rule 2" "2 of 3" "Temperature < 85" "" ""
#8 "Rule 2" "3 of 3" "Rain <= 2" "1.60" "35%"