问题描述
代码完全可以满足需要,但是使用这些嵌套循环的速度非常慢(一次超过10k列),有没有更有效/简单的方法呢?
all_xpath<-unique(dplyr::pull(tibble(all.df['Xpath']),Xpath))
all_section<-unique(dplyr::pull(tibble(all.df['section']),section))
valueS<-unique(dplyr::pull(tibble(all.df['attr.']),attr.))
for (j in all_section){
for (f in all_xpath){
for (g in valueS){
allx <- data.frame(filter(all.df,section==j & attr. == g & Xpath == f),stringsAsFactors=FALSE)
if (nrow(allx)>1){
value<-paste(allx$value.,collapse = ' | ')
if ( any(all.df[all.df$section==j & all.df$attr.==g & all.df$Xpath==f,]$elem.!='telecom')){
all.df[all.df$section==j & all.df$attr.==g & all.df$Xpath==f,]$value. <- value
}}}}}
valueS<-unique(dplyr::pull(tibble(all.df['elem.']),elem.))
#concat by elem. and xpath within component
for (j in all_section){
for (f in all_xpath){
for (g in valueS){
allx <- data.frame(filter(all.df,section==j & elem. == g & Xpath == f & attr.==""),stringsAsFactors=FALSE)
if (nrow(allx)>1){
value<-paste(allx$value.,collapse = ' | ')
all.df[all.df$section==j & all.df$elem.==g & all.df$Xpath==f & all.df$attr.=="",]$value. <- value
}}}}
all.df<-dplyr::distinct(all.df)
数据的格式如下:
dput(all.df)
structure(list(section = c("LastFiled","LastFiled","Results","History","Cable","Fan","Fan"),elem. = c("code","code","id","effectiveTime","value","code"),attr. = c("code","root","",""),value. = c("8462-4","8462-5","39156-7","39156-8","59408-11","39156-13","59408-13","39156-14","59408-8","39156-9","59408-9","39156-11"),Xpath = c("/Document/othersection/entry/body/sceen/code","/Document/othersection/entry/body/sceen/code","/Document/othersection/entry/body/sceen/id","/Document/othersection/entry/sceen/effectiveTime","/Document/othersection/entry/entryRelationship","/Document/othersection/entry/procedure/entryRelationship/sceen","/Document/othersection/entry/procedure/entryRelationship/sceen"
)),row.names = c(NA,-12L),class = "data.frame")
结果应如下所示:
section elem. attr. value. Xpath
LastFiled code code 8462-4 | 8462-5 /Document/othersection/entry/body/sceen/code
LastFiled id root 39156-7 | 39156-8 /Document/othersection/entry/body/sceen/id
Results code code 59408-11 | 39156-13 /Document/othersection/entry/body/sceen/code
History effectiveTime value 59408-13 | 39156-14 /Document/othersection/entry/sceen/effectiveTime
Cable value 59408-8 | 39156-9 |
59408-8 | 39156-9 /Document/othersection/entry/entryRelationship
Fan code 59408-9 | 39156-11 |
59408-9 | 39156-11 /Document/othersection/entry/procedure/entryRelationship
解决方法
可以使用两个dplyr
管道解决问题,每个数据转换一个管道。结果包装在bind_rows
中。
library(dplyr)
bind_rows(
all.df %>%
filter(elem. != 'telecom') %>%
group_by(section,attr.,elem.,Xpath) %>%
summarise(value. = paste(value.,collapse = "|"),.groups = "keep"),all.df %>%
filter(attr. == "") %>%
group_by(section,.groups = "keep")
) %>%
mutate(attr. = ifelse(is.na(attr.),"",attr.)) %>%
relocate(value.,.before = Xpath)
## A tibble: 8 x 5
## Groups: section,Xpath [6]
# section attr. elem. value. Xpath
# <chr> <chr> <chr> <chr> <chr>
#1 Cable "" value 59408-8|39156-9 /Document/othersection/entry/entryRelationship
#2 Fan "" code 59408-9|39156-11 /Document/othersection/entry/procedure/entryRelationship/sceen
#3 History "value" effectiveTime 59408-13|39156-14 /Document/othersection/entry/sceen/effectiveTime
#4 LastFiled "code" code 8462-4|8462-5 /Document/othersection/entry/body/sceen/code
#5 LastFiled "root" id 39156-7|39156-8 /Document/othersection/entry/body/sceen/id
#6 Results "code" code 59408-11|39156-13 /Document/othersection/entry/body/sceen/code
#7 Cable "" value 59408-8|39156-9 /Document/othersection/entry/entryRelationship
#8 Fan "" code 59408-9|39156-11 /Document/othersection/entry/procedure/entryRelationship/sceen