如何使用 pivot_longer() 从宽格式到长格式重塑我的数据?

问题描述

我有一个看起来像这样的数据:

# Example data: one row per child visit (12 rows in total). Every vector must
# have the same length, otherwise data.frame() stops with
# "arguments imply differing number of rows".
cod_child <- c(1, 1, 1, 2, 2, 2, 7, 7, 9, 11, 11, 11)
redcap_event_name <- c(
  "visita_15_arm_1", "visita_16_arm_1", "visita_7_arm_1",
  "visita_10_arm_1", "visita_11_arm_1", "visita_8_arm_1",
  "visita_11_arm_1", "visita_14_arm_1", "visita_12_arm_1",
  "visita_11_arm_1", "visita_12_arm_1", "visita_8_arm_1"
)
res_orin_crea <- c(
  88.5, 58.2, 70.2, 62.4, 142.0, 42.9,
  26.9, 17.1, 148.0, 26.1, NA, 33.7
)
res_orin_crea_tipo <- c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, NA, 1)
res_orin_dis24dcp <- c(0.8, NA, 0.9, 0.3, NA, 0.4, NA, NA, NA, 0.2, NA, 0.1)
res_orin_dis24dcp_tipo <- rep(3, 12)

df <- data.frame(
  cod_child,
  redcap_event_name,
  res_orin_crea,
  res_orin_crea_tipo,
  res_orin_dis24dcp,
  res_orin_dis24dcp_tipo
)
df

   cod_child redcap_event_name res_orin_crea res_orin_crea_tipo res_orin_dis24dcp res_orin_dis24dcp_tipo
1          1   visita_15_arm_1          88.5                  1               0.8                      3
2          1   visita_16_arm_1          58.2                  1                NA                      3
3          1    visita_7_arm_1          70.2                  1               0.9                      3
4          2   visita_10_arm_1          62.4                  1               0.3                      3
5          2   visita_11_arm_1         142.0                  1                NA                      3
6          2    visita_8_arm_1          42.9                  1               0.4                      3
7          7   visita_11_arm_1          26.9                  1                NA                      3
8          7   visita_14_arm_1          17.1                  1                NA                      3
9          9   visita_12_arm_1         148.0                  1                NA                      3
10        11   visita_11_arm_1          26.1                  1               0.2                      3
11        11   visita_12_arm_1            NA                 NA                NA                      3
12        11    visita_8_arm_1          33.7                  1               0.1                      3

我想要这样的数据:

cod_child redcap_event_name compound concentration tipo
1          1   visita_15_arm_1     crea          88.5    1
2          1   visita_15_arm_1 dis24dcp           0.8    3
3          1   visita_16_arm_1     crea          58.2    1
4          1   visita_16_arm_1 dis24dcp            NA    3
5          1    visita_7_arm_1     crea          70.2    1
6          1    visita_7_arm_1 dis24dcp           0.9    3
7          2   visita_10_arm_1     crea          62.4    1
8          2   visita_10_arm_1 dis24dcp           0.3    3
9          2   visita_11_arm_1     crea         142.0    1
10         2   visita_11_arm_1 dis24dcp            NA    3
11         2    visita_8_arm_1     crea          42.9    1
12         2    visita_8_arm_1 dis24dcp           0.4    3
13         7   visita_11_arm_1     crea          26.9    1
14         7   visita_11_arm_1 dis24dcp            NA    3
15         7   visita_14_arm_1     crea          17.1    1
16         7   visita_14_arm_1 dis24dcp            NA    3
17         9   visita_12_arm_1     crea         148.0    1
18         9   visita_12_arm_1 dis24dcp            NA    3
19        11   visita_11_arm_1     crea          26.1    1
20        11   visita_11_arm_1 dis24dcp           0.2    3
21        11   visita_12_arm_1     crea            NA   NA
22        11   visita_12_arm_1 dis24dcp            NA    3
23        11    visita_8_arm_1     crea          33.7    1
24        11    visita_8_arm_1 dis24dcp           0.1    3

我可以通过下面的两步做法得到这个结果:

library(dplyr)
library(tidyr)

# Step 1: pivot the concentration columns (everything except the *_tipo
# columns) into compound / concentration pairs.
A <- df %>%
  select(-contains("_tipo")) %>%
  pivot_longer(
    cols = starts_with("res_orin_"),
    names_to = "compound",
    names_pattern = "res_orin_?(.*)",
    values_to = "concentration"
  ) %>%
  print()

# Step 2: pivot the *_tipo columns the same way, so the rows come out in the
# same order as in A.
B <- df %>%
  select(cod_child, contains("_tipo")) %>%
  pivot_longer(cols = starts_with("res_orin_"), values_to = "tipo") %>%
  print()

# Attach tipo to A by column name. B only has three columns
# (cod_child, name, tipo), so the positional B[, 4] is out of range.
dataf <- cbind(A, B["tipo"])
dataf

但我认为应该有一种只需一步即可完成的方法。我猜这与 names_pattern 中的正则表达式有关,但我想不出该怎么写。有人可以帮我吗?

解决方法

您可以尝试在调用 pivot_longer() 重塑之前先重命名列:

# Give the concentration columns an explicit "_concentration" suffix so every
# measured column reads res_orin_<compound>_<measure>; the ".value" sentinel
# then turns each <measure> part into its own output column.
df %>%
  rename(
    res_orin_crea_concentration = res_orin_crea,
    res_orin_dis24dcp_concentration = res_orin_dis24dcp
  ) %>%
  pivot_longer(
    cols = !c(cod_child, redcap_event_name),
    names_to = c("compound", ".value"),
    names_pattern = "res_orin_(.+)_(.+)"
  )

# A tibble: 24 x 5
   cod_child redcap_event_name compound concentration  tipo
       <dbl> <chr>             <chr>            <dbl> <dbl>
 1         1 visita_15_arm_1   crea              88.5     1
 2         1 visita_15_arm_1   dis24dcp           0.8     3
 3         1 visita_16_arm_1   crea              58.2     1
 4         1 visita_16_arm_1   dis24dcp          NA       3
 5         1 visita_7_arm_1    crea              70.2     1
 6         1 visita_7_arm_1    dis24dcp           0.9     3
 7         2 visita_10_arm_1   crea              62.4     1
 8         2 visita_10_arm_1   dis24dcp           0.3     3
 9         2 visita_11_arm_1   crea             142       1
10         2 visita_11_arm_1   dis24dcp          NA       3
# … with 14 more rows
另一种解决方法

使用 data.table 的 melt() 函数的版本:

library(data.table)

# Example data: one row per child visit (12 rows in total). Every vector must
# have the same length, otherwise data.frame() stops with
# "arguments imply differing number of rows".
cod_child <- c(1, 1, 1, 2, 2, 2, 7, 7, 9, 11, 11, 11)
redcap_event_name <- c(
  "visita_15_arm_1", "visita_16_arm_1", "visita_7_arm_1",
  "visita_10_arm_1", "visita_11_arm_1", "visita_8_arm_1",
  "visita_11_arm_1", "visita_14_arm_1", "visita_12_arm_1",
  "visita_11_arm_1", "visita_12_arm_1", "visita_8_arm_1"
)
res_orin_crea <- c(
  88.5, 58.2, 70.2, 62.4, 142.0, 42.9,
  26.9, 17.1, 148.0, 26.1, NA, 33.7
)
res_orin_crea_tipo <- c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, NA, 1)
res_orin_dis24dcp <- c(0.8, NA, 0.9, 0.3, NA, 0.4, NA, NA, NA, 0.2, NA, 0.1)
res_orin_dis24dcp_tipo <- rep(3, 12)

df <- data.frame(
  cod_child,
  redcap_event_name,
  res_orin_crea,
  res_orin_crea_tipo,
  res_orin_dis24dcp,
  res_orin_dis24dcp_tipo
)

# Melt both measure groups in one call: each named pattern collects the columns
# that feed one output column. The concentration pattern has to match the bare
# compound columns (res_orin_crea, res_orin_dis24dcp), not the *_tipo columns,
# otherwise the tipo value is reported as the concentration.
dt <- melt(
  setDT(df),
  id.vars = 1:2,
  variable.name = "compound",
  measure.vars = patterns(
    concentration = "res_orin_(crea|dis24dcp)$",
    tipo = "tipo$"
  )
)

# show expected format
# melt() labels the compounds 1 and 2 in column order; map them back to their
# names and group the rows of each visit together.
dt[, compound := fifelse(compound == 1, "crea", "dis24dcp")][, .SD, by = .(cod_child, redcap_event_name)]
#>     cod_child redcap_event_name compound concentration tipo
#>  1:         1   visita_15_arm_1     crea          88.5    1
#>  2:         1   visita_15_arm_1 dis24dcp           0.8    3
#>  3:         1   visita_16_arm_1     crea          58.2    1
#>  4:         1   visita_16_arm_1 dis24dcp            NA    3
#>  5:         1    visita_7_arm_1     crea          70.2    1
#>  6:         1    visita_7_arm_1 dis24dcp           0.9    3
#>  7:         2   visita_10_arm_1     crea          62.4    1
#>  8:         2   visita_10_arm_1 dis24dcp           0.3    3
#>  9:         2   visita_11_arm_1     crea         142.0    1
#> 10:         2   visita_11_arm_1 dis24dcp            NA    3
#> 11:         2    visita_8_arm_1     crea          42.9    1
#> 12:         2    visita_8_arm_1 dis24dcp           0.4    3
#> 13:         7   visita_11_arm_1     crea          26.9    1
#> 14:         7   visita_11_arm_1 dis24dcp            NA    3
#> 15:         7   visita_14_arm_1     crea          17.1    1
#> 16:         7   visita_14_arm_1 dis24dcp            NA    3
#> 17:         9   visita_12_arm_1     crea         148.0    1
#> 18:         9   visita_12_arm_1 dis24dcp            NA    3
#> 19:        11   visita_11_arm_1     crea          26.1    1
#> 20:        11   visita_11_arm_1 dis24dcp           0.2    3
#> 21:        11   visita_12_arm_1     crea            NA   NA
#> 22:        11   visita_12_arm_1 dis24dcp            NA    3
#> 23:        11    visita_8_arm_1     crea          33.7    1
#> 24:        11    visita_8_arm_1 dis24dcp           0.1    3
#>     cod_child redcap_event_name compound concentration tipo

由 reprex 包(v2.0.0)创建于 2021 年 4 月 8 日