有没有一种方法可以使用水平顺序替换R中的NA?

问题描述

我有以下数据框:

df <-structure(list(time = c("12:00:00","12:05:00","12:10:00","12:15:00","12:20:00","12:25:00","12:30:00","12:35:00","12:40:00","12:45:00","12:50:00","12:55:00","13:00:00","13:05:00","13:10:00","13:15:00","13:20:00","13:25:00"),speedA = c(60L,75L,65L,45L,12L,15L,20L,60L,30L,35L,25L,10L,5L
),speedB = c(50L,NA,40L,18L,NA),speedC = c(60L,50L,34L,64L,7L,60L),speedD = c(NA,55L,36L,39L,48L,47L)),class = "data.frame",row.names = c(NA,-18L))

我想通过在每个NA同一行的水平值之间进行插值来用值替换NA。 预期结果:

df2<- structure(list(time = c("12:00:00",speedB = c(50,30,63.33333,40,21,15,18,52.5,65,47.5,10,25,54.5,17.5,12,35,32.5),speedC = c(60,61.66667,50,60,34,64,7,60),speedD = c(60L,-18L))

解决方法

我们可以使用zoo::na.approx来插值。对于无法插值的值(最后是NA个值,我们使用tidyr::fill进行填充。

library(dplyr)
library(tidyr)

df %>%
  pivot_longer(cols = -time) %>%
  group_by(time) %>%
  mutate(value = zoo::na.approx(value,na.rm = FALSE)) %>%
  fill(value) %>%
  pivot_wider()

#   time     speedA speedB speedC speedD
#   <chr>     <dbl>  <dbl>  <dbl>  <dbl>
# 1 12:00:00     60 50     60         60
# 2 12:05:00     75 30     25         10
# 3 12:10:00     65 63.333 61.667     60
# 4 12:15:00     45 40     40         40
# 5 12:20:00     12 21     30         50
# 6 12:25:00     15 15     15         55
# 7 12:30:00     20 18     50         45
# 8 12:35:00     45 52.5   60         35
# 9 12:40:00     65 65     65         65
#10 12:45:00     60 47.5   35         35
#11 12:50:00     60 15     34         45
#12 12:55:00     30 10     35         60
#13 13:00:00     35 25     15         35
#14 13:05:00     45 54.5   64         34
#15 13:10:00     25 17.5   10         36
#16 13:15:00     15 12      7         39
#17 13:20:00     10 35     60         48
#18 13:25:00      5 32.5   60         47
,

您可以将zoo::na.approx()c_across()逐行使用。

library(dplyr)
library(tidyr)
library(zoo)

df %>%
  rowwise() %>%
  mutate(speed = list(na.locf(na.approx(c_across(-time),na.rm = FALSE))),.keep = "unused") %>%
  unnest_wider(speed,names_sep = "")

# # A tibble: 18 x 5
#    time     speed1 speed2 speed3 speed4
#    <chr>     <dbl>  <dbl>  <dbl>  <dbl>
#  1 12:00:00     60   50     60       60
#  2 12:05:00     75   30     25       10
#  3 12:10:00     65   63.3   61.7     60
#  4 12:15:00     45   40     40       40
#  5 12:20:00     12   21     30       50
#  6 12:25:00     15   15     15       55
#  7 12:30:00     20   18     50       45
#  8 12:35:00     45   52.5   60       35
#  9 12:40:00     65   65     65       65
# 10 12:45:00     60   47.5   35       35
# 11 12:50:00     60   15     34       45
# 12 12:55:00     30   10     35       60
# 13 13:00:00     35   25     15       35
# 14 13:05:00     45   54.5   64       34
# 15 13:10:00     25   17.5   10       36
# 16 13:15:00     15   12      7       39
# 17 13:20:00     10   35     60       48
# 18 13:25:00      5   32.5   60       47