R：通过匹配另一个数据帧的列来插入和外推数据帧中的值

问题描述

我有两个数据框：

df1 <- data.frame(levels = c(1,3,5,7,9),values = c(2.2,5.3,7.9,5.4,8.7))

df2 <- data.frame(levels = c(1,4,8,12)) # other columns not necessary

我希望根据 df1$levels 中的数字将 df1$values 插入到 df2$levels 中。所以有一些内插，但也有外推到第二个数据帧中的第 12 级。

解决方法

也许，根据两个数据集的 complete 的 union 执行 levels，然后将 na.approx（来自 zoo）与 {{1} }（用于外推）

rule = 2

-输出

library(dplyr)
library(tidyr)
library(zoo)
df1 <- df1 %>% 
    complete(levels = union(levels,df2$levels)) %>%
    mutate(values = na.approx(values,maxgap = Inf,rule = 2))

我确定这可以压缩，这是我很久以前写的一些代码，用于处理必须在有序向量的头部/尾部进行外推的情况：

# Function to interpolate / extrapolate: l_estimate => function()
l_estimate <- function(vec){
  # Function to perform-linear interpolation and return vector: 
  # .l_interp_vec => function()
  .l_interp_vec <- function(vec){
    interped_values <- 
      approx(x = vec,method = "linear",ties = "constant",n = length(vec))$y
    return(ifelse(is.na(vec),interped_values[is.na(vec)],vec))
  }
  
  # Store a vector denoting the indices of the vector that are NA: 
  # na_idx => integer vector
  na_idx <- is.na(vec)
  
  # Store a scalar of min row where x isn't NA: min_non_na => integer vector
  min_non_na <- min(which(!(na_idx)))
  
  # Store a scalar of max row where x isn't NA: max_non_na => integer vector
  max_non_na <- max(which(!(na_idx)))
  
  # Store scalar of the number of rows needed to impute prior 
  # to first NA value: ru_lower => integer vector
  ru_lower <- ifelse(min_non_na > 1,min_non_na - 1,min_non_na)
  
  # Store scalar of the number of rows needed to impute after
  # the last non-NA value: ru_upper => integer vector
  ru_upper <- ifelse(
    max_non_na == length(vec),length(vec) - 1,(length(vec) - (max_non_na + 1))
  )
  
  # Store a vector of the ramp to function: ramp_up => numeric vector 
  ramp_up <- as.numeric(
    cumsum(rep(vec[min_non_na]/(min_non_na),ru_lower))
  )
  
  # Apply the interpolation function on vector: y => numeric vector
  y <- as.numeric(.l_interp_vec(as.numeric(vec[min_non_na:max_non_na])))
  
  # Create a vector that combines the ramp_up vector 
  # and y if the first NA is at row 1:
  if(length(ramp_up) >= 1 & max_non_na != length(vec)){
    # Create a vector interpolations if there are 
    # multiple NA values after the last value: lower_l_int => numeric vector
    lower_l_int <- as.numeric(
      cumsum(rep(mean(diff(c(ramp_up,y))),ru_upper+1)) + 
        as.numeric(vec[max_non_na])
      )
    
    # Store the linear interpolations in  a vector: z => numeric vector
    z <- as.numeric(c(ramp_up,y,lower_l_int))
  
  }else if(length(ramp_up) > 1 & max_non_na == length(vec)){
    
    # Store the linear interpolations in  a vector: z => numeric
    z <- as.numeric(c(ramp_up,y))
    
  }else if(min_non_na == 1 & max_non_na != length(vec)){
    
    # Create a vector interpolations if there are 
    # multiple NA values after the last value: lower_l_int => numeric vector
    lower_l_int <- as.numeric(
      cumsum(rep(mean(diff(c(ramp_up,ru_upper+1)) +
        as.numeric(vec[max_non_na])
      )
    
    # Store the linear interpolations in  a vector: z => numeric vector
    z <- as.numeric(c(y,lower_l_int))
    
  }else{
    # Store the linear interpolations in  a vector: z => numeric vector
    z <- as.numeric(y)
    
  }
  # Interpolate between points in x,return new x:
  return(as.numeric(ifelse(is.na(vec),z,vec)))
}

# Apply the function on ordered data: data.frame => stdout(console)
transform(full_df[order(full_df$levels),],values = l_estimate(values)
)

extrapolation interpolation r

R：通过匹配另一个数据帧的列来插入和外推数据帧中的值

问题描述

解决方法

相关问答