{中的错误:任务2失败-“ mutate输入输入模型有问题x0非NA用例ℹ输入模型为map...”

问题描述

这是一个代码块,用于在遇到错误消息时复制一些数据:

# Set up
library(tidyverse)
library(lubridate)
library(foreach)

# Create data
mydf <- data.frame(
  cohort = seq(ymd('2019-01-01'),ymd('2019-12-31'),by = '1 days'),n = rnorm(365,1000,50) %>% round,cohort_cost = rnorm(365,800,50)
) %>% 
  crossing(tenure_days = 0:365) %>% 
  mutate(activity_date = cohort + days(tenure_days)) %>% 
  mutate(daily_revenue = rnorm(nrow(.),20,1)) %>% 
  group_by(cohort) %>% 
  arrange(activity_date) %>% 
  mutate(cumulative_revenue = cumsum(daily_revenue)) %>% 
  arrange(cohort,activity_date) %>% 
  mutate(payback_veLocity = round(cumulative_revenue / cohort_cost,2)) %>% 
  select(cohort,n,cohort_cost,activity_date,tenure_days,everything())

## wider data
mydf_wide <- mydf %>% 
  select(cohort,payback_veLocity) %>% 
  group_by(cohort,cohort_cost) %>% 
  pivot_wider(names_from = tenure_days,values_from = payback_veLocity,names_prefix = 'veLocity_day_') %>% 
  mutate(Category = rep(LETTERS[1:3],length.out = n()))

models <- data.frame(
  from = mydf$tenure_days %>% unique,to = mydf$tenure_days %>% unique
) %>% 
  expand.grid %>% 
  filter(to > from) %>% 
  filter(from > 0) %>% 
  arrange(from) %>% 
  mutate(mod_formula = paste0('veLocity_day_',to,' ~ veLocity_day_',from)) %>% 
  mutate(Category = rep(LETTERS[1:3],length.out = n()))

model_splits <- models %>% split(.$Category)

我有一个数据框,其中每行都包含一个模型规范,我希望将模型作为突变字段来拟合。

运行上面的代码块后,我正在使用的结果数据如下:

model_splits$A %>% glimpse
Rows: 22,144
Columns: 4
$ from        <int> 1,1,1…
$ to          <int> 2,5,8,11,14,17,23,26,29,32,35,38,41,44,47,50,53,56,59,62,65,68,71,74,77,80,83,86,89,92,95,98,101,104,107,110,…
$ mod_formula <chr> "veLocity_day_2 ~ veLocity_day_1","veLocity_day_5 ~ veLocity_day_1","veLocity_day_8 ~ veLocity_day_1","veLocity_day_11 ~ veLocity_day_1","veLoci…
$ Category    <chr> "A","A","A"…

这是包含模型规范的数据框。我还有数据框mydf_wide,如下所示:

mydf_wide %>% head()
# A tibble: 6 x 370
# Groups:   cohort,cohort_cost [6]
  cohort         n cohort_cost veLocity_day_0 veLocity_day_1 veLocity_day_2 veLocity_day_3 veLocity_day_4 veLocity_day_5 veLocity_day_6 veLocity_day_7 veLocity_day_8
  <date>     <dbl>       <dbl>          <dbl>          <dbl>          <dbl>          <dbl>          <dbl>          <dbl>          <dbl>          <dbl>          <dbl>
1 2019-01-01   884        723.           0.03           0.05           0.08           0.11           0.14           0.17           0.19           0.22           0.25
2 2019-01-02  1026        698.           0.03           0.06           0.09           0.12           0.15           0.17           0.2            0.23           0.26
3 2019-01-03   911        906.           0.02           0.04           0.07           0.09           0.11           0.13           0.15           0.18           0.2 
4 2019-01-04   893        828.           0.02           0.05           0.07           0.1            0.12           0.15           0.17           0.2            0.22
5 2019-01-05   924        821.           0.02           0.05           0.07           0.1            0.12           0.15           0.17           0.2            0.22
6 2019-01-06  1032        797.           0.02           0.05           0.08           0.1            0.13           0.15           0.18           0.2            0.23

一个循环中,我想循环遍历model_splits,并在每种情况下使用map来拟合模型:

# fit some models in a loop
foreach::foreach(c = model_splits %>% names,.combine='c') %do% {
  df <- model_splits[[c]] %>%
     sample_n(3) %>% 
    mutate(Model = map(.x = mod_formula,~lm(.x,data = mydf_wide %>% filter(Category == c))))
}
Error in { : task 2 Failed - "Problem with `mutate()` input `Model`.
x 0 (non-NA) cases
ℹ Input `Model` is `map(...)`."

我进行了一次谷歌搜索,建议我寻找NA值,但以上两个DF都缺少数据。

所需结果是将新模型突变到数据帧模型的每一行上。

解决方法

问题是因为“ mydf_wide”中只有一个“类别”,导致在lm通话中出现问题

unique(mydf_wide$Category)
#[1] "A"

如果我们进行更改(仅用于演示)

mydf_wide$Category[91:201] <- "B"
mydf_wide$Category[202:361] <- "C"

代码有效

foreach::foreach(c = model_splits %>% names,.combine='c') %do% {
   df <- model_splits[[c]] %>%
      sample_n(3) %>% 
     mutate(Model = map(.x = mod_formula,~lm(.x,data = mydf_wide %>% filter(Category == c))))
 }
#$from
#[1]  15 144   4

#$to
#[1] 330 237 137

#$mod_formula
#[1] "velocity_day_330 ~ velocity_day_15"  "velocity_day_237 ~ velocity_day_144" "velocity_day_137 ~ velocity_day_4"  

#$Category
#[1] "A" "A" "A"

#$Model
#$Model[[1]]

#Call:
#lm(formula = .x,data = mydf_wide %>% filter(Category == c))

#Coefficients:
#    (Intercept)  velocity_day_15  
#         0.4575          19.5458  
# 

#$Model[[2]]

#Call:
#lm(formula = .x,data = mydf_wide %>% filter(Category == c))

#Coefficients:
#     (Intercept)  velocity_day_144  
#        0.007502          1.639855  


#$Model[[3]]

#Call:
#lm(formula = .x,data = mydf_wide %>% filter(Category == c))

#Coefficients:
#   (Intercept)  velocity_day_4  
#       0.7735         21.4259  
# ...

或者也可以用tryCatchpossibly包装,以确保函数在遇到错误时偶然运行