问题描述
# Set up
library(tidyverse)
library(lubridate)
library(foreach)
# Create data
mydf <- data.frame(
cohort = seq(ymd('2019-01-01'),ymd('2019-12-31'),by = '1 days'),n = rnorm(365,1000,50) %>% round,cohort_cost = rnorm(365,800,50)
) %>%
crossing(tenure_days = 0:365) %>%
mutate(activity_date = cohort + days(tenure_days)) %>%
mutate(daily_revenue = rnorm(nrow(.),20,1)) %>%
group_by(cohort) %>%
arrange(activity_date) %>%
mutate(cumulative_revenue = cumsum(daily_revenue)) %>%
arrange(cohort,activity_date) %>%
mutate(payback_veLocity = round(cumulative_revenue / cohort_cost,2)) %>%
select(cohort,n,cohort_cost,activity_date,tenure_days,everything())
## wider data
mydf_wide <- mydf %>%
select(cohort,payback_veLocity) %>%
group_by(cohort,cohort_cost) %>%
pivot_wider(names_from = tenure_days,values_from = payback_veLocity,names_prefix = 'veLocity_day_') %>%
mutate(Category = rep(LETTERS[1:3],length.out = n()))
models <- data.frame(
from = mydf$tenure_days %>% unique,to = mydf$tenure_days %>% unique
) %>%
expand.grid %>%
filter(to > from) %>%
filter(from > 0) %>%
arrange(from) %>%
mutate(mod_formula = paste0('veLocity_day_',to,' ~ veLocity_day_',from)) %>%
mutate(Category = rep(LETTERS[1:3],length.out = n()))
model_splits <- models %>% split(.$Category)
我有一个数据框,其中每行都包含一个模型规范,我希望将模型作为突变字段来拟合。
运行上面的代码块后,我正在使用的结果数据如下:
model_splits$A %>% glimpse
Rows: 22,144
Columns: 4
$ from <int> 1,1,1…
$ to <int> 2,5,8,11,14,17,23,26,29,32,35,38,41,44,47,50,53,56,59,62,65,68,71,74,77,80,83,86,89,92,95,98,101,104,107,110,…
$ mod_formula <chr> "veLocity_day_2 ~ veLocity_day_1","veLocity_day_5 ~ veLocity_day_1","veLocity_day_8 ~ veLocity_day_1","veLocity_day_11 ~ veLocity_day_1","veLoci…
$ Category <chr> "A","A","A"…
这是包含模型规范的数据框。我还有数据框mydf_wide,如下所示:
mydf_wide %>% head()
# A tibble: 6 x 370
# Groups: cohort,cohort_cost [6]
cohort n cohort_cost veLocity_day_0 veLocity_day_1 veLocity_day_2 veLocity_day_3 veLocity_day_4 veLocity_day_5 veLocity_day_6 veLocity_day_7 veLocity_day_8
<date> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 2019-01-01 884 723. 0.03 0.05 0.08 0.11 0.14 0.17 0.19 0.22 0.25
2 2019-01-02 1026 698. 0.03 0.06 0.09 0.12 0.15 0.17 0.2 0.23 0.26
3 2019-01-03 911 906. 0.02 0.04 0.07 0.09 0.11 0.13 0.15 0.18 0.2
4 2019-01-04 893 828. 0.02 0.05 0.07 0.1 0.12 0.15 0.17 0.2 0.22
5 2019-01-05 924 821. 0.02 0.05 0.07 0.1 0.12 0.15 0.17 0.2 0.22
6 2019-01-06 1032 797. 0.02 0.05 0.08 0.1 0.13 0.15 0.18 0.2 0.23
在一个循环中,我想循环遍历model_splits
,并在每种情况下使用map来拟合模型:
# fit some models in a loop
foreach::foreach(c = model_splits %>% names,.combine='c') %do% {
df <- model_splits[[c]] %>%
sample_n(3) %>%
mutate(Model = map(.x = mod_formula,~lm(.x,data = mydf_wide %>% filter(Category == c))))
}
Error in { : task 2 Failed - "Problem with `mutate()` input `Model`.
x 0 (non-NA) cases
ℹ Input `Model` is `map(...)`."
我进行了一次谷歌搜索,建议我寻找NA值,但以上两个DF都缺少数据。
所需结果是将新模型突变到数据帧模型的每一行上。
解决方法
问题是因为“ mydf_wide”中只有一个“类别”,导致在lm
通话中出现问题
unique(mydf_wide$Category)
#[1] "A"
如果我们进行更改(仅用于演示)
mydf_wide$Category[91:201] <- "B"
mydf_wide$Category[202:361] <- "C"
代码有效
foreach::foreach(c = model_splits %>% names,.combine='c') %do% {
df <- model_splits[[c]] %>%
sample_n(3) %>%
mutate(Model = map(.x = mod_formula,~lm(.x,data = mydf_wide %>% filter(Category == c))))
}
#$from
#[1] 15 144 4
#$to
#[1] 330 237 137
#$mod_formula
#[1] "velocity_day_330 ~ velocity_day_15" "velocity_day_237 ~ velocity_day_144" "velocity_day_137 ~ velocity_day_4"
#$Category
#[1] "A" "A" "A"
#$Model
#$Model[[1]]
#Call:
#lm(formula = .x,data = mydf_wide %>% filter(Category == c))
#Coefficients:
# (Intercept) velocity_day_15
# 0.4575 19.5458
#
#$Model[[2]]
#Call:
#lm(formula = .x,data = mydf_wide %>% filter(Category == c))
#Coefficients:
# (Intercept) velocity_day_144
# 0.007502 1.639855
#$Model[[3]]
#Call:
#lm(formula = .x,data = mydf_wide %>% filter(Category == c))
#Coefficients:
# (Intercept) velocity_day_4
# 0.7735 21.4259
# ...
或者也可以用tryCatch
或possibly
包装,以确保函数在遇到错误时偶然运行