step_rose在调优网格中失败

问题描述

我注意到,在使用某些引擎(例如 keras 和 xgboost)进行训练时,配方(recipe)返回的 y 的行数比 X 的行数多。

文末给出了一个可重现的最小示例。

Error in data.frame(ynew,Xnew): arguments imply differing number of rows: 385,386

产生的错误即上面这条信息:`data.frame(ynew, Xnew)` 的两个参数行数不一致(385 与 386)。

解决方法

该问题与对 over_ratio 进行调优有关。如果不对该参数调优(如下面代码中将相关部分注释掉),示例即可正常运行。

# Reprex: tune an xgboost classifier on an imbalanced two-class subset of
# iris, with ROSE resampling (themis::step_rose) applied in the recipe.
library(tidymodels)
#> ── Attaching packages ────────────────────────────────────── tidymodels 0.1.1   
library(themis)
data(iris)

# Build an imbalanced data set: drop setosa, then draw 60 rows with virginica
# weighted 60:1 over versicolor. factor() drops the now-unused "setosa" level.
iris_imbalance <- iris %>%
  filter(Species != "setosa") %>%
  slice_sample(
    n = 60,
    weight_by = case_when(Species == "virginica" ~ 60, TRUE ~ 1)
  ) %>%
  mutate(Species = factor(Species))

# Boosted-tree classifier with every listed hyperparameter marked for tuning.
xg_mod <- parsnip::boost_tree(
  mode = "classification",
  trees = tune(),
  tree_depth = tune(),
  min_n = tune(),
  loss_reduction = tune(),
  learn_rate = tune()
) %>%
  set_engine("xgboost")

# Latin hypercube grid with 5 candidate parameter sets.
# The over_ratio() entry is deliberately commented out (the workaround); it
# must sit on its own line so the `#` does not swallow the remaining
# arguments and the closing parenthesis.
xg_grid <- grid_latin_hypercube(
  # over_ratio(range = c(0, 1)),
  trees(),
  tree_depth(),
  min_n(),
  loss_reduction(),
  learn_rate(),
  size = 5
)

# Recipe with ROSE resampling on the outcome. Tuning over_ratio here (together
# with the grid entry above) is what the report associates with the error.
my_recipe <- recipe(Species ~ ., data = iris_imbalance) %>%
  step_rose(Species) # , over_ratio = tune()

# Tune over stratified Monte Carlo cross-validation resamples.
workflow() %>%
  add_model(xg_mod) %>%
  add_recipe(my_recipe) %>%
  tune_grid(
    resamples = mc_cv(iris_imbalance, strata = Species),
    grid = xg_grid
  )
#> # Tuning results
#> # Monte Carlo cross-validation (0.75/0.25) with 25 resamples  using stratification 
#> # A tibble: 25 x 4
#>    splits          id         .metrics          .notes          
#>    <list>          <chr>      <list>            <list>          
#>  1 <split [46/14]> Resample01 <tibble [10 × 9]> <tibble [0 × 1]>
#>  2 <split [46/14]> Resample02 <tibble [10 × 9]> <tibble [0 × 1]>
#>  3 <split [46/14]> Resample03 <tibble [10 × 9]> <tibble [0 × 1]>
#>  4 <split [46/14]> Resample04 <tibble [10 × 9]> <tibble [0 × 1]>
#>  5 <split [46/14]> Resample05 <tibble [10 × 9]> <tibble [0 × 1]>
#>  6 <split [46/14]> Resample06 <tibble [10 × 9]> <tibble [0 × 1]>
#>  7 <split [46/14]> Resample07 <tibble [10 × 9]> <tibble [0 × 1]>
#>  8 <split [46/14]> Resample08 <tibble [10 × 9]> <tibble [0 × 1]>
#>  9 <split [46/14]> Resample09 <tibble [10 × 9]> <tibble [0 × 1]>
#> 10 <split [46/14]> Resample10 <tibble [10 × 9]> <tibble [0 × 1]>
#> # … with 15 more rows

由 reprex 包(v0.3.0)于 2020-11-13 创建