Open
Description
I get an error message when trying to fit a model using `fit_xy()` in the presence of missing values, but not when using `fit()`.
Here is just one example, with `rand_forest() %>% set_engine("ranger")`. The same problem occurs with `rand_forest() %>% set_engine("randomForest")` or `linear_reg(penalty = 1, mixture = 1) %>% set_engine("glmnet")`.
# Reproducible example: the same ranger random-forest regression is fitted
# twice on `airquality`, once through the formula interface (`fit()`) and once
# through the x/y interface (`fit_xy()`). Lines starting with `#>` are the
# output recorded by reprex.
library(tidymodels)
#> Registered S3 method overwritten by 'tune':
#> method from
#> required_pkgs.model_spec parsnip
data(airquality)
# The summary shows that the data contain missing values (the "NA's" row:
# 37 and 7). NOTE(review): column alignment was lost in the paste; in the
# built-in dataset these counts belong to Ozone and Solar.R -- confirm.
summary(airquality)
#> Ozone Solar.R Wind Temp
#> Min. : 1.00 Min. : 7.0 Min. : 1.700 Min. :56.00
#> 1st Qu.: 18.00 1st Qu.:115.8 1st Qu.: 7.400 1st Qu.:72.00
#> Median : 31.50 Median :205.0 Median : 9.700 Median :79.00
#> Mean : 42.13 Mean :185.9 Mean : 9.958 Mean :77.88
#> 3rd Qu.: 63.25 3rd Qu.:258.8 3rd Qu.:11.500 3rd Qu.:85.00
#> Max. :168.00 Max. :334.0 Max. :20.700 Max. :97.00
#> NA's :37 NA's :7
#> Month Day
#> Min. :5.000 Min. : 1.0
#> 1st Qu.:6.000 1st Qu.: 8.0
#> Median :7.000 Median :16.0
#> Mean :6.993 Mean :15.8
#> 3rd Qu.:8.000 3rd Qu.:23.0
#> Max. :9.000 Max. :31.0
#>
# Case 1 -- formula interface: no error is recorded for this call.
# NOTE(review): presumably the formula path drops incomplete rows through the
# default `na.action` before the engine sees the data -- confirm.
mod1 <- rand_forest() %>%
set_engine("ranger") %>%
set_mode("regression") %>%
fit(Ozone ~ Solar.R + Temp + Wind, data = airquality)
# Case 2 -- x/y interface with the same predictors and outcome: the fit aborts
# with a missing-data error naming the Solar.R predictor.
mod2 <- rand_forest() %>%
set_engine("ranger") %>%
set_mode("regression") %>%
fit_xy(x = airquality[,c("Solar.R", "Temp", "Wind")], y = airquality$Ozone)
#> Error: Missing data in columns: Solar.R.
#> Chronométrage arrêté à : 0 0 0.001
# (The line above is R's "Timing stopped at" message, printed in a French
# locale.)
Created on 2021-08-29 by the reprex package (v2.0.1)