Commit

merge pr #732: tighten logic for iter argument
simonpcouch authored Oct 3, 2023
2 parents 637e923 + 93e7d2e commit f6300b1
Showing 4 changed files with 148 additions and 22 deletions.
3 changes: 3 additions & 0 deletions NEWS.md
@@ -12,6 +12,9 @@

* Metrics from apparent resamples are no longer included when estimating performance with `estimate_tune_results()` (and thus with `collect_metrics(..., summarize = TRUE)` and `compute_metrics(..., summarize = TRUE)`). (#714)

* Handles edge cases for `tune_bayes()`'s `iter` argument more soundly. For `iter = 0`, the output of `tune_bayes()` should match that of `tune_grid()`, and `tune_bayes()` will now error when `iter < 0`. `tune_bayes()` will now alter the state of the RNG slightly differently, resulting in changed Bayesian optimization search output. (#720)
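  For illustration, a minimal sketch of the new behavior (the workflow `wf` and resamples `folds` below are hypothetical stand-ins for any tunable workflow and resampling object):

  ```r
  library(tune)

  # hypothetical example objects: a workflow with one tuning parameter
  # and a set of cross-validation resamples
  wf <- workflows::workflow(
    mpg ~ .,
    parsnip::nearest_neighbor("regression", "kknn", neighbors = tune())
  )
  folds <- rsample::vfold_cv(mtcars)

  # `iter = 0` runs no Bayesian optimization iterations, so the collected
  # metrics should line up with `tune_grid()` under the same initial design
  set.seed(1)
  res_bayes <- tune_bayes(wf, folds, iter = 0, initial = 10)

  # negative, non-scalar, or missing values of `iter` now error up front
  try(tune_bayes(wf, folds, iter = -1))
  ```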


# tune 1.1.2

* `last_fit()` now works with the 3-way validation split objects from `rsample::initial_validation_split()`. `last_fit()` and `fit_best()` now have a new argument `add_validation_set` to include or exclude the validation set in the dataset used to fit the model (#701).
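  As a hedged sketch of that usage (a simple linear model on `mtcars`; the split proportions and the `add_validation_set` value shown are illustrative):

  ```r
  library(tune)

  # three-way split: 60% training, 20% validation, 20% testing
  split <- rsample::initial_validation_split(mtcars, prop = c(0.6, 0.2))

  wf <- workflows::workflow(mpg ~ ., parsnip::linear_reg())

  # fit on the training data (plus the validation set, if requested) and
  # evaluate once on the held-out test set
  final_fit <- last_fit(wf, split, add_validation_set = TRUE)
  collect_metrics(final_fit)
  ```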
22 changes: 18 additions & 4 deletions R/tune_bayes.R
@@ -211,7 +211,8 @@ tune_bayes.model_spec <- function(object,
))
}

control <- parsnip::condense_control(control, control_bayes())
# set `seed` so that calling `control_bayes()` doesn't alter RNG state (#721)
control <- parsnip::condense_control(control, control_bayes(seed = 1))
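# (the default `seed` in `control_bayes()` is drawn randomly, so calling it
# with no arguments would itself advance the RNG; a fixed value avoids that)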

wflow <- add_model(workflow(), object)

@@ -244,7 +245,8 @@ tune_bayes.workflow <-
control = control_bayes(),
eval_time = NULL) {

control <- parsnip::condense_control(control, control_bayes())
# set `seed` so that calling `control_bayes()` doesn't alter RNG state (#721)
control <- parsnip::condense_control(control, control_bayes(seed = 1))

res <-
tune_bayes_workflow(
Expand All @@ -260,7 +262,7 @@ tune_bayes.workflow <-
tune_bayes_workflow <-
function(object, resamples, iter = 10, param_info = NULL, metrics = NULL,
objective = exp_improve(),
initial = 5, control = control_bayes(), eval_time = NULL, ...,
initial = 5, control, eval_time = NULL, ...,
call = caller_env()) {
start_time <- proc.time()[3]

@@ -269,6 +271,8 @@ tune_bayes_workflow <-
check_rset(resamples)
rset_info <- pull_rset_attributes(resamples)

check_iter(iter, call = call)

metrics <- check_metrics(metrics, object)
check_eval_time(eval_time, metrics)
metrics_data <- metrics_info(metrics)
@@ -338,7 +342,7 @@ tune_bayes_workflow <-

prev_gp_mod <- NULL

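# `seq_len(iter)` is `integer(0)` when `iter = 0`, so the loop body never
# runs; the previous `1:iter` would have evaluated to `c(1, 0)` and iterated.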
for (i in (1:iter) + score_card$overall_iter) {
for (i in seq_len(iter) + score_card$overall_iter) {
.notes <-
tibble::new_tibble(
list(location = character(0), type = character(0), note = character(0)),
@@ -494,6 +498,16 @@ create_initial_set <- function(param, n = NULL, checks) {
dials::grid_latin_hypercube(param, size = n)
}

check_iter <- function(iter, call) {
if (!(is.numeric(iter) && length(iter) == 1L && !is.na(iter) && iter >= 0)) {
cli::cli_abort(
"The {.arg iter} argument must be a non-negative integer.",
call = call
)
}

iter
}
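# for example: `check_iter(5, call)` returns 5 unchanged, while
# `check_iter(-1, call)`, `check_iter(NA, call)`, and `check_iter(c(0, 1), call)`
# all abort with "The `iter` argument must be a non-negative integer."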

# ------------------------------------------------------------------------------

88 changes: 72 additions & 16 deletions tests/testthat/_snaps/bayes.md
@@ -11,7 +11,6 @@
i Gaussian process model
! The Gaussian process model is being fit using 1 features but only has 2
data points to do so. This may cause errors or a poor model fit.
! Gaussian process model: X should be in range (0, 1)
v Gaussian process model
i Generating 3 candidates
i Predicted candidates
@@ -78,7 +77,6 @@
i Fold10: preprocessor 1/1, model 1/1 (predictions)
v Estimating performance
i Gaussian process model
! Gaussian process model: X should be in range (0, 1)
v Gaussian process model
i Generating 2 candidates
i Predicted candidates
@@ -172,11 +170,10 @@
-- Iteration 1 -----------------------------------------------------------------
i Current best: rmse=2.418 (@iter 0)
i Current best: rmse=2.453 (@iter 0)
i Gaussian process model
! The Gaussian process model is being fit using 1 features but only has 2
data points to do so. This may cause errors or a poor model fit.
! Gaussian process model: X should be in range (0, 1)
v Gaussian process model
i Generating 3 candidates
i Predicted candidates
@@ -187,16 +184,15 @@
-- Iteration 2 -----------------------------------------------------------------
i Current best: rmse=2.418 (@iter 0)
i Current best: rmse=2.453 (@iter 0)
i Gaussian process model
! Gaussian process model: X should be in range (0, 1)
v Gaussian process model
i Generating 2 candidates
i Predicted candidates
i num_comp=5
i num_comp=3
i Estimating performance
v Estimating performance
(x) Newest results: rmse=2.453 (+/-0.381)
<3 Newest results: rmse=2.418 (+/-0.357)
Output
# Tuning results
# 10-fold cross-validation
@@ -229,15 +225,14 @@
-- Iteration 1 -----------------------------------------------------------------
i Current best: rmse=2.418 (@iter 0)
i Current best: rmse=2.453 (@iter 0)
i Gaussian process model
! The Gaussian process model is being fit using 1 features but only has 2
data points to do so. This may cause errors or a poor model fit.
! Gaussian process model: X should be in range (0, 1)
v Gaussian process model
i Generating 3 candidates
i Predicted candidates
i num_comp=2
i num_comp=4
i Estimating performance
i Fold01: preprocessor 1/1
v Fold01: preprocessor 1/1
@@ -300,17 +295,16 @@
i Fold10: preprocessor 1/1, model 1/1 (extracts)
i Fold10: preprocessor 1/1, model 1/1 (predictions)
v Estimating performance
(x) Newest results: rmse=2.666 (+/-0.281)
(x) Newest results: rmse=2.461 (+/-0.37)
-- Iteration 2 -----------------------------------------------------------------
i Current best: rmse=2.418 (@iter 0)
i Current best: rmse=2.453 (@iter 0)
i Gaussian process model
! Gaussian process model: X should be in range (0, 1)
v Gaussian process model
i Generating 2 candidates
i Predicted candidates
i num_comp=5
i num_comp=3
i Estimating performance
i Fold01: preprocessor 1/1
v Fold01: preprocessor 1/1
@@ -373,7 +367,7 @@
i Fold10: preprocessor 1/1, model 1/1 (extracts)
i Fold10: preprocessor 1/1, model 1/1 (predictions)
v Estimating performance
(x) Newest results: rmse=2.453 (+/-0.381)
<3 Newest results: rmse=2.418 (+/-0.357)
Output
# Tuning results
# 10-fold cross-validation
@@ -458,6 +452,7 @@
! The Gaussian process model is being fit using 1 features but only has 2
data points to do so. This may cause errors or a poor model fit.
! Gaussian process model: X should be in range (0, 1)
! Gaussian process model: X should be in range (0, 1)

# too few starting values

@@ -526,25 +521,46 @@
! validation: internal: A correlation computation is required, but `estimate` is constant and ha...
! For the rsq estimates, 1 missing value was found and removed before fitting
the Gaussian process model.
! The Gaussian process model is being fit using 1 features but only has 2
data points to do so. This may cause errors or a poor model fit.
! Gaussian process model: X should be in range (0, 1)
! For the rsq estimates, 1 missing value was found and removed before fitting
the Gaussian process model.
! Gaussian process model: X should be in range (0, 1)
! validation: internal: A correlation computation is required, but `estimate` is constant and ha...
! For the rsq estimates, 2 missing values were found and removed before
fitting the Gaussian process model.
! Gaussian process model: X should be in range (0, 1)
! validation: internal: A correlation computation is required, but `estimate` is constant and ha...
! For the rsq estimates, 3 missing values were found and removed before
fitting the Gaussian process model.
! Gaussian process model: X should be in range (0, 1)
! validation: internal: A correlation computation is required, but `estimate` is constant and ha...
! For the rsq estimates, 4 missing values were found and removed before
fitting the Gaussian process model.
! Gaussian process model: X should be in range (0, 1)
! validation: internal: A correlation computation is required, but `estimate` is constant and ha...
! For the rsq estimates, 5 missing values were found and removed before
fitting the Gaussian process model.
! Gaussian process model: X should be in range (0, 1)
! validation: internal: A correlation computation is required, but `estimate` is constant and ha...
! For the rsq estimates, 6 missing values were found and removed before
fitting the Gaussian process model.
! Gaussian process model: X should be in range (0, 1)
! validation: internal: A correlation computation is required, but `estimate` is constant and ha...
! For the rsq estimates, 7 missing values were found and removed before
fitting the Gaussian process model.
! Gaussian process model: X should be in range (0, 1)
! validation: internal: A correlation computation is required, but `estimate` is constant and ha...
! For the rsq estimates, 8 missing values were found and removed before
fitting the Gaussian process model.
! Gaussian process model: X should be in range (0, 1)
! validation: internal: A correlation computation is required, but `estimate` is constant and ha...
! For the rsq estimates, 9 missing values were found and removed before
fitting the Gaussian process model.
! Gaussian process model: X should be in range (0, 1)
! validation: internal: A correlation computation is required, but `estimate` is constant and ha...
! No improvement for 10 iterations; returning current results.

---

Expand All @@ -570,3 +586,43 @@
Message
x Optimization stopped prematurely; returning current results.

# tune_bayes() output for `iter` edge cases (#721)

Code
tune_bayes(wf, boots, iter = -1)
Condition
Error in `tune_bayes()`:
! The `iter` argument must be a non-negative integer.

---

Code
tune_bayes(wf, boots, iter = c(-1, 0, 1))
Condition
Error in `tune_bayes()`:
! The `iter` argument must be a non-negative integer.

---

Code
tune_bayes(wf, boots, iter = c(0, 1, 2))
Condition
Error in `tune_bayes()`:
! The `iter` argument must be a non-negative integer.

---

Code
tune_bayes(wf, boots, iter = NA)
Condition
Error in `tune_bayes()`:
! The `iter` argument must be a non-negative integer.

---

Code
tune_bayes(wf, boots, iter = NULL)
Condition
Error in `tune_bayes()`:
! The `iter` argument must be a non-negative integer.

57 changes: 55 additions & 2 deletions tests/testthat/test-bayes.R
@@ -54,6 +54,7 @@ test_that("tune recipe only", {
expect_false(identical(num_comp, expr(tune())))
expect_true(res_workflow$trained)

set.seed(1)
expect_error(
suppressMessages(
tune_bayes(
@@ -70,6 +71,7 @@


# test verbose options
set.seed(1)
expect_snapshot(
tune_bayes(
wflow,
@@ -81,6 +83,7 @@
)
)

set.seed(1)
expect_snapshot(
tune_bayes(
wflow,
@@ -92,6 +95,7 @@
)
)

set.seed(1)
expect_snapshot(
tune_bayes(
wflow,
@@ -102,8 +106,6 @@
control = control_bayes(verbose_iter = TRUE, verbose = TRUE)
)
)


})

# ------------------------------------------------------------------------------
@@ -534,3 +536,54 @@ test_that("missing performance values", {
)
})
})

# ------------------------------------------------------------------------------
test_that("tune_bayes() output for `iter` edge cases (#721)", {
# for `iter = 0`, ought to match `tune_grid()`
boots <- rsample::bootstraps(mtcars)
wf <-
workflows::workflow(
mpg ~ .,
parsnip::nearest_neighbor("regression", "kknn", neighbors = tune())
)

ctrl_bayes <- control_bayes(seed = 1)

set.seed(1)
res_bayes <- tune_bayes(wf, boots, iter = 0, initial = 10,
control = ctrl_bayes)

set.seed(1)
res_grid <- tune_grid(wf, boots)

expect_equal(
collect_metrics(res_bayes) %>% dplyr::select(-.iter),
collect_metrics(res_grid)
)

# for `iter < 0`, ought to error
expect_snapshot(
error = TRUE,
tune_bayes(wf, boots, iter = -1)
)

expect_snapshot(
error = TRUE,
tune_bayes(wf, boots, iter = c(-1, 0, 1))
)

expect_snapshot(
error = TRUE,
tune_bayes(wf, boots, iter = c(0, 1, 2))
)

expect_snapshot(
error = TRUE,
tune_bayes(wf, boots, iter = NA)
)

expect_snapshot(
error = TRUE,
tune_bayes(wf, boots, iter = NULL)
)
})
