tidymodels · simonpcouch · Oct 6, 2023 · Sep 28, 2023 · Oct 4, 2023 · simonpcouch
diff --git a/NEWS.md b/NEWS.md
@@ -6,6 +6,8 @@
 
 * `last_fit()` will now error when supplied a fitted workflow. (#678)
 
+* Fixes bug where `.notes` entries were sorted in the wrong order in tuning results for resampling schemes with IDs that aren't already in alphabetical order. (#728)
+
 * A method for rsample's `int_pctl()` function that will compute percentile confidence intervals on performance metrics for objects produced by `fit_resamples()`, `tune_*()`, and `last_fit()`. 
 
 * Fixes bug where `.config` entries in the `.extracts` column in `tune_bayes()` output didn't align with the entries they ought to in the `.metrics` and `.predictions` columns (#715).

diff --git a/R/grid_code_paths.R b/R/grid_code_paths.R
@@ -22,10 +22,14 @@ tune_grid_loop <- function(resamples,
     rng
   )
 
-  resamples <- pull_metrics(resamples, results, control)
-  resamples <- pull_notes(resamples, results, control)
-  resamples <- pull_extracts(resamples, results, control)
-  resamples <- pull_predictions(resamples, results, control)
+  # compute the id ordering once, before pulling each element of results (#728)
+  resample_ids <- grep("^id", names(resamples), value = TRUE)
+  id_order <- vctrs::vec_order(resamples[resample_ids])
+
+  resamples <- pull_metrics(resamples, results, control, order = id_order)
+  resamples <- pull_notes(resamples, results, control, order = id_order)
+  resamples <- pull_extracts(resamples, results, control, order = id_order)
+  resamples <- pull_predictions(resamples, results, control, order = id_order)
   resamples <- pull_all_outcome_names(resamples, results)
 
   resamples

diff --git a/R/pull.R b/R/pull.R
@@ -10,7 +10,7 @@ extract_details <- function(object, extractor) {
 
 # Grab the new results, make sure that they align row-wise with the rsample
 # object and then bind columns
-pulley <- function(resamples, res, col) {
+pulley <- function(resamples, res, col, order) {
   if (all(purrr::map_lgl(res, inherits, "simpleError"))) {
     res <-
       resamples %>%
@@ -22,7 +22,9 @@ pulley <- function(resamples, res, col) {
   all_null <- all(purrr::map_lgl(res, is.null))
 
   id_cols <- grep("^id", names(resamples), value = TRUE)
-  resamples <- dplyr::arrange(resamples, !!!syms(id_cols))
+
+  resamples <- vctrs::vec_slice(resamples, order)
+
   pulled_vals <- purrr::map(res, ~ .x[[col]]) %>% purrr::list_rbind()
 
   if (nrow(pulled_vals) == 0) {
@@ -65,22 +67,22 @@ maybe_repair <- function(x) {
 }
 
 
-pull_metrics <- function(resamples, res, control) {
-  out <- pulley(resamples, res, ".metrics")
+pull_metrics <- function(resamples, res, control, order) {
+  out <- pulley(resamples, res, ".metrics", order = order)
   out$.metrics <- maybe_repair(out$.metrics)
   out
 }
 
-pull_extracts <- function(resamples, res, control) {
+pull_extracts <- function(resamples, res, control, order) {
   if (!is.null(control$extract)) {
-    resamples <- pulley(resamples, res, ".extracts")
+    resamples <- pulley(resamples, res, ".extracts", order = order)
   }
   resamples
 }
 
-pull_predictions <- function(resamples, res, control) {
+pull_predictions <- function(resamples, res, control, order) {
   if (control$save_pred) {
-    resamples <- pulley(resamples, res, ".predictions")
+    resamples <- pulley(resamples, res, ".predictions", order = order)
     resamples$.predictions <- maybe_repair(resamples$.predictions)
   }
   resamples
@@ -126,8 +128,10 @@ ensure_tibble <- function(x) {
   res
 }
 
-pull_notes <- function(resamples, res, control) {
-  resamples$.notes <- purrr::map(res, ~ purrr::pluck(.x, ".notes"))
+pull_notes <- function(resamples, res, control, order) {
+  notes <- purrr::map(res, ~ purrr::pluck(.x, ".notes"))
+  resamples$.notes <- notes[order]
+
   resamples
 }
 

diff --git a/tests/testthat/test-notes.R b/tests/testthat/test-notes.R
@@ -71,3 +71,19 @@ test_that("showing notes", {
   expect_snapshot(show_notes(fit_lr))
 
 })
+
+test_that("notes are sorted in the correct order", {
+  # set `apparent = TRUE` so that resamples aren't in alphabetical order by id
+  mt_boots <- bootstraps(mtcars, 3, apparent = TRUE)
+
+  # induce the size-zero yardstick error in Bootstrap1
+  mt_boots$splits[[1]]$out_id <- numeric(0)
+
+  suppressMessages({
+    mt_res <- fit_resamples(linear_reg(), mpg ~ ., mt_boots)
+  })
+
+  boots_1_loc <- which(mt_res$id == "Bootstrap1")
+  boots_1_notes <- mt_res$.notes[[boots_1_loc]]
+  expect_equal(nrow(boots_1_notes), 1)
+})