From 7586614b284d431a72d3d1f9a1d7a82ec47e05c9 Mon Sep 17 00:00:00 2001 From: Hannah Frick Date: Tue, 21 May 2024 14:43:44 +0100 Subject: [PATCH 01/10] extract helper function --- R/misc.R | 25 ++++++++++++++++--------- 1 file changed, 16 insertions(+), 9 deletions(-) diff --git a/R/misc.R b/R/misc.R index e1dc44e2..236f367f 100644 --- a/R/misc.R +++ b/R/misc.R @@ -271,13 +271,9 @@ reshuffle_rset <- function(rset) { } } - arguments <- attributes(rset) - useful_arguments <- names(formals(arguments$class[[1]])) - useful_arguments <- arguments[useful_arguments] - useful_arguments <- useful_arguments[!is.na(names(useful_arguments))] - if (identical(useful_arguments$strata, FALSE)) { - useful_arguments$strata <- NULL - } else if (identical(useful_arguments$strata, TRUE)) { + rset_type <- class(rset)[[1]] + split_arguments <- get_split_args(rset) + if (identical(split_arguments$strata, TRUE)) { rlang::abort( "Cannot reshuffle this rset (`attr(rset, 'strata')` is `TRUE`, not a column identifier)", i = "If the original object was created with an older version of rsample, try recreating it with the newest version of the package" @@ -285,8 +281,8 @@ reshuffle_rset <- function(rset) { } do.call( - arguments$class[[1]], - c(list(data = rset$splits[[1]]$data), useful_arguments) + rset_type, + c(list(data = rset$splits[[1]]$data), split_arguments) ) } @@ -299,6 +295,17 @@ non_random_classes <- c( "validation_set" ) +get_split_args <- function(rset) { + all_attributes <- attributes(rset) + args <- names(formals(all_attributes$class[[1]])) + split_args <- all_attributes[args] + split_args <- split_args[!is.na(names(split_args))] + if (identical(split_args$strata, FALSE)) { + split_args$strata <- NULL + } + split_args +} + #' Retrieve individual rsplits objects from an rset #' #' @param x The `rset` object to retrieve an rsplit from. From 7bbc732494d11f8aaf48cf0babcf9d5b08c4247a Mon Sep 17 00:00:00 2001 From: Hannah Frick Date: Tue, 21 May 2024 14:46:58 +0100 Subject: [PATCH 02/10] `inner_split()` generic + MC methods --- NAMESPACE | 3 ++ R/inner_split.R | 58 +++++++++++++++++++++++++++++++ man/inner_split.Rd | 28 +++++++++++++++ tests/testthat/test-inner_split.R | 56 +++++++++++++++++++++++++++++ 4 files changed, 145 insertions(+) create mode 100644 R/inner_split.R create mode 100644 man/inner_split.Rd create mode 100644 tests/testthat/test-inner_split.R diff --git a/NAMESPACE b/NAMESPACE index 19772f54..1b4f7bfb 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -25,6 +25,8 @@ S3method(dim,initial_validation_split) S3method(dim,rsplit) S3method(get_rsplit,default) S3method(get_rsplit,rset) +S3method(inner_split,grouped_mc_split) +S3method(inner_split,mc_split) S3method(int_bca,bootstraps) S3method(int_pctl,bootstraps) S3method(int_t,bootstraps) @@ -368,6 +370,7 @@ export(initial_split) export(initial_time_split) export(initial_validation_split) export(initial_validation_time_split) +export(inner_split) export(int_bca) export(int_pctl) export(int_t) diff --git a/R/inner_split.R b/R/inner_split.R new file mode 100644 index 00000000..1f7bd51f --- /dev/null +++ b/R/inner_split.R @@ -0,0 +1,58 @@ +#' Inner split of the analysis set for fitting a post-processor +#' +#' @param x An `rsplit` object. +#' @param split_args A list of arguments to be used for the inner split. +#' @param ... Not currently used. +#' @return An `rsplit` object. +#' @keywords internal +#' @export +inner_split <- function(x, ...) { + UseMethod("inner_split") +} + +# mc --------------------------------------------------------------------- + +#' @rdname inner_split +#' @export +inner_split.mc_split <- function(x, split_args, ...) { + check_dots_empty() + + analysis_set <- analysis(x) + + split_inner <- mc_splits( + analysis_set, + prop = split_args$prop, + times = 1, + strata = split_args$strata, + breaks = split_args$breaks, + pool = split_args$pool + ) + split_inner <- split_inner$splits[[1]] + + class_inner <- paste0(class(x)[1], "_inner") + split_inner <- add_class(split_inner, class_inner) + split_inner +} + +#' @rdname inner_split +#' @export +inner_split.grouped_mc_split <- function(x, split_args, ...) { + # FIXME update this class to `group_mc_split` + check_dots_empty() + + analysis_set <- analysis(x) + + split_inner <- group_mc_splits( + analysis_set, + group = split_args$group, + prop = split_args$prop, + times = 1, + strata = split_args$strata, + pool = split_args$pool + ) + split_inner <- split_inner$splits[[1]] + + class_inner <- paste0(class(x)[1], "_inner") + split_inner <- add_class(split_inner, class_inner) + split_inner +} diff --git a/man/inner_split.Rd b/man/inner_split.Rd new file mode 100644 index 00000000..b528f6aa --- /dev/null +++ b/man/inner_split.Rd @@ -0,0 +1,28 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/inner_split.R +\name{inner_split} +\alias{inner_split} +\alias{inner_split.mc_split} +\alias{inner_split.grouped_mc_split} +\title{Inner split of the analysis set for fitting a post-processor} +\usage{ +inner_split(x, ...) + +\method{inner_split}{mc_split}(x, split_args, ...) + +\method{inner_split}{grouped_mc_split}(x, split_args, ...) +} +\arguments{ +\item{x}{An \code{rsplit} object.} + +\item{...}{Not currently used.} + +\item{split_args}{A list of arguments to be used for the inner split.} +} +\value{ +An \code{rsplit} object. +} +\description{ +Inner split of the analysis set for fitting a post-processor +} +\keyword{internal} diff --git a/tests/testthat/test-inner_split.R b/tests/testthat/test-inner_split.R new file mode 100644 index 00000000..8d2aeb53 --- /dev/null +++ b/tests/testthat/test-inner_split.R @@ -0,0 +1,56 @@ + +# mc --------------------------------------------------------------------- + +test_that("mc_split", { + set.seed(11) + r_set <- mc_cv(warpbreaks) + split_args <- get_split_args(r_set) + r_split <- get_rsplit(r_set, 1) + + isplit <- inner_split(r_split, split_args) + + expect_identical( + isplit$data, + analysis(r_split) + ) + + expect_identical( + analysis(isplit), + isplit$data[isplit$in_id, ], + ignore_attr = "row.names" + ) + expect_identical( + assessment(isplit), + isplit$data[isplit$out_id, ], + ignore_attr = "row.names" + ) +}) + +test_that("group_mc_split", { + skip_if_not_installed("modeldata") + + data(ames, package = "modeldata", envir = rlang::current_env()) + + set.seed(11) + r_set <- group_mc_cv(ames, "MS_SubClass") + split_args <- get_split_args(r_set) + r_split <- get_rsplit(r_set, 1) + + isplit <- inner_split(r_split, split_args) + + expect_identical( + isplit$data, + analysis(r_split) + ) + + expect_identical( + analysis(isplit), + isplit$data[isplit$in_id, ], + ignore_attr = "row.names" + ) + expect_identical( + assessment(isplit), + isplit$data[isplit$out_id, ], + ignore_attr = "row.names" + ) +}) From c966979090cbfa5ace6ab3ce3b9823b9a80e5cd1 Mon Sep 17 00:00:00 2001 From: Hannah Frick Date: Tue, 21 May 2024 14:50:47 +0100 Subject: [PATCH 03/10] add `inner_split()` method for vfold cv --- NAMESPACE | 2 ++ R/inner_split.R | 60 +++++++++++++++++++++++++++++++ man/inner_split.Rd | 6 ++++ tests/testthat/test-inner_split.R | 57 +++++++++++++++++++++++++++++ 4 files changed, 125 insertions(+) diff --git a/NAMESPACE b/NAMESPACE index 1b4f7bfb..c49fb132 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -25,8 +25,10 @@ S3method(dim,initial_validation_split) S3method(dim,rsplit) S3method(get_rsplit,default) S3method(get_rsplit,rset) +S3method(inner_split,group_vfold_split) S3method(inner_split,grouped_mc_split) S3method(inner_split,mc_split) +S3method(inner_split,vfold_split) S3method(int_bca,bootstraps) S3method(int_pctl,bootstraps) S3method(int_t,bootstraps) diff --git a/R/inner_split.R b/R/inner_split.R index 1f7bd51f..94437918 100644 --- a/R/inner_split.R +++ b/R/inner_split.R @@ -56,3 +56,63 @@ inner_split.grouped_mc_split <- function(x, split_args, ...) { split_inner <- add_class(split_inner, class_inner) split_inner } + + +# vfold ------------------------------------------------------------------ + +#' @rdname inner_split +#' @export +inner_split.vfold_split <- function(x, split_args, ...) { + check_dots_empty() + + analysis_set <- analysis(x) + + # TODO should this be done outside of rsample, + # in workflows or tune? + if (is.null(split_args$prop)) { + split_args$prop <- 1 - 1/split_args$v + } + # use mc_splits for a random split + split_inner <- mc_splits( + analysis_set, + prop = split_args$prop, + times = 1, + strata = split_args$strata, + breaks = split_args$breaks, + pool = split_args$pool + ) + split_inner <- split_inner$splits[[1]] + + class_inner <- paste0(class(x)[1], "_inner") + class(split_inner) <- c(class_inner, class(x)) + split_inner +} + +#' @rdname inner_split +#' @export +inner_split.group_vfold_split <- function(x, split_args, ...) { + check_dots_empty() + + analysis_set <- analysis(x) + + # TODO should this be done outside of rsample, + # in workflows or tune? + if (is.null(split_args$prop)) { + split_args$prop <- 1 - 1/split_args$v + } + + # use group_mc_splits for a random split + split_inner <- group_mc_splits( + analysis_set, + group = split_args$group, + prop = split_args$prop, + times = 1, + strata = split_args$strata, + pool = split_args$pool + ) + split_inner <- split_inner$splits[[1]] + + class_inner <- paste0(class(x)[1], "_inner") + class(split_inner) <- c(class_inner, class(x)) + split_inner +} diff --git a/man/inner_split.Rd b/man/inner_split.Rd index b528f6aa..02a9cc41 100644 --- a/man/inner_split.Rd +++ b/man/inner_split.Rd @@ -4,6 +4,8 @@ \alias{inner_split} \alias{inner_split.mc_split} \alias{inner_split.grouped_mc_split} +\alias{inner_split.vfold_split} +\alias{inner_split.group_vfold_split} \title{Inner split of the analysis set for fitting a post-processor} \usage{ inner_split(x, ...) @@ -11,6 +13,10 @@ inner_split(x, ...) \method{inner_split}{mc_split}(x, split_args, ...) \method{inner_split}{grouped_mc_split}(x, split_args, ...) + +\method{inner_split}{vfold_split}(x, split_args, ...) + +\method{inner_split}{group_vfold_split}(x, split_args, ...) } \arguments{ \item{x}{An \code{rsplit} object.} diff --git a/tests/testthat/test-inner_split.R b/tests/testthat/test-inner_split.R index 8d2aeb53..6254316d 100644 --- a/tests/testthat/test-inner_split.R +++ b/tests/testthat/test-inner_split.R @@ -54,3 +54,60 @@ test_that("group_mc_split", { ignore_attr = "row.names" ) }) + + +# vfold ------------------------------------------------------------------ + +test_that("vfold_split", { + set.seed(11) + r_set <- vfold_cv(warpbreaks, v = 5) + split_args <- get_split_args(r_set) + r_split <- get_rsplit(r_set, 1) + + isplit <- inner_split(r_split, split_args) + + expect_identical( + isplit$data, + analysis(r_split) + ) + + expect_identical( + analysis(isplit), + isplit$data[isplit$in_id, ], + ignore_attr = "row.names" + ) + expect_identical( + assessment(isplit), + isplit$data[isplit$out_id, ], + ignore_attr = "row.names" + ) +}) + +test_that("group_vfold_split", { + skip_if_not_installed("modeldata") + + data(ames, package = "modeldata", envir = rlang::current_env()) + + set.seed(11) + r_set <- group_vfold_cv(ames, "MS_SubClass") + split_args <- get_split_args(r_set) + r_split <- get_rsplit(r_set, 1) + + isplit <- inner_split(r_split, split_args) + + expect_identical( + isplit$data, + analysis(r_split) + ) + + expect_identical( + analysis(isplit), + isplit$data[isplit$in_id, ], + ignore_attr = "row.names" + ) + expect_identical( + assessment(isplit), + isplit$data[isplit$out_id, ], + ignore_attr = "row.names" + ) +}) From d306c59e294ae7c5e363f662896c459b28ca4f51 Mon Sep 17 00:00:00 2001 From: Hannah Frick Date: Tue, 21 May 2024 14:53:41 +0100 Subject: [PATCH 04/10] add `inner_split()` method for clustering cv --- NAMESPACE | 1 + R/inner_split.R | 25 +++++++++++++++++++++++++ man/inner_split.Rd | 3 +++ tests/testthat/test-inner_split.R | 29 +++++++++++++++++++++++++++++ 4 files changed, 58 insertions(+) diff --git a/NAMESPACE b/NAMESPACE index c49fb132..fa7dee7e 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -25,6 +25,7 @@ S3method(dim,initial_validation_split) S3method(dim,rsplit) S3method(get_rsplit,default) S3method(get_rsplit,rset) +S3method(inner_split,clustering_split) S3method(inner_split,group_vfold_split) S3method(inner_split,grouped_mc_split) S3method(inner_split,mc_split) diff --git a/R/inner_split.R b/R/inner_split.R index 94437918..59857c41 100644 --- a/R/inner_split.R +++ b/R/inner_split.R @@ -116,3 +116,28 @@ inner_split.group_vfold_split <- function(x, split_args, ...) { class(split_inner) <- c(class_inner, class(x)) split_inner } + +# clustering ------------------------------------------------------------- + +#' @rdname inner_split +#' @export +inner_split.clustering_split <- function(x, split_args, ...) { + check_dots_empty() + + analysis_set <- analysis(x) + + # TODO: reduce the number of clusters by 1 in tune? + split_inner <- clustering_cv( + analysis_set, + vars = split_args$vars, + v = split_args$v, + repeats = 1, + distance_function = split_args$distance_function, + cluster_function = split_args$cluster_function + ) + split_inner <- split_inner$splits[[1]] + + class_inner <- paste0(class(x)[1], "_inner") + class(split_inner) <- c(class_inner, class(x)) + split_inner +} diff --git a/man/inner_split.Rd b/man/inner_split.Rd index 02a9cc41..814f2d4b 100644 --- a/man/inner_split.Rd +++ b/man/inner_split.Rd @@ -6,6 +6,7 @@ \alias{inner_split.grouped_mc_split} \alias{inner_split.vfold_split} \alias{inner_split.group_vfold_split} +\alias{inner_split.clustering_split} \title{Inner split of the analysis set for fitting a post-processor} \usage{ inner_split(x, ...) @@ -17,6 +18,8 @@ inner_split(x, ...) \method{inner_split}{vfold_split}(x, split_args, ...) \method{inner_split}{group_vfold_split}(x, split_args, ...) + +\method{inner_split}{clustering_split}(x, split_args, ...) } \arguments{ \item{x}{An \code{rsplit} object.} diff --git a/tests/testthat/test-inner_split.R b/tests/testthat/test-inner_split.R index 6254316d..839ca9e2 100644 --- a/tests/testthat/test-inner_split.R +++ b/tests/testthat/test-inner_split.R @@ -111,3 +111,32 @@ test_that("group_vfold_split", { ignore_attr = "row.names" ) }) + + +# clustering ------------------------------------------------------------- + +test_that("clustering_split", { + set.seed(11) + r_set <- clustering_cv(warpbreaks, vars = breaks, v = 5) + split_args <- get_split_args(r_set) + r_split <- get_rsplit(r_set, 1) + + isplit <- inner_split(r_split, split_args) + + expect_identical( + isplit$data, + analysis(r_split) + ) + + expect_identical( + analysis(isplit), + isplit$data[isplit$in_id, ], + ignore_attr = "row.names" + ) + expect_identical( + assessment(isplit), + isplit$data[-isplit$in_id, ], + ignore_attr = "row.names" + ) +}) + From d668b7533699ef4d040bea3f201d395b57906bc4 Mon Sep 17 00:00:00 2001 From: Hannah Frick Date: Tue, 21 May 2024 14:55:31 +0100 Subject: [PATCH 05/10] add `inner_split()` method for apparent split --- NAMESPACE | 1 + R/inner_split.R | 18 ++++++++++++++++++ man/inner_split.Rd | 3 +++ tests/testthat/test-inner_split.R | 23 +++++++++++++++++++++++ 4 files changed, 45 insertions(+) diff --git a/NAMESPACE b/NAMESPACE index fa7dee7e..9b01b7e7 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -25,6 +25,7 @@ S3method(dim,initial_validation_split) S3method(dim,rsplit) S3method(get_rsplit,default) S3method(get_rsplit,rset) +S3method(inner_split,apparent_split) S3method(inner_split,clustering_split) S3method(inner_split,group_vfold_split) S3method(inner_split,grouped_mc_split) diff --git a/R/inner_split.R b/R/inner_split.R index 59857c41..02c9482c 100644 --- a/R/inner_split.R +++ b/R/inner_split.R @@ -141,3 +141,21 @@ inner_split.clustering_split <- function(x, split_args, ...) { class(split_inner) <- c(class_inner, class(x)) split_inner } + + +# apparent --------------------------------------------------------------- + +#' @rdname inner_split +#' @export +inner_split.apparent_split <- function(x, ...) { + check_dots_empty() + + analysis_set <- analysis(x) + + split_inner <- apparent(analysis_set) + split_inner <- split_inner$splits[[1]] + + class_inner <- paste0(class(x)[1], "_inner") + class(split_inner) <- c(class_inner, class(x)) + split_inner +} diff --git a/man/inner_split.Rd b/man/inner_split.Rd index 814f2d4b..4cbe9bef 100644 --- a/man/inner_split.Rd +++ b/man/inner_split.Rd @@ -7,6 +7,7 @@ \alias{inner_split.vfold_split} \alias{inner_split.group_vfold_split} \alias{inner_split.clustering_split} +\alias{inner_split.apparent_split} \title{Inner split of the analysis set for fitting a post-processor} \usage{ inner_split(x, ...) @@ -20,6 +21,8 @@ inner_split(x, ...) \method{inner_split}{group_vfold_split}(x, split_args, ...) \method{inner_split}{clustering_split}(x, split_args, ...) + +\method{inner_split}{apparent_split}(x, ...) } \arguments{ \item{x}{An \code{rsplit} object.} diff --git a/tests/testthat/test-inner_split.R b/tests/testthat/test-inner_split.R index 839ca9e2..8c7ed21e 100644 --- a/tests/testthat/test-inner_split.R +++ b/tests/testthat/test-inner_split.R @@ -140,3 +140,26 @@ test_that("clustering_split", { ) }) +# apparent --------------------------------------------------------------- + +test_that("apparent_split", { + set.seed(11) + r_set <- apparent(warpbreaks) + r_split <- get_rsplit(r_set, 1) + + isplit <- inner_split(r_split) + + expect_identical( + isplit$data, + analysis(r_split) + ) + + expect_identical( + analysis(isplit), + analysis(r_split) + ) + expect_identical( + assessment(isplit), + analysis(r_split) + ) +}) From 893b071a9bf85f756375fa1a95c643302b55fe5b Mon Sep 17 00:00:00 2001 From: Hannah Frick Date: Wed, 22 May 2024 21:06:16 +0100 Subject: [PATCH 06/10] a little metaprogramming to force errors for usused arguments if necessary, i.e. not swallow them silently. --- R/inner_split.R | 59 ++++++++++++++++++------------------------------- 1 file changed, 22 insertions(+), 37 deletions(-) diff --git a/R/inner_split.R b/R/inner_split.R index 02c9482c..d20cce83 100644 --- a/R/inner_split.R +++ b/R/inner_split.R @@ -18,14 +18,10 @@ inner_split.mc_split <- function(x, split_args, ...) { check_dots_empty() analysis_set <- analysis(x) - - split_inner <- mc_splits( - analysis_set, - prop = split_args$prop, - times = 1, - strata = split_args$strata, - breaks = split_args$breaks, - pool = split_args$pool + + split_args$times <- 1 + split_inner <- rlang::inject( + mc_splits(analysis_set, !!!split_args) ) split_inner <- split_inner$splits[[1]] @@ -41,14 +37,10 @@ inner_split.grouped_mc_split <- function(x, split_args, ...) { check_dots_empty() analysis_set <- analysis(x) - - split_inner <- group_mc_splits( - analysis_set, - group = split_args$group, - prop = split_args$prop, - times = 1, - strata = split_args$strata, - pool = split_args$pool + + split_args$times <- 1 + split_inner <- rlang::inject( + group_mc_splits(analysis_set, !!!split_args) ) split_inner <- split_inner$splits[[1]] @@ -73,13 +65,11 @@ inner_split.vfold_split <- function(x, split_args, ...) { split_args$prop <- 1 - 1/split_args$v } # use mc_splits for a random split - split_inner <- mc_splits( - analysis_set, - prop = split_args$prop, - times = 1, - strata = split_args$strata, - breaks = split_args$breaks, - pool = split_args$pool + split_args$times <- 1 + split_args$v <- NULL + split_args$repeats <- NULL + split_inner <- rlang::inject( + mc_splits(analysis_set, !!!split_args) ) split_inner <- split_inner$splits[[1]] @@ -102,13 +92,12 @@ inner_split.group_vfold_split <- function(x, split_args, ...) { } # use group_mc_splits for a random split - split_inner <- group_mc_splits( - analysis_set, - group = split_args$group, - prop = split_args$prop, - times = 1, - strata = split_args$strata, - pool = split_args$pool + split_args$times <- 1 + split_args$v <- NULL + split_args$repeats <- NULL + split_args$balance <- NULL + split_inner <- rlang::inject( + group_mc_splits(analysis_set, !!!split_args) ) split_inner <- split_inner$splits[[1]] @@ -127,13 +116,9 @@ inner_split.clustering_split <- function(x, split_args, ...) { analysis_set <- analysis(x) # TODO: reduce the number of clusters by 1 in tune? - split_inner <- clustering_cv( - analysis_set, - vars = split_args$vars, - v = split_args$v, - repeats = 1, - distance_function = split_args$distance_function, - cluster_function = split_args$cluster_function + split_args$repeats <- 1 + split_inner <- rlang::inject( + clustering_cv(analysis_set, !!!split_args) ) split_inner <- split_inner$splits[[1]] From f42e8fdbe5fc9bcf64082e585ed1464971c07017 Mon Sep 17 00:00:00 2001 From: Hannah Frick Date: Thu, 23 May 2024 10:51:31 +0100 Subject: [PATCH 07/10] export renamed helper for use in tune --- NAMESPACE | 1 + R/misc.R | 9 +++++++-- man/dot-get_split_args.Rd | 18 ++++++++++++++++++ tests/testthat/test-inner_split.R | 10 +++++----- 4 files changed, 31 insertions(+), 7 deletions(-) create mode 100644 man/dot-get_split_args.Rd diff --git a/NAMESPACE b/NAMESPACE index 9b01b7e7..55832fe4 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -349,6 +349,7 @@ S3method(vec_restore,validation_split) S3method(vec_restore,validation_time_split) S3method(vec_restore,vfold_cv) export(.get_fingerprint) +export(.get_split_args) export(add_resample_id) export(all_of) export(analysis) diff --git a/R/misc.R b/R/misc.R index 236f367f..3b1aa7bf 100644 --- a/R/misc.R +++ b/R/misc.R @@ -272,7 +272,7 @@ reshuffle_rset <- function(rset) { } rset_type <- class(rset)[[1]] - split_arguments <- get_split_args(rset) + split_arguments <- .get_split_args(rset) if (identical(split_arguments$strata, TRUE)) { rlang::abort( "Cannot reshuffle this rset (`attr(rset, 'strata')` is `TRUE`, not a column identifier)", @@ -295,7 +295,12 @@ non_random_classes <- c( "validation_set" ) -get_split_args <- function(rset) { +#' Get the split arguments from an rset +#' @param rset An `rset` object. +#' @return A list of arguments used to create the rset. +#' @keywords internal +#' @export +.get_split_args <- function(rset) { all_attributes <- attributes(rset) args <- names(formals(all_attributes$class[[1]])) split_args <- all_attributes[args] diff --git a/man/dot-get_split_args.Rd b/man/dot-get_split_args.Rd new file mode 100644 index 00000000..c4bccaaf --- /dev/null +++ b/man/dot-get_split_args.Rd @@ -0,0 +1,18 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/misc.R +\name{.get_split_args} +\alias{.get_split_args} +\title{Get the split arguments from an rset} +\usage{ +.get_split_args(rset) +} +\arguments{ +\item{rset}{An \code{rset} object.} +} +\value{ +A list of arguments used to create the rset. +} +\description{ +Get the split arguments from an rset +} +\keyword{internal} diff --git a/tests/testthat/test-inner_split.R b/tests/testthat/test-inner_split.R index 8c7ed21e..61153f2c 100644 --- a/tests/testthat/test-inner_split.R +++ b/tests/testthat/test-inner_split.R @@ -4,7 +4,7 @@ test_that("mc_split", { set.seed(11) r_set <- mc_cv(warpbreaks) - split_args <- get_split_args(r_set) + split_args <- .get_split_args(r_set) r_split <- get_rsplit(r_set, 1) isplit <- inner_split(r_split, split_args) @@ -33,7 +33,7 @@ test_that("group_mc_split", { set.seed(11) r_set <- group_mc_cv(ames, "MS_SubClass") - split_args <- get_split_args(r_set) + split_args <- .get_split_args(r_set) r_split <- get_rsplit(r_set, 1) isplit <- inner_split(r_split, split_args) @@ -61,7 +61,7 @@ test_that("group_mc_split", { test_that("vfold_split", { set.seed(11) r_set <- vfold_cv(warpbreaks, v = 5) - split_args <- get_split_args(r_set) + split_args <- .get_split_args(r_set) r_split <- get_rsplit(r_set, 1) isplit <- inner_split(r_split, split_args) @@ -90,7 +90,7 @@ test_that("group_vfold_split", { set.seed(11) r_set <- group_vfold_cv(ames, "MS_SubClass") - split_args <- get_split_args(r_set) + split_args <- .get_split_args(r_set) r_split <- get_rsplit(r_set, 1) isplit <- inner_split(r_split, split_args) @@ -118,7 +118,7 @@ test_that("group_vfold_split", { test_that("clustering_split", { set.seed(11) r_set <- clustering_cv(warpbreaks, vars = breaks, v = 5) - split_args <- get_split_args(r_set) + split_args <- .get_split_args(r_set) r_split <- get_rsplit(r_set, 1) isplit <- inner_split(r_split, split_args) From 171c83fcbad91f818cb19a8bfe858ad59013d9c4 Mon Sep 17 00:00:00 2001 From: Hannah Frick Date: Thu, 23 May 2024 10:57:09 +0100 Subject: [PATCH 08/10] update to new class for grouped MC --- NAMESPACE | 2 +- R/inner_split.R | 3 +-- man/inner_split.Rd | 4 ++-- 3 files changed, 4 insertions(+), 5 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index 55832fe4..d01b9f3f 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -27,8 +27,8 @@ S3method(get_rsplit,default) S3method(get_rsplit,rset) S3method(inner_split,apparent_split) S3method(inner_split,clustering_split) +S3method(inner_split,group_mc_split) S3method(inner_split,group_vfold_split) -S3method(inner_split,grouped_mc_split) S3method(inner_split,mc_split) S3method(inner_split,vfold_split) S3method(int_bca,bootstraps) diff --git a/R/inner_split.R b/R/inner_split.R index d20cce83..6b749348 100644 --- a/R/inner_split.R +++ b/R/inner_split.R @@ -32,8 +32,7 @@ inner_split.mc_split <- function(x, split_args, ...) { #' @rdname inner_split #' @export -inner_split.grouped_mc_split <- function(x, split_args, ...) { - # FIXME update this class to `group_mc_split` +inner_split.group_mc_split <- function(x, split_args, ...) { check_dots_empty() analysis_set <- analysis(x) diff --git a/man/inner_split.Rd b/man/inner_split.Rd index 4cbe9bef..81e89391 100644 --- a/man/inner_split.Rd +++ b/man/inner_split.Rd @@ -3,7 +3,7 @@ \name{inner_split} \alias{inner_split} \alias{inner_split.mc_split} -\alias{inner_split.grouped_mc_split} +\alias{inner_split.group_mc_split} \alias{inner_split.vfold_split} \alias{inner_split.group_vfold_split} \alias{inner_split.clustering_split} @@ -14,7 +14,7 @@ inner_split(x, ...) \method{inner_split}{mc_split}(x, split_args, ...) -\method{inner_split}{grouped_mc_split}(x, split_args, ...) +\method{inner_split}{group_mc_split}(x, split_args, ...) \method{inner_split}{vfold_split}(x, split_args, ...) From 3a6030aff9e60522e98379990289f43b98f95389 Mon Sep 17 00:00:00 2001 From: Hannah Frick Date: Thu, 23 May 2024 10:57:47 +0100 Subject: [PATCH 09/10] update NEWS --- NEWS.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/NEWS.md b/NEWS.md index 9404b5e8..75785b90 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,5 +1,7 @@ # rsample (development version) +* The new `inner_split()` function and its methods for various resamples is for usage in tune to create a inner resample of the analysis set to fit the preprocessor and model on one part and the post-processor on the other part (#483). + ## Bug fixes * `vfold_cv()` now utilizes the `breaks` argument correctly for repeated cross-validation (@ZWael, #471). From f915a34e002e7b1e59b29e9b991ab054e79917f1 Mon Sep 17 00:00:00 2001 From: Hannah Frick Date: Thu, 23 May 2024 11:27:16 +0100 Subject: [PATCH 10/10] add more documentation --- R/inner_split.R | 14 ++++++++++++++ man/inner_split.Rd | 16 ++++++++++++++++ 2 files changed, 30 insertions(+) diff --git a/R/inner_split.R b/R/inner_split.R index 6b749348..6cd442df 100644 --- a/R/inner_split.R +++ b/R/inner_split.R @@ -4,6 +4,20 @@ #' @param split_args A list of arguments to be used for the inner split. #' @param ... Not currently used. #' @return An `rsplit` object. +#' @details +#' `rsplit` objects live most commonly inside of an `rset` object. The +#' `split_args` argument can be the output of [.get_split_args()] on that +#' corresponding `rset` object, even if some of the arguments used to creat the +#' `rset` object are not needed for the inner split. +#' * For `mc_split` and `group_mc_split` objects, `inner_split()` will ignore +#' `split_args$times`. +#' * For `vfold_split` and `group_vfold_split` objects, it will ignore +#' `split_args$times` and `split_args$repeats`. `split_args$v` will be used to +#' set `split_args$prop` to `1 - 1/v` if `prop` is not already set and otherwise +#' ignored. The method +#' for `group_vfold_split` will always use `split_args$balance = NULL`. +#' * For `clustering_split` objects, it will ignore `split_args$repeats`. +#' #' @keywords internal #' @export inner_split <- function(x, ...) { diff --git a/man/inner_split.Rd b/man/inner_split.Rd index 81e89391..b4114927 100644 --- a/man/inner_split.Rd +++ b/man/inner_split.Rd @@ -37,4 +37,20 @@ An \code{rsplit} object. \description{ Inner split of the analysis set for fitting a post-processor } +\details{ +\code{rsplit} objects live most commonly inside of an \code{rset} object. The +\code{split_args} argument can be the output of \code{\link[=.get_split_args]{.get_split_args()}} on that +corresponding \code{rset} object, even if some of the arguments used to creat the +\code{rset} object are not needed for the inner split. +\itemize{ +\item For \code{mc_split} and \code{group_mc_split} objects, \code{inner_split()} will ignore +\code{split_args$times}. +\item For \code{vfold_split} and \code{group_vfold_split} objects, it will ignore +\code{split_args$times} and \code{split_args$repeats}. \code{split_args$v} will be used to +set \code{split_args$prop} to \code{1 - 1/v} if \code{prop} is not already set and otherwise +ignored. The method +for \code{group_vfold_split} will always use \code{split_args$balance = NULL}. +\item For \code{clustering_split} objects, it will ignore \code{split_args$repeats}. +} +} \keyword{internal}