diff --git a/R/adjust-equivocal-zone.R b/R/adjust-equivocal-zone.R index 34c68ce..1375b7a 100644 --- a/R/adjust-equivocal-zone.R +++ b/R/adjust-equivocal-zone.R @@ -5,23 +5,34 @@ #' value is the size of the buffer around the threshold. #' @param threshold A numeric value (between zero and one) or [hardhat::tune()]. #' @examplesIf rlang::is_installed("modeldata") -#' library(dplyr) #' library(modeldata) #' -#' post_obj <- +#' head(two_class_example) +#' +#' # `predicted` gives hard class predictions based on probabilities +#' two_class_example %>% count(predicted) +#' +#' # when probabilities are within (.25, .75), consider them equivocal +#' tlr <- #' tailor() %>% #' adjust_equivocal_zone(value = 1 / 4) #' +#' tlr #' -#' post_res <- fit( -#' post_obj, +#' # fit by supplying column names. situate in a modeling workflow +#' # with `workflows::add_tailor()` to avoid having to do so manually +#' tlr_fit <- fit( +#' tlr, #' two_class_example, #' outcome = c(truth), #' estimate = c(predicted), #' probabilities = c(Class1, Class2) #' ) #' -#' predict(post_res, two_class_example) +#' tlr_fit +#' +#' # adjust hard class predictions +#' predict(tlr_fit, two_class_example) %>% count(predicted) #' @export adjust_equivocal_zone <- function(x, value = 0.1, threshold = 1 / 2) { check_tailor(x) diff --git a/R/adjust-numeric-calibration.R b/R/adjust-numeric-calibration.R index fff0eeb..ea8bd49 100644 --- a/R/adjust-numeric-calibration.R +++ b/R/adjust-numeric-calibration.R @@ -6,26 +6,29 @@ #' package [probably::cal_estimate_linear()], #' [probably::cal_estimate_isotonic()], or #' [probably::cal_estimate_isotonic_boot()], respectively. -#' @examplesIf rlang::is_installed("modeldata") -#' library(modeldata) -#' library(probably) +#' @examples #' library(tibble) #' #' # create example data #' set.seed(1) -#' dat <- tibble(y = rnorm(100), y_pred = y/2 + rnorm(100)) +#' d_potato <- tibble(y = rnorm(100), y_pred = y/2 + rnorm(100)) +#' d_test <- tibble(y = rnorm(100), y_pred = y/2 + rnorm(100)) #' -#' dat +#' d_potato #' #' # specify calibration -#' reg_tailor <- +#' tlr <- #' tailor() %>% #' adjust_numeric_calibration(method = "linear") #' -#' # train tailor -#' reg_tailor_trained <- fit(reg_tailor, dat, outcome = y, estimate = y_pred) +#' # train tailor on a subset of data. situate in a modeling workflow with +#' # `workflows::add_tailor()` to avoid having to specify column names manually +#' tlr_fit <- fit(tlr, d_potato, outcome = y, estimate = y_pred) #' -#' predict(reg_tailor_trained, dat) +#' # apply to predictions on another subset of data +#' d_test +#' +#' predict(tlr_fit, d_test) #' @export adjust_numeric_calibration <- function(x, method = NULL) { check_tailor(x, calibration_type = "numeric") diff --git a/R/adjust-numeric-range.R b/R/adjust-numeric-range.R index e40b022..c79f08e 100644 --- a/R/adjust-numeric-range.R +++ b/R/adjust-numeric-range.R @@ -3,6 +3,26 @@ #' @param x A [tailor()]. #' @param upper_limit,lower_limit A numeric value, NA (for no truncation) or #' [hardhat::tune()]. +#' +#' @examplesIf FALSE +# # TODO: unskip -- fn currently requires estimate to be called `.pred` (#22) +#' library(tibble) +#' +#' # create example data +#' set.seed(1) +#' d <- tibble(y = rnorm(100), y_pred = y/2 + rnorm(100)) +#' d +#' +#' # specify calibration +#' tlr <- +#' tailor() %>% +#' adjust_numeric_range(lower_limit = 1) +#' +#' # train tailor by passing column names. situate in a modeling workflow with +#' # `workflows::add_tailor()` to avoid having to specify column names manually +#' tlr_fit <- fit(tlr, d, outcome = y, estimate = y_pred) +#' +#' predict(tlr_fit, d) #' @export adjust_numeric_range <- function(x, lower_limit = -Inf, upper_limit = Inf) { # remaining input checks are done via probably::bound_prediction diff --git a/R/adjust-predictions-custom.R b/R/adjust-predictions-custom.R index 3c63699..d304f9c 100644 --- a/R/adjust-predictions-custom.R +++ b/R/adjust-predictions-custom.R @@ -5,24 +5,24 @@ #' the commands. #' @param ... Name-value pairs of expressions. See [dplyr::mutate()]. #' @examplesIf rlang::is_installed("modeldata") -#' library(dplyr) #' library(modeldata) #' -#' post_obj <- +#' head(two_class_example) +#' +#' tlr <- #' tailor() %>% #' adjust_equivocal_zone() %>% #' adjust_predictions_custom(linear_predictor = binomial()$linkfun(Class2)) #' -#' -#' post_res <- fit( -#' post_obj, +#' tlr_fit <- fit( +#' tlr, #' two_class_example, #' outcome = c(truth), #' estimate = c(predicted), #' probabilities = c(Class1, Class2) #' ) #' -#' predict(post_res, two_class_example) +#' predict(tlr_fit, two_class_example) %>% head() #' @export adjust_predictions_custom <- function(x, ..., .pkgs = character(0)) { check_tailor(x) diff --git a/R/adjust-probability-calibration.R b/R/adjust-probability-calibration.R index d4debc8..7d30ced 100644 --- a/R/adjust-probability-calibration.R +++ b/R/adjust-probability-calibration.R @@ -5,6 +5,40 @@ #' `"beta"`, `"isotonic"`, or `"isotonic_boot"`, corresponding to the #' function from the \pkg{probably} package [probably::cal_estimate_logistic()], #' [probably::cal_estimate_multinomial()], etc., respectively. +#' +# TODO: see #36 +#' @examplesIf FALSE +# @examplesIf rlang::is_installed("modeldata") +#' library(modeldata) +#' +#' # split example data +#' set.seed(1) +#' in_rows <- sample(c(TRUE, FALSE), nrow(two_class_example), replace = TRUE) +#' d_potato <- two_class_example[in_rows, ] +#' d_test <- two_class_example[!in_rows, ] +#' +#' head(d_potato) +#' +#' # specify calibration +#' tlr <- +#' tailor() %>% +#' adjust_probability_calibration(method = "logistic") +#' +#' # train tailor on a subset of data. situate in a modeling workflow with +#' # `workflows::add_tailor()` to avoid having to specify column names manually +#' tlr_fit <- fit( +#' tlr, +#' d_potato, +#' outcome = c(truth), +#' estimate = c(predicted), +#' probabilities = c(Class1, Class2) +#' ) +#' +#' # apply to predictions on another subset of data +#' head(d_test) +#' +#' predict(tlr_fit, d_test) +#' #' @export adjust_probability_calibration <- function(x, method = NULL) { check_tailor(x, calibration_type = "probability") diff --git a/R/adjust-probability-threshold.R b/R/adjust-probability-threshold.R index ba5a333..3cba71d 100644 --- a/R/adjust-probability-threshold.R +++ b/R/adjust-probability-threshold.R @@ -3,24 +3,28 @@ #' @param x A [tailor()]. #' @param threshold A numeric value (between zero and one) or [hardhat::tune()]. #' @examplesIf rlang::is_installed("modeldata") -#' library(dplyr) #' library(modeldata) #' -#' post_obj <- -#' tailor() %>% -#' adjust_probability_threshold(threshold = .1) +#' # `predicted` gives hard class predictions based on probability threshold .5 +#' head(two_class_example) #' -#' two_class_example %>% count(predicted) +#' # use a threshold of .1 instead: +#' tlr <- +#' tailor() %>% +#' adjust_probability_threshold(.1) #' -#' post_res <- fit( -#' post_obj, +#' # fit by supplying column names. situate in a modeling workflow +#' # with `workflows::add_tailor()` to avoid having to do so manually +#' tlr_fit <- fit( +#' tlr, #' two_class_example, #' outcome = c(truth), #' estimate = c(predicted), #' probabilities = c(Class1, Class2) #' ) #' -#' predict(post_res, two_class_example) %>% count(predicted) +#' # adjust hard class predictions +#' predict(tlr_fit, two_class_example) %>% head() #' @export adjust_probability_threshold <- function(x, threshold = 0.5) { check_tailor(x) diff --git a/R/tailor.R b/R/tailor.R index 180e7b8..8932a47 100644 --- a/R/tailor.R +++ b/R/tailor.R @@ -28,7 +28,6 @@ #' classification, these should be given in the order of the factor levels of #' the `estimate`. #' @examplesIf rlang::is_installed("modeldata") -#' library(dplyr) #' library(modeldata) #' #' # `predicted` gives hard class predictions based on probabilities diff --git a/man/adjust_equivocal_zone.Rd b/man/adjust_equivocal_zone.Rd index 87d66cb..67a8f57 100644 --- a/man/adjust_equivocal_zone.Rd +++ b/man/adjust_equivocal_zone.Rd @@ -19,22 +19,33 @@ Apply an equivocal zone to a binary classification model. } \examples{ \dontshow{if (rlang::is_installed("modeldata")) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} -library(dplyr) library(modeldata) -post_obj <- +head(two_class_example) + +# `predicted` gives hard class predictions based on probabilities +two_class_example \%>\% count(predicted) + +# when probabilities are within (.25, .75), consider them equivocal +tlr <- tailor() \%>\% adjust_equivocal_zone(value = 1 / 4) +tlr -post_res <- fit( - post_obj, +# fit by supplying column names. situate in a modeling workflow +# with `workflows::add_tailor()` to avoid having to do so manually +tlr_fit <- fit( + tlr, two_class_example, outcome = c(truth), estimate = c(predicted), probabilities = c(Class1, Class2) ) -predict(post_res, two_class_example) +tlr_fit + +# adjust hard class predictions +predict(tlr_fit, two_class_example) \%>\% count(predicted) \dontshow{\}) # examplesIf} } diff --git a/man/adjust_numeric_calibration.Rd b/man/adjust_numeric_calibration.Rd index 9c79b7a..51aa081 100644 --- a/man/adjust_numeric_calibration.Rd +++ b/man/adjust_numeric_calibration.Rd @@ -19,25 +19,26 @@ package \code{\link[probably:cal_estimate_linear]{probably::cal_estimate_linear( Re-calibrate numeric predictions } \examples{ -\dontshow{if (rlang::is_installed("modeldata")) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} -library(modeldata) -library(probably) library(tibble) # create example data set.seed(1) -dat <- tibble(y = rnorm(100), y_pred = y/2 + rnorm(100)) +d_potato <- tibble(y = rnorm(100), y_pred = y/2 + rnorm(100)) +d_test <- tibble(y = rnorm(100), y_pred = y/2 + rnorm(100)) -dat +d_potato # specify calibration -reg_tailor <- +tlr <- tailor() \%>\% adjust_numeric_calibration(method = "linear") -# train tailor -reg_tailor_trained <- fit(reg_tailor, dat, outcome = y, estimate = y_pred) +# train tailor on a subset of data. situate in a modeling workflow with +# `workflows::add_tailor()` to avoid having to specify column names manually +tlr_fit <- fit(tlr, d_potato, outcome = y, estimate = y_pred) -predict(reg_tailor_trained, dat) -\dontshow{\}) # examplesIf} +# apply to predictions on another subset of data +d_test + +predict(tlr_fit, d_test) } diff --git a/man/adjust_numeric_range.Rd b/man/adjust_numeric_range.Rd index 451a7d8..2f8d413 100644 --- a/man/adjust_numeric_range.Rd +++ b/man/adjust_numeric_range.Rd @@ -15,3 +15,24 @@ adjust_numeric_range(x, lower_limit = -Inf, upper_limit = Inf) \description{ Truncate the range of numeric predictions } +\examples{ +\dontshow{if (FALSE) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} +library(tibble) + +# create example data +set.seed(1) +d <- tibble(y = rnorm(100), y_pred = y/2 + rnorm(100)) +d + +# specify calibration +tlr <- + tailor() \%>\% + adjust_numeric_range(lower_limit = 1) + +# train tailor by passing column names. situate in a modeling workflow with +# `workflows::add_tailor()` to avoid having to specify column names manually +tlr_fit <- fit(tlr, d, outcome = y, estimate = y_pred) + +predict(tlr_fit, d) +\dontshow{\}) # examplesIf} +} diff --git a/man/adjust_predictions_custom.Rd b/man/adjust_predictions_custom.Rd index 85cd7a9..38847bd 100644 --- a/man/adjust_predictions_custom.Rd +++ b/man/adjust_predictions_custom.Rd @@ -19,23 +19,23 @@ Change or add variables } \examples{ \dontshow{if (rlang::is_installed("modeldata")) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} -library(dplyr) library(modeldata) -post_obj <- +head(two_class_example) + +tlr <- tailor() \%>\% adjust_equivocal_zone() \%>\% adjust_predictions_custom(linear_predictor = binomial()$linkfun(Class2)) - -post_res <- fit( - post_obj, +tlr_fit <- fit( + tlr, two_class_example, outcome = c(truth), estimate = c(predicted), probabilities = c(Class1, Class2) ) -predict(post_res, two_class_example) +predict(tlr_fit, two_class_example) \%>\% head() \dontshow{\}) # examplesIf} } diff --git a/man/adjust_probability_calibration.Rd b/man/adjust_probability_calibration.Rd index c4120c5..644de96 100644 --- a/man/adjust_probability_calibration.Rd +++ b/man/adjust_probability_calibration.Rd @@ -17,3 +17,36 @@ function from the \pkg{probably} package \code{\link[probably:cal_estimate_logis \description{ Re-calibrate classification probability predictions } +\examples{ +\dontshow{if (FALSE) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} +library(modeldata) + +# split example data +set.seed(1) +in_rows <- sample(c(TRUE, FALSE), nrow(two_class_example), replace = TRUE) +d_potato <- two_class_example[in_rows, ] +d_test <- two_class_example[!in_rows, ] + +head(d_potato) + +# specify calibration +tlr <- + tailor() \%>\% + adjust_probability_calibration(method = "logistic") + +# train tailor on a subset of data. situate in a modeling workflow with +# `workflows::add_tailor()` to avoid having to specify column names manually +tlr_fit <- fit( + tlr, + d_potato, + outcome = c(truth), + estimate = c(predicted), + probabilities = c(Class1, Class2) +) + +# apply to predictions on another subset of data +head(d_test) + +predict(tlr_fit, d_test) +\dontshow{\}) # examplesIf} +} diff --git a/man/adjust_probability_threshold.Rd b/man/adjust_probability_threshold.Rd index 14128b1..2da6da3 100644 --- a/man/adjust_probability_threshold.Rd +++ b/man/adjust_probability_threshold.Rd @@ -16,23 +16,27 @@ Change the event threshold } \examples{ \dontshow{if (rlang::is_installed("modeldata")) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} -library(dplyr) library(modeldata) -post_obj <- - tailor() \%>\% - adjust_probability_threshold(threshold = .1) +# `predicted` gives hard class predictions based on probability threshold .5 +head(two_class_example) -two_class_example \%>\% count(predicted) +# use a threshold of .1 instead: +tlr <- + tailor() \%>\% + adjust_probability_threshold(.1) -post_res <- fit( - post_obj, +# fit by supplying column names. situate in a modeling workflow +# with `workflows::add_tailor()` to avoid having to do so manually +tlr_fit <- fit( + tlr, two_class_example, outcome = c(truth), estimate = c(predicted), probabilities = c(Class1, Class2) ) -predict(post_res, two_class_example) \%>\% count(predicted) +# adjust hard class predictions +predict(tlr_fit, two_class_example) \%>\% head() \dontshow{\}) # examplesIf} } diff --git a/man/tailor.Rd b/man/tailor.Rd index fb8b01f..0f1158d 100644 --- a/man/tailor.Rd +++ b/man/tailor.Rd @@ -40,7 +40,6 @@ of use, situate tailors in model workflows with \code{\link[workflows:add_tailor } \examples{ \dontshow{if (rlang::is_installed("modeldata")) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} -library(dplyr) library(modeldata) # `predicted` gives hard class predictions based on probabilities