From 49eab2b52204c6186970f62fde6027af36b1e906 Mon Sep 17 00:00:00 2001 From: Emily Howerton <46577370+eahowerton@users.noreply.github.com> Date: Thu, 6 Jun 2024 12:24:24 -0400 Subject: [PATCH 01/46] start transformation functions --- R/convert_output_types.R | 100 +++++++++ tests/testthat/test-convert_output_types.R | 247 +++++++++++++++++++++ 2 files changed, 347 insertions(+) create mode 100644 R/convert_output_types.R create mode 100644 tests/testthat/test-convert_output_types.R diff --git a/R/convert_output_types.R b/R/convert_output_types.R new file mode 100644 index 00000000..62755925 --- /dev/null +++ b/R/convert_output_types.R @@ -0,0 +1,100 @@ +#' assume starting output type is sample, inputs have been validated +#' support one input output_type and one output output_type +#' +#' @param new_output_type vector of strings indicating the desired output_type after +#' transformation; can be `"mean"`, `"median"`, `"quantile"`, `"cdf"` +#' @param new_output_type_id corresponding output_type_ids, list if multiple output_type ids +convert_output_type <- function(model_outputs, group_by_cols, + new_output_type, new_output_type_id, + n_samples = 1e4, ...){ + # for cdf and quantile functions, get samples + starting_output_type = model_outputs$output_type %>% unique() + starting_output_type_ids = model_outputs$output_type_id %>% unique() + if(starting_output_type == "cdf"){ + # estimate from samples + model_outputs <- get_samples_from_cdf(model_outputs, group_by_cols, n_samples) + } + else if(starting_output_type == "quantile"){ + # if median output desired, and Q50 provided return exact value + if(new_output_type == "median" & 0.5 %in% starting_output_type_ids){ + model_outputs_transform <- model_outputs %>% + dplyr::filter(output_type_id == 0.5) %>% + dplyr::mutate(output_type = new_output_type, + output_type_id = new_output_type_id) %>% + hubUtils::as_model_out_tbl() + return(model_outputs_transform) + } + # otherwise, estimate from samples + else{ + model_outputs <- get_samples_from_quantiles(model_outputs, group_by_cols, n_samples) + } + } + # transform based on new_output_type + grouped_model_outputs <- model_outputs %>% + dplyr::group_by(model_id, dplyr::across(dplyr::all_of(group_by_cols))) + model_outputs_transform <- convert_from_sample( + grouped_model_outputs, new_output_type, new_output_type_id + ) + return(model_outputs_transform) +} + +get_samples_from_quantiles <- function(model_outputs, group_by_cols, n_samples, ...){ + set.seed(101) + samples <- model_outputs %>% + dplyr::group_by(model_id, dplyr::across(dplyr::all_of(group_by_cols))) %>% + dplyr::summarize( + value = distfromq::make_q_fn( + ps = as.numeric(.data$output_type_id), + qs = .data$value, ... + )(runif(n_samples, 0, 1)), + .groups = "drop" + ) + + + return(samples) +} + +get_samples_from_cdf <- function(model_outputs, group_by_cols, n_samples, ...){ + set.seed(101) + samples <- model_outputs %>% + dplyr::group_by(model_id, dplyr::across(dplyr::all_of(group_by_cols))) %>% + dplyr::summarize( + value = distfromq::make_q_fn( + ps = .data$value, + qs = as.numeric(.data$output_type_id), ... + )(runif(n_samples, 0, 1)), + .groups = "drop" + ) + return(samples) +} + +convert_from_sample <- function(grouped_model_outputs, new_output_type, + new_output_type_id){ + if(new_output_type == "mean"){ + model_outputs_transform <- grouped_model_outputs %>% + dplyr::reframe(value = mean(value), + output_type_id = new_output_type_id) + } + else if(new_output_type == "median"){ + model_outputs_transform <- grouped_model_outputs %>% + dplyr::reframe(value = median(value), + output_type_id = new_output_type_id) + } + else if(new_output_type == "quantile"){ + model_outputs_transform <- grouped_model_outputs %>% + dplyr::reframe(value = quantile(value, as.numeric(new_output_type_id), + names = FALSE), + output_type_id = new_output_type_id) + } + else if(new_output_type == "cdf"){ + model_outputs_transform <- grouped_model_outputs %>% + dplyr::reframe(value = ecdf(value)(as.numeric(new_output_type_id)), + output_type_id = new_output_type_id) + } + # update output_type and output_type_id columns + model_outputs_transform <- model_outputs_transform %>% + dplyr::mutate(output_type = new_output_type) %>% + hubUtils::as_model_out_tbl() + return(model_outputs_transform) +} + diff --git a/tests/testthat/test-convert_output_types.R b/tests/testthat/test-convert_output_types.R new file mode 100644 index 00000000..c50444ba --- /dev/null +++ b/tests/testthat/test-convert_output_types.R @@ -0,0 +1,247 @@ +### test convert_output_type() +test_that("convert_output_type works (quantile >> mean)", { + ex_qs <- seq(0,1,length.out = 500)[2:499] + model_outputs <- expand.grid( + grp1 = 1:2, + model_id = LETTERS[1:2], + output_type = "quantile", + output_type_id = ex_qs, + stringsAsFactors = FALSE + ) %>% + dplyr::mutate(mean = grp1*ifelse(model_id == "A", 1, 3), + value = qnorm(ex_qs, mean)) %>% + dplyr::select(-mean) + new_output_type = "mean" + new_output_type_id = NA + expected <- tibble::tibble( + grp1 = rep(1:2,2), model_id = sort(rep(LETTERS[1:2],2)) + ) %>% + dplyr:: mutate(value = grp1*ifelse(model_id == "A", 1, 3)) %>% + dplyr::mutate(output_type = new_output_type, + output_type_id = new_output_type_id) %>% + hubUtils::as_model_out_tbl() + test <- convert_output_type(model_outputs, group_by_cols = c("grp1"), + new_output_type, new_output_type_id) + expect_equal(expected, test, tolerance = 1e-2) +}) + +test_that("convert_output_type works (quantile >> median)", { + ex_qs <- seq(0,1,length.out = 500)[2:499] + model_outputs <- expand.grid( + grp1 = 1:2, + model_id = LETTERS[1:2], + output_type = "quantile", + output_type_id = ex_qs, + stringsAsFactors = FALSE + ) %>% + dplyr::mutate(mean = grp1*ifelse(model_id == "A", 1, 3), + value = qnorm(ex_qs, mean)) %>% + dplyr::select(-mean) + new_output_type = "median" + new_output_type_id = NA + expected <- tibble::tibble( + grp1 = rep(1:2,2), model_id = sort(rep(LETTERS[1:2],2)) + ) %>% + dplyr:: mutate(value = grp1*ifelse(model_id == "A", 1, 3)) %>% + dplyr::mutate(output_type = new_output_type, + output_type_id = new_output_type_id) %>% + hubUtils::as_model_out_tbl() + test <- convert_output_type(model_outputs, group_by_cols = c("grp1"), + new_output_type, new_output_type_id) + expect_equal(expected, test, tolerance = 1e-2) +}) + +test_that("convert_output_type works (quantile >> cdf)", { + ex_qs <- seq(0,1,length.out = 500)[2:499] + model_outputs <- expand.grid( + grp1 = 1:2, + model_id = LETTERS[1:2], + output_type = "quantile", + output_type_id = ex_qs, + stringsAsFactors = FALSE + ) %>% + dplyr::mutate(mean = grp1*ifelse(model_id == "A", 1, 3), + value = qnorm(ex_qs, mean)) %>% + dplyr::select(-mean) + new_output_type = "cdf" + new_output_type_id = seq(-2,2,0.5) + expected <- tibble::as_tibble(expand.grid( + grp1 = 1:2, + model_id = LETTERS[1:2], + output_type = new_output_type, + output_type_id = new_output_type_id, + KEEP.OUT.ATTRS = FALSE, + stringsAsFactors = FALSE + )) %>% + dplyr:: mutate(value = pnorm(output_type_id, grp1*ifelse(model_id == "A", 1, 3))) %>% + dplyr::arrange(model_id, grp1) %>% + hubUtils::as_model_out_tbl() + test <- convert_output_type(model_outputs, group_by_cols = c("grp1"), + new_output_type, new_output_type_id) + expect_equal(expected, test, tolerance = 1e-2) +}) + +test_that("convert_output_type works (cdf >> mean)", { + ex_ps <- seq(-2,10,length.out = 500)[2:499] + model_outputs <- expand.grid( + grp1 = 1:2, + model_id = LETTERS[1:2], + output_type = "cdf", + output_type_id = ex_ps, + stringsAsFactors = FALSE + ) %>% + dplyr::mutate(mean = grp1*ifelse(model_id == "A", 1, 3), + value = pnorm(output_type_id, mean)) %>% + dplyr::select(-mean) + new_output_type = "mean" + new_output_type_id = NA + expected <- tibble::tibble( + grp1 = rep(1:2,2), model_id = sort(rep(LETTERS[1:2],2)) + ) %>% + dplyr:: mutate(value = grp1*ifelse(model_id == "A", 1, 3)) %>% + dplyr::mutate(output_type = new_output_type, + output_type_id = new_output_type_id) %>% + hubUtils::as_model_out_tbl() + test <- convert_output_type(model_outputs, group_by_cols = c("grp1"), + new_output_type, new_output_type_id) + expect_equal(expected, test, tolerance = 1e-2) +}) + +test_that("convert_output_type works (cdf >> median)", { + ex_ps <- seq(-2,10,length.out = 500)[2:499] + model_outputs <- expand.grid( + grp1 = 1:2, + model_id = LETTERS[1:2], + output_type = "cdf", + output_type_id = ex_ps, + stringsAsFactors = FALSE + ) %>% + dplyr::mutate(mean = grp1*ifelse(model_id == "A", 1, 3), + value = pnorm(output_type_id, mean)) %>% + dplyr::select(-mean) + new_output_type = "median" + new_output_type_id = NA + expected <- tibble::tibble( + grp1 = rep(1:2,2), model_id = sort(rep(LETTERS[1:2],2)) + ) %>% + dplyr:: mutate(value = grp1*ifelse(model_id == "A", 1, 3)) %>% + dplyr::mutate(output_type = new_output_type, + output_type_id = new_output_type_id) %>% + hubUtils::as_model_out_tbl() + test <- convert_output_type(model_outputs, group_by_cols = c("grp1"), + new_output_type, new_output_type_id) + expect_equal(expected, test, tolerance = 1e-2) +}) + +test_that("convert_output_type works (cdf >> quantile)", { + ex_ps <- seq(-2,10,length.out = 500)[2:499] + model_outputs <- expand.grid( + grp1 = 1:2, + model_id = LETTERS[1:2], + output_type = "cdf", + output_type_id = ex_ps, + stringsAsFactors = FALSE + ) %>% + dplyr::mutate(mean = grp1*ifelse(model_id == "A", 1, 3), + value = pnorm(output_type_id, mean)) %>% + dplyr::select(-mean) + new_output_type = "quantile" + new_output_type_id = c(0.25, 0.5, 0.75) + expected <- tibble::as_tibble(expand.grid( + grp1 = 1:2, + model_id = LETTERS[1:2], + output_type = new_output_type, + output_type_id = new_output_type_id, + KEEP.OUT.ATTRS = FALSE, + stringsAsFactors = FALSE + )) %>% + dplyr:: mutate(value = qnorm(output_type_id, grp1*ifelse(model_id == "A", 1, 3))) %>% + dplyr::arrange(model_id, grp1) %>% + hubUtils::as_model_out_tbl() + test <- convert_output_type(model_outputs, group_by_cols = c("grp1"), + new_output_type, new_output_type_id) + expect_equal(test, expected, tolerance = 1e-2) +}) + +### test convert_from_sample() +test_that("convert_from_sample works (return mean)", { + grouped_model_outputs = expand.grid( + grp1 = 1:2, + model_id = LETTERS[1:2], + output_type_id = 1:5 + ) %>% + dplyr::mutate(value = grp1*ifelse(model_id == "A", 1, 3)*output_type_id) %>% + dplyr::group_by(grp1, model_id) + new_output_type = "mean" + new_output_type_id = NA + expected <- grouped_model_outputs %>% + dplyr::summarize(value = mean(value), .groups = "drop") %>% + dplyr::mutate(output_type = new_output_type, + output_type_id = new_output_type_id) %>% + hubUtils::as_model_out_tbl() + test <- convert_from_sample(grouped_model_outputs, new_output_type, + new_output_type_id) + expect_equal(expected, test) +}) + +test_that("convert_from_sample works (return median)", { + grouped_model_outputs = expand.grid( + grp1 = 1:2, + model_id = LETTERS[1:2], + output_type_id = 1:5 + ) %>% + dplyr::mutate(value = grp1*ifelse(model_id == "A", 1, 3)*output_type_id) %>% + dplyr::group_by(grp1, model_id) + new_output_type = "median" + new_output_type_id = NA + expected <- grouped_model_outputs %>% + dplyr::summarize(value = median(value), .groups = "drop") %>% + dplyr::mutate(output_type = new_output_type, + output_type_id = new_output_type_id) %>% + hubUtils::as_model_out_tbl() + test <- convert_from_sample(grouped_model_outputs, new_output_type, + new_output_type_id) + expect_equal(expected, test) +}) + +test_that("convert_from_sample works (return quantile)", { + grouped_model_outputs = expand.grid( + grp1 = 1:2, + model_id = LETTERS[1:2], + output_type_id = 1:5 + ) %>% + dplyr::mutate(value = grp1*ifelse(model_id == "A", 1, 3)*output_type_id) %>% + dplyr::group_by(grp1, model_id) + new_output_type = "quantile" + new_output_type_id = c(0.25, 0.75) + expected <- grouped_model_outputs %>% + dplyr::reframe(value = quantile(value, new_output_type_id, names = FALSE), + output_type_id = new_output_type_id) %>% + dplyr::mutate(output_type = new_output_type) %>% + hubUtils::as_model_out_tbl() + test <- convert_from_sample(grouped_model_outputs, new_output_type, + new_output_type_id) + expect_equal(expected, test) +}) + +test_that("convert_from_sample works (return cdf)", { + grouped_model_outputs = expand.grid( + grp1 = 1:2, + model_id = LETTERS[1:2], + output_type_id = 1:5 + ) %>% + dplyr::mutate(value = grp1*ifelse(model_id == "A", 1, 3)*output_type_id) %>% + dplyr::group_by(grp1, model_id) + new_output_type = "cdf" + new_output_type_id = seq(0,30, 5) + expected <- grouped_model_outputs %>% + dplyr::reframe(value = ecdf(value)(new_output_type_id), + output_type_id = new_output_type_id) %>% + dplyr::mutate(output_type = new_output_type) %>% + hubUtils::as_model_out_tbl() + test <- convert_from_sample(grouped_model_outputs, new_output_type, + new_output_type_id) + expect_equal(expected, test) +}) + + From bca3fab4617ddd0c9cad0f364bda53d6f7ab60b7 Mon Sep 17 00:00:00 2001 From: Emily Howerton <46577370+eahowerton@users.noreply.github.com> Date: Thu, 6 Jun 2024 12:51:50 -0400 Subject: [PATCH 02/46] Add documentation --- R/convert_output_types.R | 46 ++++++++++++++++++++++++++++++++++------ 1 file changed, 40 insertions(+), 6 deletions(-) diff --git a/R/convert_output_types.R b/R/convert_output_types.R index 62755925..37f724cd 100644 --- a/R/convert_output_types.R +++ b/R/convert_output_types.R @@ -1,9 +1,41 @@ -#' assume starting output type is sample, inputs have been validated -#' support one input output_type and one output output_type +#' Transform between output types, from one starting output_type into one new +#' output_type. See details for supported conversions. #' -#' @param new_output_type vector of strings indicating the desired output_type after -#' transformation; can be `"mean"`, `"median"`, `"quantile"`, `"cdf"` -#' @param new_output_type_id corresponding output_type_ids, list if multiple output_type ids +#' @param model_outputs an object of class `model_out_tbl` with component model +#' outputs (e.g., predictions). `model_outputs` should contain only one +#' unique value in the `output_type` column. +#' @param group_by_cols a `vector` of task_id column names, i.e., +#' specify which columns to group by when transforming +#' @param new_output_type `string` indicating the desired output_type after +#' transformation; can be `"mean"`, `"median"`, `"quantile"`, `"cdf"`; see +#' details for supported conversions +#' @param new_output_type_id `vector` indicating desired output_type_ids for +#' corresponding `new_output_type`; only needs to be specified if +#' `new_output_type` is `"quantile"` or `"cdf"` +#' @param n_samples `numeric` that specifies the number of samples to use when +#' calculating quantiles from an estimated quantile function. Defaults to `1e4`. +#' @param ... parameters that are passed to `distfromq::make_q_fn`, specifying +#' details of how to estimate a quantile function from provided quantile levels +#' and quantile values for `output_type` `"quantile"`. +#' +#' @details +#' The following transformations are supported: +#' 1. `"sample"` can be transformed to `"mean"`, `"median"`, `"quantile"`, or `"cdf"` +#' 2. `"quantile"` can be transformed to `"mean"`, `"median"`, or `"cdf"` +#' 3. `"cdf"` can be transformed to `"mean"`, `"median"`, or `"quantile"`. +#' For `"quantile"` and `"cdf"` starting output types, we follow the below approach: +#' 1. Interpolate and extrapolate from the provided quantiles or probabilities +#' for each component model to obtain an estimate of the cdf of that distribution. +#' 2. Draw samples from the distribution for each component model. To reduce +#' Monte Carlo variability, we use quasi-random samples corresponding to +#' quantiles of the estimated distribution. +#' 3. Calculate the desired quantity (e.g., mean). +#' If the median quantile is provided in the `model_outputs` object (i.e., +#' the original output_type is `"median"` and 0.5 is contained in the original +#' output_type_id), the median value is extracted and returned directly. +#' +#' @return object of class `model_out_tbl` containing new output_type +#' @export convert_output_type <- function(model_outputs, group_by_cols, new_output_type, new_output_type_id, n_samples = 1e4, ...){ @@ -38,6 +70,7 @@ convert_output_type <- function(model_outputs, group_by_cols, return(model_outputs_transform) } +#' @export get_samples_from_quantiles <- function(model_outputs, group_by_cols, n_samples, ...){ set.seed(101) samples <- model_outputs %>% @@ -54,6 +87,7 @@ get_samples_from_quantiles <- function(model_outputs, group_by_cols, n_samples, return(samples) } +#' @export get_samples_from_cdf <- function(model_outputs, group_by_cols, n_samples, ...){ set.seed(101) samples <- model_outputs %>% @@ -68,6 +102,7 @@ get_samples_from_cdf <- function(model_outputs, group_by_cols, n_samples, ...){ return(samples) } +#' @export convert_from_sample <- function(grouped_model_outputs, new_output_type, new_output_type_id){ if(new_output_type == "mean"){ @@ -97,4 +132,3 @@ convert_from_sample <- function(grouped_model_outputs, new_output_type, hubUtils::as_model_out_tbl() return(model_outputs_transform) } - From 8dd2ad8d8615c600edd4f2b5374b6737d64acc51 Mon Sep 17 00:00:00 2001 From: Emily Howerton <46577370+eahowerton@users.noreply.github.com> Date: Thu, 6 Jun 2024 12:57:20 -0400 Subject: [PATCH 03/46] switch from summarize() to reframe() --- R/convert_output_types.R | 10 ++++------ tests/testthat/test-convert_output_types.R | 4 ++-- 2 files changed, 6 insertions(+), 8 deletions(-) diff --git a/R/convert_output_types.R b/R/convert_output_types.R index 37f724cd..320abad5 100644 --- a/R/convert_output_types.R +++ b/R/convert_output_types.R @@ -75,12 +75,11 @@ get_samples_from_quantiles <- function(model_outputs, group_by_cols, n_samples, set.seed(101) samples <- model_outputs %>% dplyr::group_by(model_id, dplyr::across(dplyr::all_of(group_by_cols))) %>% - dplyr::summarize( + dplyr::reframe( value = distfromq::make_q_fn( ps = as.numeric(.data$output_type_id), qs = .data$value, ... - )(runif(n_samples, 0, 1)), - .groups = "drop" + )(runif(n_samples, 0, 1)) ) @@ -92,12 +91,11 @@ get_samples_from_cdf <- function(model_outputs, group_by_cols, n_samples, ...){ set.seed(101) samples <- model_outputs %>% dplyr::group_by(model_id, dplyr::across(dplyr::all_of(group_by_cols))) %>% - dplyr::summarize( + dplyr::reframe( value = distfromq::make_q_fn( ps = .data$value, qs = as.numeric(.data$output_type_id), ... - )(runif(n_samples, 0, 1)), - .groups = "drop" + )(runif(n_samples, 0, 1)) ) return(samples) } diff --git a/tests/testthat/test-convert_output_types.R b/tests/testthat/test-convert_output_types.R index c50444ba..73f6d397 100644 --- a/tests/testthat/test-convert_output_types.R +++ b/tests/testthat/test-convert_output_types.R @@ -175,7 +175,7 @@ test_that("convert_from_sample works (return mean)", { new_output_type = "mean" new_output_type_id = NA expected <- grouped_model_outputs %>% - dplyr::summarize(value = mean(value), .groups = "drop") %>% + dplyr::reframe(value = mean(value)) %>% dplyr::mutate(output_type = new_output_type, output_type_id = new_output_type_id) %>% hubUtils::as_model_out_tbl() @@ -195,7 +195,7 @@ test_that("convert_from_sample works (return median)", { new_output_type = "median" new_output_type_id = NA expected <- grouped_model_outputs %>% - dplyr::summarize(value = median(value), .groups = "drop") %>% + dplyr::reframe(value = median(value)) %>% dplyr::mutate(output_type = new_output_type, output_type_id = new_output_type_id) %>% hubUtils::as_model_out_tbl() From 48dfffb4208bd9140af88b70956325984fdd48a5 Mon Sep 17 00:00:00 2001 From: Emily Howerton <46577370+eahowerton@users.noreply.github.com> Date: Thu, 6 Jun 2024 14:03:35 -0400 Subject: [PATCH 04/46] add preliminary validations --- R/convert_output_types.R | 28 +++++++++ tests/testthat/test-convert_output_types.R | 69 ++++++++++++++++++++++ 2 files changed, 97 insertions(+) diff --git a/R/convert_output_types.R b/R/convert_output_types.R index 320abad5..eddd1ff7 100644 --- a/R/convert_output_types.R +++ b/R/convert_output_types.R @@ -42,6 +42,7 @@ convert_output_type <- function(model_outputs, group_by_cols, # for cdf and quantile functions, get samples starting_output_type = model_outputs$output_type %>% unique() starting_output_type_ids = model_outputs$output_type_id %>% unique() + validate_new_output_type(starting_output_type, new_output_type) if(starting_output_type == "cdf"){ # estimate from samples model_outputs <- get_samples_from_cdf(model_outputs, group_by_cols, n_samples) @@ -70,6 +71,33 @@ convert_output_type <- function(model_outputs, group_by_cols, return(model_outputs_transform) } +validate_new_output_type <- function(starting_output_type, new_output_type, + new_output_type_id){ + valid_conversions <- list( + "sample" = c("mean", "median", "quantile", "cdf"), + "quantile" = c("mean", "median", "cdf"), + "cdf" = c("mean", "median", "quantile") + ) + # check starting_output_type is supported + valid_starting_output_type <- starting_output_type %in% names(valid_conversions) + if (!valid_starting_output_type) { + cli::cli_abort(c( + "{.var output_type} provided cannot be transformed", + i = "must be of type {.var sample}, {.var quantile}, {.var cdf}." + ) + ) + } + # check new_output_type is supported + valid_new_output_type <- new_output_type %in% valid_conversions[[starting_output_type]] + if(!valid_new_output_type){ + cli::cli_abort(c( + "{starting_output_type} cannot be transformed to + output_type {new_output_type}", + i = "new_output_type must be {valid_conversions[[starting_output_type]]}" + )) + } +} + #' @export get_samples_from_quantiles <- function(model_outputs, group_by_cols, n_samples, ...){ set.seed(101) diff --git a/tests/testthat/test-convert_output_types.R b/tests/testthat/test-convert_output_types.R index 73f6d397..a4fd1d27 100644 --- a/tests/testthat/test-convert_output_types.R +++ b/tests/testthat/test-convert_output_types.R @@ -163,6 +163,75 @@ test_that("convert_output_type works (cdf >> quantile)", { expect_equal(test, expected, tolerance = 1e-2) }) +test_that("convert_output_type fails correctly (quantile)",{ + ex_ps <- seq(-2,10,length.out = 500)[2:499] + model_outputs <- expand.grid( + grp1 = 1:2, + model_id = LETTERS[1:2], + output_type = "cdf", + output_type_id = ex_ps, + stringsAsFactors = FALSE + ) %>% + dplyr::mutate(mean = grp1*ifelse(model_id == "A", 1, 3), + value = pnorm(output_type_id, mean)) %>% + dplyr::select(-mean) + new_output_type = "quantile" + new_output_type_id = c(0.25, 0.5, 0.75) + expected <- tibble::as_tibble(expand.grid( + grp1 = 1:2, + model_id = LETTERS[1:2], + output_type = new_output_type, + output_type_id = new_output_type_id, + KEEP.OUT.ATTRS = FALSE, + stringsAsFactors = FALSE + )) %>% + dplyr:: mutate(value = qnorm(output_type_id, grp1*ifelse(model_id == "A", 1, 3))) %>% + dplyr::arrange(model_id, grp1) %>% + hubUtils::as_model_out_tbl() + test <- convert_output_type(model_outputs, group_by_cols = c("grp1"), + new_output_type, new_output_type_id) + expect_equal(test, expected, tolerance = 1e-2) +}) + +test_that("convert_output_type fails correctly: wrong starting output_type", { + model_outputs <- expand.grid( + grp1 = 1:2, + model_id = LETTERS[1:2], + output_type = "pmf", + output_type_id = c("bin1", "bin2"), + stringsAsFactors = FALSE + ) + new_output_type = "mean" + expect_error(convert_output_type( + model_outputs, group_by_cols = c("grp1"), new_output_type)) +}) + +test_that("convert_output_type fails correctly: wrong new_output_type (quantile >> pmf)", { + model_outputs <- expand.grid( + grp1 = 1:2, + model_id = LETTERS[1:2], + output_type = "quantile", + output_type_id = c(0.25, 0.5, 0.75), + stringsAsFactors = FALSE + ) + new_output_type = "pmf" + expect_error(convert_output_type( + model_outputs, group_by_cols = c("grp1"), new_output_type)) +}) + +test_that("convert_output_type fails correctly: wrong new_output_type (cdf >> sample)", { + model_outputs <- expand.grid( + grp1 = 1:2, + model_id = LETTERS[1:2], + output_type = "cdf", + output_type_id = -1:1, + stringsAsFactors = FALSE + ) + new_output_type = "sample" + expect_error(convert_output_type( + model_outputs, group_by_cols = c("grp1"), new_output_type)) +}) + ### test convert_from_sample() test_that("convert_from_sample works (return mean)", { grouped_model_outputs = expand.grid( From 71dacb990beb4def8c4a92349bcd4490544c6432 Mon Sep 17 00:00:00 2001 From: Emily Howerton <46577370+eahowerton@users.noreply.github.com> Date: Thu, 6 Jun 2024 14:13:47 -0400 Subject: [PATCH 05/46] add validations of new_output_type_id --- R/convert_output_types.R | 35 ++++++++++++++++- tests/testthat/test-convert_output_types.R | 45 ++++++++++++++++++++++ 2 files changed, 79 insertions(+), 1 deletion(-) diff --git a/R/convert_output_types.R b/R/convert_output_types.R index eddd1ff7..b1bca00b 100644 --- a/R/convert_output_types.R +++ b/R/convert_output_types.R @@ -42,7 +42,8 @@ convert_output_type <- function(model_outputs, group_by_cols, # for cdf and quantile functions, get samples starting_output_type = model_outputs$output_type %>% unique() starting_output_type_ids = model_outputs$output_type_id %>% unique() - validate_new_output_type(starting_output_type, new_output_type) + validate_new_output_type(starting_output_type, new_output_type, + new_output_type_id) if(starting_output_type == "cdf"){ # estimate from samples model_outputs <- get_samples_from_cdf(model_outputs, group_by_cols, n_samples) @@ -96,6 +97,38 @@ validate_new_output_type <- function(starting_output_type, new_output_type, i = "new_output_type must be {valid_conversions[[starting_output_type]]}" )) } + # check new_output_type_id + if(new_output_type %in% c("mean", "median") & !all(is.na(new_output_type_id))){ + cli::cli_abort(c( + "{.var new_output_type_id} is incompatible with {.var new_output_type}", + i = "{.var new_output_type_id} should be {.var NA}" + )) + } + else if(new_output_type == "quantile"){ + if(!is.numeric(new_output_type_id)){ + cli::cli_abort(c( + "elements of {.var new_output_type_id} should be numeric", + i = "elements of {.var new_output_type_id} represent quantiles + of the predictive distribution" + )) + } + if(any(new_output_type_id < 0 | new_output_type > 1)){ + cli::cli_abort(c( + "elements of {.var new_output_type_id} should be between 0 and 1", + i = "elements of {.var new_output_type_id} represent quantiles + of the predictive distribution" + )) + } + } + else if(new_output_type == "cdf"){ + if(!is.numeric(new_output_type_id)){ + cli::cli_abort(c( + "elements of {.var new_output_type_id} should be numeric", + i = "elements of {.var new_output_type_id} represent possible + values of the target variable" + )) + } + } } #' @export diff --git a/tests/testthat/test-convert_output_types.R b/tests/testthat/test-convert_output_types.R index a4fd1d27..f4fd454c 100644 --- a/tests/testthat/test-convert_output_types.R +++ b/tests/testthat/test-convert_output_types.R @@ -232,6 +232,51 @@ test_that("convert_output_type fails correctly: wrong new_output_type (cdf >> sa model_outputs, group_by_cols = c("grp1"), new_output_type)) }) +test_that("convert_output_type fails correctly: wrong new_output_type_id (mean)", { + model_outputs <- expand.grid( + grp1 = 1:2, + model_id = LETTERS[1:2], + output_type = "cdf", + output_type_id = -1:1, + stringsAsFactors = FALSE + ) + new_output_type = "mean" + new_output_type_id = c("A", "B") + expect_error(convert_output_type( + model_outputs, group_by_cols = c("grp1"), new_output_type, + new_output_type_id)) +}) + +test_that("convert_output_type fails correctly: wrong new_output_type_id (quantile)", { + model_outputs <- expand.grid( + grp1 = 1:2, + model_id = LETTERS[1:2], + output_type = "cdf", + output_type_id = -1:1, + stringsAsFactors = FALSE + ) + new_output_type = "quantile" + new_output_type_id = c(-1,0,1) + expect_error(convert_output_type( + model_outputs, group_by_cols = c("grp1"), new_output_type, + new_output_type_id)) +}) + +test_that("convert_output_type fails correctly: wrong new_output_type_id (cdf)", { + model_outputs <- expand.grid( + grp1 = 1:2, + model_id = LETTERS[1:2], + output_type = "quantile", + output_type_id = seq(0,1,0.5), + stringsAsFactors = FALSE + ) + new_output_type = "cdf" + new_output_type_id = c("A", "B") + expect_error(convert_output_type( + model_outputs, group_by_cols = c("grp1"), new_output_type, + new_output_type_id)) +}) + ### test convert_from_sample() test_that("convert_from_sample works (return mean)", { grouped_model_outputs = expand.grid( From 9b81d7acefc5dc65f3b68325f38b9365d9270611 Mon Sep 17 00:00:00 2001 From: Emily Howerton <46577370+eahowerton@users.noreply.github.com> Date: Thu, 6 Jun 2024 14:40:24 -0400 Subject: [PATCH 06/46] update documentation, lint --- R/convert_output_types.R | 294 +++++++++++++++++++++------------------ 1 file changed, 157 insertions(+), 137 deletions(-) diff --git a/R/convert_output_types.R b/R/convert_output_types.R index b1bca00b..b7f30a33 100644 --- a/R/convert_output_types.R +++ b/R/convert_output_types.R @@ -3,9 +3,10 @@ #' #' @param model_outputs an object of class `model_out_tbl` with component model #' outputs (e.g., predictions). `model_outputs` should contain only one -#' unique value in the `output_type` column. +#' unique value in the `output_type` column #' @param group_by_cols a `vector` of task_id column names, i.e., -#' specify which columns to group by when transforming +#' specify which columns to group by when transforming; the `model_id` +#' column will be included automatically #' @param new_output_type `string` indicating the desired output_type after #' transformation; can be `"mean"`, `"median"`, `"quantile"`, `"cdf"`; see #' details for supported conversions @@ -19,10 +20,11 @@ #' and quantile values for `output_type` `"quantile"`. #' #' @details -#' The following transformations are supported: -#' 1. `"sample"` can be transformed to `"mean"`, `"median"`, `"quantile"`, or `"cdf"` -#' 2. `"quantile"` can be transformed to `"mean"`, `"median"`, or `"cdf"` -#' 3. `"cdf"` can be transformed to `"mean"`, `"median"`, or `"quantile"`. +#' The following transformations are supported: (i) `"sample"` can be +#' transformed to `"mean"`, `"median"`, `"quantile"`, or `"cdf"`; (ii) +#' `"quantile"` can be transformed to `"mean"`, `"median"`, or `"cdf"`; and +#' (iii) `"cdf"` can be transformed to `"mean"`, `"median"`, or `"quantile"`. +#' #' For `"quantile"` and `"cdf"` starting output types, we follow the below approach: #' 1. Interpolate and extrapolate from the provided quantiles or probabilities #' for each component model to obtain an estimate of the cdf of that distribution. @@ -34,160 +36,178 @@ #' the original output_type is `"median"` and 0.5 is contained in the original #' output_type_id), the median value is extracted and returned directly. #' +#' @examples +#' # We illustrate the conversion between output types using normal distributions, +#' ex_quantiles <- c(0.25, 0.5, 0.75) +#' model_outputs <- expand.grid( +#' group1 = c(1,2), +#' model_id = "A", +#' output_type = "quantile", +#' output_type_id = ex_quantiles +#' ) %>% +#' dplyr::mutate(value = qnorm(p = output_type_id, mean = group1)) +#' +#' convert_output_type(model_outputs, c("group1"), new_output_type = "median") +#' #' @return object of class `model_out_tbl` containing new output_type #' @export convert_output_type <- function(model_outputs, group_by_cols, - new_output_type, new_output_type_id, - n_samples = 1e4, ...){ - # for cdf and quantile functions, get samples - starting_output_type = model_outputs$output_type %>% unique() - starting_output_type_ids = model_outputs$output_type_id %>% unique() - validate_new_output_type(starting_output_type, new_output_type, - new_output_type_id) - if(starting_output_type == "cdf"){ - # estimate from samples - model_outputs <- get_samples_from_cdf(model_outputs, group_by_cols, n_samples) - } - else if(starting_output_type == "quantile"){ - # if median output desired, and Q50 provided return exact value - if(new_output_type == "median" & 0.5 %in% starting_output_type_ids){ - model_outputs_transform <- model_outputs %>% - dplyr::filter(output_type_id == 0.5) %>% - dplyr::mutate(output_type = new_output_type, - output_type_id = new_output_type_id) %>% - hubUtils::as_model_out_tbl() - return(model_outputs_transform) - } - # otherwise, estimate from samples - else{ - model_outputs <- get_samples_from_quantiles(model_outputs, group_by_cols, n_samples) - } + new_output_type, new_output_type_id = NA, + n_samples = 1e4, ...) { + # for cdf and quantile functions, get samples + starting_output_type <- model_outputs$output_type %>% unique() + starting_output_type_ids <- model_outputs$output_type_id %>% unique() + validate_new_output_type( + starting_output_type, new_output_type, + new_output_type_id + ) + if (starting_output_type == "cdf") { + # estimate from samples + model_outputs <- get_samples_from_cdf(model_outputs, group_by_cols, n_samples) + } else if (starting_output_type == "quantile") { + # if median output desired, and Q50 provided return exact value + if (new_output_type == "median" && 0.5 %in% starting_output_type_ids) { + model_outputs_transform <- model_outputs %>% + dplyr::filter(output_type_id == 0.5) %>% + dplyr::mutate( + output_type = new_output_type, + output_type_id = new_output_type_id + ) %>% + hubUtils::as_model_out_tbl() + return(model_outputs_transform) + } else { + # otherwise, estimate from samples + model_outputs <- get_samples_from_quantiles(model_outputs, group_by_cols, n_samples) } - # transform based on new_output_type - grouped_model_outputs <- model_outputs %>% - dplyr::group_by(model_id, dplyr::across(dplyr::all_of(group_by_cols))) - model_outputs_transform <- convert_from_sample( - grouped_model_outputs, new_output_type, new_output_type_id - ) - return(model_outputs_transform) + } + # transform based on new_output_type + grouped_model_outputs <- model_outputs %>% + dplyr::group_by(model_id, dplyr::across(dplyr::all_of(group_by_cols))) + model_outputs_transform <- convert_from_sample( + grouped_model_outputs, new_output_type, new_output_type_id + ) + return(model_outputs_transform) } validate_new_output_type <- function(starting_output_type, new_output_type, - new_output_type_id){ - valid_conversions <- list( - "sample" = c("mean", "median", "quantile", "cdf"), - "quantile" = c("mean", "median", "cdf"), - "cdf" = c("mean", "median", "quantile") - ) - # check starting_output_type is supported - valid_starting_output_type <- starting_output_type %in% names(valid_conversions) - if (!valid_starting_output_type) { - cli::cli_abort(c( - "{.var output_type} provided cannot be transformed", - i = "must be of type {.var sample}, {.var quantile}, {.var cdf}." - ) - ) - } - # check new_output_type is supported - valid_new_output_type <- new_output_type %in% valid_conversions[[starting_output_type]] - if(!valid_new_output_type){ - cli::cli_abort(c( - "{starting_output_type} cannot be transformed to + new_output_type_id) { + valid_conversions <- list( + "sample" = c("mean", "median", "quantile", "cdf"), + "quantile" = c("mean", "median", "cdf"), + "cdf" = c("mean", "median", "quantile") + ) + # check starting_output_type is supported + valid_starting_output_type <- starting_output_type %in% names(valid_conversions) + if (!valid_starting_output_type) { + cli::cli_abort(c( + "{.var output_type} provided cannot be transformed", + i = "must be of type {.var sample}, {.var quantile}, {.var cdf}." + )) + } + # check new_output_type is supported + valid_new_output_type <- new_output_type %in% valid_conversions[[starting_output_type]] + if (!valid_new_output_type) { + cli::cli_abort(c( + "{starting_output_type} cannot be transformed to output_type {new_output_type}", - i = "new_output_type must be {valid_conversions[[starting_output_type]]}" - )) - } - # check new_output_type_id - if(new_output_type %in% c("mean", "median") & !all(is.na(new_output_type_id))){ - cli::cli_abort(c( - "{.var new_output_type_id} is incompatible with {.var new_output_type}", - i = "{.var new_output_type_id} should be {.var NA}" - )) - } - else if(new_output_type == "quantile"){ - if(!is.numeric(new_output_type_id)){ - cli::cli_abort(c( - "elements of {.var new_output_type_id} should be numeric", - i = "elements of {.var new_output_type_id} represent quantiles + i = "new_output_type must be {valid_conversions[[starting_output_type]]}" + )) + } + # check new_output_type_id + if (new_output_type %in% c("mean", "median") && !all(is.na(new_output_type_id))) { + cli::cli_abort(c( + "{.var new_output_type_id} is incompatible with {.var new_output_type}", + i = "{.var new_output_type_id} should be {.var NA}" + )) + } else if (new_output_type == "quantile") { + if (!is.numeric(new_output_type_id)) { + cli::cli_abort(c( + "elements of {.var new_output_type_id} should be numeric", + i = "elements of {.var new_output_type_id} represent quantiles of the predictive distribution" - )) - } - if(any(new_output_type_id < 0 | new_output_type > 1)){ - cli::cli_abort(c( - "elements of {.var new_output_type_id} should be between 0 and 1", - i = "elements of {.var new_output_type_id} represent quantiles + )) + } + if (any(new_output_type_id < 0 || new_output_type > 1)) { + cli::cli_abort(c( + "elements of {.var new_output_type_id} should be between 0 and 1", + i = "elements of {.var new_output_type_id} represent quantiles of the predictive distribution" - )) - } + )) } - else if(new_output_type == "cdf"){ - if(!is.numeric(new_output_type_id)){ - cli::cli_abort(c( - "elements of {.var new_output_type_id} should be numeric", - i = "elements of {.var new_output_type_id} represent possible + } else if (new_output_type == "cdf") { + if (!is.numeric(new_output_type_id)) { + cli::cli_abort(c( + "elements of {.var new_output_type_id} should be numeric", + i = "elements of {.var new_output_type_id} represent possible values of the target variable" - )) - } + )) } + } } #' @export -get_samples_from_quantiles <- function(model_outputs, group_by_cols, n_samples, ...){ - set.seed(101) - samples <- model_outputs %>% - dplyr::group_by(model_id, dplyr::across(dplyr::all_of(group_by_cols))) %>% - dplyr::reframe( - value = distfromq::make_q_fn( - ps = as.numeric(.data$output_type_id), - qs = .data$value, ... - )(runif(n_samples, 0, 1)) - ) +get_samples_from_quantiles <- function(model_outputs, group_by_cols, n_samples, ...) { + set.seed(101) + samples <- model_outputs %>% + dplyr::group_by(model_id, dplyr::across(dplyr::all_of(group_by_cols))) %>% + dplyr::reframe( + value = distfromq::make_q_fn( + ps = as.numeric(.data$output_type_id), + qs = .data$value, ... + )(runif(n_samples, 0, 1)) + ) - return(samples) + return(samples) } #' @export -get_samples_from_cdf <- function(model_outputs, group_by_cols, n_samples, ...){ - set.seed(101) - samples <- model_outputs %>% - dplyr::group_by(model_id, dplyr::across(dplyr::all_of(group_by_cols))) %>% - dplyr::reframe( - value = distfromq::make_q_fn( - ps = .data$value, - qs = as.numeric(.data$output_type_id), ... - )(runif(n_samples, 0, 1)) - ) - return(samples) +get_samples_from_cdf <- function(model_outputs, group_by_cols, n_samples, ...) { + set.seed(101) + samples <- model_outputs %>% + dplyr::group_by(model_id, dplyr::across(dplyr::all_of(group_by_cols))) %>% + dplyr::reframe( + value = distfromq::make_q_fn( + ps = .data$value, + qs = as.numeric(.data$output_type_id), ... + )(runif(n_samples, 0, 1)) + ) + return(samples) } #' @export convert_from_sample <- function(grouped_model_outputs, new_output_type, - new_output_type_id){ - if(new_output_type == "mean"){ - model_outputs_transform <- grouped_model_outputs %>% - dplyr::reframe(value = mean(value), - output_type_id = new_output_type_id) - } - else if(new_output_type == "median"){ - model_outputs_transform <- grouped_model_outputs %>% - dplyr::reframe(value = median(value), - output_type_id = new_output_type_id) - } - else if(new_output_type == "quantile"){ - model_outputs_transform <- grouped_model_outputs %>% - dplyr::reframe(value = quantile(value, as.numeric(new_output_type_id), - names = FALSE), - output_type_id = new_output_type_id) - } - else if(new_output_type == "cdf"){ - model_outputs_transform <- grouped_model_outputs %>% - dplyr::reframe(value = ecdf(value)(as.numeric(new_output_type_id)), - output_type_id = new_output_type_id) - } - # update output_type and output_type_id columns - model_outputs_transform <- model_outputs_transform %>% - dplyr::mutate(output_type = new_output_type) %>% - hubUtils::as_model_out_tbl() - return(model_outputs_transform) + new_output_type_id) { + if (new_output_type == "mean") { + model_outputs_transform <- grouped_model_outputs %>% + dplyr::reframe( + value = mean(value), + output_type_id = new_output_type_id + ) + } else if (new_output_type == "median") { + model_outputs_transform <- grouped_model_outputs %>% + dplyr::reframe( + value = median(value), + output_type_id = new_output_type_id + ) + } else if (new_output_type == "quantile") { + model_outputs_transform <- grouped_model_outputs %>% + dplyr::reframe( + value = quantile(value, as.numeric(new_output_type_id), + names = FALSE + ), + output_type_id = new_output_type_id + ) + } else if (new_output_type == "cdf") { + model_outputs_transform <- grouped_model_outputs %>% + dplyr::reframe( + value = ecdf(value)(as.numeric(new_output_type_id)), + output_type_id = new_output_type_id + ) + } + # update output_type and output_type_id columns + model_outputs_transform <- model_outputs_transform %>% + dplyr::mutate(output_type = new_output_type) %>% + hubUtils::as_model_out_tbl() + return(model_outputs_transform) } From d8f9e4b9b0b56d34175593c0f16e0300dd63fe3a Mon Sep 17 00:00:00 2001 From: Emily Howerton <46577370+eahowerton@users.noreply.github.com> Date: Thu, 6 Jun 2024 14:49:14 -0400 Subject: [PATCH 07/46] Update DESCRIPTION --- DESCRIPTION | 1 + 1 file changed, 1 insertion(+) diff --git a/DESCRIPTION b/DESCRIPTION index 9342031c..06978e26 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -35,6 +35,7 @@ Suggests: covr, curl, digest, + distfromq, dplyr, fs, gh, From 5b9e31144e707a31ed761f98dcfc924a24eca9f4 Mon Sep 17 00:00:00 2001 From: Emily Howerton <46577370+eahowerton@users.noreply.github.com> Date: Thu, 6 Jun 2024 15:42:16 -0400 Subject: [PATCH 08/46] Create get_task_id_cols.R --- R/get_task_id_cols.R | 6 ++++++ tests/testthat/test-get_task_id_cols.R | 15 +++++++++++++++ 2 files changed, 21 insertions(+) create mode 100644 R/get_task_id_cols.R create mode 100644 tests/testthat/test-get_task_id_cols.R diff --git a/R/get_task_id_cols.R b/R/get_task_id_cols.R new file mode 100644 index 00000000..4cf4a00c --- /dev/null +++ b/R/get_task_id_cols.R @@ -0,0 +1,6 @@ +#' @export +get_task_id_cols <- function(model_out_tbl){ + model_out_cols <- colnames(model_out_tbl) + task_id_cols <- model_out_cols[!model_out_cols %in% std_colnames] + return(task_id_cols) +} diff --git a/tests/testthat/test-get_task_id_cols.R b/tests/testthat/test-get_task_id_cols.R new file mode 100644 index 00000000..1a5dc74f --- /dev/null +++ b/tests/testthat/test-get_task_id_cols.R @@ -0,0 +1,15 @@ +test_that("get_task_id_cols works", { + ex_qs <- seq(0,1,length.out = 5) + model_outputs <- expand.grid( + grp1 = 1:2, + grp2 = 1:3, + model_id = LETTERS[1:2], + output_type = "quantile", + output_type_id = ex_qs, + stringsAsFactors = FALSE + ) + model_outputs$value <- runif(nrow(model_outputs)) + expected <- c("grp1", "grp2") + test <- get_task_id_cols(model_outputs) + expect_equal(expected, test) +}) From 2a6c5955fce2f9396e6a19f4352c3f6a46683aa5 Mon Sep 17 00:00:00 2001 From: Emily Howerton <46577370+eahowerton@users.noreply.github.com> Date: Thu, 6 Jun 2024 15:46:39 -0400 Subject: [PATCH 09/46] use get_task_id_cols() --- R/convert_output_types.R | 35 +++++++++---------- tests/testthat/test-convert_output_types.R | 39 ++++++++++------------ 2 files changed, 33 insertions(+), 41 deletions(-) diff --git a/R/convert_output_types.R b/R/convert_output_types.R index b7f30a33..5370f029 100644 --- a/R/convert_output_types.R +++ b/R/convert_output_types.R @@ -4,9 +4,6 @@ #' @param model_outputs an object of class `model_out_tbl` with component model #' outputs (e.g., predictions). `model_outputs` should contain only one #' unique value in the `output_type` column -#' @param group_by_cols a `vector` of task_id column names, i.e., -#' specify which columns to group by when transforming; the `model_id` -#' column will be included automatically #' @param new_output_type `string` indicating the desired output_type after #' transformation; can be `"mean"`, `"median"`, `"quantile"`, `"cdf"`; see #' details for supported conversions @@ -51,19 +48,19 @@ #' #' @return object of class `model_out_tbl` containing new output_type #' @export -convert_output_type <- function(model_outputs, group_by_cols, - new_output_type, new_output_type_id = NA, - n_samples = 1e4, ...) { - # for cdf and quantile functions, get samples +convert_output_type <- function(model_outputs, new_output_type, + new_output_type_id = NA, n_samples = 1e4, ...) { + # validations + task_id_cols <- get_task_id_cols(model_outputs) starting_output_type <- model_outputs$output_type %>% unique() starting_output_type_ids <- model_outputs$output_type_id %>% unique() - validate_new_output_type( - starting_output_type, new_output_type, - new_output_type_id - ) + task_id_cols <- get_task_id_cols(model_outputs) + validate_new_output_type(starting_output_type, new_output_type, + new_output_type_id) + # for cdf and quantile functions, get samples if (starting_output_type == "cdf") { # estimate from samples - model_outputs <- get_samples_from_cdf(model_outputs, group_by_cols, n_samples) + model_outputs <- get_samples_from_cdf(model_outputs, task_id_cols, n_samples) } else if (starting_output_type == "quantile") { # if median output desired, and Q50 provided return exact value if (new_output_type == "median" && 0.5 %in% starting_output_type_ids) { @@ -77,12 +74,12 @@ convert_output_type <- function(model_outputs, group_by_cols, return(model_outputs_transform) } else { # otherwise, estimate from samples - model_outputs <- get_samples_from_quantiles(model_outputs, group_by_cols, n_samples) + model_outputs <- get_samples_from_quantiles(model_outputs, task_id_cols, n_samples) } } # transform based on new_output_type grouped_model_outputs <- model_outputs %>% - dplyr::group_by(model_id, dplyr::across(dplyr::all_of(group_by_cols))) + dplyr::group_by(model_id, dplyr::across(dplyr::all_of(task_id_cols))) model_outputs_transform <- convert_from_sample( grouped_model_outputs, new_output_type, new_output_type_id ) @@ -127,7 +124,7 @@ validate_new_output_type <- function(starting_output_type, new_output_type, of the predictive distribution" )) } - if (any(new_output_type_id < 0 || new_output_type > 1)) { + if (any(new_output_type_id < 0) || any(new_output_type_id > 1)) { cli::cli_abort(c( "elements of {.var new_output_type_id} should be between 0 and 1", i = "elements of {.var new_output_type_id} represent quantiles @@ -146,10 +143,10 @@ validate_new_output_type <- function(starting_output_type, new_output_type, } #' @export -get_samples_from_quantiles <- function(model_outputs, group_by_cols, n_samples, ...) { +get_samples_from_quantiles <- function(model_outputs, task_id_cols, n_samples, ...) { set.seed(101) samples <- model_outputs %>% - dplyr::group_by(model_id, dplyr::across(dplyr::all_of(group_by_cols))) %>% + dplyr::group_by(model_id, dplyr::across(dplyr::all_of(task_id_cols))) %>% dplyr::reframe( value = distfromq::make_q_fn( ps = as.numeric(.data$output_type_id), @@ -162,10 +159,10 @@ get_samples_from_quantiles <- function(model_outputs, group_by_cols, n_samples, } #' @export -get_samples_from_cdf <- function(model_outputs, group_by_cols, n_samples, ...) { +get_samples_from_cdf <- function(model_outputs, task_id_cols, n_samples, ...) { set.seed(101) samples <- model_outputs %>% - dplyr::group_by(model_id, dplyr::across(dplyr::all_of(group_by_cols))) %>% + dplyr::group_by(model_id, dplyr::across(dplyr::all_of(task_id_cols))) %>% dplyr::reframe( value = distfromq::make_q_fn( ps = .data$value, diff --git a/tests/testthat/test-convert_output_types.R b/tests/testthat/test-convert_output_types.R index f4fd454c..c20cd536 100644 --- a/tests/testthat/test-convert_output_types.R +++ b/tests/testthat/test-convert_output_types.R @@ -20,8 +20,7 @@ test_that("convert_output_type works (quantile >> mean)", { dplyr::mutate(output_type = new_output_type, output_type_id = new_output_type_id) %>% hubUtils::as_model_out_tbl() - test <- convert_output_type(model_outputs, group_by_cols = c("grp1"), - new_output_type, new_output_type_id) + test <- convert_output_type(model_outputs, new_output_type, new_output_type_id) expect_equal(expected, test, tolerance = 1e-2) }) @@ -46,8 +45,7 @@ test_that("convert_output_type works (quantile >> median)", { dplyr::mutate(output_type = new_output_type, output_type_id = new_output_type_id) %>% hubUtils::as_model_out_tbl() - test <- convert_output_type(model_outputs, group_by_cols = c("grp1"), - new_output_type, new_output_type_id) + test <- convert_output_type(model_outputs, new_output_type, new_output_type_id) expect_equal(expected, test, tolerance = 1e-2) }) @@ -76,8 +74,7 @@ test_that("convert_output_type works (quantile >> cdf)", { dplyr:: mutate(value = pnorm(output_type_id, grp1*ifelse(model_id == "A", 1, 3))) %>% dplyr::arrange(model_id, grp1) %>% hubUtils::as_model_out_tbl() - test <- convert_output_type(model_outputs, group_by_cols = c("grp1"), - new_output_type, new_output_type_id) + test <- convert_output_type(model_outputs, new_output_type, new_output_type_id) expect_equal(expected, test, tolerance = 1e-2) }) @@ -102,8 +99,7 @@ test_that("convert_output_type works (cdf >> mean)", { dplyr::mutate(output_type = new_output_type, output_type_id = new_output_type_id) %>% hubUtils::as_model_out_tbl() - test <- convert_output_type(model_outputs, group_by_cols = c("grp1"), - new_output_type, new_output_type_id) + test <- convert_output_type(model_outputs, new_output_type, new_output_type_id) expect_equal(expected, test, tolerance = 1e-2) }) @@ -128,7 +124,7 @@ test_that("convert_output_type works (cdf >> median)", { dplyr::mutate(output_type = new_output_type, output_type_id = new_output_type_id) %>% hubUtils::as_model_out_tbl() - test <- convert_output_type(model_outputs, group_by_cols = c("grp1"), + test <- convert_output_type(model_outputs, new_output_type, new_output_type_id) expect_equal(expected, test, tolerance = 1e-2) }) @@ -158,7 +154,7 @@ test_that("convert_output_type works (cdf >> quantile)", { dplyr:: mutate(value = qnorm(output_type_id, grp1*ifelse(model_id == "A", 1, 3))) %>% dplyr::arrange(model_id, grp1) %>% hubUtils::as_model_out_tbl() - test <- convert_output_type(model_outputs, group_by_cols = c("grp1"), + test <- convert_output_type(model_outputs, new_output_type, new_output_type_id) expect_equal(test, expected, tolerance = 1e-2) }) @@ -188,8 +184,7 @@ test_that("convert_output_type fails correctly (quantile)",{ dplyr:: mutate(value = qnorm(output_type_id, grp1*ifelse(model_id == "A", 1, 3))) %>% dplyr::arrange(model_id, grp1) %>% hubUtils::as_model_out_tbl() - test <- convert_output_type(model_outputs, group_by_cols = c("grp1"), - new_output_type, new_output_type_id) + test <- convert_output_type(model_outputs, new_output_type, new_output_type_id) expect_equal(test, expected, tolerance = 1e-2) }) @@ -203,7 +198,7 @@ test_that("convert_output_type fails correctly: wrong starting output_type", { ) new_output_type = "mean" expect_error(convert_output_type( - model_outputs, group_by_cols = c("grp1"), new_output_type)) + model_outputs, new_output_type)) }) test_that("convert_output_type fails correctly: wrong new_output_type (quantile >> pmf)", { @@ -216,7 +211,7 @@ test_that("convert_output_type fails correctly: wrong new_output_type (quantile ) new_output_type = "pmf" expect_error(convert_output_type( - model_outputs, group_by_cols = c("grp1"), new_output_type)) + model_outputs, new_output_type)) }) test_that("convert_output_type fails correctly: wrong new_output_type (cdf >> sample)", { @@ -229,7 +224,7 @@ test_that("convert_output_type fails correctly: wrong new_output_type (cdf >> sa ) new_output_type = "sample" expect_error(convert_output_type( - model_outputs, group_by_cols = c("grp1"), new_output_type)) + model_outputs, new_output_type)) }) test_that("convert_output_type fails correctly: wrong new_output_type_id (mean)", { @@ -243,7 +238,7 @@ test_that("convert_output_type fails correctly: wrong new_output_type_id (mean)" new_output_type = "mean" new_output_type_id = c("A", "B") expect_error(convert_output_type( - model_outputs, group_by_cols = c("grp1"), new_output_type, + model_outputs, new_output_type, new_output_type_id)) }) @@ -257,9 +252,9 @@ test_that("convert_output_type fails correctly: wrong new_output_type_id (quanti ) new_output_type = "quantile" new_output_type_id = c(-1,0,1) - expect_error(convert_output_type( - model_outputs, group_by_cols = c("grp1"), new_output_type, - new_output_type_id)) + expect_error( + convert_output_type(model_outputs, new_output_type, new_output_type_id) + ) }) test_that("convert_output_type fails correctly: wrong new_output_type_id (cdf)", { @@ -272,9 +267,9 @@ test_that("convert_output_type fails correctly: wrong new_output_type_id (cdf)", ) new_output_type = "cdf" new_output_type_id = c("A", "B") - expect_error(convert_output_type( - model_outputs, group_by_cols = c("grp1"), new_output_type, - new_output_type_id)) + expect_error( + convert_output_type(model_outputs, new_output_type, new_output_type_id) + ) }) ### test convert_from_sample() From 10ff6536ca524d1690a0013a7bbc99d47914ed99 Mon Sep 17 00:00:00 2001 From: Emily Howerton <46577370+eahowerton@users.noreply.github.com> Date: Thu, 6 Jun 2024 15:42:16 -0400 Subject: [PATCH 10/46] Create get_task_id_cols.R (#149) From 72435702f5cbe25dcfa8098071061d7699c989c7 Mon Sep 17 00:00:00 2001 From: Emily Howerton <46577370+eahowerton@users.noreply.github.com> Date: Thu, 6 Jun 2024 15:46:39 -0400 Subject: [PATCH 11/46] use get_task_id_cols() From 243cdb50c46b9dd8c42bc615b9500289ab63422c Mon Sep 17 00:00:00 2001 From: Emily Howerton <46577370+eahowerton@users.noreply.github.com> Date: Thu, 6 Jun 2024 16:12:38 -0400 Subject: [PATCH 12/46] Merge branch 'output-type-conversion' of https://github.com/Infectious-Disease-Modeling-Hubs/hubUtils into output-type-conversion From cc4e002fb13ccc9195fa89b6945b82424267408d Mon Sep 17 00:00:00 2001 From: Emily Howerton <46577370+eahowerton@users.noreply.github.com> Date: Thu, 6 Jun 2024 16:20:38 -0400 Subject: [PATCH 13/46] switch model_outputs to model_out_tbl --- R/convert_output_types.R | 58 ++++++++--------- tests/testthat/test-convert_output_types.R | 76 +++++++++++----------- 2 files changed, 67 insertions(+), 67 deletions(-) diff --git a/R/convert_output_types.R b/R/convert_output_types.R index 5370f029..3b98faf6 100644 --- a/R/convert_output_types.R +++ b/R/convert_output_types.R @@ -1,8 +1,8 @@ #' Transform between output types, from one starting output_type into one new #' output_type. See details for supported conversions. #' -#' @param model_outputs an object of class `model_out_tbl` with component model -#' outputs (e.g., predictions). `model_outputs` should contain only one +#' @param model_out_tbl an object of class `model_out_tbl` with component model +#' outputs (e.g., predictions). `model_out_tbl` should contain only one #' unique value in the `output_type` column #' @param new_output_type `string` indicating the desired output_type after #' transformation; can be `"mean"`, `"median"`, `"quantile"`, `"cdf"`; see @@ -29,14 +29,14 @@ #' Monte Carlo variability, we use quasi-random samples corresponding to #' quantiles of the estimated distribution. #' 3. Calculate the desired quantity (e.g., mean). -#' If the median quantile is provided in the `model_outputs` object (i.e., +#' If the median quantile is provided in the `model_out_tbl` object (i.e., #' the original output_type is `"median"` and 0.5 is contained in the original #' output_type_id), the median value is extracted and returned directly. #' #' @examples #' # We illustrate the conversion between output types using normal distributions, #' ex_quantiles <- c(0.25, 0.5, 0.75) -#' model_outputs <- expand.grid( +#' model_out_tbl <- expand.grid( #' group1 = c(1,2), #' model_id = "A", #' output_type = "quantile", @@ -44,46 +44,46 @@ #' ) %>% #' dplyr::mutate(value = qnorm(p = output_type_id, mean = group1)) #' -#' convert_output_type(model_outputs, c("group1"), new_output_type = "median") +#' convert_output_type(model_out_tbl, c("group1"), new_output_type = "median") #' #' @return object of class `model_out_tbl` containing new output_type #' @export -convert_output_type <- function(model_outputs, new_output_type, +convert_output_type <- function(model_out_tbl, new_output_type, new_output_type_id = NA, n_samples = 1e4, ...) { # validations - task_id_cols <- get_task_id_cols(model_outputs) - starting_output_type <- model_outputs$output_type %>% unique() - starting_output_type_ids <- model_outputs$output_type_id %>% unique() - task_id_cols <- get_task_id_cols(model_outputs) + task_id_cols <- get_task_id_cols(model_out_tbl) + starting_output_type <- model_out_tbl$output_type %>% unique() + starting_output_type_ids <- model_out_tbl$output_type_id %>% unique() + task_id_cols <- get_task_id_cols(model_out_tbl) validate_new_output_type(starting_output_type, new_output_type, new_output_type_id) # for cdf and quantile functions, get samples if (starting_output_type == "cdf") { # estimate from samples - model_outputs <- get_samples_from_cdf(model_outputs, task_id_cols, n_samples) + model_out_tbl <- get_samples_from_cdf(model_out_tbl, task_id_cols, n_samples) } else if (starting_output_type == "quantile") { # if median output desired, and Q50 provided return exact value if (new_output_type == "median" && 0.5 %in% starting_output_type_ids) { - model_outputs_transform <- model_outputs %>% + model_out_tbl_transform <- model_out_tbl %>% dplyr::filter(output_type_id == 0.5) %>% dplyr::mutate( output_type = new_output_type, output_type_id = new_output_type_id ) %>% hubUtils::as_model_out_tbl() - return(model_outputs_transform) + return(model_out_tbl_transform) } else { # otherwise, estimate from samples - model_outputs <- get_samples_from_quantiles(model_outputs, task_id_cols, n_samples) + model_out_tbl <- get_samples_from_quantiles(model_out_tbl, task_id_cols, n_samples) } } # transform based on new_output_type - grouped_model_outputs <- model_outputs %>% + grouped_model_out_tbl <- model_out_tbl %>% dplyr::group_by(model_id, dplyr::across(dplyr::all_of(task_id_cols))) - model_outputs_transform <- convert_from_sample( - grouped_model_outputs, new_output_type, new_output_type_id + model_out_tbl_transform <- convert_from_sample( + grouped_model_out_tbl, new_output_type, new_output_type_id ) - return(model_outputs_transform) + return(model_out_tbl_transform) } validate_new_output_type <- function(starting_output_type, new_output_type, @@ -143,9 +143,9 @@ validate_new_output_type <- function(starting_output_type, new_output_type, } #' @export -get_samples_from_quantiles <- function(model_outputs, task_id_cols, n_samples, ...) { +get_samples_from_quantiles <- function(model_out_tbl, task_id_cols, n_samples, ...) { set.seed(101) - samples <- model_outputs %>% + samples <- model_out_tbl %>% dplyr::group_by(model_id, dplyr::across(dplyr::all_of(task_id_cols))) %>% dplyr::reframe( value = distfromq::make_q_fn( @@ -159,9 +159,9 @@ get_samples_from_quantiles <- function(model_outputs, task_id_cols, n_samples, . } #' @export -get_samples_from_cdf <- function(model_outputs, task_id_cols, n_samples, ...) { +get_samples_from_cdf <- function(model_out_tbl, task_id_cols, n_samples, ...) { set.seed(101) - samples <- model_outputs %>% + samples <- model_out_tbl %>% dplyr::group_by(model_id, dplyr::across(dplyr::all_of(task_id_cols))) %>% dplyr::reframe( value = distfromq::make_q_fn( @@ -173,22 +173,22 @@ get_samples_from_cdf <- function(model_outputs, task_id_cols, n_samples, ...) { } #' @export -convert_from_sample <- function(grouped_model_outputs, new_output_type, +convert_from_sample <- function(grouped_model_out_tbl, new_output_type, new_output_type_id) { if (new_output_type == "mean") { - model_outputs_transform <- grouped_model_outputs %>% + model_out_tbl_transform <- grouped_model_out_tbl %>% dplyr::reframe( value = mean(value), output_type_id = new_output_type_id ) } else if (new_output_type == "median") { - model_outputs_transform <- grouped_model_outputs %>% + model_out_tbl_transform <- grouped_model_out_tbl %>% dplyr::reframe( value = median(value), output_type_id = new_output_type_id ) } else if (new_output_type == "quantile") { - model_outputs_transform <- grouped_model_outputs %>% + model_out_tbl_transform <- grouped_model_out_tbl %>% dplyr::reframe( value = quantile(value, as.numeric(new_output_type_id), names = FALSE @@ -196,15 +196,15 @@ convert_from_sample <- function(grouped_model_outputs, new_output_type, output_type_id = new_output_type_id ) } else if (new_output_type == "cdf") { - model_outputs_transform <- grouped_model_outputs %>% + model_out_tbl_transform <- grouped_model_out_tbl %>% dplyr::reframe( value = ecdf(value)(as.numeric(new_output_type_id)), output_type_id = new_output_type_id ) } # update output_type and output_type_id columns - model_outputs_transform <- model_outputs_transform %>% + model_out_tbl_transform <- model_out_tbl_transform %>% dplyr::mutate(output_type = new_output_type) %>% hubUtils::as_model_out_tbl() - return(model_outputs_transform) + return(model_out_tbl_transform) } diff --git a/tests/testthat/test-convert_output_types.R b/tests/testthat/test-convert_output_types.R index c20cd536..a93d59b5 100644 --- a/tests/testthat/test-convert_output_types.R +++ b/tests/testthat/test-convert_output_types.R @@ -1,7 +1,7 @@ ### test convert_output_type() test_that("convert_output_type works (quantile >> mean)", { ex_qs <- seq(0,1,length.out = 500)[2:499] - model_outputs <- expand.grid( + model_out_tbl <- expand.grid( grp1 = 1:2, model_id = LETTERS[1:2], output_type = "quantile", @@ -20,13 +20,13 @@ test_that("convert_output_type works (quantile >> mean)", { dplyr::mutate(output_type = new_output_type, output_type_id = new_output_type_id) %>% hubUtils::as_model_out_tbl() - test <- convert_output_type(model_outputs, new_output_type, new_output_type_id) + test <- convert_output_type(model_out_tbl, new_output_type, new_output_type_id) expect_equal(expected, test, tolerance = 1e-2) }) test_that("convert_output_type works (quantile >> median)", { ex_qs <- seq(0,1,length.out = 500)[2:499] - model_outputs <- expand.grid( + model_out_tbl <- expand.grid( grp1 = 1:2, model_id = LETTERS[1:2], output_type = "quantile", @@ -45,13 +45,13 @@ test_that("convert_output_type works (quantile >> median)", { dplyr::mutate(output_type = new_output_type, output_type_id = new_output_type_id) %>% hubUtils::as_model_out_tbl() - test <- convert_output_type(model_outputs, new_output_type, new_output_type_id) + test <- convert_output_type(model_out_tbl, new_output_type, new_output_type_id) expect_equal(expected, test, tolerance = 1e-2) }) test_that("convert_output_type works (quantile >> cdf)", { ex_qs <- seq(0,1,length.out = 500)[2:499] - model_outputs <- expand.grid( + model_out_tbl <- expand.grid( grp1 = 1:2, model_id = LETTERS[1:2], output_type = "quantile", @@ -74,13 +74,13 @@ test_that("convert_output_type works (quantile >> cdf)", { dplyr:: mutate(value = pnorm(output_type_id, grp1*ifelse(model_id == "A", 1, 3))) %>% dplyr::arrange(model_id, grp1) %>% hubUtils::as_model_out_tbl() - test <- convert_output_type(model_outputs, new_output_type, new_output_type_id) + test <- convert_output_type(model_out_tbl, new_output_type, new_output_type_id) expect_equal(expected, test, tolerance = 1e-2) }) test_that("convert_output_type works (cdf >> mean)", { ex_ps <- seq(-2,10,length.out = 500)[2:499] - model_outputs <- expand.grid( + model_out_tbl <- expand.grid( grp1 = 1:2, model_id = LETTERS[1:2], output_type = "cdf", @@ -99,13 +99,13 @@ test_that("convert_output_type works (cdf >> mean)", { dplyr::mutate(output_type = new_output_type, output_type_id = new_output_type_id) %>% hubUtils::as_model_out_tbl() - test <- convert_output_type(model_outputs, new_output_type, new_output_type_id) + test <- convert_output_type(model_out_tbl, new_output_type, new_output_type_id) expect_equal(expected, test, tolerance = 1e-2) }) test_that("convert_output_type works (cdf >> median)", { ex_ps <- seq(-2,10,length.out = 500)[2:499] - model_outputs <- expand.grid( + model_out_tbl <- expand.grid( grp1 = 1:2, model_id = LETTERS[1:2], output_type = "cdf", @@ -124,14 +124,14 @@ test_that("convert_output_type works (cdf >> median)", { dplyr::mutate(output_type = new_output_type, output_type_id = new_output_type_id) %>% hubUtils::as_model_out_tbl() - test <- convert_output_type(model_outputs, + test <- convert_output_type(model_out_tbl, new_output_type, new_output_type_id) expect_equal(expected, test, tolerance = 1e-2) }) test_that("convert_output_type works (cdf >> quantile)", { ex_ps <- seq(-2,10,length.out = 500)[2:499] - model_outputs <- expand.grid( + model_out_tbl <- expand.grid( grp1 = 1:2, model_id = LETTERS[1:2], output_type = "cdf", @@ -154,14 +154,14 @@ test_that("convert_output_type works (cdf >> quantile)", { dplyr:: mutate(value = qnorm(output_type_id, grp1*ifelse(model_id == "A", 1, 3))) %>% dplyr::arrange(model_id, grp1) %>% hubUtils::as_model_out_tbl() - test <- convert_output_type(model_outputs, + test <- convert_output_type(model_out_tbl, new_output_type, new_output_type_id) expect_equal(test, expected, tolerance = 1e-2) }) test_that("convert_output_type fails correctly (quantile)",{ ex_ps <- seq(-2,10,length.out = 500)[2:499] - model_outputs <- expand.grid( + model_out_tbl <- expand.grid( grp1 = 1:2, model_id = LETTERS[1:2], output_type = "cdf", @@ -184,12 +184,12 @@ test_that("convert_output_type fails correctly (quantile)",{ dplyr:: mutate(value = qnorm(output_type_id, grp1*ifelse(model_id == "A", 1, 3))) %>% dplyr::arrange(model_id, grp1) %>% hubUtils::as_model_out_tbl() - test <- convert_output_type(model_outputs, new_output_type, new_output_type_id) + test <- convert_output_type(model_out_tbl, new_output_type, new_output_type_id) expect_equal(test, expected, tolerance = 1e-2) }) test_that("convert_output_type fails correctly: wrong starting output_type", { - model_outputs <- expand.grid( + model_out_tbl <- expand.grid( grp1 = 1:2, model_id = LETTERS[1:2], output_type = "pmf", @@ -198,11 +198,11 @@ test_that("convert_output_type fails correctly: wrong starting output_type", { ) new_output_type = "mean" expect_error(convert_output_type( - model_outputs, new_output_type)) + model_out_tbl, new_output_type)) }) test_that("convert_output_type fails correctly: wrong new_output_type (quantile >> pmf)", { - model_outputs <- expand.grid( + model_out_tbl <- expand.grid( grp1 = 1:2, model_id = LETTERS[1:2], output_type = "quantile", @@ -211,11 +211,11 @@ test_that("convert_output_type fails correctly: wrong new_output_type (quantile ) new_output_type = "pmf" expect_error(convert_output_type( - model_outputs, new_output_type)) + model_out_tbl, new_output_type)) }) test_that("convert_output_type fails correctly: wrong new_output_type (cdf >> sample)", { - model_outputs <- expand.grid( + model_out_tbl <- expand.grid( grp1 = 1:2, model_id = LETTERS[1:2], output_type = "cdf", @@ -224,11 +224,11 @@ test_that("convert_output_type fails correctly: wrong new_output_type (cdf >> sa ) new_output_type = "sample" expect_error(convert_output_type( - model_outputs, new_output_type)) + model_out_tbl, new_output_type)) }) test_that("convert_output_type fails correctly: wrong new_output_type_id (mean)", { - model_outputs <- expand.grid( + model_out_tbl <- expand.grid( grp1 = 1:2, model_id = LETTERS[1:2], output_type = "cdf", @@ -238,12 +238,12 @@ test_that("convert_output_type fails correctly: wrong new_output_type_id (mean)" new_output_type = "mean" new_output_type_id = c("A", "B") expect_error(convert_output_type( - model_outputs, new_output_type, + model_out_tbl, new_output_type, new_output_type_id)) }) test_that("convert_output_type fails correctly: wrong new_output_type_id (quantile)", { - model_outputs <- expand.grid( + model_out_tbl <- expand.grid( grp1 = 1:2, model_id = LETTERS[1:2], output_type = "cdf", @@ -253,12 +253,12 @@ test_that("convert_output_type fails correctly: wrong new_output_type_id (quanti new_output_type = "quantile" new_output_type_id = c(-1,0,1) expect_error( - convert_output_type(model_outputs, new_output_type, new_output_type_id) + convert_output_type(model_out_tbl, new_output_type, new_output_type_id) ) }) test_that("convert_output_type fails correctly: wrong new_output_type_id (cdf)", { - model_outputs <- expand.grid( + model_out_tbl <- expand.grid( grp1 = 1:2, model_id = LETTERS[1:2], output_type = "quantile", @@ -268,13 +268,13 @@ test_that("convert_output_type fails correctly: wrong new_output_type_id (cdf)", new_output_type = "cdf" new_output_type_id = c("A", "B") expect_error( - convert_output_type(model_outputs, new_output_type, new_output_type_id) + convert_output_type(model_out_tbl, new_output_type, new_output_type_id) ) }) ### test convert_from_sample() test_that("convert_from_sample works (return mean)", { - grouped_model_outputs = expand.grid( + grouped_model_out_tbl = expand.grid( grp1 = 1:2, model_id = LETTERS[1:2], output_type_id = 1:5 @@ -283,18 +283,18 @@ test_that("convert_from_sample works (return mean)", { dplyr::group_by(grp1, model_id) new_output_type = "mean" new_output_type_id = NA - expected <- grouped_model_outputs %>% + expected <- grouped_model_out_tbl %>% dplyr::reframe(value = mean(value)) %>% dplyr::mutate(output_type = new_output_type, output_type_id = new_output_type_id) %>% hubUtils::as_model_out_tbl() - test <- convert_from_sample(grouped_model_outputs, new_output_type, + test <- convert_from_sample(grouped_model_out_tbl, new_output_type, new_output_type_id) expect_equal(expected, test) }) test_that("convert_from_sample works (return median)", { - grouped_model_outputs = expand.grid( + grouped_model_out_tbl = expand.grid( grp1 = 1:2, model_id = LETTERS[1:2], output_type_id = 1:5 @@ -303,18 +303,18 @@ test_that("convert_from_sample works (return median)", { dplyr::group_by(grp1, model_id) new_output_type = "median" new_output_type_id = NA - expected <- grouped_model_outputs %>% + expected <- grouped_model_out_tbl %>% dplyr::reframe(value = median(value)) %>% dplyr::mutate(output_type = new_output_type, output_type_id = new_output_type_id) %>% hubUtils::as_model_out_tbl() - test <- convert_from_sample(grouped_model_outputs, new_output_type, + test <- convert_from_sample(grouped_model_out_tbl, new_output_type, new_output_type_id) expect_equal(expected, test) }) test_that("convert_from_sample works (return quantile)", { - grouped_model_outputs = expand.grid( + grouped_model_out_tbl = expand.grid( grp1 = 1:2, model_id = LETTERS[1:2], output_type_id = 1:5 @@ -323,18 +323,18 @@ test_that("convert_from_sample works (return quantile)", { dplyr::group_by(grp1, model_id) new_output_type = "quantile" new_output_type_id = c(0.25, 0.75) - expected <- grouped_model_outputs %>% + expected <- grouped_model_out_tbl %>% dplyr::reframe(value = quantile(value, new_output_type_id, names = FALSE), output_type_id = new_output_type_id) %>% dplyr::mutate(output_type = new_output_type) %>% hubUtils::as_model_out_tbl() - test <- convert_from_sample(grouped_model_outputs, new_output_type, + test <- convert_from_sample(grouped_model_out_tbl, new_output_type, new_output_type_id) expect_equal(expected, test) }) test_that("convert_from_sample works (return cdf)", { - grouped_model_outputs = expand.grid( + grouped_model_out_tbl = expand.grid( grp1 = 1:2, model_id = LETTERS[1:2], output_type_id = 1:5 @@ -343,12 +343,12 @@ test_that("convert_from_sample works (return cdf)", { dplyr::group_by(grp1, model_id) new_output_type = "cdf" new_output_type_id = seq(0,30, 5) - expected <- grouped_model_outputs %>% + expected <- grouped_model_out_tbl %>% dplyr::reframe(value = ecdf(value)(new_output_type_id), output_type_id = new_output_type_id) %>% dplyr::mutate(output_type = new_output_type) %>% hubUtils::as_model_out_tbl() - test <- convert_from_sample(grouped_model_outputs, new_output_type, + test <- convert_from_sample(grouped_model_out_tbl, new_output_type, new_output_type_id) expect_equal(expected, test) }) From 4db0992474fadd28b086b9bd1423882f768c847d Mon Sep 17 00:00:00 2001 From: Emily Howerton <46577370+eahowerton@users.noreply.github.com> Date: Fri, 7 Jun 2024 11:17:48 -0400 Subject: [PATCH 14/46] add functionality for multiple new output_types --- R/convert_output_types.R | 125 ++++++++++++++------- tests/testthat/test-convert_output_types.R | 97 +++++++++++++--- 2 files changed, 162 insertions(+), 60 deletions(-) diff --git a/R/convert_output_types.R b/R/convert_output_types.R index 3b98faf6..16c839c4 100644 --- a/R/convert_output_types.R +++ b/R/convert_output_types.R @@ -1,20 +1,21 @@ -#' Transform between output types, from one starting output_type into one new -#' output_type. See details for supported conversions. +#' Transform between output types, from one starting output_type into new +#' output_types. See details for supported conversions. #' #' @param model_out_tbl an object of class `model_out_tbl` with component model #' outputs (e.g., predictions). `model_out_tbl` should contain only one #' unique value in the `output_type` column #' @param new_output_type `string` indicating the desired output_type after -#' transformation; can be `"mean"`, `"median"`, `"quantile"`, `"cdf"`; see -#' details for supported conversions +#' transformation (`"mean"`, `"median"`, `"quantile"`, `"cdf"`); can also be a +#' vector if multiple new output_types are desired #' @param new_output_type_id `vector` indicating desired output_type_ids for #' corresponding `new_output_type`; only needs to be specified if -#' `new_output_type` is `"quantile"` or `"cdf"` +#' `new_output_type` includes `"quantile"` or `"cdf"` (see details for how to +#' specify when both `"quantile"` and `"cdf"` are desired) #' @param n_samples `numeric` that specifies the number of samples to use when -#' calculating quantiles from an estimated quantile function. Defaults to `1e4`. +#' calculating output_types from an estimated quantile function. Defaults to `1e4`. #' @param ... parameters that are passed to `distfromq::make_q_fn`, specifying #' details of how to estimate a quantile function from provided quantile levels -#' and quantile values for `output_type` `"quantile"`. +#' and quantile values for `"quantile"` or `"cdf"` output types. #' #' @details #' The following transformations are supported: (i) `"sample"` can be @@ -22,16 +23,20 @@ #' `"quantile"` can be transformed to `"mean"`, `"median"`, or `"cdf"`; and #' (iii) `"cdf"` can be transformed to `"mean"`, `"median"`, or `"quantile"`. #' -#' For `"quantile"` and `"cdf"` starting output types, we follow the below approach: -#' 1. Interpolate and extrapolate from the provided quantiles or probabilities +#' For `"quantile"` and `"cdf"` starting output types, we use the following approach: +#' 1. Interpolate and extrapolate from the provided quantiles or probabilities #' for each component model to obtain an estimate of the cdf of that distribution. -#' 2. Draw samples from the distribution for each component model. To reduce +#' 2. Draw samples from the distribution for each component model. To reduce #' Monte Carlo variability, we use quasi-random samples corresponding to #' quantiles of the estimated distribution. -#' 3. Calculate the desired quantity (e.g., mean). -#' If the median quantile is provided in the `model_out_tbl` object (i.e., -#' the original output_type is `"median"` and 0.5 is contained in the original -#' output_type_id), the median value is extracted and returned directly. +#' 3. Calculate the desired quantity (e.g., mean). +#' If the median quantile is provided in the `model_out_tbl` object (i.e., the +#' original output_type is `"median"` and 0.5 is contained in the original +#' output_type_id), the median value is extracted and returned directly. +#' +#' If both `"quantile"` and `"cdf"` output_types are desired, `new_output_type_id` +#' should be a named list, where each element specifies the corresponding +#' `new_output_type_id`. See examples for an illustration. #' #' @examples #' # We illustrate the conversion between output types using normal distributions, @@ -46,6 +51,20 @@ #' #' convert_output_type(model_out_tbl, c("group1"), new_output_type = "median") #' +#' # Next, we illustrate conversion from samples to quantile and cdf +#' ex_bins <- seq(-2,2,1) +#' ex_quantiles <- c(0.25, 0.5, 0.75) +#' model_out_tbl <- expand.grid( +#' group1 = c(1,2), +#' model_id = "A +#' output_type = "sample", +#' output_type_id = 1:100 +#' ) %>% +#' dplyr::mutate(value = rnorm(100, mean = group1)) +#' +#' convert_output_type(model_out_tbl, new_output_type = c("quantile", "cdf"), +#' new_output_type_id = list("quantile" = ex_quantiles, "cdf" = ex_bins)) +#' #' @return object of class `model_out_tbl` containing new output_type #' @export convert_output_type <- function(model_out_tbl, new_output_type, @@ -55,35 +74,50 @@ convert_output_type <- function(model_out_tbl, new_output_type, starting_output_type <- model_out_tbl$output_type %>% unique() starting_output_type_ids <- model_out_tbl$output_type_id %>% unique() task_id_cols <- get_task_id_cols(model_out_tbl) - validate_new_output_type(starting_output_type, new_output_type, - new_output_type_id) + validate_new_output_type( + starting_output_type, new_output_type, + new_output_type_id + ) # for cdf and quantile functions, get samples if (starting_output_type == "cdf") { # estimate from samples model_out_tbl <- get_samples_from_cdf(model_out_tbl, task_id_cols, n_samples) } else if (starting_output_type == "quantile") { + # if median output desired, and Q50 provided return exact value, otherwise + # estimate from samples + if (!("median" %in% new_output_type && 0.5 %in% starting_output_type_ids)) { + model_out_tbl <- get_samples_from_quantiles(model_out_tbl, task_id_cols, n_samples) + } + } + # transform based on new_output_type + grouped_model_out_tbl <- model_out_tbl %>% + dplyr::group_by(model_id, dplyr::across(dplyr::all_of(task_id_cols))) + model_out_tbl_transform <- vector("list", length = length(new_output_type)) + for (i in 1:length(new_output_type)) { # if median output desired, and Q50 provided return exact value - if (new_output_type == "median" && 0.5 %in% starting_output_type_ids) { - model_out_tbl_transform <- model_out_tbl %>% + if (new_output_type[i] == "median" && 0.5 %in% starting_output_type_ids) { + model_out_tbl_transform[[i]] <- model_out_tbl %>% dplyr::filter(output_type_id == 0.5) %>% dplyr::mutate( - output_type = new_output_type, - output_type_id = new_output_type_id + output_type = new_output_type[i], + output_type_id = NA ) %>% hubUtils::as_model_out_tbl() - return(model_out_tbl_transform) - } else { - # otherwise, estimate from samples - model_out_tbl <- get_samples_from_quantiles(model_out_tbl, task_id_cols, n_samples) } + # otherwise calculate new values + # first find new_output_type_id + new_output_type_id_tmp <- new_output_type_id + if (new_output_type[i] %in% c("mean", "median")) { + new_output_type_id_tmp <- NA + } else + if (is.list(new_output_type_id)) { + new_output_type_id_tmp <- new_output_type_id[[new_output_type[i]]] + } + model_out_tbl_transform[[i]] <- convert_from_sample( + grouped_model_out_tbl, new_output_type[i], new_output_type_id_tmp + ) } - # transform based on new_output_type - grouped_model_out_tbl <- model_out_tbl %>% - dplyr::group_by(model_id, dplyr::across(dplyr::all_of(task_id_cols))) - model_out_tbl_transform <- convert_from_sample( - grouped_model_out_tbl, new_output_type, new_output_type_id - ) - return(model_out_tbl_transform) + return(dplyr::bind_rows(model_out_tbl_transform)) } validate_new_output_type <- function(starting_output_type, new_output_type, @@ -102,29 +136,33 @@ validate_new_output_type <- function(starting_output_type, new_output_type, )) } # check new_output_type is supported - valid_new_output_type <- new_output_type %in% valid_conversions[[starting_output_type]] - if (!valid_new_output_type) { + invalid_new_output_type <- which(!(new_output_type %in% valid_conversions[[starting_output_type]])) + if (length(invalid_new_output_type) > 0) { cli::cli_abort(c( - "{starting_output_type} cannot be transformed to - output_type {new_output_type}", + "{invalid_new_output_type} cannot be transformed to the specified + {.var new_output_type}", i = "new_output_type must be {valid_conversions[[starting_output_type]]}" )) } # check new_output_type_id - if (new_output_type %in% c("mean", "median") && !all(is.na(new_output_type_id))) { + if (all(new_output_type %in% c("mean", "median")) && !all(is.na(new_output_type_id))) { cli::cli_abort(c( "{.var new_output_type_id} is incompatible with {.var new_output_type}", i = "{.var new_output_type_id} should be {.var NA}" )) - } else if (new_output_type == "quantile") { - if (!is.numeric(new_output_type_id)) { + } else if ("quantile" %in% new_output_type) { + new_output_type_id_tmp <- new_output_type_id + if (is.list(new_output_type_id)) { + new_output_type_id_tmp <- new_output_type_id[["quantile"]] + } + if (!is.numeric(new_output_type_id_tmp)) { cli::cli_abort(c( "elements of {.var new_output_type_id} should be numeric", i = "elements of {.var new_output_type_id} represent quantiles of the predictive distribution" )) } - if (any(new_output_type_id < 0) || any(new_output_type_id > 1)) { + if (any(new_output_type_id_tmp < 0) || any(new_output_type_id_tmp > 1)) { cli::cli_abort(c( "elements of {.var new_output_type_id} should be between 0 and 1", i = "elements of {.var new_output_type_id} represent quantiles @@ -132,7 +170,11 @@ validate_new_output_type <- function(starting_output_type, new_output_type, )) } } else if (new_output_type == "cdf") { - if (!is.numeric(new_output_type_id)) { + new_output_type_id_tmp <- new_output_type_id + if (is.list(new_output_type_id)) { + new_output_type_id_tmp <- new_output_type_id[["cdf"]] + } + if (!is.numeric(new_output_type_id_tmp)) { cli::cli_abort(c( "elements of {.var new_output_type_id} should be numeric", i = "elements of {.var new_output_type_id} represent possible @@ -190,8 +232,7 @@ convert_from_sample <- function(grouped_model_out_tbl, new_output_type, } else if (new_output_type == "quantile") { model_out_tbl_transform <- grouped_model_out_tbl %>% dplyr::reframe( - value = quantile(value, as.numeric(new_output_type_id), - names = FALSE + value = quantile(value, as.numeric(new_output_type_id), names = FALSE ), output_type_id = new_output_type_id ) diff --git a/tests/testthat/test-convert_output_types.R b/tests/testthat/test-convert_output_types.R index a93d59b5..865596a4 100644 --- a/tests/testthat/test-convert_output_types.R +++ b/tests/testthat/test-convert_output_types.R @@ -21,7 +21,7 @@ test_that("convert_output_type works (quantile >> mean)", { output_type_id = new_output_type_id) %>% hubUtils::as_model_out_tbl() test <- convert_output_type(model_out_tbl, new_output_type, new_output_type_id) - expect_equal(expected, test, tolerance = 1e-2) + expect_equal(test, expected, tolerance = 1e-2) }) test_that("convert_output_type works (quantile >> median)", { @@ -46,7 +46,7 @@ test_that("convert_output_type works (quantile >> median)", { output_type_id = new_output_type_id) %>% hubUtils::as_model_out_tbl() test <- convert_output_type(model_out_tbl, new_output_type, new_output_type_id) - expect_equal(expected, test, tolerance = 1e-2) + expect_equal(test, expected, tolerance = 1e-2) }) test_that("convert_output_type works (quantile >> cdf)", { @@ -75,7 +75,7 @@ test_that("convert_output_type works (quantile >> cdf)", { dplyr::arrange(model_id, grp1) %>% hubUtils::as_model_out_tbl() test <- convert_output_type(model_out_tbl, new_output_type, new_output_type_id) - expect_equal(expected, test, tolerance = 1e-2) + expect_equal(test, expected, tolerance = 1e-2) }) test_that("convert_output_type works (cdf >> mean)", { @@ -100,7 +100,7 @@ test_that("convert_output_type works (cdf >> mean)", { output_type_id = new_output_type_id) %>% hubUtils::as_model_out_tbl() test <- convert_output_type(model_out_tbl, new_output_type, new_output_type_id) - expect_equal(expected, test, tolerance = 1e-2) + expect_equal(test, expected, tolerance = 1e-2) }) test_that("convert_output_type works (cdf >> median)", { @@ -126,7 +126,7 @@ test_that("convert_output_type works (cdf >> median)", { hubUtils::as_model_out_tbl() test <- convert_output_type(model_out_tbl, new_output_type, new_output_type_id) - expect_equal(expected, test, tolerance = 1e-2) + expect_equal(test, expected, tolerance = 1e-2) }) test_that("convert_output_type works (cdf >> quantile)", { @@ -159,6 +159,72 @@ test_that("convert_output_type works (cdf >> quantile)", { expect_equal(test, expected, tolerance = 1e-2) }) +test_that("convert_output_type works (sample >> quantile, cdf, mean)", { + ex_bins <- seq(-2,2,1) + ex_quantiles <- c(0.25, 0.5, 0.75) + model_out_tbl <- expand.grid( + grp1 = 1:2, + model_id = LETTERS[1:2], + output_type = "sample", + output_type_id = 1:1e5, + stringsAsFactors = FALSE + ) %>% + dplyr::mutate(mean = grp1*ifelse(model_id == "A", 1, 3), + value = rnorm(dplyr::n(), mean)) %>% + dplyr::select(-mean) + new_output_type = c("mean", "quantile", "cdf") + new_output_type_id = list("quantile" = ex_quantiles, "cdf" = ex_bins) + expected_quantile <- tibble::as_tibble(expand.grid( + grp1 = 1:2, + model_id = LETTERS[1:2], + output_type = "quantile", + output_type_id = ex_quantiles, + KEEP.OUT.ATTRS = FALSE, + stringsAsFactors = FALSE + )) %>% + dplyr:: mutate(value = qnorm(output_type_id, grp1*ifelse(model_id == "A", 1, 3))) %>% + dplyr::arrange(model_id, grp1) %>% + hubUtils::as_model_out_tbl() + expected_cdf <- tibble::as_tibble(expand.grid( + grp1 = 1:2, + model_id = LETTERS[1:2], + output_type = "cdf", + output_type_id = ex_bins, + KEEP.OUT.ATTRS = FALSE, + stringsAsFactors = FALSE + )) %>% + dplyr:: mutate(value = pnorm(output_type_id, grp1*ifelse(model_id == "A", 1, 3))) %>% + dplyr::arrange(model_id, grp1) %>% + hubUtils::as_model_out_tbl() + expected_mean <- tibble::as_tibble(expand.grid( + grp1 = 1:2, + model_id = LETTERS[1:2], + output_type = "mean", + output_type_id = NA, + KEEP.OUT.ATTRS = FALSE, + stringsAsFactors = FALSE + )) %>% + dplyr:: mutate(value = grp1*ifelse(model_id == "A", 1, 3)) %>% + dplyr::arrange(model_id, grp1) %>% + hubUtils::as_model_out_tbl() + expected <- dplyr::bind_rows(expected_mean, expected_quantile, expected_cdf) + test <- convert_output_type(model_out_tbl, new_output_type, new_output_type_id) + expect_equal(test, expected, tolerance = 1e-2) +}) + +#' ex_bins <- seq(-2,2,1) +#' ex_quantiles <- c(0.25, 0.5, 0.75) +#' model_out_tbl <- expand.grid( +#' group1 = c(1,2), +#' model_id = "A +#' output_type = "sample", +#' output_type_id = 1:100 +#' ) %>% +#' dplyr::mutate(value = rnorm(100, mean = group1)) +#' +#' convert_output_type(model_out_tbl, new_output_type = c("quantile", "cdf"), +#' new_output_type_id = list("quantile" = ex_quantiles, "cdf" = ex_bins)) + test_that("convert_output_type fails correctly (quantile)",{ ex_ps <- seq(-2,10,length.out = 500)[2:499] model_out_tbl <- expand.grid( @@ -197,8 +263,7 @@ test_that("convert_output_type fails correctly: wrong starting output_type", { stringsAsFactors = FALSE ) new_output_type = "mean" - expect_error(convert_output_type( - model_out_tbl, new_output_type)) + expect_error(convert_output_type(model_out_tbl, new_output_type)) }) test_that("convert_output_type fails correctly: wrong new_output_type (quantile >> pmf)", { @@ -210,8 +275,7 @@ test_that("convert_output_type fails correctly: wrong new_output_type (quantile stringsAsFactors = FALSE ) new_output_type = "pmf" - expect_error(convert_output_type( - model_out_tbl, new_output_type)) + expect_error(convert_output_type(model_out_tbl, new_output_type)) }) test_that("convert_output_type fails correctly: wrong new_output_type (cdf >> sample)", { @@ -223,8 +287,7 @@ test_that("convert_output_type fails correctly: wrong new_output_type (cdf >> sa stringsAsFactors = FALSE ) new_output_type = "sample" - expect_error(convert_output_type( - model_out_tbl, new_output_type)) + expect_error(convert_output_type(model_out_tbl, new_output_type)) }) test_that("convert_output_type fails correctly: wrong new_output_type_id (mean)", { @@ -237,9 +300,7 @@ test_that("convert_output_type fails correctly: wrong new_output_type_id (mean)" ) new_output_type = "mean" new_output_type_id = c("A", "B") - expect_error(convert_output_type( - model_out_tbl, new_output_type, - new_output_type_id)) + expect_error(convert_output_type(model_out_tbl, new_output_type, new_output_type_id)) }) test_that("convert_output_type fails correctly: wrong new_output_type_id (quantile)", { @@ -290,7 +351,7 @@ test_that("convert_from_sample works (return mean)", { hubUtils::as_model_out_tbl() test <- convert_from_sample(grouped_model_out_tbl, new_output_type, new_output_type_id) - expect_equal(expected, test) + expect_equal(test, expected) }) test_that("convert_from_sample works (return median)", { @@ -310,7 +371,7 @@ test_that("convert_from_sample works (return median)", { hubUtils::as_model_out_tbl() test <- convert_from_sample(grouped_model_out_tbl, new_output_type, new_output_type_id) - expect_equal(expected, test) + expect_equal(test, expected) }) test_that("convert_from_sample works (return quantile)", { @@ -330,7 +391,7 @@ test_that("convert_from_sample works (return quantile)", { hubUtils::as_model_out_tbl() test <- convert_from_sample(grouped_model_out_tbl, new_output_type, new_output_type_id) - expect_equal(expected, test) + expect_equal(test, expected) }) test_that("convert_from_sample works (return cdf)", { @@ -350,7 +411,7 @@ test_that("convert_from_sample works (return cdf)", { hubUtils::as_model_out_tbl() test <- convert_from_sample(grouped_model_out_tbl, new_output_type, new_output_type_id) - expect_equal(expected, test) + expect_equal(test, expected) }) From bee1c61eccec860f19dbba52e2983c223330af36 Mon Sep 17 00:00:00 2001 From: Emily Howerton <46577370+eahowerton@users.noreply.github.com> Date: Fri, 7 Jun 2024 11:18:08 -0400 Subject: [PATCH 15/46] lint --- R/get_task_id_cols.R | 2 +- tests/testthat/test-convert_output_types.R | 641 ++++++++++----------- tests/testthat/test-get_task_id_cols.R | 2 +- 3 files changed, 315 insertions(+), 330 deletions(-) diff --git a/R/get_task_id_cols.R b/R/get_task_id_cols.R index 4cf4a00c..68e1a33b 100644 --- a/R/get_task_id_cols.R +++ b/R/get_task_id_cols.R @@ -1,5 +1,5 @@ #' @export -get_task_id_cols <- function(model_out_tbl){ +get_task_id_cols <- function(model_out_tbl) { model_out_cols <- colnames(model_out_tbl) task_id_cols <- model_out_cols[!model_out_cols %in% std_colnames] return(task_id_cols) diff --git a/tests/testthat/test-convert_output_types.R b/tests/testthat/test-convert_output_types.R index 865596a4..3b5992d8 100644 --- a/tests/testthat/test-convert_output_types.R +++ b/tests/testthat/test-convert_output_types.R @@ -1,210 +1,210 @@ ### test convert_output_type() test_that("convert_output_type works (quantile >> mean)", { - ex_qs <- seq(0,1,length.out = 500)[2:499] - model_out_tbl <- expand.grid( - grp1 = 1:2, - model_id = LETTERS[1:2], - output_type = "quantile", - output_type_id = ex_qs, - stringsAsFactors = FALSE - ) %>% - dplyr::mutate(mean = grp1*ifelse(model_id == "A", 1, 3), - value = qnorm(ex_qs, mean)) %>% - dplyr::select(-mean) - new_output_type = "mean" - new_output_type_id = NA - expected <- tibble::tibble( - grp1 = rep(1:2,2), model_id = sort(rep(LETTERS[1:2],2)) - ) %>% - dplyr:: mutate(value = grp1*ifelse(model_id == "A", 1, 3)) %>% - dplyr::mutate(output_type = new_output_type, - output_type_id = new_output_type_id) %>% - hubUtils::as_model_out_tbl() - test <- convert_output_type(model_out_tbl, new_output_type, new_output_type_id) - expect_equal(test, expected, tolerance = 1e-2) + ex_qs <- seq(0, 1, length.out = 500)[2:499] + model_out_tbl <- expand.grid( + grp1 = 1:2, + model_id = LETTERS[1:2], + output_type <- "quantile", + output_type_id <- ex_qs, + stringsAsFactors = FALSE + ) %>% + dplyr::mutate(mean = grp1 * ifelse(model_id == "A", 1, 3), + value = qnorm(ex_qs, mean)) %>% + dplyr::select(-mean) + new_output_type <- "mean" + new_output_type_id <- NA + expected <- tibble::tibble( + grp1 = rep(1:2, 2), model_id = sort(rep(LETTERS[1:2], 2)) + ) %>% + dplyr:: mutate(value = grp1 * ifelse(model_id == "A", 1, 3)) %>% + dplyr::mutate(output_type <- new_output_type, + output_type_id <- new_output_type_id) %>% + hubUtils::as_model_out_tbl() + test <- convert_output_type(model_out_tbl, new_output_type, new_output_type_id) + expect_equal(test, expected, tolerance = 1e-2) }) test_that("convert_output_type works (quantile >> median)", { - ex_qs <- seq(0,1,length.out = 500)[2:499] - model_out_tbl <- expand.grid( - grp1 = 1:2, - model_id = LETTERS[1:2], - output_type = "quantile", - output_type_id = ex_qs, - stringsAsFactors = FALSE - ) %>% - dplyr::mutate(mean = grp1*ifelse(model_id == "A", 1, 3), - value = qnorm(ex_qs, mean)) %>% - dplyr::select(-mean) - new_output_type = "median" - new_output_type_id = NA - expected <- tibble::tibble( - grp1 = rep(1:2,2), model_id = sort(rep(LETTERS[1:2],2)) - ) %>% - dplyr:: mutate(value = grp1*ifelse(model_id == "A", 1, 3)) %>% - dplyr::mutate(output_type = new_output_type, - output_type_id = new_output_type_id) %>% - hubUtils::as_model_out_tbl() - test <- convert_output_type(model_out_tbl, new_output_type, new_output_type_id) - expect_equal(test, expected, tolerance = 1e-2) + ex_qs <- seq(0, 1, length.out = 500)[2:499] + model_out_tbl <- expand.grid( + grp1 = 1:2, + model_id = LETTERS[1:2], + output_type <- "quantile", + output_type_id <- ex_qs, + stringsAsFactors = FALSE + ) %>% + dplyr::mutate(mean = grp1 * ifelse(model_id == "A", 1, 3), + value = qnorm(ex_qs, mean)) %>% + dplyr::select(-mean) + new_output_type <- "median" + new_output_type_id <- NA + expected <- tibble::tibble( + grp1 = rep(1:2, 2), model_id = sort(rep(LETTERS[1:2], 2)) + ) %>% + dplyr:: mutate(value = grp1 * ifelse(model_id == "A", 1, 3)) %>% + dplyr::mutate(output_type <- new_output_type, + output_type_id <- new_output_type_id) %>% + hubUtils::as_model_out_tbl() + test <- convert_output_type(model_out_tbl, new_output_type, new_output_type_id) + expect_equal(test, expected, tolerance = 1e-2) }) test_that("convert_output_type works (quantile >> cdf)", { - ex_qs <- seq(0,1,length.out = 500)[2:499] - model_out_tbl <- expand.grid( - grp1 = 1:2, - model_id = LETTERS[1:2], - output_type = "quantile", - output_type_id = ex_qs, - stringsAsFactors = FALSE - ) %>% - dplyr::mutate(mean = grp1*ifelse(model_id == "A", 1, 3), - value = qnorm(ex_qs, mean)) %>% - dplyr::select(-mean) - new_output_type = "cdf" - new_output_type_id = seq(-2,2,0.5) - expected <- tibble::as_tibble(expand.grid( - grp1 = 1:2, - model_id = LETTERS[1:2], - output_type = new_output_type, - output_type_id = new_output_type_id, - KEEP.OUT.ATTRS = FALSE, - stringsAsFactors = FALSE - )) %>% - dplyr:: mutate(value = pnorm(output_type_id, grp1*ifelse(model_id == "A", 1, 3))) %>% - dplyr::arrange(model_id, grp1) %>% - hubUtils::as_model_out_tbl() - test <- convert_output_type(model_out_tbl, new_output_type, new_output_type_id) - expect_equal(test, expected, tolerance = 1e-2) + ex_qs <- seq(0, 1, length.out = 500)[2:499] + model_out_tbl <- expand.grid( + grp1 = 1:2, + model_id = LETTERS[1:2], + output_type <- "quantile", + output_type_id <- ex_qs, + stringsAsFactors = FALSE + ) %>% + dplyr::mutate(mean = grp1 * ifelse(model_id == "A", 1, 3), + value = qnorm(ex_qs, mean)) %>% + dplyr::select(-mean) + new_output_type <- "cdf" + new_output_type_id <- seq(-2, 2, 0.5) + expected <- tibble::as_tibble(expand.grid( + grp1 = 1:2, + model_id = LETTERS[1:2], + output_type <- new_output_type, + output_type_id <- new_output_type_id, + KEEP.OUT.ATTRS = FALSE, + stringsAsFactors = FALSE + )) %>% + dplyr:: mutate(value = pnorm(output_type_id, grp1 * ifelse(model_id == "A", 1, 3))) %>% + dplyr::arrange(model_id, grp1) %>% + hubUtils::as_model_out_tbl() + test <- convert_output_type(model_out_tbl, new_output_type, new_output_type_id) + expect_equal(test, expected, tolerance = 1e-2) }) test_that("convert_output_type works (cdf >> mean)", { - ex_ps <- seq(-2,10,length.out = 500)[2:499] - model_out_tbl <- expand.grid( - grp1 = 1:2, - model_id = LETTERS[1:2], - output_type = "cdf", - output_type_id = ex_ps, - stringsAsFactors = FALSE - ) %>% - dplyr::mutate(mean = grp1*ifelse(model_id == "A", 1, 3), - value = pnorm(output_type_id, mean)) %>% - dplyr::select(-mean) - new_output_type = "mean" - new_output_type_id = NA - expected <- tibble::tibble( - grp1 = rep(1:2,2), model_id = sort(rep(LETTERS[1:2],2)) - ) %>% - dplyr:: mutate(value = grp1*ifelse(model_id == "A", 1, 3)) %>% - dplyr::mutate(output_type = new_output_type, - output_type_id = new_output_type_id) %>% - hubUtils::as_model_out_tbl() - test <- convert_output_type(model_out_tbl, new_output_type, new_output_type_id) - expect_equal(test, expected, tolerance = 1e-2) + ex_ps <- seq(-2, 10, length.out = 500)[2:499] + model_out_tbl <- expand.grid( + grp1 = 1:2, + model_id = LETTERS[1:2], + output_type <- "cdf", + output_type_id <- ex_ps, + stringsAsFactors = FALSE + ) %>% + dplyr::mutate(mean = grp1 * ifelse(model_id == "A", 1, 3), + value = pnorm(output_type_id, mean)) %>% + dplyr::select(-mean) + new_output_type <- "mean" + new_output_type_id <- NA + expected <- tibble::tibble( + grp1 = rep(1:2, 2), model_id = sort(rep(LETTERS[1:2], 2)) + ) %>% + dplyr:: mutate(value = grp1 * ifelse(model_id == "A", 1, 3)) %>% + dplyr::mutate(output_type <- new_output_type, + output_type_id <- new_output_type_id) %>% + hubUtils::as_model_out_tbl() + test <- convert_output_type(model_out_tbl, new_output_type, new_output_type_id) + expect_equal(test, expected, tolerance = 1e-2) }) test_that("convert_output_type works (cdf >> median)", { - ex_ps <- seq(-2,10,length.out = 500)[2:499] - model_out_tbl <- expand.grid( - grp1 = 1:2, - model_id = LETTERS[1:2], - output_type = "cdf", - output_type_id = ex_ps, - stringsAsFactors = FALSE - ) %>% - dplyr::mutate(mean = grp1*ifelse(model_id == "A", 1, 3), - value = pnorm(output_type_id, mean)) %>% - dplyr::select(-mean) - new_output_type = "median" - new_output_type_id = NA - expected <- tibble::tibble( - grp1 = rep(1:2,2), model_id = sort(rep(LETTERS[1:2],2)) - ) %>% - dplyr:: mutate(value = grp1*ifelse(model_id == "A", 1, 3)) %>% - dplyr::mutate(output_type = new_output_type, - output_type_id = new_output_type_id) %>% - hubUtils::as_model_out_tbl() - test <- convert_output_type(model_out_tbl, - new_output_type, new_output_type_id) - expect_equal(test, expected, tolerance = 1e-2) + ex_ps <- seq(-2, 10, length.out = 500)[2:499] + model_out_tbl <- expand.grid( + grp1 = 1:2, + model_id = LETTERS[1:2], + output_type <- "cdf", + output_type_id <- ex_ps, + stringsAsFactors = FALSE + ) %>% + dplyr::mutate(mean = grp1 * ifelse(model_id == "A", 1, 3), + value = pnorm(output_type_id, mean)) %>% + dplyr::select(-mean) + new_output_type <- "median" + new_output_type_id <- NA + expected <- tibble::tibble( + grp1 = rep(1:2, 2), model_id = sort(rep(LETTERS[1:2], 2)) + ) %>% + dplyr:: mutate(value = grp1 * ifelse(model_id == "A", 1, 3)) %>% + dplyr::mutate(output_type <- new_output_type, + output_type_id <- new_output_type_id) %>% + hubUtils::as_model_out_tbl() + test <- convert_output_type(model_out_tbl, + new_output_type, new_output_type_id) + expect_equal(test, expected, tolerance = 1e-2) }) test_that("convert_output_type works (cdf >> quantile)", { - ex_ps <- seq(-2,10,length.out = 500)[2:499] - model_out_tbl <- expand.grid( - grp1 = 1:2, - model_id = LETTERS[1:2], - output_type = "cdf", - output_type_id = ex_ps, - stringsAsFactors = FALSE - ) %>% - dplyr::mutate(mean = grp1*ifelse(model_id == "A", 1, 3), - value = pnorm(output_type_id, mean)) %>% - dplyr::select(-mean) - new_output_type = "quantile" - new_output_type_id = c(0.25, 0.5, 0.75) - expected <- tibble::as_tibble(expand.grid( - grp1 = 1:2, - model_id = LETTERS[1:2], - output_type = new_output_type, - output_type_id = new_output_type_id, - KEEP.OUT.ATTRS = FALSE, - stringsAsFactors = FALSE - )) %>% - dplyr:: mutate(value = qnorm(output_type_id, grp1*ifelse(model_id == "A", 1, 3))) %>% - dplyr::arrange(model_id, grp1) %>% - hubUtils::as_model_out_tbl() - test <- convert_output_type(model_out_tbl, - new_output_type, new_output_type_id) - expect_equal(test, expected, tolerance = 1e-2) + ex_ps <- seq(-2, 10, length.out = 500)[2:499] + model_out_tbl <- expand.grid( + grp1 = 1:2, + model_id = LETTERS[1:2], + output_type <- "cdf", + output_type_id <- ex_ps, + stringsAsFactors = FALSE + ) %>% + dplyr::mutate(mean = grp1 * ifelse(model_id == "A", 1, 3), + value = pnorm(output_type_id, mean)) %>% + dplyr::select(-mean) + new_output_type <- "quantile" + new_output_type_id <- c(0.25, 0.5, 0.75) + expected <- tibble::as_tibble(expand.grid( + grp1 = 1:2, + model_id = LETTERS[1:2], + output_type <- new_output_type, + output_type_id <- new_output_type_id, + KEEP.OUT.ATTRS = FALSE, + stringsAsFactors = FALSE + )) %>% + dplyr:: mutate(value = qnorm(output_type_id, grp1 * ifelse(model_id == "A", 1, 3))) %>% + dplyr::arrange(model_id, grp1) %>% + hubUtils::as_model_out_tbl() + test <- convert_output_type(model_out_tbl, + new_output_type, new_output_type_id) + expect_equal(test, expected, tolerance = 1e-2) }) test_that("convert_output_type works (sample >> quantile, cdf, mean)", { - ex_bins <- seq(-2,2,1) + ex_bins <- seq(-2, 2, 1) ex_quantiles <- c(0.25, 0.5, 0.75) model_out_tbl <- expand.grid( grp1 = 1:2, model_id = LETTERS[1:2], - output_type = "sample", - output_type_id = 1:1e5, + output_type <- "sample", + output_type_id <- 1:1e5, stringsAsFactors = FALSE ) %>% - dplyr::mutate(mean = grp1*ifelse(model_id == "A", 1, 3), + dplyr::mutate(mean = grp1 * ifelse(model_id == "A", 1, 3), value = rnorm(dplyr::n(), mean)) %>% dplyr::select(-mean) - new_output_type = c("mean", "quantile", "cdf") - new_output_type_id = list("quantile" = ex_quantiles, "cdf" = ex_bins) + new_output_type <- c("mean", "quantile", "cdf") + new_output_type_id <- list("quantile" = ex_quantiles, "cdf" = ex_bins) expected_quantile <- tibble::as_tibble(expand.grid( grp1 = 1:2, model_id = LETTERS[1:2], - output_type = "quantile", - output_type_id = ex_quantiles, + output_type <- "quantile", + output_type_id <- ex_quantiles, KEEP.OUT.ATTRS = FALSE, stringsAsFactors = FALSE )) %>% - dplyr:: mutate(value = qnorm(output_type_id, grp1*ifelse(model_id == "A", 1, 3))) %>% + dplyr:: mutate(value = qnorm(output_type_id, grp1 * ifelse(model_id == "A", 1, 3))) %>% dplyr::arrange(model_id, grp1) %>% hubUtils::as_model_out_tbl() expected_cdf <- tibble::as_tibble(expand.grid( grp1 = 1:2, model_id = LETTERS[1:2], - output_type = "cdf", - output_type_id = ex_bins, + output_type <- "cdf", + output_type_id <- ex_bins, KEEP.OUT.ATTRS = FALSE, stringsAsFactors = FALSE )) %>% - dplyr:: mutate(value = pnorm(output_type_id, grp1*ifelse(model_id == "A", 1, 3))) %>% + dplyr:: mutate(value = pnorm(output_type_id, grp1 * ifelse(model_id == "A", 1, 3))) %>% dplyr::arrange(model_id, grp1) %>% hubUtils::as_model_out_tbl() expected_mean <- tibble::as_tibble(expand.grid( grp1 = 1:2, model_id = LETTERS[1:2], - output_type = "mean", - output_type_id = NA, + output_type <- "mean", + output_type_id <- NA, KEEP.OUT.ATTRS = FALSE, stringsAsFactors = FALSE )) %>% - dplyr:: mutate(value = grp1*ifelse(model_id == "A", 1, 3)) %>% + dplyr:: mutate(value = grp1 * ifelse(model_id == "A", 1, 3)) %>% dplyr::arrange(model_id, grp1) %>% hubUtils::as_model_out_tbl() expected <- dplyr::bind_rows(expected_mean, expected_quantile, expected_cdf) @@ -212,206 +212,191 @@ test_that("convert_output_type works (sample >> quantile, cdf, mean)", { expect_equal(test, expected, tolerance = 1e-2) }) -#' ex_bins <- seq(-2,2,1) -#' ex_quantiles <- c(0.25, 0.5, 0.75) -#' model_out_tbl <- expand.grid( -#' group1 = c(1,2), -#' model_id = "A -#' output_type = "sample", -#' output_type_id = 1:100 -#' ) %>% -#' dplyr::mutate(value = rnorm(100, mean = group1)) -#' -#' convert_output_type(model_out_tbl, new_output_type = c("quantile", "cdf"), -#' new_output_type_id = list("quantile" = ex_quantiles, "cdf" = ex_bins)) - -test_that("convert_output_type fails correctly (quantile)",{ - ex_ps <- seq(-2,10,length.out = 500)[2:499] - model_out_tbl <- expand.grid( - grp1 = 1:2, - model_id = LETTERS[1:2], - output_type = "cdf", - output_type_id = ex_ps, - stringsAsFactors = FALSE - ) %>% - dplyr::mutate(mean = grp1*ifelse(model_id == "A", 1, 3), - value = pnorm(output_type_id, mean)) %>% - dplyr::select(-mean) - new_output_type = "quantile" - new_output_type_id = c(0.25, 0.5, 0.75) - expected <- tibble::as_tibble(expand.grid( - grp1 = 1:2, - model_id = LETTERS[1:2], - output_type = new_output_type, - output_type_id = new_output_type_id, - KEEP.OUT.ATTRS = FALSE, - stringsAsFactors = FALSE - )) %>% - dplyr:: mutate(value = qnorm(output_type_id, grp1*ifelse(model_id == "A", 1, 3))) %>% - dplyr::arrange(model_id, grp1) %>% - hubUtils::as_model_out_tbl() - test <- convert_output_type(model_out_tbl, new_output_type, new_output_type_id) - expect_equal(test, expected, tolerance = 1e-2) +test_that("convert_output_type fails correctly (quantile)", { + ex_ps <- seq(-2, 10, length.out = 500)[2:499] + model_out_tbl <- expand.grid( + grp1 = 1:2, + model_id = LETTERS[1:2], + output_type <- "cdf", + output_type_id <- ex_ps, + stringsAsFactors = FALSE + ) %>% + dplyr::mutate(mean = grp1 * ifelse(model_id == "A", 1, 3), + value = pnorm(output_type_id, mean)) %>% + dplyr::select(-mean) + new_output_type <- "quantile" + new_output_type_id <- c(0.25, 0.5, 0.75) + expected <- tibble::as_tibble(expand.grid( + grp1 = 1:2, + model_id = LETTERS[1:2], + output_type <- new_output_type, + output_type_id <- new_output_type_id, + KEEP.OUT.ATTRS = FALSE, + stringsAsFactors = FALSE + )) %>% + dplyr:: mutate(value = qnorm(output_type_id, grp1 * ifelse(model_id == "A", 1, 3))) %>% + dplyr::arrange(model_id, grp1) %>% + hubUtils::as_model_out_tbl() + test <- convert_output_type(model_out_tbl, new_output_type, new_output_type_id) + expect_equal(test, expected, tolerance = 1e-2) }) test_that("convert_output_type fails correctly: wrong starting output_type", { - model_out_tbl <- expand.grid( - grp1 = 1:2, - model_id = LETTERS[1:2], - output_type = "pmf", - output_type_id = c("bin1", "bin2"), - stringsAsFactors = FALSE - ) - new_output_type = "mean" - expect_error(convert_output_type(model_out_tbl, new_output_type)) + model_out_tbl <- expand.grid( + grp1 = 1:2, + model_id = LETTERS[1:2], + output_type <- "pmf", + output_type_id <- c("bin1", "bin2"), + stringsAsFactors = FALSE + ) + new_output_type <- "mean" + expect_error(convert_output_type(model_out_tbl, new_output_type)) }) test_that("convert_output_type fails correctly: wrong new_output_type (quantile >> pmf)", { - model_out_tbl <- expand.grid( - grp1 = 1:2, - model_id = LETTERS[1:2], - output_type = "quantile", - output_type_id = c(0.25, 0.5, 0.75), - stringsAsFactors = FALSE - ) - new_output_type = "pmf" - expect_error(convert_output_type(model_out_tbl, new_output_type)) + model_out_tbl <- expand.grid( + grp1 = 1:2, + model_id = LETTERS[1:2], + output_type <- "quantile", + output_type_id <- c(0.25, 0.5, 0.75), + stringsAsFactors = FALSE + ) + new_output_type <- "pmf" + expect_error(convert_output_type(model_out_tbl, new_output_type)) }) test_that("convert_output_type fails correctly: wrong new_output_type (cdf >> sample)", { - model_out_tbl <- expand.grid( - grp1 = 1:2, - model_id = LETTERS[1:2], - output_type = "cdf", - output_type_id = -1:1, - stringsAsFactors = FALSE - ) - new_output_type = "sample" - expect_error(convert_output_type(model_out_tbl, new_output_type)) + model_out_tbl <- expand.grid( + grp1 = 1:2, + model_id = LETTERS[1:2], + output_type <- "cdf", + output_type_id <- -1:1, + stringsAsFactors = FALSE + ) + new_output_type <- "sample" + expect_error(convert_output_type(model_out_tbl, new_output_type)) }) test_that("convert_output_type fails correctly: wrong new_output_type_id (mean)", { - model_out_tbl <- expand.grid( - grp1 = 1:2, - model_id = LETTERS[1:2], - output_type = "cdf", - output_type_id = -1:1, - stringsAsFactors = FALSE - ) - new_output_type = "mean" - new_output_type_id = c("A", "B") - expect_error(convert_output_type(model_out_tbl, new_output_type, new_output_type_id)) + model_out_tbl <- expand.grid( + grp1 = 1:2, + model_id = LETTERS[1:2], + output_type <- "cdf", + output_type_id <- -1:1, + stringsAsFactors = FALSE + ) + new_output_type <- "mean" + new_output_type_id <- c("A", "B") + expect_error(convert_output_type(model_out_tbl, new_output_type, new_output_type_id)) }) test_that("convert_output_type fails correctly: wrong new_output_type_id (quantile)", { - model_out_tbl <- expand.grid( - grp1 = 1:2, - model_id = LETTERS[1:2], - output_type = "cdf", - output_type_id = -1:1, - stringsAsFactors = FALSE - ) - new_output_type = "quantile" - new_output_type_id = c(-1,0,1) - expect_error( - convert_output_type(model_out_tbl, new_output_type, new_output_type_id) - ) + model_out_tbl <- expand.grid( + grp1 = 1:2, + model_id = LETTERS[1:2], + output_type <- "cdf", + output_type_id <- -1:1, + stringsAsFactors = FALSE + ) + new_output_type <- "quantile" + new_output_type_id <- c(-1, 0, 1) + expect_error( + convert_output_type(model_out_tbl, new_output_type, new_output_type_id) + ) }) test_that("convert_output_type fails correctly: wrong new_output_type_id (cdf)", { - model_out_tbl <- expand.grid( - grp1 = 1:2, - model_id = LETTERS[1:2], - output_type = "quantile", - output_type_id = seq(0,1,0.5), - stringsAsFactors = FALSE - ) - new_output_type = "cdf" - new_output_type_id = c("A", "B") - expect_error( - convert_output_type(model_out_tbl, new_output_type, new_output_type_id) - ) + model_out_tbl <- expand.grid( + grp1 = 1:2, + model_id = LETTERS[1:2], + output_type <- "quantile", + output_type_id <- seq(0, 1, 0.5), + stringsAsFactors = FALSE + ) + new_output_type <- "cdf" + new_output_type_id <- c("A", "B") + expect_error( + convert_output_type(model_out_tbl, new_output_type, new_output_type_id) + ) }) ### test convert_from_sample() test_that("convert_from_sample works (return mean)", { - grouped_model_out_tbl = expand.grid( - grp1 = 1:2, - model_id = LETTERS[1:2], - output_type_id = 1:5 - ) %>% - dplyr::mutate(value = grp1*ifelse(model_id == "A", 1, 3)*output_type_id) %>% - dplyr::group_by(grp1, model_id) - new_output_type = "mean" - new_output_type_id = NA - expected <- grouped_model_out_tbl %>% - dplyr::reframe(value = mean(value)) %>% - dplyr::mutate(output_type = new_output_type, - output_type_id = new_output_type_id) %>% - hubUtils::as_model_out_tbl() - test <- convert_from_sample(grouped_model_out_tbl, new_output_type, - new_output_type_id) - expect_equal(test, expected) + grouped_model_out_tbl <- expand.grid( + grp1 = 1:2, + model_id = LETTERS[1:2], + output_type_id <- 1:5 + ) %>% + dplyr::mutate(value = grp1 * ifelse(model_id == "A", 1, 3) * output_type_id) %>% + dplyr::group_by(grp1, model_id) + new_output_type <- "mean" + new_output_type_id <- NA + expected <- grouped_model_out_tbl %>% + dplyr::reframe(value = mean(value)) %>% + dplyr::mutate(output_type <- new_output_type, + output_type_id <- new_output_type_id) %>% + hubUtils::as_model_out_tbl() + test <- convert_from_sample(grouped_model_out_tbl, new_output_type, + new_output_type_id) + expect_equal(test, expected) }) test_that("convert_from_sample works (return median)", { - grouped_model_out_tbl = expand.grid( - grp1 = 1:2, - model_id = LETTERS[1:2], - output_type_id = 1:5 - ) %>% - dplyr::mutate(value = grp1*ifelse(model_id == "A", 1, 3)*output_type_id) %>% - dplyr::group_by(grp1, model_id) - new_output_type = "median" - new_output_type_id = NA - expected <- grouped_model_out_tbl %>% - dplyr::reframe(value = median(value)) %>% - dplyr::mutate(output_type = new_output_type, - output_type_id = new_output_type_id) %>% - hubUtils::as_model_out_tbl() - test <- convert_from_sample(grouped_model_out_tbl, new_output_type, - new_output_type_id) - expect_equal(test, expected) + grouped_model_out_tbl <- expand.grid( + grp1 = 1:2, + model_id = LETTERS[1:2], + output_type_id <- 1:5 + ) %>% + dplyr::mutate(value = grp1 * ifelse(model_id == "A", 1, 3) * output_type_id) %>% + dplyr::group_by(grp1, model_id) + new_output_type <- "median" + new_output_type_id <- NA + expected <- grouped_model_out_tbl %>% + dplyr::reframe(value = median(value)) %>% + dplyr::mutate(output_type <- new_output_type, + output_type_id <- new_output_type_id) %>% + hubUtils::as_model_out_tbl() + test <- convert_from_sample(grouped_model_out_tbl, new_output_type, + new_output_type_id) + expect_equal(test, expected) }) test_that("convert_from_sample works (return quantile)", { - grouped_model_out_tbl = expand.grid( - grp1 = 1:2, - model_id = LETTERS[1:2], - output_type_id = 1:5 - ) %>% - dplyr::mutate(value = grp1*ifelse(model_id == "A", 1, 3)*output_type_id) %>% - dplyr::group_by(grp1, model_id) - new_output_type = "quantile" - new_output_type_id = c(0.25, 0.75) - expected <- grouped_model_out_tbl %>% - dplyr::reframe(value = quantile(value, new_output_type_id, names = FALSE), - output_type_id = new_output_type_id) %>% - dplyr::mutate(output_type = new_output_type) %>% - hubUtils::as_model_out_tbl() - test <- convert_from_sample(grouped_model_out_tbl, new_output_type, - new_output_type_id) - expect_equal(test, expected) + grouped_model_out_tbl <- expand.grid( + grp1 = 1:2, + model_id = LETTERS[1:2], + output_type_id <- 1:5 + ) %>% + dplyr::mutate(value = grp1 * ifelse(model_id == "A", 1, 3) * output_type_id) %>% + dplyr::group_by(grp1, model_id) + new_output_type <- "quantile" + new_output_type_id <- c(0.25, 0.75) + expected <- grouped_model_out_tbl %>% + dplyr::reframe(value = quantile(value, new_output_type_id, names = FALSE), + output_type_id <- new_output_type_id) %>% + dplyr::mutate(output_type <- new_output_type) %>% + hubUtils::as_model_out_tbl() + test <- convert_from_sample(grouped_model_out_tbl, new_output_type, + new_output_type_id) + expect_equal(test, expected) }) test_that("convert_from_sample works (return cdf)", { - grouped_model_out_tbl = expand.grid( - grp1 = 1:2, - model_id = LETTERS[1:2], - output_type_id = 1:5 - ) %>% - dplyr::mutate(value = grp1*ifelse(model_id == "A", 1, 3)*output_type_id) %>% - dplyr::group_by(grp1, model_id) - new_output_type = "cdf" - new_output_type_id = seq(0,30, 5) - expected <- grouped_model_out_tbl %>% - dplyr::reframe(value = ecdf(value)(new_output_type_id), - output_type_id = new_output_type_id) %>% - dplyr::mutate(output_type = new_output_type) %>% - hubUtils::as_model_out_tbl() - test <- convert_from_sample(grouped_model_out_tbl, new_output_type, - new_output_type_id) - expect_equal(test, expected) + grouped_model_out_tbl <- expand.grid( + grp1 = 1:2, + model_id = LETTERS[1:2], + output_type_id <- 1:5 + ) %>% + dplyr::mutate(value = grp1 * ifelse(model_id == "A", 1, 3) * output_type_id) %>% + dplyr::group_by(grp1, model_id) + new_output_type <- "cdf" + new_output_type_id <- seq(0, 30, 5) + expected <- grouped_model_out_tbl %>% + dplyr::reframe(value = ecdf(value)(new_output_type_id), + output_type_id <- new_output_type_id) %>% + dplyr::mutate(output_type <- new_output_type) %>% + hubUtils::as_model_out_tbl() + test <- convert_from_sample(grouped_model_out_tbl, new_output_type, + new_output_type_id) + expect_equal(test, expected) }) - - diff --git a/tests/testthat/test-get_task_id_cols.R b/tests/testthat/test-get_task_id_cols.R index 1a5dc74f..14a6280b 100644 --- a/tests/testthat/test-get_task_id_cols.R +++ b/tests/testthat/test-get_task_id_cols.R @@ -1,5 +1,5 @@ test_that("get_task_id_cols works", { - ex_qs <- seq(0,1,length.out = 5) + ex_qs <- seq(0, 1, length.out = 5) model_outputs <- expand.grid( grp1 = 1:2, grp2 = 1:3, From 28a6be6aa971761972663ae789f8a56ec4b068a1 Mon Sep 17 00:00:00 2001 From: Li Shandross <57642277+lshandross@users.noreply.github.com> Date: Wed, 11 Sep 2024 12:06:10 -0400 Subject: [PATCH 16/46] Update DESCRIPTION --- DESCRIPTION | 1 + 1 file changed, 1 insertion(+) diff --git a/DESCRIPTION b/DESCRIPTION index eb6f16a0..d45177b8 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -47,6 +47,7 @@ Suggests: Additional_repositories: https://hubverse-org.r-universe.dev/ Remotes: hubverse-org/hubData + reichlab/distfromq Config/Needs/website: hubverse-org/hubStyle Config/testthat/edition: 3 Encoding: UTF-8 From cd82801ee936a8234dd8070978d0363b9b82fce2 Mon Sep 17 00:00:00 2001 From: Li Shandross <57642277+lshandross@users.noreply.github.com> Date: Wed, 11 Sep 2024 12:09:27 -0400 Subject: [PATCH 17/46] Rename `convert_output_types()` title Co-authored-by: Anna Krystalli --- R/convert_output_types.R | 2 ++ 1 file changed, 2 insertions(+) diff --git a/R/convert_output_types.R b/R/convert_output_types.R index 16c839c4..f302dacd 100644 --- a/R/convert_output_types.R +++ b/R/convert_output_types.R @@ -1,3 +1,5 @@ +#' Transform between output types +#' #' Transform between output types, from one starting output_type into new #' output_types. See details for supported conversions. #' From 8bfee2099ef53035ddd907c177192faefbe7c276 Mon Sep 17 00:00:00 2001 From: Li Shandross <57642277+lshandross@users.noreply.github.com> Date: Wed, 11 Sep 2024 12:12:03 -0400 Subject: [PATCH 18/46] Punctuation fixes Co-authored-by: Anna Krystalli --- R/convert_output_types.R | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/R/convert_output_types.R b/R/convert_output_types.R index f302dacd..08d0c90e 100644 --- a/R/convert_output_types.R +++ b/R/convert_output_types.R @@ -5,14 +5,14 @@ #' #' @param model_out_tbl an object of class `model_out_tbl` with component model #' outputs (e.g., predictions). `model_out_tbl` should contain only one -#' unique value in the `output_type` column +#' unique value in the `output_type` column. #' @param new_output_type `string` indicating the desired output_type after #' transformation (`"mean"`, `"median"`, `"quantile"`, `"cdf"`); can also be a -#' vector if multiple new output_types are desired +#' vector if multiple new output_types are desired. #' @param new_output_type_id `vector` indicating desired output_type_ids for #' corresponding `new_output_type`; only needs to be specified if #' `new_output_type` includes `"quantile"` or `"cdf"` (see details for how to -#' specify when both `"quantile"` and `"cdf"` are desired) +#' specify when both `"quantile"` and `"cdf"` are desired.) #' @param n_samples `numeric` that specifies the number of samples to use when #' calculating output_types from an estimated quantile function. Defaults to `1e4`. #' @param ... parameters that are passed to `distfromq::make_q_fn`, specifying From 727d33d635fcd333fc94e61b6e8ea09859771c50 Mon Sep 17 00:00:00 2001 From: Li Shandross <57642277+lshandross@users.noreply.github.com> Date: Wed, 11 Sep 2024 12:23:38 -0400 Subject: [PATCH 19/46] Remove within package `hubUtils` namespacing --- R/convert_output_types.R | 4 ++-- tests/testthat/test-convert_output_types.R | 28 +++++++++++----------- 2 files changed, 16 insertions(+), 16 deletions(-) diff --git a/R/convert_output_types.R b/R/convert_output_types.R index 08d0c90e..a498900f 100644 --- a/R/convert_output_types.R +++ b/R/convert_output_types.R @@ -104,7 +104,7 @@ convert_output_type <- function(model_out_tbl, new_output_type, output_type = new_output_type[i], output_type_id = NA ) %>% - hubUtils::as_model_out_tbl() + as_model_out_tbl() } # otherwise calculate new values # first find new_output_type_id @@ -248,6 +248,6 @@ convert_from_sample <- function(grouped_model_out_tbl, new_output_type, # update output_type and output_type_id columns model_out_tbl_transform <- model_out_tbl_transform %>% dplyr::mutate(output_type = new_output_type) %>% - hubUtils::as_model_out_tbl() + as_model_out_tbl() return(model_out_tbl_transform) } diff --git a/tests/testthat/test-convert_output_types.R b/tests/testthat/test-convert_output_types.R index 3b5992d8..bda1646b 100644 --- a/tests/testthat/test-convert_output_types.R +++ b/tests/testthat/test-convert_output_types.R @@ -19,7 +19,7 @@ test_that("convert_output_type works (quantile >> mean)", { dplyr:: mutate(value = grp1 * ifelse(model_id == "A", 1, 3)) %>% dplyr::mutate(output_type <- new_output_type, output_type_id <- new_output_type_id) %>% - hubUtils::as_model_out_tbl() + as_model_out_tbl() test <- convert_output_type(model_out_tbl, new_output_type, new_output_type_id) expect_equal(test, expected, tolerance = 1e-2) }) @@ -44,7 +44,7 @@ test_that("convert_output_type works (quantile >> median)", { dplyr:: mutate(value = grp1 * ifelse(model_id == "A", 1, 3)) %>% dplyr::mutate(output_type <- new_output_type, output_type_id <- new_output_type_id) %>% - hubUtils::as_model_out_tbl() + as_model_out_tbl() test <- convert_output_type(model_out_tbl, new_output_type, new_output_type_id) expect_equal(test, expected, tolerance = 1e-2) }) @@ -73,7 +73,7 @@ test_that("convert_output_type works (quantile >> cdf)", { )) %>% dplyr:: mutate(value = pnorm(output_type_id, grp1 * ifelse(model_id == "A", 1, 3))) %>% dplyr::arrange(model_id, grp1) %>% - hubUtils::as_model_out_tbl() + as_model_out_tbl() test <- convert_output_type(model_out_tbl, new_output_type, new_output_type_id) expect_equal(test, expected, tolerance = 1e-2) }) @@ -98,7 +98,7 @@ test_that("convert_output_type works (cdf >> mean)", { dplyr:: mutate(value = grp1 * ifelse(model_id == "A", 1, 3)) %>% dplyr::mutate(output_type <- new_output_type, output_type_id <- new_output_type_id) %>% - hubUtils::as_model_out_tbl() + as_model_out_tbl() test <- convert_output_type(model_out_tbl, new_output_type, new_output_type_id) expect_equal(test, expected, tolerance = 1e-2) }) @@ -123,7 +123,7 @@ test_that("convert_output_type works (cdf >> median)", { dplyr:: mutate(value = grp1 * ifelse(model_id == "A", 1, 3)) %>% dplyr::mutate(output_type <- new_output_type, output_type_id <- new_output_type_id) %>% - hubUtils::as_model_out_tbl() + as_model_out_tbl() test <- convert_output_type(model_out_tbl, new_output_type, new_output_type_id) expect_equal(test, expected, tolerance = 1e-2) @@ -153,7 +153,7 @@ test_that("convert_output_type works (cdf >> quantile)", { )) %>% dplyr:: mutate(value = qnorm(output_type_id, grp1 * ifelse(model_id == "A", 1, 3))) %>% dplyr::arrange(model_id, grp1) %>% - hubUtils::as_model_out_tbl() + as_model_out_tbl() test <- convert_output_type(model_out_tbl, new_output_type, new_output_type_id) expect_equal(test, expected, tolerance = 1e-2) @@ -184,7 +184,7 @@ test_that("convert_output_type works (sample >> quantile, cdf, mean)", { )) %>% dplyr:: mutate(value = qnorm(output_type_id, grp1 * ifelse(model_id == "A", 1, 3))) %>% dplyr::arrange(model_id, grp1) %>% - hubUtils::as_model_out_tbl() + as_model_out_tbl() expected_cdf <- tibble::as_tibble(expand.grid( grp1 = 1:2, model_id = LETTERS[1:2], @@ -195,7 +195,7 @@ test_that("convert_output_type works (sample >> quantile, cdf, mean)", { )) %>% dplyr:: mutate(value = pnorm(output_type_id, grp1 * ifelse(model_id == "A", 1, 3))) %>% dplyr::arrange(model_id, grp1) %>% - hubUtils::as_model_out_tbl() + as_model_out_tbl() expected_mean <- tibble::as_tibble(expand.grid( grp1 = 1:2, model_id = LETTERS[1:2], @@ -206,7 +206,7 @@ test_that("convert_output_type works (sample >> quantile, cdf, mean)", { )) %>% dplyr:: mutate(value = grp1 * ifelse(model_id == "A", 1, 3)) %>% dplyr::arrange(model_id, grp1) %>% - hubUtils::as_model_out_tbl() + as_model_out_tbl() expected <- dplyr::bind_rows(expected_mean, expected_quantile, expected_cdf) test <- convert_output_type(model_out_tbl, new_output_type, new_output_type_id) expect_equal(test, expected, tolerance = 1e-2) @@ -236,7 +236,7 @@ test_that("convert_output_type fails correctly (quantile)", { )) %>% dplyr:: mutate(value = qnorm(output_type_id, grp1 * ifelse(model_id == "A", 1, 3))) %>% dplyr::arrange(model_id, grp1) %>% - hubUtils::as_model_out_tbl() + as_model_out_tbl() test <- convert_output_type(model_out_tbl, new_output_type, new_output_type_id) expect_equal(test, expected, tolerance = 1e-2) }) @@ -335,7 +335,7 @@ test_that("convert_from_sample works (return mean)", { dplyr::reframe(value = mean(value)) %>% dplyr::mutate(output_type <- new_output_type, output_type_id <- new_output_type_id) %>% - hubUtils::as_model_out_tbl() + as_model_out_tbl() test <- convert_from_sample(grouped_model_out_tbl, new_output_type, new_output_type_id) expect_equal(test, expected) @@ -355,7 +355,7 @@ test_that("convert_from_sample works (return median)", { dplyr::reframe(value = median(value)) %>% dplyr::mutate(output_type <- new_output_type, output_type_id <- new_output_type_id) %>% - hubUtils::as_model_out_tbl() + as_model_out_tbl() test <- convert_from_sample(grouped_model_out_tbl, new_output_type, new_output_type_id) expect_equal(test, expected) @@ -375,7 +375,7 @@ test_that("convert_from_sample works (return quantile)", { dplyr::reframe(value = quantile(value, new_output_type_id, names = FALSE), output_type_id <- new_output_type_id) %>% dplyr::mutate(output_type <- new_output_type) %>% - hubUtils::as_model_out_tbl() + as_model_out_tbl() test <- convert_from_sample(grouped_model_out_tbl, new_output_type, new_output_type_id) expect_equal(test, expected) @@ -395,7 +395,7 @@ test_that("convert_from_sample works (return cdf)", { dplyr::reframe(value = ecdf(value)(new_output_type_id), output_type_id <- new_output_type_id) %>% dplyr::mutate(output_type <- new_output_type) %>% - hubUtils::as_model_out_tbl() + as_model_out_tbl() test <- convert_from_sample(grouped_model_out_tbl, new_output_type, new_output_type_id) expect_equal(test, expected) From d07d9855f605c3a7d65535e5d9c55ca75d0aa677 Mon Sep 17 00:00:00 2001 From: Li Shandross <57642277+lshandross@users.noreply.github.com> Date: Wed, 11 Sep 2024 12:34:28 -0400 Subject: [PATCH 20/46] Update DESCRIPTION --- DESCRIPTION | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index d45177b8..ddfb2be1 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -36,7 +36,6 @@ Imports: utils Suggests: covr, - curl, digest, distfromq, dplyr, @@ -46,7 +45,7 @@ Suggests: testthat (>= 3.2.0) Additional_repositories: https://hubverse-org.r-universe.dev/ Remotes: - hubverse-org/hubData + hubverse-org/hubData, reichlab/distfromq Config/Needs/website: hubverse-org/hubStyle Config/testthat/edition: 3 From 2d7d19c08f0f5759ea12bc5ad5f59ff694d3a66c Mon Sep 17 00:00:00 2001 From: Li Shandross <57642277+lshandross@users.noreply.github.com> Date: Wed, 11 Sep 2024 12:34:43 -0400 Subject: [PATCH 21/46] Update NAMESPACE --- NAMESPACE | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/NAMESPACE b/NAMESPACE index e71fd271..0dd4d395 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -5,16 +5,21 @@ S3method(read_config,default) export("%>%") export(as_model_out_tbl) export(check_deprecated_schema) +export(convert_from_sample) +export(convert_output_type) export(extract_schema_version) export(get_config_tid) export(get_round_ids) export(get_round_idx) export(get_round_model_tasks) export(get_round_task_id_names) +export(get_samples_from_cdf) +export(get_samples_from_quantiles) export(get_schema) export(get_schema_url) export(get_schema_valid_versions) export(get_schema_version_latest) +export(get_task_id_cols) export(get_task_id_names) export(is_v3_config) export(is_v3_config_file) From 0f73f84f126ffe2939a6ed2facb3061aa65c44e3 Mon Sep 17 00:00:00 2001 From: Li Shandross <57642277+lshandross@users.noreply.github.com> Date: Wed, 11 Sep 2024 14:59:24 -0400 Subject: [PATCH 22/46] Update convert_output_types.R --- R/convert_output_types.R | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/R/convert_output_types.R b/R/convert_output_types.R index a498900f..6c1cd3a2 100644 --- a/R/convert_output_types.R +++ b/R/convert_output_types.R @@ -3,9 +3,8 @@ #' Transform between output types, from one starting output_type into new #' output_types. See details for supported conversions. #' -#' @param model_out_tbl an object of class `model_out_tbl` with component model -#' outputs (e.g., predictions). `model_out_tbl` should contain only one -#' unique value in the `output_type` column. +#' @param model_out_tbl an object of class `model_out_tbl` containing predictions +#' with only one unique value in the `output_type` column. #' @param new_output_type `string` indicating the desired output_type after #' transformation (`"mean"`, `"median"`, `"quantile"`, `"cdf"`); can also be a #' vector if multiple new output_types are desired. From 944404d6c2e42d5e687dfda52d9e841eb0b7ea42 Mon Sep 17 00:00:00 2001 From: Li Shandross <57642277+lshandross@users.noreply.github.com> Date: Wed, 11 Sep 2024 15:21:44 -0400 Subject: [PATCH 23/46] Fix linting issues --- NAMESPACE | 1 + R/convert_output_types.R | 30 +++++++++++++++--------------- 2 files changed, 16 insertions(+), 15 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index 0dd4d395..ffd7c4e5 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -35,3 +35,4 @@ importFrom(gh,gh) importFrom(jsonlite,fromJSON) importFrom(jsonlite,read_json) importFrom(magrittr,"%>%") +importFrom(rlang,.data) diff --git a/R/convert_output_types.R b/R/convert_output_types.R index 6c1cd3a2..808b35dd 100644 --- a/R/convert_output_types.R +++ b/R/convert_output_types.R @@ -1,5 +1,5 @@ #' Transform between output types -#' +#' #' Transform between output types, from one starting output_type into new #' output_types. See details for supported conversions. #' @@ -68,6 +68,7 @@ #' #' @return object of class `model_out_tbl` containing new output_type #' @export +#' @importFrom rlang .data convert_output_type <- function(model_out_tbl, new_output_type, new_output_type_id = NA, n_samples = 1e4, ...) { # validations @@ -92,13 +93,13 @@ convert_output_type <- function(model_out_tbl, new_output_type, } # transform based on new_output_type grouped_model_out_tbl <- model_out_tbl %>% - dplyr::group_by(model_id, dplyr::across(dplyr::all_of(task_id_cols))) + dplyr::group_by("model_id", dplyr::across(dplyr::all_of(task_id_cols))) model_out_tbl_transform <- vector("list", length = length(new_output_type)) - for (i in 1:length(new_output_type)) { + for (i in seq_along(new_output_type)) { # if median output desired, and Q50 provided return exact value if (new_output_type[i] == "median" && 0.5 %in% starting_output_type_ids) { model_out_tbl_transform[[i]] <- model_out_tbl %>% - dplyr::filter(output_type_id == 0.5) %>% + dplyr::filter(.data[["output_type_id"]] == 0.5) %>% dplyr::mutate( output_type = new_output_type[i], output_type_id = NA @@ -189,11 +190,11 @@ validate_new_output_type <- function(starting_output_type, new_output_type, get_samples_from_quantiles <- function(model_out_tbl, task_id_cols, n_samples, ...) { set.seed(101) samples <- model_out_tbl %>% - dplyr::group_by(model_id, dplyr::across(dplyr::all_of(task_id_cols))) %>% + dplyr::group_by("model_id", dplyr::across(dplyr::all_of(task_id_cols))) %>% dplyr::reframe( value = distfromq::make_q_fn( - ps = as.numeric(.data$output_type_id), - qs = .data$value, ... + ps = as.numeric(.data[["output_type_id"]]), + qs = .data[["value"]], ... )(runif(n_samples, 0, 1)) ) @@ -205,11 +206,11 @@ get_samples_from_quantiles <- function(model_out_tbl, task_id_cols, n_samples, . get_samples_from_cdf <- function(model_out_tbl, task_id_cols, n_samples, ...) { set.seed(101) samples <- model_out_tbl %>% - dplyr::group_by(model_id, dplyr::across(dplyr::all_of(task_id_cols))) %>% + dplyr::group_by("model_id", dplyr::across(dplyr::all_of(task_id_cols))) %>% dplyr::reframe( value = distfromq::make_q_fn( - ps = .data$value, - qs = as.numeric(.data$output_type_id), ... + ps = .data[["value"]], + qs = as.numeric(.data[["output_type_id"]]), ... )(runif(n_samples, 0, 1)) ) return(samples) @@ -221,26 +222,25 @@ convert_from_sample <- function(grouped_model_out_tbl, new_output_type, if (new_output_type == "mean") { model_out_tbl_transform <- grouped_model_out_tbl %>% dplyr::reframe( - value = mean(value), + value = mean(.data[["value"]]), output_type_id = new_output_type_id ) } else if (new_output_type == "median") { model_out_tbl_transform <- grouped_model_out_tbl %>% dplyr::reframe( - value = median(value), + value = mean(.data[["value"]]), output_type_id = new_output_type_id ) } else if (new_output_type == "quantile") { model_out_tbl_transform <- grouped_model_out_tbl %>% dplyr::reframe( - value = quantile(value, as.numeric(new_output_type_id), names = FALSE - ), + value = quantile(.data[["value"]], as.numeric(new_output_type_id), names = FALSE), output_type_id = new_output_type_id ) } else if (new_output_type == "cdf") { model_out_tbl_transform <- grouped_model_out_tbl %>% dplyr::reframe( - value = ecdf(value)(as.numeric(new_output_type_id)), + value = ecdf(.data[["value"]])(as.numeric(new_output_type_id)), output_type_id = new_output_type_id ) } From 94fa10846dc1fbd8aec993a494ae89a71dbd28ef Mon Sep 17 00:00:00 2001 From: Li Shandross <57642277+lshandross@users.noreply.github.com> Date: Wed, 11 Sep 2024 15:22:06 -0400 Subject: [PATCH 24/46] Document `convert_output_type()` --- man/convert_output_type.Rd | 64 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 64 insertions(+) create mode 100644 man/convert_output_type.Rd diff --git a/man/convert_output_type.Rd b/man/convert_output_type.Rd new file mode 100644 index 00000000..3025fc8e --- /dev/null +++ b/man/convert_output_type.Rd @@ -0,0 +1,64 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/convert_output_types.R +\name{convert_output_type} +\alias{convert_output_type} +\title{Transform between output types} +\usage{ +convert_output_type( + model_out_tbl, + new_output_type, + new_output_type_id = NA, + n_samples = 10000, + ... +) +} +\arguments{ +\item{model_out_tbl}{an object of class \code{model_out_tbl} containing predictions +with only one unique value in the \code{output_type} column.} + +\item{new_output_type}{\code{string} indicating the desired output_type after +transformation (\code{"mean"}, \code{"median"}, \code{"quantile"}, \code{"cdf"}); can also be a +vector if multiple new output_types are desired.} + +\item{new_output_type_id}{\code{vector} indicating desired output_type_ids for +corresponding \code{new_output_type}; only needs to be specified if +\code{new_output_type} includes \code{"quantile"} or \code{"cdf"} (see details for how to +specify when both \code{"quantile"} and \code{"cdf"} are desired.)} + +\item{n_samples}{\code{numeric} that specifies the number of samples to use when +calculating output_types from an estimated quantile function. Defaults to \code{1e4}.} + +\item{...}{parameters that are passed to \code{distfromq::make_q_fn}, specifying +details of how to estimate a quantile function from provided quantile levels +and quantile values for \code{"quantile"} or \code{"cdf"} output types.} +} +\value{ +object of class \code{model_out_tbl} containing new output_type +} +\description{ +Transform between output types, from one starting output_type into new +output_types. See details for supported conversions. +} +\details{ +The following transformations are supported: (i) \code{"sample"} can be +transformed to \code{"mean"}, \code{"median"}, \code{"quantile"}, or \code{"cdf"}; (ii) +\code{"quantile"} can be transformed to \code{"mean"}, \code{"median"}, or \code{"cdf"}; and +(iii) \code{"cdf"} can be transformed to \code{"mean"}, \code{"median"}, or \code{"quantile"}. + +For \code{"quantile"} and \code{"cdf"} starting output types, we use the following approach: +\enumerate{ +\item Interpolate and extrapolate from the provided quantiles or probabilities +for each component model to obtain an estimate of the cdf of that distribution. +\item Draw samples from the distribution for each component model. To reduce +Monte Carlo variability, we use quasi-random samples corresponding to +quantiles of the estimated distribution. +\item Calculate the desired quantity (e.g., mean). +If the median quantile is provided in the \code{model_out_tbl} object (i.e., the +original output_type is \code{"median"} and 0.5 is contained in the original +output_type_id), the median value is extracted and returned directly. +} + +If both \code{"quantile"} and \code{"cdf"} output_types are desired, \code{new_output_type_id} +should be a named list, where each element specifies the corresponding +\code{new_output_type_id}. See examples for an illustration. +} From ac6333a72ca727fc22e027a537ad781d940854b0 Mon Sep 17 00:00:00 2001 From: Li Shandross <57642277+lshandross@users.noreply.github.com> Date: Wed, 11 Sep 2024 15:22:18 -0400 Subject: [PATCH 25/46] Update DESCRIPTION --- DESCRIPTION | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index ddfb2be1..26adf9f0 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -22,6 +22,8 @@ Imports: checkmate, cli, curl, + distfromq, + dplyr, fs, gh, glue, @@ -35,18 +37,14 @@ Imports: tibble, utils Suggests: - covr, - digest, - distfromq, - dplyr, hubData, knitr, rmarkdown, testthat (>= 3.2.0) -Additional_repositories: https://hubverse-org.r-universe.dev/ Remotes: hubverse-org/hubData, reichlab/distfromq +Additional_repositories: https://hubverse-org.r-universe.dev/ Config/Needs/website: hubverse-org/hubStyle Config/testthat/edition: 3 Encoding: UTF-8 From e8595e10da3fd374d2245d449f9f292bd9758e8a Mon Sep 17 00:00:00 2001 From: Li Shandross <57642277+lshandross@users.noreply.github.com> Date: Thu, 12 Sep 2024 10:29:11 -0400 Subject: [PATCH 26/46] Fix failing tests --- R/convert_output_types.R | 6 +- tests/testthat/test-convert_output_types.R | 102 ++++++++++----------- 2 files changed, 54 insertions(+), 54 deletions(-) diff --git a/R/convert_output_types.R b/R/convert_output_types.R index 808b35dd..2bb3e82f 100644 --- a/R/convert_output_types.R +++ b/R/convert_output_types.R @@ -93,7 +93,7 @@ convert_output_type <- function(model_out_tbl, new_output_type, } # transform based on new_output_type grouped_model_out_tbl <- model_out_tbl %>% - dplyr::group_by("model_id", dplyr::across(dplyr::all_of(task_id_cols))) + dplyr::group_by(.data[["model_id"]], dplyr::across(dplyr::all_of(task_id_cols))) model_out_tbl_transform <- vector("list", length = length(new_output_type)) for (i in seq_along(new_output_type)) { # if median output desired, and Q50 provided return exact value @@ -190,7 +190,7 @@ validate_new_output_type <- function(starting_output_type, new_output_type, get_samples_from_quantiles <- function(model_out_tbl, task_id_cols, n_samples, ...) { set.seed(101) samples <- model_out_tbl %>% - dplyr::group_by("model_id", dplyr::across(dplyr::all_of(task_id_cols))) %>% + dplyr::group_by(.data[["model_id"]], dplyr::across(dplyr::all_of(task_id_cols))) %>% dplyr::reframe( value = distfromq::make_q_fn( ps = as.numeric(.data[["output_type_id"]]), @@ -206,7 +206,7 @@ get_samples_from_quantiles <- function(model_out_tbl, task_id_cols, n_samples, . get_samples_from_cdf <- function(model_out_tbl, task_id_cols, n_samples, ...) { set.seed(101) samples <- model_out_tbl %>% - dplyr::group_by("model_id", dplyr::across(dplyr::all_of(task_id_cols))) %>% + dplyr::group_by(.data[["model_id"]], dplyr::across(dplyr::all_of(task_id_cols))) %>% dplyr::reframe( value = distfromq::make_q_fn( ps = .data[["value"]], diff --git a/tests/testthat/test-convert_output_types.R b/tests/testthat/test-convert_output_types.R index bda1646b..d9d15d9c 100644 --- a/tests/testthat/test-convert_output_types.R +++ b/tests/testthat/test-convert_output_types.R @@ -4,8 +4,8 @@ test_that("convert_output_type works (quantile >> mean)", { model_out_tbl <- expand.grid( grp1 = 1:2, model_id = LETTERS[1:2], - output_type <- "quantile", - output_type_id <- ex_qs, + output_type = "quantile", + output_type_id = ex_qs, stringsAsFactors = FALSE ) %>% dplyr::mutate(mean = grp1 * ifelse(model_id == "A", 1, 3), @@ -16,9 +16,9 @@ test_that("convert_output_type works (quantile >> mean)", { expected <- tibble::tibble( grp1 = rep(1:2, 2), model_id = sort(rep(LETTERS[1:2], 2)) ) %>% - dplyr:: mutate(value = grp1 * ifelse(model_id == "A", 1, 3)) %>% - dplyr::mutate(output_type <- new_output_type, - output_type_id <- new_output_type_id) %>% + dplyr::mutate(value = grp1 * ifelse(model_id == "A", 1, 3)) %>% + dplyr::mutate(output_type = new_output_type, + output_type_id = new_output_type_id) %>% as_model_out_tbl() test <- convert_output_type(model_out_tbl, new_output_type, new_output_type_id) expect_equal(test, expected, tolerance = 1e-2) @@ -29,8 +29,8 @@ test_that("convert_output_type works (quantile >> median)", { model_out_tbl <- expand.grid( grp1 = 1:2, model_id = LETTERS[1:2], - output_type <- "quantile", - output_type_id <- ex_qs, + output_type = "quantile", + output_type_id = ex_qs, stringsAsFactors = FALSE ) %>% dplyr::mutate(mean = grp1 * ifelse(model_id == "A", 1, 3), @@ -42,8 +42,8 @@ test_that("convert_output_type works (quantile >> median)", { grp1 = rep(1:2, 2), model_id = sort(rep(LETTERS[1:2], 2)) ) %>% dplyr:: mutate(value = grp1 * ifelse(model_id == "A", 1, 3)) %>% - dplyr::mutate(output_type <- new_output_type, - output_type_id <- new_output_type_id) %>% + dplyr::mutate(output_type = new_output_type, + output_type_id = new_output_type_id) %>% as_model_out_tbl() test <- convert_output_type(model_out_tbl, new_output_type, new_output_type_id) expect_equal(test, expected, tolerance = 1e-2) @@ -54,8 +54,8 @@ test_that("convert_output_type works (quantile >> cdf)", { model_out_tbl <- expand.grid( grp1 = 1:2, model_id = LETTERS[1:2], - output_type <- "quantile", - output_type_id <- ex_qs, + output_type = "quantile", + output_type_id = ex_qs, stringsAsFactors = FALSE ) %>% dplyr::mutate(mean = grp1 * ifelse(model_id == "A", 1, 3), @@ -66,8 +66,8 @@ test_that("convert_output_type works (quantile >> cdf)", { expected <- tibble::as_tibble(expand.grid( grp1 = 1:2, model_id = LETTERS[1:2], - output_type <- new_output_type, - output_type_id <- new_output_type_id, + output_type = new_output_type, + output_type_id = new_output_type_id, KEEP.OUT.ATTRS = FALSE, stringsAsFactors = FALSE )) %>% @@ -83,8 +83,8 @@ test_that("convert_output_type works (cdf >> mean)", { model_out_tbl <- expand.grid( grp1 = 1:2, model_id = LETTERS[1:2], - output_type <- "cdf", - output_type_id <- ex_ps, + output_type = "cdf", + output_type_id = ex_ps, stringsAsFactors = FALSE ) %>% dplyr::mutate(mean = grp1 * ifelse(model_id == "A", 1, 3), @@ -96,8 +96,8 @@ test_that("convert_output_type works (cdf >> mean)", { grp1 = rep(1:2, 2), model_id = sort(rep(LETTERS[1:2], 2)) ) %>% dplyr:: mutate(value = grp1 * ifelse(model_id == "A", 1, 3)) %>% - dplyr::mutate(output_type <- new_output_type, - output_type_id <- new_output_type_id) %>% + dplyr::mutate(output_type = new_output_type, + output_type_id = new_output_type_id) %>% as_model_out_tbl() test <- convert_output_type(model_out_tbl, new_output_type, new_output_type_id) expect_equal(test, expected, tolerance = 1e-2) @@ -108,8 +108,8 @@ test_that("convert_output_type works (cdf >> median)", { model_out_tbl <- expand.grid( grp1 = 1:2, model_id = LETTERS[1:2], - output_type <- "cdf", - output_type_id <- ex_ps, + output_type = "cdf", + output_type_id = ex_ps, stringsAsFactors = FALSE ) %>% dplyr::mutate(mean = grp1 * ifelse(model_id == "A", 1, 3), @@ -121,8 +121,8 @@ test_that("convert_output_type works (cdf >> median)", { grp1 = rep(1:2, 2), model_id = sort(rep(LETTERS[1:2], 2)) ) %>% dplyr:: mutate(value = grp1 * ifelse(model_id == "A", 1, 3)) %>% - dplyr::mutate(output_type <- new_output_type, - output_type_id <- new_output_type_id) %>% + dplyr::mutate(output_type = new_output_type, + output_type_id = new_output_type_id) %>% as_model_out_tbl() test <- convert_output_type(model_out_tbl, new_output_type, new_output_type_id) @@ -134,8 +134,8 @@ test_that("convert_output_type works (cdf >> quantile)", { model_out_tbl <- expand.grid( grp1 = 1:2, model_id = LETTERS[1:2], - output_type <- "cdf", - output_type_id <- ex_ps, + output_type = "cdf", + output_type_id = ex_ps, stringsAsFactors = FALSE ) %>% dplyr::mutate(mean = grp1 * ifelse(model_id == "A", 1, 3), @@ -146,8 +146,8 @@ test_that("convert_output_type works (cdf >> quantile)", { expected <- tibble::as_tibble(expand.grid( grp1 = 1:2, model_id = LETTERS[1:2], - output_type <- new_output_type, - output_type_id <- new_output_type_id, + output_type = new_output_type, + output_type_id = new_output_type_id, KEEP.OUT.ATTRS = FALSE, stringsAsFactors = FALSE )) %>% @@ -165,8 +165,8 @@ test_that("convert_output_type works (sample >> quantile, cdf, mean)", { model_out_tbl <- expand.grid( grp1 = 1:2, model_id = LETTERS[1:2], - output_type <- "sample", - output_type_id <- 1:1e5, + output_type = "sample", + output_type_id = 1:1e5, stringsAsFactors = FALSE ) %>% dplyr::mutate(mean = grp1 * ifelse(model_id == "A", 1, 3), @@ -177,8 +177,8 @@ test_that("convert_output_type works (sample >> quantile, cdf, mean)", { expected_quantile <- tibble::as_tibble(expand.grid( grp1 = 1:2, model_id = LETTERS[1:2], - output_type <- "quantile", - output_type_id <- ex_quantiles, + output_type = "quantile", + output_type_id = ex_quantiles, KEEP.OUT.ATTRS = FALSE, stringsAsFactors = FALSE )) %>% @@ -188,8 +188,8 @@ test_that("convert_output_type works (sample >> quantile, cdf, mean)", { expected_cdf <- tibble::as_tibble(expand.grid( grp1 = 1:2, model_id = LETTERS[1:2], - output_type <- "cdf", - output_type_id <- ex_bins, + output_type = "cdf", + output_type_id = ex_bins, KEEP.OUT.ATTRS = FALSE, stringsAsFactors = FALSE )) %>% @@ -199,8 +199,8 @@ test_that("convert_output_type works (sample >> quantile, cdf, mean)", { expected_mean <- tibble::as_tibble(expand.grid( grp1 = 1:2, model_id = LETTERS[1:2], - output_type <- "mean", - output_type_id <- NA, + output_type = "mean", + output_type_id = NA, KEEP.OUT.ATTRS = FALSE, stringsAsFactors = FALSE )) %>% @@ -217,8 +217,8 @@ test_that("convert_output_type fails correctly (quantile)", { model_out_tbl <- expand.grid( grp1 = 1:2, model_id = LETTERS[1:2], - output_type <- "cdf", - output_type_id <- ex_ps, + output_type = "cdf", + output_type_id = ex_ps, stringsAsFactors = FALSE ) %>% dplyr::mutate(mean = grp1 * ifelse(model_id == "A", 1, 3), @@ -229,8 +229,8 @@ test_that("convert_output_type fails correctly (quantile)", { expected <- tibble::as_tibble(expand.grid( grp1 = 1:2, model_id = LETTERS[1:2], - output_type <- new_output_type, - output_type_id <- new_output_type_id, + output_type = new_output_type, + output_type_id = new_output_type_id, KEEP.OUT.ATTRS = FALSE, stringsAsFactors = FALSE )) %>% @@ -245,7 +245,7 @@ test_that("convert_output_type fails correctly: wrong starting output_type", { model_out_tbl <- expand.grid( grp1 = 1:2, model_id = LETTERS[1:2], - output_type <- "pmf", + output_type = "pmf", output_type_id <- c("bin1", "bin2"), stringsAsFactors = FALSE ) @@ -257,7 +257,7 @@ test_that("convert_output_type fails correctly: wrong new_output_type (quantile model_out_tbl <- expand.grid( grp1 = 1:2, model_id = LETTERS[1:2], - output_type <- "quantile", + output_type = "quantile", output_type_id <- c(0.25, 0.5, 0.75), stringsAsFactors = FALSE ) @@ -269,7 +269,7 @@ test_that("convert_output_type fails correctly: wrong new_output_type (cdf >> sa model_out_tbl <- expand.grid( grp1 = 1:2, model_id = LETTERS[1:2], - output_type <- "cdf", + output_type = "cdf", output_type_id <- -1:1, stringsAsFactors = FALSE ) @@ -281,7 +281,7 @@ test_that("convert_output_type fails correctly: wrong new_output_type_id (mean)" model_out_tbl <- expand.grid( grp1 = 1:2, model_id = LETTERS[1:2], - output_type <- "cdf", + output_type = "cdf", output_type_id <- -1:1, stringsAsFactors = FALSE ) @@ -294,7 +294,7 @@ test_that("convert_output_type fails correctly: wrong new_output_type_id (quanti model_out_tbl <- expand.grid( grp1 = 1:2, model_id = LETTERS[1:2], - output_type <- "cdf", + output_type = "cdf", output_type_id <- -1:1, stringsAsFactors = FALSE ) @@ -309,7 +309,7 @@ test_that("convert_output_type fails correctly: wrong new_output_type_id (cdf)", model_out_tbl <- expand.grid( grp1 = 1:2, model_id = LETTERS[1:2], - output_type <- "quantile", + output_type = "quantile", output_type_id <- seq(0, 1, 0.5), stringsAsFactors = FALSE ) @@ -333,8 +333,8 @@ test_that("convert_from_sample works (return mean)", { new_output_type_id <- NA expected <- grouped_model_out_tbl %>% dplyr::reframe(value = mean(value)) %>% - dplyr::mutate(output_type <- new_output_type, - output_type_id <- new_output_type_id) %>% + dplyr::mutate(output_type = new_output_type, + output_type_id = new_output_type_id) %>% as_model_out_tbl() test <- convert_from_sample(grouped_model_out_tbl, new_output_type, new_output_type_id) @@ -353,8 +353,8 @@ test_that("convert_from_sample works (return median)", { new_output_type_id <- NA expected <- grouped_model_out_tbl %>% dplyr::reframe(value = median(value)) %>% - dplyr::mutate(output_type <- new_output_type, - output_type_id <- new_output_type_id) %>% + dplyr::mutate(output_type = new_output_type, + output_type_id = new_output_type_id) %>% as_model_out_tbl() test <- convert_from_sample(grouped_model_out_tbl, new_output_type, new_output_type_id) @@ -373,8 +373,8 @@ test_that("convert_from_sample works (return quantile)", { new_output_type_id <- c(0.25, 0.75) expected <- grouped_model_out_tbl %>% dplyr::reframe(value = quantile(value, new_output_type_id, names = FALSE), - output_type_id <- new_output_type_id) %>% - dplyr::mutate(output_type <- new_output_type) %>% + output_type_id = new_output_type_id) %>% + dplyr::mutate(output_type = new_output_type) %>% as_model_out_tbl() test <- convert_from_sample(grouped_model_out_tbl, new_output_type, new_output_type_id) @@ -393,8 +393,8 @@ test_that("convert_from_sample works (return cdf)", { new_output_type_id <- seq(0, 30, 5) expected <- grouped_model_out_tbl %>% dplyr::reframe(value = ecdf(value)(new_output_type_id), - output_type_id <- new_output_type_id) %>% - dplyr::mutate(output_type <- new_output_type) %>% + output_type_id = new_output_type_id) %>% + dplyr::mutate(output_type = new_output_type) %>% as_model_out_tbl() test <- convert_from_sample(grouped_model_out_tbl, new_output_type, new_output_type_id) From 781016da45ae67a84739efe2ceebc95e9db9966e Mon Sep 17 00:00:00 2001 From: Li Shandross <57642277+lshandross@users.noreply.github.com> Date: Thu, 12 Sep 2024 10:46:36 -0400 Subject: [PATCH 27/46] Improve output type conversion validation messages Co-authored-by: Anna Krystalli --- R/convert_output_types.R | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/R/convert_output_types.R b/R/convert_output_types.R index 2bb3e82f..87c8855c 100644 --- a/R/convert_output_types.R +++ b/R/convert_output_types.R @@ -133,17 +133,17 @@ validate_new_output_type <- function(starting_output_type, new_output_type, valid_starting_output_type <- starting_output_type %in% names(valid_conversions) if (!valid_starting_output_type) { cli::cli_abort(c( - "{.var output_type} provided cannot be transformed", - i = "must be of type {.var sample}, {.var quantile}, {.var cdf}." + "{.var output_type} {.val {starting_output_type}} provided cannot be transformed", + i = "must be of type {.val sample}, {.val quantile} or {.val cdf}." )) } # check new_output_type is supported invalid_new_output_type <- which(!(new_output_type %in% valid_conversions[[starting_output_type]])) if (length(invalid_new_output_type) > 0) { cli::cli_abort(c( - "{invalid_new_output_type} cannot be transformed to the specified - {.var new_output_type}", - i = "new_output_type must be {valid_conversions[[starting_output_type]]}" + "Output type {.val {starting_output_type}} cannot be converted to the specified + {.val {new_output_type[invalid_new_output_type]}}", + i = "{.var new_output_type} values must be one of {.val {valid_conversions[[starting_output_type]]}}" )) } # check new_output_type_id From 1a9789eaa5a92fa66de7a50af02666e9b88ce490 Mon Sep 17 00:00:00 2001 From: Li Shandross <57642277+lshandross@users.noreply.github.com> Date: Thu, 12 Sep 2024 10:52:36 -0400 Subject: [PATCH 28/46] Define global functions --- NAMESPACE | 1 + R/convert_output_types.R | 8 ++++---- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index ffd7c4e5..0227945a 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -36,3 +36,4 @@ importFrom(jsonlite,fromJSON) importFrom(jsonlite,read_json) importFrom(magrittr,"%>%") importFrom(rlang,.data) +importFrom(stats, "ecdf", "quantile", "runif") diff --git a/R/convert_output_types.R b/R/convert_output_types.R index 87c8855c..9383d5db 100644 --- a/R/convert_output_types.R +++ b/R/convert_output_types.R @@ -195,7 +195,7 @@ get_samples_from_quantiles <- function(model_out_tbl, task_id_cols, n_samples, . value = distfromq::make_q_fn( ps = as.numeric(.data[["output_type_id"]]), qs = .data[["value"]], ... - )(runif(n_samples, 0, 1)) + )(stats::runif(n_samples, 0, 1)) ) @@ -211,7 +211,7 @@ get_samples_from_cdf <- function(model_out_tbl, task_id_cols, n_samples, ...) { value = distfromq::make_q_fn( ps = .data[["value"]], qs = as.numeric(.data[["output_type_id"]]), ... - )(runif(n_samples, 0, 1)) + )(stats::runif(n_samples, 0, 1)) ) return(samples) } @@ -234,13 +234,13 @@ convert_from_sample <- function(grouped_model_out_tbl, new_output_type, } else if (new_output_type == "quantile") { model_out_tbl_transform <- grouped_model_out_tbl %>% dplyr::reframe( - value = quantile(.data[["value"]], as.numeric(new_output_type_id), names = FALSE), + value = stats::quantile(.data[["value"]], as.numeric(new_output_type_id), names = FALSE), output_type_id = new_output_type_id ) } else if (new_output_type == "cdf") { model_out_tbl_transform <- grouped_model_out_tbl %>% dplyr::reframe( - value = ecdf(.data[["value"]])(as.numeric(new_output_type_id)), + value = stats::ecdf(.data[["value"]])(as.numeric(new_output_type_id)), output_type_id = new_output_type_id ) } From 84ff4aabfc69a6a4c66a6659e43776e0f3822b9d Mon Sep 17 00:00:00 2001 From: Li Shandross <57642277+lshandross@users.noreply.github.com> Date: Thu, 12 Sep 2024 11:10:17 -0400 Subject: [PATCH 29/46] Clean up `convert_output_types` functions --- R/convert_output_types.R | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/R/convert_output_types.R b/R/convert_output_types.R index 9383d5db..4750c0bc 100644 --- a/R/convert_output_types.R +++ b/R/convert_output_types.R @@ -72,9 +72,8 @@ convert_output_type <- function(model_out_tbl, new_output_type, new_output_type_id = NA, n_samples = 1e4, ...) { # validations - task_id_cols <- get_task_id_cols(model_out_tbl) - starting_output_type <- model_out_tbl$output_type %>% unique() - starting_output_type_ids <- model_out_tbl$output_type_id %>% unique() + starting_output_type <- unique(model_out_tbl$output_type) + starting_output_type_ids <- unique(model_out_tbl$output_type_id) task_id_cols <- get_task_id_cols(model_out_tbl) validate_new_output_type( starting_output_type, new_output_type, @@ -111,10 +110,9 @@ convert_output_type <- function(model_out_tbl, new_output_type, new_output_type_id_tmp <- new_output_type_id if (new_output_type[i] %in% c("mean", "median")) { new_output_type_id_tmp <- NA - } else - if (is.list(new_output_type_id)) { - new_output_type_id_tmp <- new_output_type_id[[new_output_type[i]]] - } + } else if (is.list(new_output_type_id)) { + new_output_type_id_tmp <- new_output_type_id[[new_output_type[i]]] + } model_out_tbl_transform[[i]] <- convert_from_sample( grouped_model_out_tbl, new_output_type[i], new_output_type_id_tmp ) @@ -122,6 +120,7 @@ convert_output_type <- function(model_out_tbl, new_output_type, return(dplyr::bind_rows(model_out_tbl_transform)) } +#' @noRd validate_new_output_type <- function(starting_output_type, new_output_type, new_output_type_id) { valid_conversions <- list( @@ -197,8 +196,6 @@ get_samples_from_quantiles <- function(model_out_tbl, task_id_cols, n_samples, . qs = .data[["value"]], ... )(stats::runif(n_samples, 0, 1)) ) - - return(samples) } From 9fa2e35b83dd5483cb2a56f477910b4618b48083 Mon Sep 17 00:00:00 2001 From: Li Shandross <57642277+lshandross@users.noreply.github.com> Date: Thu, 12 Sep 2024 11:31:17 -0400 Subject: [PATCH 30/46] Don't export smaller conversion functions --- NAMESPACE | 4 ---- R/convert_output_types.R | 6 +++--- 2 files changed, 3 insertions(+), 7 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index 0227945a..72e35475 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -5,7 +5,6 @@ S3method(read_config,default) export("%>%") export(as_model_out_tbl) export(check_deprecated_schema) -export(convert_from_sample) export(convert_output_type) export(extract_schema_version) export(get_config_tid) @@ -13,8 +12,6 @@ export(get_round_ids) export(get_round_idx) export(get_round_model_tasks) export(get_round_task_id_names) -export(get_samples_from_cdf) -export(get_samples_from_quantiles) export(get_schema) export(get_schema_url) export(get_schema_valid_versions) @@ -36,4 +33,3 @@ importFrom(jsonlite,fromJSON) importFrom(jsonlite,read_json) importFrom(magrittr,"%>%") importFrom(rlang,.data) -importFrom(stats, "ecdf", "quantile", "runif") diff --git a/R/convert_output_types.R b/R/convert_output_types.R index 4750c0bc..15c08104 100644 --- a/R/convert_output_types.R +++ b/R/convert_output_types.R @@ -185,7 +185,7 @@ validate_new_output_type <- function(starting_output_type, new_output_type, } } -#' @export +#' @noRd get_samples_from_quantiles <- function(model_out_tbl, task_id_cols, n_samples, ...) { set.seed(101) samples <- model_out_tbl %>% @@ -199,7 +199,7 @@ get_samples_from_quantiles <- function(model_out_tbl, task_id_cols, n_samples, . return(samples) } -#' @export +#' @noRd get_samples_from_cdf <- function(model_out_tbl, task_id_cols, n_samples, ...) { set.seed(101) samples <- model_out_tbl %>% @@ -213,7 +213,7 @@ get_samples_from_cdf <- function(model_out_tbl, task_id_cols, n_samples, ...) { return(samples) } -#' @export +#' @noRd convert_from_sample <- function(grouped_model_out_tbl, new_output_type, new_output_type_id) { if (new_output_type == "mean") { From db08c45eb4eac621de1bfe7da51572686eeef181 Mon Sep 17 00:00:00 2001 From: Li Shandross <57642277+lshandross@users.noreply.github.com> Date: Thu, 12 Sep 2024 11:38:12 -0400 Subject: [PATCH 31/46] Document `get_task_id_cols()` --- R/get_task_id_cols.R | 5 +++++ man/get_task_id_cols.Rd | 17 +++++++++++++++++ 2 files changed, 22 insertions(+) create mode 100644 man/get_task_id_cols.Rd diff --git a/R/get_task_id_cols.R b/R/get_task_id_cols.R index 68e1a33b..d5acd34b 100644 --- a/R/get_task_id_cols.R +++ b/R/get_task_id_cols.R @@ -1,3 +1,8 @@ +#' Get task ID column names from `model_out_tbl` object +#' +#' @param model_out_tbl an object of class `model_out_tbl` +#' +#' @return a character vector of task ID column names #' @export get_task_id_cols <- function(model_out_tbl) { model_out_cols <- colnames(model_out_tbl) diff --git a/man/get_task_id_cols.Rd b/man/get_task_id_cols.Rd new file mode 100644 index 00000000..7077e3e5 --- /dev/null +++ b/man/get_task_id_cols.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/get_task_id_cols.R +\name{get_task_id_cols} +\alias{get_task_id_cols} +\title{Get task ID column names from \code{model_out_tbl} object} +\usage{ +get_task_id_cols(model_out_tbl) +} +\arguments{ +\item{model_out_tbl}{an object of class \code{model_out_tbl}} +} +\value{ +a character vector of task ID column names +} +\description{ +Get task ID column names from \code{model_out_tbl} object +} From 75df987495ac9f347223a8a5a986fe5a5c6af5ad Mon Sep 17 00:00:00 2001 From: Li Shandross <57642277+lshandross@users.noreply.github.com> Date: Thu, 12 Sep 2024 12:04:02 -0400 Subject: [PATCH 32/46] Fix `convert_output_type()` example --- R/convert_output_types.R | 6 +++--- man/convert_output_type.Rd | 28 ++++++++++++++++++++++++++++ 2 files changed, 31 insertions(+), 3 deletions(-) diff --git a/R/convert_output_types.R b/R/convert_output_types.R index 15c08104..aae43460 100644 --- a/R/convert_output_types.R +++ b/R/convert_output_types.R @@ -50,18 +50,18 @@ #' ) %>% #' dplyr::mutate(value = qnorm(p = output_type_id, mean = group1)) #' -#' convert_output_type(model_out_tbl, c("group1"), new_output_type = "median") +#' convert_output_type(model_out_tbl, new_output_type = "median", new_output_type_id = NA) #' #' # Next, we illustrate conversion from samples to quantile and cdf #' ex_bins <- seq(-2,2,1) #' ex_quantiles <- c(0.25, 0.5, 0.75) #' model_out_tbl <- expand.grid( #' group1 = c(1,2), -#' model_id = "A +#' model_id = "A", #' output_type = "sample", #' output_type_id = 1:100 #' ) %>% -#' dplyr::mutate(value = rnorm(100, mean = group1)) +#' dplyr::mutate(value = rnorm(200, mean = group1)) #' #' convert_output_type(model_out_tbl, new_output_type = c("quantile", "cdf"), #' new_output_type_id = list("quantile" = ex_quantiles, "cdf" = ex_bins)) diff --git a/man/convert_output_type.Rd b/man/convert_output_type.Rd index 3025fc8e..ced9c4ed 100644 --- a/man/convert_output_type.Rd +++ b/man/convert_output_type.Rd @@ -62,3 +62,31 @@ If both \code{"quantile"} and \code{"cdf"} output_types are desired, \code{new_o should be a named list, where each element specifies the corresponding \code{new_output_type_id}. See examples for an illustration. } +\examples{ +# We illustrate the conversion between output types using normal distributions, +ex_quantiles <- c(0.25, 0.5, 0.75) +model_out_tbl <- expand.grid( + group1 = c(1,2), + model_id = "A", + output_type = "quantile", + output_type_id = ex_quantiles +) \%>\% +dplyr::mutate(value = qnorm(p = output_type_id, mean = group1)) + +convert_output_type(model_out_tbl, new_output_type = "median", new_output_type_id = NA) + +# Next, we illustrate conversion from samples to quantile and cdf +ex_bins <- seq(-2,2,1) +ex_quantiles <- c(0.25, 0.5, 0.75) +model_out_tbl <- expand.grid( + group1 = c(1,2), + model_id = "A", + output_type = "sample", + output_type_id = 1:100 +) \%>\% +dplyr::mutate(value = rnorm(200, mean = group1)) + +convert_output_type(model_out_tbl, new_output_type = c("quantile", "cdf"), + new_output_type_id = list("quantile" = ex_quantiles, "cdf" = ex_bins)) + +} From ee6b0982be37ec8bb5c3d302c8bab3a240ba2a1a Mon Sep 17 00:00:00 2001 From: Li Shandross <57642277+lshandross@users.noreply.github.com> Date: Thu, 12 Sep 2024 12:21:28 -0400 Subject: [PATCH 33/46] Clarify `convert_output_type()` description and return value --- R/convert_output_types.R | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/R/convert_output_types.R b/R/convert_output_types.R index aae43460..9ee80bc5 100644 --- a/R/convert_output_types.R +++ b/R/convert_output_types.R @@ -1,10 +1,12 @@ #' Transform between output types #' -#' Transform between output types, from one starting output_type into new -#' output_types. See details for supported conversions. +#' Transform between output types for each unique combination of task IDs for +#' each model. Conversion must be from a single starting output type to one or more +#' new output types, and the resulting output will only contain the new output types. +#' See details for supported conversions. #' #' @param model_out_tbl an object of class `model_out_tbl` containing predictions -#' with only one unique value in the `output_type` column. +#' with a single, unique value in the `output_type` column. #' @param new_output_type `string` indicating the desired output_type after #' transformation (`"mean"`, `"median"`, `"quantile"`, `"cdf"`); can also be a #' vector if multiple new output_types are desired. @@ -66,7 +68,8 @@ #' convert_output_type(model_out_tbl, new_output_type = c("quantile", "cdf"), #' new_output_type_id = list("quantile" = ex_quantiles, "cdf" = ex_bins)) #' -#' @return object of class `model_out_tbl` containing new output_type +#' @return object of class `model_out_tbl` containing (only) the new output_type(s) +#' for each unique combination of task IDs for each model #' @export #' @importFrom rlang .data convert_output_type <- function(model_out_tbl, new_output_type, From de84f22fa8b2e95f192c02eab12b0dd5cc785f41 Mon Sep 17 00:00:00 2001 From: Li Shandross <57642277+lshandross@users.noreply.github.com> Date: Thu, 12 Sep 2024 12:26:33 -0400 Subject: [PATCH 34/46] Fix linting issues --- R/get_task_id_cols.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/get_task_id_cols.R b/R/get_task_id_cols.R index d5acd34b..782e4eb9 100644 --- a/R/get_task_id_cols.R +++ b/R/get_task_id_cols.R @@ -1,6 +1,6 @@ #' Get task ID column names from `model_out_tbl` object #' -#' @param model_out_tbl an object of class `model_out_tbl` +#' @param model_out_tbl an object of class `model_out_tbl` #' #' @return a character vector of task ID column names #' @export From d4119e24f6fb6fddc0e5a0b915871f0b0161acd6 Mon Sep 17 00:00:00 2001 From: Li Shandross <57642277+lshandross@users.noreply.github.com> Date: Thu, 12 Sep 2024 14:46:56 -0400 Subject: [PATCH 35/46] Add single starting output type validation and test --- R/convert_output_types.R | 4 ++++ tests/testthat/test-convert_output_types.R | 13 +++++++++++++ 2 files changed, 17 insertions(+) diff --git a/R/convert_output_types.R b/R/convert_output_types.R index 9ee80bc5..7dd7915d 100644 --- a/R/convert_output_types.R +++ b/R/convert_output_types.R @@ -126,6 +126,10 @@ convert_output_type <- function(model_out_tbl, new_output_type, #' @noRd validate_new_output_type <- function(starting_output_type, new_output_type, new_output_type_id) { + # check only one starting_output_type is provided + if (length(starting_output_type) != 1) { + cli::cli_abort(c("Only one {.var starting_output_type} may be provided")) + } valid_conversions <- list( "sample" = c("mean", "median", "quantile", "cdf"), "quantile" = c("mean", "median", "cdf"), diff --git a/tests/testthat/test-convert_output_types.R b/tests/testthat/test-convert_output_types.R index d9d15d9c..5008bd5d 100644 --- a/tests/testthat/test-convert_output_types.R +++ b/tests/testthat/test-convert_output_types.R @@ -241,6 +241,19 @@ test_that("convert_output_type fails correctly (quantile)", { expect_equal(test, expected, tolerance = 1e-2) }) +test_that("convert_output_type fails correctly: multiple starting output types provided", { + model_out_tbl <- expand.grid( + grp1 = 1, + model_id = "A", + output_type = c("cdf", "quantile"), + output_type_id = 1:2, + stringsAsFactors = FALSE + ) + model_out_tbl$output_type_id <- c(5, 0.25, 10, 0.75) + new_output_type <- "mean" + expect_error(convert_output_type(model_out_tbl, new_output_type)) +}) + test_that("convert_output_type fails correctly: wrong starting output_type", { model_out_tbl <- expand.grid( grp1 = 1:2, From 1e485842b57e590b0a9eeb7db7bcdbc9695b0745 Mon Sep 17 00:00:00 2001 From: Li Shandross <57642277+lshandross@users.noreply.github.com> Date: Thu, 12 Sep 2024 15:22:17 -0400 Subject: [PATCH 36/46] Remove duplicate test --- tests/testthat/test-convert_output_types.R | 33 ++-------------------- 1 file changed, 2 insertions(+), 31 deletions(-) diff --git a/tests/testthat/test-convert_output_types.R b/tests/testthat/test-convert_output_types.R index 5008bd5d..5adf8a2f 100644 --- a/tests/testthat/test-convert_output_types.R +++ b/tests/testthat/test-convert_output_types.R @@ -212,35 +212,6 @@ test_that("convert_output_type works (sample >> quantile, cdf, mean)", { expect_equal(test, expected, tolerance = 1e-2) }) -test_that("convert_output_type fails correctly (quantile)", { - ex_ps <- seq(-2, 10, length.out = 500)[2:499] - model_out_tbl <- expand.grid( - grp1 = 1:2, - model_id = LETTERS[1:2], - output_type = "cdf", - output_type_id = ex_ps, - stringsAsFactors = FALSE - ) %>% - dplyr::mutate(mean = grp1 * ifelse(model_id == "A", 1, 3), - value = pnorm(output_type_id, mean)) %>% - dplyr::select(-mean) - new_output_type <- "quantile" - new_output_type_id <- c(0.25, 0.5, 0.75) - expected <- tibble::as_tibble(expand.grid( - grp1 = 1:2, - model_id = LETTERS[1:2], - output_type = new_output_type, - output_type_id = new_output_type_id, - KEEP.OUT.ATTRS = FALSE, - stringsAsFactors = FALSE - )) %>% - dplyr:: mutate(value = qnorm(output_type_id, grp1 * ifelse(model_id == "A", 1, 3))) %>% - dplyr::arrange(model_id, grp1) %>% - as_model_out_tbl() - test <- convert_output_type(model_out_tbl, new_output_type, new_output_type_id) - expect_equal(test, expected, tolerance = 1e-2) -}) - test_that("convert_output_type fails correctly: multiple starting output types provided", { model_out_tbl <- expand.grid( grp1 = 1, @@ -259,7 +230,7 @@ test_that("convert_output_type fails correctly: wrong starting output_type", { grp1 = 1:2, model_id = LETTERS[1:2], output_type = "pmf", - output_type_id <- c("bin1", "bin2"), + output_type_id = c("bin1", "bin2"), stringsAsFactors = FALSE ) new_output_type <- "mean" @@ -271,7 +242,7 @@ test_that("convert_output_type fails correctly: wrong new_output_type (quantile grp1 = 1:2, model_id = LETTERS[1:2], output_type = "quantile", - output_type_id <- c(0.25, 0.5, 0.75), + output_type_id = c(0.25, 0.5, 0.75), stringsAsFactors = FALSE ) new_output_type <- "pmf" From 66ad6909c5e27987c40113701e086a244d3fb4ce Mon Sep 17 00:00:00 2001 From: Li Shandross <57642277+lshandross@users.noreply.github.com> Date: Thu, 12 Sep 2024 15:25:34 -0400 Subject: [PATCH 37/46] Remove seed and check for `distfromq` Co-authored-by: Anna Krystalli --- R/convert_output_types.R | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/R/convert_output_types.R b/R/convert_output_types.R index 7dd7915d..b09724f9 100644 --- a/R/convert_output_types.R +++ b/R/convert_output_types.R @@ -194,7 +194,14 @@ validate_new_output_type <- function(starting_output_type, new_output_type, #' @noRd get_samples_from_quantiles <- function(model_out_tbl, task_id_cols, n_samples, ...) { - set.seed(101) + if (!requireNamespace("distfromq")) { + cli::cli_abort( + c("x" = "{.pkg distfromq} must be installed to convert {.val cdf} + or {.val quantile} output types.", + "i" = "Use {.code remotes::install_github('reichlab/distfromq')} to install." + ) + ) + } samples <- model_out_tbl %>% dplyr::group_by(.data[["model_id"]], dplyr::across(dplyr::all_of(task_id_cols))) %>% dplyr::reframe( @@ -208,7 +215,14 @@ get_samples_from_quantiles <- function(model_out_tbl, task_id_cols, n_samples, . #' @noRd get_samples_from_cdf <- function(model_out_tbl, task_id_cols, n_samples, ...) { - set.seed(101) + if (!requireNamespace("distfromq")) { + cli::cli_abort( + c("x" = "{.pkg distfromq} must be installed to convert {.val cdf} + or {.val quantile} output types.", + "i" = "Use {.code remotes::install_github('reichlab/distfromq')} to install." + ) + ) + } samples <- model_out_tbl %>% dplyr::group_by(.data[["model_id"]], dplyr::across(dplyr::all_of(task_id_cols))) %>% dplyr::reframe( From 4b8ed54226af863c4491256a41c624e04403520b Mon Sep 17 00:00:00 2001 From: Li Shandross <57642277+lshandross@users.noreply.github.com> Date: Thu, 12 Sep 2024 16:00:35 -0400 Subject: [PATCH 38/46] Fix failing test --- tests/testthat/test-convert_output_types.R | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/testthat/test-convert_output_types.R b/tests/testthat/test-convert_output_types.R index 5adf8a2f..36dc9bf4 100644 --- a/tests/testthat/test-convert_output_types.R +++ b/tests/testthat/test-convert_output_types.R @@ -74,6 +74,7 @@ test_that("convert_output_type works (quantile >> cdf)", { dplyr:: mutate(value = pnorm(output_type_id, grp1 * ifelse(model_id == "A", 1, 3))) %>% dplyr::arrange(model_id, grp1) %>% as_model_out_tbl() + set.seed(101) test <- convert_output_type(model_out_tbl, new_output_type, new_output_type_id) expect_equal(test, expected, tolerance = 1e-2) }) From e23e0478b8fcb1862b33beaf534799fa90d4183c Mon Sep 17 00:00:00 2001 From: Li Shandross <57642277+lshandross@users.noreply.github.com> Date: Fri, 13 Sep 2024 11:45:25 -0400 Subject: [PATCH 39/46] Refactor new `validate_output_type_id()` function prints more informative validation messages --- R/convert_output_types.R | 29 ++++++++++++++++++++--------- 1 file changed, 20 insertions(+), 9 deletions(-) diff --git a/R/convert_output_types.R b/R/convert_output_types.R index b09724f9..83b50cba 100644 --- a/R/convert_output_types.R +++ b/R/convert_output_types.R @@ -153,24 +153,35 @@ validate_new_output_type <- function(starting_output_type, new_output_type, )) } # check new_output_type_id - if (all(new_output_type %in% c("mean", "median")) && !all(is.na(new_output_type_id))) { + if (length(new_output_type) == 1) { + validate_new_output_type_id(new_output_type, new_output_type_id) + } else if (length(new_output_type > 1)) { + purrr::imap(.x = new_output_type, + ~ validate_new_output_type_id(new_output_type = .x, + new_output_type_id = new_output_type_id[[.y]])) + } +} + +#' @noRd +validate_new_output_type_id <- function(new_output_type, new_output_type_id) { + if (new_output_type %in% c("mean", "median") && !all(is.na(new_output_type_id))) { cli::cli_abort(c( "{.var new_output_type_id} is incompatible with {.var new_output_type}", i = "{.var new_output_type_id} should be {.var NA}" )) - } else if ("quantile" %in% new_output_type) { - new_output_type_id_tmp <- new_output_type_id + } else if (new_output_type == "quantile") { + new_output_type_id_quantile <- new_output_type_id if (is.list(new_output_type_id)) { - new_output_type_id_tmp <- new_output_type_id[["quantile"]] + new_output_type_id_quantile <- new_output_type_id[["quantile"]] } - if (!is.numeric(new_output_type_id_tmp)) { + if (!is.numeric(new_output_type_id_quantile)) { cli::cli_abort(c( "elements of {.var new_output_type_id} should be numeric", i = "elements of {.var new_output_type_id} represent quantiles of the predictive distribution" )) } - if (any(new_output_type_id_tmp < 0) || any(new_output_type_id_tmp > 1)) { + if (any(new_output_type_id_quantile < 0) || any(new_output_type_id_quantile > 1)) { cli::cli_abort(c( "elements of {.var new_output_type_id} should be between 0 and 1", i = "elements of {.var new_output_type_id} represent quantiles @@ -178,11 +189,11 @@ validate_new_output_type <- function(starting_output_type, new_output_type, )) } } else if (new_output_type == "cdf") { - new_output_type_id_tmp <- new_output_type_id + new_output_type_id_cdf <- new_output_type_id if (is.list(new_output_type_id)) { - new_output_type_id_tmp <- new_output_type_id[["cdf"]] + new_output_type_id_cdf <- new_output_type_id[["cdf"]] } - if (!is.numeric(new_output_type_id_tmp)) { + if (!is.numeric(new_output_type_id_cdf)) { cli::cli_abort(c( "elements of {.var new_output_type_id} should be numeric", i = "elements of {.var new_output_type_id} represent possible From 6dd2926526ab2e652144b086656bcdda68099a52 Mon Sep 17 00:00:00 2001 From: Li Shandross <57642277+lshandross@users.noreply.github.com> Date: Fri, 13 Sep 2024 11:45:44 -0400 Subject: [PATCH 40/46] Update documentation --- man/convert_output_type.Rd | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/man/convert_output_type.Rd b/man/convert_output_type.Rd index ced9c4ed..5ab50c0c 100644 --- a/man/convert_output_type.Rd +++ b/man/convert_output_type.Rd @@ -14,7 +14,7 @@ convert_output_type( } \arguments{ \item{model_out_tbl}{an object of class \code{model_out_tbl} containing predictions -with only one unique value in the \code{output_type} column.} +with a single, unique value in the \code{output_type} column.} \item{new_output_type}{\code{string} indicating the desired output_type after transformation (\code{"mean"}, \code{"median"}, \code{"quantile"}, \code{"cdf"}); can also be a @@ -33,11 +33,14 @@ details of how to estimate a quantile function from provided quantile levels and quantile values for \code{"quantile"} or \code{"cdf"} output types.} } \value{ -object of class \code{model_out_tbl} containing new output_type +object of class \code{model_out_tbl} containing (only) the new output_type(s) +for each unique combination of task IDs for each model } \description{ -Transform between output types, from one starting output_type into new -output_types. See details for supported conversions. +Transform between output types for each unique combination of task IDs for +each model. Conversion must be from a single starting output type to one or more +new output types, and the resulting output will only contain the new output types. +See details for supported conversions. } \details{ The following transformations are supported: (i) \code{"sample"} can be From 95d6f28eeca4a0fd9474822d0c3f22b22d8ce455 Mon Sep 17 00:00:00 2001 From: Li Shandross <57642277+lshandross@users.noreply.github.com> Date: Fri, 13 Sep 2024 11:46:03 -0400 Subject: [PATCH 41/46] Testing fixes --- tests/testthat/test-convert_output_types.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/testthat/test-convert_output_types.R b/tests/testthat/test-convert_output_types.R index 36dc9bf4..73eca109 100644 --- a/tests/testthat/test-convert_output_types.R +++ b/tests/testthat/test-convert_output_types.R @@ -174,7 +174,7 @@ test_that("convert_output_type works (sample >> quantile, cdf, mean)", { value = rnorm(dplyr::n(), mean)) %>% dplyr::select(-mean) new_output_type <- c("mean", "quantile", "cdf") - new_output_type_id <- list("quantile" = ex_quantiles, "cdf" = ex_bins) + new_output_type_id <- list("mean" = NA, "quantile" = ex_quantiles, "cdf" = ex_bins) expected_quantile <- tibble::as_tibble(expand.grid( grp1 = 1:2, model_id = LETTERS[1:2], From 522e88f77c2020ec32f4c999a17bd57045fe87dd Mon Sep 17 00:00:00 2001 From: Li Shandross <57642277+lshandross@users.noreply.github.com> Date: Thu, 3 Oct 2024 14:50:00 -0400 Subject: [PATCH 42/46] model_out_tbl output type col is character in `convert_output_type()` example --- R/convert_output_types.R | 2 ++ man/convert_output_type.Rd | 2 ++ 2 files changed, 4 insertions(+) diff --git a/R/convert_output_types.R b/R/convert_output_types.R index 83b50cba..c7a23a2b 100644 --- a/R/convert_output_types.R +++ b/R/convert_output_types.R @@ -45,6 +45,7 @@ #' # We illustrate the conversion between output types using normal distributions, #' ex_quantiles <- c(0.25, 0.5, 0.75) #' model_out_tbl <- expand.grid( +#' stringsAsFactors = FALSE, #' group1 = c(1,2), #' model_id = "A", #' output_type = "quantile", @@ -58,6 +59,7 @@ #' ex_bins <- seq(-2,2,1) #' ex_quantiles <- c(0.25, 0.5, 0.75) #' model_out_tbl <- expand.grid( +#' stringsAsFactors = FALSE, #' group1 = c(1,2), #' model_id = "A", #' output_type = "sample", diff --git a/man/convert_output_type.Rd b/man/convert_output_type.Rd index 5ab50c0c..9b779e02 100644 --- a/man/convert_output_type.Rd +++ b/man/convert_output_type.Rd @@ -69,6 +69,7 @@ should be a named list, where each element specifies the corresponding # We illustrate the conversion between output types using normal distributions, ex_quantiles <- c(0.25, 0.5, 0.75) model_out_tbl <- expand.grid( + stringsAsFactors = FALSE, group1 = c(1,2), model_id = "A", output_type = "quantile", @@ -82,6 +83,7 @@ convert_output_type(model_out_tbl, new_output_type = "median", new_output_type_i ex_bins <- seq(-2,2,1) ex_quantiles <- c(0.25, 0.5, 0.75) model_out_tbl <- expand.grid( + stringsAsFactors = FALSE, group1 = c(1,2), model_id = "A", output_type = "sample", From 054bffb68396d1a2bb1d10f1a296e6b53baf323b Mon Sep 17 00:00:00 2001 From: Li Shandross <57642277+lshandross@users.noreply.github.com> Date: Thu, 3 Oct 2024 16:08:26 -0400 Subject: [PATCH 43/46] Make `new_output_type_id` arg usually a named list; simplify its validations --- R/convert_output_types.R | 29 ++++++++++++----------------- man/convert_output_type.Rd | 17 +++++++++-------- 2 files changed, 21 insertions(+), 25 deletions(-) diff --git a/R/convert_output_types.R b/R/convert_output_types.R index c7a23a2b..fe4f1440 100644 --- a/R/convert_output_types.R +++ b/R/convert_output_types.R @@ -7,13 +7,14 @@ #' #' @param model_out_tbl an object of class `model_out_tbl` containing predictions #' with a single, unique value in the `output_type` column. -#' @param new_output_type `string` indicating the desired output_type after -#' transformation (`"mean"`, `"median"`, `"quantile"`, `"cdf"`); can also be a -#' vector if multiple new output_types are desired. -#' @param new_output_type_id `vector` indicating desired output_type_ids for -#' corresponding `new_output_type`; only needs to be specified if -#' `new_output_type` includes `"quantile"` or `"cdf"` (see details for how to -#' specify when both `"quantile"` and `"cdf"` are desired.) +#' @param new_output_type character vector of the desired output type(s) after +#' transformation. May contain any of the following output types: +#' `"mean"`, `"median"`, `"quantile"`, `"cdf"`. +#' @param new_output_type_id A named list indicating the desired output type IDs +#' for each new output type, in which each element is a vector of output type IDs. +#' If only one new output type is requested, then it may be a single numeric vector +#' (for `"quantile"` or `"cdf"`) or not required (`"mean"` or `"median"`). See +#' the examples for an illustration of both cases. #' @param n_samples `numeric` that specifies the number of samples to use when #' calculating output_types from an estimated quantile function. Defaults to `1e4`. #' @param ... parameters that are passed to `distfromq::make_q_fn`, specifying @@ -42,7 +43,7 @@ #' `new_output_type_id`. See examples for an illustration. #' #' @examples -#' # We illustrate the conversion between output types using normal distributions, +#' # We illustrate the conversion between output types using normal distributions #' ex_quantiles <- c(0.25, 0.5, 0.75) #' model_out_tbl <- expand.grid( #' stringsAsFactors = FALSE, @@ -158,9 +159,9 @@ validate_new_output_type <- function(starting_output_type, new_output_type, if (length(new_output_type) == 1) { validate_new_output_type_id(new_output_type, new_output_type_id) } else if (length(new_output_type > 1)) { - purrr::imap(.x = new_output_type, - ~ validate_new_output_type_id(new_output_type = .x, - new_output_type_id = new_output_type_id[[.y]])) + purrr::map(.x = new_output_type, + ~ validate_new_output_type_id(new_output_type = .x, + new_output_type_id = new_output_type_id[[.x]])) } } @@ -173,9 +174,6 @@ validate_new_output_type_id <- function(new_output_type, new_output_type_id) { )) } else if (new_output_type == "quantile") { new_output_type_id_quantile <- new_output_type_id - if (is.list(new_output_type_id)) { - new_output_type_id_quantile <- new_output_type_id[["quantile"]] - } if (!is.numeric(new_output_type_id_quantile)) { cli::cli_abort(c( "elements of {.var new_output_type_id} should be numeric", @@ -192,9 +190,6 @@ validate_new_output_type_id <- function(new_output_type, new_output_type_id) { } } else if (new_output_type == "cdf") { new_output_type_id_cdf <- new_output_type_id - if (is.list(new_output_type_id)) { - new_output_type_id_cdf <- new_output_type_id[["cdf"]] - } if (!is.numeric(new_output_type_id_cdf)) { cli::cli_abort(c( "elements of {.var new_output_type_id} should be numeric", diff --git a/man/convert_output_type.Rd b/man/convert_output_type.Rd index 9b779e02..be64374a 100644 --- a/man/convert_output_type.Rd +++ b/man/convert_output_type.Rd @@ -16,14 +16,15 @@ convert_output_type( \item{model_out_tbl}{an object of class \code{model_out_tbl} containing predictions with a single, unique value in the \code{output_type} column.} -\item{new_output_type}{\code{string} indicating the desired output_type after -transformation (\code{"mean"}, \code{"median"}, \code{"quantile"}, \code{"cdf"}); can also be a -vector if multiple new output_types are desired.} +\item{new_output_type}{character vector of the desired output type(s) after +transformation. May contain any of the following output types: +\code{"mean"}, \code{"median"}, \code{"quantile"}, \code{"cdf"}.} -\item{new_output_type_id}{\code{vector} indicating desired output_type_ids for -corresponding \code{new_output_type}; only needs to be specified if -\code{new_output_type} includes \code{"quantile"} or \code{"cdf"} (see details for how to -specify when both \code{"quantile"} and \code{"cdf"} are desired.)} +\item{new_output_type_id}{A named list indicating the desired output type IDs +for each new output type, in which each element is a vector of output type IDs. +If only one new output type is requested, then it may be a single numeric vector +(for \code{"quantile"} or \code{"cdf"}) or not required (\code{"mean"} or \code{"median"}). See +the examples for an illustration of both cases.} \item{n_samples}{\code{numeric} that specifies the number of samples to use when calculating output_types from an estimated quantile function. Defaults to \code{1e4}.} @@ -66,7 +67,7 @@ should be a named list, where each element specifies the corresponding \code{new_output_type_id}. See examples for an illustration. } \examples{ -# We illustrate the conversion between output types using normal distributions, +# We illustrate the conversion between output types using normal distributions ex_quantiles <- c(0.25, 0.5, 0.75) model_out_tbl <- expand.grid( stringsAsFactors = FALSE, From cbe2acfd45a7b647f3097b9e0e66286f83d9fc88 Mon Sep 17 00:00:00 2001 From: Li Shandross <57642277+lshandross@users.noreply.github.com> Date: Fri, 4 Oct 2024 12:07:24 -0400 Subject: [PATCH 44/46] Fix quantile -> median, mean/cdf transformation (use intermediary samples) --- R/convert_output_types.R | 74 +++++++++++++++------- tests/testthat/test-convert_output_types.R | 38 +++++++++++ 2 files changed, 90 insertions(+), 22 deletions(-) diff --git a/R/convert_output_types.R b/R/convert_output_types.R index fe4f1440..7f1b93f4 100644 --- a/R/convert_output_types.R +++ b/R/convert_output_types.R @@ -92,36 +92,42 @@ convert_output_type <- function(model_out_tbl, new_output_type, } else if (starting_output_type == "quantile") { # if median output desired, and Q50 provided return exact value, otherwise # estimate from samples - if (!("median" %in% new_output_type && 0.5 %in% starting_output_type_ids)) { - model_out_tbl <- get_samples_from_quantiles(model_out_tbl, task_id_cols, n_samples) + if (any(new_output_type != "median") || !(0.5 %in% starting_output_type_ids)) { + model_out_tbl <- model_out_tbl %>% + get_samples_from_quantiles(task_id_cols, n_samples) %>% + rbind(model_out_tbl) } } # transform based on new_output_type - grouped_model_out_tbl <- model_out_tbl %>% - dplyr::group_by(.data[["model_id"]], dplyr::across(dplyr::all_of(task_id_cols))) model_out_tbl_transform <- vector("list", length = length(new_output_type)) for (i in seq_along(new_output_type)) { + # first find new_output_type_id + new_output_type_id_tmp <- new_output_type_id + if (new_output_type[i] %in% c("mean", "median")) { + new_output_type_id_tmp <- NA + } else if (is.list(new_output_type_id)) { + new_output_type_id_tmp <- new_output_type_id[[new_output_type[i]]] + } # if median output desired, and Q50 provided return exact value if (new_output_type[i] == "median" && 0.5 %in% starting_output_type_ids) { model_out_tbl_transform[[i]] <- model_out_tbl %>% - dplyr::filter(.data[["output_type_id"]] == 0.5) %>% + dplyr::filter( + .data[["output_type"]] != "sample", + .data[["output_type_id"]] == 0.5 + ) %>% dplyr::mutate( output_type = new_output_type[i], output_type_id = NA ) %>% as_model_out_tbl() + } else { # otherwise calculate new values + grouped_model_out_tbl <- model_out_tbl %>% + dplyr::filter(.data[["output_type"]] == "sample") %>% + dplyr::group_by(dplyr::across(dplyr::all_of(c("model_id", task_id_cols)))) + model_out_tbl_transform[[i]] <- grouped_model_out_tbl %>% + convert_from_sample(new_output_type[i], new_output_type_id_tmp) %>% + dplyr::ungroup() } - # otherwise calculate new values - # first find new_output_type_id - new_output_type_id_tmp <- new_output_type_id - if (new_output_type[i] %in% c("mean", "median")) { - new_output_type_id_tmp <- NA - } else if (is.list(new_output_type_id)) { - new_output_type_id_tmp <- new_output_type_id[[new_output_type[i]]] - } - model_out_tbl_transform[[i]] <- convert_from_sample( - grouped_model_out_tbl, new_output_type[i], new_output_type_id_tmp - ) } return(dplyr::bind_rows(model_out_tbl_transform)) } @@ -210,15 +216,27 @@ get_samples_from_quantiles <- function(model_out_tbl, task_id_cols, n_samples, . ) ) } + samples <- model_out_tbl %>% - dplyr::group_by(.data[["model_id"]], dplyr::across(dplyr::all_of(task_id_cols))) %>% + dplyr::group_by(dplyr::across(dplyr::all_of(c("model_id", task_id_cols)))) %>% dplyr::reframe( value = distfromq::make_q_fn( ps = as.numeric(.data[["output_type_id"]]), qs = .data[["value"]], ... )(stats::runif(n_samples, 0, 1)) - ) - return(samples) + ) %>% + dplyr::ungroup() + split_samples <- split(samples, f = samples[[task_id_cols]]) + formatted_samples <- split_samples %>% + purrr::map(.f = function(split_samples) { + dplyr::mutate(split_samples, + output_type = "sample", + output_type_id = as.numeric(dplyr::row_number()), + .before = "value") + }) %>% + purrr::list_rbind() %>% + as_model_out_tbl() + return(formatted_samples) } #' @noRd @@ -231,15 +249,27 @@ get_samples_from_cdf <- function(model_out_tbl, task_id_cols, n_samples, ...) { ) ) } + samples <- model_out_tbl %>% - dplyr::group_by(.data[["model_id"]], dplyr::across(dplyr::all_of(task_id_cols))) %>% + dplyr::group_by(dplyr::across(dplyr::all_of(c("model_id", task_id_cols)))) %>% dplyr::reframe( value = distfromq::make_q_fn( ps = .data[["value"]], qs = as.numeric(.data[["output_type_id"]]), ... )(stats::runif(n_samples, 0, 1)) - ) - return(samples) + ) %>% + dplyr::ungroup() + split_samples <- split(samples, f = samples[[task_id_cols]]) + formatted_samples <- split_samples %>% + purrr::map(.f = function(split_samples) { + dplyr::mutate(split_samples, + output_type = "sample", + output_type_id = as.numeric(dplyr::row_number()), + .before = "value") + }) %>% + purrr::list_rbind() %>% + as_model_out_tbl() + return(formatted_samples) } #' @noRd diff --git a/tests/testthat/test-convert_output_types.R b/tests/testthat/test-convert_output_types.R index 73eca109..3d9f2642 100644 --- a/tests/testthat/test-convert_output_types.R +++ b/tests/testthat/test-convert_output_types.R @@ -79,6 +79,44 @@ test_that("convert_output_type works (quantile >> cdf)", { expect_equal(test, expected, tolerance = 1e-2) }) +test_that("convert_output_type works (quantile >> cdf, median)", { + ex_qs <- seq(0, 1, length.out = 500)[2:499] + model_out_tbl <- expand.grid( + grp1 = 1:2, + model_id = LETTERS[1:2], + output_type = "quantile", + output_type_id = ex_qs, + stringsAsFactors = FALSE + ) %>% + dplyr::mutate(mean = grp1 * ifelse(model_id == "A", 1, 3), + value = qnorm(ex_qs, mean)) %>% + dplyr::select(-mean) + new_output_type <- c("cdf", "median") + new_output_type_id <- list(cdf = seq(-2, 2, 0.5), median = NA) + expected_median <- tibble::tibble( + grp1 = rep(1:2, 2), model_id = sort(rep(LETTERS[1:2], 2)) + ) %>% + dplyr::mutate(value = grp1 * ifelse(model_id == "A", 1, 3)) %>% + dplyr::mutate(output_type = new_output_type[2], + output_type_id = new_output_type_id[[2]]) %>% + as_model_out_tbl() + expected_cdf <- tibble::as_tibble(expand.grid( + grp1 = 1:2, + model_id = LETTERS[1:2], + output_type = new_output_type[1], + output_type_id = new_output_type_id[[1]], + KEEP.OUT.ATTRS = FALSE, + stringsAsFactors = FALSE + )) %>% + dplyr::mutate(value = pnorm(output_type_id, grp1 * ifelse(model_id == "A", 1, 3))) %>% + dplyr::arrange(model_id, grp1) %>% + as_model_out_tbl() + expected <- rbind(expected_cdf, expected_median) + set.seed(101) + test <- convert_output_type(model_out_tbl, new_output_type, new_output_type_id) + expect_equal(test, expected, tolerance = 1e-2) +}) + test_that("convert_output_type works (cdf >> mean)", { ex_ps <- seq(-2, 10, length.out = 500)[2:499] model_out_tbl <- expand.grid( From 8d2e95b94a25c961f62ceb9a8cb4c6e31b0d5bca Mon Sep 17 00:00:00 2001 From: Li Shandross <57642277+lshandross@users.noreply.github.com> Date: Fri, 4 Oct 2024 16:26:48 -0400 Subject: [PATCH 45/46] Cleanup tests --- tests/testthat/test-convert_output_types.R | 118 ++++++--------------- 1 file changed, 32 insertions(+), 86 deletions(-) diff --git a/tests/testthat/test-convert_output_types.R b/tests/testthat/test-convert_output_types.R index 3d9f2642..5cfa565c 100644 --- a/tests/testthat/test-convert_output_types.R +++ b/tests/testthat/test-convert_output_types.R @@ -1,16 +1,30 @@ +# set up toy data for tests +ex_qs <- seq(0, 1, length.out = 500)[2:499] +quantile_outputs <- expand.grid( + grp1 = 1:2, + model_id = LETTERS[1:2], + output_type = "quantile", + output_type_id = ex_qs, + stringsAsFactors = FALSE +) %>% + dplyr::mutate(mean = grp1 * ifelse(model_id == "A", 1, 3), + value = qnorm(ex_qs, mean)) %>% + dplyr::select(-mean) + +ex_ps <- seq(-2, 10, length.out = 500)[2:499] +cdf_outputs <- expand.grid( + grp1 = 1:2, + model_id = LETTERS[1:2], + output_type = "cdf", + output_type_id = ex_ps, + stringsAsFactors = FALSE +) %>% + dplyr::mutate(mean = grp1 * ifelse(model_id == "A", 1, 3), + value = pnorm(output_type_id, mean)) %>% + dplyr::select(-mean) + ### test convert_output_type() test_that("convert_output_type works (quantile >> mean)", { - ex_qs <- seq(0, 1, length.out = 500)[2:499] - model_out_tbl <- expand.grid( - grp1 = 1:2, - model_id = LETTERS[1:2], - output_type = "quantile", - output_type_id = ex_qs, - stringsAsFactors = FALSE - ) %>% - dplyr::mutate(mean = grp1 * ifelse(model_id == "A", 1, 3), - value = qnorm(ex_qs, mean)) %>% - dplyr::select(-mean) new_output_type <- "mean" new_output_type_id <- NA expected <- tibble::tibble( @@ -20,22 +34,11 @@ test_that("convert_output_type works (quantile >> mean)", { dplyr::mutate(output_type = new_output_type, output_type_id = new_output_type_id) %>% as_model_out_tbl() - test <- convert_output_type(model_out_tbl, new_output_type, new_output_type_id) + test <- convert_output_type(quantile_outputs, new_output_type, new_output_type_id) expect_equal(test, expected, tolerance = 1e-2) }) test_that("convert_output_type works (quantile >> median)", { - ex_qs <- seq(0, 1, length.out = 500)[2:499] - model_out_tbl <- expand.grid( - grp1 = 1:2, - model_id = LETTERS[1:2], - output_type = "quantile", - output_type_id = ex_qs, - stringsAsFactors = FALSE - ) %>% - dplyr::mutate(mean = grp1 * ifelse(model_id == "A", 1, 3), - value = qnorm(ex_qs, mean)) %>% - dplyr::select(-mean) new_output_type <- "median" new_output_type_id <- NA expected <- tibble::tibble( @@ -45,22 +48,11 @@ test_that("convert_output_type works (quantile >> median)", { dplyr::mutate(output_type = new_output_type, output_type_id = new_output_type_id) %>% as_model_out_tbl() - test <- convert_output_type(model_out_tbl, new_output_type, new_output_type_id) + test <- convert_output_type(quantile_outputs, new_output_type, new_output_type_id) expect_equal(test, expected, tolerance = 1e-2) }) test_that("convert_output_type works (quantile >> cdf)", { - ex_qs <- seq(0, 1, length.out = 500)[2:499] - model_out_tbl <- expand.grid( - grp1 = 1:2, - model_id = LETTERS[1:2], - output_type = "quantile", - output_type_id = ex_qs, - stringsAsFactors = FALSE - ) %>% - dplyr::mutate(mean = grp1 * ifelse(model_id == "A", 1, 3), - value = qnorm(ex_qs, mean)) %>% - dplyr::select(-mean) new_output_type <- "cdf" new_output_type_id <- seq(-2, 2, 0.5) expected <- tibble::as_tibble(expand.grid( @@ -75,22 +67,11 @@ test_that("convert_output_type works (quantile >> cdf)", { dplyr::arrange(model_id, grp1) %>% as_model_out_tbl() set.seed(101) - test <- convert_output_type(model_out_tbl, new_output_type, new_output_type_id) + test <- convert_output_type(quantile_outputs, new_output_type, new_output_type_id) expect_equal(test, expected, tolerance = 1e-2) }) test_that("convert_output_type works (quantile >> cdf, median)", { - ex_qs <- seq(0, 1, length.out = 500)[2:499] - model_out_tbl <- expand.grid( - grp1 = 1:2, - model_id = LETTERS[1:2], - output_type = "quantile", - output_type_id = ex_qs, - stringsAsFactors = FALSE - ) %>% - dplyr::mutate(mean = grp1 * ifelse(model_id == "A", 1, 3), - value = qnorm(ex_qs, mean)) %>% - dplyr::select(-mean) new_output_type <- c("cdf", "median") new_output_type_id <- list(cdf = seq(-2, 2, 0.5), median = NA) expected_median <- tibble::tibble( @@ -113,22 +94,11 @@ test_that("convert_output_type works (quantile >> cdf, median)", { as_model_out_tbl() expected <- rbind(expected_cdf, expected_median) set.seed(101) - test <- convert_output_type(model_out_tbl, new_output_type, new_output_type_id) + test <- convert_output_type(quantile_outputs, new_output_type, new_output_type_id) expect_equal(test, expected, tolerance = 1e-2) }) test_that("convert_output_type works (cdf >> mean)", { - ex_ps <- seq(-2, 10, length.out = 500)[2:499] - model_out_tbl <- expand.grid( - grp1 = 1:2, - model_id = LETTERS[1:2], - output_type = "cdf", - output_type_id = ex_ps, - stringsAsFactors = FALSE - ) %>% - dplyr::mutate(mean = grp1 * ifelse(model_id == "A", 1, 3), - value = pnorm(output_type_id, mean)) %>% - dplyr::select(-mean) new_output_type <- "mean" new_output_type_id <- NA expected <- tibble::tibble( @@ -138,22 +108,11 @@ test_that("convert_output_type works (cdf >> mean)", { dplyr::mutate(output_type = new_output_type, output_type_id = new_output_type_id) %>% as_model_out_tbl() - test <- convert_output_type(model_out_tbl, new_output_type, new_output_type_id) + test <- convert_output_type(cdf_outputs, new_output_type, new_output_type_id) expect_equal(test, expected, tolerance = 1e-2) }) test_that("convert_output_type works (cdf >> median)", { - ex_ps <- seq(-2, 10, length.out = 500)[2:499] - model_out_tbl <- expand.grid( - grp1 = 1:2, - model_id = LETTERS[1:2], - output_type = "cdf", - output_type_id = ex_ps, - stringsAsFactors = FALSE - ) %>% - dplyr::mutate(mean = grp1 * ifelse(model_id == "A", 1, 3), - value = pnorm(output_type_id, mean)) %>% - dplyr::select(-mean) new_output_type <- "median" new_output_type_id <- NA expected <- tibble::tibble( @@ -163,23 +122,11 @@ test_that("convert_output_type works (cdf >> median)", { dplyr::mutate(output_type = new_output_type, output_type_id = new_output_type_id) %>% as_model_out_tbl() - test <- convert_output_type(model_out_tbl, - new_output_type, new_output_type_id) + test <- convert_output_type(cdf_outputs, new_output_type, new_output_type_id) expect_equal(test, expected, tolerance = 1e-2) }) test_that("convert_output_type works (cdf >> quantile)", { - ex_ps <- seq(-2, 10, length.out = 500)[2:499] - model_out_tbl <- expand.grid( - grp1 = 1:2, - model_id = LETTERS[1:2], - output_type = "cdf", - output_type_id = ex_ps, - stringsAsFactors = FALSE - ) %>% - dplyr::mutate(mean = grp1 * ifelse(model_id == "A", 1, 3), - value = pnorm(output_type_id, mean)) %>% - dplyr::select(-mean) new_output_type <- "quantile" new_output_type_id <- c(0.25, 0.5, 0.75) expected <- tibble::as_tibble(expand.grid( @@ -193,8 +140,7 @@ test_that("convert_output_type works (cdf >> quantile)", { dplyr:: mutate(value = qnorm(output_type_id, grp1 * ifelse(model_id == "A", 1, 3))) %>% dplyr::arrange(model_id, grp1) %>% as_model_out_tbl() - test <- convert_output_type(model_out_tbl, - new_output_type, new_output_type_id) + test <- convert_output_type(cdf_outputs, new_output_type, new_output_type_id) expect_equal(test, expected, tolerance = 1e-2) }) From 874ad8160ba420e4d122d41bdc35e08035a33513 Mon Sep 17 00:00:00 2001 From: Li Shandross <57642277+lshandross@users.noreply.github.com> Date: Fri, 11 Oct 2024 16:50:37 -0400 Subject: [PATCH 46/46] Rename variable in `convert_output_types()` example to be more accurate cdf output type ids are individual values not bins --- R/convert_output_types.R | 4 ++-- man/convert_output_type.Rd | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/R/convert_output_types.R b/R/convert_output_types.R index 7f1b93f4..6c95055b 100644 --- a/R/convert_output_types.R +++ b/R/convert_output_types.R @@ -57,7 +57,7 @@ #' convert_output_type(model_out_tbl, new_output_type = "median", new_output_type_id = NA) #' #' # Next, we illustrate conversion from samples to quantile and cdf -#' ex_bins <- seq(-2,2,1) +#' ex_cdf_values <- seq(-2,2,1) #' ex_quantiles <- c(0.25, 0.5, 0.75) #' model_out_tbl <- expand.grid( #' stringsAsFactors = FALSE, @@ -69,7 +69,7 @@ #' dplyr::mutate(value = rnorm(200, mean = group1)) #' #' convert_output_type(model_out_tbl, new_output_type = c("quantile", "cdf"), -#' new_output_type_id = list("quantile" = ex_quantiles, "cdf" = ex_bins)) +#' new_output_type_id = list("quantile" = ex_quantiles, "cdf" = ex_cdf_values)) #' #' @return object of class `model_out_tbl` containing (only) the new output_type(s) #' for each unique combination of task IDs for each model diff --git a/man/convert_output_type.Rd b/man/convert_output_type.Rd index be64374a..4123d796 100644 --- a/man/convert_output_type.Rd +++ b/man/convert_output_type.Rd @@ -81,7 +81,7 @@ dplyr::mutate(value = qnorm(p = output_type_id, mean = group1)) convert_output_type(model_out_tbl, new_output_type = "median", new_output_type_id = NA) # Next, we illustrate conversion from samples to quantile and cdf -ex_bins <- seq(-2,2,1) +ex_cdf_values <- seq(-2,2,1) ex_quantiles <- c(0.25, 0.5, 0.75) model_out_tbl <- expand.grid( stringsAsFactors = FALSE, @@ -93,6 +93,6 @@ model_out_tbl <- expand.grid( dplyr::mutate(value = rnorm(200, mean = group1)) convert_output_type(model_out_tbl, new_output_type = c("quantile", "cdf"), - new_output_type_id = list("quantile" = ex_quantiles, "cdf" = ex_bins)) + new_output_type_id = list("quantile" = ex_quantiles, "cdf" = ex_cdf_values)) }