From ed552925214231e06832a06948ee39296dcfe009 Mon Sep 17 00:00:00 2001 From: Richard Hanna Date: Wed, 24 Jul 2024 16:38:29 -0400 Subject: [PATCH] Support multiple values_to, logicals, new checks --- R/checks.R | 29 ++++++ R/combine_checkboxes.R | 118 +++++++++++++---------- man/check_values_to_length.Rd | 19 ++++ man/get_metadata_ref.Rd | 4 +- tests/testthat/test-checks.R | 19 ++++ tests/testthat/test-combine_checkboxes.R | 87 +++++++++++++---- 6 files changed, 207 insertions(+), 69 deletions(-) create mode 100644 man/check_values_to_length.Rd diff --git a/R/checks.R b/R/checks.R index e763c864..b34102b1 100644 --- a/R/checks.R +++ b/R/checks.R @@ -712,3 +712,32 @@ check_fields_are_checkboxes <- function(metadata_tbl, call = caller_env()) { ) } } + +#' @title +#' Check values_to length against detected number of checkbox fields +#' +#' @param col_groups a list of column groups identified by checkbox field detection +#' @param values_to a user defined character vector passed from [combine_checkboxes()] +#' @param call The calling environment to use in the error message +#' +#' @keywords internal +check_values_to_length <- function(col_groups, values_to, call = caller_env()) { + if (length(values_to) < length(names(col_groups))) { + cli_warn( + message = c( + `!` = "Detected fewer {.code values_to} arguments than the number of checkbox fields. Only the first {length(values_to)} will be used." + ), + class = c("checkbox_value_to_length", "REDCapTidieR_cond") + ) + } + + if (length(values_to) > length(names(col_groups))) { + cli_abort( + message = c( + `x` = "The number of {.code values_to} arguments supplied is greater than the number of checkbox fields detected.", + `i` = "{length(values_to)} {.code values_to} supplied, {length(names(col_groups))} checkbox fields detected." + ), + class = c("checkbox_value_to_length", "REDCapTidieR_cond") + ) + } +} diff --git a/R/combine_checkboxes.R b/R/combine_checkboxes.R index 4f61625c..50dba153 100644 --- a/R/combine_checkboxes.R +++ b/R/combine_checkboxes.R @@ -48,68 +48,81 @@ combine_checkboxes <- function(supertbl, # Check args --- check_arg_is_supertbl(supertbl, req_cols = c("redcap_data", "redcap_metadata")) check_arg_is_character(tbl, len = 1, any.missing = FALSE) - check_arg_is_character(values_to, len = 1, any.missing = FALSE) + check_arg_is_character(values_to, any.missing = FALSE) check_arg_is_character(multi_value_label, len = 1, any.missing = TRUE) check_arg_is_character(values_fill, len = 1, any.missing = TRUE) check_arg_choices(raw_or_label, choices = c("label", "raw")) check_arg_is_logical(keep, len = 1, any.missing = FALSE) - # Save user cols to enquosure - cols_exp <- enquo(cols) - # Extract tbl from supertbl data_tbl <- supertbl %>% extract_tibble(tbl) - # Get field names from cols_exp, check that fields exist - field_names <- names(eval_select(cols_exp, data = data_tbl)) - check_fields_exist(fields = field_names, expr = cols_exp) + # Save user cols to quosure + cols_exp <- enquo(cols) - # Define values_to as the count of TRUEs/1s for the given checkbox field - # Assign TRUE if multiple selections made, and FALSE if one or zero made - data_tbl_mod <- data_tbl %>% - mutate( - !!values_to := case_when( - rowSums(select(., eval_tidy(cols_exp))) > 1 ~ TRUE, - TRUE ~ FALSE - ) - ) + # Evaluate the cols expression to get the selected column names + selected_cols <- names(eval_select(cols_exp, data = data_tbl)) + check_fields_exist(fields = selected_cols, expr = cols_exp) # Check supplied fields exist + + # Extract the prefix of each selected column + prefixes <- sub("___.*", "", selected_cols) + + # Split the selected columns based on their prefixes + col_groups <- split(selected_cols, prefixes) + check_values_to_length(col_groups, values_to) # Check values_to columns match length of fields # Get metadata reference table, check that chosen fields are checkboxes metadata_tbl <- supertbl$redcap_metadata[supertbl$redcap_form_name == tbl][[1]] - metadata_ref <- get_metadata_ref(metadata_tbl, field_names) + metadata_ref <- get_metadata_ref(metadata_tbl, selected_cols) + + # Define values_to as the count of TRUEs/1s for the given checkbox field + # Assign TRUE if multiple selections made, and FALSE if one or zero made + data_tbl_mod <- data_tbl + + for (i in seq_along(values_to)) { + data_tbl_mod <- data_tbl_mod %>% + mutate( + !!values_to[i] := case_when( + rowSums(select(., col_groups[[i]])) > 1 ~ TRUE, + .default = FALSE + ) + ) + } # Replace TRUEs/1s with raw/label values from metadata data_tbl_mod <- data_tbl_mod %>% mutate(across( - field_names, + selected_cols, ~ replace_true(.x, - cur_column(), - metadata = metadata_ref, - raw_or_label = raw_or_label + cur_column(), + metadata = metadata_ref, + raw_or_label = raw_or_label ) - )) + ), + across(selected_cols, as.character) # enforce to character strings + ) - # Convert values_to from TRUE/FALSE to multi_value_label or identified single val - data_tbl_mod <- data_tbl_mod %>% - mutate( - across(field_names, as.character) # enforce to character strings - ) %>% - mutate( - !!values_to := ifelse(!!sym(values_to), - multi_value_label, - coalesce(!!!syms(field_names)) - ), - !!values_to := ifelse(is.na(!!sym(values_to)), - values_fill, - !!sym(values_to) + for (i in seq_along(values_to)) { + metadata_overwrite <- metadata_ref %>% filter(field_name %in% col_groups[[i]]) %>% pull(raw_or_label) + + data_tbl_mod <- data_tbl_mod %>% + mutate( + !!values_to[i] := ifelse(!!sym(values_to[i]), + multi_value_label, + coalesce(!!!syms(col_groups[[i]])) + ), + !!values_to[i] := ifelse(is.na(!!sym(values_to[i])), + values_fill, + !!sym(values_to[i]) + ) + ) %>% + mutate( + !!values_to[i] := factor(!!sym(values_to[i]), + levels = c(metadata_overwrite, multi_value_label, values_fill) + ) ) - ) %>% - mutate( - !!values_to := factor(!!sym(values_to), - levels = c(metadata_ref[[raw_or_label]], multi_value_label, values_fill) - ) - ) + } final_tbl <- bind_cols( data_tbl, @@ -119,7 +132,7 @@ combine_checkboxes <- function(supertbl, # Keep or remove original multi columns if (!keep) { final_tbl <- final_tbl %>% - select(-field_names) + select(-selected_cols) } # Update the supertbl data tibble @@ -131,17 +144,16 @@ combine_checkboxes <- function(supertbl, #' @title Utility function for getting metadata raw and label values for checkboxes #' #' @param metadata_tbl A metadata tibble from the supertibble generated by [read_redcap()]. -#' @param field_names Character string vector of field names for checkbox combination +#' @param selected_cols Character string vector of field names for checkbox combination #' #' @returns a tibble #' #' @keywords internal get_metadata_ref <- function(metadata_tbl, - field_names) { + selected_cols) { # Create a metadata reference table linking field name to raw and label values out <- metadata_tbl %>% - filter(.data$field_name %in% field_names) %>% - # TODO: original_field a temporary placeholder for future multi-field and mapping dev + filter(.data$field_name %in% selected_cols) %>% mutate( original_field = sub("___.*$", "", .data$field_name) ) @@ -149,11 +161,19 @@ get_metadata_ref <- function(metadata_tbl, # Make sure selection is checkbox metadata field type check_fields_are_checkboxes(out) - # TODO: Make more robust for multi-field and mapping, using original_field above - parsed_vals <- parse_labels(first(out$select_choices_or_calculations)) + # Bind raw/label values per original field grouping + parsed_vals <- tibble() + + for (i in seq_along(unique(out$original_field))) { + index <- unique(out$original_field)[i] + out_filtered <- out %>% filter(original_field == index) + + parsed_vals <- rbind(parsed_vals, parse_labels(first(out_filtered$select_choices_or_calculations))) + } bind_cols(out, parsed_vals) %>% - select(.data$field_name, .data$raw, .data$label) + select(.data$field_name, .data$raw, .data$label, original_field) %>% + relocate(original_field, .after = field_name) } #' @noRd diff --git a/man/check_values_to_length.Rd b/man/check_values_to_length.Rd new file mode 100644 index 00000000..e9194a08 --- /dev/null +++ b/man/check_values_to_length.Rd @@ -0,0 +1,19 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/checks.R +\name{check_values_to_length} +\alias{check_values_to_length} +\title{Check values_to length against detected number of checkbox fields} +\usage{ +check_values_to_length(col_groups, values_to, call = caller_env()) +} +\arguments{ +\item{col_groups}{a list of column groups identified by checkbox field detection} + +\item{values_to}{a user defined character vector passed from \code{\link[=combine_checkboxes]{combine_checkboxes()}}} + +\item{call}{The calling environment to use in the error message} +} +\description{ +Check values_to length against detected number of checkbox fields +} +\keyword{internal} diff --git a/man/get_metadata_ref.Rd b/man/get_metadata_ref.Rd index 2c09c4bb..6ced220b 100644 --- a/man/get_metadata_ref.Rd +++ b/man/get_metadata_ref.Rd @@ -4,12 +4,12 @@ \alias{get_metadata_ref} \title{Utility function for getting metadata raw and label values for checkboxes} \usage{ -get_metadata_ref(metadata_tbl, field_names) +get_metadata_ref(metadata_tbl, selected_cols) } \arguments{ \item{metadata_tbl}{A metadata tibble from the supertibble generated by \code{\link[=read_redcap]{read_redcap()}}.} -\item{field_names}{Character string vector of field names for checkbox combination} +\item{selected_cols}{Character string vector of field names for checkbox combination} } \value{ a tibble diff --git a/tests/testthat/test-checks.R b/tests/testthat/test-checks.R index 77435375..3fc71072 100644 --- a/tests/testthat/test-checks.R +++ b/tests/testthat/test-checks.R @@ -272,3 +272,22 @@ test_that("check_fields_are_checkboxes works", { expect_error(check_fields_are_checkboxes(metadata), class = "non_checkbox_fields") expect_no_error(check_fields_are_checkboxes(metadata_filtered)) }) + +test_that("check_values_to_length length works", { + col_groups <- list( + checkbox_1 = c("checkbox1___1", "checkbox1___2"), + checkbox_2 = c("checkbox2___1") + ) + + values_to <- c("new_col1", "new_col2") + + expect_no_message(check_values_to_length(col_groups, values_to)) + + values_to_warn <- "new_col1" + + expect_warning(check_values_to_length(col_groups, values_to_warn), class = "checkbox_value_to_length") + + values_to_error <- c("new_col1", "new_col2", "new_col3") + + expect_error(check_values_to_length(col_groups, values_to_error), class = "checkbox_value_to_length") +}) diff --git a/tests/testthat/test-combine_checkboxes.R b/tests/testthat/test-combine_checkboxes.R index 45abb426..bb7af8f2 100644 --- a/tests/testthat/test-combine_checkboxes.R +++ b/tests/testthat/test-combine_checkboxes.R @@ -1,8 +1,8 @@ nonrepeat_data <- tibble::tribble( - ~"study_id", ~"multi___1", ~"multi___2", ~"multi___3", ~"extra_data", - 1, TRUE, FALSE, FALSE, 1, - 2, TRUE, TRUE, FALSE, 2, - 3, FALSE, FALSE, FALSE, 3 + ~"study_id", ~"multi___1", ~"multi___2", ~"multi___3", ~"single_checkbox___1", ~"extra_data", + 1, TRUE, FALSE, FALSE, TRUE, 1, + 2, TRUE, TRUE, FALSE, TRUE, 2, + 3, FALSE, FALSE, FALSE, FALSE, 3 ) nonrepeat_metadata <- tibble::tribble( @@ -11,7 +11,8 @@ nonrepeat_metadata <- tibble::tribble( "multi___1", "checkbox", "1, Red | 2, Yellow | 3, Blue", "multi___2", "checkbox", "1, Red | 2, Yellow | 3, Blue", "multi___3", "checkbox", "1, Red | 2, Yellow | 3, Blue", - "extra_data", "dropdown", "1, 1 | 2, 2 | 3,3" + "single_checkbox___1", "checkbox", "4, Green", + "extra_data", "dropdown", "1, 1 | 2, 2 | 3, 3" ) repeat_data <- tibble::tribble( @@ -62,10 +63,10 @@ test_that("combine_checkboxes works for nonrepeat instrument", { dplyr::first() expected_out <- tibble::tribble( - ~"study_id", ~"multi___1", ~"multi___2", ~"multi___3", ~"extra_data", ~"new_col", - 1, TRUE, FALSE, FALSE, 1, "Red", - 2, TRUE, TRUE, FALSE, 2, "multiple", - 3, FALSE, FALSE, FALSE, 3, "none" + ~"study_id", ~"multi___1", ~"multi___2", ~"multi___3", ~"single_checkbox___1", ~"extra_data", ~"new_col", + 1, TRUE, FALSE, FALSE, TRUE, 1, "Red", + 2, TRUE, TRUE, FALSE, TRUE, 2, "multiple", + 3, FALSE, FALSE, FALSE, FALSE, 3, "none" ) %>% mutate( new_col = factor(new_col, levels = c("Red", "Yellow", "Blue", "multiple", "none")) @@ -86,10 +87,10 @@ test_that("combine_checkboxes works for nonrepeat instrument and drop old values dplyr::first() expected_out <- tibble::tribble( - ~"study_id", ~"extra_data", ~"new_col", - 1, 1, "Red", - 2, 2, "Multiple", - 3, 3, NA + ~"study_id", ~"single_checkbox___1", ~"extra_data", ~"new_col", + 1, TRUE, 1, "Red", + 2, TRUE, 2, "Multiple", + 3, FALSE, 3, NA ) %>% mutate( new_col = factor(new_col, levels = c("Red", "Yellow", "Blue", "Multiple")) @@ -124,14 +125,14 @@ test_that("combine_checkboxes works for repeat instrument", { test_that("get_metadata_ref works", { out <- get_metadata_ref( metadata_tbl = supertbl$redcap_metadata[[1]], - field_names = c("multi___1", "multi___2", "multi___3") + selected_cols = c("multi___1", "multi___2", "multi___3") ) expected_out <- tibble::tribble( - ~"field_name", ~"raw", ~"label", - "multi___1", "1", "Red", - "multi___2", "2", "Yellow", - "multi___3", "3", "Blue" + ~"field_name", ~"original_field", ~"raw", ~"label", + "multi___1", "multi", "1", "Red", + "multi___2", "multi", "2", "Yellow", + "multi___3", "multi", "3", "Blue" ) expect_equal(out, expected_out) @@ -155,3 +156,53 @@ test_that("replace_true works", { expect_equal(out, expected_out) }) + +test_that("combine_checkboxes works for multiple checkbox fields", { + out <- combine_checkboxes( + supertbl = supertbl, + tbl = "nonrepeat_instrument", + cols = c(starts_with("multi"), starts_with("single_checkbox")), + values_to = c("new_col1", "new_col2"), + keep = FALSE + ) %>% + pull(redcap_data) %>% + dplyr::first() + + expected_out <- tibble::tribble( + ~"study_id",~"extra_data", ~"new_col1", ~"new_col2", + 1, 1, "Red", "Green", + 2, 2, "Multiple", "Green", + 3, 3, NA, NA + ) %>% + mutate( + new_col1 = factor(new_col1, levels = c("Red", "Yellow", "Blue", "Multiple")), + new_col2 = factor(new_col2, levels = c("Green", "Multiple")) + ) + + expect_equal(out, expected_out) +}) + +test_that("combine_checkboxes works for multiple checkbox fields with logicals", { + out <- combine_checkboxes( + supertbl = supertbl, + tbl = "nonrepeat_instrument", + cols = c(starts_with("multi")| starts_with("single_checkbox")), + values_to = c("new_col1", "new_col2"), + keep = FALSE + ) %>% + pull(redcap_data) %>% + dplyr::first() + + expected_out <- tibble::tribble( + ~"study_id",~"extra_data", ~"new_col1", ~"new_col2", + 1, 1, "Red", "Green", + 2, 2, "Multiple", "Green", + 3, 3, NA, NA + ) %>% + mutate( + new_col1 = factor(new_col1, levels = c("Red", "Yellow", "Blue", "Multiple")), + new_col2 = factor(new_col2, levels = c("Green", "Multiple")) + ) + + expect_equal(out, expected_out) +})