From 9b0471bfa5c92188635991650ca1f3bfa3ecc2a5 Mon Sep 17 00:00:00 2001 From: Richard Hanna Date: Wed, 10 Jul 2024 16:12:55 -0400 Subject: [PATCH 01/39] Reduce function initial draft --- DESCRIPTION | 2 +- R/reduce_multi_to_single_column.R | 75 +++++++++++++++++++++++++++++++ 2 files changed, 76 insertions(+), 1 deletion(-) create mode 100644 R/reduce_multi_to_single_column.R diff --git a/DESCRIPTION b/DESCRIPTION index 3172fb75..2a75938d 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -52,5 +52,5 @@ Config/testthat/edition: 3 Encoding: UTF-8 Language: en-US LazyData: true -RoxygenNote: 7.3.1 +RoxygenNote: 7.3.2 Roxygen: list(markdown = TRUE) diff --git a/R/reduce_multi_to_single_column.R b/R/reduce_multi_to_single_column.R new file mode 100644 index 00000000..eab1c967 --- /dev/null +++ b/R/reduce_multi_to_single_column.R @@ -0,0 +1,75 @@ +reduce_multi_to_single_column <- function(supertbl, tbl, cols, raw_or_label = "label", cols_to, multi_val = "Multiple") { + + cols_exp <- enquo(cols) + + data_tbl <- supertbl %>% + extract_tibble(tbl) + + project_identifier <- supertbl$redcap_metadata[[1]]$field_name[[1]] + + out <- data_tbl %>% + select(project_identifier, !!!eval_select(cols_exp, data_tbl)) + + field_names <- names(out)[names(out) != project_identifier] + + out <- out %>% + mutate( + !!cols_to := case_when(rowSums(select(., starts_with("race"))) > 1 ~ TRUE, + TRUE ~ FALSE) + + ) + + metadata <- supertbl$redcap_metadata[supertbl$redcap_form_name == tbl][[1]] %>% + filter(field_name %in% names(out)[names(out) != project_identifier]) %>% + select(field_name, select_choices_or_calculations) %>% + mutate( + original_field = sub("___.*$", "", field_name) + ) %>% + mutate(pairs = strsplit(select_choices_or_calculations, " \\| "), + label_value = NA) + + for(i in seq(nrow(metadata))) { + metadata$label_value[i] <- metadata$pairs[[i]][i] + } + + metadata <- metadata %>% + tidyr::separate_wider_delim(label_value, delim = ", ", names = c("raw", "label")) %>% + select(field_name, raw, label) + + replace_true <- function(col, col_name, metadata, raw_or_label) { + replacement <- metadata %>% filter(field_name == col_name) %>% pull(raw_or_label) + col <- ifelse(col == TRUE, replacement, NA) # col == TRUE works for raw or label because TRUE == 1 and 1 == TRUE + # Convert non-TRUEs to NA, since values can be either "FALSE" or "0" for unchecked values + return(col) + } + + out <- out %>% + mutate(across(-c(project_identifier, !!cols_to), ~ replace_true(.x, + dplyr::cur_column(), + metadata = metadata, + raw_or_label = raw_or_label))) + + out <- out %>% + mutate(across(field_names, as.character), # enforce to character strings + # across(field_names, ~case_when(. == "FALSE" ~ NA_character_, TRUE ~ .)), + across(!!cols_to, ~as.character(.))) %>% + rowwise() %>% + mutate( + !!cols_to := ifelse(!!rlang::sym(cols_to) == "TRUE", multi_val, NA_character_), + !!cols_to := ifelse(is.na(!!rlang::sym(cols_to)), + ifelse(any(c_across(cols) != "FALSE"), + na.omit(c_across(cols)[c_across(cols) != "FALSE"])[1], + NA_character_), + !!rlang::sym(cols_to)) + ) %>% + ungroup() %>% + select(project_identifier, !!cols_to) %>% + mutate( + !!cols_to := factor(!!rlang::sym(cols_to), levels = c(metadata[[raw_or_label]], multi_val)) + ) + + out %>% + right_join(data_tbl, by = project_identifier) %>% + relocate(!!cols_to, .after = everything()) +} + From 859926fe50465eaa4df55121710f92e440736b22 Mon Sep 17 00:00:00 2001 From: Richard Hanna Date: Thu, 11 Jul 2024 15:57:18 -0400 Subject: [PATCH 02/39] Reduce function fixes --- NAMESPACE | 9 + R/REDCapTidieR-package.R | 8 +- R/reduce_multi_to_single_column.R | 122 ++++++++----- man/reduce_multi_to_single_column.Rd | 38 +++++ renv.lock | 246 +++------------------------ 5 files changed, 153 insertions(+), 270 deletions(-) create mode 100644 man/reduce_multi_to_single_column.Rd diff --git a/NAMESPACE b/NAMESPACE index 60266b24..f967b2cf 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -13,6 +13,7 @@ export(fmt_strip_trailing_punct) export(fmt_strip_whitespace) export(make_labelled) export(read_redcap) +export(reduce_multi_to_single_column) export(write_redcap_xlsx) importFrom(REDCapR,redcap_arm_export) importFrom(REDCapR,redcap_event_instruments) @@ -40,6 +41,8 @@ importFrom(dplyr,"%>%") importFrom(dplyr,across) importFrom(dplyr,bind_rows) importFrom(dplyr,case_when) +importFrom(dplyr,coalesce) +importFrom(dplyr,cur_column) importFrom(dplyr,filter) importFrom(dplyr,group_by) importFrom(dplyr,if_any) @@ -50,10 +53,13 @@ importFrom(dplyr,pull) importFrom(dplyr,recode) importFrom(dplyr,relocate) importFrom(dplyr,rename) +importFrom(dplyr,right_join) importFrom(dplyr,row_number) +importFrom(dplyr,rowwise) importFrom(dplyr,select) importFrom(dplyr,slice) importFrom(dplyr,summarise) +importFrom(dplyr,ungroup) importFrom(formattable,percent) importFrom(lobstr,obj_size) importFrom(lubridate,is.Date) @@ -103,6 +109,8 @@ importFrom(rlang,is_bare_list) importFrom(rlang,is_installed) importFrom(rlang,new_environment) importFrom(rlang,quo_get_expr) +importFrom(rlang,sym) +importFrom(rlang,syms) importFrom(rlang,try_fetch) importFrom(rlang,zap) importFrom(stats,na.omit) @@ -121,6 +129,7 @@ importFrom(tidyr,complete) importFrom(tidyr,fill) importFrom(tidyr,nest) importFrom(tidyr,pivot_wider) +importFrom(tidyr,separate_wider_delim) importFrom(tidyr,unnest) importFrom(tidyr,unnest_wider) importFrom(tidyselect,all_of) diff --git a/R/REDCapTidieR-package.R b/R/REDCapTidieR-package.R index b4e150a2..89a04b5f 100644 --- a/R/REDCapTidieR-package.R +++ b/R/REDCapTidieR-package.R @@ -5,7 +5,8 @@ #' expect_factor expect_logical #' @importFrom cli cli_abort cli_fmt cli_text cli_vec cli_warn qty #' @importFrom dplyr %>% across bind_rows case_when filter group_by if_any if_else -#' left_join mutate pull recode relocate rename row_number select slice summarise +#' left_join mutate pull recode relocate rename right_join row_number rowwise +#' select slice summarise ungroup coalesce cur_column #' @importFrom formattable percent #' @importFrom lobstr obj_size #' @importFrom lubridate is.difftime is.period is.POSIXt is.Date @@ -15,12 +16,13 @@ #' @importFrom rlang .data !!! abort as_closure caller_arg caller_env catch_cnd #' check_installed cnd_muffle current_call current_env enexpr enquo env_poke #' eval_tidy get_env global_env is_atomic is_bare_formula is_bare_list -#' is_installed new_environment quo_get_expr try_fetch zap as_label +#' is_installed new_environment quo_get_expr try_fetch zap as_label sym syms #' @importFrom stringi stri_split_fixed #' @importFrom stringr str_detect str_replace str_replace_all str_squish str_trunc #' str_trim str_ends #' @importFrom tibble as_tibble is_tibble tibble -#' @importFrom tidyr complete fill pivot_wider nest unnest unnest_wider +#' @importFrom tidyr complete fill pivot_wider nest separate_wider_delim unnest +#' unnest_wider #' @importFrom tidyselect all_of any_of ends_with eval_select everything #' starts_with where #' @importFrom vctrs vec_ptype_abbr vec_ptype diff --git a/R/reduce_multi_to_single_column.R b/R/reduce_multi_to_single_column.R index eab1c967..839de57d 100644 --- a/R/reduce_multi_to_single_column.R +++ b/R/reduce_multi_to_single_column.R @@ -1,75 +1,117 @@ -reduce_multi_to_single_column <- function(supertbl, tbl, cols, raw_or_label = "label", cols_to, multi_val = "Multiple") { +#' @title Reduce Multiple Choice Checkbox Fields to Single Column +#' +#' @description +#' Convert checkbox fields in a data tibble, represented as many columns, into +#' a single column with values. +#' +#' @param supertbl A supertibble generated by [read_redcap()]. Required. +#' @param tbl The `redcap_form_name` of the data tibble under analysis. Required. +#' @param cols One or more columns to specify the checkbox field names for conversion. Required. +#' @param cols_to A string for the name of the column to consolidate values under. Required. +#' @param multi_to A string to specify the placeholder value for rows where multiple +#' checkboxes are selected. Default "Multiple". +#' @param raw_or_label A string (either 'raw' or 'label') that specifies whether +#' to export the raw coded values or the labels for the options of categorical +#' fields. Default is 'label'. +#' +#' @returns A `tibble`. +#' +#' @export +reduce_multi_to_single_column <- function(supertbl, + tbl, + cols, + cols_to, + multi_val = "Multiple", + raw_or_label = "label") { + # Save user cols to enquosure cols_exp <- enquo(cols) + # Extract tbl from supertbl data_tbl <- supertbl %>% extract_tibble(tbl) - project_identifier <- supertbl$redcap_metadata[[1]]$field_name[[1]] + # Assume the first instrument in the metadata contains IDs + # REDCap enforces this constraints, we reflect this in read_redcap -> get_field_to_drop + record_id_field <- supertbl$redcap_metadata[supertbl$redcap_form_name == tbl][[1]]$field_name[1] + + # Combine record identifier with remaining possible project identifiers + project_identifiers <- c(record_id_field, "redcap_form_instance", "redcap_form_name", "redcap_event", "redcap_event_instance") out <- data_tbl %>% - select(project_identifier, !!!eval_select(cols_exp, data_tbl)) + select(any_of(project_identifiers), !!!eval_select(cols_exp, data_tbl)) - field_names <- names(out)[names(out) != project_identifier] + # Define field names as remaining vars defined by the user that aren't identifiers + field_names <- names(out)[!names(out) %in% project_identifiers] + # Define cols_to as the count of TRUEs/1s for the given checkbox field + # Assign TRUE if multiple selections made, and FALSE if one or zero made out <- out %>% mutate( - !!cols_to := case_when(rowSums(select(., starts_with("race"))) > 1 ~ TRUE, + !!cols_to := case_when(rowSums(select(., eval_tidy(cols_exp))) > 1 ~ TRUE, TRUE ~ FALSE) ) - metadata <- supertbl$redcap_metadata[supertbl$redcap_form_name == tbl][[1]] %>% - filter(field_name %in% names(out)[names(out) != project_identifier]) %>% - select(field_name, select_choices_or_calculations) %>% - mutate( - original_field = sub("___.*$", "", field_name) - ) %>% - mutate(pairs = strsplit(select_choices_or_calculations, " \\| "), - label_value = NA) - - for(i in seq(nrow(metadata))) { - metadata$label_value[i] <- metadata$pairs[[i]][i] - } - - metadata <- metadata %>% - tidyr::separate_wider_delim(label_value, delim = ", ", names = c("raw", "label")) %>% - select(field_name, raw, label) - - replace_true <- function(col, col_name, metadata, raw_or_label) { - replacement <- metadata %>% filter(field_name == col_name) %>% pull(raw_or_label) - col <- ifelse(col == TRUE, replacement, NA) # col == TRUE works for raw or label because TRUE == 1 and 1 == TRUE - # Convert non-TRUEs to NA, since values can be either "FALSE" or "0" for unchecked values - return(col) - } + # Get metadata reference table + metadata <- get_metadata_ref(out, supertbl, tbl, project_identifiers) + # Replace TRUEs/1s with raw/label values from metadata out <- out %>% - mutate(across(-c(project_identifier, !!cols_to), ~ replace_true(.x, - dplyr::cur_column(), + mutate(across(-c(any_of(project_identifiers), !!cols_to), ~ replace_true(.x, + cur_column(), metadata = metadata, raw_or_label = raw_or_label))) + + # Convert cols_to from TRUE/FALSE to multi_val or identified single val out <- out %>% mutate(across(field_names, as.character), # enforce to character strings - # across(field_names, ~case_when(. == "FALSE" ~ NA_character_, TRUE ~ .)), across(!!cols_to, ~as.character(.))) %>% rowwise() %>% mutate( - !!cols_to := ifelse(!!rlang::sym(cols_to) == "TRUE", multi_val, NA_character_), - !!cols_to := ifelse(is.na(!!rlang::sym(cols_to)), - ifelse(any(c_across(cols) != "FALSE"), - na.omit(c_across(cols)[c_across(cols) != "FALSE"])[1], - NA_character_), - !!rlang::sym(cols_to)) + !!cols_to := ifelse(!!sym(cols_to) == "TRUE", + multi_val, + coalesce(!!!syms(field_names))) ) %>% ungroup() %>% - select(project_identifier, !!cols_to) %>% + select(any_of(project_identifiers), !!cols_to) %>% mutate( - !!cols_to := factor(!!rlang::sym(cols_to), levels = c(metadata[[raw_or_label]], multi_val)) + !!cols_to := factor(!!sym(cols_to), levels = c(metadata[[raw_or_label]], multi_val)) ) + # Join back onto original data tbl out %>% - right_join(data_tbl, by = project_identifier) %>% + right_join(data_tbl, by = intersect(project_identifiers, names(out))) %>% relocate(!!cols_to, .after = everything()) } +#' @noRd +#' @keywords internal +get_metadata_ref <- function(data, supertbl, tbl, project_identifiers) { + out <- supertbl$redcap_metadata[supertbl$redcap_form_name == tbl][[1]] %>% + filter(field_name %in% names(data)[!names(data) %in% project_identifiers]) %>% + select(field_name, select_choices_or_calculations) %>% + mutate( + original_field = sub("___.*$", "", field_name) + ) %>% + mutate(pairs = strsplit(select_choices_or_calculations, " \\| "), + label_value = NA) + + for (i in seq(nrow(out))) { + out$label_value[i] <- out$pairs[[i]][i] + } + + out %>% + separate_wider_delim(label_value, delim = ", ", names = c("raw", "label")) %>% + select(field_name, raw, label) +} + +#' @noRd +#' @keywords internal +replace_true <- function(col, col_name, metadata, raw_or_label) { + replacement <- metadata %>% filter(field_name == col_name) %>% pull(raw_or_label) + col <- ifelse(col == TRUE, replacement, NA) # col == TRUE works for raw or label because TRUE == 1 and 1 == TRUE + # Convert non-TRUEs to NA, since values can be either "FALSE" or "0" for unchecked values + return(col) +} diff --git a/man/reduce_multi_to_single_column.Rd b/man/reduce_multi_to_single_column.Rd new file mode 100644 index 00000000..1ca22ba6 --- /dev/null +++ b/man/reduce_multi_to_single_column.Rd @@ -0,0 +1,38 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/reduce_multi_to_single_column.R +\name{reduce_multi_to_single_column} +\alias{reduce_multi_to_single_column} +\title{Reduce Multiple Choice Checkbox Fields to Single Column} +\usage{ +reduce_multi_to_single_column( + supertbl, + tbl, + cols, + cols_to, + multi_val = "Multiple", + raw_or_label = "label" +) +} +\arguments{ +\item{supertbl}{A supertibble generated by \code{\link[=read_redcap]{read_redcap()}}. Required.} + +\item{tbl}{The \code{redcap_form_name} of the data tibble under analysis. Required.} + +\item{cols}{One or more columns to specify the checkbox field names for conversion. Required.} + +\item{cols_to}{A string for the name of the column to consolidate values under. Required.} + +\item{raw_or_label}{A string (either 'raw' or 'label') that specifies whether +to export the raw coded values or the labels for the options of categorical +fields. Default is 'label'.} + +\item{multi_to}{A string to specify the placeholder value for rows where multiple +checkboxes are selected. Default "Multiple".} +} +\value{ +A \code{tibble}. +} +\description{ +Convert checkbox fields in a data tibble, represented as many columns, into +a single column with values. +} diff --git a/renv.lock b/renv.lock index b29e066a..27491c14 100644 --- a/renv.lock +++ b/renv.lock @@ -9,17 +9,6 @@ ] }, "Packages": { - "DBI": { - "Package": "DBI", - "Version": "1.2.2", - "Source": "Repository", - "Repository": "CRAN", - "Requirements": [ - "R", - "methods" - ], - "Hash": "164809cd72e1d5160b4cb3aa57f510fe" - }, "R.cache": { "Package": "R.cache", "Version": "0.16.0", @@ -104,25 +93,6 @@ ], "Hash": "e76c401b631961c865b89bb5a4ea3b97" }, - "RSQLite": { - "Package": "RSQLite", - "Version": "2.3.5", - "Source": "Repository", - "Repository": "CRAN", - "Requirements": [ - "DBI", - "R", - "bit64", - "blob", - "cpp11", - "memoise", - "methods", - "pkgconfig", - "plogr", - "rlang" - ], - "Hash": "f5a75d57e0a3014a6ef537ac04a80fc6" - }, "Rcpp": { "Package": "Rcpp", "Version": "1.0.12", @@ -144,16 +114,6 @@ ], "Hash": "cad6cf7f1d5f6e906700b9d3e718c796" }, - "assertthat": { - "Package": "assertthat", - "Version": "0.2.1", - "Source": "Repository", - "Repository": "CRAN", - "Requirements": [ - "tools" - ], - "Hash": "50c838a310445e954bc13f26f26a6ecf" - }, "backports": { "Package": "backports", "Version": "1.4.1", @@ -198,18 +158,6 @@ ], "Hash": "9fe98599ca456d6552421db0d6772d8f" }, - "blob": { - "Package": "blob", - "Version": "1.2.4", - "Source": "Repository", - "Repository": "CRAN", - "Requirements": [ - "methods", - "rlang", - "vctrs" - ], - "Hash": "40415719b5a479b87949f3aa0aee737c" - }, "brew": { "Package": "brew", "Version": "1.0-10", @@ -286,14 +234,14 @@ }, "cli": { "Package": "cli", - "Version": "3.6.2", + "Version": "3.6.3", "Source": "Repository", "Repository": "CRAN", "Requirements": [ "R", "utils" ], - "Hash": "1216ac65ac55ec0058a6f75d7ca0fd52" + "Hash": "b21916dd77a27642b447374a5d30ecf3" }, "clipr": { "Package": "clipr", @@ -359,46 +307,6 @@ ], "Hash": "5a295d7d963cc5035284dcdbaf334f4e" }, - "crancache": { - "Package": "crancache", - "Version": "0.0.0.9001", - "Source": "GitHub", - "RemoteType": "github", - "RemoteHost": "api.github.com", - "RemoteRepo": "crancache", - "RemoteUsername": "r-lib", - "RemoteRef": "HEAD", - "RemoteSha": "7ea4e479bdf780adadd1bd421a5ca23e5f951697", - "Requirements": [ - "callr", - "cranlike", - "curl", - "desc", - "digest", - "parsedate", - "rappdirs", - "rematch2", - "tools", - "utils", - "withr" - ], - "Hash": "795b8389734f11481fdcdf9cdde3002f" - }, - "cranlike": { - "Package": "cranlike", - "Version": "1.0.2", - "Source": "Repository", - "Repository": "CRAN", - "Requirements": [ - "DBI", - "RSQLite", - "debugme", - "desc", - "tools", - "utils" - ], - "Hash": "2a531c8d1d45799fe2e880708dfc1097" - }, "crayon": { "Package": "crayon", "Version": "1.5.2", @@ -449,17 +357,6 @@ ], "Hash": "cdc4a473222b0112d4df0bcfbed12d44" }, - "debugme": { - "Package": "debugme", - "Version": "1.1.0", - "Source": "Repository", - "Repository": "CRAN", - "Requirements": [ - "crayon", - "grDevices" - ], - "Hash": "2d8a9e4f08f3dd669cb8ddd1eb575959" - }, "desc": { "Package": "desc", "Version": "1.4.3", @@ -671,28 +568,6 @@ ], "Hash": "47b5f30c720c23999b913a1a635cf0bb" }, - "gargle": { - "Package": "gargle", - "Version": "1.5.2", - "Source": "Repository", - "Repository": "CRAN", - "Requirements": [ - "R", - "cli", - "fs", - "glue", - "httr", - "jsonlite", - "lifecycle", - "openssl", - "rappdirs", - "rlang", - "stats", - "utils", - "withr" - ], - "Hash": "fc0b272e5847c58cd5da9b20eedbd026" - }, "generics": { "Package": "generics", "Version": "0.1.3", @@ -756,28 +631,6 @@ ], "Hash": "e0b3a53876554bd45879e596cdb10a52" }, - "gmailr": { - "Package": "gmailr", - "Version": "2.0.0", - "Source": "Repository", - "Repository": "CRAN", - "Requirements": [ - "R", - "base64enc", - "cli", - "crayon", - "gargle", - "glue", - "httr", - "jsonlite", - "lifecycle", - "mime", - "rappdirs", - "rematch2", - "rlang" - ], - "Hash": "3c643bd9639dbbda700d2593644c053b" - }, "goodpractice": { "Package": "goodpractice", "Version": "1.0.4", @@ -826,14 +679,14 @@ }, "highr": { "Package": "highr", - "Version": "0.10", + "Version": "0.11", "Source": "Repository", "Repository": "CRAN", "Requirements": [ "R", "xfun" ], - "Hash": "06230136b2d2b9ba5805e1963fa6e890" + "Hash": "d65ba49117ca223614f71b60d85b8ab7" }, "hms": { "Package": "hms", @@ -973,7 +826,7 @@ }, "knitr": { "Package": "knitr", - "Version": "1.45", + "Version": "1.48", "Source": "Repository", "Repository": "CRAN", "Requirements": [ @@ -985,7 +838,7 @@ "xfun", "yaml" ], - "Hash": "1ec462871063897135c1bcbe0fc8f07d" + "Hash": "acf380f300c721da9fde7df115a5f86f" }, "labelled": { "Package": "labelled", @@ -1168,13 +1021,6 @@ ], "Hash": "9fa7cdc5fbdb1c8511fdde72a944db63" }, - "parsedate": { - "Package": "parsedate", - "Version": "1.3.1", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "7f5024cc7af45eeecef657fa62beb568" - }, "pillar": { "Package": "pillar", "Version": "1.9.0", @@ -1268,13 +1114,6 @@ ], "Hash": "876c618df5ae610be84356d5d7a5d124" }, - "plogr": { - "Package": "plogr", - "Version": "0.2.0", - "Source": "Repository", - "Repository": "CRAN", - "Hash": "09eb987710984fc2905c7129c7d85e65" - }, "praise": { "Package": "praise", "Version": "1.0.0", @@ -1521,53 +1360,6 @@ ], "Hash": "1425f91b4d5d9a8f25352c44a3d914ed" }, - "revdepcheck": { - "Package": "revdepcheck", - "Version": "1.0.0.9001", - "Source": "GitHub", - "RemoteType": "github", - "Remotes": "r-lib/crancache", - "RemoteHost": "api.github.com", - "RemoteRepo": "revdepcheck", - "RemoteUsername": "r-lib", - "RemoteRef": "HEAD", - "RemoteSha": "39b808ef3b2ebcf16cb355f737cba1a78e29368a", - "Remotes": "r-lib/crancache", - "Requirements": [ - "DBI", - "RSQLite", - "assertthat", - "brio", - "callr", - "cli", - "clisymbols", - "crancache", - "crayon", - "curl", - "desc", - "glue", - "gmailr", - "hms", - "httr", - "jsonlite", - "knitr", - "pkgbuild", - "prettyunits", - "processx", - "progress", - "rcmdcheck", - "rematch2", - "remotes", - "rlang", - "sessioninfo", - "tibble", - "utils", - "whoami", - "withr", - "yaml" - ], - "Hash": "f47ecbb03333506fc30a883b9af175f1" - }, "rex": { "Package": "rex", "Version": "1.2.1", @@ -1606,18 +1398,18 @@ }, "rlang": { "Package": "rlang", - "Version": "1.1.3", + "Version": "1.1.4", "Source": "Repository", "Repository": "CRAN", "Requirements": [ "R", "utils" ], - "Hash": "42548638fae05fd9a9b5f3f437fbbbe2" + "Hash": "3eec01f8b1dee337674b2e34ab1f9bc1" }, "rmarkdown": { "Package": "rmarkdown", - "Version": "2.26", + "Version": "2.27", "Source": "Repository", "Repository": "CRAN", "Requirements": [ @@ -1636,11 +1428,11 @@ "xfun", "yaml" ], - "Hash": "9b148e7f95d33aac01f31282d49e4f44" + "Hash": "27f9502e1cdbfa195f94e03b0f517484" }, "roxygen2": { "Package": "roxygen2", - "Version": "7.3.1", + "Version": "7.3.2", "Source": "Repository", "Repository": "CRAN", "Requirements": [ @@ -1662,7 +1454,7 @@ "withr", "xml2" ], - "Hash": "c25fe7b2d8cba73d1b63c947bf7afdb9" + "Hash": "6ee25f9054a70f44d615300ed531ba8d" }, "rprojroot": { "Package": "rprojroot", @@ -1803,16 +1595,16 @@ }, "stringi": { "Package": "stringi", - "Version": "1.8.3", + "Version": "1.8.4", "Source": "Repository", - "Repository": "RSPM", + "Repository": "CRAN", "Requirements": [ "R", "stats", "tools", "utils" ], - "Hash": "058aebddea264f4c99401515182e656a" + "Hash": "39e1144fd75428983dc3f63aa53dfa91" }, "stringr": { "Package": "stringr", @@ -1870,7 +1662,7 @@ }, "testthat": { "Package": "testthat", - "Version": "3.2.1", + "Version": "3.2.1.1", "Source": "Repository", "Repository": "CRAN", "Requirements": [ @@ -1895,7 +1687,7 @@ "waldo", "withr" ], - "Hash": "4767a686ebe986e6cb01d075b3f09729" + "Hash": "3f6e7e5e2220856ff865e4834766bf2b" }, "textshaping": { "Package": "textshaping", @@ -2145,7 +1937,7 @@ }, "xfun": { "Package": "xfun", - "Version": "0.42", + "Version": "0.45", "Source": "Repository", "Repository": "CRAN", "Requirements": [ @@ -2153,7 +1945,7 @@ "stats", "tools" ], - "Hash": "fd1349170df31f7a10bd98b0189e85af" + "Hash": "ca59c87fe305b16a9141a5874c3a7889" }, "xml2": { "Package": "xml2", From c42399ddb2ba10e919e886d11af99220b56170fa Mon Sep 17 00:00:00 2001 From: Richard Hanna Date: Thu, 11 Jul 2024 16:50:41 -0400 Subject: [PATCH 03/39] Small fixes --- R/reduce_multi_to_single_column.R | 32 +++++++++++++++++-------------- 1 file changed, 18 insertions(+), 14 deletions(-) diff --git a/R/reduce_multi_to_single_column.R b/R/reduce_multi_to_single_column.R index 839de57d..213a915b 100644 --- a/R/reduce_multi_to_single_column.R +++ b/R/reduce_multi_to_single_column.R @@ -24,6 +24,7 @@ reduce_multi_to_single_column <- function(supertbl, cols_to, multi_val = "Multiple", raw_or_label = "label") { + # Save user cols to enquosure cols_exp <- enquo(cols) @@ -31,22 +32,20 @@ reduce_multi_to_single_column <- function(supertbl, data_tbl <- supertbl %>% extract_tibble(tbl) + # Get field names from cols_exp + field_names <- names(eval_select(cols_exp, data = data_tbl)) + # Assume the first instrument in the metadata contains IDs # REDCap enforces this constraints, we reflect this in read_redcap -> get_field_to_drop record_id_field <- supertbl$redcap_metadata[supertbl$redcap_form_name == tbl][[1]]$field_name[1] # Combine record identifier with remaining possible project identifiers - project_identifiers <- c(record_id_field, "redcap_form_instance", "redcap_form_name", "redcap_event", "redcap_event_instance") - - out <- data_tbl %>% - select(any_of(project_identifiers), !!!eval_select(cols_exp, data_tbl)) - - # Define field names as remaining vars defined by the user that aren't identifiers - field_names <- names(out)[!names(out) %in% project_identifiers] + instrument_identifiers <- c(record_id_field, "redcap_form_instance", "redcap_form_name", "redcap_event", "redcap_event_instance") # Define cols_to as the count of TRUEs/1s for the given checkbox field # Assign TRUE if multiple selections made, and FALSE if one or zero made - out <- out %>% + out <- data_tbl %>% + select(any_of(instrument_identifiers), !!!eval_select(cols_exp, data_tbl)) %>% mutate( !!cols_to := case_when(rowSums(select(., eval_tidy(cols_exp))) > 1 ~ TRUE, TRUE ~ FALSE) @@ -54,11 +53,11 @@ reduce_multi_to_single_column <- function(supertbl, ) # Get metadata reference table - metadata <- get_metadata_ref(out, supertbl, tbl, project_identifiers) + metadata <- get_metadata_ref(out, supertbl, tbl, instrument_identifiers) # Replace TRUEs/1s with raw/label values from metadata out <- out %>% - mutate(across(-c(any_of(project_identifiers), !!cols_to), ~ replace_true(.x, + mutate(across(-c(any_of(instrument_identifiers), !!cols_to), ~ replace_true(.x, cur_column(), metadata = metadata, raw_or_label = raw_or_label))) @@ -75,22 +74,24 @@ reduce_multi_to_single_column <- function(supertbl, coalesce(!!!syms(field_names))) ) %>% ungroup() %>% - select(any_of(project_identifiers), !!cols_to) %>% + select(any_of(instrument_identifiers), !!cols_to) %>% mutate( !!cols_to := factor(!!sym(cols_to), levels = c(metadata[[raw_or_label]], multi_val)) ) # Join back onto original data tbl out %>% - right_join(data_tbl, by = intersect(project_identifiers, names(out))) %>% + right_join(data_tbl, by = intersect(instrument_identifiers, names(out))) %>% relocate(!!cols_to, .after = everything()) } #' @noRd #' @keywords internal -get_metadata_ref <- function(data, supertbl, tbl, project_identifiers) { +get_metadata_ref <- function(data, supertbl, tbl, instrument_identifiers) { + + # Create a metadata reference table linking field name to raw and label values out <- supertbl$redcap_metadata[supertbl$redcap_form_name == tbl][[1]] %>% - filter(field_name %in% names(data)[!names(data) %in% project_identifiers]) %>% + filter(field_name %in% names(data)[!names(data) %in% instrument_identifiers]) %>% select(field_name, select_choices_or_calculations) %>% mutate( original_field = sub("___.*$", "", field_name) @@ -105,11 +106,14 @@ get_metadata_ref <- function(data, supertbl, tbl, project_identifiers) { out %>% separate_wider_delim(label_value, delim = ", ", names = c("raw", "label")) %>% select(field_name, raw, label) + } #' @noRd #' @keywords internal replace_true <- function(col, col_name, metadata, raw_or_label) { + + # Replace TRUEs/1s with the appropriate raw or label value from the metadata replacement <- metadata %>% filter(field_name == col_name) %>% pull(raw_or_label) col <- ifelse(col == TRUE, replacement, NA) # col == TRUE works for raw or label because TRUE == 1 and 1 == TRUE # Convert non-TRUEs to NA, since values can be either "FALSE" or "0" for unchecked values From 8862ac1467155142c781279475dc404731596cc9 Mon Sep 17 00:00:00 2001 From: Richard Hanna Date: Fri, 12 Jul 2024 10:06:42 -0400 Subject: [PATCH 04/39] Draft tests, add no_val param --- R/reduce_multi_to_single_column.R | 8 +- man/reduce_multi_to_single_column.Rd | 4 + .../test-reduce_multi_to_single_column.R | 77 +++++++++++++++++++ 3 files changed, 87 insertions(+), 2 deletions(-) create mode 100644 tests/testthat/test-reduce_multi_to_single_column.R diff --git a/R/reduce_multi_to_single_column.R b/R/reduce_multi_to_single_column.R index 213a915b..473fa92a 100644 --- a/R/reduce_multi_to_single_column.R +++ b/R/reduce_multi_to_single_column.R @@ -10,6 +10,8 @@ #' @param cols_to A string for the name of the column to consolidate values under. Required. #' @param multi_to A string to specify the placeholder value for rows where multiple #' checkboxes are selected. Default "Multiple". +#' @param no_val A string indicating a value to use when no checkboxes are selected. +#' Default `NA`. #' @param raw_or_label A string (either 'raw' or 'label') that specifies whether #' to export the raw coded values or the labels for the options of categorical #' fields. Default is 'label'. @@ -23,6 +25,7 @@ reduce_multi_to_single_column <- function(supertbl, cols, cols_to, multi_val = "Multiple", + no_val = NA, raw_or_label = "label") { # Save user cols to enquosure @@ -71,12 +74,13 @@ reduce_multi_to_single_column <- function(supertbl, mutate( !!cols_to := ifelse(!!sym(cols_to) == "TRUE", multi_val, - coalesce(!!!syms(field_names))) + coalesce(!!!syms(field_names))), + !!cols_to := ifelse(is.na(!!sym(cols_to)), no_val, !!sym(cols_to)) ) %>% ungroup() %>% select(any_of(instrument_identifiers), !!cols_to) %>% mutate( - !!cols_to := factor(!!sym(cols_to), levels = c(metadata[[raw_or_label]], multi_val)) + !!cols_to := factor(!!sym(cols_to), levels = c(metadata[[raw_or_label]], multi_val, no_val)) ) # Join back onto original data tbl diff --git a/man/reduce_multi_to_single_column.Rd b/man/reduce_multi_to_single_column.Rd index 1ca22ba6..7722fba0 100644 --- a/man/reduce_multi_to_single_column.Rd +++ b/man/reduce_multi_to_single_column.Rd @@ -10,6 +10,7 @@ reduce_multi_to_single_column( cols, cols_to, multi_val = "Multiple", + no_val = NA, raw_or_label = "label" ) } @@ -22,6 +23,9 @@ reduce_multi_to_single_column( \item{cols_to}{A string for the name of the column to consolidate values under. Required.} +\item{no_val}{A string indicating a value to use when no checkboxes are selected. +Default \code{NA}.} + \item{raw_or_label}{A string (either 'raw' or 'label') that specifies whether to export the raw coded values or the labels for the options of categorical fields. Default is 'label'.} diff --git a/tests/testthat/test-reduce_multi_to_single_column.R b/tests/testthat/test-reduce_multi_to_single_column.R new file mode 100644 index 00000000..3de4d990 --- /dev/null +++ b/tests/testthat/test-reduce_multi_to_single_column.R @@ -0,0 +1,77 @@ +nonrepeat_data <- tibble::tribble( + ~"study_id", ~"multi___1", ~"multi___2", ~"multi___3", + 1, TRUE, FALSE, FALSE, + 2, TRUE, TRUE, FALSE, + 3, FALSE, FALSE, FALSE +) + +nonrepeat_metadata <- tibble::tribble( + ~"field_name", ~"select_choices_or_calculations", + "study_id", NA, + "multi___1", "1, Red | 2, Yellow | 3, Blue", + "multi___2", "1, Red | 2, Yellow | 3, Blue", + "multi___3", "1, Red | 2, Yellow | 3, Blue" +) + +repeat_data <- tibble::tribble( + ~"study_id", ~"redcap_event", ~"redcap_form_instance", ~"repeat___1", ~"repeat___2", + 1, "event_1", 1, TRUE, FALSE, + 2, "event_1", 1, TRUE, TRUE, + 2, "event_1", 2, FALSE, FALSE +) + +repeat_metadata <- tibble::tribble( + ~"field_name", ~"select_choices_or_calculations", + "study_id", NA, + "repeat___1", "1, A | 2, B | 3, C", + "repeat___2", "1, A | 2, B | 3, C" +) + +supertbl <- tibble::tribble( + ~"redcap_form_name", ~"redcap_data", ~"redcap_metadata", + "nonrepeat_instrument", nonrepeat_data, nonrepeat_metadata, + "repeat_instrument", repeat_data, repeat_metadata +) + +class(supertbl) <- c("redcap_supertbl", class(supertbl)) + + +test_that("reduce_multo_to_single works for nonrepeat instrument", { + out <- reduce_multi_to_single_column(supertbl = supertbl, + tbl = "nonrepeat_instrument", + cols = starts_with("multi"), + cols_to = "new_col", + multi_val = "multiple", # multi_val declared + no_val = "none") # no_val declared + + expected_out <- tibble::tribble( + ~"study_id", ~"multi___1", ~"multi___2", ~"multi___3", ~"new_col", + 1, TRUE, FALSE, FALSE, "Red", + 2, TRUE, TRUE, FALSE, "multiple", + 3, FALSE, FALSE, FALSE, "none" + ) %>% + mutate( + new_col = factor(new_col, levels = c("Red", "Yellow", "Blue", "multiple", "none")) + ) + + expect_equal(out, expected_out) +}) + +test_that("reduce_multo_to_single works for repeat instrument", { + out <- reduce_multi_to_single_column(supertbl = supertbl, + tbl = "repeat_instrument", + cols = starts_with("repeat"), + cols_to = "new_col") + + expected_out <- tibble::tribble( + ~"study_id", ~"redcap_form_instance", ~"redcap_event", ~"repeat___1", ~"repeat___2", ~"new_col", + 1, 1, "event_1", TRUE, FALSE, "A", + 2, 1, "event_1", TRUE, TRUE, "Multiple", + 2, 2, "event_1", FALSE, FALSE, NA + ) %>% + mutate( + new_col = factor(new_col, levels = c("A", "B", "Multiple")) + ) + + expect_equal(out, expected_out) +}) From 96c3309959caa5d574ae192604ce86ceef04bea1 Mon Sep 17 00:00:00 2001 From: Richard Hanna Date: Fri, 12 Jul 2024 10:18:25 -0400 Subject: [PATCH 05/39] Add keep param --- R/reduce_multi_to_single_column.R | 14 +++++++++---- .../test-reduce_multi_to_single_column.R | 20 +++++++++++++++++++ 2 files changed, 30 insertions(+), 4 deletions(-) diff --git a/R/reduce_multi_to_single_column.R b/R/reduce_multi_to_single_column.R index 473fa92a..1c9116a8 100644 --- a/R/reduce_multi_to_single_column.R +++ b/R/reduce_multi_to_single_column.R @@ -15,6 +15,7 @@ #' @param raw_or_label A string (either 'raw' or 'label') that specifies whether #' to export the raw coded values or the labels for the options of categorical #' fields. Default is 'label'. +#' @param keep Whether or not to keep the original fields in the output. Default `TRUE`. #' #' @returns A `tibble`. #' @@ -26,7 +27,8 @@ reduce_multi_to_single_column <- function(supertbl, cols_to, multi_val = "Multiple", no_val = NA, - raw_or_label = "label") { + raw_or_label = "label", + keep = TRUE) { # Save user cols to enquosure cols_exp <- enquo(cols) @@ -84,9 +86,13 @@ reduce_multi_to_single_column <- function(supertbl, ) # Join back onto original data tbl - out %>% - right_join(data_tbl, by = intersect(instrument_identifiers, names(out))) %>% - relocate(!!cols_to, .after = everything()) + if (keep) { + out %>% + right_join(data_tbl, by = intersect(instrument_identifiers, names(out))) %>% + relocate(!!cols_to, .after = everything()) + } else { + out + } } #' @noRd diff --git a/tests/testthat/test-reduce_multi_to_single_column.R b/tests/testthat/test-reduce_multi_to_single_column.R index 3de4d990..8bd6e10f 100644 --- a/tests/testthat/test-reduce_multi_to_single_column.R +++ b/tests/testthat/test-reduce_multi_to_single_column.R @@ -57,6 +57,26 @@ test_that("reduce_multo_to_single works for nonrepeat instrument", { expect_equal(out, expected_out) }) +test_that("reduce_multo_to_single works for nonrepeat instrument and drop old values", { + out <- reduce_multi_to_single_column(supertbl = supertbl, + tbl = "nonrepeat_instrument", + cols = starts_with("multi"), + cols_to = "new_col", + keep = FALSE) + + expected_out <- tibble::tribble( + ~"study_id", ~"new_col", + 1, "Red", + 2, "Multiple", + 3, NA + ) %>% + mutate( + new_col = factor(new_col, levels = c("Red", "Yellow", "Blue", "Multiple")) + ) + + expect_equal(out, expected_out) +}) + test_that("reduce_multo_to_single works for repeat instrument", { out <- reduce_multi_to_single_column(supertbl = supertbl, tbl = "repeat_instrument", From 218fca496af24b4e8e646b037e321acbd3a72a83 Mon Sep 17 00:00:00 2001 From: Richard Hanna Date: Mon, 15 Jul 2024 09:23:00 -0400 Subject: [PATCH 06/39] Fix `keep` param --- R/reduce_multi_to_single_column.R | 12 +++++--- .../test-reduce_multi_to_single_column.R | 30 +++++++++---------- 2 files changed, 23 insertions(+), 19 deletions(-) diff --git a/R/reduce_multi_to_single_column.R b/R/reduce_multi_to_single_column.R index 1c9116a8..55fa273f 100644 --- a/R/reduce_multi_to_single_column.R +++ b/R/reduce_multi_to_single_column.R @@ -86,12 +86,16 @@ reduce_multi_to_single_column <- function(supertbl, ) # Join back onto original data tbl + out <- out %>% + right_join(data_tbl, by = intersect(instrument_identifiers, names(out))) %>% + relocate(!!cols_to, .after = everything()) + + # Keep or remove original multi columns if (keep) { - out %>% - right_join(data_tbl, by = intersect(instrument_identifiers, names(out))) %>% - relocate(!!cols_to, .after = everything()) - } else { out + } else { + out %>% + select(-field_names) } } diff --git a/tests/testthat/test-reduce_multi_to_single_column.R b/tests/testthat/test-reduce_multi_to_single_column.R index 8bd6e10f..dc4c8aad 100644 --- a/tests/testthat/test-reduce_multi_to_single_column.R +++ b/tests/testthat/test-reduce_multi_to_single_column.R @@ -1,8 +1,8 @@ nonrepeat_data <- tibble::tribble( - ~"study_id", ~"multi___1", ~"multi___2", ~"multi___3", - 1, TRUE, FALSE, FALSE, - 2, TRUE, TRUE, FALSE, - 3, FALSE, FALSE, FALSE + ~"study_id", ~"multi___1", ~"multi___2", ~"multi___3", ~"extra_data", + 1, TRUE, FALSE, FALSE, 1, + 2, TRUE, TRUE, FALSE, 2, + 3, FALSE, FALSE, FALSE, 3 ) nonrepeat_metadata <- tibble::tribble( @@ -10,7 +10,8 @@ nonrepeat_metadata <- tibble::tribble( "study_id", NA, "multi___1", "1, Red | 2, Yellow | 3, Blue", "multi___2", "1, Red | 2, Yellow | 3, Blue", - "multi___3", "1, Red | 2, Yellow | 3, Blue" + "multi___3", "1, Red | 2, Yellow | 3, Blue", + "extra_data", "1, 1 | 2, 2 | 3,3" ) repeat_data <- tibble::tribble( @@ -35,7 +36,6 @@ supertbl <- tibble::tribble( class(supertbl) <- c("redcap_supertbl", class(supertbl)) - test_that("reduce_multo_to_single works for nonrepeat instrument", { out <- reduce_multi_to_single_column(supertbl = supertbl, tbl = "nonrepeat_instrument", @@ -45,10 +45,10 @@ test_that("reduce_multo_to_single works for nonrepeat instrument", { no_val = "none") # no_val declared expected_out <- tibble::tribble( - ~"study_id", ~"multi___1", ~"multi___2", ~"multi___3", ~"new_col", - 1, TRUE, FALSE, FALSE, "Red", - 2, TRUE, TRUE, FALSE, "multiple", - 3, FALSE, FALSE, FALSE, "none" + ~"study_id", ~"multi___1", ~"multi___2", ~"multi___3", ~"extra_data", ~"new_col", + 1, TRUE, FALSE, FALSE, 1, "Red", + 2, TRUE, TRUE, FALSE, 2, "multiple", + 3, FALSE, FALSE, FALSE, 3, "none" ) %>% mutate( new_col = factor(new_col, levels = c("Red", "Yellow", "Blue", "multiple", "none")) @@ -62,13 +62,13 @@ test_that("reduce_multo_to_single works for nonrepeat instrument and drop old va tbl = "nonrepeat_instrument", cols = starts_with("multi"), cols_to = "new_col", - keep = FALSE) + keep = FALSE) # Test keep = FALSE expected_out <- tibble::tribble( - ~"study_id", ~"new_col", - 1, "Red", - 2, "Multiple", - 3, NA + ~"study_id", ~"extra_data", ~"new_col", + 1, 1, "Red", + 2, 2, "Multiple", + 3, 3, NA ) %>% mutate( new_col = factor(new_col, levels = c("Red", "Yellow", "Blue", "Multiple")) From 54e3a993c89c2e17a383201ed456fdf96792b33d Mon Sep 17 00:00:00 2001 From: Richard Hanna Date: Mon, 15 Jul 2024 11:06:21 -0400 Subject: [PATCH 07/39] Update documentation and API --- NAMESPACE | 2 +- R/reduce_multi_to_single_column.R | 105 +++++++++++------- man/combine_checkboxes.Rd | 63 +++++++++++ man/reduce_multi_to_single_column.Rd | 42 ------- .../test-reduce_multi_to_single_column.R | 22 ++-- 5 files changed, 138 insertions(+), 96 deletions(-) create mode 100644 man/combine_checkboxes.Rd delete mode 100644 man/reduce_multi_to_single_column.Rd diff --git a/NAMESPACE b/NAMESPACE index f967b2cf..c05ddda4 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -4,6 +4,7 @@ S3method(tbl_sum,redcap_supertbl) S3method(vec_ptype_abbr,redcap_supertbl) export(add_skimr_metadata) export(bind_tibbles) +export(combine_checkboxes) export(extract_tibble) export(extract_tibbles) export(fmt_strip_field_embedding) @@ -13,7 +14,6 @@ export(fmt_strip_trailing_punct) export(fmt_strip_whitespace) export(make_labelled) export(read_redcap) -export(reduce_multi_to_single_column) export(write_redcap_xlsx) importFrom(REDCapR,redcap_arm_export) importFrom(REDCapR,redcap_event_instruments) diff --git a/R/reduce_multi_to_single_column.R b/R/reduce_multi_to_single_column.R index 55fa273f..b62ea6db 100644 --- a/R/reduce_multi_to_single_column.R +++ b/R/reduce_multi_to_single_column.R @@ -1,94 +1,115 @@ -#' @title Reduce Multiple Choice Checkbox Fields to Single Column +#' @title Combine Checkbox Fields into a Single Column #' #' @description -#' Convert checkbox fields in a data tibble, represented as many columns, into -#' a single column with values. +#' [combine_checkboxes()] consolidates multiple checkbox fields in a REDCap data +#' tibble into a single column. This transformation simplifies analysis by +#' combining multiple binary columns into a singular and informative labelled +#' factor column. #' #' @param supertbl A supertibble generated by [read_redcap()]. Required. -#' @param tbl The `redcap_form_name` of the data tibble under analysis. Required. -#' @param cols One or more columns to specify the checkbox field names for conversion. Required. -#' @param cols_to A string for the name of the column to consolidate values under. Required. -#' @param multi_to A string to specify the placeholder value for rows where multiple -#' checkboxes are selected. Default "Multiple". -#' @param no_val A string indicating a value to use when no checkboxes are selected. -#' Default `NA`. -#' @param raw_or_label A string (either 'raw' or 'label') that specifies whether -#' to export the raw coded values or the labels for the options of categorical -#' fields. Default is 'label'. -#' @param keep Whether or not to keep the original fields in the output. Default `TRUE`. +#' @param form_name The name of the REDCap form (instrument) containing the checkbox +#' fields. Required. +#' @param cols <[`tidy-select`][tidyr_tidy_select]> Checbox columns to combine to +#' single column. Required. +#' @param values_to A string specifying the name of the column to combine checkbox +#' values under. Required. +#' @param multi_value_label A string specifying the value to be used when multiple +#' checkbox fields are selected. Default "Multiple". +#' @param values_fill Value to use when no checkboxes are selected. Default `NA`. +#' @param raw_or_label Either 'raw' or 'label' to specify whether to use raw coded +#' values or labels for the options. Default 'label'. +#' @param keep Logical indicating whether to keep the original checkbox fields in +#' the output. Default `TRUE`. #' -#' @returns A `tibble`. +#' @return A modified supertibble. +#' +#' @examples +#' \dontrun{ +#' supertbl <- read_redcap(redcap_uri, token) +#' combined_tbl <- combine_checkboxes( +#' supertbl = supertbl, +#' form_name = "demographics", +#' cols = starts_with("race"), +#' values_to = "race_combined", +#' multi_value_label = "Multiple", +#' values_fill = NA +#' ) +#' } #' #' @export -reduce_multi_to_single_column <- function(supertbl, - tbl, - cols, - cols_to, - multi_val = "Multiple", - no_val = NA, - raw_or_label = "label", - keep = TRUE) { +combine_checkboxes <- function(supertbl, + form_name, + cols, + values_to, + multi_value_label = "Multiple", + values_fill = NA, + raw_or_label = "label", + keep = TRUE) { # Save user cols to enquosure cols_exp <- enquo(cols) - # Extract tbl from supertbl + # Extract form_name from supertbl data_tbl <- supertbl %>% - extract_tibble(tbl) + extract_tibble(form_name) # Get field names from cols_exp field_names <- names(eval_select(cols_exp, data = data_tbl)) # Assume the first instrument in the metadata contains IDs # REDCap enforces this constraints, we reflect this in read_redcap -> get_field_to_drop - record_id_field <- supertbl$redcap_metadata[supertbl$redcap_form_name == tbl][[1]]$field_name[1] + record_id_field <- supertbl$redcap_metadata[supertbl$redcap_form_name == form_name][[1]]$field_name[1] # Combine record identifier with remaining possible project identifiers - instrument_identifiers <- c(record_id_field, "redcap_form_instance", "redcap_form_name", "redcap_event", "redcap_event_instance") + instrument_identifiers <- c(record_id_field, + "redcap_form_instance", + "redcap_form_name", + "redcap_event", + "redcap_event_instance") - # Define cols_to as the count of TRUEs/1s for the given checkbox field + # Define values_to as the count of TRUEs/1s for the given checkbox field # Assign TRUE if multiple selections made, and FALSE if one or zero made out <- data_tbl %>% select(any_of(instrument_identifiers), !!!eval_select(cols_exp, data_tbl)) %>% mutate( - !!cols_to := case_when(rowSums(select(., eval_tidy(cols_exp))) > 1 ~ TRUE, + !!values_to := case_when(rowSums(select(., eval_tidy(cols_exp))) > 1 ~ TRUE, TRUE ~ FALSE) ) # Get metadata reference table - metadata <- get_metadata_ref(out, supertbl, tbl, instrument_identifiers) + metadata <- get_metadata_ref(out, supertbl, form_name, instrument_identifiers) # Replace TRUEs/1s with raw/label values from metadata out <- out %>% - mutate(across(-c(any_of(instrument_identifiers), !!cols_to), ~ replace_true(.x, + mutate(across(-c(any_of(instrument_identifiers), !!values_to), ~ replace_true(.x, cur_column(), metadata = metadata, raw_or_label = raw_or_label))) - # Convert cols_to from TRUE/FALSE to multi_val or identified single val + # Convert values_to from TRUE/FALSE to multi_value_label or identified single val out <- out %>% mutate(across(field_names, as.character), # enforce to character strings - across(!!cols_to, ~as.character(.))) %>% + across(!!values_to, ~as.character(.))) %>% rowwise() %>% mutate( - !!cols_to := ifelse(!!sym(cols_to) == "TRUE", - multi_val, + !!values_to := ifelse(!!sym(values_to) == "TRUE", + multi_value_label, coalesce(!!!syms(field_names))), - !!cols_to := ifelse(is.na(!!sym(cols_to)), no_val, !!sym(cols_to)) + !!values_to := ifelse(is.na(!!sym(values_to)), values_fill, !!sym(values_to)) ) %>% ungroup() %>% - select(any_of(instrument_identifiers), !!cols_to) %>% + select(any_of(instrument_identifiers), !!values_to) %>% mutate( - !!cols_to := factor(!!sym(cols_to), levels = c(metadata[[raw_or_label]], multi_val, no_val)) + !!values_to := factor(!!sym(values_to), levels = c(metadata[[raw_or_label]], multi_value_label, values_fill)) ) - # Join back onto original data tbl + # Join back onto original data form_name out <- out %>% right_join(data_tbl, by = intersect(instrument_identifiers, names(out))) %>% - relocate(!!cols_to, .after = everything()) + relocate(!!values_to, .after = everything()) # Keep or remove original multi columns if (keep) { @@ -101,10 +122,10 @@ reduce_multi_to_single_column <- function(supertbl, #' @noRd #' @keywords internal -get_metadata_ref <- function(data, supertbl, tbl, instrument_identifiers) { +get_metadata_ref <- function(data, supertbl, form_name, instrument_identifiers) { # Create a metadata reference table linking field name to raw and label values - out <- supertbl$redcap_metadata[supertbl$redcap_form_name == tbl][[1]] %>% + out <- supertbl$redcap_metadata[supertbl$redcap_form_name == form_name][[1]] %>% filter(field_name %in% names(data)[!names(data) %in% instrument_identifiers]) %>% select(field_name, select_choices_or_calculations) %>% mutate( diff --git a/man/combine_checkboxes.Rd b/man/combine_checkboxes.Rd new file mode 100644 index 00000000..8fc340e3 --- /dev/null +++ b/man/combine_checkboxes.Rd @@ -0,0 +1,63 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/reduce_multi_to_single_column.R +\name{combine_checkboxes} +\alias{combine_checkboxes} +\title{Combine Checkbox Fields into a Single Column} +\usage{ +combine_checkboxes( + supertbl, + form_name, + cols, + values_to, + multi_value_label = "Multiple", + values_fill = NA, + raw_or_label = "label", + keep = TRUE +) +} +\arguments{ +\item{supertbl}{A supertibble generated by \code{\link[=read_redcap]{read_redcap()}}. Required.} + +\item{form_name}{The name of the REDCap form (instrument) containing the checkbox +fields. Required.} + +\item{cols}{<\code{\link[=tidyr_tidy_select]{tidy-select}}> Checbox columns to combine to +single column. Required.} + +\item{values_to}{A string specifying the name of the column to combine checkbox +values under. Required.} + +\item{multi_value_label}{A string specifying the value to be used when multiple +checkbox fields are selected. Default "Multiple".} + +\item{values_fill}{Value to use when no checkboxes are selected. Default \code{NA}.} + +\item{raw_or_label}{Either 'raw' or 'label' to specify whether to use raw coded +values or labels for the options. Default 'label'.} + +\item{keep}{Logical indicating whether to keep the original checkbox fields in +the output. Default \code{TRUE}.} +} +\value{ +A modified supertibble. +} +\description{ +\code{\link[=combine_checkboxes]{combine_checkboxes()}} consolidates multiple checkbox fields in a REDCap data +tibble into a single column. This transformation simplifies analysis by +combining multiple binary columns into a singular and informative labelled +factor column. +} +\examples{ +\dontrun{ +supertbl <- read_redcap(redcap_uri, token) +combined_tbl <- combine_checkboxes( + supertbl = supertbl, + form_name = "demographics", + cols = starts_with("race"), + values_to = "race_combined", + multi_value_label = "Multiple", + values_fill = NA +) +} + +} diff --git a/man/reduce_multi_to_single_column.Rd b/man/reduce_multi_to_single_column.Rd deleted file mode 100644 index 7722fba0..00000000 --- a/man/reduce_multi_to_single_column.Rd +++ /dev/null @@ -1,42 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/reduce_multi_to_single_column.R -\name{reduce_multi_to_single_column} -\alias{reduce_multi_to_single_column} -\title{Reduce Multiple Choice Checkbox Fields to Single Column} -\usage{ -reduce_multi_to_single_column( - supertbl, - tbl, - cols, - cols_to, - multi_val = "Multiple", - no_val = NA, - raw_or_label = "label" -) -} -\arguments{ -\item{supertbl}{A supertibble generated by \code{\link[=read_redcap]{read_redcap()}}. Required.} - -\item{tbl}{The \code{redcap_form_name} of the data tibble under analysis. Required.} - -\item{cols}{One or more columns to specify the checkbox field names for conversion. Required.} - -\item{cols_to}{A string for the name of the column to consolidate values under. Required.} - -\item{no_val}{A string indicating a value to use when no checkboxes are selected. -Default \code{NA}.} - -\item{raw_or_label}{A string (either 'raw' or 'label') that specifies whether -to export the raw coded values or the labels for the options of categorical -fields. Default is 'label'.} - -\item{multi_to}{A string to specify the placeholder value for rows where multiple -checkboxes are selected. Default "Multiple".} -} -\value{ -A \code{tibble}. -} -\description{ -Convert checkbox fields in a data tibble, represented as many columns, into -a single column with values. -} diff --git a/tests/testthat/test-reduce_multi_to_single_column.R b/tests/testthat/test-reduce_multi_to_single_column.R index dc4c8aad..22bc8d3d 100644 --- a/tests/testthat/test-reduce_multi_to_single_column.R +++ b/tests/testthat/test-reduce_multi_to_single_column.R @@ -37,12 +37,12 @@ supertbl <- tibble::tribble( class(supertbl) <- c("redcap_supertbl", class(supertbl)) test_that("reduce_multo_to_single works for nonrepeat instrument", { - out <- reduce_multi_to_single_column(supertbl = supertbl, - tbl = "nonrepeat_instrument", + out <- combine_checkboxes(supertbl = supertbl, + form_name = "nonrepeat_instrument", cols = starts_with("multi"), - cols_to = "new_col", - multi_val = "multiple", # multi_val declared - no_val = "none") # no_val declared + values_to = "new_col", + multi_value_label = "multiple", # multi_value_label declared + values_fill = "none") # values_fill declared expected_out <- tibble::tribble( ~"study_id", ~"multi___1", ~"multi___2", ~"multi___3", ~"extra_data", ~"new_col", @@ -58,10 +58,10 @@ test_that("reduce_multo_to_single works for nonrepeat instrument", { }) test_that("reduce_multo_to_single works for nonrepeat instrument and drop old values", { - out <- reduce_multi_to_single_column(supertbl = supertbl, - tbl = "nonrepeat_instrument", + out <- combine_checkboxes(supertbl = supertbl, + form_name = "nonrepeat_instrument", cols = starts_with("multi"), - cols_to = "new_col", + values_to = "new_col", keep = FALSE) # Test keep = FALSE expected_out <- tibble::tribble( @@ -78,10 +78,10 @@ test_that("reduce_multo_to_single works for nonrepeat instrument and drop old va }) test_that("reduce_multo_to_single works for repeat instrument", { - out <- reduce_multi_to_single_column(supertbl = supertbl, - tbl = "repeat_instrument", + out <- combine_checkboxes(supertbl = supertbl, + form_name = "repeat_instrument", cols = starts_with("repeat"), - cols_to = "new_col") + values_to = "new_col") expected_out <- tibble::tribble( ~"study_id", ~"redcap_form_instance", ~"redcap_event", ~"repeat___1", ~"repeat___2", ~"new_col", From ec5c19dc9eef74f16911077301e854a1c2bd1744 Mon Sep 17 00:00:00 2001 From: Richard Hanna Date: Mon, 15 Jul 2024 11:38:21 -0400 Subject: [PATCH 08/39] Update combine_checkbox api and docs --- R/reduce_multi_to_single_column.R | 23 ++++---- .../test-reduce_multi_to_single_column.R | 52 +++++++++++++------ 2 files changed, 49 insertions(+), 26 deletions(-) diff --git a/R/reduce_multi_to_single_column.R b/R/reduce_multi_to_single_column.R index b62ea6db..c8a53ce0 100644 --- a/R/reduce_multi_to_single_column.R +++ b/R/reduce_multi_to_single_column.R @@ -70,7 +70,7 @@ combine_checkboxes <- function(supertbl, # Define values_to as the count of TRUEs/1s for the given checkbox field # Assign TRUE if multiple selections made, and FALSE if one or zero made - out <- data_tbl %>% + data_tbl_mod <- data_tbl %>% select(any_of(instrument_identifiers), !!!eval_select(cols_exp, data_tbl)) %>% mutate( !!values_to := case_when(rowSums(select(., eval_tidy(cols_exp))) > 1 ~ TRUE, @@ -79,10 +79,10 @@ combine_checkboxes <- function(supertbl, ) # Get metadata reference table - metadata <- get_metadata_ref(out, supertbl, form_name, instrument_identifiers) + metadata <- get_metadata_ref(data_tbl_mod, supertbl, form_name, instrument_identifiers) # Replace TRUEs/1s with raw/label values from metadata - out <- out %>% + data_tbl_mod <- data_tbl_mod %>% mutate(across(-c(any_of(instrument_identifiers), !!values_to), ~ replace_true(.x, cur_column(), metadata = metadata, @@ -90,7 +90,7 @@ combine_checkboxes <- function(supertbl, # Convert values_to from TRUE/FALSE to multi_value_label or identified single val - out <- out %>% + data_tbl_mod <- data_tbl_mod %>% mutate(across(field_names, as.character), # enforce to character strings across(!!values_to, ~as.character(.))) %>% rowwise() %>% @@ -107,17 +107,20 @@ combine_checkboxes <- function(supertbl, ) # Join back onto original data form_name - out <- out %>% - right_join(data_tbl, by = intersect(instrument_identifiers, names(out))) %>% + data_tbl_mod <- data_tbl_mod %>% + right_join(data_tbl, by = intersect(instrument_identifiers, names(data_tbl_mod))) %>% relocate(!!values_to, .after = everything()) # Keep or remove original multi columns - if (keep) { - out - } else { - out %>% + if (!keep) { + data_tbl_mod <- data_tbl_mod %>% select(-field_names) } + + # Update the supertbl data tibble + supertbl$redcap_data[supertbl$redcap_form_name == form_name][[1]] <- data_tbl_mod + + supertbl } #' @noRd diff --git a/tests/testthat/test-reduce_multi_to_single_column.R b/tests/testthat/test-reduce_multi_to_single_column.R index 22bc8d3d..fff54416 100644 --- a/tests/testthat/test-reduce_multi_to_single_column.R +++ b/tests/testthat/test-reduce_multi_to_single_column.R @@ -36,13 +36,27 @@ supertbl <- tibble::tribble( class(supertbl) <- c("redcap_supertbl", class(supertbl)) -test_that("reduce_multo_to_single works for nonrepeat instrument", { +test_that("combine_checkboxes returns an expected supertbl", { out <- combine_checkboxes(supertbl = supertbl, - form_name = "nonrepeat_instrument", - cols = starts_with("multi"), - values_to = "new_col", - multi_value_label = "multiple", # multi_value_label declared - values_fill = "none") # values_fill declared + form_name = "nonrepeat_instrument", + cols = starts_with("multi"), + values_to = "new_col") # values_fill declared + + expect_setequal(class(out), c("redcap_supertbl", "tbl_df", "tbl", "data.frame")) + expect_equal(nrow(out), 2) +}) + +test_that("combine_checkboxes works for nonrepeat instrument", { + out <- combine_checkboxes( + supertbl = supertbl, + form_name = "nonrepeat_instrument", + cols = starts_with("multi"), + values_to = "new_col", + multi_value_label = "multiple", # multi_value_label declared + values_fill = "none" # values_fill declared + ) %>% + pull(redcap_data) %>% + dplyr::first() expected_out <- tibble::tribble( ~"study_id", ~"multi___1", ~"multi___2", ~"multi___3", ~"extra_data", ~"new_col", @@ -57,12 +71,16 @@ test_that("reduce_multo_to_single works for nonrepeat instrument", { expect_equal(out, expected_out) }) -test_that("reduce_multo_to_single works for nonrepeat instrument and drop old values", { - out <- combine_checkboxes(supertbl = supertbl, - form_name = "nonrepeat_instrument", - cols = starts_with("multi"), - values_to = "new_col", - keep = FALSE) # Test keep = FALSE +test_that("combine_checkboxes works for nonrepeat instrument and drop old values", { + out <- combine_checkboxes( + supertbl = supertbl, + form_name = "nonrepeat_instrument", + cols = starts_with("multi"), + values_to = "new_col", + keep = FALSE # Test keep = FALSE + ) %>% + pull(redcap_data) %>% + dplyr::first() expected_out <- tibble::tribble( ~"study_id", ~"extra_data", ~"new_col", @@ -77,11 +95,13 @@ test_that("reduce_multo_to_single works for nonrepeat instrument and drop old va expect_equal(out, expected_out) }) -test_that("reduce_multo_to_single works for repeat instrument", { +test_that("combine_checkboxes works for repeat instrument", { out <- combine_checkboxes(supertbl = supertbl, - form_name = "repeat_instrument", - cols = starts_with("repeat"), - values_to = "new_col") + form_name = "repeat_instrument", + cols = starts_with("repeat"), + values_to = "new_col") %>% + pull(redcap_data) %>% + dplyr::nth(2) expected_out <- tibble::tribble( ~"study_id", ~"redcap_form_instance", ~"redcap_event", ~"repeat___1", ~"repeat___2", ~"new_col", From 6b1fb48f41a3ae14a09eee04ced4e1b5d01d95ed Mon Sep 17 00:00:00 2001 From: Richard Hanna Date: Mon, 15 Jul 2024 12:03:48 -0400 Subject: [PATCH 09/39] Add check for if no fields exist in selection --- NAMESPACE | 2 ++ R/REDCapTidieR-package.R | 4 ++-- R/checks.R | 25 +++++++++++++++++++++++++ R/reduce_multi_to_single_column.R | 4 ++-- man/check_fields_exist.Rd | 19 +++++++++++++++++++ tests/testthat/test-checks.R | 8 ++++++++ 6 files changed, 58 insertions(+), 4 deletions(-) create mode 100644 man/check_fields_exist.Rd diff --git a/NAMESPACE b/NAMESPACE index c05ddda4..19b312a2 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -101,6 +101,7 @@ importFrom(rlang,enexpr) importFrom(rlang,enquo) importFrom(rlang,env_poke) importFrom(rlang,eval_tidy) +importFrom(rlang,expr) importFrom(rlang,get_env) importFrom(rlang,global_env) importFrom(rlang,is_atomic) @@ -109,6 +110,7 @@ importFrom(rlang,is_bare_list) importFrom(rlang,is_installed) importFrom(rlang,new_environment) importFrom(rlang,quo_get_expr) +importFrom(rlang,quo_name) importFrom(rlang,sym) importFrom(rlang,syms) importFrom(rlang,try_fetch) diff --git a/R/REDCapTidieR-package.R b/R/REDCapTidieR-package.R index 89a04b5f..a8dfadb9 100644 --- a/R/REDCapTidieR-package.R +++ b/R/REDCapTidieR-package.R @@ -15,8 +15,8 @@ #' redcap_metadata_read redcap_read_oneshot sanitize_token #' @importFrom rlang .data !!! abort as_closure caller_arg caller_env catch_cnd #' check_installed cnd_muffle current_call current_env enexpr enquo env_poke -#' eval_tidy get_env global_env is_atomic is_bare_formula is_bare_list -#' is_installed new_environment quo_get_expr try_fetch zap as_label sym syms +#' eval_tidy get_env global_env is_atomic is_bare_formula is_bare_list quo_name +#' is_installed new_environment quo_get_expr try_fetch zap as_label sym syms expr #' @importFrom stringi stri_split_fixed #' @importFrom stringr str_detect str_replace str_replace_all str_squish str_trunc #' str_trim str_ends diff --git a/R/checks.R b/R/checks.R index 09c39a7c..b323d82f 100644 --- a/R/checks.R +++ b/R/checks.R @@ -659,3 +659,28 @@ check_extra_field_values_message <- function(extra_field_values, call = caller_e values = values ) } + +#' @title +#' Check fields exist for checkbox combination +#' +#' @param fields Vector of character strings to check the length of +#' @param expr A quosure expression +#' @param call The calling environment to use in the error message +#' +#' @keywords internal +check_fields_exist <- function(fields, expr, call = caller_env()) { + + expr <- quo_name(expr) + + if (length(fields) == 0) { + msg <- c( + x = "No fields detected using `{expr}`.", + i = "Ensure that the column names specified in {.arg cols} match the columns in your data. Check for typos or use {.pkg tidyselect} helpers like {.code starts_with()}, `contains()`, etc." # nolint: line_length_linter + ) + + cli_abort( + msg, + class = c("missing_checkbox_fields", "REDCapTidieR_cond") + ) + } +} diff --git a/R/reduce_multi_to_single_column.R b/R/reduce_multi_to_single_column.R index c8a53ce0..f5d774e0 100644 --- a/R/reduce_multi_to_single_column.R +++ b/R/reduce_multi_to_single_column.R @@ -46,7 +46,6 @@ combine_checkboxes <- function(supertbl, values_fill = NA, raw_or_label = "label", keep = TRUE) { - # Save user cols to enquosure cols_exp <- enquo(cols) @@ -54,8 +53,9 @@ combine_checkboxes <- function(supertbl, data_tbl <- supertbl %>% extract_tibble(form_name) - # Get field names from cols_exp + # Get field names from cols_exp, check that fields exist field_names <- names(eval_select(cols_exp, data = data_tbl)) + check_fields_exist(fields = field_names, expr = cols_exp) # Assume the first instrument in the metadata contains IDs # REDCap enforces this constraints, we reflect this in read_redcap -> get_field_to_drop diff --git a/man/check_fields_exist.Rd b/man/check_fields_exist.Rd new file mode 100644 index 00000000..5076a1b7 --- /dev/null +++ b/man/check_fields_exist.Rd @@ -0,0 +1,19 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/checks.R +\name{check_fields_exist} +\alias{check_fields_exist} +\title{Check fields exist for checkbox combination} +\usage{ +check_fields_exist(fields, expr, call = caller_env()) +} +\arguments{ +\item{fields}{Vector of character strings to check the length of} + +\item{expr}{A quosure expression} + +\item{call}{The calling environment to use in the error message} +} +\description{ +Check fields exist for checkbox combination +} +\keyword{internal} diff --git a/tests/testthat/test-checks.R b/tests/testthat/test-checks.R index b6713790..2bb4708d 100644 --- a/tests/testthat/test-checks.R +++ b/tests/testthat/test-checks.R @@ -246,3 +246,11 @@ test_that("check_extra_field_values works", { check_extra_field_values(c(1, NA, 2), "1") |> expect_equal("2") }) + +test_that("check_fields_exist works", { + check_fields_exist(fields = character(0), expr = expr(starts_with("temp"))) %>% + expect_error(class = "missing_checkbox_fields") + + check_fields_exist(fields = c(1,2,3), expr = expr(starts_with("temp"))) %>% + expect_no_error() +}) From d55dc00656be7c9de26872e2b45611b712ef667a Mon Sep 17 00:00:00 2001 From: Richard Hanna Date: Mon, 15 Jul 2024 14:00:49 -0400 Subject: [PATCH 10/39] Add check_fields_are_checkboxes function --- R/checks.R | 32 ++++++++++++++++++++++++++++++ R/reduce_multi_to_single_column.R | 9 +++++++-- man/check_fields_are_checkboxes.Rd | 17 ++++++++++++++++ tests/testthat/test-checks.R | 26 ++++++++++++++++++++---- 4 files changed, 78 insertions(+), 6 deletions(-) create mode 100644 man/check_fields_are_checkboxes.Rd diff --git a/R/checks.R b/R/checks.R index b323d82f..bf3ef7f4 100644 --- a/R/checks.R +++ b/R/checks.R @@ -684,3 +684,35 @@ check_fields_exist <- function(fields, expr, call = caller_env()) { ) } } + + +#' @title +#' Check fields are of checkbox field type +#' +#' @param metadata_tbl A metadata tibble from a supertibble +#' @param call The calling environment to use in the error message +#' +#' @keywords internal + +check_fields_are_checkboxes <- function(metadata_tbl, call = caller_env()){ + + non_checkboxes <- metadata_tbl %>% + filter(field_type != "checkbox") + + if (nrow(non_checkboxes) > 0) { + + non_checkboxes <- non_checkboxes %>% + pull(field_name) + + msg <- c( + x = "Non-checkbox fields selected for {.code form_name}", + `!` = "The following fields returned as non-checkbox field types: {.code {non_checkboxes}}" + ) + + cli_abort( + msg, + class = c("non_checkbox_fields", "REDCapTidieR_cond") + ) + } + +} diff --git a/R/reduce_multi_to_single_column.R b/R/reduce_multi_to_single_column.R index f5d774e0..0e0402f7 100644 --- a/R/reduce_multi_to_single_column.R +++ b/R/reduce_multi_to_single_column.R @@ -78,7 +78,7 @@ combine_checkboxes <- function(supertbl, ) - # Get metadata reference table + # Get metadata reference table, check that chose fields are checkboxes metadata <- get_metadata_ref(data_tbl_mod, supertbl, form_name, instrument_identifiers) # Replace TRUEs/1s with raw/label values from metadata @@ -129,7 +129,12 @@ get_metadata_ref <- function(data, supertbl, form_name, instrument_identifiers) # Create a metadata reference table linking field name to raw and label values out <- supertbl$redcap_metadata[supertbl$redcap_form_name == form_name][[1]] %>% - filter(field_name %in% names(data)[!names(data) %in% instrument_identifiers]) %>% + filter(field_name %in% names(data)[!names(data) %in% instrument_identifiers]) + + # Make sure selection is checkbox metadata field type + check_fields_are_checkboxes(out) + + out <- out %>% select(field_name, select_choices_or_calculations) %>% mutate( original_field = sub("___.*$", "", field_name) diff --git a/man/check_fields_are_checkboxes.Rd b/man/check_fields_are_checkboxes.Rd new file mode 100644 index 00000000..51caa024 --- /dev/null +++ b/man/check_fields_are_checkboxes.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/checks.R +\name{check_fields_are_checkboxes} +\alias{check_fields_are_checkboxes} +\title{Check fields are of checkbox field type} +\usage{ +check_fields_are_checkboxes(metadata_tbl, call = caller_env()) +} +\arguments{ +\item{metadata_tbl}{A metadata tibble from a supertibble} + +\item{call}{The calling environment to use in the error message} +} +\description{ +Check fields are of checkbox field type +} +\keyword{internal} diff --git a/tests/testthat/test-checks.R b/tests/testthat/test-checks.R index 2bb4708d..558bc220 100644 --- a/tests/testthat/test-checks.R +++ b/tests/testthat/test-checks.R @@ -53,10 +53,10 @@ test_that("check_repeat_and_nonrepeat works", { ) expect_error(check_repeat_and_nonrepeat(db_data = test_data_longitudinal), - class = "repeat_nonrepeat_instrument" + class = "repeat_nonrepeat_instrument" ) expect_error(check_repeat_and_nonrepeat(db_data = test_data_not_longitudinal), - class = "repeat_nonrepeat_instrument" + class = "repeat_nonrepeat_instrument" ) expect_no_error(check_repeat_and_nonrepeat(db_data = test_repeating_event)) }) @@ -151,10 +151,10 @@ test_that("checkmate wrappers work", { # extension expect_warning(check_arg_is_valid_extension("temp.docx", valid_extensions = "xlsx"), - class = "invalid_file_extension" + class = "invalid_file_extension" ) expect_warning(check_arg_is_valid_extension("xlsx.", valid_extensions = "xlsx"), - class = "invalid_file_extension" + class = "invalid_file_extension" ) expect_true(check_arg_is_valid_extension("temp.xlsx", valid_extensions = "xlsx")) }) @@ -254,3 +254,21 @@ test_that("check_fields_exist works", { check_fields_exist(fields = c(1,2,3), expr = expr(starts_with("temp"))) %>% expect_no_error() }) + +test_that("check_fields_are_checkboxes works", { + metadata <- tibble::tribble( + ~field_name, ~field_type, + "record_id", "text", + "text_field", "text", + "calc_field", "calc", + "checkbox___1", "checkbox", + "checkbox___2", "checkbox", + "checkbox___3", "checkbox" + ) + + metadata_filtered <- metadata %>% + filter("checkbox" %in% field_name) + + expect_error(check_fields_are_checkboxes(metadata), class = "non_checkbox_fields") + expect_no_error(check_fields_are_checkboxes(metadata_filtered)) +}) From 261342dc5e67ec94d751a2a8e0036a2644bb8713 Mon Sep 17 00:00:00 2001 From: Richard Hanna Date: Mon, 15 Jul 2024 14:58:37 -0400 Subject: [PATCH 11/39] Minor cleaning --- NAMESPACE | 1 + R/REDCapTidieR-package.R | 3 +- R/reduce_multi_to_single_column.R | 29 ++++++---- man/get_metadata_ref.Rd | 25 +++++++++ .../test-reduce_multi_to_single_column.R | 56 +++++++++++++++---- 5 files changed, 93 insertions(+), 21 deletions(-) create mode 100644 man/get_metadata_ref.Rd diff --git a/NAMESPACE b/NAMESPACE index 19b312a2..9efe8a4e 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -72,6 +72,7 @@ importFrom(purrr,discard) importFrom(purrr,flatten_chr) importFrom(purrr,map) importFrom(purrr,map2) +importFrom(purrr,map2_chr) importFrom(purrr,map_int) importFrom(purrr,map_lgl) importFrom(purrr,pluck) diff --git a/R/REDCapTidieR-package.R b/R/REDCapTidieR-package.R index a8dfadb9..95358f4b 100644 --- a/R/REDCapTidieR-package.R +++ b/R/REDCapTidieR-package.R @@ -10,7 +10,8 @@ #' @importFrom formattable percent #' @importFrom lobstr obj_size #' @importFrom lubridate is.difftime is.period is.POSIXt is.Date -#' @importFrom purrr compose map map2 map_int map_lgl pluck pmap_chr some pmap discard flatten_chr +#' @importFrom purrr compose map map2 map_int map_lgl pluck pmap_chr some pmap +#' discard flatten_chr map2_chr #' @importFrom REDCapR redcap_arm_export redcap_event_instruments redcap_instruments #' redcap_metadata_read redcap_read_oneshot sanitize_token #' @importFrom rlang .data !!! abort as_closure caller_arg caller_env catch_cnd diff --git a/R/reduce_multi_to_single_column.R b/R/reduce_multi_to_single_column.R index 0e0402f7..facab7ff 100644 --- a/R/reduce_multi_to_single_column.R +++ b/R/reduce_multi_to_single_column.R @@ -46,6 +46,7 @@ combine_checkboxes <- function(supertbl, values_fill = NA, raw_or_label = "label", keep = TRUE) { + # Save user cols to enquosure cols_exp <- enquo(cols) @@ -78,7 +79,7 @@ combine_checkboxes <- function(supertbl, ) - # Get metadata reference table, check that chose fields are checkboxes + # Get metadata reference table, check that chosen fields are checkboxes metadata <- get_metadata_ref(data_tbl_mod, supertbl, form_name, instrument_identifiers) # Replace TRUEs/1s with raw/label values from metadata @@ -123,9 +124,21 @@ combine_checkboxes <- function(supertbl, supertbl } -#' @noRd +#' @title Utility function for getting metadata raw and label values for checkboxes +#' +#' @param data a data tibble +#' @param supertbl A supertibble generated by [read_redcap()]. +#' @param form_name The name of the REDCap form (instrument) containing the checkbox +#' fields. +#' @param instrument_identifiers Character string vector of project record identifier vars +#' +#' @returns a tibble +#' #' @keywords internal -get_metadata_ref <- function(data, supertbl, form_name, instrument_identifiers) { +get_metadata_ref <- function(data, + supertbl, + form_name, + instrument_identifiers) { # Create a metadata reference table linking field name to raw and label values out <- supertbl$redcap_metadata[supertbl$redcap_form_name == form_name][[1]] %>% @@ -140,16 +153,12 @@ get_metadata_ref <- function(data, supertbl, form_name, instrument_identifiers) original_field = sub("___.*$", "", field_name) ) %>% mutate(pairs = strsplit(select_choices_or_calculations, " \\| "), - label_value = NA) - - for (i in seq(nrow(out))) { - out$label_value[i] <- out$pairs[[i]][i] - } + label_value = NA, + label_value = purrr::map2_chr(pairs, row_number(), \(.x, .y) .x[.y])) out %>% separate_wider_delim(label_value, delim = ", ", names = c("raw", "label")) %>% select(field_name, raw, label) - } #' @noRd @@ -158,7 +167,7 @@ replace_true <- function(col, col_name, metadata, raw_or_label) { # Replace TRUEs/1s with the appropriate raw or label value from the metadata replacement <- metadata %>% filter(field_name == col_name) %>% pull(raw_or_label) - col <- ifelse(col == TRUE, replacement, NA) # col == TRUE works for raw or label because TRUE == 1 and 1 == TRUE + col <- ifelse(col == TRUE, replacement, NA) # col == TRUE works for raw or label because TRUE == 1 & 1 == TRUE # Convert non-TRUEs to NA, since values can be either "FALSE" or "0" for unchecked values return(col) } diff --git a/man/get_metadata_ref.Rd b/man/get_metadata_ref.Rd new file mode 100644 index 00000000..75ff31c4 --- /dev/null +++ b/man/get_metadata_ref.Rd @@ -0,0 +1,25 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/reduce_multi_to_single_column.R +\name{get_metadata_ref} +\alias{get_metadata_ref} +\title{Utility function for getting metadata raw and label values for checkboxes} +\usage{ +get_metadata_ref(data, supertbl, form_name, instrument_identifiers) +} +\arguments{ +\item{data}{a data tibble} + +\item{supertbl}{A supertibble generated by \code{\link[=read_redcap]{read_redcap()}}.} + +\item{form_name}{The name of the REDCap form (instrument) containing the checkbox +fields.} + +\item{instrument_identifiers}{Character string vector of project record identifier vars} +} +\value{ +a tibble +} +\description{ +Utility function for getting metadata raw and label values for checkboxes +} +\keyword{internal} diff --git a/tests/testthat/test-reduce_multi_to_single_column.R b/tests/testthat/test-reduce_multi_to_single_column.R index fff54416..61aa00fc 100644 --- a/tests/testthat/test-reduce_multi_to_single_column.R +++ b/tests/testthat/test-reduce_multi_to_single_column.R @@ -6,12 +6,12 @@ nonrepeat_data <- tibble::tribble( ) nonrepeat_metadata <- tibble::tribble( - ~"field_name", ~"select_choices_or_calculations", - "study_id", NA, - "multi___1", "1, Red | 2, Yellow | 3, Blue", - "multi___2", "1, Red | 2, Yellow | 3, Blue", - "multi___3", "1, Red | 2, Yellow | 3, Blue", - "extra_data", "1, 1 | 2, 2 | 3,3" + ~"field_name", ~"field_type", ~"select_choices_or_calculations", + "study_id", "text", NA, + "multi___1", "checkbox", "1, Red | 2, Yellow | 3, Blue", + "multi___2", "checkbox", "1, Red | 2, Yellow | 3, Blue", + "multi___3", "checkbox", "1, Red | 2, Yellow | 3, Blue", + "extra_data", "dropdown", "1, 1 | 2, 2 | 3,3" ) repeat_data <- tibble::tribble( @@ -22,10 +22,10 @@ repeat_data <- tibble::tribble( ) repeat_metadata <- tibble::tribble( - ~"field_name", ~"select_choices_or_calculations", - "study_id", NA, - "repeat___1", "1, A | 2, B | 3, C", - "repeat___2", "1, A | 2, B | 3, C" + ~"field_name", ~"field_type", ~"select_choices_or_calculations", + "study_id", "text", NA, + "repeat___1", "checkbox", "1, A | 2, B | 3, C", + "repeat___2", "checkbox", "1, A | 2, B | 3, C" ) supertbl <- tibble::tribble( @@ -115,3 +115,39 @@ test_that("combine_checkboxes works for repeat instrument", { expect_equal(out, expected_out) }) + +test_that("get_metadata_ref works", { + data <- nonrepeat_data %>% + select(study_id, contains("multi")) %>% + mutate(new_data = c(FALSE, TRUE, FALSE)) + + out <- get_metadata_ref(data = data, supertbl = supertbl, form_name = "nonrepeat_instrument", instrument_identifiers = "study_id") + + expected_out <- tibble::tribble( + ~"field_name", ~"raw", ~"label", + "multi___1", "1", "Red", + "multi___2", "2", "Yellow", + "multi___3", "3", "Blue" + ) + + expect_equal(out, expected_out) +}) + +test_that("replace_true works", { + metadata <- tibble::tribble( + ~"field_name", ~"raw", ~"label", + "multi___1", "1", "Red", + "multi___2", "2", "Yellow", + "multi___3", "3", "Blue" + ) + + out <- replace_true(col = c(TRUE, TRUE, FALSE), col_name = "multi___1", metadata = metadata, raw_or_label = "raw") + expected_out <- c("1", "1", NA) + + expect_equal(out, expected_out) + + out <- replace_true(col = c(TRUE, TRUE, FALSE), col_name = "multi___1", metadata = metadata, raw_or_label = "label") + expected_out <- c("Red", "Red", NA) + + expect_equal(out, expected_out) +}) From e1d4eb8c99c44adb486a38d04d3b65dafd1f18b3 Mon Sep 17 00:00:00 2001 From: Richard Hanna Date: Mon, 15 Jul 2024 15:10:49 -0400 Subject: [PATCH 12/39] Update version, test recheck workflow --- .github/workflows/recheck.yml | 0 DESCRIPTION | 2 +- NEWS.md | 2 ++ 3 files changed, 3 insertions(+), 1 deletion(-) create mode 100644 .github/workflows/recheck.yml diff --git a/.github/workflows/recheck.yml b/.github/workflows/recheck.yml new file mode 100644 index 00000000..e69de29b diff --git a/DESCRIPTION b/DESCRIPTION index 2a75938d..6019ff00 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: REDCapTidieR Type: Package Title: Extract 'REDCap' Databases into Tidy 'Tibble's -Version: 1.1.1 +Version: 1.2.0 Authors@R: c( person("Richard", "Hanna", , "richardshanna91@gmail.com", role = c("aut", "cre"), comment = c(ORCID = "0009-0005-6496-8154")), diff --git a/NEWS.md b/NEWS.md index fb1435a0..6211fda9 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,5 @@ +# REDCapTidieR 1.2.0 + # REDCapTidieR 1.1.1 (development version) Version 1.1.1 From 7207f09faea952fa173b5efc3bede4887408a5cc Mon Sep 17 00:00:00 2001 From: Richard Hanna Date: Mon, 15 Jul 2024 15:14:09 -0400 Subject: [PATCH 13/39] Test recheck workflow file --- .github/workflows/recheck.yml | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/.github/workflows/recheck.yml b/.github/workflows/recheck.yml index e69de29b..323d2ad3 100644 --- a/.github/workflows/recheck.yml +++ b/.github/workflows/recheck.yml @@ -0,0 +1,18 @@ +on: + workflow_dispatch: + inputs: + which: + type: choice + description: Which dependents to check + options: + - strong + - most + +name: Reverse dependency check + +jobs: + revdep_check: + name: Reverse check ${{ inputs.which }} dependents + uses: r-devel/recheck/.github/workflows/recheck.yml@v1 + with: + which: ${{ inputs.which }} From 4f861e144fcc185f25ab9c1544012ca190330b37 Mon Sep 17 00:00:00 2001 From: Richard Hanna Date: Mon, 15 Jul 2024 15:30:59 -0400 Subject: [PATCH 14/39] Fix linting --- NAMESPACE | 1 + R/REDCapTidieR-package.R | 1 + R/checks.R | 10 +-- R/reduce_multi_to_single_column.R | 68 +++++++++++-------- tests/testthat/test-checks.R | 12 ++-- .../test-reduce_multi_to_single_column.R | 27 +++++--- 6 files changed, 68 insertions(+), 51 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index 9efe8a4e..7dc0131a 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -87,6 +87,7 @@ importFrom(readr,parse_integer) importFrom(readr,parse_logical) importFrom(readr,parse_time) importFrom(rlang,"!!!") +importFrom(rlang,":=") importFrom(rlang,.data) importFrom(rlang,abort) importFrom(rlang,as_closure) diff --git a/R/REDCapTidieR-package.R b/R/REDCapTidieR-package.R index 95358f4b..5f4105d5 100644 --- a/R/REDCapTidieR-package.R +++ b/R/REDCapTidieR-package.R @@ -18,6 +18,7 @@ #' check_installed cnd_muffle current_call current_env enexpr enquo env_poke #' eval_tidy get_env global_env is_atomic is_bare_formula is_bare_list quo_name #' is_installed new_environment quo_get_expr try_fetch zap as_label sym syms expr +#' := #' @importFrom stringi stri_split_fixed #' @importFrom stringr str_detect str_replace str_replace_all str_squish str_trunc #' str_trim str_ends diff --git a/R/checks.R b/R/checks.R index bf3ef7f4..e763c864 100644 --- a/R/checks.R +++ b/R/checks.R @@ -669,7 +669,6 @@ check_extra_field_values_message <- function(extra_field_values, call = caller_e #' #' @keywords internal check_fields_exist <- function(fields, expr, call = caller_env()) { - expr <- quo_name(expr) if (length(fields) == 0) { @@ -694,15 +693,13 @@ check_fields_exist <- function(fields, expr, call = caller_env()) { #' #' @keywords internal -check_fields_are_checkboxes <- function(metadata_tbl, call = caller_env()){ - +check_fields_are_checkboxes <- function(metadata_tbl, call = caller_env()) { non_checkboxes <- metadata_tbl %>% - filter(field_type != "checkbox") + filter(.data$field_type != "checkbox") if (nrow(non_checkboxes) > 0) { - non_checkboxes <- non_checkboxes %>% - pull(field_name) + pull(.data$field_name) msg <- c( x = "Non-checkbox fields selected for {.code form_name}", @@ -714,5 +711,4 @@ check_fields_are_checkboxes <- function(metadata_tbl, call = caller_env()){ class = c("non_checkbox_fields", "REDCapTidieR_cond") ) } - } diff --git a/R/reduce_multi_to_single_column.R b/R/reduce_multi_to_single_column.R index facab7ff..1f3fe717 100644 --- a/R/reduce_multi_to_single_column.R +++ b/R/reduce_multi_to_single_column.R @@ -46,7 +46,6 @@ combine_checkboxes <- function(supertbl, values_fill = NA, raw_or_label = "label", keep = TRUE) { - # Save user cols to enquosure cols_exp <- enquo(cols) @@ -63,20 +62,23 @@ combine_checkboxes <- function(supertbl, record_id_field <- supertbl$redcap_metadata[supertbl$redcap_form_name == form_name][[1]]$field_name[1] # Combine record identifier with remaining possible project identifiers - instrument_identifiers <- c(record_id_field, - "redcap_form_instance", - "redcap_form_name", - "redcap_event", - "redcap_event_instance") + instrument_identifiers <- c( + record_id_field, + "redcap_form_instance", + "redcap_form_name", + "redcap_event", + "redcap_event_instance" + ) # Define values_to as the count of TRUEs/1s for the given checkbox field # Assign TRUE if multiple selections made, and FALSE if one or zero made data_tbl_mod <- data_tbl %>% select(any_of(instrument_identifiers), !!!eval_select(cols_exp, data_tbl)) %>% mutate( - !!values_to := case_when(rowSums(select(., eval_tidy(cols_exp))) > 1 ~ TRUE, - TRUE ~ FALSE) - + !!values_to := case_when( + rowSums(select(., eval_tidy(cols_exp))) > 1 ~ TRUE, + TRUE ~ FALSE + ) ) # Get metadata reference table, check that chosen fields are checkboxes @@ -84,21 +86,27 @@ combine_checkboxes <- function(supertbl, # Replace TRUEs/1s with raw/label values from metadata data_tbl_mod <- data_tbl_mod %>% - mutate(across(-c(any_of(instrument_identifiers), !!values_to), ~ replace_true(.x, - cur_column(), - metadata = metadata, - raw_or_label = raw_or_label))) - + mutate(across( + -c(any_of(instrument_identifiers), !!values_to), + ~ replace_true(.x, + cur_column(), + metadata = metadata, + raw_or_label = raw_or_label + ) + )) # Convert values_to from TRUE/FALSE to multi_value_label or identified single val data_tbl_mod <- data_tbl_mod %>% - mutate(across(field_names, as.character), # enforce to character strings - across(!!values_to, ~as.character(.))) %>% + mutate( + across(field_names, as.character), # enforce to character strings + across(!!values_to, ~ as.character(.)) + ) %>% rowwise() %>% mutate( !!values_to := ifelse(!!sym(values_to) == "TRUE", - multi_value_label, - coalesce(!!!syms(field_names))), + multi_value_label, + coalesce(!!!syms(field_names)) + ), !!values_to := ifelse(is.na(!!sym(values_to)), values_fill, !!sym(values_to)) ) %>% ungroup() %>% @@ -139,34 +147,36 @@ get_metadata_ref <- function(data, supertbl, form_name, instrument_identifiers) { - # Create a metadata reference table linking field name to raw and label values out <- supertbl$redcap_metadata[supertbl$redcap_form_name == form_name][[1]] %>% - filter(field_name %in% names(data)[!names(data) %in% instrument_identifiers]) + filter(.data$field_name %in% names(data)[!names(data) %in% instrument_identifiers]) # Make sure selection is checkbox metadata field type check_fields_are_checkboxes(out) out <- out %>% - select(field_name, select_choices_or_calculations) %>% + select(.data$field_name, .data$select_choices_or_calculations) %>% mutate( - original_field = sub("___.*$", "", field_name) + original_field = sub("___.*$", "", .data$field_name) ) %>% - mutate(pairs = strsplit(select_choices_or_calculations, " \\| "), - label_value = NA, - label_value = purrr::map2_chr(pairs, row_number(), \(.x, .y) .x[.y])) + mutate( + pairs = strsplit(.data$select_choices_or_calculations, " \\| "), + label_value = NA, + label_value = purrr::map2_chr(pairs, row_number(), \(.x, .y) .x[.y]) + ) out %>% - separate_wider_delim(label_value, delim = ", ", names = c("raw", "label")) %>% - select(field_name, raw, label) + separate_wider_delim(.data$label_value, delim = ", ", names = c("raw", "label")) %>% + select(.data$field_name, .data$raw, .data$label) } #' @noRd #' @keywords internal replace_true <- function(col, col_name, metadata, raw_or_label) { - # Replace TRUEs/1s with the appropriate raw or label value from the metadata - replacement <- metadata %>% filter(field_name == col_name) %>% pull(raw_or_label) + replacement <- metadata %>% + filter(.data$field_name == col_name) %>% + pull(raw_or_label) col <- ifelse(col == TRUE, replacement, NA) # col == TRUE works for raw or label because TRUE == 1 & 1 == TRUE # Convert non-TRUEs to NA, since values can be either "FALSE" or "0" for unchecked values return(col) diff --git a/tests/testthat/test-checks.R b/tests/testthat/test-checks.R index 558bc220..77435375 100644 --- a/tests/testthat/test-checks.R +++ b/tests/testthat/test-checks.R @@ -53,10 +53,10 @@ test_that("check_repeat_and_nonrepeat works", { ) expect_error(check_repeat_and_nonrepeat(db_data = test_data_longitudinal), - class = "repeat_nonrepeat_instrument" + class = "repeat_nonrepeat_instrument" ) expect_error(check_repeat_and_nonrepeat(db_data = test_data_not_longitudinal), - class = "repeat_nonrepeat_instrument" + class = "repeat_nonrepeat_instrument" ) expect_no_error(check_repeat_and_nonrepeat(db_data = test_repeating_event)) }) @@ -151,10 +151,10 @@ test_that("checkmate wrappers work", { # extension expect_warning(check_arg_is_valid_extension("temp.docx", valid_extensions = "xlsx"), - class = "invalid_file_extension" + class = "invalid_file_extension" ) expect_warning(check_arg_is_valid_extension("xlsx.", valid_extensions = "xlsx"), - class = "invalid_file_extension" + class = "invalid_file_extension" ) expect_true(check_arg_is_valid_extension("temp.xlsx", valid_extensions = "xlsx")) }) @@ -251,14 +251,14 @@ test_that("check_fields_exist works", { check_fields_exist(fields = character(0), expr = expr(starts_with("temp"))) %>% expect_error(class = "missing_checkbox_fields") - check_fields_exist(fields = c(1,2,3), expr = expr(starts_with("temp"))) %>% + check_fields_exist(fields = c(1, 2, 3), expr = expr(starts_with("temp"))) %>% expect_no_error() }) test_that("check_fields_are_checkboxes works", { metadata <- tibble::tribble( ~field_name, ~field_type, - "record_id", "text", + "record_id", "text", "text_field", "text", "calc_field", "calc", "checkbox___1", "checkbox", diff --git a/tests/testthat/test-reduce_multi_to_single_column.R b/tests/testthat/test-reduce_multi_to_single_column.R index 61aa00fc..7c807f30 100644 --- a/tests/testthat/test-reduce_multi_to_single_column.R +++ b/tests/testthat/test-reduce_multi_to_single_column.R @@ -37,10 +37,12 @@ supertbl <- tibble::tribble( class(supertbl) <- c("redcap_supertbl", class(supertbl)) test_that("combine_checkboxes returns an expected supertbl", { - out <- combine_checkboxes(supertbl = supertbl, - form_name = "nonrepeat_instrument", - cols = starts_with("multi"), - values_to = "new_col") # values_fill declared + out <- combine_checkboxes( + supertbl = supertbl, + form_name = "nonrepeat_instrument", + cols = starts_with("multi"), + values_to = "new_col" + ) # values_fill declared expect_setequal(class(out), c("redcap_supertbl", "tbl_df", "tbl", "data.frame")) expect_equal(nrow(out), 2) @@ -96,10 +98,12 @@ test_that("combine_checkboxes works for nonrepeat instrument and drop old values }) test_that("combine_checkboxes works for repeat instrument", { - out <- combine_checkboxes(supertbl = supertbl, - form_name = "repeat_instrument", - cols = starts_with("repeat"), - values_to = "new_col") %>% + out <- combine_checkboxes( + supertbl = supertbl, + form_name = "repeat_instrument", + cols = starts_with("repeat"), + values_to = "new_col" + ) %>% pull(redcap_data) %>% dplyr::nth(2) @@ -121,7 +125,12 @@ test_that("get_metadata_ref works", { select(study_id, contains("multi")) %>% mutate(new_data = c(FALSE, TRUE, FALSE)) - out <- get_metadata_ref(data = data, supertbl = supertbl, form_name = "nonrepeat_instrument", instrument_identifiers = "study_id") + out <- get_metadata_ref( + data = data, + supertbl = supertbl, + form_name = "nonrepeat_instrument", + instrument_identifiers = "study_id" + ) expected_out <- tibble::tribble( ~"field_name", ~"raw", ~"label", From eb111526e5da7be04a68926d0865dab7a3a7b567 Mon Sep 17 00:00:00 2001 From: Richard Hanna Date: Mon, 15 Jul 2024 15:48:16 -0400 Subject: [PATCH 15/39] Add combine_checkboxes() to pkgdown --- pkgdown/_pkgdown.yml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/pkgdown/_pkgdown.yml b/pkgdown/_pkgdown.yml index 7801c4fe..ede73fed 100644 --- a/pkgdown/_pkgdown.yml +++ b/pkgdown/_pkgdown.yml @@ -64,6 +64,11 @@ reference: Export a supertibble to other usable formats. contents: - write_redcap_xlsx +- title: "Supertibble Post-Processing" + desc: > + Helpful functions for supertibble data analytics. + contents: + - combine_checkboxes - title: "Data" contents: - superheroes_supertbl From 7348324ba7db87ee030fd613542f54774fe5d00b Mon Sep 17 00:00:00 2001 From: Richard Hanna Date: Mon, 15 Jul 2024 16:02:35 -0400 Subject: [PATCH 16/39] Remove revdepcheck, update renv --- renv.lock | 2 +- renv/settings.json | 2 +- revdep/.gitignore | 7 ---- revdep/README.md | 89 ---------------------------------------------- revdep/cran.md | 7 ---- revdep/failures.md | 1 - revdep/problems.md | 1 - utility/refresh.R | 2 +- 8 files changed, 3 insertions(+), 108 deletions(-) delete mode 100644 revdep/.gitignore delete mode 100644 revdep/README.md delete mode 100644 revdep/cran.md delete mode 100644 revdep/failures.md delete mode 100644 revdep/problems.md diff --git a/renv.lock b/renv.lock index 27491c14..63c2beee 100644 --- a/renv.lock +++ b/renv.lock @@ -1,6 +1,6 @@ { "R": { - "Version": "4.2.0", + "Version": "4.4.0", "Repositories": [ { "Name": "CRAN", diff --git a/renv/settings.json b/renv/settings.json index 0e163611..08d0def7 100644 --- a/renv/settings.json +++ b/renv/settings.json @@ -9,7 +9,7 @@ ], "ppm.enabled": null, "ppm.ignored.urls": [], - "r.version": "4.2.0", + "r.version": "4.4.0", "snapshot.type": "implicit", "use.cache": true, "vcs.ignore.cellar": true, diff --git a/revdep/.gitignore b/revdep/.gitignore deleted file mode 100644 index 111ab324..00000000 --- a/revdep/.gitignore +++ /dev/null @@ -1,7 +0,0 @@ -checks -library -checks.noindex -library.noindex -cloud.noindex -data.sqlite -*.html diff --git a/revdep/README.md b/revdep/README.md deleted file mode 100644 index dab9e040..00000000 --- a/revdep/README.md +++ /dev/null @@ -1,89 +0,0 @@ -# Platform - -|field |value | -|:--------|:-----------------------------------| -|version |R version 4.2.3 (2023-03-15) | -|os |macOS Ventura 13.6.5 | -|system |aarch64, darwin20 | -|ui |RStudio | -|language |(EN) | -|collate |en_US.UTF-8 | -|ctype |en_US.UTF-8 | -|tz |America/New_York | -|date |2024-04-11 | -|rstudio |2023.12.1+402 Ocean Storm (desktop) | -|pandoc |NA | - -# Dependencies - -|package |old |new |Δ | -|:------------|:-------|:-------|:--| -|REDCapTidieR |1.1.0 |1.1.1 |* | -|askpass |1.2.0 |1.2.0 | | -|backports |1.4.1 |1.4.1 | | -|base64enc |0.1-3 |0.1-3 | | -|bit |4.0.5 |4.0.5 | | -|bit64 |4.0.5 |4.0.5 | | -|bslib |0.7.0 |0.7.0 | | -|cachem |1.0.8 |1.0.8 | | -|checkmate |2.3.1 |2.3.1 | | -|cli |3.6.2 |3.6.2 | | -|clipr |0.8.0 |0.8.0 | | -|cpp11 |0.4.7 |0.4.7 | | -|crayon |1.5.2 |1.5.2 | | -|curl |5.2.1 |5.2.1 | | -|digest |0.6.35 |0.6.35 | | -|dplyr |1.1.4 |1.1.4 | | -|evaluate |0.23 |0.23 | | -|fansi |1.0.6 |1.0.6 | | -|fastmap |1.1.1 |1.1.1 | | -|fontawesome |0.5.2 |0.5.2 | | -|formattable |0.2.1 |0.2.1 | | -|fs |1.6.3 |1.6.3 | | -|generics |0.1.3 |0.1.3 | | -|glue |1.7.0 |1.7.0 | | -|highr |0.10 |0.10 | | -|hms |1.1.3 |1.1.3 | | -|htmltools |0.5.8.1 |0.5.8.1 | | -|htmlwidgets |1.6.4 |1.6.4 | | -|httr |1.4.7 |1.4.7 | | -|jquerylib |0.1.4 |0.1.4 | | -|jsonlite |1.8.8 |1.8.8 | | -|knitr |1.46 |1.46 | | -|lifecycle |1.0.4 |1.0.4 | | -|lobstr |1.1.2 |1.1.2 | | -|lubridate |1.9.3 |1.9.3 | | -|magrittr |2.0.3 |2.0.3 | | -|memoise |2.0.1 |2.0.1 | | -|mime |0.12 |0.12 | | -|openssl |2.1.1 |2.1.1 | | -|pillar |1.9.0 |1.9.0 | | -|pkgconfig |2.0.3 |2.0.3 | | -|prettyunits |1.2.0 |1.2.0 | | -|progress |1.2.3 |1.2.3 | | -|purrr |1.0.2 |1.0.2 | | -|R6 |2.5.1 |2.5.1 | | -|rappdirs |0.3.3 |0.3.3 | | -|readr |2.1.5 |2.1.5 | | -|REDCapR |1.1.0 |1.1.0 | | -|rlang |1.1.3 |1.1.3 | | -|rmarkdown |2.26 |2.26 | | -|sass |0.4.9 |0.4.9 | | -|stringi |1.8.3 |1.8.3 | | -|stringr |1.5.1 |1.5.1 | | -|sys |3.4.2 |3.4.2 | | -|tibble |3.2.1 |3.2.1 | | -|tidyr |1.3.1 |1.3.1 | | -|tidyselect |1.2.1 |1.2.1 | | -|timechange |0.3.0 |0.3.0 | | -|tinytex |0.50 |0.50 | | -|tzdb |0.4.0 |0.4.0 | | -|utf8 |1.2.4 |1.2.4 | | -|vctrs |0.6.5 |0.6.5 | | -|vroom |1.6.5 |1.6.5 | | -|withr |3.0.0 |3.0.0 | | -|xfun |0.43 |0.43 | | -|yaml |2.3.8 |2.3.8 | | - -# Revdeps - diff --git a/revdep/cran.md b/revdep/cran.md deleted file mode 100644 index 33114b63..00000000 --- a/revdep/cran.md +++ /dev/null @@ -1,7 +0,0 @@ -## revdepcheck results - -We checked 0 reverse dependencies, comparing R CMD check results across CRAN and dev versions of this package. - - * We saw 0 new problems - * We failed to check 0 packages - diff --git a/revdep/failures.md b/revdep/failures.md deleted file mode 100644 index 9a207363..00000000 --- a/revdep/failures.md +++ /dev/null @@ -1 +0,0 @@ -*Wow, no problems at all. :)* \ No newline at end of file diff --git a/revdep/problems.md b/revdep/problems.md deleted file mode 100644 index 9a207363..00000000 --- a/revdep/problems.md +++ /dev/null @@ -1 +0,0 @@ -*Wow, no problems at all. :)* \ No newline at end of file diff --git a/utility/refresh.R b/utility/refresh.R index 3d580ec4..82518bda 100644 --- a/utility/refresh.R +++ b/utility/refresh.R @@ -67,7 +67,7 @@ devtools::check( # Equivalent of R-hub rhub::rhub_check(platforms = c("linux", "windows", "ubuntu-next", "ubuntu-release")) devtools::check_win_devel(email = "porterej@chop.edu") # CRAN submission policies encourage the development version # Note: Must be off of VPN -revdepcheck::revdep_check(num_workers = 4) +# revdepcheck::revdep_check(num_workers = 4) # Deprecated # Careful, the last question ultimately uploads it to CRAN, where you can't delete/reverse your decision. # Run as not CRAN to build full vignettes # withr::with_envvar( From 62080afd33a73d0e9332f007e8ff9d65339a3a6c Mon Sep 17 00:00:00 2001 From: Richard Hanna Date: Mon, 15 Jul 2024 16:38:59 -0400 Subject: [PATCH 17/39] Add standard checks for params --- R/reduce_multi_to_single_column.R | 35 ++++++++++++------- man/combine_checkboxes.Rd | 7 ++-- man/get_metadata_ref.Rd | 4 +-- .../test-reduce_multi_to_single_column.R | 10 +++--- 4 files changed, 32 insertions(+), 24 deletions(-) diff --git a/R/reduce_multi_to_single_column.R b/R/reduce_multi_to_single_column.R index 1f3fe717..b4119fe8 100644 --- a/R/reduce_multi_to_single_column.R +++ b/R/reduce_multi_to_single_column.R @@ -7,8 +7,7 @@ #' factor column. #' #' @param supertbl A supertibble generated by [read_redcap()]. Required. -#' @param form_name The name of the REDCap form (instrument) containing the checkbox -#' fields. Required. +#' @param tbl The `redcap_form_name` of the data tibble to extract. Required. #' @param cols <[`tidy-select`][tidyr_tidy_select]> Checbox columns to combine to #' single column. Required. #' @param values_to A string specifying the name of the column to combine checkbox @@ -28,7 +27,7 @@ #' supertbl <- read_redcap(redcap_uri, token) #' combined_tbl <- combine_checkboxes( #' supertbl = supertbl, -#' form_name = "demographics", +#' tbl = "demographics", #' cols = starts_with("race"), #' values_to = "race_combined", #' multi_value_label = "Multiple", @@ -39,19 +38,29 @@ #' @export combine_checkboxes <- function(supertbl, - form_name, + tbl, cols, values_to, multi_value_label = "Multiple", values_fill = NA, raw_or_label = "label", keep = TRUE) { + + # Check args --- + check_arg_is_supertbl(supertbl, req_cols = c("redcap_data", "redcap_metadata")) + check_arg_is_character(tbl, len = 1, any.missing = FALSE) + check_arg_is_character(values_to, len = 1, any.missing = FALSE) + check_arg_is_character(multi_value_label, len = 1, any.missing = TRUE) + check_arg_is_character(values_fill, len = 1, any.missing = TRUE) + check_arg_choices(raw_or_label, choices = c("label", "raw")) + check_arg_is_logical(keep, len = 1, any.missing = FALSE) + # Save user cols to enquosure cols_exp <- enquo(cols) - # Extract form_name from supertbl + # Extract tbl from supertbl data_tbl <- supertbl %>% - extract_tibble(form_name) + extract_tibble(tbl) # Get field names from cols_exp, check that fields exist field_names <- names(eval_select(cols_exp, data = data_tbl)) @@ -59,7 +68,7 @@ combine_checkboxes <- function(supertbl, # Assume the first instrument in the metadata contains IDs # REDCap enforces this constraints, we reflect this in read_redcap -> get_field_to_drop - record_id_field <- supertbl$redcap_metadata[supertbl$redcap_form_name == form_name][[1]]$field_name[1] + record_id_field <- supertbl$redcap_metadata[supertbl$redcap_form_name == tbl][[1]]$field_name[1] # Combine record identifier with remaining possible project identifiers instrument_identifiers <- c( @@ -82,7 +91,7 @@ combine_checkboxes <- function(supertbl, ) # Get metadata reference table, check that chosen fields are checkboxes - metadata <- get_metadata_ref(data_tbl_mod, supertbl, form_name, instrument_identifiers) + metadata <- get_metadata_ref(data_tbl_mod, supertbl, tbl, instrument_identifiers) # Replace TRUEs/1s with raw/label values from metadata data_tbl_mod <- data_tbl_mod %>% @@ -115,7 +124,7 @@ combine_checkboxes <- function(supertbl, !!values_to := factor(!!sym(values_to), levels = c(metadata[[raw_or_label]], multi_value_label, values_fill)) ) - # Join back onto original data form_name + # Join back onto original data tbl data_tbl_mod <- data_tbl_mod %>% right_join(data_tbl, by = intersect(instrument_identifiers, names(data_tbl_mod))) %>% relocate(!!values_to, .after = everything()) @@ -127,7 +136,7 @@ combine_checkboxes <- function(supertbl, } # Update the supertbl data tibble - supertbl$redcap_data[supertbl$redcap_form_name == form_name][[1]] <- data_tbl_mod + supertbl$redcap_data[supertbl$redcap_form_name == tbl][[1]] <- data_tbl_mod supertbl } @@ -136,7 +145,7 @@ combine_checkboxes <- function(supertbl, #' #' @param data a data tibble #' @param supertbl A supertibble generated by [read_redcap()]. -#' @param form_name The name of the REDCap form (instrument) containing the checkbox +#' @param tbl The name of the REDCap form (instrument) containing the checkbox #' fields. #' @param instrument_identifiers Character string vector of project record identifier vars #' @@ -145,10 +154,10 @@ combine_checkboxes <- function(supertbl, #' @keywords internal get_metadata_ref <- function(data, supertbl, - form_name, + tbl, instrument_identifiers) { # Create a metadata reference table linking field name to raw and label values - out <- supertbl$redcap_metadata[supertbl$redcap_form_name == form_name][[1]] %>% + out <- supertbl$redcap_metadata[supertbl$redcap_form_name == tbl][[1]] %>% filter(.data$field_name %in% names(data)[!names(data) %in% instrument_identifiers]) # Make sure selection is checkbox metadata field type diff --git a/man/combine_checkboxes.Rd b/man/combine_checkboxes.Rd index 8fc340e3..f7b347c8 100644 --- a/man/combine_checkboxes.Rd +++ b/man/combine_checkboxes.Rd @@ -6,7 +6,7 @@ \usage{ combine_checkboxes( supertbl, - form_name, + tbl, cols, values_to, multi_value_label = "Multiple", @@ -18,8 +18,7 @@ combine_checkboxes( \arguments{ \item{supertbl}{A supertibble generated by \code{\link[=read_redcap]{read_redcap()}}. Required.} -\item{form_name}{The name of the REDCap form (instrument) containing the checkbox -fields. Required.} +\item{tbl}{The \code{redcap_form_name} of the data tibble to extract. Required.} \item{cols}{<\code{\link[=tidyr_tidy_select]{tidy-select}}> Checbox columns to combine to single column. Required.} @@ -52,7 +51,7 @@ factor column. supertbl <- read_redcap(redcap_uri, token) combined_tbl <- combine_checkboxes( supertbl = supertbl, - form_name = "demographics", + tbl = "demographics", cols = starts_with("race"), values_to = "race_combined", multi_value_label = "Multiple", diff --git a/man/get_metadata_ref.Rd b/man/get_metadata_ref.Rd index 75ff31c4..3ff8f401 100644 --- a/man/get_metadata_ref.Rd +++ b/man/get_metadata_ref.Rd @@ -4,14 +4,14 @@ \alias{get_metadata_ref} \title{Utility function for getting metadata raw and label values for checkboxes} \usage{ -get_metadata_ref(data, supertbl, form_name, instrument_identifiers) +get_metadata_ref(data, supertbl, tbl, instrument_identifiers) } \arguments{ \item{data}{a data tibble} \item{supertbl}{A supertibble generated by \code{\link[=read_redcap]{read_redcap()}}.} -\item{form_name}{The name of the REDCap form (instrument) containing the checkbox +\item{tbl}{The name of the REDCap form (instrument) containing the checkbox fields.} \item{instrument_identifiers}{Character string vector of project record identifier vars} diff --git a/tests/testthat/test-reduce_multi_to_single_column.R b/tests/testthat/test-reduce_multi_to_single_column.R index 7c807f30..79596220 100644 --- a/tests/testthat/test-reduce_multi_to_single_column.R +++ b/tests/testthat/test-reduce_multi_to_single_column.R @@ -39,7 +39,7 @@ class(supertbl) <- c("redcap_supertbl", class(supertbl)) test_that("combine_checkboxes returns an expected supertbl", { out <- combine_checkboxes( supertbl = supertbl, - form_name = "nonrepeat_instrument", + tbl = "nonrepeat_instrument", cols = starts_with("multi"), values_to = "new_col" ) # values_fill declared @@ -51,7 +51,7 @@ test_that("combine_checkboxes returns an expected supertbl", { test_that("combine_checkboxes works for nonrepeat instrument", { out <- combine_checkboxes( supertbl = supertbl, - form_name = "nonrepeat_instrument", + tbl = "nonrepeat_instrument", cols = starts_with("multi"), values_to = "new_col", multi_value_label = "multiple", # multi_value_label declared @@ -76,7 +76,7 @@ test_that("combine_checkboxes works for nonrepeat instrument", { test_that("combine_checkboxes works for nonrepeat instrument and drop old values", { out <- combine_checkboxes( supertbl = supertbl, - form_name = "nonrepeat_instrument", + tbl = "nonrepeat_instrument", cols = starts_with("multi"), values_to = "new_col", keep = FALSE # Test keep = FALSE @@ -100,7 +100,7 @@ test_that("combine_checkboxes works for nonrepeat instrument and drop old values test_that("combine_checkboxes works for repeat instrument", { out <- combine_checkboxes( supertbl = supertbl, - form_name = "repeat_instrument", + tbl = "repeat_instrument", cols = starts_with("repeat"), values_to = "new_col" ) %>% @@ -128,7 +128,7 @@ test_that("get_metadata_ref works", { out <- get_metadata_ref( data = data, supertbl = supertbl, - form_name = "nonrepeat_instrument", + tbl = "nonrepeat_instrument", instrument_identifiers = "study_id" ) From 522d01dc433f073549a8b3648a5dfbf97b162116 Mon Sep 17 00:00:00 2001 From: Richard Hanna Date: Tue, 16 Jul 2024 13:56:07 -0400 Subject: [PATCH 18/39] Filename update --- R/{reduce_multi_to_single_column.R => combine_checkboxes.R} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename R/{reduce_multi_to_single_column.R => combine_checkboxes.R} (100%) diff --git a/R/reduce_multi_to_single_column.R b/R/combine_checkboxes.R similarity index 100% rename from R/reduce_multi_to_single_column.R rename to R/combine_checkboxes.R From 3a395cf60c6c7b2d32d9179ae5a5e2a134b015a7 Mon Sep 17 00:00:00 2001 From: Richard Hanna Date: Tue, 16 Jul 2024 13:56:34 -0400 Subject: [PATCH 19/39] Filename change --- man/combine_checkboxes.Rd | 2 +- man/get_metadata_ref.Rd | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/man/combine_checkboxes.Rd b/man/combine_checkboxes.Rd index f7b347c8..f2ed6094 100644 --- a/man/combine_checkboxes.Rd +++ b/man/combine_checkboxes.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/reduce_multi_to_single_column.R +% Please edit documentation in R/combine_checkboxes.R \name{combine_checkboxes} \alias{combine_checkboxes} \title{Combine Checkbox Fields into a Single Column} diff --git a/man/get_metadata_ref.Rd b/man/get_metadata_ref.Rd index 3ff8f401..66ebde38 100644 --- a/man/get_metadata_ref.Rd +++ b/man/get_metadata_ref.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/reduce_multi_to_single_column.R +% Please edit documentation in R/combine_checkboxes.R \name{get_metadata_ref} \alias{get_metadata_ref} \title{Utility function for getting metadata raw and label values for checkboxes} From 347d2a356a2f4e81fb25f59d34cd1f7a9aff8403 Mon Sep 17 00:00:00 2001 From: Richard Hanna Date: Wed, 17 Jul 2024 08:48:18 -0400 Subject: [PATCH 20/39] Rename test file --- ...-reduce_multi_to_single_column.R => test-combine_checkboxes.R} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename tests/testthat/{test-reduce_multi_to_single_column.R => test-combine_checkboxes.R} (100%) diff --git a/tests/testthat/test-reduce_multi_to_single_column.R b/tests/testthat/test-combine_checkboxes.R similarity index 100% rename from tests/testthat/test-reduce_multi_to_single_column.R rename to tests/testthat/test-combine_checkboxes.R From cce0d120dca6febabad1c94f9ff9e20b36d51fb6 Mon Sep 17 00:00:00 2001 From: Richard Hanna Date: Wed, 17 Jul 2024 09:22:03 -0400 Subject: [PATCH 21/39] Fix record_id_field assign, remove rowwise call --- R/combine_checkboxes.R | 7 +- .../test-reduce_multi_to_single_column.R | 162 ++++++++++++++++++ 2 files changed, 164 insertions(+), 5 deletions(-) create mode 100644 tests/testthat/test-reduce_multi_to_single_column.R diff --git a/R/combine_checkboxes.R b/R/combine_checkboxes.R index b4119fe8..3c866b6e 100644 --- a/R/combine_checkboxes.R +++ b/R/combine_checkboxes.R @@ -66,9 +66,8 @@ combine_checkboxes <- function(supertbl, field_names <- names(eval_select(cols_exp, data = data_tbl)) check_fields_exist(fields = field_names, expr = cols_exp) - # Assume the first instrument in the metadata contains IDs - # REDCap enforces this constraints, we reflect this in read_redcap -> get_field_to_drop - record_id_field <- supertbl$redcap_metadata[supertbl$redcap_form_name == tbl][[1]]$field_name[1] + # Identify record_id field + record_id_field <- get_record_id_field(supertbl$redcap_data[[1]]) # Combine record identifier with remaining possible project identifiers instrument_identifiers <- c( @@ -110,7 +109,6 @@ combine_checkboxes <- function(supertbl, across(field_names, as.character), # enforce to character strings across(!!values_to, ~ as.character(.)) ) %>% - rowwise() %>% mutate( !!values_to := ifelse(!!sym(values_to) == "TRUE", multi_value_label, @@ -118,7 +116,6 @@ combine_checkboxes <- function(supertbl, ), !!values_to := ifelse(is.na(!!sym(values_to)), values_fill, !!sym(values_to)) ) %>% - ungroup() %>% select(any_of(instrument_identifiers), !!values_to) %>% mutate( !!values_to := factor(!!sym(values_to), levels = c(metadata[[raw_or_label]], multi_value_label, values_fill)) diff --git a/tests/testthat/test-reduce_multi_to_single_column.R b/tests/testthat/test-reduce_multi_to_single_column.R new file mode 100644 index 00000000..79596220 --- /dev/null +++ b/tests/testthat/test-reduce_multi_to_single_column.R @@ -0,0 +1,162 @@ +nonrepeat_data <- tibble::tribble( + ~"study_id", ~"multi___1", ~"multi___2", ~"multi___3", ~"extra_data", + 1, TRUE, FALSE, FALSE, 1, + 2, TRUE, TRUE, FALSE, 2, + 3, FALSE, FALSE, FALSE, 3 +) + +nonrepeat_metadata <- tibble::tribble( + ~"field_name", ~"field_type", ~"select_choices_or_calculations", + "study_id", "text", NA, + "multi___1", "checkbox", "1, Red | 2, Yellow | 3, Blue", + "multi___2", "checkbox", "1, Red | 2, Yellow | 3, Blue", + "multi___3", "checkbox", "1, Red | 2, Yellow | 3, Blue", + "extra_data", "dropdown", "1, 1 | 2, 2 | 3,3" +) + +repeat_data <- tibble::tribble( + ~"study_id", ~"redcap_event", ~"redcap_form_instance", ~"repeat___1", ~"repeat___2", + 1, "event_1", 1, TRUE, FALSE, + 2, "event_1", 1, TRUE, TRUE, + 2, "event_1", 2, FALSE, FALSE +) + +repeat_metadata <- tibble::tribble( + ~"field_name", ~"field_type", ~"select_choices_or_calculations", + "study_id", "text", NA, + "repeat___1", "checkbox", "1, A | 2, B | 3, C", + "repeat___2", "checkbox", "1, A | 2, B | 3, C" +) + +supertbl <- tibble::tribble( + ~"redcap_form_name", ~"redcap_data", ~"redcap_metadata", + "nonrepeat_instrument", nonrepeat_data, nonrepeat_metadata, + "repeat_instrument", repeat_data, repeat_metadata +) + +class(supertbl) <- c("redcap_supertbl", class(supertbl)) + +test_that("combine_checkboxes returns an expected supertbl", { + out <- combine_checkboxes( + supertbl = supertbl, + tbl = "nonrepeat_instrument", + cols = starts_with("multi"), + values_to = "new_col" + ) # values_fill declared + + expect_setequal(class(out), c("redcap_supertbl", "tbl_df", "tbl", "data.frame")) + expect_equal(nrow(out), 2) +}) + +test_that("combine_checkboxes works for nonrepeat instrument", { + out <- combine_checkboxes( + supertbl = supertbl, + tbl = "nonrepeat_instrument", + cols = starts_with("multi"), + values_to = "new_col", + multi_value_label = "multiple", # multi_value_label declared + values_fill = "none" # values_fill declared + ) %>% + pull(redcap_data) %>% + dplyr::first() + + expected_out <- tibble::tribble( + ~"study_id", ~"multi___1", ~"multi___2", ~"multi___3", ~"extra_data", ~"new_col", + 1, TRUE, FALSE, FALSE, 1, "Red", + 2, TRUE, TRUE, FALSE, 2, "multiple", + 3, FALSE, FALSE, FALSE, 3, "none" + ) %>% + mutate( + new_col = factor(new_col, levels = c("Red", "Yellow", "Blue", "multiple", "none")) + ) + + expect_equal(out, expected_out) +}) + +test_that("combine_checkboxes works for nonrepeat instrument and drop old values", { + out <- combine_checkboxes( + supertbl = supertbl, + tbl = "nonrepeat_instrument", + cols = starts_with("multi"), + values_to = "new_col", + keep = FALSE # Test keep = FALSE + ) %>% + pull(redcap_data) %>% + dplyr::first() + + expected_out <- tibble::tribble( + ~"study_id", ~"extra_data", ~"new_col", + 1, 1, "Red", + 2, 2, "Multiple", + 3, 3, NA + ) %>% + mutate( + new_col = factor(new_col, levels = c("Red", "Yellow", "Blue", "Multiple")) + ) + + expect_equal(out, expected_out) +}) + +test_that("combine_checkboxes works for repeat instrument", { + out <- combine_checkboxes( + supertbl = supertbl, + tbl = "repeat_instrument", + cols = starts_with("repeat"), + values_to = "new_col" + ) %>% + pull(redcap_data) %>% + dplyr::nth(2) + + expected_out <- tibble::tribble( + ~"study_id", ~"redcap_form_instance", ~"redcap_event", ~"repeat___1", ~"repeat___2", ~"new_col", + 1, 1, "event_1", TRUE, FALSE, "A", + 2, 1, "event_1", TRUE, TRUE, "Multiple", + 2, 2, "event_1", FALSE, FALSE, NA + ) %>% + mutate( + new_col = factor(new_col, levels = c("A", "B", "Multiple")) + ) + + expect_equal(out, expected_out) +}) + +test_that("get_metadata_ref works", { + data <- nonrepeat_data %>% + select(study_id, contains("multi")) %>% + mutate(new_data = c(FALSE, TRUE, FALSE)) + + out <- get_metadata_ref( + data = data, + supertbl = supertbl, + tbl = "nonrepeat_instrument", + instrument_identifiers = "study_id" + ) + + expected_out <- tibble::tribble( + ~"field_name", ~"raw", ~"label", + "multi___1", "1", "Red", + "multi___2", "2", "Yellow", + "multi___3", "3", "Blue" + ) + + expect_equal(out, expected_out) +}) + +test_that("replace_true works", { + metadata <- tibble::tribble( + ~"field_name", ~"raw", ~"label", + "multi___1", "1", "Red", + "multi___2", "2", "Yellow", + "multi___3", "3", "Blue" + ) + + out <- replace_true(col = c(TRUE, TRUE, FALSE), col_name = "multi___1", metadata = metadata, raw_or_label = "raw") + expected_out <- c("1", "1", NA) + + expect_equal(out, expected_out) + + out <- replace_true(col = c(TRUE, TRUE, FALSE), col_name = "multi___1", metadata = metadata, raw_or_label = "label") + expected_out <- c("Red", "Red", NA) + + expect_equal(out, expected_out) +}) From c250eda9474b9e8c655e309165c61d63a1e2d5c8 Mon Sep 17 00:00:00 2001 From: Richard Hanna Date: Wed, 17 Jul 2024 14:12:42 -0400 Subject: [PATCH 22/39] Remove instrument_identifiers, use bind_cols --- NAMESPACE | 1 + R/REDCapTidieR-package.R | 2 +- R/combine_checkboxes.R | 42 +++++++------------ man/get_metadata_ref.Rd | 4 +- .../test-reduce_multi_to_single_column.R | 10 ++--- 5 files changed, 24 insertions(+), 35 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index 7dc0131a..256df5a1 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -39,6 +39,7 @@ importFrom(cli,cli_warn) importFrom(cli,qty) importFrom(dplyr,"%>%") importFrom(dplyr,across) +importFrom(dplyr,bind_cols) importFrom(dplyr,bind_rows) importFrom(dplyr,case_when) importFrom(dplyr,coalesce) diff --git a/R/REDCapTidieR-package.R b/R/REDCapTidieR-package.R index 5f4105d5..82c863a0 100644 --- a/R/REDCapTidieR-package.R +++ b/R/REDCapTidieR-package.R @@ -6,7 +6,7 @@ #' @importFrom cli cli_abort cli_fmt cli_text cli_vec cli_warn qty #' @importFrom dplyr %>% across bind_rows case_when filter group_by if_any if_else #' left_join mutate pull recode relocate rename right_join row_number rowwise -#' select slice summarise ungroup coalesce cur_column +#' select slice summarise ungroup coalesce cur_column bind_cols #' @importFrom formattable percent #' @importFrom lobstr obj_size #' @importFrom lubridate is.difftime is.period is.POSIXt is.Date diff --git a/R/combine_checkboxes.R b/R/combine_checkboxes.R index 3c866b6e..0166c793 100644 --- a/R/combine_checkboxes.R +++ b/R/combine_checkboxes.R @@ -69,19 +69,9 @@ combine_checkboxes <- function(supertbl, # Identify record_id field record_id_field <- get_record_id_field(supertbl$redcap_data[[1]]) - # Combine record identifier with remaining possible project identifiers - instrument_identifiers <- c( - record_id_field, - "redcap_form_instance", - "redcap_form_name", - "redcap_event", - "redcap_event_instance" - ) - # Define values_to as the count of TRUEs/1s for the given checkbox field # Assign TRUE if multiple selections made, and FALSE if one or zero made data_tbl_mod <- data_tbl %>% - select(any_of(instrument_identifiers), !!!eval_select(cols_exp, data_tbl)) %>% mutate( !!values_to := case_when( rowSums(select(., eval_tidy(cols_exp))) > 1 ~ TRUE, @@ -90,16 +80,16 @@ combine_checkboxes <- function(supertbl, ) # Get metadata reference table, check that chosen fields are checkboxes - metadata <- get_metadata_ref(data_tbl_mod, supertbl, tbl, instrument_identifiers) + metadata <- get_metadata_ref(data_tbl_mod, supertbl, tbl, field_names) # Replace TRUEs/1s with raw/label values from metadata data_tbl_mod <- data_tbl_mod %>% mutate(across( - -c(any_of(instrument_identifiers), !!values_to), + field_names, ~ replace_true(.x, - cur_column(), - metadata = metadata, - raw_or_label = raw_or_label + cur_column(), + metadata = metadata, + raw_or_label = raw_or_label ) )) @@ -111,29 +101,26 @@ combine_checkboxes <- function(supertbl, ) %>% mutate( !!values_to := ifelse(!!sym(values_to) == "TRUE", - multi_value_label, - coalesce(!!!syms(field_names)) + multi_value_label, + coalesce(!!!syms(field_names)) ), !!values_to := ifelse(is.na(!!sym(values_to)), values_fill, !!sym(values_to)) ) %>% - select(any_of(instrument_identifiers), !!values_to) %>% + # select(any_of(instrument_identifiers), !!values_to) %>% mutate( !!values_to := factor(!!sym(values_to), levels = c(metadata[[raw_or_label]], multi_value_label, values_fill)) ) - # Join back onto original data tbl - data_tbl_mod <- data_tbl_mod %>% - right_join(data_tbl, by = intersect(instrument_identifiers, names(data_tbl_mod))) %>% - relocate(!!values_to, .after = everything()) + final_tbl <- bind_cols(data_tbl, data_tbl_mod %>% select(!!values_to)) # Keep or remove original multi columns if (!keep) { - data_tbl_mod <- data_tbl_mod %>% + final_tbl <- final_tbl %>% select(-field_names) } # Update the supertbl data tibble - supertbl$redcap_data[supertbl$redcap_form_name == tbl][[1]] <- data_tbl_mod + supertbl$redcap_data[supertbl$redcap_form_name == tbl][[1]] <- final_tbl supertbl } @@ -144,7 +131,7 @@ combine_checkboxes <- function(supertbl, #' @param supertbl A supertibble generated by [read_redcap()]. #' @param tbl The name of the REDCap form (instrument) containing the checkbox #' fields. -#' @param instrument_identifiers Character string vector of project record identifier vars +#' @param field_names Character string vector of field names for checkbox combination #' #' @returns a tibble #' @@ -152,10 +139,11 @@ combine_checkboxes <- function(supertbl, get_metadata_ref <- function(data, supertbl, tbl, - instrument_identifiers) { + field_names) { + # Create a metadata reference table linking field name to raw and label values out <- supertbl$redcap_metadata[supertbl$redcap_form_name == tbl][[1]] %>% - filter(.data$field_name %in% names(data)[!names(data) %in% instrument_identifiers]) + filter(.data$field_name %in% field_names) # Make sure selection is checkbox metadata field type check_fields_are_checkboxes(out) diff --git a/man/get_metadata_ref.Rd b/man/get_metadata_ref.Rd index 66ebde38..afe895d9 100644 --- a/man/get_metadata_ref.Rd +++ b/man/get_metadata_ref.Rd @@ -4,7 +4,7 @@ \alias{get_metadata_ref} \title{Utility function for getting metadata raw and label values for checkboxes} \usage{ -get_metadata_ref(data, supertbl, tbl, instrument_identifiers) +get_metadata_ref(data, supertbl, tbl, field_names) } \arguments{ \item{data}{a data tibble} @@ -14,7 +14,7 @@ get_metadata_ref(data, supertbl, tbl, instrument_identifiers) \item{tbl}{The name of the REDCap form (instrument) containing the checkbox fields.} -\item{instrument_identifiers}{Character string vector of project record identifier vars} +\item{field_names}{Character string vector of field names for checkbox combination} } \value{ a tibble diff --git a/tests/testthat/test-reduce_multi_to_single_column.R b/tests/testthat/test-reduce_multi_to_single_column.R index 79596220..b41927c1 100644 --- a/tests/testthat/test-reduce_multi_to_single_column.R +++ b/tests/testthat/test-reduce_multi_to_single_column.R @@ -108,10 +108,10 @@ test_that("combine_checkboxes works for repeat instrument", { dplyr::nth(2) expected_out <- tibble::tribble( - ~"study_id", ~"redcap_form_instance", ~"redcap_event", ~"repeat___1", ~"repeat___2", ~"new_col", - 1, 1, "event_1", TRUE, FALSE, "A", - 2, 1, "event_1", TRUE, TRUE, "Multiple", - 2, 2, "event_1", FALSE, FALSE, NA + ~"study_id", ~"redcap_event",~"redcap_form_instance", ~"repeat___1", ~"repeat___2", ~"new_col", + 1, "event_1", 1, TRUE, FALSE, "A", + 2, "event_1", 1, TRUE, TRUE, "Multiple", + 2, "event_1", 2, FALSE, FALSE, NA ) %>% mutate( new_col = factor(new_col, levels = c("A", "B", "Multiple")) @@ -129,7 +129,7 @@ test_that("get_metadata_ref works", { data = data, supertbl = supertbl, tbl = "nonrepeat_instrument", - instrument_identifiers = "study_id" + field_names = c("multi___1", "multi___2", "multi___3") ) expected_out <- tibble::tribble( From b0a85642658e6063aae4c51afe874c018afe07b0 Mon Sep 17 00:00:00 2001 From: Richard Hanna Date: Wed, 17 Jul 2024 14:33:02 -0400 Subject: [PATCH 23/39] Implement parse_labels, clean code, fix tests --- NAMESPACE | 2 + R/REDCapTidieR-package.R | 2 +- R/combine_checkboxes.R | 33 ++-- tests/testthat/test-combine_checkboxes.R | 23 +-- .../test-reduce_multi_to_single_column.R | 162 ------------------ 5 files changed, 31 insertions(+), 191 deletions(-) delete mode 100644 tests/testthat/test-reduce_multi_to_single_column.R diff --git a/NAMESPACE b/NAMESPACE index 256df5a1..e48eab82 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -45,11 +45,13 @@ importFrom(dplyr,case_when) importFrom(dplyr,coalesce) importFrom(dplyr,cur_column) importFrom(dplyr,filter) +importFrom(dplyr,first) importFrom(dplyr,group_by) importFrom(dplyr,if_any) importFrom(dplyr,if_else) importFrom(dplyr,left_join) importFrom(dplyr,mutate) +importFrom(dplyr,nth) importFrom(dplyr,pull) importFrom(dplyr,recode) importFrom(dplyr,relocate) diff --git a/R/REDCapTidieR-package.R b/R/REDCapTidieR-package.R index 82c863a0..f942d740 100644 --- a/R/REDCapTidieR-package.R +++ b/R/REDCapTidieR-package.R @@ -6,7 +6,7 @@ #' @importFrom cli cli_abort cli_fmt cli_text cli_vec cli_warn qty #' @importFrom dplyr %>% across bind_rows case_when filter group_by if_any if_else #' left_join mutate pull recode relocate rename right_join row_number rowwise -#' select slice summarise ungroup coalesce cur_column bind_cols +#' select slice summarise ungroup coalesce cur_column bind_cols first nth #' @importFrom formattable percent #' @importFrom lobstr obj_size #' @importFrom lubridate is.difftime is.period is.POSIXt is.Date diff --git a/R/combine_checkboxes.R b/R/combine_checkboxes.R index 0166c793..e127486f 100644 --- a/R/combine_checkboxes.R +++ b/R/combine_checkboxes.R @@ -93,6 +93,7 @@ combine_checkboxes <- function(supertbl, ) )) + # Convert values_to from TRUE/FALSE to multi_value_label or identified single val data_tbl_mod <- data_tbl_mod %>% mutate( @@ -104,14 +105,17 @@ combine_checkboxes <- function(supertbl, multi_value_label, coalesce(!!!syms(field_names)) ), - !!values_to := ifelse(is.na(!!sym(values_to)), values_fill, !!sym(values_to)) + !!values_to := ifelse(is.na(!!sym(values_to)), + values_fill, + !!sym(values_to)) ) %>% - # select(any_of(instrument_identifiers), !!values_to) %>% mutate( - !!values_to := factor(!!sym(values_to), levels = c(metadata[[raw_or_label]], multi_value_label, values_fill)) + !!values_to := factor(!!sym(values_to), + levels = c(metadata[[raw_or_label]], multi_value_label, values_fill)) ) - final_tbl <- bind_cols(data_tbl, data_tbl_mod %>% select(!!values_to)) + final_tbl <- bind_cols(data_tbl, + data_tbl_mod %>% select(!!values_to)) # Keep or remove original multi columns if (!keep) { @@ -143,24 +147,19 @@ get_metadata_ref <- function(data, # Create a metadata reference table linking field name to raw and label values out <- supertbl$redcap_metadata[supertbl$redcap_form_name == tbl][[1]] %>% - filter(.data$field_name %in% field_names) + filter(.data$field_name %in% field_names) %>% + # TODO: original_field a temporary placeholder for future multi-field and mapping dev + mutate( + original_field = sub("___.*$", "", .data$field_name) + ) # Make sure selection is checkbox metadata field type check_fields_are_checkboxes(out) - out <- out %>% - select(.data$field_name, .data$select_choices_or_calculations) %>% - mutate( - original_field = sub("___.*$", "", .data$field_name) - ) %>% - mutate( - pairs = strsplit(.data$select_choices_or_calculations, " \\| "), - label_value = NA, - label_value = purrr::map2_chr(pairs, row_number(), \(.x, .y) .x[.y]) - ) + # TODO: Make more robust for multi-field and mapping, using original_field above + parsed_vals <- parse_labels(first(out$select_choices_or_calculations)) - out %>% - separate_wider_delim(.data$label_value, delim = ", ", names = c("raw", "label")) %>% + bind_cols(out, parsed_vals) %>% select(.data$field_name, .data$raw, .data$label) } diff --git a/tests/testthat/test-combine_checkboxes.R b/tests/testthat/test-combine_checkboxes.R index 79596220..acfa32f8 100644 --- a/tests/testthat/test-combine_checkboxes.R +++ b/tests/testthat/test-combine_checkboxes.R @@ -15,17 +15,18 @@ nonrepeat_metadata <- tibble::tribble( ) repeat_data <- tibble::tribble( - ~"study_id", ~"redcap_event", ~"redcap_form_instance", ~"repeat___1", ~"repeat___2", - 1, "event_1", 1, TRUE, FALSE, - 2, "event_1", 1, TRUE, TRUE, - 2, "event_1", 2, FALSE, FALSE + ~"study_id", ~"redcap_event", ~"redcap_form_instance", ~"repeat___1", ~"repeat___2", ~"repeat___3", + 1, "event_1", 1, TRUE, FALSE, FALSE, + 2, "event_1", 1, TRUE, TRUE, TRUE, + 2, "event_1", 2, FALSE, FALSE, FALSE ) repeat_metadata <- tibble::tribble( ~"field_name", ~"field_type", ~"select_choices_or_calculations", "study_id", "text", NA, "repeat___1", "checkbox", "1, A | 2, B | 3, C", - "repeat___2", "checkbox", "1, A | 2, B | 3, C" + "repeat___2", "checkbox", "1, A | 2, B | 3, C", + "repeat___3", "checkbox", "1, A | 2, B | 3, C" ) supertbl <- tibble::tribble( @@ -108,13 +109,13 @@ test_that("combine_checkboxes works for repeat instrument", { dplyr::nth(2) expected_out <- tibble::tribble( - ~"study_id", ~"redcap_form_instance", ~"redcap_event", ~"repeat___1", ~"repeat___2", ~"new_col", - 1, 1, "event_1", TRUE, FALSE, "A", - 2, 1, "event_1", TRUE, TRUE, "Multiple", - 2, 2, "event_1", FALSE, FALSE, NA + ~"study_id", ~"redcap_event",~"redcap_form_instance", ~"repeat___1", ~"repeat___2", ~"repeat___3", ~"new_col", + 1, "event_1", 1, TRUE, FALSE, FALSE, "A", + 2, "event_1", 1, TRUE, TRUE, TRUE, "Multiple", + 2, "event_1", 2, FALSE, FALSE, FALSE, NA ) %>% mutate( - new_col = factor(new_col, levels = c("A", "B", "Multiple")) + new_col = factor(new_col, levels = c("A", "B", "C", "Multiple")) ) expect_equal(out, expected_out) @@ -129,7 +130,7 @@ test_that("get_metadata_ref works", { data = data, supertbl = supertbl, tbl = "nonrepeat_instrument", - instrument_identifiers = "study_id" + field_names = c("multi___1", "multi___2", "multi___3") ) expected_out <- tibble::tribble( diff --git a/tests/testthat/test-reduce_multi_to_single_column.R b/tests/testthat/test-reduce_multi_to_single_column.R deleted file mode 100644 index b41927c1..00000000 --- a/tests/testthat/test-reduce_multi_to_single_column.R +++ /dev/null @@ -1,162 +0,0 @@ -nonrepeat_data <- tibble::tribble( - ~"study_id", ~"multi___1", ~"multi___2", ~"multi___3", ~"extra_data", - 1, TRUE, FALSE, FALSE, 1, - 2, TRUE, TRUE, FALSE, 2, - 3, FALSE, FALSE, FALSE, 3 -) - -nonrepeat_metadata <- tibble::tribble( - ~"field_name", ~"field_type", ~"select_choices_or_calculations", - "study_id", "text", NA, - "multi___1", "checkbox", "1, Red | 2, Yellow | 3, Blue", - "multi___2", "checkbox", "1, Red | 2, Yellow | 3, Blue", - "multi___3", "checkbox", "1, Red | 2, Yellow | 3, Blue", - "extra_data", "dropdown", "1, 1 | 2, 2 | 3,3" -) - -repeat_data <- tibble::tribble( - ~"study_id", ~"redcap_event", ~"redcap_form_instance", ~"repeat___1", ~"repeat___2", - 1, "event_1", 1, TRUE, FALSE, - 2, "event_1", 1, TRUE, TRUE, - 2, "event_1", 2, FALSE, FALSE -) - -repeat_metadata <- tibble::tribble( - ~"field_name", ~"field_type", ~"select_choices_or_calculations", - "study_id", "text", NA, - "repeat___1", "checkbox", "1, A | 2, B | 3, C", - "repeat___2", "checkbox", "1, A | 2, B | 3, C" -) - -supertbl <- tibble::tribble( - ~"redcap_form_name", ~"redcap_data", ~"redcap_metadata", - "nonrepeat_instrument", nonrepeat_data, nonrepeat_metadata, - "repeat_instrument", repeat_data, repeat_metadata -) - -class(supertbl) <- c("redcap_supertbl", class(supertbl)) - -test_that("combine_checkboxes returns an expected supertbl", { - out <- combine_checkboxes( - supertbl = supertbl, - tbl = "nonrepeat_instrument", - cols = starts_with("multi"), - values_to = "new_col" - ) # values_fill declared - - expect_setequal(class(out), c("redcap_supertbl", "tbl_df", "tbl", "data.frame")) - expect_equal(nrow(out), 2) -}) - -test_that("combine_checkboxes works for nonrepeat instrument", { - out <- combine_checkboxes( - supertbl = supertbl, - tbl = "nonrepeat_instrument", - cols = starts_with("multi"), - values_to = "new_col", - multi_value_label = "multiple", # multi_value_label declared - values_fill = "none" # values_fill declared - ) %>% - pull(redcap_data) %>% - dplyr::first() - - expected_out <- tibble::tribble( - ~"study_id", ~"multi___1", ~"multi___2", ~"multi___3", ~"extra_data", ~"new_col", - 1, TRUE, FALSE, FALSE, 1, "Red", - 2, TRUE, TRUE, FALSE, 2, "multiple", - 3, FALSE, FALSE, FALSE, 3, "none" - ) %>% - mutate( - new_col = factor(new_col, levels = c("Red", "Yellow", "Blue", "multiple", "none")) - ) - - expect_equal(out, expected_out) -}) - -test_that("combine_checkboxes works for nonrepeat instrument and drop old values", { - out <- combine_checkboxes( - supertbl = supertbl, - tbl = "nonrepeat_instrument", - cols = starts_with("multi"), - values_to = "new_col", - keep = FALSE # Test keep = FALSE - ) %>% - pull(redcap_data) %>% - dplyr::first() - - expected_out <- tibble::tribble( - ~"study_id", ~"extra_data", ~"new_col", - 1, 1, "Red", - 2, 2, "Multiple", - 3, 3, NA - ) %>% - mutate( - new_col = factor(new_col, levels = c("Red", "Yellow", "Blue", "Multiple")) - ) - - expect_equal(out, expected_out) -}) - -test_that("combine_checkboxes works for repeat instrument", { - out <- combine_checkboxes( - supertbl = supertbl, - tbl = "repeat_instrument", - cols = starts_with("repeat"), - values_to = "new_col" - ) %>% - pull(redcap_data) %>% - dplyr::nth(2) - - expected_out <- tibble::tribble( - ~"study_id", ~"redcap_event",~"redcap_form_instance", ~"repeat___1", ~"repeat___2", ~"new_col", - 1, "event_1", 1, TRUE, FALSE, "A", - 2, "event_1", 1, TRUE, TRUE, "Multiple", - 2, "event_1", 2, FALSE, FALSE, NA - ) %>% - mutate( - new_col = factor(new_col, levels = c("A", "B", "Multiple")) - ) - - expect_equal(out, expected_out) -}) - -test_that("get_metadata_ref works", { - data <- nonrepeat_data %>% - select(study_id, contains("multi")) %>% - mutate(new_data = c(FALSE, TRUE, FALSE)) - - out <- get_metadata_ref( - data = data, - supertbl = supertbl, - tbl = "nonrepeat_instrument", - field_names = c("multi___1", "multi___2", "multi___3") - ) - - expected_out <- tibble::tribble( - ~"field_name", ~"raw", ~"label", - "multi___1", "1", "Red", - "multi___2", "2", "Yellow", - "multi___3", "3", "Blue" - ) - - expect_equal(out, expected_out) -}) - -test_that("replace_true works", { - metadata <- tibble::tribble( - ~"field_name", ~"raw", ~"label", - "multi___1", "1", "Red", - "multi___2", "2", "Yellow", - "multi___3", "3", "Blue" - ) - - out <- replace_true(col = c(TRUE, TRUE, FALSE), col_name = "multi___1", metadata = metadata, raw_or_label = "raw") - expected_out <- c("1", "1", NA) - - expect_equal(out, expected_out) - - out <- replace_true(col = c(TRUE, TRUE, FALSE), col_name = "multi___1", metadata = metadata, raw_or_label = "label") - expected_out <- c("Red", "Red", NA) - - expect_equal(out, expected_out) -}) From 21f8879c90295de0459662ef5cb517cf0d2d9751 Mon Sep 17 00:00:00 2001 From: Richard Hanna Date: Wed, 17 Jul 2024 14:39:15 -0400 Subject: [PATCH 24/39] Remove record_id field, lint --- R/combine_checkboxes.R | 29 ++++++++++++------------ tests/testthat/test-combine_checkboxes.R | 2 +- 2 files changed, 15 insertions(+), 16 deletions(-) diff --git a/R/combine_checkboxes.R b/R/combine_checkboxes.R index e127486f..9e23b172 100644 --- a/R/combine_checkboxes.R +++ b/R/combine_checkboxes.R @@ -45,7 +45,6 @@ combine_checkboxes <- function(supertbl, values_fill = NA, raw_or_label = "label", keep = TRUE) { - # Check args --- check_arg_is_supertbl(supertbl, req_cols = c("redcap_data", "redcap_metadata")) check_arg_is_character(tbl, len = 1, any.missing = FALSE) @@ -66,9 +65,6 @@ combine_checkboxes <- function(supertbl, field_names <- names(eval_select(cols_exp, data = data_tbl)) check_fields_exist(fields = field_names, expr = cols_exp) - # Identify record_id field - record_id_field <- get_record_id_field(supertbl$redcap_data[[1]]) - # Define values_to as the count of TRUEs/1s for the given checkbox field # Assign TRUE if multiple selections made, and FALSE if one or zero made data_tbl_mod <- data_tbl %>% @@ -87,9 +83,9 @@ combine_checkboxes <- function(supertbl, mutate(across( field_names, ~ replace_true(.x, - cur_column(), - metadata = metadata, - raw_or_label = raw_or_label + cur_column(), + metadata = metadata, + raw_or_label = raw_or_label ) )) @@ -102,20 +98,24 @@ combine_checkboxes <- function(supertbl, ) %>% mutate( !!values_to := ifelse(!!sym(values_to) == "TRUE", - multi_value_label, - coalesce(!!!syms(field_names)) + multi_value_label, + coalesce(!!!syms(field_names)) ), !!values_to := ifelse(is.na(!!sym(values_to)), - values_fill, - !!sym(values_to)) + values_fill, + !!sym(values_to) + ) ) %>% mutate( !!values_to := factor(!!sym(values_to), - levels = c(metadata[[raw_or_label]], multi_value_label, values_fill)) + levels = c(metadata[[raw_or_label]], multi_value_label, values_fill) + ) ) - final_tbl <- bind_cols(data_tbl, - data_tbl_mod %>% select(!!values_to)) + final_tbl <- bind_cols( + data_tbl, + data_tbl_mod %>% select(!!values_to) + ) # Keep or remove original multi columns if (!keep) { @@ -144,7 +144,6 @@ get_metadata_ref <- function(data, supertbl, tbl, field_names) { - # Create a metadata reference table linking field name to raw and label values out <- supertbl$redcap_metadata[supertbl$redcap_form_name == tbl][[1]] %>% filter(.data$field_name %in% field_names) %>% diff --git a/tests/testthat/test-combine_checkboxes.R b/tests/testthat/test-combine_checkboxes.R index acfa32f8..5c6db630 100644 --- a/tests/testthat/test-combine_checkboxes.R +++ b/tests/testthat/test-combine_checkboxes.R @@ -109,7 +109,7 @@ test_that("combine_checkboxes works for repeat instrument", { dplyr::nth(2) expected_out <- tibble::tribble( - ~"study_id", ~"redcap_event",~"redcap_form_instance", ~"repeat___1", ~"repeat___2", ~"repeat___3", ~"new_col", + ~"study_id", ~"redcap_event", ~"redcap_form_instance", ~"repeat___1", ~"repeat___2", ~"repeat___3", ~"new_col", 1, "event_1", 1, TRUE, FALSE, FALSE, "A", 2, "event_1", 1, TRUE, TRUE, TRUE, "Multiple", 2, "event_1", 2, FALSE, FALSE, FALSE, NA From 31797c6099dec9c4a02ef9cca53af36c4e390303 Mon Sep 17 00:00:00 2001 From: Richard Hanna Date: Thu, 18 Jul 2024 08:41:06 -0400 Subject: [PATCH 25/39] Apply additional cleanup suggestions --- R/combine_checkboxes.R | 24 +++++++++--------------- man/get_metadata_ref.Rd | 9 ++------- tests/testthat/test-combine_checkboxes.R | 8 +------- 3 files changed, 12 insertions(+), 29 deletions(-) diff --git a/R/combine_checkboxes.R b/R/combine_checkboxes.R index 9e23b172..4f61625c 100644 --- a/R/combine_checkboxes.R +++ b/R/combine_checkboxes.R @@ -76,7 +76,8 @@ combine_checkboxes <- function(supertbl, ) # Get metadata reference table, check that chosen fields are checkboxes - metadata <- get_metadata_ref(data_tbl_mod, supertbl, tbl, field_names) + metadata_tbl <- supertbl$redcap_metadata[supertbl$redcap_form_name == tbl][[1]] + metadata_ref <- get_metadata_ref(metadata_tbl, field_names) # Replace TRUEs/1s with raw/label values from metadata data_tbl_mod <- data_tbl_mod %>% @@ -84,20 +85,18 @@ combine_checkboxes <- function(supertbl, field_names, ~ replace_true(.x, cur_column(), - metadata = metadata, + metadata = metadata_ref, raw_or_label = raw_or_label ) )) - # Convert values_to from TRUE/FALSE to multi_value_label or identified single val data_tbl_mod <- data_tbl_mod %>% mutate( - across(field_names, as.character), # enforce to character strings - across(!!values_to, ~ as.character(.)) + across(field_names, as.character) # enforce to character strings ) %>% mutate( - !!values_to := ifelse(!!sym(values_to) == "TRUE", + !!values_to := ifelse(!!sym(values_to), multi_value_label, coalesce(!!!syms(field_names)) ), @@ -108,7 +107,7 @@ combine_checkboxes <- function(supertbl, ) %>% mutate( !!values_to := factor(!!sym(values_to), - levels = c(metadata[[raw_or_label]], multi_value_label, values_fill) + levels = c(metadata_ref[[raw_or_label]], multi_value_label, values_fill) ) ) @@ -131,21 +130,16 @@ combine_checkboxes <- function(supertbl, #' @title Utility function for getting metadata raw and label values for checkboxes #' -#' @param data a data tibble -#' @param supertbl A supertibble generated by [read_redcap()]. -#' @param tbl The name of the REDCap form (instrument) containing the checkbox -#' fields. +#' @param metadata_tbl A metadata tibble from the supertibble generated by [read_redcap()]. #' @param field_names Character string vector of field names for checkbox combination #' #' @returns a tibble #' #' @keywords internal -get_metadata_ref <- function(data, - supertbl, - tbl, +get_metadata_ref <- function(metadata_tbl, field_names) { # Create a metadata reference table linking field name to raw and label values - out <- supertbl$redcap_metadata[supertbl$redcap_form_name == tbl][[1]] %>% + out <- metadata_tbl %>% filter(.data$field_name %in% field_names) %>% # TODO: original_field a temporary placeholder for future multi-field and mapping dev mutate( diff --git a/man/get_metadata_ref.Rd b/man/get_metadata_ref.Rd index afe895d9..2c09c4bb 100644 --- a/man/get_metadata_ref.Rd +++ b/man/get_metadata_ref.Rd @@ -4,15 +4,10 @@ \alias{get_metadata_ref} \title{Utility function for getting metadata raw and label values for checkboxes} \usage{ -get_metadata_ref(data, supertbl, tbl, field_names) +get_metadata_ref(metadata_tbl, field_names) } \arguments{ -\item{data}{a data tibble} - -\item{supertbl}{A supertibble generated by \code{\link[=read_redcap]{read_redcap()}}.} - -\item{tbl}{The name of the REDCap form (instrument) containing the checkbox -fields.} +\item{metadata_tbl}{A metadata tibble from the supertibble generated by \code{\link[=read_redcap]{read_redcap()}}.} \item{field_names}{Character string vector of field names for checkbox combination} } diff --git a/tests/testthat/test-combine_checkboxes.R b/tests/testthat/test-combine_checkboxes.R index 5c6db630..45abb426 100644 --- a/tests/testthat/test-combine_checkboxes.R +++ b/tests/testthat/test-combine_checkboxes.R @@ -122,14 +122,8 @@ test_that("combine_checkboxes works for repeat instrument", { }) test_that("get_metadata_ref works", { - data <- nonrepeat_data %>% - select(study_id, contains("multi")) %>% - mutate(new_data = c(FALSE, TRUE, FALSE)) - out <- get_metadata_ref( - data = data, - supertbl = supertbl, - tbl = "nonrepeat_instrument", + metadata_tbl = supertbl$redcap_metadata[[1]], field_names = c("multi___1", "multi___2", "multi___3") ) From 2dfac9a2261113eb57bd40b41d97ea277c4ece0a Mon Sep 17 00:00:00 2001 From: Richard Hanna Date: Thu, 18 Jul 2024 09:15:51 -0400 Subject: [PATCH 26/39] Add extract_metadata fnctn, tests --- R/utils.R | 20 +++++++++++++++++++ man/extract_metadata_tibble.Rd | 22 +++++++++++++++++++++ tests/testthat/test-utils.R | 35 ++++++++++++++++++++++++++++++++++ 3 files changed, 77 insertions(+) create mode 100644 man/extract_metadata_tibble.Rd diff --git a/R/utils.R b/R/utils.R index f6a04763..32dc3a78 100644 --- a/R/utils.R +++ b/R/utils.R @@ -954,3 +954,23 @@ safe_set_variable_labels <- function(data, labs) { labs_to_keep <- intersect(names(labs), colnames(data)) labelled::set_variable_labels(data, !!!labs[labs_to_keep]) } + +#' @title +#' Extract a specific metadata tibble from a supertibble +#' +#' @description +#' Utility function to extract a specific metadata tibble from a supertibble +#' given a `redcap_form_name` +#' +#' @param supertbl A supertibble generated by [read_redcap()]. +#' @param redcap_form_name A character string identifying the `redcap_form_name` +#' the metadata tibble is associated with. +#' +#' @return +#' A tibble +#' +#' @keywords internal + +extract_metadata_tibble <- function(supertbl, redcap_form_name) { + supertbl$redcap_metadata[supertbl$redcap_form_name == redcap_form_name][[1]] +} diff --git a/man/extract_metadata_tibble.Rd b/man/extract_metadata_tibble.Rd new file mode 100644 index 00000000..0623b75d --- /dev/null +++ b/man/extract_metadata_tibble.Rd @@ -0,0 +1,22 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/utils.R +\name{extract_metadata_tibble} +\alias{extract_metadata_tibble} +\title{Extract a specific metadata tibble from a supertibble} +\usage{ +extract_metadata_tibble(supertbl, redcap_form_name) +} +\arguments{ +\item{supertbl}{A supertibble generated by \code{\link[=read_redcap]{read_redcap()}}.} + +\item{redcap_form_name}{A character string identifying the \code{redcap_form_name} +the metadata tibble is associated with.} +} +\value{ +A tibble +} +\description{ +Utility function to extract a specific metadata tibble from a supertibble +given a \code{redcap_form_name} +} +\keyword{internal} diff --git a/tests/testthat/test-utils.R b/tests/testthat/test-utils.R index e5b7f9a2..2fe31dbd 100644 --- a/tests/testthat/test-utils.R +++ b/tests/testthat/test-utils.R @@ -496,3 +496,38 @@ test_that("apply_labs_factor works", { test_that("force_cast converts non chr/numerics to chr", { expect_character(force_cast("2023-01-01", as.Date(NA))) }) + +test_that("get_record_id_field works", { + data_tbl <- tibble::tribble( + ~"test_name", ~"test_value", + 1, 2 + ) + + expect_equal(get_record_id_field(data_tbl), "test_name") +}) + +test_that("extract_metadata_tibble works", { + inst_1_metadata <- tibble::tribble( + ~"field_name", ~"field_type", + "study_id", "text", + "text", "text", + ) + + inst_2_metadata <- tibble::tribble( + ~"field_name", ~"field_type", + "study_id", "text", + "calulated", "calc", + ) + + supertbl <- tibble::tribble( + ~"redcap_form_name", ~"redcap_metadata", + "inst_1", inst_1_metadata, + "inst_2", inst_2_metadata + ) + + class(supertbl) <- c("redcap_supertbl", class(supertbl)) + + out <- extract_metadata_tibble(supertbl = supertbl, redcap_form_name = "inst_1") + + expect_equal(out, inst_1_metadata) +}) From ed552925214231e06832a06948ee39296dcfe009 Mon Sep 17 00:00:00 2001 From: Richard Hanna Date: Wed, 24 Jul 2024 16:38:29 -0400 Subject: [PATCH 27/39] Support multiple values_to, logicals, new checks --- R/checks.R | 29 ++++++ R/combine_checkboxes.R | 118 +++++++++++++---------- man/check_values_to_length.Rd | 19 ++++ man/get_metadata_ref.Rd | 4 +- tests/testthat/test-checks.R | 19 ++++ tests/testthat/test-combine_checkboxes.R | 87 +++++++++++++---- 6 files changed, 207 insertions(+), 69 deletions(-) create mode 100644 man/check_values_to_length.Rd diff --git a/R/checks.R b/R/checks.R index e763c864..b34102b1 100644 --- a/R/checks.R +++ b/R/checks.R @@ -712,3 +712,32 @@ check_fields_are_checkboxes <- function(metadata_tbl, call = caller_env()) { ) } } + +#' @title +#' Check values_to length against detected number of checkbox fields +#' +#' @param col_groups a list of column groups identified by checkbox field detection +#' @param values_to a user defined character vector passed from [combine_checkboxes()] +#' @param call The calling environment to use in the error message +#' +#' @keywords internal +check_values_to_length <- function(col_groups, values_to, call = caller_env()) { + if (length(values_to) < length(names(col_groups))) { + cli_warn( + message = c( + `!` = "Detected fewer {.code values_to} arguments than the number of checkbox fields. Only the first {length(values_to)} will be used." + ), + class = c("checkbox_value_to_length", "REDCapTidieR_cond") + ) + } + + if (length(values_to) > length(names(col_groups))) { + cli_abort( + message = c( + `x` = "The number of {.code values_to} arguments supplied is greater than the number of checkbox fields detected.", + `i` = "{length(values_to)} {.code values_to} supplied, {length(names(col_groups))} checkbox fields detected." + ), + class = c("checkbox_value_to_length", "REDCapTidieR_cond") + ) + } +} diff --git a/R/combine_checkboxes.R b/R/combine_checkboxes.R index 4f61625c..50dba153 100644 --- a/R/combine_checkboxes.R +++ b/R/combine_checkboxes.R @@ -48,68 +48,81 @@ combine_checkboxes <- function(supertbl, # Check args --- check_arg_is_supertbl(supertbl, req_cols = c("redcap_data", "redcap_metadata")) check_arg_is_character(tbl, len = 1, any.missing = FALSE) - check_arg_is_character(values_to, len = 1, any.missing = FALSE) + check_arg_is_character(values_to, any.missing = FALSE) check_arg_is_character(multi_value_label, len = 1, any.missing = TRUE) check_arg_is_character(values_fill, len = 1, any.missing = TRUE) check_arg_choices(raw_or_label, choices = c("label", "raw")) check_arg_is_logical(keep, len = 1, any.missing = FALSE) - # Save user cols to enquosure - cols_exp <- enquo(cols) - # Extract tbl from supertbl data_tbl <- supertbl %>% extract_tibble(tbl) - # Get field names from cols_exp, check that fields exist - field_names <- names(eval_select(cols_exp, data = data_tbl)) - check_fields_exist(fields = field_names, expr = cols_exp) + # Save user cols to quosure + cols_exp <- enquo(cols) - # Define values_to as the count of TRUEs/1s for the given checkbox field - # Assign TRUE if multiple selections made, and FALSE if one or zero made - data_tbl_mod <- data_tbl %>% - mutate( - !!values_to := case_when( - rowSums(select(., eval_tidy(cols_exp))) > 1 ~ TRUE, - TRUE ~ FALSE - ) - ) + # Evaluate the cols expression to get the selected column names + selected_cols <- names(eval_select(cols_exp, data = data_tbl)) + check_fields_exist(fields = selected_cols, expr = cols_exp) # Check supplied fields exist + + # Extract the prefix of each selected column + prefixes <- sub("___.*", "", selected_cols) + + # Split the selected columns based on their prefixes + col_groups <- split(selected_cols, prefixes) + check_values_to_length(col_groups, values_to) # Check values_to columns match length of fields # Get metadata reference table, check that chosen fields are checkboxes metadata_tbl <- supertbl$redcap_metadata[supertbl$redcap_form_name == tbl][[1]] - metadata_ref <- get_metadata_ref(metadata_tbl, field_names) + metadata_ref <- get_metadata_ref(metadata_tbl, selected_cols) + + # Define values_to as the count of TRUEs/1s for the given checkbox field + # Assign TRUE if multiple selections made, and FALSE if one or zero made + data_tbl_mod <- data_tbl + + for (i in seq_along(values_to)) { + data_tbl_mod <- data_tbl_mod %>% + mutate( + !!values_to[i] := case_when( + rowSums(select(., col_groups[[i]])) > 1 ~ TRUE, + .default = FALSE + ) + ) + } # Replace TRUEs/1s with raw/label values from metadata data_tbl_mod <- data_tbl_mod %>% mutate(across( - field_names, + selected_cols, ~ replace_true(.x, - cur_column(), - metadata = metadata_ref, - raw_or_label = raw_or_label + cur_column(), + metadata = metadata_ref, + raw_or_label = raw_or_label ) - )) + ), + across(selected_cols, as.character) # enforce to character strings + ) - # Convert values_to from TRUE/FALSE to multi_value_label or identified single val - data_tbl_mod <- data_tbl_mod %>% - mutate( - across(field_names, as.character) # enforce to character strings - ) %>% - mutate( - !!values_to := ifelse(!!sym(values_to), - multi_value_label, - coalesce(!!!syms(field_names)) - ), - !!values_to := ifelse(is.na(!!sym(values_to)), - values_fill, - !!sym(values_to) + for (i in seq_along(values_to)) { + metadata_overwrite <- metadata_ref %>% filter(field_name %in% col_groups[[i]]) %>% pull(raw_or_label) + + data_tbl_mod <- data_tbl_mod %>% + mutate( + !!values_to[i] := ifelse(!!sym(values_to[i]), + multi_value_label, + coalesce(!!!syms(col_groups[[i]])) + ), + !!values_to[i] := ifelse(is.na(!!sym(values_to[i])), + values_fill, + !!sym(values_to[i]) + ) + ) %>% + mutate( + !!values_to[i] := factor(!!sym(values_to[i]), + levels = c(metadata_overwrite, multi_value_label, values_fill) + ) ) - ) %>% - mutate( - !!values_to := factor(!!sym(values_to), - levels = c(metadata_ref[[raw_or_label]], multi_value_label, values_fill) - ) - ) + } final_tbl <- bind_cols( data_tbl, @@ -119,7 +132,7 @@ combine_checkboxes <- function(supertbl, # Keep or remove original multi columns if (!keep) { final_tbl <- final_tbl %>% - select(-field_names) + select(-selected_cols) } # Update the supertbl data tibble @@ -131,17 +144,16 @@ combine_checkboxes <- function(supertbl, #' @title Utility function for getting metadata raw and label values for checkboxes #' #' @param metadata_tbl A metadata tibble from the supertibble generated by [read_redcap()]. -#' @param field_names Character string vector of field names for checkbox combination +#' @param selected_cols Character string vector of field names for checkbox combination #' #' @returns a tibble #' #' @keywords internal get_metadata_ref <- function(metadata_tbl, - field_names) { + selected_cols) { # Create a metadata reference table linking field name to raw and label values out <- metadata_tbl %>% - filter(.data$field_name %in% field_names) %>% - # TODO: original_field a temporary placeholder for future multi-field and mapping dev + filter(.data$field_name %in% selected_cols) %>% mutate( original_field = sub("___.*$", "", .data$field_name) ) @@ -149,11 +161,19 @@ get_metadata_ref <- function(metadata_tbl, # Make sure selection is checkbox metadata field type check_fields_are_checkboxes(out) - # TODO: Make more robust for multi-field and mapping, using original_field above - parsed_vals <- parse_labels(first(out$select_choices_or_calculations)) + # Bind raw/label values per original field grouping + parsed_vals <- tibble() + + for (i in seq_along(unique(out$original_field))) { + index <- unique(out$original_field)[i] + out_filtered <- out %>% filter(original_field == index) + + parsed_vals <- rbind(parsed_vals, parse_labels(first(out_filtered$select_choices_or_calculations))) + } bind_cols(out, parsed_vals) %>% - select(.data$field_name, .data$raw, .data$label) + select(.data$field_name, .data$raw, .data$label, original_field) %>% + relocate(original_field, .after = field_name) } #' @noRd diff --git a/man/check_values_to_length.Rd b/man/check_values_to_length.Rd new file mode 100644 index 00000000..e9194a08 --- /dev/null +++ b/man/check_values_to_length.Rd @@ -0,0 +1,19 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/checks.R +\name{check_values_to_length} +\alias{check_values_to_length} +\title{Check values_to length against detected number of checkbox fields} +\usage{ +check_values_to_length(col_groups, values_to, call = caller_env()) +} +\arguments{ +\item{col_groups}{a list of column groups identified by checkbox field detection} + +\item{values_to}{a user defined character vector passed from \code{\link[=combine_checkboxes]{combine_checkboxes()}}} + +\item{call}{The calling environment to use in the error message} +} +\description{ +Check values_to length against detected number of checkbox fields +} +\keyword{internal} diff --git a/man/get_metadata_ref.Rd b/man/get_metadata_ref.Rd index 2c09c4bb..6ced220b 100644 --- a/man/get_metadata_ref.Rd +++ b/man/get_metadata_ref.Rd @@ -4,12 +4,12 @@ \alias{get_metadata_ref} \title{Utility function for getting metadata raw and label values for checkboxes} \usage{ -get_metadata_ref(metadata_tbl, field_names) +get_metadata_ref(metadata_tbl, selected_cols) } \arguments{ \item{metadata_tbl}{A metadata tibble from the supertibble generated by \code{\link[=read_redcap]{read_redcap()}}.} -\item{field_names}{Character string vector of field names for checkbox combination} +\item{selected_cols}{Character string vector of field names for checkbox combination} } \value{ a tibble diff --git a/tests/testthat/test-checks.R b/tests/testthat/test-checks.R index 77435375..3fc71072 100644 --- a/tests/testthat/test-checks.R +++ b/tests/testthat/test-checks.R @@ -272,3 +272,22 @@ test_that("check_fields_are_checkboxes works", { expect_error(check_fields_are_checkboxes(metadata), class = "non_checkbox_fields") expect_no_error(check_fields_are_checkboxes(metadata_filtered)) }) + +test_that("check_values_to_length length works", { + col_groups <- list( + checkbox_1 = c("checkbox1___1", "checkbox1___2"), + checkbox_2 = c("checkbox2___1") + ) + + values_to <- c("new_col1", "new_col2") + + expect_no_message(check_values_to_length(col_groups, values_to)) + + values_to_warn <- "new_col1" + + expect_warning(check_values_to_length(col_groups, values_to_warn), class = "checkbox_value_to_length") + + values_to_error <- c("new_col1", "new_col2", "new_col3") + + expect_error(check_values_to_length(col_groups, values_to_error), class = "checkbox_value_to_length") +}) diff --git a/tests/testthat/test-combine_checkboxes.R b/tests/testthat/test-combine_checkboxes.R index 45abb426..bb7af8f2 100644 --- a/tests/testthat/test-combine_checkboxes.R +++ b/tests/testthat/test-combine_checkboxes.R @@ -1,8 +1,8 @@ nonrepeat_data <- tibble::tribble( - ~"study_id", ~"multi___1", ~"multi___2", ~"multi___3", ~"extra_data", - 1, TRUE, FALSE, FALSE, 1, - 2, TRUE, TRUE, FALSE, 2, - 3, FALSE, FALSE, FALSE, 3 + ~"study_id", ~"multi___1", ~"multi___2", ~"multi___3", ~"single_checkbox___1", ~"extra_data", + 1, TRUE, FALSE, FALSE, TRUE, 1, + 2, TRUE, TRUE, FALSE, TRUE, 2, + 3, FALSE, FALSE, FALSE, FALSE, 3 ) nonrepeat_metadata <- tibble::tribble( @@ -11,7 +11,8 @@ nonrepeat_metadata <- tibble::tribble( "multi___1", "checkbox", "1, Red | 2, Yellow | 3, Blue", "multi___2", "checkbox", "1, Red | 2, Yellow | 3, Blue", "multi___3", "checkbox", "1, Red | 2, Yellow | 3, Blue", - "extra_data", "dropdown", "1, 1 | 2, 2 | 3,3" + "single_checkbox___1", "checkbox", "4, Green", + "extra_data", "dropdown", "1, 1 | 2, 2 | 3, 3" ) repeat_data <- tibble::tribble( @@ -62,10 +63,10 @@ test_that("combine_checkboxes works for nonrepeat instrument", { dplyr::first() expected_out <- tibble::tribble( - ~"study_id", ~"multi___1", ~"multi___2", ~"multi___3", ~"extra_data", ~"new_col", - 1, TRUE, FALSE, FALSE, 1, "Red", - 2, TRUE, TRUE, FALSE, 2, "multiple", - 3, FALSE, FALSE, FALSE, 3, "none" + ~"study_id", ~"multi___1", ~"multi___2", ~"multi___3", ~"single_checkbox___1", ~"extra_data", ~"new_col", + 1, TRUE, FALSE, FALSE, TRUE, 1, "Red", + 2, TRUE, TRUE, FALSE, TRUE, 2, "multiple", + 3, FALSE, FALSE, FALSE, FALSE, 3, "none" ) %>% mutate( new_col = factor(new_col, levels = c("Red", "Yellow", "Blue", "multiple", "none")) @@ -86,10 +87,10 @@ test_that("combine_checkboxes works for nonrepeat instrument and drop old values dplyr::first() expected_out <- tibble::tribble( - ~"study_id", ~"extra_data", ~"new_col", - 1, 1, "Red", - 2, 2, "Multiple", - 3, 3, NA + ~"study_id", ~"single_checkbox___1", ~"extra_data", ~"new_col", + 1, TRUE, 1, "Red", + 2, TRUE, 2, "Multiple", + 3, FALSE, 3, NA ) %>% mutate( new_col = factor(new_col, levels = c("Red", "Yellow", "Blue", "Multiple")) @@ -124,14 +125,14 @@ test_that("combine_checkboxes works for repeat instrument", { test_that("get_metadata_ref works", { out <- get_metadata_ref( metadata_tbl = supertbl$redcap_metadata[[1]], - field_names = c("multi___1", "multi___2", "multi___3") + selected_cols = c("multi___1", "multi___2", "multi___3") ) expected_out <- tibble::tribble( - ~"field_name", ~"raw", ~"label", - "multi___1", "1", "Red", - "multi___2", "2", "Yellow", - "multi___3", "3", "Blue" + ~"field_name", ~"original_field", ~"raw", ~"label", + "multi___1", "multi", "1", "Red", + "multi___2", "multi", "2", "Yellow", + "multi___3", "multi", "3", "Blue" ) expect_equal(out, expected_out) @@ -155,3 +156,53 @@ test_that("replace_true works", { expect_equal(out, expected_out) }) + +test_that("combine_checkboxes works for multiple checkbox fields", { + out <- combine_checkboxes( + supertbl = supertbl, + tbl = "nonrepeat_instrument", + cols = c(starts_with("multi"), starts_with("single_checkbox")), + values_to = c("new_col1", "new_col2"), + keep = FALSE + ) %>% + pull(redcap_data) %>% + dplyr::first() + + expected_out <- tibble::tribble( + ~"study_id",~"extra_data", ~"new_col1", ~"new_col2", + 1, 1, "Red", "Green", + 2, 2, "Multiple", "Green", + 3, 3, NA, NA + ) %>% + mutate( + new_col1 = factor(new_col1, levels = c("Red", "Yellow", "Blue", "Multiple")), + new_col2 = factor(new_col2, levels = c("Green", "Multiple")) + ) + + expect_equal(out, expected_out) +}) + +test_that("combine_checkboxes works for multiple checkbox fields with logicals", { + out <- combine_checkboxes( + supertbl = supertbl, + tbl = "nonrepeat_instrument", + cols = c(starts_with("multi")| starts_with("single_checkbox")), + values_to = c("new_col1", "new_col2"), + keep = FALSE + ) %>% + pull(redcap_data) %>% + dplyr::first() + + expected_out <- tibble::tribble( + ~"study_id",~"extra_data", ~"new_col1", ~"new_col2", + 1, 1, "Red", "Green", + 2, 2, "Multiple", "Green", + 3, 3, NA, NA + ) %>% + mutate( + new_col1 = factor(new_col1, levels = c("Red", "Yellow", "Blue", "Multiple")), + new_col2 = factor(new_col2, levels = c("Green", "Multiple")) + ) + + expect_equal(out, expected_out) +}) From c0b38853f780fd05304a1246980a5cd6e1ebdbf9 Mon Sep 17 00:00:00 2001 From: Richard Hanna Date: Wed, 24 Jul 2024 17:07:11 -0400 Subject: [PATCH 28/39] Linting --- R/checks.R | 4 +-- R/combine_checkboxes.R | 39 +++++++++++++----------- tests/testthat/test-combine_checkboxes.R | 6 ++-- 3 files changed, 26 insertions(+), 23 deletions(-) diff --git a/R/checks.R b/R/checks.R index b34102b1..940afafe 100644 --- a/R/checks.R +++ b/R/checks.R @@ -725,7 +725,7 @@ check_values_to_length <- function(col_groups, values_to, call = caller_env()) { if (length(values_to) < length(names(col_groups))) { cli_warn( message = c( - `!` = "Detected fewer {.code values_to} arguments than the number of checkbox fields. Only the first {length(values_to)} will be used." + `!` = "Detected fewer {.code values_to} arguments than the number of checkbox fields. Only the first {length(values_to)} will be used." # nolint line_length_linter ), class = c("checkbox_value_to_length", "REDCapTidieR_cond") ) @@ -734,7 +734,7 @@ check_values_to_length <- function(col_groups, values_to, call = caller_env()) { if (length(values_to) > length(names(col_groups))) { cli_abort( message = c( - `x` = "The number of {.code values_to} arguments supplied is greater than the number of checkbox fields detected.", + `x` = "The number of {.code values_to} arguments supplied is greater than the number of checkbox fields detected.", # nolint line_length_linter `i` = "{length(values_to)} {.code values_to} supplied, {length(names(col_groups))} checkbox fields detected." ), class = c("checkbox_value_to_length", "REDCapTidieR_cond") diff --git a/R/combine_checkboxes.R b/R/combine_checkboxes.R index 50dba153..df799b77 100644 --- a/R/combine_checkboxes.R +++ b/R/combine_checkboxes.R @@ -92,34 +92,37 @@ combine_checkboxes <- function(supertbl, # Replace TRUEs/1s with raw/label values from metadata data_tbl_mod <- data_tbl_mod %>% - mutate(across( - selected_cols, - ~ replace_true(.x, - cur_column(), - metadata = metadata_ref, - raw_or_label = raw_or_label - ) - ), - across(selected_cols, as.character) # enforce to character strings + mutate( + across( + selected_cols, + ~ replace_true(.x, + cur_column(), + metadata = metadata_ref, + raw_or_label = raw_or_label + ) + ), + across(selected_cols, as.character) # enforce to character strings ) for (i in seq_along(values_to)) { - metadata_overwrite <- metadata_ref %>% filter(field_name %in% col_groups[[i]]) %>% pull(raw_or_label) + metadata_overwrite <- metadata_ref %>% + filter(.data$field_name %in% col_groups[[i]]) %>% + pull(raw_or_label) data_tbl_mod <- data_tbl_mod %>% mutate( !!values_to[i] := ifelse(!!sym(values_to[i]), - multi_value_label, - coalesce(!!!syms(col_groups[[i]])) + multi_value_label, + coalesce(!!!syms(col_groups[[i]])) ), !!values_to[i] := ifelse(is.na(!!sym(values_to[i])), - values_fill, - !!sym(values_to[i]) + values_fill, + !!sym(values_to[i]) ) ) %>% mutate( !!values_to[i] := factor(!!sym(values_to[i]), - levels = c(metadata_overwrite, multi_value_label, values_fill) + levels = c(metadata_overwrite, multi_value_label, values_fill) ) ) } @@ -166,14 +169,14 @@ get_metadata_ref <- function(metadata_tbl, for (i in seq_along(unique(out$original_field))) { index <- unique(out$original_field)[i] - out_filtered <- out %>% filter(original_field == index) + out_filtered <- out %>% filter(.data$original_field == index) parsed_vals <- rbind(parsed_vals, parse_labels(first(out_filtered$select_choices_or_calculations))) } bind_cols(out, parsed_vals) %>% - select(.data$field_name, .data$raw, .data$label, original_field) %>% - relocate(original_field, .after = field_name) + select(.data$field_name, .data$raw, .data$label, .data$original_field) %>% + relocate(.data$original_field, .after = .data$field_name) } #' @noRd diff --git a/tests/testthat/test-combine_checkboxes.R b/tests/testthat/test-combine_checkboxes.R index bb7af8f2..5132c2e5 100644 --- a/tests/testthat/test-combine_checkboxes.R +++ b/tests/testthat/test-combine_checkboxes.R @@ -169,7 +169,7 @@ test_that("combine_checkboxes works for multiple checkbox fields", { dplyr::first() expected_out <- tibble::tribble( - ~"study_id",~"extra_data", ~"new_col1", ~"new_col2", + ~"study_id", ~"extra_data", ~"new_col1", ~"new_col2", 1, 1, "Red", "Green", 2, 2, "Multiple", "Green", 3, 3, NA, NA @@ -186,7 +186,7 @@ test_that("combine_checkboxes works for multiple checkbox fields with logicals", out <- combine_checkboxes( supertbl = supertbl, tbl = "nonrepeat_instrument", - cols = c(starts_with("multi")| starts_with("single_checkbox")), + cols = c(starts_with("multi") | starts_with("single_checkbox")), values_to = c("new_col1", "new_col2"), keep = FALSE ) %>% @@ -194,7 +194,7 @@ test_that("combine_checkboxes works for multiple checkbox fields with logicals", dplyr::first() expected_out <- tibble::tribble( - ~"study_id",~"extra_data", ~"new_col1", ~"new_col2", + ~"study_id", ~"extra_data", ~"new_col1", ~"new_col2", 1, 1, "Red", "Green", 2, 2, "Multiple", "Green", 3, 3, NA, NA From 7789a228b8514a09450a96a82622338ccea44656 Mon Sep 17 00:00:00 2001 From: Richard Hanna Date: Mon, 29 Jul 2024 15:51:52 -0400 Subject: [PATCH 29/39] Update API, clean up, new methods, new docs --- NAMESPACE | 1 + R/REDCapTidieR-package.R | 2 +- R/checks.R | 31 +---- R/combine_checkboxes.R | 159 +++++++++++++++-------- man/check_fields_exist.Rd | 2 +- man/check_values_to_length.Rd | 19 --- man/combine_checkboxes.Rd | 17 ++- man/convert_metadata_spec.Rd | 43 ++++++ man/get_metadata_ref.Rd | 20 --- man/get_metadata_spec.Rd | 33 +++++ man/replace_true.Rd | 25 ++++ tests/testthat/test-checks.R | 19 --- tests/testthat/test-combine_checkboxes.R | 82 ++++++++---- 13 files changed, 276 insertions(+), 177 deletions(-) delete mode 100644 man/check_values_to_length.Rd create mode 100644 man/convert_metadata_spec.Rd delete mode 100644 man/get_metadata_ref.Rd create mode 100644 man/get_metadata_spec.Rd create mode 100644 man/replace_true.Rd diff --git a/NAMESPACE b/NAMESPACE index e48eab82..a9fbdb2a 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -81,6 +81,7 @@ importFrom(purrr,map_lgl) importFrom(purrr,pluck) importFrom(purrr,pmap) importFrom(purrr,pmap_chr) +importFrom(purrr,reduce) importFrom(purrr,some) importFrom(readr,parse_character) importFrom(readr,parse_date) diff --git a/R/REDCapTidieR-package.R b/R/REDCapTidieR-package.R index f942d740..16d54f28 100644 --- a/R/REDCapTidieR-package.R +++ b/R/REDCapTidieR-package.R @@ -11,7 +11,7 @@ #' @importFrom lobstr obj_size #' @importFrom lubridate is.difftime is.period is.POSIXt is.Date #' @importFrom purrr compose map map2 map_int map_lgl pluck pmap_chr some pmap -#' discard flatten_chr map2_chr +#' discard flatten_chr map2_chr reduce #' @importFrom REDCapR redcap_arm_export redcap_event_instruments redcap_instruments #' redcap_metadata_read redcap_read_oneshot sanitize_token #' @importFrom rlang .data !!! abort as_closure caller_arg caller_env catch_cnd diff --git a/R/checks.R b/R/checks.R index 940afafe..46758d71 100644 --- a/R/checks.R +++ b/R/checks.R @@ -664,7 +664,7 @@ check_extra_field_values_message <- function(extra_field_values, call = caller_e #' Check fields exist for checkbox combination #' #' @param fields Vector of character strings to check the length of -#' @param expr A quosure expression +#' @param expr An expression #' @param call The calling environment to use in the error message #' #' @keywords internal @@ -712,32 +712,3 @@ check_fields_are_checkboxes <- function(metadata_tbl, call = caller_env()) { ) } } - -#' @title -#' Check values_to length against detected number of checkbox fields -#' -#' @param col_groups a list of column groups identified by checkbox field detection -#' @param values_to a user defined character vector passed from [combine_checkboxes()] -#' @param call The calling environment to use in the error message -#' -#' @keywords internal -check_values_to_length <- function(col_groups, values_to, call = caller_env()) { - if (length(values_to) < length(names(col_groups))) { - cli_warn( - message = c( - `!` = "Detected fewer {.code values_to} arguments than the number of checkbox fields. Only the first {length(values_to)} will be used." # nolint line_length_linter - ), - class = c("checkbox_value_to_length", "REDCapTidieR_cond") - ) - } - - if (length(values_to) > length(names(col_groups))) { - cli_abort( - message = c( - `x` = "The number of {.code values_to} arguments supplied is greater than the number of checkbox fields detected.", # nolint line_length_linter - `i` = "{length(values_to)} {.code values_to} supplied, {length(names(col_groups))} checkbox fields detected." - ), - class = c("checkbox_value_to_length", "REDCapTidieR_cond") - ) - } -} diff --git a/R/combine_checkboxes.R b/R/combine_checkboxes.R index df799b77..e004d185 100644 --- a/R/combine_checkboxes.R +++ b/R/combine_checkboxes.R @@ -8,10 +8,12 @@ #' #' @param supertbl A supertibble generated by [read_redcap()]. Required. #' @param tbl The `redcap_form_name` of the data tibble to extract. Required. -#' @param cols <[`tidy-select`][tidyr_tidy_select]> Checbox columns to combine to +#' @param cols <[`tidy-select`][tidyr_tidy_select]> Checkbox columns to combine to #' single column. Required. -#' @param values_to A string specifying the name of the column to combine checkbox -#' values under. Required. +#' @param names_prefix String added to the start of every variable name. +#' @param names_suffix String added to the end of every variable name. +#' @param names_sep String to separate new column names from `names_prefix` and/or +#' `names_suffix`. #' @param multi_value_label A string specifying the value to be used when multiple #' checkbox fields are selected. Default "Multiple". #' @param values_fill Value to use when no checkboxes are selected. Default `NA`. @@ -25,11 +27,10 @@ #' @examples #' \dontrun{ #' supertbl <- read_redcap(redcap_uri, token) -#' combined_tbl <- combine_checkboxes( +#' combine_checkboxes( #' supertbl = supertbl, #' tbl = "demographics", #' cols = starts_with("race"), -#' values_to = "race_combined", #' multi_value_label = "Multiple", #' values_fill = NA #' ) @@ -40,15 +41,19 @@ combine_checkboxes <- function(supertbl, tbl, cols, - values_to, + names_prefix = "", + names_suffix = NULL, + names_sep = "_", multi_value_label = "Multiple", values_fill = NA, raw_or_label = "label", keep = TRUE) { # Check args --- check_arg_is_supertbl(supertbl, req_cols = c("redcap_data", "redcap_metadata")) + check_arg_is_character(names_prefix, len = 1) + check_arg_is_character(names_suffix, len = 1, null.ok = TRUE) + check_arg_is_character(names_sep, len = 1, any.missing = TRUE) check_arg_is_character(tbl, len = 1, any.missing = FALSE) - check_arg_is_character(values_to, any.missing = FALSE) check_arg_is_character(multi_value_label, len = 1, any.missing = TRUE) check_arg_is_character(values_fill, len = 1, any.missing = TRUE) check_arg_choices(raw_or_label, choices = c("label", "raw")) @@ -58,33 +63,29 @@ combine_checkboxes <- function(supertbl, data_tbl <- supertbl %>% extract_tibble(tbl) - # Save user cols to quosure + # Save user cols to quo cols_exp <- enquo(cols) # Evaluate the cols expression to get the selected column names selected_cols <- names(eval_select(cols_exp, data = data_tbl)) check_fields_exist(fields = selected_cols, expr = cols_exp) # Check supplied fields exist - # Extract the prefix of each selected column - prefixes <- sub("___.*", "", selected_cols) - - # Split the selected columns based on their prefixes - col_groups <- split(selected_cols, prefixes) - check_values_to_length(col_groups, values_to) # Check values_to columns match length of fields - # Get metadata reference table, check that chosen fields are checkboxes metadata_tbl <- supertbl$redcap_metadata[supertbl$redcap_form_name == tbl][[1]] - metadata_ref <- get_metadata_ref(metadata_tbl, selected_cols) + metadata_spec <- get_metadata_spec(metadata_tbl, selected_cols, names_prefix, names_suffix, names_sep) - # Define values_to as the count of TRUEs/1s for the given checkbox field + # Define .new_col as the count of TRUEs/1s for the given checkbox field # Assign TRUE if multiple selections made, and FALSE if one or zero made data_tbl_mod <- data_tbl + .new_col <- unique(metadata_spec$.new_value) + + for (i in seq_along(.new_col)) { + cols_to_sum <- metadata_spec$field_name[metadata_spec$.new_value == .new_col[i]] # nolint: object_usage_linter - for (i in seq_along(values_to)) { data_tbl_mod <- data_tbl_mod %>% mutate( - !!values_to[i] := case_when( - rowSums(select(., col_groups[[i]])) > 1 ~ TRUE, + !!.new_col[i] := case_when( + rowSums(select(., cols_to_sum)) > 1 ~ TRUE, .default = FALSE ) ) @@ -97,39 +98,21 @@ combine_checkboxes <- function(supertbl, selected_cols, ~ replace_true(.x, cur_column(), - metadata = metadata_ref, + metadata = metadata_spec, raw_or_label = raw_or_label ) ), across(selected_cols, as.character) # enforce to character strings ) - for (i in seq_along(values_to)) { - metadata_overwrite <- metadata_ref %>% - filter(.data$field_name %in% col_groups[[i]]) %>% - pull(raw_or_label) - - data_tbl_mod <- data_tbl_mod %>% - mutate( - !!values_to[i] := ifelse(!!sym(values_to[i]), - multi_value_label, - coalesce(!!!syms(col_groups[[i]])) - ), - !!values_to[i] := ifelse(is.na(!!sym(values_to[i])), - values_fill, - !!sym(values_to[i]) - ) - ) %>% - mutate( - !!values_to[i] := factor(!!sym(values_to[i]), - levels = c(metadata_overwrite, multi_value_label, values_fill) - ) - ) - } + # Use the metadata_spec table to fill values in .new_col + data_tbl_mod <- reduce(.new_col, function(tbl, col_item) { + convert_metadata_spec(col_item, metadata_spec, tbl, raw_or_label, multi_value_label, values_fill) + }, .init = data_tbl_mod) final_tbl <- bind_cols( data_tbl, - data_tbl_mod %>% select(!!values_to) + data_tbl_mod %>% select(!!.new_col) ) # Keep or remove original multi columns @@ -144,21 +127,28 @@ combine_checkboxes <- function(supertbl, supertbl } -#' @title Utility function for getting metadata raw and label values for checkboxes +#' @title Get metadata specification table #' +#' @inheritParams combine_checkboxes #' @param metadata_tbl A metadata tibble from the supertibble generated by [read_redcap()]. #' @param selected_cols Character string vector of field names for checkbox combination #' #' @returns a tibble #' #' @keywords internal -get_metadata_ref <- function(metadata_tbl, - selected_cols) { +get_metadata_spec <- function(metadata_tbl, + selected_cols, + names_prefix, + names_suffix, + names_sep) { # Create a metadata reference table linking field name to raw and label values out <- metadata_tbl %>% filter(.data$field_name %in% selected_cols) %>% mutate( - original_field = sub("___.*$", "", .data$field_name) + .value = sub("___.*$", "", .data$field_name), + .new_value = case_when(!is.null(names_suffix) ~ paste(names_prefix, .value, names_suffix, sep = names_sep), + .default = paste(names_prefix, .data$.value, sep = names_sep) + ) ) # Make sure selection is checkbox metadata field type @@ -167,19 +157,30 @@ get_metadata_ref <- function(metadata_tbl, # Bind raw/label values per original field grouping parsed_vals <- tibble() - for (i in seq_along(unique(out$original_field))) { - index <- unique(out$original_field)[i] - out_filtered <- out %>% filter(.data$original_field == index) + for (i in seq_along(unique(out$.value))) { + index <- unique(out$.value)[i] + out_filtered <- out %>% filter(.data$.value == index) parsed_vals <- rbind(parsed_vals, parse_labels(first(out_filtered$select_choices_or_calculations))) } bind_cols(out, parsed_vals) %>% - select(.data$field_name, .data$raw, .data$label, .data$original_field) %>% - relocate(.data$original_field, .after = .data$field_name) + select(.data$field_name, .data$raw, .data$label, .data$.value, .data$.new_value) %>% + relocate(c(.data$.value, .data$.new_value), .after = .data$field_name) } -#' @noRd +#' @title Replace checkbox TRUEs with raw_or_label values +#' +#' @inheritParams combine_checkboxes +#' @param col A vector +#' @param col_name A string +#' @param metadata A metadata tibble from the original supertibble +#' +#' @description +#' Simple utility function for replacing checkbox field values. +#' +#' @returns A character string +#' #' @keywords internal replace_true <- function(col, col_name, metadata, raw_or_label) { # Replace TRUEs/1s with the appropriate raw or label value from the metadata @@ -190,3 +191,53 @@ replace_true <- function(col, col_name, metadata, raw_or_label) { # Convert non-TRUEs to NA, since values can be either "FALSE" or "0" for unchecked values return(col) } + +#' @title Use metadata_spec to convert new column values +#' +#' @description +#' [convert_metadata_spec()] uses the `metadata_spec` table provided by [get_metadata_spec()] +#' to automatically convert new column values to either: +#' +#' - A `raw_or_label` checkbox value when only a single value is detected +#' - `mult_value_label` when multiple values are detected +#' - `values_fill` when `NA` is detected +#' +#' @inheritParams combine_checkboxes +#' @param .new_col_item A character string +#' @param metadata_spec A tibble output from [convert_metadata_spec()] +#' @param data_tbl_mod A modified data tibble +#' +#' @returns a tibble +#' +#' @keywords internal +convert_metadata_spec <- function(.new_col_item, + metadata_spec, + data_tbl_mod, + raw_or_label, + multi_value_label, + values_fill) { + .col_group <- metadata_spec$field_name[metadata_spec$.new_value == .new_col_item] + + metadata_overwrite <- metadata_spec %>% + filter(.data$field_name %in% .col_group) %>% + pull(raw_or_label) + + data_tbl_mod <- data_tbl_mod %>% + mutate( + !!.new_col_item := ifelse(!!sym(.new_col_item), + multi_value_label, + coalesce(!!!syms(.col_group)) + ), + !!.new_col_item := ifelse(is.na(!!sym(.new_col_item)), + values_fill, + !!sym(.new_col_item) + ) + ) %>% + mutate( + !!.new_col_item := factor(!!sym(.new_col_item), + levels = c(metadata_overwrite, multi_value_label, values_fill) + ) + ) + + return(data_tbl_mod) +} diff --git a/man/check_fields_exist.Rd b/man/check_fields_exist.Rd index 5076a1b7..8c65e2d7 100644 --- a/man/check_fields_exist.Rd +++ b/man/check_fields_exist.Rd @@ -9,7 +9,7 @@ check_fields_exist(fields, expr, call = caller_env()) \arguments{ \item{fields}{Vector of character strings to check the length of} -\item{expr}{A quosure expression} +\item{expr}{An expression} \item{call}{The calling environment to use in the error message} } diff --git a/man/check_values_to_length.Rd b/man/check_values_to_length.Rd deleted file mode 100644 index e9194a08..00000000 --- a/man/check_values_to_length.Rd +++ /dev/null @@ -1,19 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/checks.R -\name{check_values_to_length} -\alias{check_values_to_length} -\title{Check values_to length against detected number of checkbox fields} -\usage{ -check_values_to_length(col_groups, values_to, call = caller_env()) -} -\arguments{ -\item{col_groups}{a list of column groups identified by checkbox field detection} - -\item{values_to}{a user defined character vector passed from \code{\link[=combine_checkboxes]{combine_checkboxes()}}} - -\item{call}{The calling environment to use in the error message} -} -\description{ -Check values_to length against detected number of checkbox fields -} -\keyword{internal} diff --git a/man/combine_checkboxes.Rd b/man/combine_checkboxes.Rd index f2ed6094..4a7a362d 100644 --- a/man/combine_checkboxes.Rd +++ b/man/combine_checkboxes.Rd @@ -8,7 +8,9 @@ combine_checkboxes( supertbl, tbl, cols, - values_to, + names_prefix = "", + names_suffix = NULL, + names_sep = "_", multi_value_label = "Multiple", values_fill = NA, raw_or_label = "label", @@ -20,11 +22,15 @@ combine_checkboxes( \item{tbl}{The \code{redcap_form_name} of the data tibble to extract. Required.} -\item{cols}{<\code{\link[=tidyr_tidy_select]{tidy-select}}> Checbox columns to combine to +\item{cols}{<\code{\link[=tidyr_tidy_select]{tidy-select}}> Checkbox columns to combine to single column. Required.} -\item{values_to}{A string specifying the name of the column to combine checkbox -values under. Required.} +\item{names_prefix}{String added to the start of every variable name.} + +\item{names_suffix}{String added to the end of every variable name.} + +\item{names_sep}{String to separate new column names from \code{names_prefix} and/or +\code{names_suffix}.} \item{multi_value_label}{A string specifying the value to be used when multiple checkbox fields are selected. Default "Multiple".} @@ -49,11 +55,10 @@ factor column. \examples{ \dontrun{ supertbl <- read_redcap(redcap_uri, token) -combined_tbl <- combine_checkboxes( +combine_checkboxes( supertbl = supertbl, tbl = "demographics", cols = starts_with("race"), - values_to = "race_combined", multi_value_label = "Multiple", values_fill = NA ) diff --git a/man/convert_metadata_spec.Rd b/man/convert_metadata_spec.Rd new file mode 100644 index 00000000..81b40fad --- /dev/null +++ b/man/convert_metadata_spec.Rd @@ -0,0 +1,43 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/combine_checkboxes.R +\name{convert_metadata_spec} +\alias{convert_metadata_spec} +\title{Use metadata_spec to convert new column values} +\usage{ +convert_metadata_spec( + .new_col_item, + metadata_spec, + data_tbl_mod, + raw_or_label, + multi_value_label, + values_fill +) +} +\arguments{ +\item{.new_col_item}{A character string} + +\item{metadata_spec}{A tibble output from \code{\link[=convert_metadata_spec]{convert_metadata_spec()}}} + +\item{data_tbl_mod}{A modified data tibble} + +\item{raw_or_label}{Either 'raw' or 'label' to specify whether to use raw coded +values or labels for the options. Default 'label'.} + +\item{multi_value_label}{A string specifying the value to be used when multiple +checkbox fields are selected. Default "Multiple".} + +\item{values_fill}{Value to use when no checkboxes are selected. Default \code{NA}.} +} +\value{ +a tibble +} +\description{ +\code{\link[=convert_metadata_spec]{convert_metadata_spec()}} uses the \code{metadata_spec} table provided by \code{\link[=get_metadata_spec]{get_metadata_spec()}} +to automatically convert new column values to either: +\itemize{ +\item A \code{raw_or_label} checkbox value when only a single value is detected +\item \code{mult_value_label} when multiple values are detected +\item \code{values_fill} when \code{NA} is detected +} +} +\keyword{internal} diff --git a/man/get_metadata_ref.Rd b/man/get_metadata_ref.Rd deleted file mode 100644 index 6ced220b..00000000 --- a/man/get_metadata_ref.Rd +++ /dev/null @@ -1,20 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/combine_checkboxes.R -\name{get_metadata_ref} -\alias{get_metadata_ref} -\title{Utility function for getting metadata raw and label values for checkboxes} -\usage{ -get_metadata_ref(metadata_tbl, selected_cols) -} -\arguments{ -\item{metadata_tbl}{A metadata tibble from the supertibble generated by \code{\link[=read_redcap]{read_redcap()}}.} - -\item{selected_cols}{Character string vector of field names for checkbox combination} -} -\value{ -a tibble -} -\description{ -Utility function for getting metadata raw and label values for checkboxes -} -\keyword{internal} diff --git a/man/get_metadata_spec.Rd b/man/get_metadata_spec.Rd new file mode 100644 index 00000000..dc5ade39 --- /dev/null +++ b/man/get_metadata_spec.Rd @@ -0,0 +1,33 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/combine_checkboxes.R +\name{get_metadata_spec} +\alias{get_metadata_spec} +\title{Get metadata specification table} +\usage{ +get_metadata_spec( + metadata_tbl, + selected_cols, + names_prefix, + names_suffix, + names_sep +) +} +\arguments{ +\item{metadata_tbl}{A metadata tibble from the supertibble generated by \code{\link[=read_redcap]{read_redcap()}}.} + +\item{selected_cols}{Character string vector of field names for checkbox combination} + +\item{names_prefix}{String added to the start of every variable name.} + +\item{names_suffix}{String added to the end of every variable name.} + +\item{names_sep}{String to separate new column names from \code{names_prefix} and/or +\code{names_suffix}.} +} +\value{ +a tibble +} +\description{ +Get metadata specification table +} +\keyword{internal} diff --git a/man/replace_true.Rd b/man/replace_true.Rd new file mode 100644 index 00000000..b1d8ced5 --- /dev/null +++ b/man/replace_true.Rd @@ -0,0 +1,25 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/combine_checkboxes.R +\name{replace_true} +\alias{replace_true} +\title{Replace checkbox TRUEs with raw_or_label values} +\usage{ +replace_true(col, col_name, metadata, raw_or_label) +} +\arguments{ +\item{col}{A vector} + +\item{col_name}{A string} + +\item{metadata}{A metadata tibble from the original supertibble} + +\item{raw_or_label}{Either 'raw' or 'label' to specify whether to use raw coded +values or labels for the options. Default 'label'.} +} +\value{ +A character string +} +\description{ +Simple utility function for replacing checkbox field values. +} +\keyword{internal} diff --git a/tests/testthat/test-checks.R b/tests/testthat/test-checks.R index 3fc71072..77435375 100644 --- a/tests/testthat/test-checks.R +++ b/tests/testthat/test-checks.R @@ -272,22 +272,3 @@ test_that("check_fields_are_checkboxes works", { expect_error(check_fields_are_checkboxes(metadata), class = "non_checkbox_fields") expect_no_error(check_fields_are_checkboxes(metadata_filtered)) }) - -test_that("check_values_to_length length works", { - col_groups <- list( - checkbox_1 = c("checkbox1___1", "checkbox1___2"), - checkbox_2 = c("checkbox2___1") - ) - - values_to <- c("new_col1", "new_col2") - - expect_no_message(check_values_to_length(col_groups, values_to)) - - values_to_warn <- "new_col1" - - expect_warning(check_values_to_length(col_groups, values_to_warn), class = "checkbox_value_to_length") - - values_to_error <- c("new_col1", "new_col2", "new_col3") - - expect_error(check_values_to_length(col_groups, values_to_error), class = "checkbox_value_to_length") -}) diff --git a/tests/testthat/test-combine_checkboxes.R b/tests/testthat/test-combine_checkboxes.R index 5132c2e5..7071a2b0 100644 --- a/tests/testthat/test-combine_checkboxes.R +++ b/tests/testthat/test-combine_checkboxes.R @@ -42,8 +42,7 @@ test_that("combine_checkboxes returns an expected supertbl", { out <- combine_checkboxes( supertbl = supertbl, tbl = "nonrepeat_instrument", - cols = starts_with("multi"), - values_to = "new_col" + cols = starts_with("multi") ) # values_fill declared expect_setequal(class(out), c("redcap_supertbl", "tbl_df", "tbl", "data.frame")) @@ -55,7 +54,6 @@ test_that("combine_checkboxes works for nonrepeat instrument", { supertbl = supertbl, tbl = "nonrepeat_instrument", cols = starts_with("multi"), - values_to = "new_col", multi_value_label = "multiple", # multi_value_label declared values_fill = "none" # values_fill declared ) %>% @@ -63,13 +61,13 @@ test_that("combine_checkboxes works for nonrepeat instrument", { dplyr::first() expected_out <- tibble::tribble( - ~"study_id", ~"multi___1", ~"multi___2", ~"multi___3", ~"single_checkbox___1", ~"extra_data", ~"new_col", + ~"study_id", ~"multi___1", ~"multi___2", ~"multi___3", ~"single_checkbox___1", ~"extra_data", ~"_multi", 1, TRUE, FALSE, FALSE, TRUE, 1, "Red", 2, TRUE, TRUE, FALSE, TRUE, 2, "multiple", 3, FALSE, FALSE, FALSE, FALSE, 3, "none" ) %>% mutate( - new_col = factor(new_col, levels = c("Red", "Yellow", "Blue", "multiple", "none")) + `_multi` = factor(`_multi`, levels = c("Red", "Yellow", "Blue", "multiple", "none")) ) expect_equal(out, expected_out) @@ -80,20 +78,19 @@ test_that("combine_checkboxes works for nonrepeat instrument and drop old values supertbl = supertbl, tbl = "nonrepeat_instrument", cols = starts_with("multi"), - values_to = "new_col", keep = FALSE # Test keep = FALSE ) %>% pull(redcap_data) %>% dplyr::first() expected_out <- tibble::tribble( - ~"study_id", ~"single_checkbox___1", ~"extra_data", ~"new_col", + ~"study_id", ~"single_checkbox___1", ~"extra_data", ~"_multi", 1, TRUE, 1, "Red", 2, TRUE, 2, "Multiple", 3, FALSE, 3, NA ) %>% mutate( - new_col = factor(new_col, levels = c("Red", "Yellow", "Blue", "Multiple")) + `_multi` = factor(`_multi`, levels = c("Red", "Yellow", "Blue", "Multiple")) ) expect_equal(out, expected_out) @@ -103,36 +100,36 @@ test_that("combine_checkboxes works for repeat instrument", { out <- combine_checkboxes( supertbl = supertbl, tbl = "repeat_instrument", - cols = starts_with("repeat"), - values_to = "new_col" + cols = starts_with("repeat") ) %>% pull(redcap_data) %>% dplyr::nth(2) expected_out <- tibble::tribble( - ~"study_id", ~"redcap_event", ~"redcap_form_instance", ~"repeat___1", ~"repeat___2", ~"repeat___3", ~"new_col", + ~"study_id", ~"redcap_event", ~"redcap_form_instance", ~"repeat___1", ~"repeat___2", ~"repeat___3", ~"_repeat", 1, "event_1", 1, TRUE, FALSE, FALSE, "A", 2, "event_1", 1, TRUE, TRUE, TRUE, "Multiple", 2, "event_1", 2, FALSE, FALSE, FALSE, NA ) %>% mutate( - new_col = factor(new_col, levels = c("A", "B", "C", "Multiple")) + `_repeat` = factor(`_repeat`, levels = c("A", "B", "C", "Multiple")) ) expect_equal(out, expected_out) }) -test_that("get_metadata_ref works", { - out <- get_metadata_ref( +test_that("get_metadata_spec works", { + out <- get_metadata_spec( metadata_tbl = supertbl$redcap_metadata[[1]], - selected_cols = c("multi___1", "multi___2", "multi___3") + selected_cols = c("multi___1", "multi___2", "multi___3"), + names_prefix = "", names_suffix = NULL, names_sep = "_" # Mimic defaults ) expected_out <- tibble::tribble( - ~"field_name", ~"original_field", ~"raw", ~"label", - "multi___1", "multi", "1", "Red", - "multi___2", "multi", "2", "Yellow", - "multi___3", "multi", "3", "Blue" + ~"field_name", ~".value", ~".new_value", ~"raw", ~"label", + "multi___1", "multi", "_multi", "1", "Red", + "multi___2", "multi", "_multi", "2", "Yellow", + "multi___3", "multi", "_multi", "3", "Blue" ) expect_equal(out, expected_out) @@ -162,21 +159,20 @@ test_that("combine_checkboxes works for multiple checkbox fields", { supertbl = supertbl, tbl = "nonrepeat_instrument", cols = c(starts_with("multi"), starts_with("single_checkbox")), - values_to = c("new_col1", "new_col2"), keep = FALSE ) %>% pull(redcap_data) %>% dplyr::first() expected_out <- tibble::tribble( - ~"study_id", ~"extra_data", ~"new_col1", ~"new_col2", + ~"study_id", ~"extra_data", ~"_multi", ~"_single_checkbox", 1, 1, "Red", "Green", 2, 2, "Multiple", "Green", 3, 3, NA, NA ) %>% mutate( - new_col1 = factor(new_col1, levels = c("Red", "Yellow", "Blue", "Multiple")), - new_col2 = factor(new_col2, levels = c("Green", "Multiple")) + `_multi` = factor(`_multi`, levels = c("Red", "Yellow", "Blue", "Multiple")), + `_single_checkbox` = factor(`_single_checkbox`, levels = c("Green", "Multiple")) ) expect_equal(out, expected_out) @@ -187,21 +183,53 @@ test_that("combine_checkboxes works for multiple checkbox fields with logicals", supertbl = supertbl, tbl = "nonrepeat_instrument", cols = c(starts_with("multi") | starts_with("single_checkbox")), - values_to = c("new_col1", "new_col2"), keep = FALSE ) %>% pull(redcap_data) %>% dplyr::first() expected_out <- tibble::tribble( - ~"study_id", ~"extra_data", ~"new_col1", ~"new_col2", + ~"study_id", ~"extra_data", ~"_multi", ~"_single_checkbox", 1, 1, "Red", "Green", 2, 2, "Multiple", "Green", 3, 3, NA, NA ) %>% mutate( - new_col1 = factor(new_col1, levels = c("Red", "Yellow", "Blue", "Multiple")), - new_col2 = factor(new_col2, levels = c("Green", "Multiple")) + `_multi` = factor(`_multi`, levels = c("Red", "Yellow", "Blue", "Multiple")), + `_single_checkbox` = factor(`_single_checkbox`, levels = c("Green", "Multiple")) + ) + + expect_equal(out, expected_out) +}) + +test_that("convert_metadata_spec works", { + .new_col_item <- "_multi" + metadata_spec <- get_metadata_spec( + metadata_tbl = supertbl$redcap_metadata[[1]], + selected_cols = c("multi___1", "multi___2", "multi___3"), + names_prefix = "", names_suffix = NULL, names_sep = "_" # Mimic defaults + ) + + data_tbl_mod <- tibble::tribble( + ~"study_id", ~"multi___1", ~"multi___2", ~"multi___3", ~"single_checkbox___1", + ~"extra_data", ~"_multi", ~"_single_checkbox", + 1, "Red", NA, NA, NA, 1, FALSE, FALSE, + 2, "Red", "Yellow", NA, "Green", 2, TRUE, FALSE, + 3, NA, NA, NA, NA, 3, FALSE, FALSE + ) + + out <- convert_metadata_spec(.new_col_item, metadata_spec, data_tbl_mod, + raw_or_label = "label", multi_value_label = "Multiple", values_fill = NA) + + expected_out <- tibble::tribble( + ~"study_id", ~"multi___1", ~"multi___2", ~"multi___3", ~"single_checkbox___1", + ~"extra_data", ~"_multi", ~"_single_checkbox", + 1, "Red", NA, NA, NA, 1, "Red", FALSE, + 2, "Red", "Yellow", NA, "Green", 2, "Multiple", FALSE, + 3, NA, NA, NA, NA, 3, NA, FALSE + ) %>% + mutate( + `_multi` = factor(`_multi`, levels = c("Red", "Yellow", "Blue", "Multiple")) ) expect_equal(out, expected_out) From c185e39410ef918fe0afa8b25f64b0c38eba12cb Mon Sep 17 00:00:00 2001 From: Richard Hanna Date: Fri, 2 Aug 2024 11:55:25 -0400 Subject: [PATCH 30/39] Add check_metadata_fields_exist, update details --- R/checks.R | 29 ++++++++++++++++++++++++ R/combine_checkboxes.R | 12 ++++++++++ man/check_metadata_fields_exist.Rd | 22 ++++++++++++++++++ man/combine_checkboxes.Rd | 10 ++++++++ tests/testthat/test-checks.R | 21 +++++++++++++++++ tests/testthat/test-combine_checkboxes.R | 3 ++- 6 files changed, 96 insertions(+), 1 deletion(-) create mode 100644 man/check_metadata_fields_exist.Rd diff --git a/R/checks.R b/R/checks.R index 46758d71..f708ba43 100644 --- a/R/checks.R +++ b/R/checks.R @@ -684,6 +684,35 @@ check_fields_exist <- function(fields, expr, call = caller_env()) { } } +#' @title +#' Check metadata fields exist for checkbox combination +#' +#' @description +#' Similar to [check_fields_exist()], but instead of verifying fields that exist +#' in the data tibble this seeks to verify their existence under the metadata +#' tibble `field_name`s. +#' +#' @param metadata_tbl A metadata tibble from the supertibble generated by [read_redcap()]. +#' @param cols Selected columns identified for [`combine_checkboxes()`] to be +#' cross checked against `metadata_tibble$field_name` +#' @param call The calling environment to use in the error message +#' +#' @keywords internal +check_metadata_fields_exist <- function(metadata_tbl, cols, call = caller_env()) { + if (!all(cols %in% metadata_tbl$field_name)) { + msg <- c( + x = "Fields detected not present in metadata.", + `!` = "Column{?s} {.code {cols[!cols %in% metadata_tbl$field_name]}} detected as valid in the data tibble, but not found present in the metadata tibble.", # nolint: line_length_linter + `i` = "This may occur if either the names of the data tibble or the metadata tibble `field_name`s were edited." + ) + + cli_abort( + msg, + class = c("missing_metadata_checkbox_fields", "REDCapTidieR_cond") + ) + } +} + #' @title #' Check fields are of checkbox field type diff --git a/R/combine_checkboxes.R b/R/combine_checkboxes.R index e004d185..5b14c200 100644 --- a/R/combine_checkboxes.R +++ b/R/combine_checkboxes.R @@ -6,6 +6,16 @@ #' combining multiple binary columns into a singular and informative labelled #' factor column. #' +#' @details +#' [combine_checkboxes()] makes use of the output names of [read_redcap()] +#' data tibbles and metadata tibbles. Changes to checkbox data names or +#' metadata `field_name`s may result in errors. +#' +#' Checkbox fields are expanded to be a variable per checkbox option, separated +#' by underscores. For example, `checkbox_var` with 2 options becomes +#' `checkbox_var___1` and `checkbox_var___2`. [combine_checkboxes()] looks for +#' these and may give a error if it cannot find them. +#' #' @param supertbl A supertibble generated by [read_redcap()]. Required. #' @param tbl The `redcap_form_name` of the data tibble to extract. Required. #' @param cols <[`tidy-select`][tidyr_tidy_select]> Checkbox columns to combine to @@ -141,6 +151,8 @@ get_metadata_spec <- function(metadata_tbl, names_prefix, names_suffix, names_sep) { + check_metadata_fields_exist(metadata_tbl, selected_cols) + # Create a metadata reference table linking field name to raw and label values out <- metadata_tbl %>% filter(.data$field_name %in% selected_cols) %>% diff --git a/man/check_metadata_fields_exist.Rd b/man/check_metadata_fields_exist.Rd new file mode 100644 index 00000000..cc1c3df7 --- /dev/null +++ b/man/check_metadata_fields_exist.Rd @@ -0,0 +1,22 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/checks.R +\name{check_metadata_fields_exist} +\alias{check_metadata_fields_exist} +\title{Check metadata fields exist for checkbox combination} +\usage{ +check_metadata_fields_exist(metadata_tbl, cols, call = caller_env()) +} +\arguments{ +\item{metadata_tbl}{A metadata tibble from the supertibble generated by \code{\link[=read_redcap]{read_redcap()}}.} + +\item{cols}{Selected columns identified for \code{\link[=combine_checkboxes]{combine_checkboxes()}} to be +cross checked against \code{metadata_tibble$field_name}} + +\item{call}{The calling environment to use in the error message} +} +\description{ +Similar to \code{\link[=check_fields_exist]{check_fields_exist()}}, but instead of verifying fields that exist +in the data tibble this seeks to verify their existence under the metadata +tibble \code{field_name}s. +} +\keyword{internal} diff --git a/man/combine_checkboxes.Rd b/man/combine_checkboxes.Rd index 4a7a362d..676cbd74 100644 --- a/man/combine_checkboxes.Rd +++ b/man/combine_checkboxes.Rd @@ -52,6 +52,16 @@ tibble into a single column. This transformation simplifies analysis by combining multiple binary columns into a singular and informative labelled factor column. } +\details{ +\code{\link[=combine_checkboxes]{combine_checkboxes()}} makes use of the output names of \code{\link[=read_redcap]{read_redcap()}} +data tibbles and metadata tibbles. Changes to checkbox data names or +metadata \code{field_name}s may result in errors. + +Checkbox fields are expanded to be a variable per checkbox option, separated +by underscores. For example, \code{checkbox_var} with 2 options becomes +\code{checkbox_var___1} and \code{checkbox_var___2}. \code{\link[=combine_checkboxes]{combine_checkboxes()}} looks for +these and may give a error if it cannot find them. +} \examples{ \dontrun{ supertbl <- read_redcap(redcap_uri, token) diff --git a/tests/testthat/test-checks.R b/tests/testthat/test-checks.R index 77435375..3fefdf8d 100644 --- a/tests/testthat/test-checks.R +++ b/tests/testthat/test-checks.R @@ -255,6 +255,27 @@ test_that("check_fields_exist works", { expect_no_error() }) +test_that("check_metadata_fields_exist works", { + metadata_valid <- tibble( + field_name = c("var1", "var2", "var_3"), + ) + valid_cols <- c("var1", "var2", "var_3") + + metadata_invalid <- tibble( + field_name = c("var1", "var2", "var_3_edited"), + ) + invalid_cols <- c("var1", "var2", "var_3_edited") + + check_metadata_fields_exist(metadata_valid, valid_cols) %>% + expect_no_error() + + check_metadata_fields_exist(metadata_invalid, valid_cols) %>% + expect_error(class = "missing_metadata_checkbox_fields") + + check_metadata_fields_exist(metadata_valid, invalid_cols) %>% + expect_error(class = "missing_metadata_checkbox_fields") +}) + test_that("check_fields_are_checkboxes works", { metadata <- tibble::tribble( ~field_name, ~field_type, diff --git a/tests/testthat/test-combine_checkboxes.R b/tests/testthat/test-combine_checkboxes.R index 7071a2b0..65757c11 100644 --- a/tests/testthat/test-combine_checkboxes.R +++ b/tests/testthat/test-combine_checkboxes.R @@ -219,7 +219,8 @@ test_that("convert_metadata_spec works", { ) out <- convert_metadata_spec(.new_col_item, metadata_spec, data_tbl_mod, - raw_or_label = "label", multi_value_label = "Multiple", values_fill = NA) + raw_or_label = "label", multi_value_label = "Multiple", values_fill = NA + ) expected_out <- tibble::tribble( ~"study_id", ~"multi___1", ~"multi___2", ~"multi___3", ~"single_checkbox___1", From abdc5129c58eeeaaff0b5e4032c716cc194317cf Mon Sep 17 00:00:00 2001 From: Richard Hanna Date: Fri, 2 Aug 2024 17:14:06 -0400 Subject: [PATCH 31/39] Consoldiate and rework checkbox value conversion --- R/combine_checkboxes.R | 86 +++++++++--------------- man/convert_checkbox_vals.Rd | 42 ++++++++++++ man/convert_metadata_spec.Rd | 43 ------------ tests/testthat/test-combine_checkboxes.R | 42 +++++------- 4 files changed, 90 insertions(+), 123 deletions(-) create mode 100644 man/convert_checkbox_vals.Rd delete mode 100644 man/convert_metadata_spec.Rd diff --git a/R/combine_checkboxes.R b/R/combine_checkboxes.R index 5b14c200..009bc978 100644 --- a/R/combine_checkboxes.R +++ b/R/combine_checkboxes.R @@ -89,19 +89,6 @@ combine_checkboxes <- function(supertbl, data_tbl_mod <- data_tbl .new_col <- unique(metadata_spec$.new_value) - for (i in seq_along(.new_col)) { - cols_to_sum <- metadata_spec$field_name[metadata_spec$.new_value == .new_col[i]] # nolint: object_usage_linter - - data_tbl_mod <- data_tbl_mod %>% - mutate( - !!.new_col[i] := case_when( - rowSums(select(., cols_to_sum)) > 1 ~ TRUE, - .default = FALSE - ) - ) - } - - # Replace TRUEs/1s with raw/label values from metadata data_tbl_mod <- data_tbl_mod %>% mutate( across( @@ -115,10 +102,14 @@ combine_checkboxes <- function(supertbl, across(selected_cols, as.character) # enforce to character strings ) - # Use the metadata_spec table to fill values in .new_col - data_tbl_mod <- reduce(.new_col, function(tbl, col_item) { - convert_metadata_spec(col_item, metadata_spec, tbl, raw_or_label, multi_value_label, values_fill) - }, .init = data_tbl_mod) + new_cols <- metadata_spec %>% + nest(.by = .data$.new_value, .key = "metadata") %>% + pmap(convert_checkbox_vals, + data_tbl = data_tbl_mod, + raw_or_label = raw_or_label, multi_value_label = multi_value_label, values_fill = values_fill + ) + + data_tbl_mod <- bind_cols(data_tbl_mod, new_cols) final_tbl <- bind_cols( data_tbl, @@ -204,52 +195,35 @@ replace_true <- function(col, col_name, metadata, raw_or_label) { return(col) } -#' @title Use metadata_spec to convert new column values +#' @title Convert a new checkbox column's values #' -#' @description -#' [convert_metadata_spec()] uses the `metadata_spec` table provided by [get_metadata_spec()] -#' to automatically convert new column values to either: +#' @description This function takes a single column of data and converts the values +#' based on the overall data tibble cross referenced with a nested section of the +#' metadata tibble. #' -#' - A `raw_or_label` checkbox value when only a single value is detected -#' - `mult_value_label` when multiple values are detected -#' - `values_fill` when `NA` is detected +#' [case_when()] logic helps determine whether the value is a coalesced singular +#' value or a user-specified one via `multi_value_label` or `values_fill`. #' -#' @inheritParams combine_checkboxes -#' @param .new_col_item A character string -#' @param metadata_spec A tibble output from [convert_metadata_spec()] -#' @param data_tbl_mod A modified data tibble -#' -#' @returns a tibble +#' @details +#' This function is used in conjunction with [pmap()]. #' #' @keywords internal -convert_metadata_spec <- function(.new_col_item, - metadata_spec, - data_tbl_mod, - raw_or_label, - multi_value_label, - values_fill) { - .col_group <- metadata_spec$field_name[metadata_spec$.new_value == .new_col_item] - - metadata_overwrite <- metadata_spec %>% - filter(.data$field_name %in% .col_group) %>% - pull(raw_or_label) - - data_tbl_mod <- data_tbl_mod %>% +#' +#' @param metadata A nested portion of the overall metadata tibble +#' @param data_tbl The data tibble from the original supertibble +#' @param .new_value The new column values made by [combine_checkboxes()] +#' @inheritParams combine_checkboxes +convert_checkbox_vals <- function(metadata, .new_value, data_tbl, raw_or_label, multi_value_label, values_fill) { + tibble( + !!.new_value := rowSums(!is.na(data_tbl[names(data_tbl) %in% metadata$field_name])) + ) %>% mutate( - !!.new_col_item := ifelse(!!sym(.new_col_item), - multi_value_label, - coalesce(!!!syms(.col_group)) + !!.new_value := case_when(. > 1 ~ multi_value_label, + . == 1 ~ coalesce(!!!data_tbl[, names(data_tbl) %in% metadata$field_name]), + .default = values_fill ), - !!.new_col_item := ifelse(is.na(!!sym(.new_col_item)), - values_fill, - !!sym(.new_col_item) - ) - ) %>% - mutate( - !!.new_col_item := factor(!!sym(.new_col_item), - levels = c(metadata_overwrite, multi_value_label, values_fill) + !!.new_value := factor(!!sym(.new_value), + levels = c(metadata[[raw_or_label]], multi_value_label, values_fill) ) ) - - return(data_tbl_mod) } diff --git a/man/convert_checkbox_vals.Rd b/man/convert_checkbox_vals.Rd new file mode 100644 index 00000000..9aa48cf4 --- /dev/null +++ b/man/convert_checkbox_vals.Rd @@ -0,0 +1,42 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/combine_checkboxes.R +\name{convert_checkbox_vals} +\alias{convert_checkbox_vals} +\title{Convert a new checkbox column's values} +\usage{ +convert_checkbox_vals( + metadata, + .new_value, + data_tbl, + raw_or_label, + multi_value_label, + values_fill +) +} +\arguments{ +\item{metadata}{A nested portion of the overall metadata tibble} + +\item{.new_value}{The new column values made by \code{\link[=combine_checkboxes]{combine_checkboxes()}}} + +\item{data_tbl}{The data tibble from the original supertibble} + +\item{raw_or_label}{Either 'raw' or 'label' to specify whether to use raw coded +values or labels for the options. Default 'label'.} + +\item{multi_value_label}{A string specifying the value to be used when multiple +checkbox fields are selected. Default "Multiple".} + +\item{values_fill}{Value to use when no checkboxes are selected. Default \code{NA}.} +} +\description{ +This function takes a single column of data and converts the values +based on the overall data tibble cross referenced with a nested section of the +metadata tibble. + +\code{\link[=case_when]{case_when()}} logic helps determine whether the value is a coalesced singular +value or a user-specified one via \code{multi_value_label} or \code{values_fill}. +} +\details{ +This function is used in conjunction with \code{\link[=pmap]{pmap()}}. +} +\keyword{internal} diff --git a/man/convert_metadata_spec.Rd b/man/convert_metadata_spec.Rd deleted file mode 100644 index 81b40fad..00000000 --- a/man/convert_metadata_spec.Rd +++ /dev/null @@ -1,43 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/combine_checkboxes.R -\name{convert_metadata_spec} -\alias{convert_metadata_spec} -\title{Use metadata_spec to convert new column values} -\usage{ -convert_metadata_spec( - .new_col_item, - metadata_spec, - data_tbl_mod, - raw_or_label, - multi_value_label, - values_fill -) -} -\arguments{ -\item{.new_col_item}{A character string} - -\item{metadata_spec}{A tibble output from \code{\link[=convert_metadata_spec]{convert_metadata_spec()}}} - -\item{data_tbl_mod}{A modified data tibble} - -\item{raw_or_label}{Either 'raw' or 'label' to specify whether to use raw coded -values or labels for the options. Default 'label'.} - -\item{multi_value_label}{A string specifying the value to be used when multiple -checkbox fields are selected. Default "Multiple".} - -\item{values_fill}{Value to use when no checkboxes are selected. Default \code{NA}.} -} -\value{ -a tibble -} -\description{ -\code{\link[=convert_metadata_spec]{convert_metadata_spec()}} uses the \code{metadata_spec} table provided by \code{\link[=get_metadata_spec]{get_metadata_spec()}} -to automatically convert new column values to either: -\itemize{ -\item A \code{raw_or_label} checkbox value when only a single value is detected -\item \code{mult_value_label} when multiple values are detected -\item \code{values_fill} when \code{NA} is detected -} -} -\keyword{internal} diff --git a/tests/testthat/test-combine_checkboxes.R b/tests/testthat/test-combine_checkboxes.R index 65757c11..dc7e7bc6 100644 --- a/tests/testthat/test-combine_checkboxes.R +++ b/tests/testthat/test-combine_checkboxes.R @@ -202,36 +202,30 @@ test_that("combine_checkboxes works for multiple checkbox fields with logicals", expect_equal(out, expected_out) }) -test_that("convert_metadata_spec works", { - .new_col_item <- "_multi" - metadata_spec <- get_metadata_spec( - metadata_tbl = supertbl$redcap_metadata[[1]], - selected_cols = c("multi___1", "multi___2", "multi___3"), - names_prefix = "", names_suffix = NULL, names_sep = "_" # Mimic defaults +test_that("convert_checkbox_vals works()", { + metadata <- tibble::tribble( + ~"field_name", ~".value", ~"raw", ~"label", + "multi___1", "multi", "1", "Red", + "multi___2", "multi", "2", "Yellow", + "multi___3", "multi", "3", "Blue" ) - data_tbl_mod <- tibble::tribble( - ~"study_id", ~"multi___1", ~"multi___2", ~"multi___3", ~"single_checkbox___1", - ~"extra_data", ~"_multi", ~"_single_checkbox", - 1, "Red", NA, NA, NA, 1, FALSE, FALSE, - 2, "Red", "Yellow", NA, "Green", 2, TRUE, FALSE, - 3, NA, NA, NA, NA, 3, FALSE, FALSE + # Same as nonrepeat data tbl but with NAs for FALSEs, post processed with metadata spec vals + data_tbl <- tibble::tribble( + ~"study_id", ~"multi___1", ~"multi___2", ~"multi___3", ~"single_checkbox___1", ~"extra_data", + 1, "Red", NA, NA, "Green", 1, + 2, "Red", "Yellow", NA, "Green", 2, + 3, NA, NA, NA, NA, 3 ) - out <- convert_metadata_spec(.new_col_item, metadata_spec, data_tbl_mod, - raw_or_label = "label", multi_value_label = "Multiple", values_fill = NA + out <- convert_checkbox_vals( + metadata = metadata, .new_value = "_multi", data_tbl = data_tbl, + raw_or_label = "label", multi_value_label = "multi", values_fill = NA ) - expected_out <- tibble::tribble( - ~"study_id", ~"multi___1", ~"multi___2", ~"multi___3", ~"single_checkbox___1", - ~"extra_data", ~"_multi", ~"_single_checkbox", - 1, "Red", NA, NA, NA, 1, "Red", FALSE, - 2, "Red", "Yellow", NA, "Green", 2, "Multiple", FALSE, - 3, NA, NA, NA, NA, 3, NA, FALSE - ) %>% - mutate( - `_multi` = factor(`_multi`, levels = c("Red", "Yellow", "Blue", "Multiple")) - ) + expected_out <- tibble( + `_multi` = factor(c("Red", "multi", NA), levels = c("Red", "Yellow", "Blue", "multi")) + ) expect_equal(out, expected_out) }) From 50d47d6f4c2069001a86ce86fc706e7f03b1b912 Mon Sep 17 00:00:00 2001 From: Richard Hanna Date: Mon, 5 Aug 2024 12:10:21 -0400 Subject: [PATCH 32/39] Add names_repair strategy support --- R/combine_checkboxes.R | 58 ++++++++++++++++++++---- man/combine_and_repair_tbls.Rd | 26 +++++++++++ man/combine_checkboxes.Rd | 2 + tests/testthat/test-combine_checkboxes.R | 36 +++++++++++++++ 4 files changed, 113 insertions(+), 9 deletions(-) create mode 100644 man/combine_and_repair_tbls.Rd diff --git a/R/combine_checkboxes.R b/R/combine_checkboxes.R index 009bc978..e98c5073 100644 --- a/R/combine_checkboxes.R +++ b/R/combine_checkboxes.R @@ -54,6 +54,8 @@ combine_checkboxes <- function(supertbl, names_prefix = "", names_suffix = NULL, names_sep = "_", + names_glue = NULL, + names_repair = "check_unique", multi_value_label = "Multiple", values_fill = NA, raw_or_label = "label", @@ -84,10 +86,8 @@ combine_checkboxes <- function(supertbl, metadata_tbl <- supertbl$redcap_metadata[supertbl$redcap_form_name == tbl][[1]] metadata_spec <- get_metadata_spec(metadata_tbl, selected_cols, names_prefix, names_suffix, names_sep) - # Define .new_col as the count of TRUEs/1s for the given checkbox field - # Assign TRUE if multiple selections made, and FALSE if one or zero made + # Copy data_tbl to mod, data_tbl to be referenced later data_tbl_mod <- data_tbl - .new_col <- unique(metadata_spec$.new_value) data_tbl_mod <- data_tbl_mod %>% mutate( @@ -109,12 +109,7 @@ combine_checkboxes <- function(supertbl, raw_or_label = raw_or_label, multi_value_label = multi_value_label, values_fill = values_fill ) - data_tbl_mod <- bind_cols(data_tbl_mod, new_cols) - - final_tbl <- bind_cols( - data_tbl, - data_tbl_mod %>% select(!!.new_col) - ) + final_tbl <- combine_and_repair_tbls(data_tbl, data_tbl_mod, new_cols, names_repair = names_repair) # Keep or remove original multi columns if (!keep) { @@ -227,3 +222,48 @@ convert_checkbox_vals <- function(metadata, .new_value, data_tbl, raw_or_label, ) ) } + +#' @title Combine checkbox fields with respect to repaired outputs +#' +#' @description +#' This function seeks to preserve the original data columns and types from the +#' originally supplied data_tbl and add on the new columns from data_tbl_mod. +#' +#' If `names_repair` presents a repair strategy, the output columns will be +#' captured and updated here while dropping the original columns. +#' +#' @param data_tbl The original data table given to [combine_checkboxes()] +#' @param data_tbl_mod A modified data table from `data_tbl` +#' @param new_cols The new columns created for checkbox combination +#' @inheritParams combine_checkboxes +#' +#' @keywords internal +#' +#' @returns a tibble +combine_and_repair_tbls <- function(data_tbl, data_tbl_mod, new_cols, names_repair) { + # Perform initial column bind with repair strategy + data_tbl_mod <- bind_cols(data_tbl_mod, new_cols, .name_repair = names_repair) + + # Get the column names of each table + cols_data_tbl <- names(data_tbl) + cols_data_tbl_mod <- names(data_tbl_mod) + + # Identify common columns + common_cols <- intersect(cols_data_tbl, cols_data_tbl_mod) + + # Identify unique columns in data_tbl_mod + unique_cols_mod <- setdiff(cols_data_tbl_mod, cols_data_tbl) + + # Select common columns from data_tbl + common_data <- data_tbl %>% + select(all_of(common_cols)) + + # Select unique columns from data_tbl_mod + unique_data_mod <- data_tbl_mod %>% + select(all_of(unique_cols_mod)) + + # Combine the selected columns + combined_data <- bind_cols(common_data, unique_data_mod) + + return(combined_data) +} diff --git a/man/combine_and_repair_tbls.Rd b/man/combine_and_repair_tbls.Rd new file mode 100644 index 00000000..e6b35e86 --- /dev/null +++ b/man/combine_and_repair_tbls.Rd @@ -0,0 +1,26 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/combine_checkboxes.R +\name{combine_and_repair_tbls} +\alias{combine_and_repair_tbls} +\title{Combine checkbox fields with respect to repaired outputs} +\usage{ +combine_and_repair_tbls(data_tbl, data_tbl_mod, new_cols, names_repair) +} +\arguments{ +\item{data_tbl}{The original data table given to \code{\link[=combine_checkboxes]{combine_checkboxes()}}} + +\item{data_tbl_mod}{A modified data table from \code{data_tbl}} + +\item{new_cols}{The new columns created for checkbox combination} +} +\value{ +a tibble +} +\description{ +This function seeks to preserve the original data columns and types from the +originally supplied data_tbl and add on the new columns from data_tbl_mod. + +If \code{names_repair} presents a repair strategy, the output columns will be +captured and updated here while dropping the original columns. +} +\keyword{internal} diff --git a/man/combine_checkboxes.Rd b/man/combine_checkboxes.Rd index 676cbd74..71e684ce 100644 --- a/man/combine_checkboxes.Rd +++ b/man/combine_checkboxes.Rd @@ -11,6 +11,8 @@ combine_checkboxes( names_prefix = "", names_suffix = NULL, names_sep = "_", + names_glue = NULL, + names_repair = "check_unique", multi_value_label = "Multiple", values_fill = NA, raw_or_label = "label", diff --git a/tests/testthat/test-combine_checkboxes.R b/tests/testthat/test-combine_checkboxes.R index dc7e7bc6..6a611041 100644 --- a/tests/testthat/test-combine_checkboxes.R +++ b/tests/testthat/test-combine_checkboxes.R @@ -229,3 +229,39 @@ test_that("convert_checkbox_vals works()", { expect_equal(out, expected_out) }) + +test_that("combine_and_repair_tbls works", { + data_tbl <- tibble( + id = 1, + x___1 = TRUE, + x___2 = FALSE, + x = "val" + ) + + data_tbl_mod <- tibble( + id = 1, + x___1 = "A", + x___2 = NA, + x = "val" + ) + + new_cols <- list(x = "A") + + expect_error(combine_and_repair_tbls(data_tbl, data_tbl_mod, new_cols, names_repair = "check_unique")) + expect_no_error(combine_and_repair_tbls(data_tbl, data_tbl_mod, new_cols, names_repair = "unique")) %>% + suppressMessages() + + expected_out <- tibble( + id = 1, + x___1 = TRUE, + x___2 = FALSE, + x...4 = "val", + x...5 = "A" + ) + + expect_equal( + expected_out, + combine_and_repair_tbls(data_tbl, data_tbl_mod, new_cols, names_repair = "unique") + ) %>% + suppressMessages() +}) From a6d150dec703d7576fa138ddca0d55e1b39ee81e Mon Sep 17 00:00:00 2001 From: Richard Hanna Date: Mon, 5 Aug 2024 13:44:01 -0400 Subject: [PATCH 33/39] Remove names_suffix, restructure prefix/sep --- R/combine_checkboxes.R | 13 +++------- man/combine_checkboxes.Rd | 6 +---- man/get_metadata_spec.Rd | 13 ++-------- tests/testthat/test-combine_checkboxes.R | 32 ++++++++++++------------ 4 files changed, 23 insertions(+), 41 deletions(-) diff --git a/R/combine_checkboxes.R b/R/combine_checkboxes.R index e98c5073..5c17e539 100644 --- a/R/combine_checkboxes.R +++ b/R/combine_checkboxes.R @@ -21,9 +21,7 @@ #' @param cols <[`tidy-select`][tidyr_tidy_select]> Checkbox columns to combine to #' single column. Required. #' @param names_prefix String added to the start of every variable name. -#' @param names_suffix String added to the end of every variable name. -#' @param names_sep String to separate new column names from `names_prefix` and/or -#' `names_suffix`. +#' @param names_sep String to separate new column names from `names_prefix`. #' @param multi_value_label A string specifying the value to be used when multiple #' checkbox fields are selected. Default "Multiple". #' @param values_fill Value to use when no checkboxes are selected. Default `NA`. @@ -52,7 +50,6 @@ combine_checkboxes <- function(supertbl, tbl, cols, names_prefix = "", - names_suffix = NULL, names_sep = "_", names_glue = NULL, names_repair = "check_unique", @@ -63,7 +60,6 @@ combine_checkboxes <- function(supertbl, # Check args --- check_arg_is_supertbl(supertbl, req_cols = c("redcap_data", "redcap_metadata")) check_arg_is_character(names_prefix, len = 1) - check_arg_is_character(names_suffix, len = 1, null.ok = TRUE) check_arg_is_character(names_sep, len = 1, any.missing = TRUE) check_arg_is_character(tbl, len = 1, any.missing = FALSE) check_arg_is_character(multi_value_label, len = 1, any.missing = TRUE) @@ -84,7 +80,7 @@ combine_checkboxes <- function(supertbl, # Get metadata reference table, check that chosen fields are checkboxes metadata_tbl <- supertbl$redcap_metadata[supertbl$redcap_form_name == tbl][[1]] - metadata_spec <- get_metadata_spec(metadata_tbl, selected_cols, names_prefix, names_suffix, names_sep) + metadata_spec <- get_metadata_spec(metadata_tbl, selected_cols, names_prefix, names_sep) # Copy data_tbl to mod, data_tbl to be referenced later data_tbl_mod <- data_tbl @@ -135,7 +131,6 @@ combine_checkboxes <- function(supertbl, get_metadata_spec <- function(metadata_tbl, selected_cols, names_prefix, - names_suffix, names_sep) { check_metadata_fields_exist(metadata_tbl, selected_cols) @@ -144,8 +139,8 @@ get_metadata_spec <- function(metadata_tbl, filter(.data$field_name %in% selected_cols) %>% mutate( .value = sub("___.*$", "", .data$field_name), - .new_value = case_when(!is.null(names_suffix) ~ paste(names_prefix, .value, names_suffix, sep = names_sep), - .default = paste(names_prefix, .data$.value, sep = names_sep) + .new_value = case_when(names_prefix != "" ~ paste(names_prefix, .value, sep = names_sep), + .default = paste(names_prefix, .data$.value, sep = "") ) ) diff --git a/man/combine_checkboxes.Rd b/man/combine_checkboxes.Rd index 71e684ce..b0968c04 100644 --- a/man/combine_checkboxes.Rd +++ b/man/combine_checkboxes.Rd @@ -9,7 +9,6 @@ combine_checkboxes( tbl, cols, names_prefix = "", - names_suffix = NULL, names_sep = "_", names_glue = NULL, names_repair = "check_unique", @@ -29,10 +28,7 @@ single column. Required.} \item{names_prefix}{String added to the start of every variable name.} -\item{names_suffix}{String added to the end of every variable name.} - -\item{names_sep}{String to separate new column names from \code{names_prefix} and/or -\code{names_suffix}.} +\item{names_sep}{String to separate new column names from \code{names_prefix}.} \item{multi_value_label}{A string specifying the value to be used when multiple checkbox fields are selected. Default "Multiple".} diff --git a/man/get_metadata_spec.Rd b/man/get_metadata_spec.Rd index dc5ade39..9b613ffc 100644 --- a/man/get_metadata_spec.Rd +++ b/man/get_metadata_spec.Rd @@ -4,13 +4,7 @@ \alias{get_metadata_spec} \title{Get metadata specification table} \usage{ -get_metadata_spec( - metadata_tbl, - selected_cols, - names_prefix, - names_suffix, - names_sep -) +get_metadata_spec(metadata_tbl, selected_cols, names_prefix, names_sep) } \arguments{ \item{metadata_tbl}{A metadata tibble from the supertibble generated by \code{\link[=read_redcap]{read_redcap()}}.} @@ -19,10 +13,7 @@ get_metadata_spec( \item{names_prefix}{String added to the start of every variable name.} -\item{names_suffix}{String added to the end of every variable name.} - -\item{names_sep}{String to separate new column names from \code{names_prefix} and/or -\code{names_suffix}.} +\item{names_sep}{String to separate new column names from \code{names_prefix}.} } \value{ a tibble diff --git a/tests/testthat/test-combine_checkboxes.R b/tests/testthat/test-combine_checkboxes.R index 6a611041..027bc7d2 100644 --- a/tests/testthat/test-combine_checkboxes.R +++ b/tests/testthat/test-combine_checkboxes.R @@ -61,13 +61,13 @@ test_that("combine_checkboxes works for nonrepeat instrument", { dplyr::first() expected_out <- tibble::tribble( - ~"study_id", ~"multi___1", ~"multi___2", ~"multi___3", ~"single_checkbox___1", ~"extra_data", ~"_multi", + ~"study_id", ~"multi___1", ~"multi___2", ~"multi___3", ~"single_checkbox___1", ~"extra_data", ~"multi", 1, TRUE, FALSE, FALSE, TRUE, 1, "Red", 2, TRUE, TRUE, FALSE, TRUE, 2, "multiple", 3, FALSE, FALSE, FALSE, FALSE, 3, "none" ) %>% mutate( - `_multi` = factor(`_multi`, levels = c("Red", "Yellow", "Blue", "multiple", "none")) + multi = factor(multi, levels = c("Red", "Yellow", "Blue", "multiple", "none")) ) expect_equal(out, expected_out) @@ -84,13 +84,13 @@ test_that("combine_checkboxes works for nonrepeat instrument and drop old values dplyr::first() expected_out <- tibble::tribble( - ~"study_id", ~"single_checkbox___1", ~"extra_data", ~"_multi", + ~"study_id", ~"single_checkbox___1", ~"extra_data", ~"multi", 1, TRUE, 1, "Red", 2, TRUE, 2, "Multiple", 3, FALSE, 3, NA ) %>% mutate( - `_multi` = factor(`_multi`, levels = c("Red", "Yellow", "Blue", "Multiple")) + multi = factor(multi, levels = c("Red", "Yellow", "Blue", "Multiple")) ) expect_equal(out, expected_out) @@ -106,13 +106,13 @@ test_that("combine_checkboxes works for repeat instrument", { dplyr::nth(2) expected_out <- tibble::tribble( - ~"study_id", ~"redcap_event", ~"redcap_form_instance", ~"repeat___1", ~"repeat___2", ~"repeat___3", ~"_repeat", + ~"study_id", ~"redcap_event", ~"redcap_form_instance", ~"repeat___1", ~"repeat___2", ~"repeat___3", ~"repeat", 1, "event_1", 1, TRUE, FALSE, FALSE, "A", 2, "event_1", 1, TRUE, TRUE, TRUE, "Multiple", 2, "event_1", 2, FALSE, FALSE, FALSE, NA ) %>% mutate( - `_repeat` = factor(`_repeat`, levels = c("A", "B", "C", "Multiple")) + `repeat` = factor(`repeat`, levels = c("A", "B", "C", "Multiple")) ) expect_equal(out, expected_out) @@ -122,14 +122,14 @@ test_that("get_metadata_spec works", { out <- get_metadata_spec( metadata_tbl = supertbl$redcap_metadata[[1]], selected_cols = c("multi___1", "multi___2", "multi___3"), - names_prefix = "", names_suffix = NULL, names_sep = "_" # Mimic defaults + names_prefix = "", names_sep = "_" # Mimic defaults ) expected_out <- tibble::tribble( ~"field_name", ~".value", ~".new_value", ~"raw", ~"label", - "multi___1", "multi", "_multi", "1", "Red", - "multi___2", "multi", "_multi", "2", "Yellow", - "multi___3", "multi", "_multi", "3", "Blue" + "multi___1", "multi", "multi", "1", "Red", + "multi___2", "multi", "multi", "2", "Yellow", + "multi___3", "multi", "multi", "3", "Blue" ) expect_equal(out, expected_out) @@ -165,14 +165,14 @@ test_that("combine_checkboxes works for multiple checkbox fields", { dplyr::first() expected_out <- tibble::tribble( - ~"study_id", ~"extra_data", ~"_multi", ~"_single_checkbox", + ~"study_id", ~"extra_data", ~"multi", ~"single_checkbox", 1, 1, "Red", "Green", 2, 2, "Multiple", "Green", 3, 3, NA, NA ) %>% mutate( - `_multi` = factor(`_multi`, levels = c("Red", "Yellow", "Blue", "Multiple")), - `_single_checkbox` = factor(`_single_checkbox`, levels = c("Green", "Multiple")) + multi = factor(multi, levels = c("Red", "Yellow", "Blue", "Multiple")), + single_checkbox = factor(single_checkbox, levels = c("Green", "Multiple")) ) expect_equal(out, expected_out) @@ -189,14 +189,14 @@ test_that("combine_checkboxes works for multiple checkbox fields with logicals", dplyr::first() expected_out <- tibble::tribble( - ~"study_id", ~"extra_data", ~"_multi", ~"_single_checkbox", + ~"study_id", ~"extra_data", ~"multi", ~"single_checkbox", 1, 1, "Red", "Green", 2, 2, "Multiple", "Green", 3, 3, NA, NA ) %>% mutate( - `_multi` = factor(`_multi`, levels = c("Red", "Yellow", "Blue", "Multiple")), - `_single_checkbox` = factor(`_single_checkbox`, levels = c("Green", "Multiple")) + multi = factor(multi, levels = c("Red", "Yellow", "Blue", "Multiple")), + single_checkbox = factor(single_checkbox, levels = c("Green", "Multiple")) ) expect_equal(out, expected_out) From 0f868b8ccf84f66a97458a8bd5b34935125b65eb Mon Sep 17 00:00:00 2001 From: Richard Hanna Date: Mon, 5 Aug 2024 14:42:57 -0400 Subject: [PATCH 34/39] Add names_glue spec --- R/combine_checkboxes.R | 31 +++++++++++++++++------- man/combine_checkboxes.Rd | 3 +++ man/get_metadata_spec.Rd | 11 ++++++++- tests/testthat/test-combine_checkboxes.R | 27 +++++++++++++++++++++ 4 files changed, 62 insertions(+), 10 deletions(-) diff --git a/R/combine_checkboxes.R b/R/combine_checkboxes.R index 5c17e539..027b2f0f 100644 --- a/R/combine_checkboxes.R +++ b/R/combine_checkboxes.R @@ -22,6 +22,8 @@ #' single column. Required. #' @param names_prefix String added to the start of every variable name. #' @param names_sep String to separate new column names from `names_prefix`. +#' @param names_glue Instead of `names_sep` and `names_prefix`, you can supply +#' a glue specification and the unique `.value` to create custom column names. #' @param multi_value_label A string specifying the value to be used when multiple #' checkbox fields are selected. Default "Multiple". #' @param values_fill Value to use when no checkboxes are selected. Default `NA`. @@ -80,7 +82,7 @@ combine_checkboxes <- function(supertbl, # Get metadata reference table, check that chosen fields are checkboxes metadata_tbl <- supertbl$redcap_metadata[supertbl$redcap_form_name == tbl][[1]] - metadata_spec <- get_metadata_spec(metadata_tbl, selected_cols, names_prefix, names_sep) + metadata_spec <- get_metadata_spec(metadata_tbl, selected_cols, names_prefix, names_sep, names_glue) # Copy data_tbl to mod, data_tbl to be referenced later data_tbl_mod <- data_tbl @@ -131,18 +133,29 @@ combine_checkboxes <- function(supertbl, get_metadata_spec <- function(metadata_tbl, selected_cols, names_prefix, - names_sep) { + names_sep, + names_glue) { check_metadata_fields_exist(metadata_tbl, selected_cols) # Create a metadata reference table linking field name to raw and label values - out <- metadata_tbl %>% - filter(.data$field_name %in% selected_cols) %>% - mutate( - .value = sub("___.*$", "", .data$field_name), - .new_value = case_when(names_prefix != "" ~ paste(names_prefix, .value, sep = names_sep), - .default = paste(names_prefix, .data$.value, sep = "") + if (!is.null(names_glue)) { + # Similar to pivot_*, use of `names_glue` overrides use of names_prefix/sep + out <- metadata_tbl %>% + filter(.data$field_name %in% selected_cols) %>% + mutate( + .value = sub("___.*$", "", .data$field_name), + .new_value = as.character(glue::glue(names_glue)) ) - ) + } else { + out <- metadata_tbl %>% + filter(.data$field_name %in% selected_cols) %>% + mutate( + .value = sub("___.*$", "", .data$field_name), + .new_value = case_when(names_prefix != "" ~ paste(names_prefix, .value, sep = names_sep), + .default = paste(names_prefix, .data$.value, sep = "") + ) + ) + } # Make sure selection is checkbox metadata field type check_fields_are_checkboxes(out) diff --git a/man/combine_checkboxes.Rd b/man/combine_checkboxes.Rd index b0968c04..f95c74a3 100644 --- a/man/combine_checkboxes.Rd +++ b/man/combine_checkboxes.Rd @@ -30,6 +30,9 @@ single column. Required.} \item{names_sep}{String to separate new column names from \code{names_prefix}.} +\item{names_glue}{Instead of \code{names_sep} and \code{names_prefix}, you can supply +a glue specification and the unique \code{.value} to create custom column names.} + \item{multi_value_label}{A string specifying the value to be used when multiple checkbox fields are selected. Default "Multiple".} diff --git a/man/get_metadata_spec.Rd b/man/get_metadata_spec.Rd index 9b613ffc..efe22d10 100644 --- a/man/get_metadata_spec.Rd +++ b/man/get_metadata_spec.Rd @@ -4,7 +4,13 @@ \alias{get_metadata_spec} \title{Get metadata specification table} \usage{ -get_metadata_spec(metadata_tbl, selected_cols, names_prefix, names_sep) +get_metadata_spec( + metadata_tbl, + selected_cols, + names_prefix, + names_sep, + names_glue +) } \arguments{ \item{metadata_tbl}{A metadata tibble from the supertibble generated by \code{\link[=read_redcap]{read_redcap()}}.} @@ -14,6 +20,9 @@ get_metadata_spec(metadata_tbl, selected_cols, names_prefix, names_sep) \item{names_prefix}{String added to the start of every variable name.} \item{names_sep}{String to separate new column names from \code{names_prefix}.} + +\item{names_glue}{Instead of \code{names_sep} and \code{names_prefix}, you can supply +a glue specification and the unique \code{.value} to create custom column names.} } \value{ a tibble diff --git a/tests/testthat/test-combine_checkboxes.R b/tests/testthat/test-combine_checkboxes.R index 027bc7d2..56325995 100644 --- a/tests/testthat/test-combine_checkboxes.R +++ b/tests/testthat/test-combine_checkboxes.R @@ -73,6 +73,33 @@ test_that("combine_checkboxes works for nonrepeat instrument", { expect_equal(out, expected_out) }) +test_that("combine_checkboxes glue spec works", { + name_appender <- "suffix" + + out <- combine_checkboxes( + supertbl = supertbl, + tbl = "nonrepeat_instrument", + cols = starts_with("multi"), + names_glue = "{.value}_{name_appender}", + multi_value_label = "multiple", # multi_value_label declared + values_fill = "none" # values_fill declared + ) %>% + pull(redcap_data) %>% + dplyr::first() + + expected_out <- tibble::tribble( + ~"study_id", ~"multi___1", ~"multi___2", ~"multi___3", ~"single_checkbox___1", ~"extra_data", ~"multi_suffix", + 1, TRUE, FALSE, FALSE, TRUE, 1, "Red", + 2, TRUE, TRUE, FALSE, TRUE, 2, "multiple", + 3, FALSE, FALSE, FALSE, FALSE, 3, "none" + ) %>% + mutate( + multi_suffix = factor(multi_suffix, levels = c("Red", "Yellow", "Blue", "multiple", "none")) + ) + + expect_equal(out, expected_out) +}) + test_that("combine_checkboxes works for nonrepeat instrument and drop old values", { out <- combine_checkboxes( supertbl = supertbl, From abefbee242f919ab01e7a6f8e08f5149ed312c5c Mon Sep 17 00:00:00 2001 From: Richard Hanna Date: Mon, 5 Aug 2024 16:57:43 -0400 Subject: [PATCH 35/39] Add glue support with names_glue --- DESCRIPTION | 1 + R/combine_checkboxes.R | 9 ++++++++- man/combine_and_repair_tbls.Rd | 5 +++++ man/combine_checkboxes.Rd | 5 +++++ tests/testthat/test-combine_checkboxes.R | 6 ++---- 5 files changed, 21 insertions(+), 5 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 6019ff00..91a7cdbe 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -38,6 +38,7 @@ Imports: stats Suggests: covr, + glue, knitr, labelled, lintr, diff --git a/R/combine_checkboxes.R b/R/combine_checkboxes.R index 027b2f0f..20bd638b 100644 --- a/R/combine_checkboxes.R +++ b/R/combine_checkboxes.R @@ -24,6 +24,10 @@ #' @param names_sep String to separate new column names from `names_prefix`. #' @param names_glue Instead of `names_sep` and `names_prefix`, you can supply #' a glue specification and the unique `.value` to create custom column names. +#' @param names_repair What happens if the output has invalid column names? +#' The default, "check_unique" is to error if the columns are duplicated. +#' Use "minimal" to allow duplicates in the output, or "unique" to de-duplicated +#' by adding numeric suffixes. See [vctrs::vec_as_names()] for more options. #' @param multi_value_label A string specifying the value to be used when multiple #' checkbox fields are selected. Default "Multiple". #' @param values_fill Value to use when no checkboxes are selected. Default `NA`. @@ -63,6 +67,8 @@ combine_checkboxes <- function(supertbl, check_arg_is_supertbl(supertbl, req_cols = c("redcap_data", "redcap_metadata")) check_arg_is_character(names_prefix, len = 1) check_arg_is_character(names_sep, len = 1, any.missing = TRUE) + check_arg_is_character(names_repair, len = 1, any.missing = FALSE) + check_arg_is_character(names_glue, len = 1, any.missing = FALSE, null.ok = TRUE) check_arg_is_character(tbl, len = 1, any.missing = FALSE) check_arg_is_character(multi_value_label, len = 1, any.missing = TRUE) check_arg_is_character(values_fill, len = 1, any.missing = TRUE) @@ -139,6 +145,7 @@ get_metadata_spec <- function(metadata_tbl, # Create a metadata reference table linking field name to raw and label values if (!is.null(names_glue)) { + check_installed("glue", reason = "to use `names_glue` in `combine_checkboxes()`") # Similar to pivot_*, use of `names_glue` overrides use of names_prefix/sep out <- metadata_tbl %>% filter(.data$field_name %in% selected_cols) %>% @@ -152,7 +159,7 @@ get_metadata_spec <- function(metadata_tbl, mutate( .value = sub("___.*$", "", .data$field_name), .new_value = case_when(names_prefix != "" ~ paste(names_prefix, .value, sep = names_sep), - .default = paste(names_prefix, .data$.value, sep = "") + .default = paste(names_prefix, .data$.value, sep = "") ) ) } diff --git a/man/combine_and_repair_tbls.Rd b/man/combine_and_repair_tbls.Rd index e6b35e86..edb4f653 100644 --- a/man/combine_and_repair_tbls.Rd +++ b/man/combine_and_repair_tbls.Rd @@ -12,6 +12,11 @@ combine_and_repair_tbls(data_tbl, data_tbl_mod, new_cols, names_repair) \item{data_tbl_mod}{A modified data table from \code{data_tbl}} \item{new_cols}{The new columns created for checkbox combination} + +\item{names_repair}{What happens if the output has invalid column names? +The default, "check_unique" is to error if the columns are duplicated. +Use "minimal" to allow duplicates in the output, or "unique" to de-duplicated +by adding numeric suffixes. See \code{\link[vctrs:vec_as_names]{vctrs::vec_as_names()}} for more options.} } \value{ a tibble diff --git a/man/combine_checkboxes.Rd b/man/combine_checkboxes.Rd index f95c74a3..3d78a441 100644 --- a/man/combine_checkboxes.Rd +++ b/man/combine_checkboxes.Rd @@ -33,6 +33,11 @@ single column. Required.} \item{names_glue}{Instead of \code{names_sep} and \code{names_prefix}, you can supply a glue specification and the unique \code{.value} to create custom column names.} +\item{names_repair}{What happens if the output has invalid column names? +The default, "check_unique" is to error if the columns are duplicated. +Use "minimal" to allow duplicates in the output, or "unique" to de-duplicated +by adding numeric suffixes. See \code{\link[vctrs:vec_as_names]{vctrs::vec_as_names()}} for more options.} + \item{multi_value_label}{A string specifying the value to be used when multiple checkbox fields are selected. Default "Multiple".} diff --git a/tests/testthat/test-combine_checkboxes.R b/tests/testthat/test-combine_checkboxes.R index 56325995..8f2f3af7 100644 --- a/tests/testthat/test-combine_checkboxes.R +++ b/tests/testthat/test-combine_checkboxes.R @@ -74,13 +74,11 @@ test_that("combine_checkboxes works for nonrepeat instrument", { }) test_that("combine_checkboxes glue spec works", { - name_appender <- "suffix" - out <- combine_checkboxes( supertbl = supertbl, tbl = "nonrepeat_instrument", cols = starts_with("multi"), - names_glue = "{.value}_{name_appender}", + names_glue = "{.value}_suffix", multi_value_label = "multiple", # multi_value_label declared values_fill = "none" # values_fill declared ) %>% @@ -149,7 +147,7 @@ test_that("get_metadata_spec works", { out <- get_metadata_spec( metadata_tbl = supertbl$redcap_metadata[[1]], selected_cols = c("multi___1", "multi___2", "multi___3"), - names_prefix = "", names_sep = "_" # Mimic defaults + names_prefix = "", names_sep = "_", names_glue = NULL # Mimic defaults ) expected_out <- tibble::tribble( From 06d13372f30bfabefe91e707b5774ab811fe8ee9 Mon Sep 17 00:00:00 2001 From: Richard Hanna Date: Mon, 12 Aug 2024 13:05:30 -0400 Subject: [PATCH 36/39] Make glue dependency, remove install check --- DESCRIPTION | 2 +- R/combine_checkboxes.R | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 91a7cdbe..65931fb1 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -21,6 +21,7 @@ Imports: checkmate, cli, dplyr, + glue, lobstr, lubridate, purrr, @@ -38,7 +39,6 @@ Imports: stats Suggests: covr, - glue, knitr, labelled, lintr, diff --git a/R/combine_checkboxes.R b/R/combine_checkboxes.R index 20bd638b..49a7e7df 100644 --- a/R/combine_checkboxes.R +++ b/R/combine_checkboxes.R @@ -145,7 +145,6 @@ get_metadata_spec <- function(metadata_tbl, # Create a metadata reference table linking field name to raw and label values if (!is.null(names_glue)) { - check_installed("glue", reason = "to use `names_glue` in `combine_checkboxes()`") # Similar to pivot_*, use of `names_glue` overrides use of names_prefix/sep out <- metadata_tbl %>% filter(.data$field_name %in% selected_cols) %>% From dcb10291f76d12a58aa77a4ab7e67f57137c4358 Mon Sep 17 00:00:00 2001 From: Richard Hanna Date: Mon, 12 Aug 2024 14:36:46 -0400 Subject: [PATCH 37/39] Update glue spec handling - enforced check for new value levels - ensure failure still occurs for use of metadata col names --- R/combine_checkboxes.R | 35 ++++++++++++++++++------ tests/testthat/test-combine_checkboxes.R | 26 ++++++++++++++++++ 2 files changed, 52 insertions(+), 9 deletions(-) diff --git a/R/combine_checkboxes.R b/R/combine_checkboxes.R index 49a7e7df..d304b835 100644 --- a/R/combine_checkboxes.R +++ b/R/combine_checkboxes.R @@ -144,19 +144,36 @@ get_metadata_spec <- function(metadata_tbl, check_metadata_fields_exist(metadata_tbl, selected_cols) # Create a metadata reference table linking field name to raw and label values + out <- metadata_tbl %>% + filter(.data$field_name %in% selected_cols) %>% + mutate( + .value = sub("___.*$", "", .data$field_name) + ) + if (!is.null(names_glue)) { # Similar to pivot_*, use of `names_glue` overrides use of names_prefix/sep - out <- metadata_tbl %>% - filter(.data$field_name %in% selected_cols) %>% - mutate( - .value = sub("___.*$", "", .data$field_name), - .new_value = as.character(glue::glue(names_glue)) - ) + glue_env <- out %>% + select(.data$.value) %>% + mutate(.new_value = as.character(glue::glue_data(., names_glue))) %>% #nolint: object_usage_linter + select(.data$.new_value) + + out <- cbind(out, glue_env) + + # Enforce .new_value to be the same within each level of .value + values <- factor(out$.value, levels = unique(out$.value)) + + # Create a mapping of old levels to new levels + level_mapping <- setNames(unique(glue_env$.new_value), levels(values)) + + # Ensure new_values matches the levels of values + new_values <- factor(level_mapping[as.character(values)], + levels = unique(level_mapping) + ) + + out$.new_value <- as.character(new_values) } else { - out <- metadata_tbl %>% - filter(.data$field_name %in% selected_cols) %>% + out <- out %>% mutate( - .value = sub("___.*$", "", .data$field_name), .new_value = case_when(names_prefix != "" ~ paste(names_prefix, .value, sep = names_sep), .default = paste(names_prefix, .data$.value, sep = "") ) diff --git a/tests/testthat/test-combine_checkboxes.R b/tests/testthat/test-combine_checkboxes.R index 8f2f3af7..2aa2f1f8 100644 --- a/tests/testthat/test-combine_checkboxes.R +++ b/tests/testthat/test-combine_checkboxes.R @@ -96,6 +96,32 @@ test_that("combine_checkboxes glue spec works", { ) expect_equal(out, expected_out) + + # glue spec with multiple values + out <- combine_checkboxes( + supertbl = supertbl, + tbl = "nonrepeat_instrument", + cols = c(starts_with("multi"), starts_with("single_checkbox")), + names_glue = "{.value}_suffix", + multi_value_label = "multiple", # multi_value_label declared + values_fill = "none" # values_fill declared + ) %>% + pull(redcap_data) %>% + dplyr::first() + + expected_out <- tibble::tribble( + ~"study_id", ~"multi___1", ~"multi___2", ~"multi___3", ~"single_checkbox___1", + ~"extra_data", ~"multi_suffix", ~"single_checkbox_suffix", + 1, TRUE, FALSE, FALSE, TRUE, 1, "Red", "Green", + 2, TRUE, TRUE, FALSE, TRUE, 2, "multiple", "Green", + 3, FALSE, FALSE, FALSE, FALSE, 3, "none", "none" + ) %>% + mutate( + multi_suffix = factor(multi_suffix, levels = c("Red", "Yellow", "Blue", "multiple", "none")), + single_checkbox_suffix = factor(single_checkbox_suffix, levels = c("Green", "multiple", "none")) + ) + + expect_equal(out, expected_out) }) test_that("combine_checkboxes works for nonrepeat instrument and drop old values", { From 02956500e58b478107ecf08dcffcee9382d23954 Mon Sep 17 00:00:00 2001 From: Richard Hanna Date: Mon, 12 Aug 2024 16:58:20 -0400 Subject: [PATCH 38/39] check_equal_col_summaries() implementation --- NAMESPACE | 1 + R/REDCapTidieR-package.R | 2 +- R/checks.R | 36 ++++++++++++++++++++++++++++++++ R/combine_checkboxes.R | 18 ++++------------ man/check_equal_col_summaries.Rd | 20 ++++++++++++++++++ tests/testthat/test-checks.R | 21 +++++++++++++++++++ 6 files changed, 83 insertions(+), 15 deletions(-) create mode 100644 man/check_equal_col_summaries.Rd diff --git a/NAMESPACE b/NAMESPACE index a9fbdb2a..64b1519b 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -51,6 +51,7 @@ importFrom(dplyr,if_any) importFrom(dplyr,if_else) importFrom(dplyr,left_join) importFrom(dplyr,mutate) +importFrom(dplyr,n_distinct) importFrom(dplyr,nth) importFrom(dplyr,pull) importFrom(dplyr,recode) diff --git a/R/REDCapTidieR-package.R b/R/REDCapTidieR-package.R index 16d54f28..bb42a6ce 100644 --- a/R/REDCapTidieR-package.R +++ b/R/REDCapTidieR-package.R @@ -6,7 +6,7 @@ #' @importFrom cli cli_abort cli_fmt cli_text cli_vec cli_warn qty #' @importFrom dplyr %>% across bind_rows case_when filter group_by if_any if_else #' left_join mutate pull recode relocate rename right_join row_number rowwise -#' select slice summarise ungroup coalesce cur_column bind_cols first nth +#' select slice summarise ungroup coalesce cur_column bind_cols first nth n_distinct #' @importFrom formattable percent #' @importFrom lobstr obj_size #' @importFrom lubridate is.difftime is.period is.POSIXt is.Date diff --git a/R/checks.R b/R/checks.R index f708ba43..c725dd70 100644 --- a/R/checks.R +++ b/R/checks.R @@ -741,3 +741,39 @@ check_fields_are_checkboxes <- function(metadata_tbl, call = caller_env()) { ) } } + +#' @title Check equal distinct values between two columns +#' +#' @description +#' Takes a dataframe and two columns and checks if [n_distinct()] of the second +#' column is all unique based on grouping of the first column. +#' +#' @param data a dataframe +#' @param col1 a column to group by +#' @param col2 a column to check for uniqueness +#' +#' @keywords internal + +check_equal_col_summaries <- function(data, col1, col2, call = caller_env()) { + check <- data %>% + summarise( + .by = {{ col1 }}, + n = n_distinct({{ col2 }}) + ) %>% + pull(.data$n) + + if (!all(check == 1)) { + values1 <- data[[eval_select(enquo(col1), data)]] # nolint: object_usage_linter + values2 <- data[[eval_select(enquo(col2), data)]] # nolint: object_usage_linter + + msg <- c( + x = "Encountered unequal naming outputs.", + `!` = "{.code combine_checkboxes()} call resulted in column output: {.code {values1}} and new column output: {.code {values2}}." # nolint: line_length_linter + ) + + cli_abort( + msg, + class = c("unequal_col_summary", "REDCapTidieR_cond") + ) + } +} diff --git a/R/combine_checkboxes.R b/R/combine_checkboxes.R index d304b835..17de2f33 100644 --- a/R/combine_checkboxes.R +++ b/R/combine_checkboxes.R @@ -154,23 +154,10 @@ get_metadata_spec <- function(metadata_tbl, # Similar to pivot_*, use of `names_glue` overrides use of names_prefix/sep glue_env <- out %>% select(.data$.value) %>% - mutate(.new_value = as.character(glue::glue_data(., names_glue))) %>% #nolint: object_usage_linter + mutate(.new_value = as.character(glue::glue_data(., names_glue))) %>% # nolint: object_usage_linter select(.data$.new_value) out <- cbind(out, glue_env) - - # Enforce .new_value to be the same within each level of .value - values <- factor(out$.value, levels = unique(out$.value)) - - # Create a mapping of old levels to new levels - level_mapping <- setNames(unique(glue_env$.new_value), levels(values)) - - # Ensure new_values matches the levels of values - new_values <- factor(level_mapping[as.character(values)], - levels = unique(level_mapping) - ) - - out$.new_value <- as.character(new_values) } else { out <- out %>% mutate( @@ -180,6 +167,9 @@ get_metadata_spec <- function(metadata_tbl, ) } + # Check that for each unique value of .value there is one unique value of .new_value + check_equal_col_summaries(out, .value, .new_value) # nolint: object_usage_linter + # Make sure selection is checkbox metadata field type check_fields_are_checkboxes(out) diff --git a/man/check_equal_col_summaries.Rd b/man/check_equal_col_summaries.Rd new file mode 100644 index 00000000..68af2920 --- /dev/null +++ b/man/check_equal_col_summaries.Rd @@ -0,0 +1,20 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/checks.R +\name{check_equal_col_summaries} +\alias{check_equal_col_summaries} +\title{Check equal distinct values between two columns} +\usage{ +check_equal_col_summaries(data, col1, col2, call = caller_env()) +} +\arguments{ +\item{data}{a dataframe} + +\item{col1}{a column to group by} + +\item{col2}{a column to check for uniqueness} +} +\description{ +Takes a dataframe and two columns and checks if \code{\link[=n_distinct]{n_distinct()}} of the second +column is all unique based on grouping of the first column. +} +\keyword{internal} diff --git a/tests/testthat/test-checks.R b/tests/testthat/test-checks.R index 3fefdf8d..47fa1f96 100644 --- a/tests/testthat/test-checks.R +++ b/tests/testthat/test-checks.R @@ -293,3 +293,24 @@ test_that("check_fields_are_checkboxes works", { expect_error(check_fields_are_checkboxes(metadata), class = "non_checkbox_fields") expect_no_error(check_fields_are_checkboxes(metadata_filtered)) }) + +test_that("check_equal_col_summaries works", { + data <- tibble::tribble( + ~"id", ~"col1", ~"col2", + 1, "A", "A1", + 2, "B", "B1", + 3, "C", "C1" + ) + + expect_no_error(check_equal_col_summaries(data, col1, col2)) + + error_data <- tibble::tribble( + ~"id", ~"col1", ~"col2", + 1, "A", "A1", + 2, "B", "B1", + 3, "B", "B2" + ) + + check_equal_col_summaries(error_data, col1, col2) %>% + expect_error(class = "unequal_col_summary") +}) From 127dd464f9f6187964927793230b4912ad836bfc Mon Sep 17 00:00:00 2001 From: Richard Hanna Date: Tue, 13 Aug 2024 12:17:12 -0400 Subject: [PATCH 39/39] Update error message check_equal_col_summaries() --- R/checks.R | 23 +++++++++++++++-------- R/combine_checkboxes.R | 1 + tests/testthat/test-checks.R | 7 ++++--- 3 files changed, 20 insertions(+), 11 deletions(-) diff --git a/R/checks.R b/R/checks.R index c725dd70..bf48ffbe 100644 --- a/R/checks.R +++ b/R/checks.R @@ -755,25 +755,32 @@ check_fields_are_checkboxes <- function(metadata_tbl, call = caller_env()) { #' @keywords internal check_equal_col_summaries <- function(data, col1, col2, call = caller_env()) { - check <- data %>% + summary <- data %>% summarise( .by = {{ col1 }}, n = n_distinct({{ col2 }}) - ) %>% + ) + + total_n <- summary %>% pull(.data$n) - if (!all(check == 1)) { - values1 <- data[[eval_select(enquo(col1), data)]] # nolint: object_usage_linter - values2 <- data[[eval_select(enquo(col2), data)]] # nolint: object_usage_linter + if (!all(total_n == 1)) { + col1_n_vals <- summary %>% + filter(.data$n > 1) %>% + pull(col1) + + col2_n_vals <- data %>% # nolint: object_usage_linter + filter(col1 %in% col1_n_vals) %>% + pull(col2) msg <- c( - x = "Encountered unequal naming outputs.", - `!` = "{.code combine_checkboxes()} call resulted in column output: {.code {values1}} and new column output: {.code {values2}}." # nolint: line_length_linter + x = "{.code {col1_n_vals}} checkbox field{?s} resulted in multiple output columns: {.code {col2_n_vals}}.", + `!` = "Check that {.code names_glue} defines only 1 output column for each checkbox field." # nolint: line_length_linter ) cli_abort( msg, - class = c("unequal_col_summary", "REDCapTidieR_cond") + class = c("names_glue_multi_checkbox", "REDCapTidieR_cond") ) } } diff --git a/R/combine_checkboxes.R b/R/combine_checkboxes.R index 17de2f33..a094788b 100644 --- a/R/combine_checkboxes.R +++ b/R/combine_checkboxes.R @@ -168,6 +168,7 @@ get_metadata_spec <- function(metadata_tbl, } # Check that for each unique value of .value there is one unique value of .new_value + # May be removed in the future check_equal_col_summaries(out, .value, .new_value) # nolint: object_usage_linter # Make sure selection is checkbox metadata field type diff --git a/tests/testthat/test-checks.R b/tests/testthat/test-checks.R index 47fa1f96..54573941 100644 --- a/tests/testthat/test-checks.R +++ b/tests/testthat/test-checks.R @@ -307,10 +307,11 @@ test_that("check_equal_col_summaries works", { error_data <- tibble::tribble( ~"id", ~"col1", ~"col2", 1, "A", "A1", - 2, "B", "B1", - 3, "B", "B2" + 2, "A", "A2", + 3, "B", "B1", + 4, "B", "B2" ) check_equal_col_summaries(error_data, col1, col2) %>% - expect_error(class = "unequal_col_summary") + expect_error(class = "names_glue_multi_checkbox") })