diff --git a/.github/workflows/R-CMD-check.yaml b/.github/workflows/R-CMD-check.yaml index c9b86ec0..062e53d4 100644 --- a/.github/workflows/R-CMD-check.yaml +++ b/.github/workflows/R-CMD-check.yaml @@ -29,6 +29,7 @@ jobs: REDCAPTIDIER_LARGE_SPARSE_API: ${{ secrets.REDCAPTIDIER_LARGE_SPARSE_API }} REDCAPTIDIER_DAG_API: ${{ secrets.REDCAPTIDIER_DAG_API }} REDCAPTIDIER_LONGITUDINAL_DAG_API: ${{ secrets.REDCAPTIDIER_LONGITUDINAL_DAG_API }} + REDCAPTIDIER_MIXED_STRUCTURE_API: ${{ secrets.REDCAPTIDIER_MIXED_STRUCTURE_API }} steps: - name: Update Ubuntu, Install cURL Headers, add Libraries run: | diff --git a/.github/workflows/pkgdown.yaml b/.github/workflows/pkgdown.yaml index 1dd23590..0b6da316 100644 --- a/.github/workflows/pkgdown.yaml +++ b/.github/workflows/pkgdown.yaml @@ -25,6 +25,7 @@ jobs: SUPERHEROES_REDCAP_API: ${{ secrets.SUPERHEROES_REDCAP_API }} REDCAPTIDIER_DEEP_DIVE_VIGNETTE_API: ${{ secrets.REDCAPTIDIER_DEEP_DIVE_VIGNETTE_API }} REDCAPTIDIER_DAG_API: ${{ secrets.REDCAPTIDIER_DAG_API }} + REDCAPTIDIER_MIXED_STRUCTURE_API: ${{ secrets.REDCAPTIDIER_MIXED_STRUCTURE_API }} steps: - uses: actions/checkout@v3 diff --git a/DESCRIPTION b/DESCRIPTION index 3fd75276..a55b0122 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: REDCapTidieR Type: Package Title: Extract 'REDCap' Databases into Tidy 'Tibble's -Version: 1.0.0 +Version: 1.1.0 Authors@R: c( person("Richard", "Hanna", , "richardshanna91@gmail.com", role = c("aut", "cre"), comment = c(ORCID = "0009-0005-6496-8154")), diff --git a/NAMESPACE b/NAMESPACE index b3f04e42..137cf7a0 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -92,6 +92,7 @@ importFrom(rlang,try_fetch) importFrom(rlang,zap) importFrom(stringi,stri_split_fixed) importFrom(stringr,str_detect) +importFrom(stringr,str_ends) importFrom(stringr,str_replace) importFrom(stringr,str_replace_all) importFrom(stringr,str_squish) diff --git a/NEWS.md b/NEWS.md index 41de6306..8753513c 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,8 @@ +# 
REDCapTidieR 1.1.0 + +- `read_redcap()` now supports instruments that follow a mixed repeating/non-repeating structure with the `allow_mixed_structure` parameter +- When enabled, instruments with mixed repeating/nonrepeating structure will be treated as single-instance repeating instruments + # REDCapTidieR 1.0.0 Version 1.0.0 diff --git a/R/REDCapTidieR-package.R b/R/REDCapTidieR-package.R index 1f932d5f..c277dc2b 100644 --- a/R/REDCapTidieR-package.R +++ b/R/REDCapTidieR-package.R @@ -18,7 +18,7 @@ #' is_installed new_environment quo_get_expr try_fetch zap as_label #' @importFrom stringi stri_split_fixed #' @importFrom stringr str_detect str_replace str_replace_all str_squish str_trunc -#' str_trim +#' str_trim str_ends #' @importFrom tibble as_tibble is_tibble tibble #' @importFrom tidyr complete fill pivot_wider nest unnest unnest_wider #' @importFrom tidyselect all_of any_of ends_with eval_select everything diff --git a/R/checks.R b/R/checks.R index b2a8a28b..06eb6e45 100644 --- a/R/checks.R +++ b/R/checks.R @@ -101,48 +101,25 @@ check_user_rights <- function(db_data, check_repeat_and_nonrepeat <- function(db_data, call = caller_env()) { - # Identify columns to check for repeat/nonrepeat behavior - safe_cols <- c( - names(db_data)[1], "redcap_event_name", - "redcap_repeat_instrument", "redcap_repeat_instance", - "redcap_data_access_group" - ) - - check_cols <- setdiff(names(db_data), safe_cols) - - # Set up check_data function that looks for repeating and nonrepeating - # behavior in a given column and returns a boolean - check_data <- function(db_data, check_col) { - # Repeating Check - rep <- any(!is.na(db_data[{{ check_col }}]) & !is.na(db_data["redcap_repeat_instrument"])) - - # Nonrepeating Check - nonrep <- any(!is.na(db_data[{{ check_col }}]) & is.na(db_data["redcap_repeat_instrument"])) - - rep & nonrep - } - - # Create a simple dataframe, loop through check columns and append - # dataframe with column being checked and the output of check_data - out 
<- data.frame() - for (i in seq_along(check_cols)) { - rep_and_nonrep <- db_data %>% - check_data(check_col = check_cols[i]) - - field <- check_cols[i] - - out <- rbind(out, data.frame(field, rep_and_nonrep)) - out - } + out <- get_mixed_structure_fields(db_data = db_data) # Filter for violations out <- out %>% - filter(rep_and_nonrep) + filter(.data$rep_and_nonrep) # Produce error message if violations detected if (nrow(out) > 0) { - cli_abort(c("x" = "Instrument{?s} detected that ha{?s/ve} both repeating and - nonrepeating instances defined in the project: {out$field}"), + cli_abort( + c( + "x" = "Instrument{?s} detected that ha{?s/ve} both repeating and + nonrepeating instances defined in the project: {out$field_name}", + "i" = paste0( + "Set {.code allow_mixed_structure} to {.code TRUE} to override. ", + "See ", + "{.href [Mixed Structure Instruments](https://chop-cgtinformatics.github.io/REDCapTidieR/articles/diving_deeper.html#mixed-structure-instruments)} ", # nolint line_length_linter + "for more information." + ) + ), class = c("repeat_nonrepeat_instrument", "REDCapTidieR_cond"), call = call ) diff --git a/R/clean_redcap_long.R b/R/clean_redcap_long.R index 122a4a4d..56402796 100644 --- a/R/clean_redcap_long.R +++ b/R/clean_redcap_long.R @@ -12,6 +12,10 @@ #' \code{REDCapR::redcap_metadata_read()$data} #' @param linked_arms Output of \code{link_arms}, linking instruments to REDCap #' events/arms +#' @param allow_mixed_structure A logical to allow for support of mixed repeating/non-repeating +#' instruments. Setting to `TRUE` will treat the mixed instrument's non-repeating versions +#' as repeating instruments with a single instance. Applies to longitudinal projects +#' only. Default `FALSE`. #' #' @return #' Returns a \code{tibble} with list elements containing tidy dataframes. 
Users @@ -22,7 +26,8 @@ clean_redcap_long <- function(db_data_long, db_metadata_long, - linked_arms) { + linked_arms, + allow_mixed_structure = FALSE) { # Repeating Instrument Check ---- # Check if database supplied contains any repeating instruments to map onto # `redcap_repeat_*` variables @@ -33,30 +38,13 @@ clean_redcap_long <- function(db_data_long, assert_data_frame(db_data_long) assert_data_frame(db_metadata_long) - if (has_repeat_forms) { - check_repeat_and_nonrepeat(db_data_long) - } - - ## Repeating Instruments Logic ---- + ## Repeating Forms Assignment ---- + # Needed first to inform nonrepeating forms logic if (has_repeat_forms) { repeated_forms <- db_data_long %>% filter(!is.na(.data$redcap_repeat_instrument)) %>% pull(.data$redcap_repeat_instrument) %>% unique() - - repeated_forms_tibble <- tibble( - redcap_form_name = repeated_forms, - redcap_data = map( - .data$redcap_form_name, - ~ distill_repeat_table_long( - .x, - db_data_long, - db_metadata_long, - linked_arms - ) - ), - structure = "repeating" - ) } ## Nonrepeating Instruments Logic ---- @@ -86,6 +74,47 @@ clean_redcap_long <- function(db_data_long, structure = "nonrepeating" ) + ## Repeating Instruments Logic ---- + if (has_repeat_forms) { + # If mixed structure allowed, retrieve mixed structure forms + has_mixed_structure_forms <- FALSE # nolint: object_usage_linter + + mixed_structure_ref <- data.frame() + + if (allow_mixed_structure) { + # Retrieve mixed structure fields and forms in reference df + mixed_structure_ref <- get_mixed_structure_fields(db_data_long) %>% + filter(.data$rep_and_nonrep & !str_ends(.data$field_name, "_form_complete")) %>% + left_join(db_metadata_long %>% select(all_of(c("field_name", "form_name"))), + by = "field_name" + ) + + # Update if project actually has mixed structure + has_mixed_structure_forms <- nrow(mixed_structure_ref) > 0 + } else { + check_repeat_and_nonrepeat(db_data_long) + } + + repeated_forms_tibble <- tibble( + redcap_form_name = repeated_forms, + 
redcap_data = map( + .data$redcap_form_name, + ~ distill_repeat_table_long( + .x, + db_data_long, + db_metadata_long, + linked_arms, + has_mixed_structure_forms = has_mixed_structure_forms, + mixed_structure_ref = mixed_structure_ref + ) + ), + structure = case_when( + has_mixed_structure_forms & redcap_form_name %in% mixed_structure_ref$form_name ~ "mixed", + TRUE ~ "repeating" + ) + ) + } + if (has_repeat_forms) { rbind(repeated_forms_tibble, nonrepeated_forms_tibble) } else { @@ -235,13 +264,19 @@ distill_nonrepeat_table_long <- function(form_name, #' \code{REDCapR::redcap_metadata_read()$data} #' @param linked_arms Output of \code{link_arms}, linking instruments to REDCap #' events/arms +#' @param has_mixed_structure_forms Whether the project has any mixed structure +#' instruments. Default `FALSE`. +#' @param mixed_structure_ref A mixed structure reference dataframe supplied +#' by `get_mixed_structure_fields()`. #' #' @keywords internal distill_repeat_table_long <- function(form_name, db_data_long, db_metadata_long, - linked_arms) { + linked_arms, + has_mixed_structure_forms = FALSE, + mixed_structure_ref = NULL) { has_repeat_forms <- "redcap_repeat_instance" %in% names(db_data_long) my_record_id <- names(db_data_long)[1] @@ -275,6 +310,11 @@ distill_repeat_table_long <- function(form_name, my_fields <- c(my_fields, "redcap_data_access_group") } + # If has mixed structure, convert form + if (has_mixed_structure_forms) { + db_data_long <- convert_mixed_instrument(db_data_long, mixed_structure_ref %>% filter(form_name == my_form)) + } + # Setup data for loop redcap_arm linking db_data_long <- db_data_long %>% add_partial_keys(var = .data$redcap_event_name) %>% @@ -337,3 +377,89 @@ distill_repeat_table_long <- function(form_name, out %>% tibble() } + +#' @title Convert Mixed Structure Instruments to Repeating Instruments +#' +#' @description +#' For longitudinal projects where users set `allow_mixed_structure` to `TRUE`, +#' this function will handle the 
process of setting the nonrepeating parts of the +#' instrument to repeating ones with a single instance. +#' +#' @param db_data_long The longitudinal REDCap database output defined by +#' \code{REDCapR::redcap_read_oneshot()$data} +#' @param mixed_structure_ref Reference dataframe containing mixed structure +#' fields and forms. +#' +#' @return +#' Returns \code{db_data_long} with \code{redcap_repeat_instance} set to 1 and +#' \code{redcap_repeat_instrument} set to the form name for rows where a mixed +#' structure field has data but no repeat instance. +#' +#' @keywords internal + +convert_mixed_instrument <- function(db_data_long, mixed_structure_ref) { + for (i in seq_len(nrow(mixed_structure_ref))) { + field <- mixed_structure_ref$field_name[i] + form <- mixed_structure_ref$form_name[i] + + # Create a logical mask for rows needing update + update_mask <- is.na(db_data_long$redcap_repeat_instance) & !is.na(db_data_long[[field]]) + + # Update redcap_repeat_instance + db_data_long$redcap_repeat_instance <- if_else(update_mask, 1, db_data_long$redcap_repeat_instance) + + # Update redcap_repeat_instrument + db_data_long$redcap_repeat_instrument <- if_else(update_mask, form, db_data_long$redcap_repeat_instrument) + } + + db_data_long +} + +#' @title Get Mixed Structure Instrument List +#' +#' @description +#' Define fields in a given project that are used in both a repeating and +#' nonrepeating manner. 
+#' +#' @param db_data The REDCap database output generated by +#' \code{REDCapR::redcap_read_oneshot()$data} +#' +#' @returns A dataframe with one row per checked field: `field_name` and a logical `rep_and_nonrep` flag +#' +#' @keywords internal + +get_mixed_structure_fields <- function(db_data) { + # Identify columns to check for repeat/nonrepeat behavior + safe_cols <- c( + names(db_data)[1], "redcap_event_name", + "redcap_repeat_instrument", "redcap_repeat_instance", + "redcap_data_access_group" + ) + + check_cols <- setdiff(names(db_data), safe_cols) + + # Set up check_data function that looks for repeating and nonrepeating + # behavior in a given column and returns a boolean + check_data <- function(db_data, check_col) { + # Repeating Check + rep <- any(!is.na(db_data[{{ check_col }}]) & !is.na(db_data["redcap_repeat_instrument"])) + + # Nonrepeating Check + nonrep <- any(!is.na(db_data[{{ check_col }}]) & is.na(db_data["redcap_repeat_instrument"])) + + rep & nonrep + } + + # Create a simple dataframe, loop through check columns and append + # dataframe with column being checked and the output of check_data + out <- data.frame() + for (i in seq_along(check_cols)) { + rep_and_nonrep <- db_data %>% + check_data(check_col = check_cols[i]) + + field_name <- check_cols[i] + + out <- rbind(out, data.frame(field_name, rep_and_nonrep)) + } + out +} diff --git a/R/read_redcap.R b/R/read_redcap.R index 74e5cf25..e60b26d4 100644 --- a/R/read_redcap.R +++ b/R/read_redcap.R @@ -53,6 +53,10 @@ #' @param guess_max A positive [base::numeric] value #' passed to [readr::read_csv()] that specifies the maximum number of records to #' use for guessing column types. Default `.Machine$integer.max`. +#' @param allow_mixed_structure A logical to allow for support of mixed repeating/non-repeating +#' instruments. Setting to `TRUE` will treat the mixed instrument's non-repeating versions +#' as repeating instruments with a single instance. Applies to longitudinal projects +#' only. Default `FALSE`. 
Can be set globally with `options(redcaptidier.allow.mixed.structure = TRUE)`. #' #' @examples #' \dontrun{ @@ -75,7 +79,8 @@ read_redcap <- function(redcap_uri, export_survey_fields = NULL, export_data_access_groups = NULL, suppress_redcapr_messages = TRUE, - guess_max = .Machine$integer.max) { + guess_max = .Machine$integer.max, + allow_mixed_structure = getOption("redcaptidier.allow.mixed.structure", FALSE)) { check_arg_is_character(redcap_uri, len = 1, any.missing = FALSE) check_arg_is_character(token, len = 1, any.missing = FALSE) check_arg_is_valid_token(token) @@ -84,6 +89,7 @@ read_redcap <- function(redcap_uri, check_arg_is_logical(export_survey_fields, len = 1, any.missing = FALSE, null.ok = TRUE) check_arg_is_logical(export_data_access_groups, len = 1, any.missing = FALSE, null.ok = TRUE) check_arg_is_logical(suppress_redcapr_messages, len = 1, any.missing = FALSE) + check_arg_is_logical(allow_mixed_structure, len = 1, any.missing = FALSE) # Load REDCap Metadata ---- # Capture unexpected metadata API call errors @@ -267,7 +273,8 @@ read_redcap <- function(redcap_uri, out <- clean_redcap_long( db_data_long = db_data, db_metadata_long = db_metadata, - linked_arms = linked_arms + linked_arms = linked_arms, + allow_mixed_structure = allow_mixed_structure ) } else { out <- clean_redcap( diff --git a/inst/testdata/db_metadata_mixed_structure.RDS b/inst/testdata/db_metadata_mixed_structure.RDS new file mode 100644 index 00000000..490ebbdc Binary files /dev/null and b/inst/testdata/db_metadata_mixed_structure.RDS differ diff --git a/inst/testdata/db_mixed_structure.RDS b/inst/testdata/db_mixed_structure.RDS new file mode 100644 index 00000000..91b86a2a Binary files /dev/null and b/inst/testdata/db_mixed_structure.RDS differ diff --git a/inst/testdata/db_mixed_structure_linked_arms.RDS b/inst/testdata/db_mixed_structure_linked_arms.RDS new file mode 100644 index 00000000..5892f5b2 Binary files /dev/null and b/inst/testdata/db_mixed_structure_linked_arms.RDS 
differ diff --git a/man/clean_redcap_long.Rd b/man/clean_redcap_long.Rd index 9926f46f..46e5fcf0 100644 --- a/man/clean_redcap_long.Rd +++ b/man/clean_redcap_long.Rd @@ -4,7 +4,12 @@ \alias{clean_redcap_long} \title{Extract longitudinal REDCap databases into tidy tibbles} \usage{ -clean_redcap_long(db_data_long, db_metadata_long, linked_arms) +clean_redcap_long( + db_data_long, + db_metadata_long, + linked_arms, + allow_mixed_structure = FALSE +) } \arguments{ \item{db_data_long}{The longitudinal REDCap database output defined by @@ -15,6 +20,11 @@ clean_redcap_long(db_data_long, db_metadata_long, linked_arms) \item{linked_arms}{Output of \code{link_arms}, linking instruments to REDCap events/arms} + +\item{allow_mixed_structure}{A logical to allow for support of mixed repeating/non-repeating +instruments. Setting to \code{TRUE} will treat the mixed instrument's non-repeating versions +as repeating instruments with a single instance. Applies to longitudinal projects +only. Default \code{FALSE}.} } \value{ Returns a \code{tibble} with list elements containing tidy dataframes. Users diff --git a/man/convert_mixed_instrument.Rd b/man/convert_mixed_instrument.Rd new file mode 100644 index 00000000..ed3a2816 --- /dev/null +++ b/man/convert_mixed_instrument.Rd @@ -0,0 +1,26 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/clean_redcap_long.R +\name{convert_mixed_instrument} +\alias{convert_mixed_instrument} +\title{Convert Mixed Structure Instruments to Repeating Instruments} +\usage{ +convert_mixed_instrument(db_data_long, mixed_structure_ref) +} +\arguments{ +\item{db_data_long}{The longitudinal REDCap database output defined by +\code{REDCapR::redcap_read_oneshot()$data}} + +\item{mixed_structure_ref}{Reference dataframe containing mixed structure +fields and forms.} +} +\value{ +Returns a \code{tibble} with list elements containing tidy dataframes. 
Users +can access dataframes under the \code{redcap_data} column with reference to +\code{form_name} and \code{structure} column details. +} +\description{ +For longitudinal projects where users set \code{allow_mixed_structure} to \code{TRUE}, +this function will handle the process of setting the nonrepeating parts of the +instrument to repeating ones with a single instance. +} +\keyword{internal} diff --git a/man/distill_repeat_table_long.Rd b/man/distill_repeat_table_long.Rd index 814abbcb..6cecc0fc 100644 --- a/man/distill_repeat_table_long.Rd +++ b/man/distill_repeat_table_long.Rd @@ -8,7 +8,9 @@ distill_repeat_table_long( form_name, db_data_long, db_metadata_long, - linked_arms + linked_arms, + has_mixed_structure_forms = FALSE, + mixed_structure_ref = NULL ) } \arguments{ @@ -23,6 +25,12 @@ REDCap metadata.} \item{linked_arms}{Output of \code{link_arms}, linking instruments to REDCap events/arms} + +\item{has_mixed_structure}{Whether the instrument under evaluation has a mixed +structure. Default \code{FALSE}.} + +\item{name}{mixed_structure_ref A mixed structure reference dataframe supplied +by \code{get_mixed_structure_fields()}.} } \value{ A \code{tibble} of all data related to a specified \code{form_name} diff --git a/man/get_mixed_structure_fields.Rd b/man/get_mixed_structure_fields.Rd new file mode 100644 index 00000000..13340521 --- /dev/null +++ b/man/get_mixed_structure_fields.Rd @@ -0,0 +1,20 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/clean_redcap_long.R +\name{get_mixed_structure_fields} +\alias{get_mixed_structure_fields} +\title{Get Mixed Structure Instrument List} +\usage{ +get_mixed_structure_fields(db_data) +} +\arguments{ +\item{db_data}{The REDCap database output generated by +\code{REDCapR::redcap_read_oneshot()$data}} +} +\value{ +a dataframe +} +\description{ +Define fields in a given project that are used in both a repeating and +nonrepeating manner. 
+} +\keyword{internal} diff --git a/man/read_redcap.Rd b/man/read_redcap.Rd index a57f4880..7cd5d2bb 100644 --- a/man/read_redcap.Rd +++ b/man/read_redcap.Rd @@ -12,7 +12,8 @@ read_redcap( export_survey_fields = NULL, export_data_access_groups = NULL, suppress_redcapr_messages = TRUE, - guess_max = .Machine$integer.max + guess_max = .Machine$integer.max, + allow_mixed_structure = getOption("redcaptidier.allow.mixed.structure", FALSE) ) } \arguments{ @@ -45,6 +46,11 @@ from REDCapR API calls. Default \code{TRUE}.} \item{guess_max}{A positive \link[base:numeric]{base::numeric} value passed to \code{\link[readr:read_delim]{readr::read_csv()}} that specifies the maximum number of records to use for guessing column types. Default \code{.Machine$integer.max}.} + +\item{allow_mixed_structure}{A logical to allow for support of mixed repeating/non-repeating +instruments. Setting to \code{TRUE} will treat the mixed instrument's non-repeating versions +as repeating instruments with a single instance. Applies to longitudinal projects +only. Default \code{FALSE}. Can be set globally with \code{options(redcaptidier.allow.mixed.structure = FALSE)}.} } \value{ A \code{tibble} in which each row represents a REDCap instrument. 
It diff --git a/tests/testthat/test-clean_redcap_long.R b/tests/testthat/test-clean_redcap_long.R index 97d87b14..04f479f0 100644 --- a/tests/testthat/test-clean_redcap_long.R +++ b/tests/testthat/test-clean_redcap_long.R @@ -28,6 +28,15 @@ db_metadata_long_noarms <- readRDS( linked_arms_long_noarms <- readRDS( system.file("testdata/linked_arms_long_noarms.RDS", package = "REDCapTidieR") ) +db_mixed_structure <- readRDS( + system.file("testdata/db_mixed_structure.RDS", package = "REDCapTidieR") +) +db_metadata_mixed_structure <- readRDS( + system.file("testdata/db_metadata_mixed_structure.RDS", package = "REDCapTidieR") +) +db_mixed_structure_linked_arms <- readRDS( + system.file("testdata/db_mixed_structure_linked_arms.RDS", package = "REDCapTidieR") +) # Run Tests ---- test_that("clean_redcap_long with arms works", { @@ -72,6 +81,59 @@ test_that("clean_redcap_long without arms works", { expect_true(!is.null(out$redcap_data)) }) +test_that("clean_redcap_long with mixed structure works", { + # Required since amendments take place before clean_redcap_long call in read_redcap + db_metadata_mixed_structure <- update_field_names(db_metadata_mixed_structure) + + # Expect error when allow_mixed_structure not specified + expect_error( + clean_redcap_long( + db_data_long = db_mixed_structure, + db_metadata_long = db_metadata_mixed_structure, + linked_arms = db_mixed_structure_linked_arms + ), + class = "repeat_nonrepeat_instrument" + ) + + out <- clean_redcap_long( + db_data_long = db_mixed_structure, + db_metadata_long = db_metadata_mixed_structure, + linked_arms = db_mixed_structure_linked_arms, + allow_mixed_structure = TRUE + ) + + # Check general structure, check all three structure types present + expect_true(is_tibble(out)) + expect_true("mixed" %in% out$structure) + expect_true("nonrepeating" %in% out$structure) + expect_true("repeating" %in% out$structure) + expect_true(!is.null(out$redcap_data)) + + # Check redcap_data contents for mixed and nonrepeating 
structure + expected_mixed_data <- tibble::tribble( + ~record_id, ~redcap_event, ~redcap_form_instance, ~mixed_structure_1, ~form_status_complete, + 1, "event_1", 1, "Mixed Nonrepeat 1", 0, + 1, "event_2", 1, "Mixed Repeat 1", 0, + 1, "event_2", 2, "Mixed Repeat 2", 0 + ) + + expected_nonrepeat_data <- tibble::tribble( + ~record_id, ~redcap_event, ~nonrepeat_1, ~form_status_complete, + 1, "event_1", "Nonrepeat 1", 0, + 1, "event_2", "Nonrepeat 2", 0 + ) + + expect_equal( + out$redcap_data[out$redcap_form_name == "mixed_structure_form"][[1]], + expected_mixed_data + ) + + expect_equal( + out$redcap_data[out$redcap_form_name == "nonrepeat_form"][[1]], + expected_nonrepeat_data + ) +}) + test_that("distill_nonrepeat_table_long tibble contains expected columns for longitudinal REDCap databases with arms", { ## Check longitudinal structure with arms ---- out <- distill_nonrepeat_table_long( @@ -208,3 +270,47 @@ test_that("distill_repeat_table_long no arms returns tables for REDCap dbs with any(c("redcap_repeat_instrument", "redcap_arm") %in% names(out)) ) }) + +test_that("get_mixed_structure_fields works", { + mixed_structure_db <- tibble::tribble( + ~record_id, ~redcap_repeat_instrument, ~redcap_repeat_instance, ~mixed_structure_variable, + 1, NA, NA, "A", + 2, "mixed_structure_form", 1, "B" + ) + + expected_out <- data.frame( + field_name = "mixed_structure_variable", + rep_and_nonrep = TRUE + ) + + out <- get_mixed_structure_fields(mixed_structure_db) + + expect_equal(out, expected_out) +}) + +test_that("convert_mixed_instrument works", { + mixed_structure_db <- tibble::tribble( + ~record_id, ~redcap_repeat_instrument, ~redcap_repeat_instance, ~mixed_structure_variable, ~repeat_form_variable, + 1, NA, NA, "A", NA, + 2, "mixed_structure_form", 1, "B", NA, + 3, "repeat_form", 1, NA, "C", + 4, "repeat_form", 2, NA, "D" + ) + + mixed_structure_ref <- tibble::tribble( + ~field_name, ~rep_and_nonrep, ~form_name, + "mixed_structure_variable", TRUE, "mixed_structure_form" + 
) + + expected_out <- tibble::tribble( + ~record_id, ~redcap_repeat_instrument, ~redcap_repeat_instance, ~mixed_structure_variable, ~repeat_form_variable, + 1, "mixed_structure_form", 1, "A", NA, + 2, "mixed_structure_form", 1, "B", NA, + 3, "repeat_form", 1, NA, "C", + 4, "repeat_form", 2, NA, "D" + ) + + out <- convert_mixed_instrument(mixed_structure_db, mixed_structure_ref) + + expect_equal(out, expected_out) +}) diff --git a/utility/microbenchmark_results.csv b/utility/microbenchmark_results.csv index ef042006..3a08cbcd 100644 --- a/utility/microbenchmark_results.csv +++ b/utility/microbenchmark_results.csv @@ -1,41 +1,43 @@ -min,lq,mean,median,uq,max,neval -891.35,891.35,891.35,891.35,891.35,891.35,1 -1.42,1.42,1.42,1.42,1.42,1.42,1 -661.09,661.09,661.09,661.09,661.09,661.09,1 -2.23,2.23,2.23,2.23,2.23,2.23,1 -3.37,3.37,3.37,3.37,3.37,3.37,1 -699.25,699.25,699.25,699.25,699.25,699.25,1 -641.48,641.48,641.48,641.48,641.48,641.48,1 -596.4,596.4,596.4,596.4,596.4,596.4,1 -600.89,600.89,600.89,600.89,600.89,600.89,1 -747.66,747.66,747.66,747.66,747.66,747.66,1 -710.54,710.54,710.54,710.54,710.54,710.54,1 -632.86,632.86,632.86,632.86,632.86,632.86,1 -623.4,623.4,623.4,623.4,623.4,623.4,1 -588.44,588.44,588.44,588.44,588.44,588.44,1 -150.11,150.11,150.11,150.11,150.11,150.11,1 -654.46,654.46,654.46,654.46,654.46,654.46,1 -581.29,581.29,581.29,581.29,581.29,581.29,1 -1.06,1.06,1.06,1.06,1.06,1.06,1 -1.17,1.17,1.17,1.17,1.17,1.17,1 -601.02,601.02,601.02,601.02,601.02,601.02,1 -658.33,658.33,658.33,658.33,658.33,658.33,1 -635.63,635.63,635.63,635.63,635.63,635.63,1 -779.47,779.47,779.47,779.47,779.47,779.47,1 -610.22,610.22,610.22,610.22,610.22,610.22,1 -668.01,668.01,668.01,668.01,668.01,668.01,1 -720.67,720.67,720.67,720.67,720.67,720.67,1 -1.08,1.08,1.08,1.08,1.08,1.08,1 -1.07,1.07,1.07,1.07,1.07,1.07,1 -1.96,1.96,1.96,1.96,1.96,1.96,1 -1.48,1.48,1.48,1.48,1.48,1.48,1 -1.64,1.64,1.64,1.64,1.64,1.64,1 -2.16,2.16,2.16,2.16,2.16,2.16,1 
-878.95,878.95,878.95,878.95,878.95,878.95,1 -1.73,1.73,1.73,1.73,1.73,1.73,1 -961.31,961.31,961.31,961.31,961.31,961.31,1 -1.14,1.14,1.14,1.14,1.14,1.14,1 -975.07,975.07,975.07,975.07,975.07,975.07,1 -1.55,1.55,1.55,1.55,1.55,1.55,1 -5.59,5.59,5.59,5.59,5.59,5.59,1 -8.51,8.51,8.51,8.51,8.51,8.51,1 +min,lq,mean,median,uq,max,neval,description,source +1.56,1.56,1.56,1.56,1.56,1.56,1,simple static (read-only) test project,ouhsc +2.08,2.08,2.08,2.08,2.08,2.08,1,longitudinal (read-only) ARM test project,ouhsc +841.23,841.23,841.23,841.23,841.23,841.23,1,simple write data,ouhsc +4.34,4.34,4.34,4.34,4.34,4.34,1,Russian Characters,ouhsc +6.67,6.67,6.67,6.67,6.67,6.67,1,"super-wide --3,000 columns",ouhsc +912.5,912.5,912.5,912.5,912.5,912.5,1,static (not longitudinal) survey test project,ouhsc +769.03,769.03,769.03,769.03,769.03,769.03,1,"Clinical Trial (Fake) --Read-only, contributed by @higgi13425",ouhsc +1.08,1.08,1.08,1.08,1.08,1.08,1,nonnumeric record_id,ouhsc +729.67,729.67,729.67,729.67,729.67,729.67,1,DAG Read,ouhsc +936.55,936.55,936.55,936.55,936.55,936.55,1,potentially problematic values,ouhsc +778.25,778.25,778.25,778.25,778.25,778.25,1,Repeating Instruments,ouhsc +1.62,1.62,1.62,1.62,1.62,1.62,1,simple write metadata,ouhsc +974.01,974.01,974.01,974.01,974.01,974.01,1,DAG Write -admin,ouhsc +791.47,791.47,791.47,791.47,791.47,791.47,1,DAG Write -group A,ouhsc +389.54,389.54,389.54,389.54,389.54,389.54,1,"super-wide #3--35,000 columns",ouhsc +827.92,827.92,827.92,827.92,827.92,827.92,1,Repeating Instruments --Sparse,ouhsc +636.88,636.88,636.88,636.88,636.88,636.88,1,Delete Single Arm,ouhsc +1.41,1.41,1.41,1.41,1.41,1.41,1,Delete Multiple Arm,ouhsc +1.14,1.14,1.14,1.14,1.14,1.14,1,longitudinal single arm,ouhsc +1.01,1.01,1.01,1.01,1.01,1.01,1,decimal comma and dot,ouhsc +997.98,997.98,997.98,997.98,997.98,997.98,1,decimal comma,ouhsc +900.25,900.25,900.25,900.25,900.25,900.25,1,decimal dot,ouhsc +1.04,1.04,1.04,1.04,1.04,1.04,1,Validation Types,ouhsc 
+1.03,1.03,1.03,1.03,1.03,1.03,1,Blank for Gray Status,ouhsc +903.09,903.09,903.09,903.09,903.09,903.09,1,Checkboxes 1,ouhsc +1.32,1.32,1.32,1.32,1.32,1.32,1,Vignette: Longitudinal & Repeating Measures,ouhsc +2.49,2.49,2.49,2.49,2.49,2.49,1,classic,redcaptidier +3.02,3.02,3.02,3.02,3.02,3.02,1,classic no repeat,redcaptidier +3.36,3.36,3.36,3.36,3.36,3.36,1,longitudinal,redcaptidier +3.67,3.67,3.67,3.67,3.67,3.67,1,longitudinal no arms,redcaptidier +5.06,5.06,5.06,5.06,5.06,5.06,1,longitudinal no repeat,redcaptidier +5.01,5.01,5.01,5.01,5.01,5.01,1,deep dive vignette,redcaptidier +2.11,2.11,2.11,2.11,2.11,2.11,1,repeat first instrument,redcaptidier +3.35,3.35,3.35,3.35,3.35,3.35,1,repeat event,redcaptidier +2.46,2.46,2.46,2.46,2.46,2.46,1,restricted access,redcaptidier +2,2,2,2,2,2,1,large sparse db,redcaptidier +1.94,1.94,1.94,1.94,1.94,1.94,1,data access groups,redcaptidier +3.28,3.28,3.28,3.28,3.28,3.28,1,longitudinal data access groups,redcaptidier +3.15,3.15,3.15,3.15,3.15,3.15,1,mixed structure repeat no repeat,redcaptidier +10.44,10.44,10.44,10.44,10.44,10.44,1,prodigy db,redcaptidier +15.1,15.1,15.1,15.1,15.1,15.1,1,cart comprehensive db,redcaptidier +48.32,48.32,48.32,48.32,48.32,48.32,1,bmt outcomes db,redcaptidier diff --git a/utility/test_creds.R b/utility/test_creds.R index d42883aa..97c51353 100644 --- a/utility/test_creds.R +++ b/utility/test_creds.R @@ -31,17 +31,18 @@ redcaptidier_creds <- tibble::tribble( Sys.getenv("REDCAP_URI"), Sys.getenv("REDCAPTIDIER_LARGE_SPARSE_API"), "large sparse db", Sys.getenv("REDCAP_URI"), Sys.getenv("REDCAPTIDIER_DAG_API"), "data access groups", Sys.getenv("REDCAP_URI"), Sys.getenv("REDCAPTIDIER_LONGITUDINAL_DAG_API"), "longitudinal data access groups", + Sys.getenv("REDCAP_URI"), Sys.getenv("REDCAPTIDIER_MIXED_STRUCTURE_API"), "mixed structure repeat no repeat", Sys.getenv("REDCAP_URI"), Sys.getenv("PRODIGY_REDCAP_API"), "prodigy db", Sys.getenv("REDCAP_URI"), Sys.getenv("CART_COMP_REDCAP_API"), "cart comprehensive 
db", Sys.getenv("REDCAP_URI"), Sys.getenv("BMT_OUTCOMES_REDCAP_API"), "bmt outcomes db" ) # Combine Credentials -creds <- rbind(ouhsc_creds, redcaptidier_creds) +creds <- rbind(ouhsc_creds %>% mutate(source = "ouhsc"), redcaptidier_creds %>% mutate(source = "redcaptidier")) microbenchmark_fx <- function(redcap_uri, token, name, times = 1){ microbenchmark( - name = read_redcap(redcap_uri = redcap_uri, token = token), + name = read_redcap(redcap_uri = redcap_uri, token = token, allow_mixed_structure = TRUE), times = times ) } @@ -57,4 +58,5 @@ for (i in seq_along(microbenchmark_results)) { out %>% select(-expr) %>% mutate(across(tidyselect::everything(), ~round(., digits = 2))) %>% + cbind(., "description" = creds$comment, "source" = creds$source) %>% readr::write_csv("utility/microbenchmark_results.csv") diff --git a/vignettes/articles/diving_deeper.Rmd b/vignettes/articles/diving_deeper.Rmd index 127c690f..8d4d5e48 100644 --- a/vignettes/articles/diving_deeper.Rmd +++ b/vignettes/articles/diving_deeper.Rmd @@ -19,6 +19,7 @@ knitr::knit_exit() redcap_uri <- Sys.getenv("REDCAP_URI") superheroes_token <- Sys.getenv("SUPERHEROES_REDCAP_API") longitudinal_token <- Sys.getenv("REDCAPTIDIER_DEEP_DIVE_VIGNETTE_API") +mixed_token <- Sys.getenv("REDCAPTIDIER_MIXED_STRUCTURE_API") survey_token <- Sys.getenv("REDCAPTIDIER_CLASSIC_API") dag_token <- Sys.getenv("REDCAPTIDIER_DAG_API") ``` @@ -80,7 +81,7 @@ super_hero_powers |> rmarkdown::paged_table() ``` -## Longitudinal REDCap projects +## Longitudinal REDCap Projects REDCap supports two main mechanisms to allow collecting the same data multiple times: [**repeating instruments**](glossary.html#repeating-instrument) and [**longitudinal projects**](glossary.html#longitudinal-project). In addition, a longitudinal project may have [**arms**](glossary.html#arm) and/or [**repeating events**](glossary.html#repeating-event). 
@@ -156,8 +157,6 @@ adverse_events |> It is possible to have a repeating instrument designated to multiple events, however this is an uncommon pattern. REDCapTidieR supports this scenario as well. -REDCapTidieR does *not* allow you to have the same instrument designated both as a repeating and as a nonrepeating instrument in different events. It will throw an error if it detects this. - The `unscheduled` **event** is a **repeating event**. Like adverse events, unscheduled visits aren't tied to a pre-determined study visit, and a patient could have zero, one, or multiple unscheduled visits. On the other hand, you might want to record the same kinds of data for an unscheduled visit as for a pre-determined regular visit and collect data in the same instruments, for example `physical_exam` and `hematology`. The granularity of these tables is one row per study subject per event per *event instance*. The subject had two unscheduled visits. `redcap_event_instance` allows us to match `physical_exam` and `hematology` responses which occurred on the same unscheduled visit. ```{r, results='hold'} @@ -174,7 +173,31 @@ Note that REDCapTidieR allows for an instrument to be associated with both repea REDCapTidieR supports projects with multiple arms. If you have a project with multiple arms, there will be an additional column `redcap_arm` to identify the arm that the row is associated with. -## Categorical variables +### Mixed Structure Instruments + +By default, REDCapTidieR does not allow you to have the same instrument designated both as a repeating and as a nonrepeating instrument in different events (i.e. a "mixed structure instrument"), and will throw an error if this is detected: + +``` +Error in `clean_redcap_long()` at REDCapTidieR/R/read_redcap.R:272:5: +✖ Instruments detected that have both repeating and nonrepeating instances defined in the project: mixed_structure_1 and mixed_structure_form_complete +ℹ Set `allow_mixed_structure` to `TRUE` to override. 
See Mixed Structure Instruments for more information. +``` + +This is because such a design inherently goes against [tidy](glossary.html#tidy) data principles. + +However, as of REDCapTidieR v1.1.0 it is now possible to override this behavior by setting `allow_mixed_structure` in `read_redcap()` to `TRUE`. When enabled, nonrepeating variants of mixed structure instruments will be treated as repeating instruments with a single repeating instance. + +Users are cautioned when enabling this feature, since it changes definitions in the original data output. To visually assist with this, you will see that `structure` in the [supertibble](glossary.html#supertibble) will say "mixed": + +```{r} +read_redcap(redcap_uri, + mixed_token, + allow_mixed_structure = TRUE +) |> + rmarkdown::paged_table() +``` + +## Categorical Variables REDCapTidieR performs a number of opinionated transformations on categorical [fields](glossary.html#field) to streamline exploring them and working with them. diff --git a/vignettes/glossary.Rmd b/vignettes/glossary.Rmd index 0d44116a..7382eaa1 100644 --- a/vignettes/glossary.Rmd +++ b/vignettes/glossary.Rmd @@ -227,7 +227,7 @@ The [skimr](https://docs.ropensci.org/skimr/) R package provides summary statist ### Structure {#structure} -The structure of an [instrument](#instrument) can be [repeating](#repeating-instrument) or [nonrepeating](#nonrepeating-instrument). The [supertibble](#supertibble) shows the instrument's structure in the `structure` column. The structure of a [project](#project) can be [classic](#classic-project), [longitudinal](#longitudinal-project), or longitudinal with [arms](#arm). The structure of an [event](#event) can be [repeating](#repeating-event) or [nonrepeating](#nonrepeating-event). The [granularity](#granularity) of a [data tibble](#data-tibble) depends on the structure of all three: the instrument, the project, and the events associated with the instrument. 
Note: REDCap does not allow repeating instruments inside a repeating event. See also: the section [Longitudinal REDCap projects](https://chop-cgtinformatics.github.io/REDCapTidieR/articles/diving_deeper.html#longitudinal-redcap-projects) in the [Diving Deeper vignette](https://chop-cgtinformatics.github.io/REDCapTidieR/articles/diving_deeper.html). ↩︎ +The structure of an [instrument](#instrument) can be [repeating](#repeating-instrument), [nonrepeating](#nonrepeating-instrument), or mixed. The [supertibble](#supertibble) shows the instrument's structure in the `structure` column. The structure of a [project](#project) can be [classic](#classic-project), [longitudinal](#longitudinal-project), or longitudinal with [arms](#arm). The structure of an [event](#event) can be [repeating](#repeating-event) or [nonrepeating](#nonrepeating-event). As of REDCapTidieR v1.1.0, [mixed structure instruments](https://chop-cgtinformatics.github.io/REDCapTidieR/articles/diving_deeper.html#mixed-structure-instruments) are supported. The [granularity](#granularity) of a [data tibble](#data-tibble) depends on the structure of all three: the instrument, the project, and the events associated with the instrument. Note: REDCap does not allow repeating instruments inside a repeating event. See also: the section [Longitudinal REDCap projects](https://chop-cgtinformatics.github.io/REDCapTidieR/articles/diving_deeper.html#longitudinal-redcap-projects) in the [Diving Deeper vignette](https://chop-cgtinformatics.github.io/REDCapTidieR/articles/diving_deeper.html). ↩︎ ### Supertibble {#supertibble}