From 85244b4616a92eee4cf7cb47d73401122bc1aed6 Mon Sep 17 00:00:00 2001 From: Ezra Porter <60618324+ezraporter@users.noreply.github.com> Date: Mon, 25 Mar 2024 15:30:46 -0400 Subject: [PATCH] Handle Missing Data Codes (#182) * convert logical fields * warn for missing categorical vals * Update microbenchmark_results.csv * consolidate warnings * update docs --- .github/workflows/R-CMD-check.yaml | 1 + .github/workflows/test-coverage.yaml | 1 + DESCRIPTION | 3 +- NAMESPACE | 4 ++ R/REDCapTidieR-package.R | 3 +- R/checks.R | 82 ++++++++++++++++++++++++++ R/read_redcap.R | 2 +- R/utils.R | 73 +++++++++++++++++++++-- man/apply_labs_factor.Rd | 5 +- man/apply_labs_haven.Rd | 2 +- man/check_extra_field_values.Rd | 17 ++++++ man/check_field_is_logical.Rd | 15 +++++ man/multi_choice_to_labels.Rd | 9 ++- man/parse_logical_cols.Rd | 19 ++++++ man/read_redcap.Rd | 2 +- tests/testthat/test-checks.R | 23 ++++++++ tests/testthat/test-read_redcap.R | 18 ++++++ tests/testthat/test-utils.R | 32 ++++++++++ utility/cli_message_examples.R | 4 ++ utility/cli_message_examples_reprex.md | 31 ++++++++-- utility/microbenchmark_results.csv | 78 ++++++++++++------------ vignettes/REDCapTidieR.Rmd | 18 ++++++ vignettes/articles/diving_deeper.Rmd | 2 +- 23 files changed, 386 insertions(+), 58 deletions(-) create mode 100644 man/check_extra_field_values.Rd create mode 100644 man/check_field_is_logical.Rd create mode 100644 man/parse_logical_cols.Rd diff --git a/.github/workflows/R-CMD-check.yaml b/.github/workflows/R-CMD-check.yaml index 062e53d4..0c0105ad 100644 --- a/.github/workflows/R-CMD-check.yaml +++ b/.github/workflows/R-CMD-check.yaml @@ -30,6 +30,7 @@ jobs: REDCAPTIDIER_DAG_API: ${{ secrets.REDCAPTIDIER_DAG_API }} REDCAPTIDIER_LONGITUDINAL_DAG_API: ${{ secrets.REDCAPTIDIER_LONGITUDINAL_DAG_API }} REDCAPTIDIER_MIXED_STRUCTURE_API: ${{ secrets.REDCAPTIDIER_MIXED_STRUCTURE_API }} + REDCAPTIDIER_MDC_API: ${{ secrets.REDCAPTIDIER_MDC_API }} steps: - name: Update Ubuntu, Install cURL Headers, 
add Libraries run: | diff --git a/.github/workflows/test-coverage.yaml b/.github/workflows/test-coverage.yaml index 5f537546..71c4c27c 100644 --- a/.github/workflows/test-coverage.yaml +++ b/.github/workflows/test-coverage.yaml @@ -29,6 +29,7 @@ jobs: REDCAPTIDIER_LARGE_SPARSE_API: ${{ secrets.REDCAPTIDIER_LARGE_SPARSE_API }} REDCAPTIDIER_DAG_API: ${{ secrets.REDCAPTIDIER_DAG_API }} REDCAPTIDIER_LONGITUDINAL_DAG_API: ${{ secrets.REDCAPTIDIER_LONGITUDINAL_DAG_API }} + REDCAPTIDIER_MDC_API: ${{ secrets.REDCAPTIDIER_MDC_API }} steps: - name: Update Ubuntu, Install cURL Headers, add Libraries run: | diff --git a/DESCRIPTION b/DESCRIPTION index d8bb60e9..1c565c7a 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -34,7 +34,8 @@ Imports: formattable, pillar, vctrs, - readr + readr, + stats Suggests: covr, knitr, diff --git a/NAMESPACE b/NAMESPACE index bd72cbd5..60266b24 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -62,11 +62,14 @@ importFrom(lubridate,is.difftime) importFrom(lubridate,is.period) importFrom(pillar,tbl_sum) importFrom(purrr,compose) +importFrom(purrr,discard) +importFrom(purrr,flatten_chr) importFrom(purrr,map) importFrom(purrr,map2) importFrom(purrr,map_int) importFrom(purrr,map_lgl) importFrom(purrr,pluck) +importFrom(purrr,pmap) importFrom(purrr,pmap_chr) importFrom(purrr,some) importFrom(readr,parse_character) @@ -102,6 +105,7 @@ importFrom(rlang,new_environment) importFrom(rlang,quo_get_expr) importFrom(rlang,try_fetch) importFrom(rlang,zap) +importFrom(stats,na.omit) importFrom(stringi,stri_split_fixed) importFrom(stringr,str_detect) importFrom(stringr,str_ends) diff --git a/R/REDCapTidieR-package.R b/R/REDCapTidieR-package.R index e4b3a88e..b4e150a2 100644 --- a/R/REDCapTidieR-package.R +++ b/R/REDCapTidieR-package.R @@ -9,7 +9,7 @@ #' @importFrom formattable percent #' @importFrom lobstr obj_size #' @importFrom lubridate is.difftime is.period is.POSIXt is.Date -#' @importFrom purrr compose map map2 map_int map_lgl pluck pmap_chr some +#' @importFrom purrr 
compose map map2 map_int map_lgl pluck pmap_chr some pmap discard flatten_chr #' @importFrom REDCapR redcap_arm_export redcap_event_instruments redcap_instruments #' redcap_metadata_read redcap_read_oneshot sanitize_token #' @importFrom rlang .data !!! abort as_closure caller_arg caller_env catch_cnd @@ -27,6 +27,7 @@ #' @importFrom pillar tbl_sum #' @importFrom readr parse_logical parse_integer parse_double parse_date parse_time #' parse_datetime parse_character +#' @importFrom stats na.omit "_PACKAGE" ## usethis namespace: start diff --git a/R/checks.R b/R/checks.R index 06eb6e45..8a73f0d8 100644 --- a/R/checks.R +++ b/R/checks.R @@ -577,3 +577,85 @@ check_file_exists <- function(file, overwrite, call = caller_env()) { ) } } + +#' @title +#' Parse logical field and compile data for warning if parsing errors occurred +#' +#' @param x vector to parse +#' +#' @keywords internal +check_field_is_logical <- function(x) { + out <- list(parsed = NULL, problems = NULL) + # If already logical just return it + if (is.logical(x)) { + out$parsed <- x + return(out) + } + # Parse + cnd <- NULL + out$parsed <- withCallingHandlers( + { + parse_logical(as.character(x)) + }, + warning = function(w) { + cnd <<- w + cnd_muffle(w) + } + ) + # Check for parsing failures and warn if found + probs <- attr(out$parsed, "problems") + if (!is.null(probs)) { + if (!getOption("redcaptidier.allow.mdc", FALSE)) { + out$problems <- unique(probs$actual) + } + attr(out$parsed, "problems") <- NULL + } else if (!is.null(cnd)) { + # If there was some other warning we didn't mean to catch it, so re-raise + cli_warn(cnd) + } + out +} + +#' @title +#' Check data field for field values not in metadata +#' +#' @param x data field +#' @param values expected field values +#' +#' @keywords internal +check_extra_field_values <- function(x, values) { + extra_vals <- setdiff(as.character(x), values) |> na.omit() + if (length(extra_vals) == 0) { + return(NULL) + } + as.character(extra_vals) +} + 
+check_extra_field_values_message <- function(extra_field_values, call = caller_env()) { + extra_field_values <- extra_field_values |> + discard(is.null) + + if (length(extra_field_values) == 0) { + return(NULL) + } + + fields <- names(extra_field_values) + values <- flatten_chr(extra_field_values) |> unique() + + msg <- c( + `!` = "{.code {fields}} contain{?s/} values with no labels: {values}", + i = "These were converted to {.code NA} resulting in possible data loss", + i = "Does your REDCap project utilize missing data codes?", + i = paste( + "Silence this warning with {.code options(redcaptidier.allow.mdc = TRUE)} or", + "set {.code raw_or_label = 'raw'} to access missing data codes" + ) + ) + cli_warn( + msg, + class = c("extra_field_values", "REDCapTidieR_cond"), + call = call, + fields = fields, + values = values + ) +} diff --git a/R/read_redcap.R b/R/read_redcap.R index 4c1f627a..a50b6ebf 100644 --- a/R/read_redcap.R +++ b/R/read_redcap.R @@ -57,7 +57,7 @@ #' @param allow_mixed_structure A logical to allow for support of mixed repeating/non-repeating #' instruments. Setting to `TRUE` will treat the mixed instrument's non-repeating versions #' as repeating instruments with a single instance. Applies to longitudinal projects -#' only. Default `FALSE`. Can be set globally with `options(redcaptidier.allow.mixed.structure = FALSE)`. +#' only. Default `FALSE`. Can be set globally with `options(redcaptidier.allow.mixed.structure = TRUE)`. 
#' #' @examples #' \dontrun{ diff --git a/R/utils.R b/R/utils.R index c0334b3e..b7ee6ccf 100644 --- a/R/utils.R +++ b/R/utils.R @@ -397,11 +397,12 @@ update_data_col_names <- function(db_data, db_metadata) { #' #' @param db_data A REDCap database object #' @param db_metadata A REDCap metadata object +#' @param call call for conditions #' @inheritParams read_redcap #' #' @keywords internal -multi_choice_to_labels <- function(db_data, db_metadata, raw_or_label = "label") { +multi_choice_to_labels <- function(db_data, db_metadata, raw_or_label = "label", call = caller_env()) { if (raw_or_label == "label") { label_handler <- apply_labs_factor } else if (raw_or_label == "haven") { @@ -426,12 +427,11 @@ multi_choice_to_labels <- function(db_data, db_metadata, raw_or_label = "label") # Logical Column Handling ---- # Handle columns where we change 0/1 to FALSE/TRUE (logical) - logical_cols <- db_metadata %>% - filter(.data$field_type %in% c("yesno", "truefalse", "checkbox")) %>% - pull(.data$field_name_updated) + db_data <- parse_logical_cols(db_data, db_metadata, call = call) - db_data <- db_data %>% - mutate(across(.cols = all_of(logical_cols), as.logical)) + # Buffer for fields with extra field values to be populated by check_extra_field_values + extra_field_values <- vector("list", length = nrow(db_metadata)) + names(extra_field_values) <- db_metadata$field_name_updated for (i in seq_len(nrow(db_metadata))) { # Extract metadata field name and database corresponding column name @@ -470,6 +470,13 @@ multi_choice_to_labels <- function(db_data, db_metadata, raw_or_label = "label") warn_stripped_text = stripped_text_flag ) + if (!getOption("redcaptidier.allow.mdc", FALSE)) { + extra_field_values[i] <- check_extra_field_values( + db_data[[field_name]], + names(parse_labels_output) + ) + } + # Replace values from db_data$(field_name) with label values from # parse_labels key @@ -480,9 +487,63 @@ multi_choice_to_labels <- function(db_data, db_metadata, raw_or_label = "label") 
) } } + + check_extra_field_values_message(extra_field_values, call = call) + db_data } +#' @title +#' Convert yesno, truefalse, and checkbox fields to logical +#' +#' @inheritParams multi_choice_to_labels +#' +#' @keywords internal +parse_logical_cols <- function(db_data, db_metadata, call = caller_env()) { + logical_cols <- db_metadata %>% + filter(.data$field_type %in% c("yesno", "truefalse", "checkbox")) + + if (nrow(logical_cols) == 0) { + return(db_data) + } + + parsed <- map(db_data[logical_cols$field_name_updated], check_field_is_logical) + + out <- db_data + + out[logical_cols$field_name_updated] <- map(parsed, "parsed") + + if (!getOption("redcaptidier.allow.mdc", FALSE)) { + problems <- parsed |> + map("problems") |> + discard(is.null) + + if (length(problems) > 0) { + fields <- names(problems) + values <- flatten_chr(problems) |> unique() + + msg <- c( + `!` = "{.code {fields}} {?is/are} logical but contain{?s/} non-logical values: {values}", + i = "These were converted to {.code NA} resulting in possible data loss", + i = "Does your REDCap project utilize missing data codes?", + i = paste( + "Silence this warning with {.code options(redcaptidier.allow.mdc = TRUE)} or", + "set {.code raw_or_label = 'raw'} to access missing data codes" + ) + ) + cli_warn( + msg, + class = c("field_is_logical", "REDCapTidieR_cond"), + call = call, + fields = fields, + problems = values + ) + } + } + + out +} + #' @title #' Apply factor labels to a vector #' diff --git a/man/apply_labs_factor.Rd b/man/apply_labs_factor.Rd index b5e5f015..d9ea2abd 100644 --- a/man/apply_labs_factor.Rd +++ b/man/apply_labs_factor.Rd @@ -11,7 +11,7 @@ apply_labs_factor(x, labels, ...) 
\item{labels}{a named vector of labels in the format \code{c(value = label)}} -\item{\dots}{unused} +\item{\dots}{unused, needed to ignore extra arguments that may be passed} } \value{ factor @@ -19,4 +19,7 @@ factor \description{ Apply factor labels to a vector } +\details{ +Dots are needed to ignore \code{ptype} argument that may be passed to \code{apply_labs_haven} +} \keyword{internal} diff --git a/man/apply_labs_haven.Rd b/man/apply_labs_haven.Rd index ad35755d..b9521dcd 100644 --- a/man/apply_labs_haven.Rd +++ b/man/apply_labs_haven.Rd @@ -13,7 +13,7 @@ apply_labs_haven(x, labels, ptype, ...) \item{ptype}{vector to serve as prototype for label values} -\item{\dots}{unused} +\item{\dots}{unused, needed to ignore extra arguments that may be passed} } \value{ \code{haven_labelled} vector diff --git a/man/check_extra_field_values.Rd b/man/check_extra_field_values.Rd new file mode 100644 index 00000000..2ff73b2b --- /dev/null +++ b/man/check_extra_field_values.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/checks.R +\name{check_extra_field_values} +\alias{check_extra_field_values} +\title{Check data field for field values not in metadata} +\usage{ +check_extra_field_values(x, values) +} +\arguments{ +\item{x}{data field} + +\item{values}{expected field values} +} +\description{ +Check data field for field values not in metadata +} +\keyword{internal} diff --git a/man/check_field_is_logical.Rd b/man/check_field_is_logical.Rd new file mode 100644 index 00000000..d8279da2 --- /dev/null +++ b/man/check_field_is_logical.Rd @@ -0,0 +1,15 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/checks.R +\name{check_field_is_logical} +\alias{check_field_is_logical} +\title{Parse logical field and compile data for warning if parsing errors occurred} +\usage{ +check_field_is_logical(x) +} +\arguments{ +\item{x}{vector to parse} +} +\description{ +Parse logical field and compile data for warning 
if parsing errors occurred +} +\keyword{internal} diff --git a/man/multi_choice_to_labels.Rd b/man/multi_choice_to_labels.Rd index 2dca6b09..239b5397 100644 --- a/man/multi_choice_to_labels.Rd +++ b/man/multi_choice_to_labels.Rd @@ -4,7 +4,12 @@ \alias{multi_choice_to_labels} \title{Update multiple choice fields with label data} \usage{ -multi_choice_to_labels(db_data, db_metadata, raw_or_label = "label") +multi_choice_to_labels( + db_data, + db_metadata, + raw_or_label = "label", + call = caller_env() +) } \arguments{ \item{db_data}{A REDCap database object} @@ -15,6 +20,8 @@ multi_choice_to_labels(db_data, db_metadata, raw_or_label = "label") to export the raw coded values or the labels for the options of categorical fields. Default is 'label'. If 'haven' is supplied, categorical fields are converted to \code{haven_labelled} vectors.} + +\item{call}{call for conditions} } \description{ Update REDCap variables with multi-choice types to standard form labels taken diff --git a/man/parse_logical_cols.Rd b/man/parse_logical_cols.Rd new file mode 100644 index 00000000..e1ef72bd --- /dev/null +++ b/man/parse_logical_cols.Rd @@ -0,0 +1,19 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/utils.R +\name{parse_logical_cols} +\alias{parse_logical_cols} +\title{Convert yesno, truefalse, and checkbox fields to logical} +\usage{ +parse_logical_cols(db_data, db_metadata, call = caller_env()) +} +\arguments{ +\item{db_data}{A REDCap database object} + +\item{db_metadata}{A REDCap metadata object} + +\item{call}{call for conditions} +} +\description{ +Convert yesno, truefalse, and checkbox fields to logical +} +\keyword{internal} diff --git a/man/read_redcap.Rd b/man/read_redcap.Rd index 469899f5..7b7c9e15 100644 --- a/man/read_redcap.Rd +++ b/man/read_redcap.Rd @@ -51,7 +51,7 @@ use for guessing column types. 
Default \code{.Machine$integer.max}.} \item{allow_mixed_structure}{A logical to allow for support of mixed repeating/non-repeating instruments. Setting to \code{TRUE} will treat the mixed instrument's non-repeating versions as repeating instruments with a single instance. Applies to longitudinal projects -only. Default \code{FALSE}. Can be set globally with \code{options(redcaptidier.allow.mixed.structure = FALSE)}.} +only. Default \code{FALSE}. Can be set globally with \code{options(redcaptidier.allow.mixed.structure = TRUE)}.} } \value{ A \code{tibble} in which each row represents a REDCap instrument. It diff --git a/tests/testthat/test-checks.R b/tests/testthat/test-checks.R index 17977707..b6713790 100644 --- a/tests/testthat/test-checks.R +++ b/tests/testthat/test-checks.R @@ -223,3 +223,26 @@ test_that("check_file_exists works", { ) }) }) + +test_that("check_field_is_logical works", { + expect_equal( + check_field_is_logical(c(TRUE, FALSE, NA)), + list(parsed = c(TRUE, FALSE, NA), problems = NULL) + ) + expect_equal( + check_field_is_logical(c(1, 0, NA)), + list(parsed = c(TRUE, FALSE, NA), problems = NULL) + ) + expect_equal( + check_field_is_logical(c(1, 0, "x")), + list(parsed = c(TRUE, FALSE, NA), problems = "x") + ) +}) + +test_that("check_extra_field_values works", { + check_extra_field_values(c(1, NA, 2), c("1", "2")) |> + expect_null() + + check_extra_field_values(c(1, NA, 2), "1") |> + expect_equal("2") +}) diff --git a/tests/testthat/test-read_redcap.R b/tests/testthat/test-read_redcap.R index 70ebbd7c..3b34e15d 100644 --- a/tests/testthat/test-read_redcap.R +++ b/tests/testthat/test-read_redcap.R @@ -621,3 +621,21 @@ test_that("read_redcap fails if DAG or survey columns are explicitly requested b class = "nonexistent_arg_requested" ) }) + +test_that("read_redcap handles missing data codes", { + out <- read_redcap(Sys.getenv("REDCAP_URI"), Sys.getenv("REDCAPTIDIER_MDC_API")) |> + suppressWarnings(classes = c("field_is_logical", 
"extra_field_values")) |> + extract_tibble("form_1") + + # logicals are not converted to NA + expect_type(out$yesno, "logical") + expect_true(!all(is.na(out$yesno))) + # categoricals remove missing data codes + expect_factor(out$dropdown) + expect_true(all(is.na(out$dropdown) | out$dropdown != "UNK")) + + withr::with_options(list(redcaptidier.allow.mdc = TRUE), { + read_redcap(Sys.getenv("REDCAP_URI"), Sys.getenv("REDCAPTIDIER_MDC_API")) + }) |> + expect_no_warning() +}) diff --git a/tests/testthat/test-utils.R b/tests/testthat/test-utils.R index 65cd1b77..7adb5d5a 100644 --- a/tests/testthat/test-utils.R +++ b/tests/testthat/test-utils.R @@ -178,6 +178,38 @@ test_that("parse_labels works", { expect_equal(FALSE) }) +test_that("parse_logical_cols", { + db_data <- tibble::tibble( + record_id = 1:3, + yes_no = c("1", "0", "UNK"), + other_field = letters[1:3] + ) + db_metadata <- tibble::tibble( + field_name_updated = c("yes_no", "other_field"), + field_type = c("yesno", "text") + ) + + expect_warning(parse_logical_cols(db_data, db_metadata), class = "field_is_logical") + + out <- parse_logical_cols(db_data, db_metadata) |> + suppressWarnings(classes = "field_is_logical") + + expect_equal(dim(out), dim(db_data)) + expect_equal(out$record_id, db_data$record_id) + expect_equal(out$yes_no, c(TRUE, FALSE, NA)) + expect_equal(out$other_field, db_data$other_field) + + db_data <- tibble::tibble( + record_id = 1:3, + other_field = letters[1:3] + ) + db_metadata <- tibble::tibble( + field_name_updated = "other_field", + field_type = "text" + ) + expect_equal(parse_logical_cols(db_data, db_metadata), db_data) +}) + test_that("link_arms works", { skip_on_cran() diff --git a/utility/cli_message_examples.R b/utility/cli_message_examples.R index 2cd509e3..18b31ab1 100644 --- a/utility/cli_message_examples.R +++ b/utility/cli_message_examples.R @@ -147,3 +147,7 @@ withr::with_tempdir({ read_redcap(Sys.getenv("REDCAP_URI"), Sys.getenv("REDCAPTIDIER_CLASSIC_API")) %>% 
suppressWarnings() + +# missing data codes + +read_redcap(redcap_uri, Sys.getenv("REDCAPTIDIER_MDC_API")) diff --git a/utility/cli_message_examples_reprex.md b/utility/cli_message_examples_reprex.md index 32240493..61be21eb 100644 --- a/utility/cli_message_examples_reprex.md +++ b/utility/cli_message_examples_reprex.md @@ -1,6 +1,7 @@ ``` r devtools::load_all() #> ℹ Loading REDCapTidieR +#> Warning: package 'testthat' was built under R version 4.2.3 options(rlang_backtrace_on_error_report = "none") @@ -93,7 +94,7 @@ read_redcap(redcap_uri, classic_token, raw_or_label = "bad option") #> Error in `read_redcap()`: #> ✖ You've supplied `bad option` for `raw_or_label` which is not a valid #> value -#> ! Must be element of set {'label','raw'}, but is 'bad option' +#> ! Must be element of set {'label','raw','haven'}, but is 'bad option' ## forms @@ -255,7 +256,7 @@ withr::with_tempdir({ }) #> Error: #> ✖ File -#> ''/private/var/folders/9c/k1m0bzys7gb1v32g86hfn5sn5k86h1/T/RtmpHQI8WI/file135a1176243e2/temp.csv'' +#> ''/private/var/folders/9c/k1m0bzys7gb1v32g86hfn5sn5k86h1/T/Rtmp677YHR/filec4216b6201a/temp.csv'' #> already exists. #> ℹ Overwriting files is disabled by default. Set `overwrite = TRUE` to overwrite #> existing file. @@ -296,14 +297,14 @@ withr::with_tempdir({ write_redcap_xlsx(file = filepath) }) #> Warning in write_redcap_xlsx(., file = filepath): ! No extension provided for `file`: -#> '/private/var/folders/9c/k1m0bzys7gb1v32g86hfn5sn5k86h1/T/RtmpHQI8WI/file135a1324144c6/temp' +#> '/private/var/folders/9c/k1m0bzys7gb1v32g86hfn5sn5k86h1/T/Rtmp677YHR/filec421a35fb3d/temp' #> ℹ The extension '.xlsx' will be appended to the file name. 
# Printed supertibble read_redcap(Sys.getenv("REDCAP_URI"), Sys.getenv("REDCAPTIDIER_CLASSIC_API")) %>% suppressWarnings() -#> # A REDCapTidier Supertibble with 9 instruments +#> # A REDCapTidieR Supertibble with 9 instruments #> redcap_form_name redcap_form_label redcap_data redcap_metadata structure #> #> 1 nonrepeated Nonrepeated nonrepea… @@ -317,6 +318,26 @@ read_redcap(Sys.getenv("REDCAP_URI"), Sys.getenv("REDCAPTIDIER_CLASSIC_API")) %> #> 9 repeat_survey Repeat Survey repeating #> # ℹ 4 more variables: data_rows , data_cols , data_size , #> # data_na_pct + +# missing data codes + +read_redcap(redcap_uri, Sys.getenv("REDCAPTIDIER_MDC_API")) +#> Warning in read_redcap(redcap_uri, Sys.getenv("REDCAPTIDIER_MDC_API")): ! `yesno` is type 'yesno' but contains non-logical values: UNK +#> ℹ These were converted to `NA` resulting in possible data loss +#> ℹ Does your REDCap project utilize missing data codes? +#> ℹ Silence this warning with `options(redcaptidier.allow.mdc = TRUE)` or set +#> `raw_or_label = 'raw'` to access missing data codes +#> Warning in read_redcap(redcap_uri, Sys.getenv("REDCAPTIDIER_MDC_API")): ! `dropdown` contains values with no labels: UNK +#> ℹ These were converted to `NA` resulting in possible data loss +#> ℹ Does your REDCap project utilize missing data codes? 
+#> ℹ Silence this warning with `options(redcaptidier.allow.mdc = TRUE)` or set +#> `raw_or_label = 'raw'` to access missing data codes +#> # A REDCapTidieR Supertibble with 1 instruments +#> redcap_form_name redcap_form_label redcap_data redcap_metadata structure +#> +#> 1 form_1 Form 1 nonrepeat… +#> # ℹ 4 more variables: data_rows , data_cols , data_size , +#> # data_na_pct ``` -Created on 2024-03-12 with [reprex v2.1.0](https://reprex.tidyverse.org) +Created on 2024-03-21 with [reprex v2.1.0](https://reprex.tidyverse.org) diff --git a/utility/microbenchmark_results.csv b/utility/microbenchmark_results.csv index dab96054..63ee9ebd 100644 --- a/utility/microbenchmark_results.csv +++ b/utility/microbenchmark_results.csv @@ -1,42 +1,42 @@ min,lq,mean,median,uq,max,neval,description,source -0.99,0.99,0.99,0.99,0.99,0.99,1,simple static (read-only) test project,ouhsc -1.17,1.17,1.17,1.17,1.17,1.17,1,longitudinal (read-only) ARM test project,ouhsc -0.62,0.62,0.62,0.62,0.62,0.62,1,simple write data,ouhsc -1.68,1.68,1.68,1.68,1.68,1.68,1,Russian Characters,ouhsc -3.6,3.6,3.6,3.6,3.6,3.6,1,"super-wide --3,000 columns",ouhsc -0.61,0.61,0.61,0.61,0.61,0.61,1,static (not longitudinal) survey test project,ouhsc -0.65,0.65,0.65,0.65,0.65,0.65,1,"Clinical Trial (Fake) --Read-only, contributed by @higgi13425",ouhsc -0.55,0.55,0.55,0.55,0.55,0.55,1,nonnumeric record_id,ouhsc -0.61,0.61,0.61,0.61,0.61,0.61,1,DAG Read,ouhsc +1.16,1.16,1.16,1.16,1.16,1.16,1,simple static (read-only) test project,ouhsc +1.18,1.18,1.18,1.18,1.18,1.18,1,longitudinal (read-only) ARM test project,ouhsc +0.6,0.6,0.6,0.6,0.6,0.6,1,simple write data,ouhsc +1.79,1.79,1.79,1.79,1.79,1.79,1,Russian Characters,ouhsc +3.2,3.2,3.2,3.2,3.2,3.2,1,"super-wide --3,000 columns",ouhsc +0.68,0.68,0.68,0.68,0.68,0.68,1,static (not longitudinal) survey test project,ouhsc +0.68,0.68,0.68,0.68,0.68,0.68,1,"Clinical Trial (Fake) --Read-only, contributed by @higgi13425",ouhsc +0.58,0.58,0.58,0.58,0.58,0.58,1,nonnumeric 
record_id,ouhsc +0.6,0.6,0.6,0.6,0.6,0.6,1,DAG Read,ouhsc 0.58,0.58,0.58,0.58,0.58,0.58,1,potentially problematic values,ouhsc 0.68,0.68,0.68,0.68,0.68,0.68,1,Repeating Instruments,ouhsc -0.84,0.84,0.84,0.84,0.84,0.84,1,simple write metadata,ouhsc -0.71,0.71,0.71,0.71,0.71,0.71,1,DAG Write -admin,ouhsc -0.65,0.65,0.65,0.65,0.65,0.65,1,DAG Write -group A,ouhsc -132.09,132.09,132.09,132.09,132.09,132.09,1,"super-wide #3--35,000 columns",ouhsc -0.69,0.69,0.69,0.69,0.69,0.69,1,Repeating Instruments --Sparse,ouhsc -0.59,0.59,0.59,0.59,0.59,0.59,1,Delete Single Arm,ouhsc -1.09,1.09,1.09,1.09,1.09,1.09,1,Delete Multiple Arm,ouhsc -1.12,1.12,1.12,1.12,1.12,1.12,1,longitudinal single arm,ouhsc -0.66,0.66,0.66,0.66,0.66,0.66,1,decimal comma and dot,ouhsc -0.8,0.8,0.8,0.8,0.8,0.8,1,decimal comma,ouhsc -0.62,0.62,0.62,0.62,0.62,0.62,1,decimal dot,ouhsc -0.63,0.63,0.63,0.63,0.63,0.63,1,Validation Types,ouhsc -0.61,0.61,0.61,0.61,0.61,0.61,1,Blank for Gray Status,ouhsc -0.62,0.62,0.62,0.62,0.62,0.62,1,Checkboxes 1,ouhsc -0.61,0.61,0.61,0.61,0.61,0.61,1,Vignette: Longitudinal & Repeating Measures,ouhsc -0.86,0.86,0.86,0.86,0.86,0.86,1,classic,redcaptidier -0.72,0.72,0.72,0.72,0.72,0.72,1,classic no repeat,redcaptidier -1.08,1.08,1.08,1.08,1.08,1.08,1,longitudinal,redcaptidier -1.08,1.08,1.08,1.08,1.08,1.08,1,longitudinal no arms,redcaptidier -1.22,1.22,1.22,1.22,1.22,1.22,1,longitudinal no repeat,redcaptidier -1.47,1.47,1.47,1.47,1.47,1.47,1,deep dive vignette,redcaptidier -0.64,0.64,0.64,0.64,0.64,0.64,1,repeat first instrument,redcaptidier -1.03,1.03,1.03,1.03,1.03,1.03,1,repeat event,redcaptidier -0.99,0.99,0.99,0.99,0.99,0.99,1,restricted access,redcaptidier -0.67,0.67,0.67,0.67,0.67,0.67,1,large sparse db,redcaptidier -0.65,0.65,0.65,0.65,0.65,0.65,1,data access groups,redcaptidier -1.1,1.1,1.1,1.1,1.1,1.1,1,longitudinal data access groups,redcaptidier -1.38,1.38,1.38,1.38,1.38,1.38,1,mixed structure repeat no repeat,redcaptidier -3.55,3.55,3.55,3.55,3.55,3.55,1,prodigy 
db,redcaptidier -3.59,3.59,3.59,3.59,3.59,3.59,1,cart comprehensive db,redcaptidier +0.56,0.56,0.56,0.56,0.56,0.56,1,simple write metadata,ouhsc +0.65,0.65,0.65,0.65,0.65,0.65,1,DAG Write -admin,ouhsc +0.56,0.56,0.56,0.56,0.56,0.56,1,DAG Write -group A,ouhsc +130.23,130.23,130.23,130.23,130.23,130.23,1,"super-wide #3--35,000 columns",ouhsc +0.66,0.66,0.66,0.66,0.66,0.66,1,Repeating Instruments --Sparse,ouhsc +0.65,0.65,0.65,0.65,0.65,0.65,1,Delete Single Arm,ouhsc +1.04,1.04,1.04,1.04,1.04,1.04,1,Delete Multiple Arm,ouhsc +1.14,1.14,1.14,1.14,1.14,1.14,1,longitudinal single arm,ouhsc +0.62,0.62,0.62,0.62,0.62,0.62,1,decimal comma and dot,ouhsc +0.67,0.67,0.67,0.67,0.67,0.67,1,decimal comma,ouhsc +0.6,0.6,0.6,0.6,0.6,0.6,1,decimal dot,ouhsc +0.66,0.66,0.66,0.66,0.66,0.66,1,Validation Types,ouhsc +0.69,0.69,0.69,0.69,0.69,0.69,1,Blank for Gray Status,ouhsc +0.68,0.68,0.68,0.68,0.68,0.68,1,Checkboxes 1,ouhsc +0.67,0.67,0.67,0.67,0.67,0.67,1,Vignette: Longitudinal & Repeating Measures,ouhsc +0.9,0.9,0.9,0.9,0.9,0.9,1,classic,redcaptidier +0.82,0.82,0.82,0.82,0.82,0.82,1,classic no repeat,redcaptidier +1.27,1.27,1.27,1.27,1.27,1.27,1,longitudinal,redcaptidier +1.22,1.22,1.22,1.22,1.22,1.22,1,longitudinal no arms,redcaptidier +1.26,1.26,1.26,1.26,1.26,1.26,1,longitudinal no repeat,redcaptidier +1.62,1.62,1.62,1.62,1.62,1.62,1,deep dive vignette,redcaptidier +0.74,0.74,0.74,0.74,0.74,0.74,1,repeat first instrument,redcaptidier +1.23,1.23,1.23,1.23,1.23,1.23,1,repeat event,redcaptidier +1.04,1.04,1.04,1.04,1.04,1.04,1,restricted access,redcaptidier +1.06,1.06,1.06,1.06,1.06,1.06,1,large sparse db,redcaptidier +0.74,0.74,0.74,0.74,0.74,0.74,1,data access groups,redcaptidier +1.35,1.35,1.35,1.35,1.35,1.35,1,longitudinal data access groups,redcaptidier +1.42,1.42,1.42,1.42,1.42,1.42,1,mixed structure repeat no repeat,redcaptidier +5.33,5.33,5.33,5.33,5.33,5.33,1,prodigy db,redcaptidier +8.95,8.95,8.95,8.95,8.95,8.95,1,cart comprehensive db,redcaptidier diff --git 
a/vignettes/REDCapTidieR.Rmd b/vignettes/REDCapTidieR.Rmd index 884c4b0b..d70be3bd 100644 --- a/vignettes/REDCapTidieR.Rmd +++ b/vignettes/REDCapTidieR.Rmd @@ -246,3 +246,21 @@ heroes_information_metadata |> ``` This enables quick insights into data content and supports exploratory data analytics. The columns added by `add_skimr_metadata()` can also be [labelled](glossary.html#labelled). + +## Package Options + +REDCapTidieR allows you to set a couple of options globally to avoid passing extra arguments to `read_redcap()`. + +Globally allow [mixed structure instruments](https://chop-cgtinformatics.github.io/REDCapTidieR/articles/diving_deeper.html#mixed-structure-instruments): + +``` +options(redcaptidier.allow.mixed.structure = TRUE) +``` + +Globally silence warnings related to [Missing Data Codes](https://kb.wisc.edu/smph/informatics/page.php?id=108107) (MDCs): + +``` +options(redcaptidier.allow.mdc = TRUE) +``` + +As of v1.1.0, REDCapTidieR has partial support for MDCs. MDCs in logical and categorical fields are converted to `NA` with a warning. MDCs in all other field types remain in the output. If you need greater support for MDCs, consider [opening an issue](https://github.com/CHOP-CGTInformatics/REDCapTidieR/issues)! diff --git a/vignettes/articles/diving_deeper.Rmd b/vignettes/articles/diving_deeper.Rmd index 8d4d5e48..078e976a 100644 --- a/vignettes/articles/diving_deeper.Rmd +++ b/vignettes/articles/diving_deeper.Rmd @@ -173,7 +173,7 @@ Note that REDCapTidieR allows for an instrument to be associated with both repea REDCapTidieR supports projects with multiple arms. If you have a project with multiple arms, there will be an additional column `redcap_arm` to identify the arm that the row is associated with. -### Mixed Structure Instruments +### Mixed Structure Instruments {#mixed-structure-instruments} By default, REDCapTidieR does not allow you to have the same instrument designated both as a repeating and as a nonrepeating instrument in different events (i.e. 
a "mixed structure instrument"), and will throw an error if this is detected: