diff --git a/.github/workflows/R-CMD-check.yaml b/.github/workflows/R-CMD-check.yaml
index c9b86ec0..062e53d4 100644
--- a/.github/workflows/R-CMD-check.yaml
+++ b/.github/workflows/R-CMD-check.yaml
@@ -29,6 +29,7 @@ jobs:
REDCAPTIDIER_LARGE_SPARSE_API: ${{ secrets.REDCAPTIDIER_LARGE_SPARSE_API }}
REDCAPTIDIER_DAG_API: ${{ secrets.REDCAPTIDIER_DAG_API }}
REDCAPTIDIER_LONGITUDINAL_DAG_API: ${{ secrets.REDCAPTIDIER_LONGITUDINAL_DAG_API }}
+ REDCAPTIDIER_MIXED_STRUCTURE_API: ${{ secrets.REDCAPTIDIER_MIXED_STRUCTURE_API }}
steps:
- name: Update Ubuntu, Install cURL Headers, add Libraries
run: |
diff --git a/.github/workflows/pkgdown.yaml b/.github/workflows/pkgdown.yaml
index 1dd23590..0b6da316 100644
--- a/.github/workflows/pkgdown.yaml
+++ b/.github/workflows/pkgdown.yaml
@@ -25,6 +25,7 @@ jobs:
SUPERHEROES_REDCAP_API: ${{ secrets.SUPERHEROES_REDCAP_API }}
REDCAPTIDIER_DEEP_DIVE_VIGNETTE_API: ${{ secrets.REDCAPTIDIER_DEEP_DIVE_VIGNETTE_API }}
REDCAPTIDIER_DAG_API: ${{ secrets.REDCAPTIDIER_DAG_API }}
+ REDCAPTIDIER_MIXED_STRUCTURE_API: ${{ secrets.REDCAPTIDIER_MIXED_STRUCTURE_API }}
steps:
- uses: actions/checkout@v3
diff --git a/DESCRIPTION b/DESCRIPTION
index 3fd75276..a55b0122 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,7 +1,7 @@
Package: REDCapTidieR
Type: Package
Title: Extract 'REDCap' Databases into Tidy 'Tibble's
-Version: 1.0.0
+Version: 1.1.0
Authors@R: c(
person("Richard", "Hanna", , "richardshanna91@gmail.com", role = c("aut", "cre"),
comment = c(ORCID = "0009-0005-6496-8154")),
diff --git a/NAMESPACE b/NAMESPACE
index b3f04e42..137cf7a0 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -92,6 +92,7 @@ importFrom(rlang,try_fetch)
importFrom(rlang,zap)
importFrom(stringi,stri_split_fixed)
importFrom(stringr,str_detect)
+importFrom(stringr,str_ends)
importFrom(stringr,str_replace)
importFrom(stringr,str_replace_all)
importFrom(stringr,str_squish)
diff --git a/NEWS.md b/NEWS.md
index 41de6306..8753513c 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,3 +1,8 @@
+# REDCapTidieR 1.1.0
+
+- `read_redcap()` now supports instruments that follow a mixed repeating/non-repeating structure with the `allow_mixed_structure` parameter
+- When enabled, instruments with mixed repeating/nonrepeating structure will be treated as single-instance repeating instruments
+
# REDCapTidieR 1.0.0
Version 1.0.0
diff --git a/R/REDCapTidieR-package.R b/R/REDCapTidieR-package.R
index 1f932d5f..c277dc2b 100644
--- a/R/REDCapTidieR-package.R
+++ b/R/REDCapTidieR-package.R
@@ -18,7 +18,7 @@
#' is_installed new_environment quo_get_expr try_fetch zap as_label
#' @importFrom stringi stri_split_fixed
#' @importFrom stringr str_detect str_replace str_replace_all str_squish str_trunc
-#' str_trim
+#' str_trim str_ends
#' @importFrom tibble as_tibble is_tibble tibble
#' @importFrom tidyr complete fill pivot_wider nest unnest unnest_wider
#' @importFrom tidyselect all_of any_of ends_with eval_select everything
diff --git a/R/checks.R b/R/checks.R
index b2a8a28b..06eb6e45 100644
--- a/R/checks.R
+++ b/R/checks.R
@@ -101,48 +101,25 @@ check_user_rights <- function(db_data,
check_repeat_and_nonrepeat <- function(db_data, call = caller_env()) {
- # Identify columns to check for repeat/nonrepeat behavior
- safe_cols <- c(
- names(db_data)[1], "redcap_event_name",
- "redcap_repeat_instrument", "redcap_repeat_instance",
- "redcap_data_access_group"
- )
-
- check_cols <- setdiff(names(db_data), safe_cols)
-
- # Set up check_data function that looks for repeating and nonrepeating
- # behavior in a given column and returns a boolean
- check_data <- function(db_data, check_col) {
- # Repeating Check
- rep <- any(!is.na(db_data[{{ check_col }}]) & !is.na(db_data["redcap_repeat_instrument"]))
-
- # Nonrepeating Check
- nonrep <- any(!is.na(db_data[{{ check_col }}]) & is.na(db_data["redcap_repeat_instrument"]))
-
- rep & nonrep
- }
-
- # Create a simple dataframe, loop through check columns and append
- # dataframe with column being checked and the output of check_data
- out <- data.frame()
- for (i in seq_along(check_cols)) {
- rep_and_nonrep <- db_data %>%
- check_data(check_col = check_cols[i])
-
- field <- check_cols[i]
-
- out <- rbind(out, data.frame(field, rep_and_nonrep))
- out
- }
+ out <- get_mixed_structure_fields(db_data = db_data)
# Filter for violations
out <- out %>%
- filter(rep_and_nonrep)
+ filter(.data$rep_and_nonrep)
# Produce error message if violations detected
if (nrow(out) > 0) {
- cli_abort(c("x" = "Instrument{?s} detected that ha{?s/ve} both repeating and
- nonrepeating instances defined in the project: {out$field}"),
+ # `get_mixed_structure_fields()` names its column `field_name`. Spell the name
+ # out in full: `out$field` only resolved through `$` partial matching on a base
+ # data.frame and would silently yield NULL if `out` were ever a tibble.
+ cli_abort(
+ c(
+ "x" = "Instrument{?s} detected that ha{?s/ve} both repeating and
+ nonrepeating instances defined in the project: {out$field_name}",
+ "i" = paste0(
+ "Set {.code allow_mixed_structure} to {.code TRUE} to override. ",
+ "See ",
+ "{.href [Mixed Structure Instruments](https://chop-cgtinformatics.github.io/REDCapTidieR/articles/diving_deeper.html#mixed-structure-instruments)} ", # nolint line_length_linter
+ "for more information."
+ )
+ ),
class = c("repeat_nonrepeat_instrument", "REDCapTidieR_cond"),
call = call
)
diff --git a/R/clean_redcap_long.R b/R/clean_redcap_long.R
index 122a4a4d..56402796 100644
--- a/R/clean_redcap_long.R
+++ b/R/clean_redcap_long.R
@@ -12,6 +12,10 @@
#' \code{REDCapR::redcap_metadata_read()$data}
#' @param linked_arms Output of \code{link_arms}, linking instruments to REDCap
#' events/arms
+#' @param allow_mixed_structure A logical to allow for support of mixed repeating/non-repeating
+#' instruments. Setting to `TRUE` will treat the mixed instrument's non-repeating versions
+#' as repeating instruments with a single instance. Applies to longitudinal projects
+#' only. Default `FALSE`.
#'
#' @return
#' Returns a \code{tibble} with list elements containing tidy dataframes. Users
@@ -22,7 +26,8 @@
clean_redcap_long <- function(db_data_long,
db_metadata_long,
- linked_arms) {
+ linked_arms,
+ allow_mixed_structure = FALSE) {
# Repeating Instrument Check ----
# Check if database supplied contains any repeating instruments to map onto
# `redcap_repeat_*` variables
@@ -33,30 +38,13 @@ clean_redcap_long <- function(db_data_long,
assert_data_frame(db_data_long)
assert_data_frame(db_metadata_long)
- if (has_repeat_forms) {
- check_repeat_and_nonrepeat(db_data_long)
- }
-
- ## Repeating Instruments Logic ----
+ ## Repeating Forms Assignment ----
+ # Needed first to inform nonrepeating forms logic
if (has_repeat_forms) {
repeated_forms <- db_data_long %>%
filter(!is.na(.data$redcap_repeat_instrument)) %>%
pull(.data$redcap_repeat_instrument) %>%
unique()
-
- repeated_forms_tibble <- tibble(
- redcap_form_name = repeated_forms,
- redcap_data = map(
- .data$redcap_form_name,
- ~ distill_repeat_table_long(
- .x,
- db_data_long,
- db_metadata_long,
- linked_arms
- )
- ),
- structure = "repeating"
- )
}
## Nonrepeating Instruments Logic ----
@@ -86,6 +74,47 @@ clean_redcap_long <- function(db_data_long,
structure = "nonrepeating"
)
+ ## Repeating Instruments Logic ----
+ if (has_repeat_forms) {
+   # Stays FALSE unless mixed structure is allowed AND actually detected below
+   has_mixed_structure_forms <- FALSE # nolint: object_usage_linter
+
+   # Empty reference: `mixed_structure_ref$form_name` is NULL, so no form can
+   # match it in the `structure` assignment further down
+   mixed_structure_ref <- data.frame()
+
+   if (allow_mixed_structure) {
+     # Retrieve mixed structure fields and attach the form each one belongs to.
+     # Columns are selected by name because `.data$` inside `select()` is
+     # deprecated in tidyselect contexts.
+     mixed_structure_ref <- get_mixed_structure_fields(db_data_long) %>%
+       filter(.data$rep_and_nonrep & !str_ends(.data$field_name, "_form_complete")) %>%
+       left_join(db_metadata_long %>% select("field_name", "form_name"),
+         by = "field_name"
+       )
+
+     # Update if project actually has mixed structure
+     has_mixed_structure_forms <- nrow(mixed_structure_ref) > 0
+   } else {
+     # Mixed structure not allowed: run the check that errors when it is detected
+     check_repeat_and_nonrepeat(db_data_long)
+   }
+
+   repeated_forms_tibble <- tibble(
+     redcap_form_name = repeated_forms,
+     redcap_data = map(
+       .data$redcap_form_name,
+       ~ distill_repeat_table_long(
+         .x,
+         db_data_long,
+         db_metadata_long,
+         linked_arms,
+         has_mixed_structure_forms = has_mixed_structure_forms,
+         mixed_structure_ref = mixed_structure_ref
+       )
+     ),
+     # Forms listed in the reference are labelled "mixed", all others "repeating"
+     structure = case_when(
+       has_mixed_structure_forms & .data$redcap_form_name %in% mixed_structure_ref$form_name ~ "mixed",
+       TRUE ~ "repeating"
+     )
+   )
+ }
+
if (has_repeat_forms) {
rbind(repeated_forms_tibble, nonrepeated_forms_tibble)
} else {
@@ -235,13 +264,19 @@ distill_nonrepeat_table_long <- function(form_name,
#' \code{REDCapR::redcap_metadata_read()$data}
#' @param linked_arms Output of \code{link_arms}, linking instruments to REDCap
#' events/arms
+#' @param has_mixed_structure_forms Whether mixed structure instruments were
+#' detected and should be converted. Default `FALSE`.
+#' @param mixed_structure_ref A mixed structure reference dataframe built from
+#' `get_mixed_structure_fields()` with `form_name` attached. Default `NULL`.
#'
#' @keywords internal
distill_repeat_table_long <- function(form_name,
db_data_long,
db_metadata_long,
- linked_arms) {
+ linked_arms,
+ has_mixed_structure_forms = FALSE,
+ mixed_structure_ref = NULL) {
has_repeat_forms <- "redcap_repeat_instance" %in% names(db_data_long)
my_record_id <- names(db_data_long)[1]
@@ -275,6 +310,11 @@ distill_repeat_table_long <- function(form_name,
my_fields <- c(my_fields, "redcap_data_access_group")
}
+ # If has mixed structure, convert form
+ if (has_mixed_structure_forms) {
+ db_data_long <- convert_mixed_instrument(db_data_long, mixed_structure_ref %>% filter(form_name == my_form))
+ }
+
# Setup data for loop redcap_arm linking
db_data_long <- db_data_long %>%
add_partial_keys(var = .data$redcap_event_name) %>%
@@ -337,3 +377,89 @@ distill_repeat_table_long <- function(form_name,
out %>%
tibble()
}
+
+#' @title Convert Mixed Structure Instruments to Repeating Instruments
+#'
+#' @description
+#' For longitudinal projects where users set `allow_mixed_structure` to `TRUE`,
+#' this function will handle the process of setting the nonrepeating parts of the
+#' instrument to repeating ones with a single instance.
+#'
+#' @param db_data_long The longitudinal REDCap database output defined by
+#' \code{REDCapR::redcap_read_oneshot()$data}
+#' @param mixed_structure_ref Reference dataframe containing mixed structure
+#' fields and forms. Its `field_name` and `form_name` columns are read row by row.
+#'
+#' @return
+#' Returns `db_data_long` with `redcap_repeat_instance` set to `1` and
+#' `redcap_repeat_instrument` set to the matching `form_name` on every row that
+#' has a value in a mixed structure field but no `redcap_repeat_instance`.
+#' No other columns are modified.
+#'
+#' @keywords internal
+
+convert_mixed_instrument <- function(db_data_long, mixed_structure_ref) {
+ # One pass per reference row (field/form pair)
+ for (i in seq_len(nrow(mixed_structure_ref))) {
+ field <- mixed_structure_ref$field_name[i]
+ form <- mixed_structure_ref$form_name[i]
+
+ # Create a logical mask for rows needing update: the field holds data but the
+ # row has no repeat instance. Rows already updated by an earlier field in this
+ # loop now have an instance and are therefore skipped.
+ update_mask <- is.na(db_data_long$redcap_repeat_instance) & !is.na(db_data_long[[field]])
+
+ # Update redcap_repeat_instance (masked rows become instance 1)
+ db_data_long$redcap_repeat_instance <- if_else(update_mask, 1, db_data_long$redcap_repeat_instance)
+
+ # Update redcap_repeat_instrument (masked rows are assigned the field's form)
+ db_data_long$redcap_repeat_instrument <- if_else(update_mask, form, db_data_long$redcap_repeat_instrument)
+ }
+
+ db_data_long
+}
+
+#' @title Get Mixed Structure Instrument List
+#'
+#' @description
+#' Define fields in a given project that are used in both a repeating and
+#' nonrepeating manner.
+#'
+#' @param db_data The REDCap database output generated by
+#' \code{REDCapR::redcap_read_oneshot()$data}
+#'
+#' @returns A dataframe with one row per checked column and two columns:
+#' `field_name` (character) and `rep_and_nonrep` (logical, `TRUE` when the field
+#' holds data on at least one row with a `redcap_repeat_instrument` and on at
+#' least one row without). When there are no columns to check, a zero-row
+#' dataframe with the same two columns is returned.
+#'
+#' @keywords internal
+
+get_mixed_structure_fields <- function(db_data) {
+  # Identify columns to check for repeat/nonrepeat behavior. The first column
+  # and the REDCap structural columns are excluded.
+  safe_cols <- c(
+    names(db_data)[1], "redcap_event_name",
+    "redcap_repeat_instrument", "redcap_repeat_instance",
+    "redcap_data_access_group"
+  )
+
+  check_cols <- setdiff(names(db_data), safe_cols)
+
+  # Whether a row is nonrepeating is the same for every field, so compute it
+  # once instead of once per checked column
+  is_nonrepeat_row <- is.na(db_data["redcap_repeat_instrument"])
+
+  # For each column: does it hold data on both repeating and nonrepeating rows?
+  # vapply() guarantees a logical vector, including logical(0) when
+  # `check_cols` is empty, so the result always has both columns.
+  rep_and_nonrep <- vapply(
+    check_cols,
+    function(check_col) {
+      has_value <- !is.na(db_data[check_col])
+
+      any(has_value & !is_nonrepeat_row) && any(has_value & is_nonrepeat_row)
+    },
+    logical(1),
+    USE.NAMES = FALSE
+  )
+
+  data.frame(field_name = check_cols, rep_and_nonrep = rep_and_nonrep)
+}
diff --git a/R/read_redcap.R b/R/read_redcap.R
index 74e5cf25..e60b26d4 100644
--- a/R/read_redcap.R
+++ b/R/read_redcap.R
@@ -53,6 +53,10 @@
#' @param guess_max A positive [base::numeric] value
#' passed to [readr::read_csv()] that specifies the maximum number of records to
#' use for guessing column types. Default `.Machine$integer.max`.
+#' @param allow_mixed_structure A logical to allow for support of mixed repeating/non-repeating
+#' instruments. Setting to `TRUE` will treat the mixed instrument's non-repeating versions
+#' as repeating instruments with a single instance. Applies to longitudinal projects
+#' only. Default `FALSE`. Can be set globally with `options(redcaptidier.allow.mixed.structure = TRUE)`.
#'
#' @examples
#' \dontrun{
@@ -75,7 +79,8 @@ read_redcap <- function(redcap_uri,
export_survey_fields = NULL,
export_data_access_groups = NULL,
suppress_redcapr_messages = TRUE,
- guess_max = .Machine$integer.max) {
+ guess_max = .Machine$integer.max,
+ allow_mixed_structure = getOption("redcaptidier.allow.mixed.structure", FALSE)) {
check_arg_is_character(redcap_uri, len = 1, any.missing = FALSE)
check_arg_is_character(token, len = 1, any.missing = FALSE)
check_arg_is_valid_token(token)
@@ -84,6 +89,7 @@ read_redcap <- function(redcap_uri,
check_arg_is_logical(export_survey_fields, len = 1, any.missing = FALSE, null.ok = TRUE)
check_arg_is_logical(export_data_access_groups, len = 1, any.missing = FALSE, null.ok = TRUE)
check_arg_is_logical(suppress_redcapr_messages, len = 1, any.missing = FALSE)
+ check_arg_is_logical(allow_mixed_structure, len = 1, any.missing = FALSE)
# Load REDCap Metadata ----
# Capture unexpected metadata API call errors
@@ -267,7 +273,8 @@ read_redcap <- function(redcap_uri,
out <- clean_redcap_long(
db_data_long = db_data,
db_metadata_long = db_metadata,
- linked_arms = linked_arms
+ linked_arms = linked_arms,
+ allow_mixed_structure = allow_mixed_structure
)
} else {
out <- clean_redcap(
diff --git a/inst/testdata/db_metadata_mixed_structure.RDS b/inst/testdata/db_metadata_mixed_structure.RDS
new file mode 100644
index 00000000..490ebbdc
Binary files /dev/null and b/inst/testdata/db_metadata_mixed_structure.RDS differ
diff --git a/inst/testdata/db_mixed_structure.RDS b/inst/testdata/db_mixed_structure.RDS
new file mode 100644
index 00000000..91b86a2a
Binary files /dev/null and b/inst/testdata/db_mixed_structure.RDS differ
diff --git a/inst/testdata/db_mixed_structure_linked_arms.RDS b/inst/testdata/db_mixed_structure_linked_arms.RDS
new file mode 100644
index 00000000..5892f5b2
Binary files /dev/null and b/inst/testdata/db_mixed_structure_linked_arms.RDS differ
diff --git a/man/clean_redcap_long.Rd b/man/clean_redcap_long.Rd
index 9926f46f..46e5fcf0 100644
--- a/man/clean_redcap_long.Rd
+++ b/man/clean_redcap_long.Rd
@@ -4,7 +4,12 @@
\alias{clean_redcap_long}
\title{Extract longitudinal REDCap databases into tidy tibbles}
\usage{
-clean_redcap_long(db_data_long, db_metadata_long, linked_arms)
+clean_redcap_long(
+ db_data_long,
+ db_metadata_long,
+ linked_arms,
+ allow_mixed_structure = FALSE
+)
}
\arguments{
\item{db_data_long}{The longitudinal REDCap database output defined by
@@ -15,6 +20,11 @@ clean_redcap_long(db_data_long, db_metadata_long, linked_arms)
\item{linked_arms}{Output of \code{link_arms}, linking instruments to REDCap
events/arms}
+
+\item{allow_mixed_structure}{A logical to allow for support of mixed repeating/non-repeating
+instruments. Setting to \code{TRUE} will treat the mixed instrument's non-repeating versions
+as repeating instruments with a single instance. Applies to longitudinal projects
+only. Default \code{FALSE}.}
}
\value{
Returns a \code{tibble} with list elements containing tidy dataframes. Users
diff --git a/man/convert_mixed_instrument.Rd b/man/convert_mixed_instrument.Rd
new file mode 100644
index 00000000..ed3a2816
--- /dev/null
+++ b/man/convert_mixed_instrument.Rd
@@ -0,0 +1,26 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/clean_redcap_long.R
+\name{convert_mixed_instrument}
+\alias{convert_mixed_instrument}
+\title{Convert Mixed Structure Instruments to Repeating Instruments}
+\usage{
+convert_mixed_instrument(db_data_long, mixed_structure_ref)
+}
+\arguments{
+\item{db_data_long}{The longitudinal REDCap database output defined by
+\code{REDCapR::redcap_read_oneshot()$data}}
+
+\item{mixed_structure_ref}{Reference dataframe containing mixed structure
+fields and forms.}
+}
+\value{
+Returns \code{db_data_long} with \code{redcap_repeat_instance} set to \code{1} and
+\code{redcap_repeat_instrument} set to the matching \code{form_name} on every row that
+has a value in a mixed structure field but no \code{redcap_repeat_instance}.
+}
+\description{
+For longitudinal projects where users set \code{allow_mixed_structure} to \code{TRUE},
+this function will handle the process of setting the nonrepeating parts of the
+instrument to repeating ones with a single instance.
+}
+\keyword{internal}
diff --git a/man/distill_repeat_table_long.Rd b/man/distill_repeat_table_long.Rd
index 814abbcb..6cecc0fc 100644
--- a/man/distill_repeat_table_long.Rd
+++ b/man/distill_repeat_table_long.Rd
@@ -8,7 +8,9 @@ distill_repeat_table_long(
form_name,
db_data_long,
db_metadata_long,
- linked_arms
+ linked_arms,
+ has_mixed_structure_forms = FALSE,
+ mixed_structure_ref = NULL
)
}
\arguments{
@@ -23,6 +25,12 @@ REDCap metadata.}
\item{linked_arms}{Output of \code{link_arms}, linking instruments to REDCap
events/arms}
+
+\item{has_mixed_structure_forms}{Whether mixed structure instruments were
+detected and should be converted. Default \code{FALSE}.}
+
+\item{mixed_structure_ref}{A mixed structure reference dataframe built from
+\code{get_mixed_structure_fields()} with \code{form_name} attached. Default \code{NULL}.}
}
\value{
A \code{tibble} of all data related to a specified \code{form_name}
diff --git a/man/get_mixed_structure_fields.Rd b/man/get_mixed_structure_fields.Rd
new file mode 100644
index 00000000..13340521
--- /dev/null
+++ b/man/get_mixed_structure_fields.Rd
@@ -0,0 +1,20 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/clean_redcap_long.R
+\name{get_mixed_structure_fields}
+\alias{get_mixed_structure_fields}
+\title{Get Mixed Structure Instrument List}
+\usage{
+get_mixed_structure_fields(db_data)
+}
+\arguments{
+\item{db_data}{The REDCap database output generated by
+\code{REDCapR::redcap_read_oneshot()$data}}
+}
+\value{
+a dataframe
+}
+\description{
+Define fields in a given project that are used in both a repeating and
+nonrepeating manner.
+}
+\keyword{internal}
diff --git a/man/read_redcap.Rd b/man/read_redcap.Rd
index a57f4880..7cd5d2bb 100644
--- a/man/read_redcap.Rd
+++ b/man/read_redcap.Rd
@@ -12,7 +12,8 @@ read_redcap(
export_survey_fields = NULL,
export_data_access_groups = NULL,
suppress_redcapr_messages = TRUE,
- guess_max = .Machine$integer.max
+ guess_max = .Machine$integer.max,
+ allow_mixed_structure = getOption("redcaptidier.allow.mixed.structure", FALSE)
)
}
\arguments{
@@ -45,6 +46,11 @@ from REDCapR API calls. Default \code{TRUE}.}
\item{guess_max}{A positive \link[base:numeric]{base::numeric} value
passed to \code{\link[readr:read_delim]{readr::read_csv()}} that specifies the maximum number of records to
use for guessing column types. Default \code{.Machine$integer.max}.}
+
+\item{allow_mixed_structure}{A logical to allow for support of mixed repeating/non-repeating
+instruments. Setting to \code{TRUE} will treat the mixed instrument's non-repeating versions
+as repeating instruments with a single instance. Applies to longitudinal projects
+only. Default \code{FALSE}. Can be set globally with \code{options(redcaptidier.allow.mixed.structure = TRUE)}.}
}
\value{
A \code{tibble} in which each row represents a REDCap instrument. It
diff --git a/tests/testthat/test-clean_redcap_long.R b/tests/testthat/test-clean_redcap_long.R
index 97d87b14..04f479f0 100644
--- a/tests/testthat/test-clean_redcap_long.R
+++ b/tests/testthat/test-clean_redcap_long.R
@@ -28,6 +28,15 @@ db_metadata_long_noarms <- readRDS(
linked_arms_long_noarms <- readRDS(
system.file("testdata/linked_arms_long_noarms.RDS", package = "REDCapTidieR")
)
+db_mixed_structure <- readRDS(
+ system.file("testdata/db_mixed_structure.RDS", package = "REDCapTidieR")
+)
+db_metadata_mixed_structure <- readRDS(
+ system.file("testdata/db_metadata_mixed_structure.RDS", package = "REDCapTidieR")
+)
+db_mixed_structure_linked_arms <- readRDS(
+ system.file("testdata/db_mixed_structure_linked_arms.RDS", package = "REDCapTidieR")
+)
# Run Tests ----
test_that("clean_redcap_long with arms works", {
@@ -72,6 +81,59 @@ test_that("clean_redcap_long without arms works", {
expect_true(!is.null(out$redcap_data))
})
+test_that("clean_redcap_long with mixed structure works", {
+ # Required since amendments take place before clean_redcap_long call in read_redcap
+ db_metadata_mixed_structure <- update_field_names(db_metadata_mixed_structure)
+
+ # Expect error when allow_mixed_structure not specified (it defaults to FALSE)
+ expect_error(
+ clean_redcap_long(
+ db_data_long = db_mixed_structure,
+ db_metadata_long = db_metadata_mixed_structure,
+ linked_arms = db_mixed_structure_linked_arms
+ ),
+ class = "repeat_nonrepeat_instrument"
+ )
+
+ # Same inputs with mixed structure explicitly allowed
+ out <- clean_redcap_long(
+ db_data_long = db_mixed_structure,
+ db_metadata_long = db_metadata_mixed_structure,
+ linked_arms = db_mixed_structure_linked_arms,
+ allow_mixed_structure = TRUE
+ )
+
+ # Check general structure, check all three structure types present
+ expect_true(is_tibble(out))
+ expect_true("mixed" %in% out$structure)
+ expect_true("nonrepeating" %in% out$structure)
+ expect_true("repeating" %in% out$structure)
+ expect_true(!is.null(out$redcap_data))
+
+ # Check redcap_data contents for mixed and nonrepeating structure
+ # The nonrepeating event_1 entry is expected as instance 1 of the mixed form
+ expected_mixed_data <- tibble::tribble(
+ ~record_id, ~redcap_event, ~redcap_form_instance, ~mixed_structure_1, ~form_status_complete,
+ 1, "event_1", 1, "Mixed Nonrepeat 1", 0,
+ 1, "event_2", 1, "Mixed Repeat 1", 0,
+ 1, "event_2", 2, "Mixed Repeat 2", 0
+ )
+
+ # The purely nonrepeating form is expected without an instance column
+ expected_nonrepeat_data <- tibble::tribble(
+ ~record_id, ~redcap_event, ~nonrepeat_1, ~form_status_complete,
+ 1, "event_1", "Nonrepeat 1", 0,
+ 1, "event_2", "Nonrepeat 2", 0
+ )
+
+ expect_equal(
+ out$redcap_data[out$redcap_form_name == "mixed_structure_form"][[1]],
+ expected_mixed_data
+ )
+
+ expect_equal(
+ out$redcap_data[out$redcap_form_name == "nonrepeat_form"][[1]],
+ expected_nonrepeat_data
+ )
+})
+
test_that("distill_nonrepeat_table_long tibble contains expected columns for longitudinal REDCap databases with arms", {
## Check longitudinal structure with arms ----
out <- distill_nonrepeat_table_long(
@@ -208,3 +270,47 @@ test_that("distill_repeat_table_long no arms returns tables for REDCap dbs with
any(c("redcap_repeat_instrument", "redcap_arm") %in% names(out))
)
})
+
+test_that("get_mixed_structure_fields works", {
+ # One variable with data on a nonrepeating row (record 1) and on a repeating
+ # row (record 2)
+ mixed_structure_db <- tibble::tribble(
+ ~record_id, ~redcap_repeat_instrument, ~redcap_repeat_instance, ~mixed_structure_variable,
+ 1, NA, NA, "A",
+ 2, "mixed_structure_form", 1, "B"
+ )
+
+ # Only the non-structural column is reported, flagged as mixed
+ expected_out <- data.frame(
+ field_name = "mixed_structure_variable",
+ rep_and_nonrep = TRUE
+ )
+
+ out <- get_mixed_structure_fields(mixed_structure_db)
+
+ expect_equal(out, expected_out)
+})
+
+test_that("convert_mixed_instrument works", {
+ # Record 1 holds mixed form data without a repeat instance; records 3 and 4
+ # belong to a different, purely repeating form
+ mixed_structure_db <- tibble::tribble(
+ ~record_id, ~redcap_repeat_instrument, ~redcap_repeat_instance, ~mixed_structure_variable, ~repeat_form_variable,
+ 1, NA, NA, "A", NA,
+ 2, "mixed_structure_form", 1, "B", NA,
+ 3, "repeat_form", 1, NA, "C",
+ 4, "repeat_form", 2, NA, "D"
+ )
+
+ # Reference linking the mixed field to its form
+ mixed_structure_ref <- tibble::tribble(
+ ~field_name, ~rep_and_nonrep, ~form_name,
+ "mixed_structure_variable", TRUE, "mixed_structure_form"
+ )
+
+ # Only record 1 changes: it becomes instance 1 of the mixed form
+ expected_out <- tibble::tribble(
+ ~record_id, ~redcap_repeat_instrument, ~redcap_repeat_instance, ~mixed_structure_variable, ~repeat_form_variable,
+ 1, "mixed_structure_form", 1, "A", NA,
+ 2, "mixed_structure_form", 1, "B", NA,
+ 3, "repeat_form", 1, NA, "C",
+ 4, "repeat_form", 2, NA, "D"
+ )
+
+ out <- convert_mixed_instrument(mixed_structure_db, mixed_structure_ref)
+
+ expect_equal(out, expected_out)
+})
diff --git a/utility/microbenchmark_results.csv b/utility/microbenchmark_results.csv
index ef042006..3a08cbcd 100644
--- a/utility/microbenchmark_results.csv
+++ b/utility/microbenchmark_results.csv
@@ -1,41 +1,43 @@
-min,lq,mean,median,uq,max,neval
-891.35,891.35,891.35,891.35,891.35,891.35,1
-1.42,1.42,1.42,1.42,1.42,1.42,1
-661.09,661.09,661.09,661.09,661.09,661.09,1
-2.23,2.23,2.23,2.23,2.23,2.23,1
-3.37,3.37,3.37,3.37,3.37,3.37,1
-699.25,699.25,699.25,699.25,699.25,699.25,1
-641.48,641.48,641.48,641.48,641.48,641.48,1
-596.4,596.4,596.4,596.4,596.4,596.4,1
-600.89,600.89,600.89,600.89,600.89,600.89,1
-747.66,747.66,747.66,747.66,747.66,747.66,1
-710.54,710.54,710.54,710.54,710.54,710.54,1
-632.86,632.86,632.86,632.86,632.86,632.86,1
-623.4,623.4,623.4,623.4,623.4,623.4,1
-588.44,588.44,588.44,588.44,588.44,588.44,1
-150.11,150.11,150.11,150.11,150.11,150.11,1
-654.46,654.46,654.46,654.46,654.46,654.46,1
-581.29,581.29,581.29,581.29,581.29,581.29,1
-1.06,1.06,1.06,1.06,1.06,1.06,1
-1.17,1.17,1.17,1.17,1.17,1.17,1
-601.02,601.02,601.02,601.02,601.02,601.02,1
-658.33,658.33,658.33,658.33,658.33,658.33,1
-635.63,635.63,635.63,635.63,635.63,635.63,1
-779.47,779.47,779.47,779.47,779.47,779.47,1
-610.22,610.22,610.22,610.22,610.22,610.22,1
-668.01,668.01,668.01,668.01,668.01,668.01,1
-720.67,720.67,720.67,720.67,720.67,720.67,1
-1.08,1.08,1.08,1.08,1.08,1.08,1
-1.07,1.07,1.07,1.07,1.07,1.07,1
-1.96,1.96,1.96,1.96,1.96,1.96,1
-1.48,1.48,1.48,1.48,1.48,1.48,1
-1.64,1.64,1.64,1.64,1.64,1.64,1
-2.16,2.16,2.16,2.16,2.16,2.16,1
-878.95,878.95,878.95,878.95,878.95,878.95,1
-1.73,1.73,1.73,1.73,1.73,1.73,1
-961.31,961.31,961.31,961.31,961.31,961.31,1
-1.14,1.14,1.14,1.14,1.14,1.14,1
-975.07,975.07,975.07,975.07,975.07,975.07,1
-1.55,1.55,1.55,1.55,1.55,1.55,1
-5.59,5.59,5.59,5.59,5.59,5.59,1
-8.51,8.51,8.51,8.51,8.51,8.51,1
+min,lq,mean,median,uq,max,neval,description,source
+1.56,1.56,1.56,1.56,1.56,1.56,1,simple static (read-only) test project,ouhsc
+2.08,2.08,2.08,2.08,2.08,2.08,1,longitudinal (read-only) ARM test project,ouhsc
+841.23,841.23,841.23,841.23,841.23,841.23,1,simple write data,ouhsc
+4.34,4.34,4.34,4.34,4.34,4.34,1,Russian Characters,ouhsc
+6.67,6.67,6.67,6.67,6.67,6.67,1,"super-wide --3,000 columns",ouhsc
+912.5,912.5,912.5,912.5,912.5,912.5,1,static (not longitudinal) survey test project,ouhsc
+769.03,769.03,769.03,769.03,769.03,769.03,1,"Clinical Trial (Fake) --Read-only, contributed by @higgi13425",ouhsc
+1.08,1.08,1.08,1.08,1.08,1.08,1,nonnumeric record_id,ouhsc
+729.67,729.67,729.67,729.67,729.67,729.67,1,DAG Read,ouhsc
+936.55,936.55,936.55,936.55,936.55,936.55,1,potentially problematic values,ouhsc
+778.25,778.25,778.25,778.25,778.25,778.25,1,Repeating Instruments,ouhsc
+1.62,1.62,1.62,1.62,1.62,1.62,1,simple write metadata,ouhsc
+974.01,974.01,974.01,974.01,974.01,974.01,1,DAG Write -admin,ouhsc
+791.47,791.47,791.47,791.47,791.47,791.47,1,DAG Write -group A,ouhsc
+389.54,389.54,389.54,389.54,389.54,389.54,1,"super-wide #3--35,000 columns",ouhsc
+827.92,827.92,827.92,827.92,827.92,827.92,1,Repeating Instruments --Sparse,ouhsc
+636.88,636.88,636.88,636.88,636.88,636.88,1,Delete Single Arm,ouhsc
+1.41,1.41,1.41,1.41,1.41,1.41,1,Delete Multiple Arm,ouhsc
+1.14,1.14,1.14,1.14,1.14,1.14,1,longitudinal single arm,ouhsc
+1.01,1.01,1.01,1.01,1.01,1.01,1,decimal comma and dot,ouhsc
+997.98,997.98,997.98,997.98,997.98,997.98,1,decimal comma,ouhsc
+900.25,900.25,900.25,900.25,900.25,900.25,1,decimal dot,ouhsc
+1.04,1.04,1.04,1.04,1.04,1.04,1,Validation Types,ouhsc
+1.03,1.03,1.03,1.03,1.03,1.03,1,Blank for Gray Status,ouhsc
+903.09,903.09,903.09,903.09,903.09,903.09,1,Checkboxes 1,ouhsc
+1.32,1.32,1.32,1.32,1.32,1.32,1,Vignette: Longitudinal & Repeating Measures,ouhsc
+2.49,2.49,2.49,2.49,2.49,2.49,1,classic,redcaptidier
+3.02,3.02,3.02,3.02,3.02,3.02,1,classic no repeat,redcaptidier
+3.36,3.36,3.36,3.36,3.36,3.36,1,longitudinal,redcaptidier
+3.67,3.67,3.67,3.67,3.67,3.67,1,longitudinal no arms,redcaptidier
+5.06,5.06,5.06,5.06,5.06,5.06,1,longitudinal no repeat,redcaptidier
+5.01,5.01,5.01,5.01,5.01,5.01,1,deep dive vignette,redcaptidier
+2.11,2.11,2.11,2.11,2.11,2.11,1,repeat first instrument,redcaptidier
+3.35,3.35,3.35,3.35,3.35,3.35,1,repeat event,redcaptidier
+2.46,2.46,2.46,2.46,2.46,2.46,1,restricted access,redcaptidier
+2,2,2,2,2,2,1,large sparse db,redcaptidier
+1.94,1.94,1.94,1.94,1.94,1.94,1,data access groups,redcaptidier
+3.28,3.28,3.28,3.28,3.28,3.28,1,longitudinal data access groups,redcaptidier
+3.15,3.15,3.15,3.15,3.15,3.15,1,mixed structure repeat no repeat,redcaptidier
+10.44,10.44,10.44,10.44,10.44,10.44,1,prodigy db,redcaptidier
+15.1,15.1,15.1,15.1,15.1,15.1,1,cart comprehensive db,redcaptidier
+48.32,48.32,48.32,48.32,48.32,48.32,1,bmt outcomes db,redcaptidier
diff --git a/utility/test_creds.R b/utility/test_creds.R
index d42883aa..97c51353 100644
--- a/utility/test_creds.R
+++ b/utility/test_creds.R
@@ -31,17 +31,18 @@ redcaptidier_creds <- tibble::tribble(
Sys.getenv("REDCAP_URI"), Sys.getenv("REDCAPTIDIER_LARGE_SPARSE_API"), "large sparse db",
Sys.getenv("REDCAP_URI"), Sys.getenv("REDCAPTIDIER_DAG_API"), "data access groups",
Sys.getenv("REDCAP_URI"), Sys.getenv("REDCAPTIDIER_LONGITUDINAL_DAG_API"), "longitudinal data access groups",
+ Sys.getenv("REDCAP_URI"), Sys.getenv("REDCAPTIDIER_MIXED_STRUCTURE_API"), "mixed structure repeat no repeat",
Sys.getenv("REDCAP_URI"), Sys.getenv("PRODIGY_REDCAP_API"), "prodigy db",
Sys.getenv("REDCAP_URI"), Sys.getenv("CART_COMP_REDCAP_API"), "cart comprehensive db",
Sys.getenv("REDCAP_URI"), Sys.getenv("BMT_OUTCOMES_REDCAP_API"), "bmt outcomes db"
)
# Combine Credentials
-creds <- rbind(ouhsc_creds, redcaptidier_creds)
+creds <- rbind(ouhsc_creds %>% mutate(source = "ouhsc"), redcaptidier_creds %>% mutate(source = "redcaptidier"))
+# Time a single `read_redcap()` call against one project.
+# `redcap_uri` and `token` are passed to `read_redcap()`; `times` is passed to
+# `microbenchmark()`. NOTE(review): the `name` argument is never read; the
+# `name =` below labels the benchmarked expression with the literal string
+# "name" -- confirm whether a per-project label was intended.
microbenchmark_fx <- function(redcap_uri, token, name, times = 1){
microbenchmark(
- name = read_redcap(redcap_uri = redcap_uri, token = token),
+ name = read_redcap(redcap_uri = redcap_uri, token = token, allow_mixed_structure = TRUE),
times = times
)
}
@@ -57,4 +58,5 @@ for (i in seq_along(microbenchmark_results)) {
out %>%
select(-expr) %>%
mutate(across(tidyselect::everything(), ~round(., digits = 2))) %>%
+ cbind(., "description" = creds$comment, "source" = creds$source) %>%
readr::write_csv("utility/microbenchmark_results.csv")
diff --git a/vignettes/articles/diving_deeper.Rmd b/vignettes/articles/diving_deeper.Rmd
index 127c690f..8d4d5e48 100644
--- a/vignettes/articles/diving_deeper.Rmd
+++ b/vignettes/articles/diving_deeper.Rmd
@@ -19,6 +19,7 @@ knitr::knit_exit()
redcap_uri <- Sys.getenv("REDCAP_URI")
superheroes_token <- Sys.getenv("SUPERHEROES_REDCAP_API")
longitudinal_token <- Sys.getenv("REDCAPTIDIER_DEEP_DIVE_VIGNETTE_API")
+mixed_token <- Sys.getenv("REDCAPTIDIER_MIXED_STRUCTURE_API")
survey_token <- Sys.getenv("REDCAPTIDIER_CLASSIC_API")
dag_token <- Sys.getenv("REDCAPTIDIER_DAG_API")
```
@@ -80,7 +81,7 @@ super_hero_powers |>
rmarkdown::paged_table()
```
-## Longitudinal REDCap projects
+## Longitudinal REDCap Projects
REDCap supports two main mechanisms to allow collecting the same data multiple times: [**repeating instruments**](glossary.html#repeating-instrument) and [**longitudinal projects**](glossary.html#longitudinal-project). In addition, a longitudinal project may have [**arms**](glossary.html#arm) and/or [**repeating events**](glossary.html#repeating-event).
@@ -156,8 +157,6 @@ adverse_events |>
It is possible to have a repeating instrument designated to multiple events, however this is an uncommon pattern. REDCapTidieR supports this scenario as well.
-REDCapTidieR does *not* allow you to have the same instrument designated both as a repeating and as a nonrepeating instrument in different events. It will throw an error if it detects this.
-
The `unscheduled` **event** is a **repeating event**. Like adverse events, unscheduled visits aren't tied to a pre-determined study visit, and a patient could have zero, one, or multiple unscheduled visits. On the other hand, you might want to record the same kinds of data for an unscheduled visit as for a pre-determined regular visit and collect data in the same instruments, for example `physical_exam` and `hematology`. The granularity of these tables is one row per study subject per event per *event instance*. The subject had two unscheduled visits. `redcap_event_instance` allows us to match `physical_exam` and `hematology` responses which occurred on the same unscheduled visit.
```{r, results='hold'}
@@ -174,7 +173,31 @@ Note that REDCapTidieR allows for an instrument to be associated with both repea
REDCapTidieR supports projects with multiple arms. If you have a project with multiple arms, there will be an additional column `redcap_arm` to identify the arm that the row is associated with.
-## Categorical variables
+### Mixed Structure Instruments
+
+By default, REDCapTidieR does not allow you to have the same instrument designated both as a repeating and as a nonrepeating instrument in different events (i.e. a "mixed structure instrument"), and will throw an error if this is detected:
+
+```
+Error in `clean_redcap_long()` at REDCapTidieR/R/read_redcap.R:272:5:
+✖ Instruments detected that have both repeating and nonrepeating instances defined in the project: mixed_structure_1 and mixed_structure_form_complete
+ℹ Set `allow_mixed_structure` to `TRUE` to override. See Mixed Structure Instruments for more information.
+```
+
+This is because such a design inherently goes against [tidy](glossary.html#tidy) data principles.
+
+However, as of REDCapTidieR v1.1.0, you can override this behavior by setting `allow_mixed_structure` in `read_redcap()` to `TRUE`. When enabled, nonrepeating variants of mixed structure instruments will be treated as repeating instruments with a single repeating instance.
+
+Use caution when enabling this feature, since it changes how nonrepeating data from these instruments are represented in the output. To help you identify affected instruments, the `structure` column of the [supertibble](glossary.html#supertibble) will say "mixed":
+
+```{r}
+read_redcap(redcap_uri,
+ mixed_token,
+ allow_mixed_structure = TRUE
+) |>
+ rmarkdown::paged_table()
+```
+
+## Categorical Variables
REDCapTidieR performs a number of opinionated transformations on categorical [fields](glossary.html#field) to streamline exploring them and working with them.
diff --git a/vignettes/glossary.Rmd b/vignettes/glossary.Rmd
index 0d44116a..7382eaa1 100644
--- a/vignettes/glossary.Rmd
+++ b/vignettes/glossary.Rmd
@@ -227,7 +227,7 @@ The [skimr](https://docs.ropensci.org/skimr/) R package provides summary statist
### Structure {#structure}
-The structure of an [instrument](#instrument) can be [repeating](#repeating-instrument) or [nonrepeating](#nonrepeating-instrument). The [supertibble](#supertibble) shows the instrument's structure in the `structure` column. The structure of a [project](#project) can be [classic](#classic-project), [longitudinal](#longitudinal-project), or longitudinal with [arms](#arm). The structure of an [event](#event) can be [repeating](#repeating-event) or [nonrepeating](#nonrepeating-event). The [granularity](#granularity) of a [data tibble](#data-tibble) depends on the structure of all three: the instrument, the project, and the events associated with the instrument. Note: REDCap does not allow repeating instruments inside a repeating event. See also: the section [Longitudinal REDCap projects](https://chop-cgtinformatics.github.io/REDCapTidieR/articles/diving_deeper.html#longitudinal-redcap-projects) in the [Diving Deeper vignette](https://chop-cgtinformatics.github.io/REDCapTidieR/articles/diving_deeper.html). ↩︎
+The structure of an [instrument](#instrument) can be [repeating](#repeating-instrument), [nonrepeating](#nonrepeating-instrument), or mixed. The [supertibble](#supertibble) shows the instrument's structure in the `structure` column. The structure of a [project](#project) can be [classic](#classic-project), [longitudinal](#longitudinal-project), or longitudinal with [arms](#arm). The structure of an [event](#event) can be [repeating](#repeating-event) or [nonrepeating](#nonrepeating-event). As of REDCapTidieR v1.1.0, [mixed structure instruments](https://chop-cgtinformatics.github.io/REDCapTidieR/articles/diving_deeper.html#mixed-structure-instruments) are supported. The [granularity](#granularity) of a [data tibble](#data-tibble) depends on the structure of all three: the instrument, the project, and the events associated with the instrument. Note: REDCap does not allow repeating instruments inside a repeating event. See also: the section [Longitudinal REDCap Projects](https://chop-cgtinformatics.github.io/REDCapTidieR/articles/diving_deeper.html#longitudinal-redcap-projects) in the [Diving Deeper vignette](https://chop-cgtinformatics.github.io/REDCapTidieR/articles/diving_deeper.html). ↩︎
### Supertibble {#supertibble}