Skip to content

Commit

Permalink
Merge pull request #177 from CHOP-CGTInformatics/repeat-nonrepeat-sup…
Browse files Browse the repository at this point in the history
…port

Add Mixed Repeat/Nonrepeat Instrument Support
  • Loading branch information
rsh52 authored Mar 11, 2024
2 parents 706b7a2 + 3b0c0f8 commit 5ab694e
Show file tree
Hide file tree
Showing 22 changed files with 433 additions and 112 deletions.
1 change: 1 addition & 0 deletions .github/workflows/R-CMD-check.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ jobs:
REDCAPTIDIER_LARGE_SPARSE_API: ${{ secrets.REDCAPTIDIER_LARGE_SPARSE_API }}
REDCAPTIDIER_DAG_API: ${{ secrets.REDCAPTIDIER_DAG_API }}
REDCAPTIDIER_LONGITUDINAL_DAG_API: ${{ secrets.REDCAPTIDIER_LONGITUDINAL_DAG_API }}
REDCAPTIDIER_MIXED_STRUCTURE_API: ${{ secrets.REDCAPTIDIER_MIXED_STRUCTURE_API }}
steps:
- name: Update Ubuntu, Install cURL Headers, add Libraries
run: |
Expand Down
1 change: 1 addition & 0 deletions .github/workflows/pkgdown.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ jobs:
SUPERHEROES_REDCAP_API: ${{ secrets.SUPERHEROES_REDCAP_API }}
REDCAPTIDIER_DEEP_DIVE_VIGNETTE_API: ${{ secrets.REDCAPTIDIER_DEEP_DIVE_VIGNETTE_API }}
REDCAPTIDIER_DAG_API: ${{ secrets.REDCAPTIDIER_DAG_API }}
REDCAPTIDIER_MIXED_STRUCTURE_API: ${{ secrets.REDCAPTIDIER_MIXED_STRUCTURE_API }}
steps:
- uses: actions/checkout@v3

Expand Down
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Package: REDCapTidieR
Type: Package
Title: Extract 'REDCap' Databases into Tidy 'Tibble's
Version: 1.0.0
Version: 1.1.0
Authors@R: c(
person("Richard", "Hanna", , "[email protected]", role = c("aut", "cre"),
comment = c(ORCID = "0009-0005-6496-8154")),
Expand Down
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,7 @@ importFrom(rlang,try_fetch)
importFrom(rlang,zap)
importFrom(stringi,stri_split_fixed)
importFrom(stringr,str_detect)
importFrom(stringr,str_ends)
importFrom(stringr,str_replace)
importFrom(stringr,str_replace_all)
importFrom(stringr,str_squish)
Expand Down
5 changes: 5 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
# REDCapTidieR 1.1.0

- `read_redcap()` now supports instruments that follow a mixed repeating/non-repeating structure with the `allow_mixed_structure` parameter
- When enabled, instruments with mixed repeating/nonrepeating structure will be treated as single-instance repeating instruments

# REDCapTidieR 1.0.0

Version 1.0.0
Expand Down
2 changes: 1 addition & 1 deletion R/REDCapTidieR-package.R
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
#' is_installed new_environment quo_get_expr try_fetch zap as_label
#' @importFrom stringi stri_split_fixed
#' @importFrom stringr str_detect str_replace str_replace_all str_squish str_trunc
#' str_trim
#' str_trim str_ends
#' @importFrom tibble as_tibble is_tibble tibble
#' @importFrom tidyr complete fill pivot_wider nest unnest unnest_wider
#' @importFrom tidyselect all_of any_of ends_with eval_select everything
Expand Down
49 changes: 13 additions & 36 deletions R/checks.R
Original file line number Diff line number Diff line change
Expand Up @@ -101,48 +101,25 @@ check_user_rights <- function(db_data,


check_repeat_and_nonrepeat <- function(db_data, call = caller_env()) {
# Identify columns to check for repeat/nonrepeat behavior
safe_cols <- c(
names(db_data)[1], "redcap_event_name",
"redcap_repeat_instrument", "redcap_repeat_instance",
"redcap_data_access_group"
)

check_cols <- setdiff(names(db_data), safe_cols)

# Set up check_data function that looks for repeating and nonrepeating
# behavior in a given column and returns a boolean
check_data <- function(db_data, check_col) {
# Repeating Check
rep <- any(!is.na(db_data[{{ check_col }}]) & !is.na(db_data["redcap_repeat_instrument"]))

# Nonrepeating Check
nonrep <- any(!is.na(db_data[{{ check_col }}]) & is.na(db_data["redcap_repeat_instrument"]))

rep & nonrep
}

# Create a simple dataframe, loop through check columns and append
# dataframe with column being checked and the output of check_data
out <- data.frame()
for (i in seq_along(check_cols)) {
rep_and_nonrep <- db_data %>%
check_data(check_col = check_cols[i])

field <- check_cols[i]

out <- rbind(out, data.frame(field, rep_and_nonrep))
out
}
out <- get_mixed_structure_fields(db_data = db_data)

# Filter for violations
out <- out %>%
filter(rep_and_nonrep)
filter(.data$rep_and_nonrep)

# Produce error message if violations detected
if (nrow(out) > 0) {
cli_abort(c("x" = "Instrument{?s} detected that ha{?s/ve} both repeating and
nonrepeating instances defined in the project: {out$field}"),
cli_abort(
c(
"x" = "Instrument{?s} detected that ha{?s/ve} both repeating and
nonrepeating instances defined in the project: {out$field}",
"i" = paste0(
"Set {.code allow_mixed_structure} to {.code TRUE} to override. ",
"See ",
"{.href [Mixed Structure Instruments](https://chop-cgtinformatics.github.io/REDCapTidieR/articles/diving_deeper.html#mixed-structure-instruments)} ", # nolint line_length_linter
"for more information."
)
),
class = c("repeat_nonrepeat_instrument", "REDCapTidieR_cond"),
call = call
)
Expand Down
168 changes: 147 additions & 21 deletions R/clean_redcap_long.R
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,10 @@
#' \code{REDCapR::redcap_metadata_read()$data}
#' @param linked_arms Output of \code{link_arms}, linking instruments to REDCap
#' events/arms
#' @param allow_mixed_structure A logical to allow for support of mixed repeating/non-repeating
#' instruments. Setting to `TRUE` will treat the mixed instrument's non-repeating versions
#' as repeating instruments with a single instance. Applies to longitudinal projects
#' only. Default `FALSE`.
#'
#' @return
#' Returns a \code{tibble} with list elements containing tidy dataframes. Users
Expand All @@ -22,7 +26,8 @@

clean_redcap_long <- function(db_data_long,
db_metadata_long,
linked_arms) {
linked_arms,
allow_mixed_structure = FALSE) {
# Repeating Instrument Check ----
# Check if database supplied contains any repeating instruments to map onto
# `redcap_repeat_*` variables
Expand All @@ -33,30 +38,13 @@ clean_redcap_long <- function(db_data_long,
assert_data_frame(db_data_long)
assert_data_frame(db_metadata_long)

if (has_repeat_forms) {
check_repeat_and_nonrepeat(db_data_long)
}

## Repeating Instruments Logic ----
## Repeating Forms Assignment ----
# Needed first to inform nonrepeating forms logic
if (has_repeat_forms) {
repeated_forms <- db_data_long %>%
filter(!is.na(.data$redcap_repeat_instrument)) %>%
pull(.data$redcap_repeat_instrument) %>%
unique()

repeated_forms_tibble <- tibble(
redcap_form_name = repeated_forms,
redcap_data = map(
.data$redcap_form_name,
~ distill_repeat_table_long(
.x,
db_data_long,
db_metadata_long,
linked_arms
)
),
structure = "repeating"
)
}

## Nonrepeating Instruments Logic ----
Expand Down Expand Up @@ -86,6 +74,47 @@ clean_redcap_long <- function(db_data_long,
structure = "nonrepeating"
)

## Repeating Instruments Logic ----
if (has_repeat_forms) {
# If mixed structure allowed, retrieve mixed structure forms
has_mixed_structure_forms <- FALSE # nolint: object_usage_linter

mixed_structure_ref <- data.frame()

if (allow_mixed_structure) {
# Retrieve mixed structure fields and forms in reference df
mixed_structure_ref <- get_mixed_structure_fields(db_data_long) %>%
filter(.data$rep_and_nonrep & !str_ends(.data$field_name, "_form_complete")) %>%
left_join(db_metadata_long %>% select(.data$field_name, .data$form_name),
by = "field_name"
)

# Update if project actually has mixed structure
has_mixed_structure_forms <- nrow(mixed_structure_ref) > 0
} else {
check_repeat_and_nonrepeat(db_data_long)
}

repeated_forms_tibble <- tibble(
redcap_form_name = repeated_forms,
redcap_data = map(
.data$redcap_form_name,
~ distill_repeat_table_long(
.x,
db_data_long,
db_metadata_long,
linked_arms,
has_mixed_structure_forms = has_mixed_structure_forms,
mixed_structure_ref = mixed_structure_ref
)
),
structure = case_when(
has_mixed_structure_forms & redcap_form_name %in% mixed_structure_ref$form_name ~ "mixed",
TRUE ~ "repeating"
)
)
}

if (has_repeat_forms) {
rbind(repeated_forms_tibble, nonrepeated_forms_tibble)
} else {
Expand Down Expand Up @@ -235,13 +264,19 @@ distill_nonrepeat_table_long <- function(form_name,
#' \code{REDCapR::redcap_metadata_read()$data}
#' @param linked_arms Output of \code{link_arms}, linking instruments to REDCap
#' events/arms
#' @param has_mixed_structure Whether the instrument under evaluation has a mixed
#' structure. Default `FALSE`.
#' @param name mixed_structure_ref A mixed structure reference dataframe supplied
#' by `get_mixed_structure_fields()`.
#'
#' @keywords internal

distill_repeat_table_long <- function(form_name,
db_data_long,
db_metadata_long,
linked_arms) {
linked_arms,
has_mixed_structure_forms = FALSE,
mixed_structure_ref = NULL) {
has_repeat_forms <- "redcap_repeat_instance" %in% names(db_data_long)

my_record_id <- names(db_data_long)[1]
Expand Down Expand Up @@ -275,6 +310,11 @@ distill_repeat_table_long <- function(form_name,
my_fields <- c(my_fields, "redcap_data_access_group")
}

# If has mixed structure, convert form
if (has_mixed_structure_forms) {
db_data_long <- convert_mixed_instrument(db_data_long, mixed_structure_ref %>% filter(form_name == my_form))
}

# Setup data for loop redcap_arm linking
db_data_long <- db_data_long %>%
add_partial_keys(var = .data$redcap_event_name) %>%
Expand Down Expand Up @@ -337,3 +377,89 @@ distill_repeat_table_long <- function(form_name,
out %>%
tibble()
}

#' @title Convert Mixed Structure Instruments to Repeating Instruments
#'
#' @description
#' For longitudinal projects where users set `allow_mixed_structure` to `TRUE`,
#' this function will handle the process of setting the nonrepeating parts of the
#' instrument to repeating ones with a single instance.
#'
#' @param db_data_long The longitudinal REDCap database output defined by
#' \code{REDCapR::redcap_read_oneshot()$data}
#' @param mixed_structure_ref Reference dataframe containing mixed structure
#' fields and forms.
#'
#' @return
#' Returns a \code{tibble} with list elements containing tidy dataframes. Users
#' can access dataframes under the \code{redcap_data} column with reference to
#' \code{form_name} and \code{structure} column details.
#'
#' @keywords internal

convert_mixed_instrument <- function(db_data_long, mixed_structure_ref) {
for (i in seq_len(nrow(mixed_structure_ref))) {
field <- mixed_structure_ref$field_name[i]
form <- mixed_structure_ref$form_name[i]

# Create a logical mask for rows needing update
update_mask <- is.na(db_data_long$redcap_repeat_instance) & !is.na(db_data_long[[field]])

# Update redcap_repeat_instance
db_data_long$redcap_repeat_instance <- if_else(update_mask, 1, db_data_long$redcap_repeat_instance)

# Update redcap_repeat_instrument
db_data_long$redcap_repeat_instrument <- if_else(update_mask, form, db_data_long$redcap_repeat_instrument)
}

db_data_long
}

#' @title Get Mixed Structure Instrument List
#'
#' @description
#' Define fields in a given project that are used in both a repeating and
#' nonrepeating manner.
#'
#' @param db_data The REDCap database output generated by
#' \code{REDCapR::redcap_read_oneshot()$data}
#'
#' @returns a dataframe
#'
#' @keywords internal

get_mixed_structure_fields <- function(db_data) {
# Identify columns to check for repeat/nonrepeat behavior
safe_cols <- c(
names(db_data)[1], "redcap_event_name",
"redcap_repeat_instrument", "redcap_repeat_instance",
"redcap_data_access_group"
)

check_cols <- setdiff(names(db_data), safe_cols)

# Set up check_data function that looks for repeating and nonrepeating
# behavior in a given column and returns a boolean
check_data <- function(db_data, check_col) {
# Repeating Check
rep <- any(!is.na(db_data[{{ check_col }}]) & !is.na(db_data["redcap_repeat_instrument"]))

# Nonrepeating Check
nonrep <- any(!is.na(db_data[{{ check_col }}]) & is.na(db_data["redcap_repeat_instrument"]))

rep & nonrep
}

# Create a simple dataframe, loop through check columns and append
# dataframe with column being checked and the output of check_data
out <- data.frame()
for (i in seq_along(check_cols)) {
rep_and_nonrep <- db_data %>%
check_data(check_col = check_cols[i])

field_name <- check_cols[i]

out <- rbind(out, data.frame(field_name, rep_and_nonrep))
}
out
}
11 changes: 9 additions & 2 deletions R/read_redcap.R
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,10 @@
#' @param guess_max A positive [base::numeric] value
#' passed to [readr::read_csv()] that specifies the maximum number of records to
#' use for guessing column types. Default `.Machine$integer.max`.
#' @param allow_mixed_structure A logical to allow for support of mixed repeating/non-repeating
#' instruments. Setting to `TRUE` will treat the mixed instrument's non-repeating versions
#' as repeating instruments with a single instance. Applies to longitudinal projects
#' only. Default `FALSE`. Can be set globally with `options(redcaptidier.allow.mixed.structure = FALSE)`.
#'
#' @examples
#' \dontrun{
Expand All @@ -75,7 +79,8 @@ read_redcap <- function(redcap_uri,
export_survey_fields = NULL,
export_data_access_groups = NULL,
suppress_redcapr_messages = TRUE,
guess_max = .Machine$integer.max) {
guess_max = .Machine$integer.max,
allow_mixed_structure = getOption("redcaptidier.allow.mixed.structure", FALSE)) {
check_arg_is_character(redcap_uri, len = 1, any.missing = FALSE)
check_arg_is_character(token, len = 1, any.missing = FALSE)
check_arg_is_valid_token(token)
Expand All @@ -84,6 +89,7 @@ read_redcap <- function(redcap_uri,
check_arg_is_logical(export_survey_fields, len = 1, any.missing = FALSE, null.ok = TRUE)
check_arg_is_logical(export_data_access_groups, len = 1, any.missing = FALSE, null.ok = TRUE)
check_arg_is_logical(suppress_redcapr_messages, len = 1, any.missing = FALSE)
check_arg_is_logical(allow_mixed_structure, len = 1, any.missing = FALSE)

# Load REDCap Metadata ----
# Capture unexpected metadata API call errors
Expand Down Expand Up @@ -267,7 +273,8 @@ read_redcap <- function(redcap_uri,
out <- clean_redcap_long(
db_data_long = db_data,
db_metadata_long = db_metadata,
linked_arms = linked_arms
linked_arms = linked_arms,
allow_mixed_structure = allow_mixed_structure
)
} else {
out <- clean_redcap(
Expand Down
Binary file added inst/testdata/db_metadata_mixed_structure.RDS
Binary file not shown.
Binary file added inst/testdata/db_mixed_structure.RDS
Binary file not shown.
Binary file added inst/testdata/db_mixed_structure_linked_arms.RDS
Binary file not shown.
Loading

0 comments on commit 5ab694e

Please sign in to comment.