From 0971400b090b74e838fa9ffd02d952b2fc41dff4 Mon Sep 17 00:00:00 2001 From: Richard Hanna Date: Mon, 9 Sep 2024 16:15:35 -0400 Subject: [PATCH 01/17] Draft join_data_tibbles - no documentation yet - no support for mixed structure insturments yet --- R/join_data_tibbles.R | 49 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 49 insertions(+) create mode 100644 R/join_data_tibbles.R diff --git a/R/join_data_tibbles.R b/R/join_data_tibbles.R new file mode 100644 index 00000000..4b7b56cd --- /dev/null +++ b/R/join_data_tibbles.R @@ -0,0 +1,49 @@ +join_data_tibbles <- function(suprtbl, x, y, by = NULL, type = "left", name = "new_tibble", suffix = c(".x", ".y")) { + record_id_field <- get_record_id_field(suprtbl$redcap_data[[1]]) + + # Append the supertibble with the primary keys column + suprtbl <- suprtbl |> + mutate(pks = purrr::map_chr(redcap_data, ~extract_keys(., record_id_field = record_id_field))) %>% + select(redcap_form_name, redcap_form_label, redcap_data, redcap_metadata, structure, pks) + + # Extract the user defined tibbles and assign the structure and pks to attributes + tbl_x <- extract_tibble(suprtbl, tbl = x) + attributes(tbl_x)$structure <- suprtbl$structure[suprtbl$redcap_form_name == x] + attributes(tbl_x)$pks <- suprtbl$pks[suprtbl$redcap_form_name == x] %>% stringr::str_split(", ", simplify = TRUE) + + tbl_y <- extract_tibble(suprtbl, tbl = y) + attributes(tbl_y)$structure <- suprtbl$structure[suprtbl$redcap_form_name == y] + attributes(tbl_y)$pks <- suprtbl$pks[suprtbl$redcap_form_name == y] %>% stringr::str_split(", ", simplify = TRUE) + + # Define a named list of join functions corresponding to the join types + join_functions <- list( + left = dplyr::left_join, + right = dplyr::right_join, + inner = dplyr::inner_join, + full = dplyr::full_join + ) + + # Check if the specified type is valid + # TODO: Make a standard check function with cli + if (!type %in% names(join_functions)) { + stop("Invalid join type. Choose from 'left', 'right', 'inner', or 'full'.") + } + + # Mixed structure requires special handling + is_mixed <- any(c(attr(tbl_x, "structure"), attr(tbl_y, "structure")) == "mixed") + + if (is_mixed) { + return() # TODO: this is the complicated part + } else { + rlang::exec( + join_functions[[type]], tbl_x, tbl_y, by = intersect(attr(tbl_x, "pks"), attr(tbl_y, "pks")), suffix + ) + } +} + +extract_keys <- function(suprtbl, record_id_field) { + suprtbl |> + colnames() |> + intersect(c(record_id_field, "redcap_event", "redcap_form_instance", "redcap_event_instance", "redcap_arm")) |> + paste(collapse = ", ") +} From e544edb007cd7d74fea9d4bf062a23124004693d Mon Sep 17 00:00:00 2001 From: Richard Hanna Date: Mon, 9 Sep 2024 16:32:59 -0400 Subject: [PATCH 02/17] Consolidate code --- R/join_data_tibbles.R | 25 +++++++++++++++---------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/R/join_data_tibbles.R b/R/join_data_tibbles.R index 4b7b56cd..cb3480da 100644 --- a/R/join_data_tibbles.R +++ b/R/join_data_tibbles.R @@ -6,14 +6,8 @@ join_data_tibbles <- function(suprtbl, x, y, by = NULL, type = "left", name = "n mutate(pks = purrr::map_chr(redcap_data, ~extract_keys(., record_id_field = record_id_field))) %>% select(redcap_form_name, redcap_form_label, redcap_data, redcap_metadata, structure, pks) - # Extract the user defined tibbles and assign the structure and pks to attributes - tbl_x <- extract_tibble(suprtbl, tbl = x) - attributes(tbl_x)$structure <- suprtbl$structure[suprtbl$redcap_form_name == x] - attributes(tbl_x)$pks <- suprtbl$pks[suprtbl$redcap_form_name == x] %>% stringr::str_split(", ", simplify = TRUE) - - tbl_y <- extract_tibble(suprtbl, tbl = y) - attributes(tbl_y)$structure <- suprtbl$structure[suprtbl$redcap_form_name == y] - attributes(tbl_y)$pks <- suprtbl$pks[suprtbl$redcap_form_name == y] %>% stringr::str_split(", ", simplify = TRUE) + tbl_x <- prepare_tibble(suprtbl, x) + tbl_y <- prepare_tibble(suprtbl, y) # Define a named list of join functions corresponding to the join types join_functions <- list( @@ -33,7 +27,7 @@ join_data_tibbles <- function(suprtbl, x, y, by = NULL, type = "left", name = "n is_mixed <- any(c(attr(tbl_x, "structure"), attr(tbl_y, "structure")) == "mixed") if (is_mixed) { - return() # TODO: this is the complicated part + stop("Mixed structure table detected, this feature is not currently supported.") # TODO: Fix, this is the complicated part } else { rlang::exec( join_functions[[type]], tbl_x, tbl_y, by = intersect(attr(tbl_x, "pks"), attr(tbl_y, "pks")), suffix @@ -42,8 +36,19 @@ join_data_tibbles <- function(suprtbl, x, y, by = NULL, type = "left", name = "n } extract_keys <- function(suprtbl, record_id_field) { + redcap_keys <- c(record_id_field, "redcap_event", "redcap_form_instance", + "redcap_event_instance", "redcap_arm") + suprtbl |> colnames() |> - intersect(c(record_id_field, "redcap_event", "redcap_form_instance", "redcap_event_instance", "redcap_arm")) |> + intersect(redcap_keys) |> paste(collapse = ", ") } + +prepare_tibble <- function(suprtbl, tbl_name) { + tbl <- extract_tibble(suprtbl, tbl = tbl_name) + attributes(tbl)$structure <- suprtbl$structure[suprtbl$redcap_form_name == tbl_name] + attributes(tbl)$pks <- suprtbl$pks[suprtbl$redcap_form_name == tbl_name] %>% + stringr::str_split(", ", simplify = TRUE) + return(tbl) +} From 7dd592b53eb9cbd9b48a7c4fc9366032df2787c1 Mon Sep 17 00:00:00 2001 From: Richard Hanna Date: Tue, 10 Sep 2024 16:35:02 -0400 Subject: [PATCH 03/17] Some updates --- R/join_data_tibbles.R | 61 +++++++++++++++++++++++++++---------------- 1 file changed, 38 insertions(+), 23 deletions(-) diff --git a/R/join_data_tibbles.R b/R/join_data_tibbles.R index cb3480da..7ba38714 100644 --- a/R/join_data_tibbles.R +++ b/R/join_data_tibbles.R @@ -1,37 +1,26 @@ join_data_tibbles <- function(suprtbl, x, y, by = NULL, type = "left", name = "new_tibble", suffix = c(".x", ".y")) { record_id_field <- get_record_id_field(suprtbl$redcap_data[[1]]) + join_fn <- get_join_fn(type) # Append the supertibble with the primary keys column suprtbl <- suprtbl |> mutate(pks = purrr::map_chr(redcap_data, ~extract_keys(., record_id_field = record_id_field))) %>% select(redcap_form_name, redcap_form_label, redcap_data, redcap_metadata, structure, pks) - tbl_x <- prepare_tibble(suprtbl, x) - tbl_y <- prepare_tibble(suprtbl, y) + tbl_x <- extract_tibble(suprtbl, x) + tbl_x_type <- get_structure(suprtbl, x) + tbl_y <- extract_tibble(suprtbl, y) + tbl_y_type <- get_structure(suprtbl, y) - # Define a named list of join functions corresponding to the join types - join_functions <- list( - left = dplyr::left_join, - right = dplyr::right_join, - inner = dplyr::inner_join, - full = dplyr::full_join - ) - - # Check if the specified type is valid - # TODO: Make a standard check function with cli - if (!type %in% names(join_functions)) { - stop("Invalid join type. Choose from 'left', 'right', 'inner', or 'full'.") - } + by <- build_by(suprtbl, x, y) # Mixed structure requires special handling - is_mixed <- any(c(attr(tbl_x, "structure"), attr(tbl_y, "structure")) == "mixed") + is_mixed <- any(c(tbl_x_type, tbl_y_type) == "mixed") if (is_mixed) { stop("Mixed structure table detected, this feature is not currently supported.") # TODO: Fix, this is the complicated part } else { - rlang::exec( - join_functions[[type]], tbl_x, tbl_y, by = intersect(attr(tbl_x, "pks"), attr(tbl_y, "pks")), suffix - ) + join_fn(tbl_x, tbl_y, by = by, suffix = suffix) } } @@ -45,10 +34,36 @@ extract_keys <- function(suprtbl, record_id_field) { paste(collapse = ", ") } -prepare_tibble <- function(suprtbl, tbl_name) { +get_structure <- function(suprtbl, tbl_name) { tbl <- extract_tibble(suprtbl, tbl = tbl_name) - attributes(tbl)$structure <- suprtbl$structure[suprtbl$redcap_form_name == tbl_name] - attributes(tbl)$pks <- suprtbl$pks[suprtbl$redcap_form_name == tbl_name] %>% + suprtbl$structure[suprtbl$redcap_form_name == tbl_name] +} + +get_join_fn <- function(type) { + + join_functions <- list( + left = dplyr::left_join, + right = dplyr::right_join, + inner = dplyr::inner_join, + full = dplyr::full_join + ) + + # Check if the specified type is valid + # TODO: Make a standard check function with cli + if (!type %in% names(join_functions)) { + stop("Invalid join type. Choose from 'left', 'right', 'inner', or 'full'.") + } + + join_functions[[type]] +} + +build_by <- function(suprtbl, x, y) { + + x_pks <- suprtbl$pks[suprtbl$redcap_form_name == x] %>% + stringr::str_split(", ", simplify = TRUE) + y_pks <- suprtbl$pks[suprtbl$redcap_form_name == y] %>% stringr::str_split(", ", simplify = TRUE) - return(tbl) + + intersect(x_pks, y_pks) + } From e80733ebcb05bf5c14155602738523717c08bfc3 Mon Sep 17 00:00:00 2001 From: Richard Hanna Date: Mon, 16 Sep 2024 13:29:32 -0400 Subject: [PATCH 04/17] Update to include mixed_structure handling --- R/join_data_tibbles.R | 39 +++++++++++++++++++++++++++++++-------- 1 file changed, 31 insertions(+), 8 deletions(-) diff --git a/R/join_data_tibbles.R b/R/join_data_tibbles.R index 7ba38714..d77975a4 100644 --- a/R/join_data_tibbles.R +++ b/R/join_data_tibbles.R @@ -1,4 +1,9 @@ -join_data_tibbles <- function(suprtbl, x, y, by = NULL, type = "left", name = "new_tibble", suffix = c(".x", ".y")) { +join_data_tibbles <- function(suprtbl, + x, + y, + by = NULL, + type = "left", + suffix = c(".x", ".y")) { record_id_field <- get_record_id_field(suprtbl$redcap_data[[1]]) join_fn <- get_join_fn(type) @@ -12,16 +17,20 @@ join_data_tibbles <- function(suprtbl, x, y, by = NULL, type = "left", name = "n tbl_y <- extract_tibble(suprtbl, y) tbl_y_type <- get_structure(suprtbl, y) - by <- build_by(suprtbl, x, y) - # Mixed structure requires special handling is_mixed <- any(c(tbl_x_type, tbl_y_type) == "mixed") if (is_mixed) { - stop("Mixed structure table detected, this feature is not currently supported.") # TODO: Fix, this is the complicated part - } else { - join_fn(tbl_x, tbl_y, by = by, suffix = suffix) + required_columns <- c("redcap_event_instance", "redcap_form_instance") + tbl_x <- add_missing_columns(tbl_x, required_columns) + tbl_y <- add_missing_columns(tbl_y, required_columns) } + + join_fn <- get_join_fn(type) + by <- build_by(suprtbl, x, y, is_mixed) + + join_fn(tbl_x, tbl_y, by = by, suffix = suffix) %>% + relocate(starts_with("form_status_complete"), .after = everything()) } extract_keys <- function(suprtbl, record_id_field) { @@ -57,13 +66,27 @@ get_join_fn <- function(type) { join_functions[[type]] } -build_by <- function(suprtbl, x, y) { +build_by <- function(suprtbl, x, y, is_mixed) { x_pks <- suprtbl$pks[suprtbl$redcap_form_name == x] %>% stringr::str_split(", ", simplify = TRUE) y_pks <- suprtbl$pks[suprtbl$redcap_form_name == y] %>% stringr::str_split(", ", simplify = TRUE) - intersect(x_pks, y_pks) + out <- intersect(x_pks, y_pks) + + if (is_mixed) { + out <- c(out, "redcap_event_instance", "redcap_form_instance") %>% + # TODO: Make standard, currently needed for repeat/mixed joins + unique() + } + out } + +add_missing_columns <- function(tbl, columns) { + missing_cols <- setdiff(columns, names(tbl)) + tbl[missing_cols] <- NA + return(tbl) +} + From 9eee96e7f16782e5a9cf46f03a89071d16d14c48 Mon Sep 17 00:00:00 2001 From: Richard Hanna Date: Thu, 19 Sep 2024 16:52:31 -0400 Subject: [PATCH 05/17] Update how read_redcap processes mixed RS records --- R/clean_redcap_long.R | 31 +++++++++++++++++++++++---- R/utils.R | 50 +++++++++++++++++++++++++------------------ 2 files changed, 56 insertions(+), 25 deletions(-) diff --git a/R/clean_redcap_long.R b/R/clean_redcap_long.R index ec5941ea..e33cfad7 100644 --- a/R/clean_redcap_long.R +++ b/R/clean_redcap_long.R @@ -318,7 +318,7 @@ distill_repeat_table_long <- function(form_name, db_data_long <- db_data_long %>% add_partial_keys(var = .data$redcap_event_name) %>% filter( - !is.na(.data$redcap_form_instance) & + (!is.na(.data$redcap_form_instance) | !is.na(redcap_event_instance)) & .data$redcap_repeat_instrument == my_form ) @@ -416,14 +416,37 @@ convert_mixed_instrument <- function(db_data_long, mixed_structure_ref) { ) ) + repeat_together_present <- any(is.na(db_data_long$redcap_repeat_instrument) & !is.na(db_data_long$redcap_repeat_instance)) + + if (!"redcap_event_instance" %in% names(db_data_long) & repeat_together_present) { + db_data_long <- db_data_long %>% + mutate( + redcap_event_instance = NA + ) %>% + relocate(redcap_event_instance, .after = redcap_repeat_instance) + } + + if (repeat_together_present) { + db_data_long <- db_data_long %>% + mutate( + redcap_event_instance = case_when( + # Shift form instances to even instances for repeat-together types + update_mask & is.na(redcap_repeat_instrument) ~ redcap_repeat_instance, + # Otherwise + TRUE ~ redcap_event_instance + ) + ) + } + # Assign update data based on rules below db_data_long <- db_data_long %>% mutate( redcap_repeat_instance = case_when( # Add single instance repeat event instance vals when none exist - update_mask & is.na(redcap_repeat_instance) ~ 1, - # Keep repeat event instance vals when they already exist - update_mask & !is.na(redcap_repeat_instance) ~ redcap_repeat_instance, + # update_mask & is.na(redcap_repeat_instance) ~ 1, + # update_mask & !is.na(redcap_repeat_instance) ~ redcap_repeat_instance, # TODO: Remove + # If repeat-together type, remove values from redcap_repeat_instance (shifted and captured in redcap_event_instance) + update_mask & is.na(redcap_repeat_instrument) ~ NA, TRUE ~ .data$redcap_repeat_instance ), redcap_repeat_instrument = case_when( diff --git a/R/utils.R b/R/utils.R index 44045c6d..203b75d0 100644 --- a/R/utils.R +++ b/R/utils.R @@ -64,35 +64,43 @@ create_repeat_instance_vars <- function(db_data) { } # Detect if repeat events exist - # Determined by non-NA vals in new "redcap_form_instance" alongside - # NA vals in "redcap_repeat_instrument" - # `has_repeat_forms` will always be TRUE for events to exist - if (has_repeat_forms) { + # First determined if redcap_event_instance added during mixed structure handling + # See: convert_mixed_instrument() + has_repeat_events <- "redcap_event_instance" %in% names(out) + + # Next determined by non-NA vals in new "redcap_form_instance" alongside + # NA vals in "redcap_repeat_instrument" + # `has_repeat_forms` will always be TRUE for events to exist + if (has_repeat_forms & !has_repeat_events) { has_repeat_events <- any( is.na(out$redcap_repeat_instrument) & !is.na(out$redcap_form_instance) ) - } else { - has_repeat_events <- FALSE } if (has_repeat_events) { - out$redcap_event_instance <- ifelse( - is.na(out$redcap_repeat_instrument) & - !is.na(out$redcap_form_instance), - out$redcap_form_instance, - NA - ) - out$redcap_form_instance <- ifelse( - is.na(out$redcap_repeat_instrument) & - !is.na(out$redcap_form_instance), - NA, - out$redcap_form_instance - ) + # In cases where there are repeating events but they were not added by + # convert_mixed_instrument(), add an empty redcap_event_instance column + if (!"redcap_event_instance" %in% names(out)) { + out$redcap_event_instance <- NA + } + + out <- out %>% + mutate( + redcap_event_instance = case_when( + is.na(redcap_repeat_instrument) & !is.na(redcap_form_instance) ~ redcap_form_instance, + # Else leave NA or the value given by conver_mixed_instrument() + TRUE ~ redcap_event_instance + ), + redcap_form_instance = case_when( + is.na(redcap_repeat_instrument) & !is.na(redcap_form_instance) ~ NA, + TRUE ~ redcap_form_instance + ) + ) out <- relocate(out, - "redcap_event_instance", - .after = "redcap_form_instance" + "redcap_event_instance", + .after = "redcap_form_instance" ) } @@ -185,7 +193,7 @@ parse_labels <- function(string, return_vector = FALSE, return_stripped_text_fla # If string is empty/NA, throw a warning if (is.na(string)) { cli_warn("Empty string detected for a given multiple choice label.", - class = c("empty_parse_warning", "REDCapTidieR_cond") + class = c("empty_parse_warning", "REDCapTidieR_cond") ) } From 0e755f0f70dfd483e871a910edb4f7ad52013d47 Mon Sep 17 00:00:00 2001 From: Richard Hanna Date: Fri, 20 Sep 2024 14:15:47 -0400 Subject: [PATCH 06/17] Add support to retrieve repeat event types, fix add_partial_keys bug --- R/clean_redcap_long.R | 24 +++++++----- R/read_redcap.R | 63 ++++++++++++++++++++++++++++--- R/utils.R | 16 ++++---- man/add_event_mapping.Rd | 5 ++- man/get_repeat_event_types.Rd | 29 ++++++++++++++ tests/testthat/test-read_redcap.R | 20 ++++++++++ tests/testthat/test-utils.R | 12 ++++-- 7 files changed, 143 insertions(+), 26 deletions(-) create mode 100644 man/get_repeat_event_types.Rd diff --git a/R/clean_redcap_long.R b/R/clean_redcap_long.R index e33cfad7..750578cc 100644 --- a/R/clean_redcap_long.R +++ b/R/clean_redcap_long.R @@ -84,7 +84,9 @@ clean_redcap_long <- function(db_data_long, # Retrieve mixed structure fields and forms in reference df mixed_structure_ref <- get_mixed_structure_fields(db_data_long) %>% filter(.data$rep_and_nonrep & !str_ends(.data$field_name, "_form_complete")) %>% - left_join(db_metadata_long %>% select("field_name", "form_name"), + left_join( + db_metadata_long %>% + select("field_name", "form_name"), by = "field_name" ) @@ -318,7 +320,10 @@ distill_repeat_table_long <- function(form_name, db_data_long <- db_data_long %>% add_partial_keys(var = .data$redcap_event_name) %>% filter( - (!is.na(.data$redcap_form_instance) | !is.na(redcap_event_instance)) & + ( + !is.na(.data$redcap_form_instance) | + if_any(starts_with("redcap_event_instance"), ~ !is.na(.)) + ) & .data$redcap_repeat_instrument == my_form ) @@ -416,14 +421,17 @@ convert_mixed_instrument <- function(db_data_long, mixed_structure_ref) { ) ) - repeat_together_present <- any(is.na(db_data_long$redcap_repeat_instrument) & !is.na(db_data_long$redcap_repeat_instance)) + repeat_together_present <- any( + is.na(db_data_long$redcap_repeat_instrument) & + !is.na(db_data_long$redcap_repeat_instance) + ) - if (!"redcap_event_instance" %in% names(db_data_long) & repeat_together_present) { + if (!"redcap_event_instance" %in% names(db_data_long) && repeat_together_present) { db_data_long <- db_data_long %>% mutate( redcap_event_instance = NA ) %>% - relocate(redcap_event_instance, .after = redcap_repeat_instance) + relocate(.data$redcap_event_instance, .after = .data$redcap_repeat_instance) } if (repeat_together_present) { @@ -442,10 +450,8 @@ convert_mixed_instrument <- function(db_data_long, mixed_structure_ref) { db_data_long <- db_data_long %>% mutate( redcap_repeat_instance = case_when( - # Add single instance repeat event instance vals when none exist - # update_mask & is.na(redcap_repeat_instance) ~ 1, - # update_mask & !is.na(redcap_repeat_instance) ~ redcap_repeat_instance, # TODO: Remove - # If repeat-together type, remove values from redcap_repeat_instance (shifted and captured in redcap_event_instance) + # If repeat-together type, remove values from redcap_repeat_instance + # (shifted and captured in redcap_event_instance) update_mask & is.na(redcap_repeat_instrument) ~ NA, TRUE ~ .data$redcap_repeat_instance ), diff --git a/R/read_redcap.R b/R/read_redcap.R index 874540ce..f20ff34d 100644 --- a/R/read_redcap.R +++ b/R/read_redcap.R @@ -270,8 +270,11 @@ read_redcap <- function(redcap_uri, } if (is_longitudinal) { + repeat_event_types <- get_repeat_event_types(db_data) + linked_arms <- link_arms( - redcap_uri = redcap_uri, token = token, + redcap_uri = redcap_uri, + token = token, suppress_redcapr_messages = suppress_redcapr_messages ) @@ -292,7 +295,7 @@ read_redcap <- function(redcap_uri, out <- add_metadata(out, db_metadata, redcap_uri, token, suppress_redcapr_messages) if (is_longitudinal) { - out <- add_event_mapping(out, linked_arms) + out <- add_event_mapping(out, linked_arms, repeat_event_types) } out <- out %>% @@ -440,18 +443,21 @@ add_metadata <- function(supertbl, db_metadata, redcap_uri, token, suppress_redc #' @param supertbl a supertibble object to supplement with metadata #' @param linked_arms the tibble with event mappings created by #' \code{link_arms()} +#' @param repeat_event_types a dataframe output from [repeat_event_types()] which +#' specifies NR, RS, and RT types for events #' #' @return #' The original supertibble with an events \code{redcap_events} list column #' containing arms and events associated with each instrument #' #' @keywords internal -#' -add_event_mapping <- function(supertbl, linked_arms) { + +add_event_mapping <- function(supertbl, linked_arms, repeat_event_types) { event_info <- linked_arms %>% + left_join(repeat_event_types, by = c("unique_event_name" = "redcap_event_name")) %>% add_partial_keys(.data$unique_event_name) %>% select( - redcap_form_name = "form", "redcap_event", "event_name", "redcap_arm", "arm_name" + redcap_form_name = "form", "redcap_event", "event_name", "redcap_arm", "arm_name", "repeat_type" ) %>% nest(redcap_events = !"redcap_form_name") @@ -498,3 +504,50 @@ calc_metadata_stats <- function(data) { form_complete_pct = percent(form_complete_pct, digits = 2, format = "fg") ) } + +#' @title +#' Add identification for repeat event types +#' +#' @description +#' To correctly assign repeat event types a few assumptions must be made: +#' +#' - There are only 3 behaviors: nonrepeating, repeat_separately, and repeat_together +#' - If an event only shows `redcap_repeat_instance` and `redcap_repeat_instrument` +#' as `NA`, it can be considered a nonrepeat event. +#' - If an event is always `NA` for `redcap_repeat_instrument` and filled for `redcap_repeat_instance` +#' it can be assumed to be a repeat_together event +#' - repeat_separate and nonrepeating event types exhibit the same behavior along the +#' primary keys of the data. nonrepeating event types can have data display with +#' `redcap_repeat_instance`values both filled and as `NA`. If this is the case, +#' it can be assumed the event is a repeating separate event. +#' +#' @param data the REDCap data +#' +#' @return +#' A dataframe with unique event names mapped to their corresponding repeat types +#' +#' @keywords internal + +get_repeat_event_types <- function(data) { + out <- data %>% + dplyr::distinct(.data$redcap_event_name, .data$redcap_repeat_instrument, .data$redcap_repeat_instance) %>% + mutate( + repeat_type = case_when( + !is.na(redcap_event_name) & !is.na(redcap_repeat_instrument) & !is.na(redcap_repeat_instance) ~ + "repeat_separate", + !is.na(redcap_event_name) & is.na(redcap_repeat_instrument) & !is.na(redcap_repeat_instance) ~ + "repeat_together", + TRUE ~ "nonrepeating" + ) + ) %>% + dplyr::distinct(.data$redcap_event_name, .data$repeat_type) + + # Check for instances where the same event is labelled as nonrepeating & repeating separate + # If this is the case, it must be repeating separate (there is just data that qualifies as both) + out %>% + mutate( + is_duplicated = duplicated(.data$repeat_type) | duplicated(.data$repeat_type, fromLast = TRUE) + ) %>% + filter(.data$is_duplicated == FALSE | (.data$is_duplicated == TRUE & .data$ repeat_type == "repeat_separate")) %>% + select(-.data$is_duplicated) +} diff --git a/R/utils.R b/R/utils.R index 203b75d0..e660031b 100644 --- a/R/utils.R +++ b/R/utils.R @@ -18,12 +18,13 @@ add_partial_keys <- function(db_data, var = NULL) { if (!is.null(enexpr(var))) { - pattern <- "^(\\w+?)_arm_(\\d)$" + # Include handling for instances where REDCap appends with "_1b" or similar + pattern <- "^(\\w+?)_arm_(\\d+\\w?)$" db_data <- db_data %>% mutate( redcap_event = sub(pattern, "\\1", {{ var }}), - redcap_arm = as.integer(sub(pattern, "\\2", {{ var }})) + redcap_arm = as.factor(sub(pattern, "\\2", {{ var }})) ) } @@ -65,20 +66,19 @@ create_repeat_instance_vars <- function(db_data) { # Detect if repeat events exist # First determined if redcap_event_instance added during mixed structure handling - # See: convert_mixed_instrument() + # See: convert_mixed_instrument has_repeat_events <- "redcap_event_instance" %in% names(out) # Next determined by non-NA vals in new "redcap_form_instance" alongside # NA vals in "redcap_repeat_instrument" # `has_repeat_forms` will always be TRUE for events to exist - if (has_repeat_forms & !has_repeat_events) { + if (has_repeat_forms && !has_repeat_events) { has_repeat_events <- any( is.na(out$redcap_repeat_instrument) & !is.na(out$redcap_form_instance) ) } if (has_repeat_events) { - # In cases where there are repeating events but they were not added by # convert_mixed_instrument(), add an empty redcap_event_instance column if (!"redcap_event_instance" %in% names(out)) { @@ -99,8 +99,8 @@ create_repeat_instance_vars <- function(db_data) { ) out <- relocate(out, - "redcap_event_instance", - .after = "redcap_form_instance" + "redcap_event_instance", + .after = "redcap_form_instance" ) } @@ -193,7 +193,7 @@ parse_labels <- function(string, return_vector = FALSE, return_stripped_text_fla # If string is empty/NA, throw a warning if (is.na(string)) { cli_warn("Empty string detected for a given multiple choice label.", - class = c("empty_parse_warning", "REDCapTidieR_cond") + class = c("empty_parse_warning", "REDCapTidieR_cond") ) } diff --git a/man/add_event_mapping.Rd b/man/add_event_mapping.Rd index 58a8ee22..3fe9a8a3 100644 --- a/man/add_event_mapping.Rd +++ b/man/add_event_mapping.Rd @@ -5,13 +5,16 @@ \title{Supplement a supertibble from a longitudinal database with information about the events associated with each instrument} \usage{ -add_event_mapping(supertbl, linked_arms) +add_event_mapping(supertbl, linked_arms, repeat_event_types) } \arguments{ \item{supertbl}{a supertibble object to supplement with metadata} \item{linked_arms}{the tibble with event mappings created by \code{link_arms()}} + +\item{repeat_event_types}{a dataframe output from \code{\link[=repeat_event_types]{repeat_event_types()}} which +specifies NR, RS, and RT types for events} } \value{ The original supertibble with an events \code{redcap_events} list column diff --git a/man/get_repeat_event_types.Rd b/man/get_repeat_event_types.Rd new file mode 100644 index 00000000..2d861189 --- /dev/null +++ b/man/get_repeat_event_types.Rd @@ -0,0 +1,29 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/read_redcap.R +\name{get_repeat_event_types} +\alias{get_repeat_event_types} +\title{Add identification for repeat event types} +\usage{ +get_repeat_event_types(data) +} +\arguments{ +\item{data}{the REDCap data} +} +\value{ +A dataframe with unique event names mapped to their corresponding repeat types +} +\description{ +To correctly assign repeat event types a few assumptions must be made: +\itemize{ +\item There are only 3 behaviors: nonrepeating, repeat_separately, and repeat_together +\item If an event only shows \code{redcap_repeat_instance} and \code{redcap_repeat_instrument} +as \code{NA}, it can be considered a nonrepeat event. +\item If an event is always \code{NA} for \code{redcap_repeat_instrument} and filled for \code{redcap_repeat_instance} +it can be assumed to be a repeat_together event +\item repeat_separate and nonrepeating event types exhibit the same behavior along the +primary keys of the data. nonrepeating event types can have data display with +\code{redcap_repeat_instance}values both filled and as \code{NA}. If this is the case, +it can be assumed the event is a repeating separate event. +} +} +\keyword{internal} diff --git a/tests/testthat/test-read_redcap.R b/tests/testthat/test-read_redcap.R index 0683c212..194e63f0 100644 --- a/tests/testthat/test-read_redcap.R +++ b/tests/testthat/test-read_redcap.R @@ -641,3 +641,23 @@ test_that("read_redcap handles missing data codes", { }) |> expect_no_warning() }) + +test_that("get_repeat_event_types() works", { + mixed_data_structure <- tibble::tribble( + ~"record_id", ~"redcap_event_name", ~"redcap_repeat_instrument", ~"redcap_repeat_instance", + 1, "nonrepeat", NA, NA, + 1, "repeat_together", NA, 1, + 1, "repeat_separate", "mixed_structure_form", 1 + ) + + expected_out <- tibble::tribble( + ~"redcap_event_name", ~"repeat_type", + "nonrepeat", "nonrepeating", + "repeat_together", "repeat_together", + "repeat_separate", "repeat_separate" + ) + + out <- get_repeat_event_types(mixed_data_structure) + + expect_equal(out, expected_out) +}) diff --git a/tests/testthat/test-utils.R b/tests/testthat/test-utils.R index 299be04e..344d4e9a 100644 --- a/tests/testthat/test-utils.R +++ b/tests/testthat/test-utils.R @@ -219,8 +219,10 @@ test_that("link_arms works", { expect_s3_class(out, "tbl") # output contains expected columns - expected_cols <- c("arm_num", "unique_event_name", "form", "arm_name", - "event_name", "custom_event_label", "event_id") + expected_cols <- c( + "arm_num", "unique_event_name", "form", "arm_name", + "event_name", "custom_event_label", "event_id" + ) expect_setequal(expected_cols, names(out)) # all arms are represented in output (test redcap has 2 arms) @@ -346,7 +348,8 @@ test_that("add_partial_keys works", { 1, "nr_event_arm_1", NA, NA, 1, "nr_event_arm_1", "r_instrument", 1, 3, "nr_event_arm_1", "r_instrument", 1, - 4, "r_event_arm_1", NA, 1 + 4, "r_event_arm_1", NA, 1, + 5, "r_event_arm_1b", NA, 1 ) out <- test_data %>% @@ -365,6 +368,9 @@ test_that("add_partial_keys works", { expect_true(all(expected_cols %in% names(out))) expect_s3_class(out, "data.frame") expect_true(nrow(out) > 0) + + expected_redcap_arm_col <- factor(c(1, 1, 1, 1, "1b")) + expect_equal(out$redcap_arm, expected_redcap_arm_col) }) test_that("create_repeat_instance_vars works", { From 3ce0e120c3bc67ac07364d5e05022b73887737ca Mon Sep 17 00:00:00 2001 From: Richard Hanna Date: Fri, 20 Sep 2024 14:15:57 -0400 Subject: [PATCH 07/17] Update join_data_tibbles --- R/join_data_tibbles.R | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/R/join_data_tibbles.R b/R/join_data_tibbles.R index d77975a4..e09a4d41 100644 --- a/R/join_data_tibbles.R +++ b/R/join_data_tibbles.R @@ -4,13 +4,14 @@ join_data_tibbles <- function(suprtbl, by = NULL, type = "left", suffix = c(".x", ".y")) { - record_id_field <- get_record_id_field(suprtbl$redcap_data[[1]]) + record_id_field <- get_record_id_field(suprtbl$redcap_data[[1]]) # nolint: object_usage_linter join_fn <- get_join_fn(type) # Append the supertibble with the primary keys column suprtbl <- suprtbl |> - mutate(pks = purrr::map_chr(redcap_data, ~extract_keys(., record_id_field = record_id_field))) %>% - select(redcap_form_name, redcap_form_label, redcap_data, redcap_metadata, structure, pks) + mutate(pks = purrr::map_chr(.data$redcap_data, ~ extract_keys(., record_id_field = record_id_field))) %>% + select(.data$redcap_form_name, .data$redcap_form_label, .data$redcap_data, + .data$redcap_metadata, .data$structure, .data$pks) tbl_x <- extract_tibble(suprtbl, x) tbl_x_type <- get_structure(suprtbl, x) @@ -34,8 +35,10 @@ join_data_tibbles <- function(suprtbl, } extract_keys <- function(suprtbl, record_id_field) { - redcap_keys <- c(record_id_field, "redcap_event", "redcap_form_instance", - "redcap_event_instance", "redcap_arm") + redcap_keys <- c( + record_id_field, "redcap_event", "redcap_form_instance", + "redcap_event_instance", "redcap_arm" + ) suprtbl |> colnames() |> @@ -44,12 +47,10 @@ extract_keys <- function(suprtbl, record_id_field) { } get_structure <- function(suprtbl, tbl_name) { - tbl <- extract_tibble(suprtbl, tbl = tbl_name) suprtbl$structure[suprtbl$redcap_form_name == tbl_name] } get_join_fn <- function(type) { - join_functions <- list( left = dplyr::left_join, right = dplyr::right_join, @@ -67,7 +68,6 @@ get_join_fn <- function(type) { } build_by <- function(suprtbl, x, y, is_mixed) { - x_pks <- suprtbl$pks[suprtbl$redcap_form_name == x] %>% stringr::str_split(", ", simplify = TRUE) y_pks <- suprtbl$pks[suprtbl$redcap_form_name == y] %>% @@ -89,4 +89,3 @@ add_missing_columns <- function(tbl, columns) { tbl[missing_cols] <- NA return(tbl) } - From 374ad4852b84477897f5505896476646a4887f0b Mon Sep 17 00:00:00 2001 From: Richard Hanna Date: Fri, 20 Sep 2024 14:35:08 -0400 Subject: [PATCH 08/17] Update tests, no longer nonrepeats with single instance repeat TODO: Check if true, but beleive this is no longer needed now that we've redefined redcap_form_instance/redcap_event_instance --- tests/testthat/test-clean_redcap_long.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/testthat/test-clean_redcap_long.R b/tests/testthat/test-clean_redcap_long.R index 3a68e780..3cf78be7 100644 --- a/tests/testthat/test-clean_redcap_long.R +++ b/tests/testthat/test-clean_redcap_long.R @@ -112,7 +112,7 @@ test_that("clean_redcap_long with mixed structure works", { # Check redcap_data contents for mixed and nonrepeating structure expected_mixed_data <- tibble::tribble( ~record_id, ~redcap_event, ~redcap_form_instance, ~mixed_structure_1, ~form_status_complete, - 1, "event_1", 1, "Mixed Nonrepeat 1", 0, + 1, "event_1", NA, "Mixed Nonrepeat 1", 0, 1, "event_2", 1, "Mixed Repeat 1", 0, 1, "event_2", 2, "Mixed Repeat 2", 0 ) @@ -309,7 +309,7 @@ test_that("convert_mixed_instrument works", { expected_out <- tibble::tribble( ~record_id, ~redcap_repeat_instrument, ~redcap_repeat_instance, ~mixed_structure_variable, ~repeat_form_variable, ~mixed_repeat_var, - 1, "mixed_structure_form", 1, "A", NA, NA, + 1, "mixed_structure_form", NA, "A", NA, NA, 2, "mixed_structure_form", 1, "B", NA, NA, 3, "repeat_form", 1, NA, "C", NA, 4, "repeat_form", 2, NA, "D", NA, From 37b88a811c281d07fca873870ef853b9e9f8aceb Mon Sep 17 00:00:00 2001 From: Richard Hanna Date: Fri, 20 Sep 2024 15:40:01 -0400 Subject: [PATCH 09/17] Fix get_repeat_event_types edge case --- R/read_redcap.R | 5 +++-- tests/testthat/test-read_redcap.R | 21 +++++++++++++++++++++ 2 files changed, 24 insertions(+), 2 deletions(-) diff --git a/R/read_redcap.R b/R/read_redcap.R index f20ff34d..4cb6751f 100644 --- a/R/read_redcap.R +++ b/R/read_redcap.R @@ -544,10 +544,11 @@ get_repeat_event_types <- function(data) { # Check for instances where the same event is labelled as nonrepeating & repeating separate # If this is the case, it must be repeating separate (there is just data that qualifies as both) + out %>% mutate( - is_duplicated = duplicated(.data$repeat_type) | duplicated(.data$repeat_type, fromLast = TRUE) + is_duplicated = (duplicated(.data$redcap_event_name) | duplicated(.data$redcap_event_name, fromLast = TRUE)) ) %>% - filter(.data$is_duplicated == FALSE | (.data$is_duplicated == TRUE & .data$ repeat_type == "repeat_separate")) %>% + filter(!.data$is_duplicated | (.data$is_duplicated & .data$repeat_type == "repeat_separate")) %>% select(-.data$is_duplicated) } diff --git a/tests/testthat/test-read_redcap.R b/tests/testthat/test-read_redcap.R index 194e63f0..769b0d53 100644 --- a/tests/testthat/test-read_redcap.R +++ b/tests/testthat/test-read_redcap.R @@ -660,4 +660,25 @@ test_that("get_repeat_event_types() works", { out <- get_repeat_event_types(mixed_data_structure) expect_equal(out, expected_out) + + # Example with nonrepeating arm that contains repeating and non repeating forms + mixed_data_structure <- tibble::tribble( + ~"record_id", ~"redcap_event_name", ~"redcap_repeat_instrument", ~"redcap_repeat_instance", + 1, "nonrepeat", NA, NA, + 1, "nonrepeat", "repeat_form", 1, + 1, "repeat_together", NA, 1, + 1, "repeat_separate", "mixed_structure_form", 1 + ) + + out <- get_repeat_event_types(mixed_data_structure) + + expected_out <- tibble::tribble( + ~"redcap_event_name", ~"repeat_type", + "nonrepeat", "repeat_separate", + "repeat_together", "repeat_together", + "repeat_separate", "repeat_separate" + ) + + expect_equal(out, expected_out) + }) From 7f64232dc27ff7f0cfa816462784b7e5b256743e Mon Sep 17 00:00:00 2001 From: Richard Hanna Date: Mon, 23 Sep 2024 10:16:27 -0400 Subject: [PATCH 10/17] Reinstate single instance for nonrepeat --- R/clean_redcap_long.R | 3 +++ R/read_redcap.R | 2 +- man/add_event_mapping.Rd | 2 +- tests/testthat/test-clean_redcap_long.R | 4 ++-- 4 files changed, 7 insertions(+), 4 deletions(-) diff --git a/R/clean_redcap_long.R b/R/clean_redcap_long.R index 750578cc..13171692 100644 --- a/R/clean_redcap_long.R +++ b/R/clean_redcap_long.R @@ -450,6 +450,9 @@ convert_mixed_instrument <- function(db_data_long, mixed_structure_ref) { db_data_long <- db_data_long %>% mutate( redcap_repeat_instance = case_when( + # Add single instance repeat event instance vals when none exist + # This handles nonrepeating data in events set to repeat separately + update_mask & is.na(redcap_repeat_instance) ~ 1, # If repeat-together type, remove values from redcap_repeat_instance # (shifted and captured in redcap_event_instance) update_mask & is.na(redcap_repeat_instrument) ~ NA, diff --git a/R/read_redcap.R b/R/read_redcap.R index 4cb6751f..0e5e8e6f 100644 --- a/R/read_redcap.R +++ b/R/read_redcap.R @@ -443,7 +443,7 @@ add_metadata <- function(supertbl, db_metadata, redcap_uri, token, suppress_redc #' @param supertbl a supertibble object to supplement with metadata #' @param linked_arms the tibble with event mappings created by #' \code{link_arms()} -#' @param repeat_event_types a dataframe output from [repeat_event_types()] which +#' @param repeat_event_types a dataframe output from [get_repeat_event_types()] which #' specifies NR, RS, and RT types for events #' #' @return diff --git a/man/add_event_mapping.Rd b/man/add_event_mapping.Rd index 3fe9a8a3..5c6d68df 100644 --- a/man/add_event_mapping.Rd +++ b/man/add_event_mapping.Rd @@ -13,7 +13,7 @@ add_event_mapping(supertbl, linked_arms, repeat_event_types) \item{linked_arms}{the tibble with event mappings created by \code{link_arms()}} -\item{repeat_event_types}{a dataframe output from \code{\link[=repeat_event_types]{repeat_event_types()}} which +\item{repeat_event_types}{a dataframe output from \code{\link[=get_repeat_event_types]{get_repeat_event_types()}} which specifies NR, RS, and RT types for events} } \value{ diff --git a/tests/testthat/test-clean_redcap_long.R b/tests/testthat/test-clean_redcap_long.R index 3cf78be7..3a68e780 100644 --- a/tests/testthat/test-clean_redcap_long.R +++ b/tests/testthat/test-clean_redcap_long.R @@ -112,7 +112,7 @@ test_that("clean_redcap_long with mixed structure works", { # Check redcap_data contents for mixed and nonrepeating structure expected_mixed_data <- tibble::tribble( ~record_id, ~redcap_event, ~redcap_form_instance, ~mixed_structure_1, ~form_status_complete, - 1, "event_1", NA, "Mixed Nonrepeat 1", 0, + 1, "event_1", 1, "Mixed Nonrepeat 1", 0, 1, "event_2", 1, "Mixed Repeat 1", 0, 1, "event_2", 2, "Mixed Repeat 2", 0 ) @@ -309,7 +309,7 @@ test_that("convert_mixed_instrument works", { expected_out <- tibble::tribble( ~record_id, ~redcap_repeat_instrument, ~redcap_repeat_instance, ~mixed_structure_variable, ~repeat_form_variable, ~mixed_repeat_var, - 1, "mixed_structure_form", NA, "A", NA, NA, + 1, "mixed_structure_form", 1, "A", NA, NA, 2, "mixed_structure_form", 1, "B", NA, NA, 3, "repeat_form", 1, NA, "C", NA, 4, "repeat_form", 2, NA, "D", NA, From 593254d30c1d6997f4f709a2356b7a026e3ae672 Mon Sep 17 00:00:00 2001 From: Richard Hanna Date: Mon, 23 Sep 2024 14:34:35 -0400 Subject: [PATCH 11/17] Add in support for mixed-mixed join, to verify --- R/join_data_tibbles.R | 83 ++++++++++++++++++++++++++++++++++++++----- 1 file changed, 74 insertions(+), 9 deletions(-) diff --git a/R/join_data_tibbles.R b/R/join_data_tibbles.R index e09a4d41..591e4465 100644 --- a/R/join_data_tibbles.R +++ b/R/join_data_tibbles.R @@ -11,27 +11,35 @@ join_data_tibbles <- function(suprtbl, suprtbl <- suprtbl |> mutate(pks = purrr::map_chr(.data$redcap_data, ~ extract_keys(., record_id_field = record_id_field))) %>% select(.data$redcap_form_name, .data$redcap_form_label, .data$redcap_data, - .data$redcap_metadata, .data$structure, .data$pks) + .data$redcap_metadata, .data$structure, .data$pks, .data$redcap_events) tbl_x <- extract_tibble(suprtbl, x) - tbl_x_type <- get_structure(suprtbl, x) + tbl_x_structure <- get_structure(suprtbl, x) tbl_y <- extract_tibble(suprtbl, y) - tbl_y_type <- get_structure(suprtbl, y) + tbl_y_structure <- get_structure(suprtbl, y) # Mixed structure requires special handling - is_mixed <- any(c(tbl_x_type, tbl_y_type) == "mixed") + is_mixed <- any(c(tbl_x_structure, tbl_y_structure) == "mixed") if (is_mixed) { - required_columns <- c("redcap_event_instance", "redcap_form_instance") - tbl_x <- add_missing_columns(tbl_x, required_columns) - tbl_y <- add_missing_columns(tbl_y, required_columns) + # TODO: Determine if ok to remove + # required_columns <- c("redcap_event_instance", "redcap_form_instance") # nolint: commented_code_linter + # tbl_x <- add_missing_columns(tbl_x, required_columns) # nolint: commented_code_linter + # tbl_y <- add_missing_columns(tbl_y, required_columns) # nolint: commented_code_linter + + tbl_x_type <- get_type(suprtbl, x) + tbl_y_type <- get_type(suprtbl, y) + + tbl_x <- left_join(tbl_x, tbl_x_type, by = "redcap_event") + tbl_y <- left_join(tbl_y, tbl_y_type, by = "redcap_event") } join_fn <- get_join_fn(type) by <- build_by(suprtbl, x, y, is_mixed) - join_fn(tbl_x, tbl_y, by = by, suffix = suffix) %>% - relocate(starts_with("form_status_complete"), .after = everything()) + join_tbls(tbl_x, tbl_y, join_fn, by, suffix, is_mixed) %>% + relocate(starts_with("form_status_complete"), .after = everything()) %>% + select(-starts_with(".repeat_type")) } extract_keys <- function(suprtbl, record_id_field) { @@ -50,6 +58,16 @@ get_structure <- function(suprtbl, tbl_name) { suprtbl$structure[suprtbl$redcap_form_name == tbl_name] } +get_type <- function(suprtbl, tbl_name) { + suprtbl %>% + filter(.data$redcap_form_name == tbl_name) %>% + pull(.data$redcap_events) %>% + pluck(1) %>% + select(.data$redcap_event, + ".repeat_type" = .data$repeat_type) %>% + unique() +} + get_join_fn <- function(type) { join_functions <- list( left = dplyr::left_join, @@ -84,8 +102,55 @@ build_by <- function(suprtbl, x, y, is_mixed) { out } +# TODO: Determine if ok to remove add_missing_columns <- function(tbl, columns) { missing_cols <- setdiff(columns, names(tbl)) tbl[missing_cols] <- NA return(tbl) } + +join_tbls <- function(x, y, join_fn, by, suffix, is_mixed) { + if (is_mixed) { + # Filter based on .repeat_type + x_together <- x %>% filter(.data$.repeat_type == "repeat_together") + y_together <- y %>% filter(.data$.repeat_type == "repeat_together") + + x_separate <- x %>% filter(.data$.repeat_type == "repeat_separate") + y_separate <- y %>% filter(.data$.repeat_type == "repeat_separate") + + # Join together sets + joined_together <- x_together %>% + join_fn(y_together, by = by[by != "redcap_form_instance"], suffix = suffix) + + # Join separate sets + joined_separate <- x_separate %>% + join_fn(y_separate, by = by[by != "redcap_form_instance"], suffix = suffix) + + # Bind rows together + result <- bind_rows(joined_together, joined_separate) %>% + drop_non_suffix_columns() + } else { + result <- join_fn(x, y, by = by, suffix = suffix) + } + + result +} + +drop_non_suffix_columns <- function(data) { + # Extract column names that contain a "." + # Note: We can look for periods because REDCap will not allow variables to made + # with them. Only user tampering with column names in the output would result in this. + dot_columns <- names(data)[grepl("\\.", names(data))] + + # Extract the base column names without the suffixes (everything before the ".") + base_columns <- unique(sub("\\..*", "", dot_columns)) + + # Filter out base columns that do not exist without a suffix + columns_to_drop <- base_columns[base_columns %in% names(data)] + + # Drop only those base columns that exist both with and without suffixes + data <- data %>% + select(-all_of(columns_to_drop)) + + return(data) +} From a343d6e6606c2220041620082c56a5bf33a3334c Mon Sep 17 00:00:00 2001 From: Richard Hanna Date: Tue, 24 Sep 2024 14:46:02 -0400 Subject: [PATCH 12/17] Update documentation, test methods for mixed joins --- NAMESPACE | 1 + R/join_data_tibbles.R | 148 +++++++++++++++++++++++++++++---------- man/build_by.Rd | 24 +++++++ man/extract_keys.Rd | 21 ++++++ man/get_join_fn.Rd | 19 +++++ man/get_structure.Rd | 17 +++++ man/get_type.Rd | 17 +++++ man/join_data_tibbles.Rd | 69 ++++++++++++++++++ 8 files changed, 279 insertions(+), 37 deletions(-) create mode 100644 man/build_by.Rd create mode 100644 man/extract_keys.Rd create mode 100644 man/get_join_fn.Rd create mode 100644 man/get_structure.Rd create mode 100644 man/get_type.Rd create mode 100644 man/join_data_tibbles.Rd diff --git a/NAMESPACE b/NAMESPACE index b74e60ed..e584bcaf 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -12,6 +12,7 @@ export(fmt_strip_html) export(fmt_strip_trailing_colon) export(fmt_strip_trailing_punct) export(fmt_strip_whitespace) +export(join_data_tibbles) export(make_labelled) export(read_redcap) export(write_redcap_xlsx) diff --git a/R/join_data_tibbles.R b/R/join_data_tibbles.R index 591e4465..57a9871d 100644 --- a/R/join_data_tibbles.R +++ b/R/join_data_tibbles.R @@ -1,65 +1,107 @@ -join_data_tibbles <- function(suprtbl, +#' @title Join Two Data Tibbles from a Supertibble +#' +#' @description +#' The [join_data_tibbles()] function provides a way to intelligently join two +#' data tibbles from a REDCaTidieR supertibble. A supertibble is an output of +#' [read_redcap()]. +#' +#' [join_data_tibbles()] attempts to correctly assign the `by` when left `NULL` (the default) +#' based on detecting the data tibble structure of `x` and `y`. +#' +#' @inheritParams extract_tibbles +#' @param type A character string for the type of join to be performed borrowing from +#' dplyr. One of "left", "right", "inner", or "full". Default "left". +#' @inheritParams dplyr::inner_join +#' +#' +#' @returns A `tibble`. +#' +#' @export + +join_data_tibbles <- function(supertbl, x, y, by = NULL, type = "left", suffix = c(".x", ".y")) { - record_id_field <- get_record_id_field(suprtbl$redcap_data[[1]]) # nolint: object_usage_linter + record_id_field <- get_record_id_field(supertbl$redcap_data[[1]]) # nolint: object_usage_linter join_fn <- get_join_fn(type) # Append the supertibble with the primary keys column - suprtbl <- suprtbl |> + supertbl <- supertbl |> mutate(pks = purrr::map_chr(.data$redcap_data, ~ extract_keys(., record_id_field = record_id_field))) %>% select(.data$redcap_form_name, .data$redcap_form_label, .data$redcap_data, - .data$redcap_metadata, .data$structure, .data$pks, .data$redcap_events) + .data$redcap_metadata, .data$structure, .data$pks, matches("redcap_events")) - tbl_x <- extract_tibble(suprtbl, x) - tbl_x_structure <- get_structure(suprtbl, x) - tbl_y <- extract_tibble(suprtbl, y) - tbl_y_structure <- get_structure(suprtbl, y) + tbl_x <- extract_tibble(supertbl, x) + tbl_x_structure <- get_structure(supertbl, x) + tbl_y <- extract_tibble(supertbl, y) + tbl_y_structure <- get_structure(supertbl, y) # Mixed structure requires special handling is_mixed <- any(c(tbl_x_structure, tbl_y_structure) == "mixed") if (is_mixed) { # TODO: Determine if ok to remove - # required_columns <- c("redcap_event_instance", "redcap_form_instance") # nolint: commented_code_linter - # tbl_x <- add_missing_columns(tbl_x, required_columns) # nolint: commented_code_linter - # tbl_y <- add_missing_columns(tbl_y, required_columns) # nolint: commented_code_linter + required_columns <- c("redcap_event_instance", "redcap_form_instance") # nolint: commented_code_linter + tbl_x <- add_missing_columns(tbl_x, required_columns) # nolint: commented_code_linter + tbl_y <- add_missing_columns(tbl_y, required_columns) # nolint: commented_code_linter - tbl_x_type <- get_type(suprtbl, x) - tbl_y_type <- get_type(suprtbl, y) + tbl_x_type <- get_type(supertbl, x) + tbl_y_type <- get_type(supertbl, y) + # Add on .repeat_type specifier for the redcap_event column tbl_x <- left_join(tbl_x, tbl_x_type, by = "redcap_event") tbl_y <- left_join(tbl_y, tbl_y_type, by = "redcap_event") } join_fn <- get_join_fn(type) - by <- build_by(suprtbl, x, y, is_mixed) + by <- build_by(supertbl, x, y, is_mixed) - join_tbls(tbl_x, tbl_y, join_fn, by, suffix, is_mixed) %>% + join_fn(tbl_x, tbl_y, by, suffix) %>% relocate(starts_with("form_status_complete"), .after = everything()) %>% select(-starts_with(".repeat_type")) } -extract_keys <- function(suprtbl, record_id_field) { +#' @title Extract the primary keys associated with a data tibble +#' +#' @param data_tbl A data tibble from a supertibble +#' @param record_id_field The record ID field for the REDCap project, retrieved +#' as an ouput of [get_record_id_field()] +#' +#' @returns a character string +#' +#' @keywords internal +extract_keys <- function(data_tbl, record_id_field) { redcap_keys <- c( record_id_field, "redcap_event", "redcap_form_instance", "redcap_event_instance", "redcap_arm" ) - suprtbl |> + data_tbl |> colnames() |> intersect(redcap_keys) |> paste(collapse = ", ") } -get_structure <- function(suprtbl, tbl_name) { - suprtbl$structure[suprtbl$redcap_form_name == tbl_name] +#' @title Retrieve the structure data for a form from the supertibble +#' +#' @inheritParams join_data_tibbles +#' @param tbl_name the `x` or `y` values assigned to `join_data_tibbles` +#' +#' @keywords internal +get_structure <- function(supertbl, tbl_name) { + supertbl$structure[supertbl$redcap_form_name == tbl_name] } -get_type <- function(suprtbl, tbl_name) { - suprtbl %>% +#' @title Retrieve the repeat event type data for a form from the supertibble +#' +#' @inheritParams join_data_tibbles +#' @param tbl_name the `x` or `y` values assigned to `join_data_tibbles` +#' +#' @keywords internal +get_type <- function(supertbl, tbl_name) { + supertbl %>% filter(.data$redcap_form_name == tbl_name) %>% pull(.data$redcap_events) %>% pluck(1) %>% @@ -68,6 +110,13 @@ get_type <- function(suprtbl, tbl_name) { unique() } +#' @title Retrieve the appropriate user specified join function +#' +#' @inheritParams join_data_tibbles +#' +#' @returns a function +#' +#' @keywords internal get_join_fn <- function(type) { join_functions <- list( left = dplyr::left_join, @@ -76,24 +125,32 @@ get_join_fn <- function(type) { full = dplyr::full_join ) - # Check if the specified type is valid - # TODO: Make a standard check function with cli if (!type %in% names(join_functions)) { - stop("Invalid join type. Choose from 'left', 'right', 'inner', or 'full'.") + cli::cli_abort("Invalid join type. Choose from 'left', 'right', 'inner', or 'full'.") } join_functions[[type]] } -build_by <- function(suprtbl, x, y, is_mixed) { - x_pks <- suprtbl$pks[suprtbl$redcap_form_name == x] %>% +#' @title Intelligently retrieve the join by cols +#' +#' @inheritParams join_data_tibbles +#' @param is_mixed TRUE/FALSE, whether or not the given tables contain a mixed structure +#' +#' @returns a character vector +#' +#' @keywords interal +build_by <- function(supertbl, x, y, is_mixed) { + x_pks <- supertbl$pks[supertbl$redcap_form_name == x] %>% stringr::str_split(", ", simplify = TRUE) - y_pks <- suprtbl$pks[suprtbl$redcap_form_name == y] %>% + y_pks <- supertbl$pks[supertbl$redcap_form_name == y] %>% stringr::str_split(", ", simplify = TRUE) out <- intersect(x_pks, y_pks) if (is_mixed) { + # For mixed tables, depending on the .repeat_types present tables may not + # have event and form instance columns and must be added out <- c(out, "redcap_event_instance", "redcap_form_instance") %>% # TODO: Make standard, currently needed for repeat/mixed joins unique() @@ -102,37 +159,54 @@ build_by <- function(suprtbl, x, y, is_mixed) { out } -# TODO: Determine if ok to remove +#' @keywords intenral +#' @noRd add_missing_columns <- function(tbl, columns) { missing_cols <- setdiff(columns, names(tbl)) tbl[missing_cols] <- NA return(tbl) } +#' @title Join data tbls of various structures and types +#' +#' @description +#' [join_tbls()] either performs the `join_fun()` specified by the `type` or, in +#' the event of mixed structure data tibble joins, will seek to split data into +#' three categories before performing the joins. The key identifiers here are +#' `redcap_form_instance` and the added `.repeat_type` columns. + join_tbls <- function(x, y, join_fn, by, suffix, is_mixed) { if (is_mixed) { # Filter based on .repeat_type + # If repeating together events, can use redcap_form_instance (NA) and redcap_event_instance x_together <- x %>% filter(.data$.repeat_type == "repeat_together") y_together <- y %>% filter(.data$.repeat_type == "repeat_together") - x_separate <- x %>% filter(.data$.repeat_type == "repeat_separate") - y_separate <- y %>% filter(.data$.repeat_type == "repeat_separate") + # repeating instruments for separately repeating events shouldn't be joined by redcap_form_instance + x_separate_repeating <- x %>% filter(.data$.repeat_type == "repeat_separate" & !is.na(.data$redcap_form_instance)) + y_separate_repeating <- y %>% filter(.data$.repeat_type == "repeat_separate" & !is.na(.data$redcap_form_instance)) + + # nonrepeating instruments for separately repeating events should be joined by redcap_form_instance + x_separate_nonrepeating <- x %>% filter(.data$.repeat_type == "repeat_separate" & is.na(.data$redcap_form_instance)) + y_separate_nonrepeating <- y %>% filter(.data$.repeat_type == "repeat_separate" & is.na(.data$redcap_form_instance)) # Join together sets joined_together <- x_together %>% - join_fn(y_together, by = by[by != "redcap_form_instance"], suffix = suffix) + join_fn(y_together, by = by, suffix = suffix) - # Join separate sets - joined_separate <- x_separate %>% - join_fn(y_separate, by = by[by != "redcap_form_instance"], suffix = suffix) + joined_separate_repeating <- x_separate_repeating %>% + join_fn(y_separate_repeating, by = by[by != "redcap_form_instance"], suffix = suffix) - # Bind rows together - result <- bind_rows(joined_together, joined_separate) %>% + joined_separate_nonrepeating <- x_separate_nonrepeating %>% + join_fn(y_separate_nonrepeating, by = by, suffix = suffix) + + # Bind rows together, issue in arrangmenet of output + result <- bind_rows(joined_together, joined_separate_repeating) %>% + bind_rows(joined_separate_nonrepeating) %>% drop_non_suffix_columns() } else { result <- join_fn(x, y, by = by, suffix = suffix) } - result } diff --git a/man/build_by.Rd b/man/build_by.Rd new file mode 100644 index 00000000..e491c439 --- /dev/null +++ b/man/build_by.Rd @@ -0,0 +1,24 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/join_data_tibbles.R +\name{build_by} +\alias{build_by} +\title{Intelligently retrieve the join by cols} +\usage{ +build_by(supertbl, x, y, is_mixed) +} +\arguments{ +\item{supertbl}{A supertibble generated by \code{read_redcap()}. Required.} + +\item{x, y}{A pair of data frames, data frame extensions (e.g. a tibble), or +lazy data frames (e.g. from dbplyr or dtplyr). See \emph{Methods}, below, for +more details.} + +\item{is_mixed}{TRUE/FALSE, whether or not the given tables contain a mixed structure} +} +\value{ +a character vector +} +\description{ +Intelligently retrieve the join by cols +} +\keyword{interal} diff --git a/man/extract_keys.Rd b/man/extract_keys.Rd new file mode 100644 index 00000000..b497120b --- /dev/null +++ b/man/extract_keys.Rd @@ -0,0 +1,21 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/join_data_tibbles.R +\name{extract_keys} +\alias{extract_keys} +\title{Extract the primary keys associated with a data tibble} +\usage{ +extract_keys(data_tbl, record_id_field) +} +\arguments{ +\item{data_tbl}{A data tibble from a supertibble} + +\item{record_id_field}{The record ID field for the REDCap project, retrieved +as an ouput of \code{\link[=get_record_id_field]{get_record_id_field()}}} +} +\value{ +a character string +} +\description{ +Extract the primary keys associated with a data tibble +} +\keyword{internal} diff --git a/man/get_join_fn.Rd b/man/get_join_fn.Rd new file mode 100644 index 00000000..f99d7b01 --- /dev/null +++ b/man/get_join_fn.Rd @@ -0,0 +1,19 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/join_data_tibbles.R +\name{get_join_fn} +\alias{get_join_fn} +\title{Retrieve the appropriate user specified join function} +\usage{ +get_join_fn(type) +} +\arguments{ +\item{type}{A character string for the type of join to be performed borrowing from +dplyr. One of "left", "right", "inner", or "full". Default "left".} +} +\value{ +a function +} +\description{ +Retrieve the appropriate user specified join function +} +\keyword{internal} diff --git a/man/get_structure.Rd b/man/get_structure.Rd new file mode 100644 index 00000000..b8a901eb --- /dev/null +++ b/man/get_structure.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/join_data_tibbles.R +\name{get_structure} +\alias{get_structure} +\title{Retrieve the structure data for a form from the supertibble} +\usage{ +get_structure(supertbl, tbl_name) +} +\arguments{ +\item{supertbl}{A supertibble generated by \code{read_redcap()}. Required.} + +\item{tbl_name}{the \code{x} or \code{y} values assigned to \code{join_data_tibbles}} +} +\description{ +Retrieve the structure data for a form from the supertibble +} +\keyword{internal} diff --git a/man/get_type.Rd b/man/get_type.Rd new file mode 100644 index 00000000..f3088095 --- /dev/null +++ b/man/get_type.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/join_data_tibbles.R +\name{get_type} +\alias{get_type} +\title{Retrieve the repeat event type data for a form from the supertibble} +\usage{ +get_type(supertbl, tbl_name) +} +\arguments{ +\item{supertbl}{A supertibble generated by \code{read_redcap()}. Required.} + +\item{tbl_name}{the \code{x} or \code{y} values assigned to \code{join_data_tibbles}} +} +\description{ +Retrieve the repeat event type data for a form from the supertibble +} +\keyword{internal} diff --git a/man/join_data_tibbles.Rd b/man/join_data_tibbles.Rd new file mode 100644 index 00000000..75dc94b1 --- /dev/null +++ b/man/join_data_tibbles.Rd @@ -0,0 +1,69 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/join_data_tibbles.R +\name{join_data_tibbles} +\alias{join_data_tibbles} +\title{Join Two Data Tibbles from a Supertibble} +\usage{ +join_data_tibbles( + supertbl, + x, + y, + by = NULL, + type = "left", + suffix = c(".x", ".y") +) +} +\arguments{ +\item{supertbl}{A supertibble generated by \code{read_redcap()}. Required.} + +\item{x, y}{A pair of data frames, data frame extensions (e.g. a tibble), or +lazy data frames (e.g. from dbplyr or dtplyr). See \emph{Methods}, below, for +more details.} + +\item{by}{A join specification created with \code{\link[dplyr:join_by]{join_by()}}, or a character +vector of variables to join by. + +If \code{NULL}, the default, \verb{*_join()} will perform a natural join, using all +variables in common across \code{x} and \code{y}. A message lists the variables so +that you can check they're correct; suppress the message by supplying \code{by} +explicitly. + +To join on different variables between \code{x} and \code{y}, use a \code{\link[dplyr:join_by]{join_by()}} +specification. For example, \code{join_by(a == b)} will match \code{x$a} to \code{y$b}. + +To join by multiple variables, use a \code{\link[dplyr:join_by]{join_by()}} specification with +multiple expressions. For example, \code{join_by(a == b, c == d)} will match +\code{x$a} to \code{y$b} and \code{x$c} to \code{y$d}. If the column names are the same between +\code{x} and \code{y}, you can shorten this by listing only the variable names, like +\code{join_by(a, c)}. + +\code{\link[dplyr:join_by]{join_by()}} can also be used to perform inequality, rolling, and overlap +joins. See the documentation at \link[dplyr:join_by]{?join_by} for details on +these types of joins. + +For simple equality joins, you can alternatively specify a character vector +of variable names to join by. For example, \code{by = c("a", "b")} joins \code{x$a} +to \code{y$a} and \code{x$b} to \code{y$b}. If variable names differ between \code{x} and \code{y}, +use a named character vector like \code{by = c("x_a" = "y_a", "x_b" = "y_b")}. + +To perform a cross-join, generating all combinations of \code{x} and \code{y}, see +\code{\link[dplyr:cross_join]{cross_join()}}.} + +\item{type}{A character string for the type of join to be performed borrowing from +dplyr. One of "left", "right", "inner", or "full". Default "left".} + +\item{suffix}{If there are non-joined duplicate variables in \code{x} and +\code{y}, these suffixes will be added to the output to disambiguate them. +Should be a character vector of length 2.} +} +\value{ +A \code{tibble}. +} +\description{ +The \code{\link[=join_data_tibbles]{join_data_tibbles()}} function provides a way to intelligently join two +data tibbles from a REDCaTidieR supertibble. A supertibble is an output of +\code{\link[=read_redcap]{read_redcap()}}. + +\code{\link[=join_data_tibbles]{join_data_tibbles()}} attempts to correctly assign the \code{by} when left \code{NULL} (the default) +based on detecting the data tibble structure of \code{x} and \code{y}. +} From 851450fad8dadd080b1bd0d57d9089be924e984d Mon Sep 17 00:00:00 2001 From: Richard Hanna Date: Thu, 26 Sep 2024 13:25:15 -0400 Subject: [PATCH 13/17] Documentation fixes --- NAMESPACE | 2 ++ R/REDCapTidieR-package.R | 4 +-- R/clean_redcap_long.R | 2 +- R/join_data_tibbles.R | 10 ++++++- R/read_redcap.R | 4 +-- man/build_by.Rd | 2 +- man/join_tbls.Rd | 60 ++++++++++++++++++++++++++++++++++++++++ pkgdown/_pkgdown.yml | 1 + 8 files changed, 78 insertions(+), 7 deletions(-) create mode 100644 man/join_tbls.Rd diff --git a/NAMESPACE b/NAMESPACE index e584bcaf..056ce167 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -46,6 +46,7 @@ importFrom(dplyr,bind_rows) importFrom(dplyr,case_when) importFrom(dplyr,coalesce) importFrom(dplyr,cur_column) +importFrom(dplyr,distinct) importFrom(dplyr,filter) importFrom(dplyr,first) importFrom(dplyr,group_by) @@ -148,6 +149,7 @@ importFrom(tidyselect,any_of) importFrom(tidyselect,ends_with) importFrom(tidyselect,eval_select) importFrom(tidyselect,everything) +importFrom(tidyselect,matches) importFrom(tidyselect,starts_with) importFrom(tidyselect,where) importFrom(vctrs,vec_ptype) diff --git a/R/REDCapTidieR-package.R b/R/REDCapTidieR-package.R index 5771fc22..4c8d6142 100644 --- a/R/REDCapTidieR-package.R +++ b/R/REDCapTidieR-package.R @@ -7,7 +7,7 @@ #' @importFrom dplyr %>% across bind_rows case_when filter group_by if_any if_else #' left_join mutate pull recode relocate rename right_join row_number rowwise #' select slice summarise ungroup coalesce cur_column bind_cols first nth n_distinct -#' first +#' first distinct #' @importFrom formattable percent #' @importFrom lobstr obj_size #' @importFrom lubridate is.difftime is.period is.POSIXt is.Date @@ -27,7 +27,7 @@ #' @importFrom tidyr complete fill pivot_wider nest separate_wider_delim unnest #' unnest_wider #' @importFrom tidyselect all_of any_of ends_with eval_select everything -#' starts_with where +#' starts_with where matches #' @importFrom vctrs vec_ptype_abbr vec_ptype #' @importFrom pillar tbl_sum #' @importFrom readr parse_logical parse_integer parse_double parse_date parse_time diff --git a/R/clean_redcap_long.R b/R/clean_redcap_long.R index 13171692..1dc7b549 100644 --- a/R/clean_redcap_long.R +++ b/R/clean_redcap_long.R @@ -322,7 +322,7 @@ distill_repeat_table_long <- function(form_name, filter( ( !is.na(.data$redcap_form_instance) | - if_any(starts_with("redcap_event_instance"), ~ !is.na(.)) + if_any(matches("redcap_event_instance"), ~ !is.na(.)) ) & .data$redcap_repeat_instrument == my_form ) diff --git a/R/join_data_tibbles.R b/R/join_data_tibbles.R index 57a9871d..3db4912b 100644 --- a/R/join_data_tibbles.R +++ b/R/join_data_tibbles.R @@ -139,7 +139,7 @@ get_join_fn <- function(type) { #' #' @returns a character vector #' -#' @keywords interal +#' @keywords internal build_by <- function(supertbl, x, y, is_mixed) { x_pks <- supertbl$pks[supertbl$redcap_form_name == x] %>% stringr::str_split(", ", simplify = TRUE) @@ -174,6 +174,14 @@ add_missing_columns <- function(tbl, columns) { #' the event of mixed structure data tibble joins, will seek to split data into #' three categories before performing the joins. The key identifiers here are #' `redcap_form_instance` and the added `.repeat_type` columns. +#' +#' @inheritParams join_data_tibbles +#' @param join_fn the user specified join function type output by [get_join_fn()] +#' @param is_mixed TRUE/FALSE mixed data structure +#' +#' @returns a dataframe +#' +#' @keywords internal join_tbls <- function(x, y, join_fn, by, suffix, is_mixed) { if (is_mixed) { diff --git a/R/read_redcap.R b/R/read_redcap.R index 0e5e8e6f..af098fc7 100644 --- a/R/read_redcap.R +++ b/R/read_redcap.R @@ -530,7 +530,7 @@ calc_metadata_stats <- function(data) { get_repeat_event_types <- function(data) { out <- data %>% - dplyr::distinct(.data$redcap_event_name, .data$redcap_repeat_instrument, .data$redcap_repeat_instance) %>% + distinct(.data$redcap_event_name, .data$redcap_repeat_instrument, .data$redcap_repeat_instance) %>% mutate( repeat_type = case_when( !is.na(redcap_event_name) & !is.na(redcap_repeat_instrument) & !is.na(redcap_repeat_instance) ~ @@ -540,7 +540,7 @@ get_repeat_event_types <- function(data) { TRUE ~ "nonrepeating" ) ) %>% - dplyr::distinct(.data$redcap_event_name, .data$repeat_type) + distinct(.data$redcap_event_name, .data$repeat_type) # Check for instances where the same event is labelled as nonrepeating & repeating separate # If this is the case, it must be repeating separate (there is just data that qualifies as both) diff --git a/man/build_by.Rd b/man/build_by.Rd index e491c439..81696fb2 100644 --- a/man/build_by.Rd +++ b/man/build_by.Rd @@ -21,4 +21,4 @@ a character vector \description{ Intelligently retrieve the join by cols } -\keyword{interal} +\keyword{internal} diff --git a/man/join_tbls.Rd b/man/join_tbls.Rd new file mode 100644 index 00000000..95fd52b5 --- /dev/null +++ b/man/join_tbls.Rd @@ -0,0 +1,60 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/join_data_tibbles.R +\name{join_tbls} +\alias{join_tbls} +\title{Join data tbls of various structures and types} +\usage{ +join_tbls(x, y, join_fn, by, suffix, is_mixed) +} +\arguments{ +\item{x, y}{A pair of data frames, data frame extensions (e.g. a tibble), or +lazy data frames (e.g. from dbplyr or dtplyr). See \emph{Methods}, below, for +more details.} + +\item{join_fn}{the user specified join function type output by \code{\link[=get_join_fn]{get_join_fn()}}} + +\item{by}{A join specification created with \code{\link[dplyr:join_by]{join_by()}}, or a character +vector of variables to join by. + +If \code{NULL}, the default, \verb{*_join()} will perform a natural join, using all +variables in common across \code{x} and \code{y}. A message lists the variables so +that you can check they're correct; suppress the message by supplying \code{by} +explicitly. + +To join on different variables between \code{x} and \code{y}, use a \code{\link[dplyr:join_by]{join_by()}} +specification. For example, \code{join_by(a == b)} will match \code{x$a} to \code{y$b}. + +To join by multiple variables, use a \code{\link[dplyr:join_by]{join_by()}} specification with +multiple expressions. For example, \code{join_by(a == b, c == d)} will match +\code{x$a} to \code{y$b} and \code{x$c} to \code{y$d}. If the column names are the same between +\code{x} and \code{y}, you can shorten this by listing only the variable names, like +\code{join_by(a, c)}. + +\code{\link[dplyr:join_by]{join_by()}} can also be used to perform inequality, rolling, and overlap +joins. See the documentation at \link[dplyr:join_by]{?join_by} for details on +these types of joins. + +For simple equality joins, you can alternatively specify a character vector +of variable names to join by. For example, \code{by = c("a", "b")} joins \code{x$a} +to \code{y$a} and \code{x$b} to \code{y$b}. If variable names differ between \code{x} and \code{y}, +use a named character vector like \code{by = c("x_a" = "y_a", "x_b" = "y_b")}. + +To perform a cross-join, generating all combinations of \code{x} and \code{y}, see +\code{\link[dplyr:cross_join]{cross_join()}}.} + +\item{suffix}{If there are non-joined duplicate variables in \code{x} and +\code{y}, these suffixes will be added to the output to disambiguate them. +Should be a character vector of length 2.} + +\item{is_mixed}{TRUE/FALSE mixed data structure} +} +\value{ +a dataframe +} +\description{ +\code{\link[=join_tbls]{join_tbls()}} either performs the \code{join_fun()} specified by the \code{type} or, in +the event of mixed structure data tibble joins, will seek to split data into +three categories before performing the joins. The key identifiers here are +\code{redcap_form_instance} and the added \code{.repeat_type} columns. +} +\keyword{internal} diff --git a/pkgdown/_pkgdown.yml b/pkgdown/_pkgdown.yml index ede73fed..9829a0fc 100644 --- a/pkgdown/_pkgdown.yml +++ b/pkgdown/_pkgdown.yml @@ -69,6 +69,7 @@ reference: Helpful functions for supertibble data analytics. contents: - combine_checkboxes + - join_data_tibbles - title: "Data" contents: - superheroes_supertbl From 0fdb9bfb5eb7d43e0a9da367fa61e0322d87f491 Mon Sep 17 00:00:00 2001 From: Richard Hanna Date: Mon, 14 Oct 2024 15:24:28 -0400 Subject: [PATCH 14/17] Move join_data_tibbles out to new branch --- NAMESPACE | 1 - R/join_data_tibbles.R | 238 --------------------------------------- man/build_by.Rd | 24 ---- man/extract_keys.Rd | 21 ---- man/get_join_fn.Rd | 19 ---- man/get_structure.Rd | 17 --- man/get_type.Rd | 17 --- man/join_data_tibbles.Rd | 69 ------------ man/join_tbls.Rd | 60 ---------- pkgdown/_pkgdown.yml | 1 - 10 files changed, 467 deletions(-) delete mode 100644 R/join_data_tibbles.R delete mode 100644 man/build_by.Rd delete mode 100644 man/extract_keys.Rd delete mode 100644 man/get_join_fn.Rd delete mode 100644 man/get_structure.Rd delete mode 100644 man/get_type.Rd delete mode 100644 man/join_data_tibbles.Rd delete mode 100644 man/join_tbls.Rd diff --git a/NAMESPACE b/NAMESPACE index 056ce167..5c16f592 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -12,7 +12,6 @@ export(fmt_strip_html) export(fmt_strip_trailing_colon) export(fmt_strip_trailing_punct) export(fmt_strip_whitespace) -export(join_data_tibbles) export(make_labelled) export(read_redcap) export(write_redcap_xlsx) diff --git a/R/join_data_tibbles.R b/R/join_data_tibbles.R deleted file mode 100644 index 3db4912b..00000000 --- a/R/join_data_tibbles.R +++ /dev/null @@ -1,238 +0,0 @@ -#' @title Join Two Data Tibbles from a Supertibble -#' -#' @description -#' The [join_data_tibbles()] function provides a way to intelligently join two -#' data tibbles from a REDCaTidieR supertibble. A supertibble is an output of -#' [read_redcap()]. -#' -#' [join_data_tibbles()] attempts to correctly assign the `by` when left `NULL` (the default) -#' based on detecting the data tibble structure of `x` and `y`. -#' -#' @inheritParams extract_tibbles -#' @param type A character string for the type of join to be performed borrowing from -#' dplyr. One of "left", "right", "inner", or "full". Default "left". -#' @inheritParams dplyr::inner_join -#' -#' -#' @returns A `tibble`. -#' -#' @export - -join_data_tibbles <- function(supertbl, - x, - y, - by = NULL, - type = "left", - suffix = c(".x", ".y")) { - record_id_field <- get_record_id_field(supertbl$redcap_data[[1]]) # nolint: object_usage_linter - join_fn <- get_join_fn(type) - - # Append the supertibble with the primary keys column - supertbl <- supertbl |> - mutate(pks = purrr::map_chr(.data$redcap_data, ~ extract_keys(., record_id_field = record_id_field))) %>% - select(.data$redcap_form_name, .data$redcap_form_label, .data$redcap_data, - .data$redcap_metadata, .data$structure, .data$pks, matches("redcap_events")) - - tbl_x <- extract_tibble(supertbl, x) - tbl_x_structure <- get_structure(supertbl, x) - tbl_y <- extract_tibble(supertbl, y) - tbl_y_structure <- get_structure(supertbl, y) - - # Mixed structure requires special handling - is_mixed <- any(c(tbl_x_structure, tbl_y_structure) == "mixed") - - if (is_mixed) { - # TODO: Determine if ok to remove - required_columns <- c("redcap_event_instance", "redcap_form_instance") # nolint: commented_code_linter - tbl_x <- add_missing_columns(tbl_x, required_columns) # nolint: commented_code_linter - tbl_y <- add_missing_columns(tbl_y, required_columns) # nolint: commented_code_linter - - tbl_x_type <- get_type(supertbl, x) - tbl_y_type <- get_type(supertbl, y) - - # Add on .repeat_type specifier for the redcap_event column - tbl_x <- left_join(tbl_x, tbl_x_type, by = "redcap_event") - tbl_y <- left_join(tbl_y, tbl_y_type, by = "redcap_event") - } - - join_fn <- get_join_fn(type) - by <- build_by(supertbl, x, y, is_mixed) - - join_fn(tbl_x, tbl_y, by, suffix) %>% - relocate(starts_with("form_status_complete"), .after = everything()) %>% - select(-starts_with(".repeat_type")) -} - -#' @title Extract the primary keys associated with a data tibble -#' -#' @param data_tbl A data tibble from a supertibble -#' @param record_id_field The record ID field for the REDCap project, retrieved -#' as an ouput of [get_record_id_field()] -#' -#' @returns a character string -#' -#' @keywords internal -extract_keys <- function(data_tbl, record_id_field) { - redcap_keys <- c( - record_id_field, "redcap_event", "redcap_form_instance", - "redcap_event_instance", "redcap_arm" - ) - - data_tbl |> - colnames() |> - intersect(redcap_keys) |> - paste(collapse = ", ") -} - -#' @title Retrieve the structure data for a form from the supertibble -#' -#' @inheritParams join_data_tibbles -#' @param tbl_name the `x` or `y` values assigned to `join_data_tibbles` -#' -#' @keywords internal -get_structure <- function(supertbl, tbl_name) { - supertbl$structure[supertbl$redcap_form_name == tbl_name] -} - -#' @title Retrieve the repeat event type data for a form from the supertibble -#' -#' @inheritParams join_data_tibbles -#' @param tbl_name the `x` or `y` values assigned to `join_data_tibbles` -#' -#' @keywords internal -get_type <- function(supertbl, tbl_name) { - supertbl %>% - filter(.data$redcap_form_name == tbl_name) %>% - pull(.data$redcap_events) %>% - pluck(1) %>% - select(.data$redcap_event, - ".repeat_type" = .data$repeat_type) %>% - unique() -} - -#' @title Retrieve the appropriate user specified join function -#' -#' @inheritParams join_data_tibbles -#' -#' @returns a function -#' -#' @keywords internal -get_join_fn <- function(type) { - join_functions <- list( - left = dplyr::left_join, - right = dplyr::right_join, - inner = dplyr::inner_join, - full = dplyr::full_join - ) - - if (!type %in% names(join_functions)) { - cli::cli_abort("Invalid join type. Choose from 'left', 'right', 'inner', or 'full'.") - } - - join_functions[[type]] -} - -#' @title Intelligently retrieve the join by cols -#' -#' @inheritParams join_data_tibbles -#' @param is_mixed TRUE/FALSE, whether or not the given tables contain a mixed structure -#' -#' @returns a character vector -#' -#' @keywords internal -build_by <- function(supertbl, x, y, is_mixed) { - x_pks <- supertbl$pks[supertbl$redcap_form_name == x] %>% - stringr::str_split(", ", simplify = TRUE) - y_pks <- supertbl$pks[supertbl$redcap_form_name == y] %>% - stringr::str_split(", ", simplify = TRUE) - - out <- intersect(x_pks, y_pks) - - if (is_mixed) { - # For mixed tables, depending on the .repeat_types present tables may not - # have event and form instance columns and must be added - out <- c(out, "redcap_event_instance", "redcap_form_instance") %>% - # TODO: Make standard, currently needed for repeat/mixed joins - unique() - } - - out -} - -#' @keywords intenral -#' @noRd -add_missing_columns <- function(tbl, columns) { - missing_cols <- setdiff(columns, names(tbl)) - tbl[missing_cols] <- NA - return(tbl) -} - -#' @title Join data tbls of various structures and types -#' -#' @description -#' [join_tbls()] either performs the `join_fun()` specified by the `type` or, in -#' the event of mixed structure data tibble joins, will seek to split data into -#' three categories before performing the joins. The key identifiers here are -#' `redcap_form_instance` and the added `.repeat_type` columns. -#' -#' @inheritParams join_data_tibbles -#' @param join_fn the user specified join function type output by [get_join_fn()] -#' @param is_mixed TRUE/FALSE mixed data structure -#' -#' @returns a dataframe -#' -#' @keywords internal - -join_tbls <- function(x, y, join_fn, by, suffix, is_mixed) { - if (is_mixed) { - # Filter based on .repeat_type - # If repeating together events, can use redcap_form_instance (NA) and redcap_event_instance - x_together <- x %>% filter(.data$.repeat_type == "repeat_together") - y_together <- y %>% filter(.data$.repeat_type == "repeat_together") - - # repeating instruments for separately repeating events shouldn't be joined by redcap_form_instance - x_separate_repeating <- x %>% filter(.data$.repeat_type == "repeat_separate" & !is.na(.data$redcap_form_instance)) - y_separate_repeating <- y %>% filter(.data$.repeat_type == "repeat_separate" & !is.na(.data$redcap_form_instance)) - - # nonrepeating instruments for separately repeating events should be joined by redcap_form_instance - x_separate_nonrepeating <- x %>% filter(.data$.repeat_type == "repeat_separate" & is.na(.data$redcap_form_instance)) - y_separate_nonrepeating <- y %>% filter(.data$.repeat_type == "repeat_separate" & is.na(.data$redcap_form_instance)) - - # Join together sets - joined_together <- x_together %>% - join_fn(y_together, by = by, suffix = suffix) - - joined_separate_repeating <- x_separate_repeating %>% - join_fn(y_separate_repeating, by = by[by != "redcap_form_instance"], suffix = suffix) - - joined_separate_nonrepeating <- x_separate_nonrepeating %>% - join_fn(y_separate_nonrepeating, by = by, suffix = suffix) - - # Bind rows together, issue in arrangmenet of output - result <- bind_rows(joined_together, joined_separate_repeating) %>% - bind_rows(joined_separate_nonrepeating) %>% - drop_non_suffix_columns() - } else { - result <- join_fn(x, y, by = by, suffix = suffix) - } - result -} - -drop_non_suffix_columns <- function(data) { - # Extract column names that contain a "." - # Note: We can look for periods because REDCap will not allow variables to made - # with them. Only user tampering with column names in the output would result in this. - dot_columns <- names(data)[grepl("\\.", names(data))] - - # Extract the base column names without the suffixes (everything before the ".") - base_columns <- unique(sub("\\..*", "", dot_columns)) - - # Filter out base columns that do not exist without a suffix - columns_to_drop <- base_columns[base_columns %in% names(data)] - - # Drop only those base columns that exist both with and without suffixes - data <- data %>% - select(-all_of(columns_to_drop)) - - return(data) -} diff --git a/man/build_by.Rd b/man/build_by.Rd deleted file mode 100644 index 81696fb2..00000000 --- a/man/build_by.Rd +++ /dev/null @@ -1,24 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/join_data_tibbles.R -\name{build_by} -\alias{build_by} -\title{Intelligently retrieve the join by cols} -\usage{ -build_by(supertbl, x, y, is_mixed) -} -\arguments{ -\item{supertbl}{A supertibble generated by \code{read_redcap()}. Required.} - -\item{x, y}{A pair of data frames, data frame extensions (e.g. a tibble), or -lazy data frames (e.g. from dbplyr or dtplyr). See \emph{Methods}, below, for -more details.} - -\item{is_mixed}{TRUE/FALSE, whether or not the given tables contain a mixed structure} -} -\value{ -a character vector -} -\description{ -Intelligently retrieve the join by cols -} -\keyword{internal} diff --git a/man/extract_keys.Rd b/man/extract_keys.Rd deleted file mode 100644 index b497120b..00000000 --- a/man/extract_keys.Rd +++ /dev/null @@ -1,21 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/join_data_tibbles.R -\name{extract_keys} -\alias{extract_keys} -\title{Extract the primary keys associated with a data tibble} -\usage{ -extract_keys(data_tbl, record_id_field) -} -\arguments{ -\item{data_tbl}{A data tibble from a supertibble} - -\item{record_id_field}{The record ID field for the REDCap project, retrieved -as an ouput of \code{\link[=get_record_id_field]{get_record_id_field()}}} -} -\value{ -a character string -} -\description{ -Extract the primary keys associated with a data tibble -} -\keyword{internal} diff --git a/man/get_join_fn.Rd b/man/get_join_fn.Rd deleted file mode 100644 index f99d7b01..00000000 --- a/man/get_join_fn.Rd +++ /dev/null @@ -1,19 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/join_data_tibbles.R -\name{get_join_fn} -\alias{get_join_fn} -\title{Retrieve the appropriate user specified join function} -\usage{ -get_join_fn(type) -} -\arguments{ -\item{type}{A character string for the type of join to be performed borrowing from -dplyr. One of "left", "right", "inner", or "full". Default "left".} -} -\value{ -a function -} -\description{ -Retrieve the appropriate user specified join function -} -\keyword{internal} diff --git a/man/get_structure.Rd b/man/get_structure.Rd deleted file mode 100644 index b8a901eb..00000000 --- a/man/get_structure.Rd +++ /dev/null @@ -1,17 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/join_data_tibbles.R -\name{get_structure} -\alias{get_structure} -\title{Retrieve the structure data for a form from the supertibble} -\usage{ -get_structure(supertbl, tbl_name) -} -\arguments{ -\item{supertbl}{A supertibble generated by \code{read_redcap()}. Required.} - -\item{tbl_name}{the \code{x} or \code{y} values assigned to \code{join_data_tibbles}} -} -\description{ -Retrieve the structure data for a form from the supertibble -} -\keyword{internal} diff --git a/man/get_type.Rd b/man/get_type.Rd deleted file mode 100644 index f3088095..00000000 --- a/man/get_type.Rd +++ /dev/null @@ -1,17 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/join_data_tibbles.R -\name{get_type} -\alias{get_type} -\title{Retrieve the repeat event type data for a form from the supertibble} -\usage{ -get_type(supertbl, tbl_name) -} -\arguments{ -\item{supertbl}{A supertibble generated by \code{read_redcap()}. Required.} - -\item{tbl_name}{the \code{x} or \code{y} values assigned to \code{join_data_tibbles}} -} -\description{ -Retrieve the repeat event type data for a form from the supertibble -} -\keyword{internal} diff --git a/man/join_data_tibbles.Rd b/man/join_data_tibbles.Rd deleted file mode 100644 index 75dc94b1..00000000 --- a/man/join_data_tibbles.Rd +++ /dev/null @@ -1,69 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/join_data_tibbles.R -\name{join_data_tibbles} -\alias{join_data_tibbles} -\title{Join Two Data Tibbles from a Supertibble} -\usage{ -join_data_tibbles( - supertbl, - x, - y, - by = NULL, - type = "left", - suffix = c(".x", ".y") -) -} -\arguments{ -\item{supertbl}{A supertibble generated by \code{read_redcap()}. Required.} - -\item{x, y}{A pair of data frames, data frame extensions (e.g. a tibble), or -lazy data frames (e.g. from dbplyr or dtplyr). See \emph{Methods}, below, for -more details.} - -\item{by}{A join specification created with \code{\link[dplyr:join_by]{join_by()}}, or a character -vector of variables to join by. - -If \code{NULL}, the default, \verb{*_join()} will perform a natural join, using all -variables in common across \code{x} and \code{y}. A message lists the variables so -that you can check they're correct; suppress the message by supplying \code{by} -explicitly. - -To join on different variables between \code{x} and \code{y}, use a \code{\link[dplyr:join_by]{join_by()}} -specification. For example, \code{join_by(a == b)} will match \code{x$a} to \code{y$b}. - -To join by multiple variables, use a \code{\link[dplyr:join_by]{join_by()}} specification with -multiple expressions. For example, \code{join_by(a == b, c == d)} will match -\code{x$a} to \code{y$b} and \code{x$c} to \code{y$d}. If the column names are the same between -\code{x} and \code{y}, you can shorten this by listing only the variable names, like -\code{join_by(a, c)}. - -\code{\link[dplyr:join_by]{join_by()}} can also be used to perform inequality, rolling, and overlap -joins. See the documentation at \link[dplyr:join_by]{?join_by} for details on -these types of joins. - -For simple equality joins, you can alternatively specify a character vector -of variable names to join by. For example, \code{by = c("a", "b")} joins \code{x$a} -to \code{y$a} and \code{x$b} to \code{y$b}. If variable names differ between \code{x} and \code{y}, -use a named character vector like \code{by = c("x_a" = "y_a", "x_b" = "y_b")}. - -To perform a cross-join, generating all combinations of \code{x} and \code{y}, see -\code{\link[dplyr:cross_join]{cross_join()}}.} - -\item{type}{A character string for the type of join to be performed borrowing from -dplyr. One of "left", "right", "inner", or "full". Default "left".} - -\item{suffix}{If there are non-joined duplicate variables in \code{x} and -\code{y}, these suffixes will be added to the output to disambiguate them. -Should be a character vector of length 2.} -} -\value{ -A \code{tibble}. -} -\description{ -The \code{\link[=join_data_tibbles]{join_data_tibbles()}} function provides a way to intelligently join two -data tibbles from a REDCaTidieR supertibble. A supertibble is an output of -\code{\link[=read_redcap]{read_redcap()}}. - -\code{\link[=join_data_tibbles]{join_data_tibbles()}} attempts to correctly assign the \code{by} when left \code{NULL} (the default) -based on detecting the data tibble structure of \code{x} and \code{y}. -} diff --git a/man/join_tbls.Rd b/man/join_tbls.Rd deleted file mode 100644 index 95fd52b5..00000000 --- a/man/join_tbls.Rd +++ /dev/null @@ -1,60 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/join_data_tibbles.R -\name{join_tbls} -\alias{join_tbls} -\title{Join data tbls of various structures and types} -\usage{ -join_tbls(x, y, join_fn, by, suffix, is_mixed) -} -\arguments{ -\item{x, y}{A pair of data frames, data frame extensions (e.g. a tibble), or -lazy data frames (e.g. from dbplyr or dtplyr). See \emph{Methods}, below, for -more details.} - -\item{join_fn}{the user specified join function type output by \code{\link[=get_join_fn]{get_join_fn()}}} - -\item{by}{A join specification created with \code{\link[dplyr:join_by]{join_by()}}, or a character -vector of variables to join by. - -If \code{NULL}, the default, \verb{*_join()} will perform a natural join, using all -variables in common across \code{x} and \code{y}. A message lists the variables so -that you can check they're correct; suppress the message by supplying \code{by} -explicitly. - -To join on different variables between \code{x} and \code{y}, use a \code{\link[dplyr:join_by]{join_by()}} -specification. For example, \code{join_by(a == b)} will match \code{x$a} to \code{y$b}. - -To join by multiple variables, use a \code{\link[dplyr:join_by]{join_by()}} specification with -multiple expressions. For example, \code{join_by(a == b, c == d)} will match -\code{x$a} to \code{y$b} and \code{x$c} to \code{y$d}. If the column names are the same between -\code{x} and \code{y}, you can shorten this by listing only the variable names, like -\code{join_by(a, c)}. - -\code{\link[dplyr:join_by]{join_by()}} can also be used to perform inequality, rolling, and overlap -joins. See the documentation at \link[dplyr:join_by]{?join_by} for details on -these types of joins. - -For simple equality joins, you can alternatively specify a character vector -of variable names to join by. For example, \code{by = c("a", "b")} joins \code{x$a} -to \code{y$a} and \code{x$b} to \code{y$b}. If variable names differ between \code{x} and \code{y}, -use a named character vector like \code{by = c("x_a" = "y_a", "x_b" = "y_b")}. - -To perform a cross-join, generating all combinations of \code{x} and \code{y}, see -\code{\link[dplyr:cross_join]{cross_join()}}.} - -\item{suffix}{If there are non-joined duplicate variables in \code{x} and -\code{y}, these suffixes will be added to the output to disambiguate them. -Should be a character vector of length 2.} - -\item{is_mixed}{TRUE/FALSE mixed data structure} -} -\value{ -a dataframe -} -\description{ -\code{\link[=join_tbls]{join_tbls()}} either performs the \code{join_fun()} specified by the \code{type} or, in -the event of mixed structure data tibble joins, will seek to split data into -three categories before performing the joins. The key identifiers here are -\code{redcap_form_instance} and the added \code{.repeat_type} columns. -} -\keyword{internal} diff --git a/pkgdown/_pkgdown.yml b/pkgdown/_pkgdown.yml index 9829a0fc..ede73fed 100644 --- a/pkgdown/_pkgdown.yml +++ b/pkgdown/_pkgdown.yml @@ -69,7 +69,6 @@ reference: Helpful functions for supertibble data analytics. contents: - combine_checkboxes - - join_data_tibbles - title: "Data" contents: - superheroes_supertbl From 2d0e61fd80c54640aa23d5d27f6d09580cefc78b Mon Sep 17 00:00:00 2001 From: Richard Hanna Date: Mon, 14 Oct 2024 15:24:51 -0400 Subject: [PATCH 15/17] Update snapshot per laballed vignette update --- tests/testthat/_snaps/write.md | 154 +++++++++++++++++++++++++++++---- 1 file changed, 138 insertions(+), 16 deletions(-) diff --git a/tests/testthat/_snaps/write.md b/tests/testthat/_snaps/write.md index d77388fc..c6943f93 100644 --- a/tests/testthat/_snaps/write.md +++ b/tests/testthat/_snaps/write.md @@ -16,7 +16,8 @@ 9 api_no_access_2 API No Access 2 10 survey Survey 11 repeat_survey Repeat Survey - 12 REDCap Metadata + 12 labelled_vignette Labelled Vignette + 13 REDCap Metadata Repeating or Nonrepeating? # of Rows in Data # of Columns in Data 2 structure data_rows data_cols 3 nonrepeating 4 4 @@ -28,19 +29,21 @@ 9 nonrepeating 4 5 10 nonrepeating 4 9 11 repeating 3 10 - 12 + 12 nonrepeating 4 7 + 13 Data size in Memory % of Data Missing NA Sheet # 2 data_size data_na_pct form_complete_pct Sheet # 3 2.28 kB 0.25 0 1 4 1.94 kB 0.5 0 2 5 2.58 kB 0 0 3 - 6 7.71 kB 0.293103448275862 0 4 + 6 7.71 kB 0.28448275862069 0 4 7 7.40 kB 0.75 0 5 8 1.78 kB 1 0 6 9 2.06 kB 1 0 7 10 3.73 kB 0.392857142857143 0 8 11 3.94 kB 0.142857142857143 0 9 - 12 10 + 12 3.04 kB 0 0 10 + 13 11 [[1]][[2]] Record ID Text Box Input Text Box Input REDCap Instrument Completed? @@ -77,7 +80,7 @@ 2 record_id text note calculated dropdown_single radio_single 3 1 text notes 2 one B 4 2 2 three C - 5 3 + 5 3 2 6 4 2 NA NA NA 2 radio_duplicate_label checkbox_multiple___1 checkbox_multiple___2 @@ -225,6 +228,20 @@ 5 2022-11-09 12:21:04 Complete [[1]][[11]] + Record ID Text Box Radio Buttons Checkbox: A Checkbox: B Checkbox: C + 2 record_id text_box_1 radio_buttons_1 checkbox___1 checkbox___2 checkbox___3 + 3 1 Record 1 A Checked Unchecked Unchecked + 4 2 Record 2 B Checked Checked Unchecked + 5 3 Record 3 C Unchecked Checked Checked + 6 4 Record 4 A Unchecked Unchecked Unchecked + REDCap Instrument Completed? + 2 form_status_complete + 3 Complete + 4 Complete + 5 Complete + 6 Complete + + [[1]][[12]] REDCap Instrument Name REDCap Instrument Description 2 redcap_form_name redcap_form_label 3 @@ -293,6 +310,11 @@ 66 repeat_survey Repeat Survey 67 repeat_survey Repeat Survey 68 repeat_survey Repeat Survey + 69 labelled_vignette Labelled Vignette + 70 labelled_vignette Labelled Vignette + 71 labelled_vignette Labelled Vignette + 72 labelled_vignette Labelled Vignette + 73 labelled_vignette Labelled Vignette Variable / Field Name 2 field_name 3 record_id @@ -361,6 +383,11 @@ 66 repeatsurvey_checkbox_v2___one 67 repeatsurvey_checkbox_v2___two 68 repeatsurvey_checkbox_v2___three + 69 text_box_1 + 70 radio_buttons_1 + 71 checkbox___1 + 72 checkbox___2 + 73 checkbox___3 Field Label Field Type 2 field_label field_type 3 Record ID text @@ -429,6 +456,11 @@ 66 Checkbox Field: Choice 1 checkbox 67 Checkbox Field: Choice 2 checkbox 68 Checkbox Field: Choice 3 checkbox + 69 Text Box text + 70 Radio Buttons radio + 71 Checkbox: A checkbox + 72 Checkbox: B checkbox + 73 Checkbox: C checkbox Section Header Prior to this Field 2 section_header 3 @@ -497,6 +529,11 @@ 66 67 68 + 69 + 70 + 71 + 72 + 73 Choices, Calculations, or Slider Labels 2 select_choices_or_calculations 3 @@ -565,6 +602,11 @@ 66 one, Choice 1 | two, Choice 2 | three, Choice 3 67 one, Choice 1 | two, Choice 2 | three, Choice 3 68 one, Choice 1 | two, Choice 2 | three, Choice 3 + 69 + 70 1, A | 2, B | 3, C + 71 1, A | 2, B | 3, C + 72 1, A | 2, B | 3, C + 73 1, A | 2, B | 3, C Field Note Text Validation Type OR Show Slider Number 2 field_note text_validation_type_or_show_slider_number 3 @@ -633,6 +675,11 @@ 66 67 68 + 69 + 70 + 71 + 72 + 73 Minimum Accepted Value for Text Validation 2 text_validation_min 3 @@ -701,6 +748,11 @@ 66 67 68 + 69 + 70 + 71 + 72 + 73 Maximum Accepted Value for Text Validation Is this Field an Identifier? 2 text_validation_max identifier 3 @@ -769,6 +821,11 @@ 66 67 68 + 69 + 70 + 71 + 72 + 73 Branching Logic (Show field only if...) Is this Field Required? 2 branching_logic required_field 3 @@ -837,6 +894,11 @@ 66 67 68 + 69 + 70 + 71 + 72 + 73 Custom Alignment Question Number (surveys only) Matrix Group Name 2 custom_alignment question_number matrix_group_name 3 @@ -905,6 +967,11 @@ 66 67 68 + 69 + 70 + 71 + 72 + 73 Matrix Ranking? Field Annotation Data Type Count of Missing Values 2 matrix_ranking field_annotation skim_type n_missing 3 @@ -916,7 +983,7 @@ 9 character 0 10 character 3 11 character 3 - 12 numeric 1 + 12 numeric 0 13 factor 2 14 factor 2 15 factor 4 @@ -973,6 +1040,11 @@ 66 logical 0 67 logical 0 68 logical 0 + 69 character 0 + 70 factor 0 + 71 logical 0 + 72 logical 0 + 73 logical 0 Proportion of Non-Missing Values Shortest Value (Fewest Characters) 2 complete_rate character.min 3 @@ -984,7 +1056,7 @@ 9 1 1 10 0.25 4 11 0.25 5 - 12 0.75 + 12 1 13 0.5 14 0.5 15 0 @@ -1041,6 +1113,11 @@ 66 1 67 1 68 1 + 69 1 8 + 70 1 + 71 1 + 72 1 + 73 1 Longest Value (Most Characters) Count of Empty Values Count of Unique Values 2 character.max character.empty character.n_unique 3 @@ -1109,6 +1186,11 @@ 66 67 68 + 69 8 0 4 + 70 + 71 + 72 + 73 Count of Values that are all Whitespace Mean Standard Deviation 2 character.whitespace numeric.mean numeric.sd 3 @@ -1177,6 +1259,11 @@ 66 67 68 + 69 0 + 70 + 71 + 72 + 73 Minimum 25th Percentile Median 75th Percentile Maximum 2 numeric.p0 numeric.p25 numeric.p50 numeric.p75 numeric.p100 3 @@ -1245,6 +1332,11 @@ 66 67 68 + 69 + 70 + 71 + 72 + 73 Histogram Is the Categorical Value Ordered? Count of Unique Values 2 numeric.hist factor.ordered factor.n_unique 3 @@ -1313,6 +1405,11 @@ 66 67 68 + 69 + 70 FALSE 3 + 71 + 72 + 73 Most Frequent Values Proportion of TRUE Values Count of Logical Values 2 factor.top_counts logical.mean logical.count 3 @@ -1381,6 +1478,11 @@ 66 0.666666666666667 TRU: 2, FAL: 1 67 0.333333333333333 FAL: 2, TRU: 1 68 0.333333333333333 FAL: 2, TRU: 1 + 69 + 70 A: 2, B: 1, C: 1 + 71 0.5 FAL: 2, TRU: 2 + 72 0.5 FAL: 2, TRU: 2 + 73 0.25 FAL: 3, TRU: 1 Earliest Latest Median Count of Unique Values Earliest 2 Date.min Date.max Date.median Date.n_unique POSIXct.min 3 @@ -1449,6 +1551,11 @@ 66 67 68 + 69 + 70 + 71 + 72 + 73 Latest Median Count of Unique Values Minimum 2 POSIXct.max POSIXct.median POSIXct.n_unique difftime.min 3 @@ -1517,6 +1624,11 @@ 66 67 68 + 69 + 70 + 71 + 72 + 73 Maximum Median Count of Unique Values 2 difftime.max difftime.median difftime.n_unique 3 @@ -1585,11 +1697,16 @@ 66 67 68 + 69 + 70 + 71 + 72 + 73 [[2]] tab_name tab_sheet tab_ref - 1 Table1 1 A2:I12 + 1 Table1 1 A2:I13 2 Table2 2 A2:D6 3 Table3 3 A2:D6 4 Table4 4 A2:E6 @@ -1599,9 +1716,10 @@ 8 Table8 8 A2:E6 9 Table9 9 A2:I6 10 Table10 10 A2:J5 - 11 Table11 11 A2:AZ68 + 11 Table11 11 A2:G6 + 12 Table12 12 A2:AZ73 tab_xml - 1
+ 1
2
3
4
@@ -1611,7 +1729,8 @@ 8
9
10
- 11
+ 11
+ 12
tab_act 1 1 2 1 @@ -1624,6 +1743,7 @@ 9 1 10 1 11 1 + 12 1 [[3]] [[3]]$fileVersion @@ -1661,7 +1781,8 @@ [8] "" [9] "" [10] "" - [11] "" + [11] "" + [12] "" [[3]]$functionGroups NULL @@ -1715,11 +1836,12 @@ [9] "" [10] "" [11] "" - [12] "" - [13] "" + [12] "" + [13] "" + [14] "" [[5]] - [1] 1 2 3 4 5 6 7 8 9 10 11 + [1] 1 2 3 4 5 6 7 8 9 10 11 12 [[6]] [1] "Table of Contents" "Nonrepeated" @@ -1727,6 +1849,6 @@ [5] "Data Field Types" "Text Input Validation Types" [7] "API No Access" "API No Access 2" [9] "Survey" "Repeat Survey" - [11] "REDCap Metadata" + [11] "Labelled Vignette" "REDCap Metadata" From 2c5046b56773cc83ae8edaab4fd6dcc21cd7de13 Mon Sep 17 00:00:00 2001 From: Richard Hanna Date: Tue, 15 Oct 2024 12:04:19 -0400 Subject: [PATCH 16/17] add_partial_keys redcap_arm factor -> character --- R/utils.R | 2 +- tests/testthat/test-utils.R | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/R/utils.R b/R/utils.R index e660031b..302aab5b 100644 --- a/R/utils.R +++ b/R/utils.R @@ -24,7 +24,7 @@ add_partial_keys <- function(db_data, db_data <- db_data %>% mutate( redcap_event = sub(pattern, "\\1", {{ var }}), - redcap_arm = as.factor(sub(pattern, "\\2", {{ var }})) + redcap_arm = as.character(sub(pattern, "\\2", {{ var }})) ) } diff --git a/tests/testthat/test-utils.R b/tests/testthat/test-utils.R index 344d4e9a..7eb23520 100644 --- a/tests/testthat/test-utils.R +++ b/tests/testthat/test-utils.R @@ -369,7 +369,7 @@ test_that("add_partial_keys works", { expect_s3_class(out, "data.frame") expect_true(nrow(out) > 0) - expected_redcap_arm_col <- factor(c(1, 1, 1, 1, "1b")) + expected_redcap_arm_col <- c("1", "1", "1", "1", "1b") expect_equal(out$redcap_arm, expected_redcap_arm_col) }) From 14d2587bba2c54a3f691859b19ff606dbde31829 Mon Sep 17 00:00:00 2001 From: Richard Hanna Date: Tue, 15 Oct 2024 15:22:21 -0400 Subject: [PATCH 17/17] Update NEWS.md --- NEWS.md | 1 + 1 file changed, 1 insertion(+) diff --git a/NEWS.md b/NEWS.md index 14c724ee..cad90c90 100644 --- a/NEWS.md +++ b/NEWS.md @@ -3,6 +3,7 @@ - Added `combine_checkboxes()` analytics function - Use `combine_checkboxes()` to consolidate multiple checkbox fields in a REDCap data tibble under a single column - Fixed a bug for mixed structure databases resulting in data loss when some fields had dual repeating-separately/repeating-together behavior +- Fixed a bug where partial keys taken from REDCap arms could be incorrectly specified - Various improvements and additions with CRAN release of REDCapR 1.2.0: - `event_name` added as a column to the `redcap_event` column of longitudinal supertibbles - `guess_max` parameter in `read_redcap()` default updated to `Inf`