-
Notifications
You must be signed in to change notification settings - Fork 8
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Missing Data Fix, Partial Keys Fix #205
Changes from 11 commits
0971400
e544edb
67d9634
625ea0c
7dd592b
734c3e5
e80733e
9eee96e
0e755f0
3ce0e12
374ad48
37b88a8
7f64232
593254d
a343d6e
851450f
0fdb9bf
2d0e61f
2c5046b
14d2587
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -84,7 +84,9 @@ clean_redcap_long <- function(db_data_long, | |
# Retrieve mixed structure fields and forms in reference df | ||
mixed_structure_ref <- get_mixed_structure_fields(db_data_long) %>% | ||
filter(.data$rep_and_nonrep & !str_ends(.data$field_name, "_form_complete")) %>% | ||
left_join(db_metadata_long %>% select("field_name", "form_name"), | ||
left_join( | ||
db_metadata_long %>% | ||
select("field_name", "form_name"), | ||
by = "field_name" | ||
) | ||
|
||
|
@@ -318,7 +320,10 @@ distill_repeat_table_long <- function(form_name, | |
db_data_long <- db_data_long %>% | ||
add_partial_keys(var = .data$redcap_event_name) %>% | ||
filter( | ||
!is.na(.data$redcap_form_instance) & | ||
( | ||
!is.na(.data$redcap_form_instance) | | ||
if_any(starts_with("redcap_event_instance"), ~ !is.na(.)) | ||
) & | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Filter for when either |
||
.data$redcap_repeat_instrument == my_form | ||
) | ||
|
||
|
@@ -416,14 +421,38 @@ convert_mixed_instrument <- function(db_data_long, mixed_structure_ref) { | |
) | ||
) | ||
|
||
repeat_together_present <- any( | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Since we now want to separate RT events so their instances get captured under |
||
is.na(db_data_long$redcap_repeat_instrument) & | ||
!is.na(db_data_long$redcap_repeat_instance) | ||
) | ||
|
||
if (!"redcap_event_instance" %in% names(db_data_long) && repeat_together_present) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. If RT detected but no column supplied for |
||
db_data_long <- db_data_long %>% | ||
mutate( | ||
redcap_event_instance = NA | ||
) %>% | ||
relocate(.data$redcap_event_instance, .after = .data$redcap_repeat_instance) | ||
} | ||
|
||
if (repeat_together_present) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Finally, shift the redcap repeat instance values over to the redcap event instance column. |
||
db_data_long <- db_data_long %>% | ||
mutate( | ||
redcap_event_instance = case_when( | ||
# Shift form instances to even instances for repeat-together types | ||
update_mask & is.na(redcap_repeat_instrument) ~ redcap_repeat_instance, | ||
# Otherwise | ||
TRUE ~ redcap_event_instance | ||
) | ||
) | ||
} | ||
|
||
# Assign update data based on rules below | ||
db_data_long <- db_data_long %>% | ||
mutate( | ||
redcap_repeat_instance = case_when( | ||
# Add single instance repeat event instance vals when none exist | ||
update_mask & is.na(redcap_repeat_instance) ~ 1, | ||
rsh52 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
# Keep repeat event instance vals when they already exist | ||
update_mask & !is.na(redcap_repeat_instance) ~ redcap_repeat_instance, | ||
# If repeat-together type, remove values from redcap_repeat_instance | ||
# (shifted and captured in redcap_event_instance) | ||
update_mask & is.na(redcap_repeat_instrument) ~ NA, | ||
TRUE ~ .data$redcap_repeat_instance | ||
), | ||
redcap_repeat_instrument = case_when( | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,91 @@ | ||
join_data_tibbles <- function(suprtbl, | ||
x, | ||
y, | ||
by = NULL, | ||
type = "left", | ||
suffix = c(".x", ".y")) { | ||
record_id_field <- get_record_id_field(suprtbl$redcap_data[[1]]) # nolint: object_usage_linter | ||
join_fn <- get_join_fn(type) | ||
|
||
# Append the supertibble with the primary keys column | ||
suprtbl <- suprtbl |> | ||
mutate(pks = purrr::map_chr(.data$redcap_data, ~ extract_keys(., record_id_field = record_id_field))) %>% | ||
select(.data$redcap_form_name, .data$redcap_form_label, .data$redcap_data, | ||
.data$redcap_metadata, .data$structure, .data$pks) | ||
|
||
tbl_x <- extract_tibble(suprtbl, x) | ||
tbl_x_type <- get_structure(suprtbl, x) | ||
tbl_y <- extract_tibble(suprtbl, y) | ||
tbl_y_type <- get_structure(suprtbl, y) | ||
|
||
# Mixed structure requires special handling | ||
is_mixed <- any(c(tbl_x_type, tbl_y_type) == "mixed") | ||
|
||
if (is_mixed) { | ||
required_columns <- c("redcap_event_instance", "redcap_form_instance") | ||
tbl_x <- add_missing_columns(tbl_x, required_columns) | ||
tbl_y <- add_missing_columns(tbl_y, required_columns) | ||
} | ||
|
||
join_fn <- get_join_fn(type) | ||
by <- build_by(suprtbl, x, y, is_mixed) | ||
|
||
join_fn(tbl_x, tbl_y, by = by, suffix = suffix) %>% | ||
relocate(starts_with("form_status_complete"), .after = everything()) | ||
} | ||
|
||
extract_keys <- function(suprtbl, record_id_field) { | ||
redcap_keys <- c( | ||
record_id_field, "redcap_event", "redcap_form_instance", | ||
"redcap_event_instance", "redcap_arm" | ||
) | ||
|
||
suprtbl |> | ||
colnames() |> | ||
intersect(redcap_keys) |> | ||
paste(collapse = ", ") | ||
} | ||
|
||
get_structure <- function(suprtbl, tbl_name) { | ||
suprtbl$structure[suprtbl$redcap_form_name == tbl_name] | ||
} | ||
|
||
get_join_fn <- function(type) { | ||
join_functions <- list( | ||
left = dplyr::left_join, | ||
right = dplyr::right_join, | ||
inner = dplyr::inner_join, | ||
full = dplyr::full_join | ||
) | ||
|
||
# Check if the specified type is valid | ||
# TODO: Make a standard check function with cli | ||
if (!type %in% names(join_functions)) { | ||
stop("Invalid join type. Choose from 'left', 'right', 'inner', or 'full'.") | ||
} | ||
|
||
join_functions[[type]] | ||
} | ||
|
||
build_by <- function(suprtbl, x, y, is_mixed) { | ||
x_pks <- suprtbl$pks[suprtbl$redcap_form_name == x] %>% | ||
stringr::str_split(", ", simplify = TRUE) | ||
y_pks <- suprtbl$pks[suprtbl$redcap_form_name == y] %>% | ||
stringr::str_split(", ", simplify = TRUE) | ||
|
||
out <- intersect(x_pks, y_pks) | ||
|
||
if (is_mixed) { | ||
out <- c(out, "redcap_event_instance", "redcap_form_instance") %>% | ||
# TODO: Make standard, currently needed for repeat/mixed joins | ||
unique() | ||
} | ||
|
||
out | ||
} | ||
|
||
add_missing_columns <- function(tbl, columns) { | ||
missing_cols <- setdiff(columns, names(tbl)) | ||
tbl[missing_cols] <- NA | ||
return(tbl) | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -270,8 +270,11 @@ read_redcap <- function(redcap_uri, | |
} | ||
|
||
if (is_longitudinal) { | ||
repeat_event_types <- get_repeat_event_types(db_data) | ||
|
||
linked_arms <- link_arms( | ||
redcap_uri = redcap_uri, token = token, | ||
redcap_uri = redcap_uri, | ||
token = token, | ||
suppress_redcapr_messages = suppress_redcapr_messages | ||
) | ||
|
||
|
@@ -292,7 +295,7 @@ read_redcap <- function(redcap_uri, | |
out <- add_metadata(out, db_metadata, redcap_uri, token, suppress_redcapr_messages) | ||
|
||
if (is_longitudinal) { | ||
out <- add_event_mapping(out, linked_arms) | ||
out <- add_event_mapping(out, linked_arms, repeat_event_types) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Add |
||
} | ||
|
||
out <- out %>% | ||
|
@@ -440,18 +443,21 @@ add_metadata <- function(supertbl, db_metadata, redcap_uri, token, suppress_redc | |
#' @param supertbl a supertibble object to supplement with metadata | ||
#' @param linked_arms the tibble with event mappings created by | ||
#' \code{link_arms()} | ||
#' @param repeat_event_types a dataframe output from [repeat_event_types()] which | ||
#' specifies NR, RS, and RT types for events | ||
#' | ||
#' @return | ||
#' The original supertibble with an events \code{redcap_events} list column | ||
#' containing arms and events associated with each instrument | ||
#' | ||
#' @keywords internal | ||
#' | ||
add_event_mapping <- function(supertbl, linked_arms) { | ||
|
||
add_event_mapping <- function(supertbl, linked_arms, repeat_event_types) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
|
||
event_info <- linked_arms %>% | ||
left_join(repeat_event_types, by = c("unique_event_name" = "redcap_event_name")) %>% | ||
add_partial_keys(.data$unique_event_name) %>% | ||
select( | ||
redcap_form_name = "form", "redcap_event", "event_name", "redcap_arm", "arm_name" | ||
redcap_form_name = "form", "redcap_event", "event_name", "redcap_arm", "arm_name", "repeat_type" | ||
) %>% | ||
nest(redcap_events = !"redcap_form_name") | ||
|
||
|
@@ -498,3 +504,50 @@ calc_metadata_stats <- function(data) { | |
form_complete_pct = percent(form_complete_pct, digits = 2, format = "fg") | ||
) | ||
} | ||
|
||
#' @title | ||
#' Add identification for repeat event types | ||
#' | ||
#' @description | ||
#' To correctly assign repeat event types a few assumptions must be made: | ||
#' | ||
#' - There are only 3 behaviors: nonrepeating, repeat_separately, and repeat_together | ||
#' - If an event only shows `redcap_repeat_instance` and `redcap_repeat_instrument` | ||
#' as `NA`, it can be considered a nonrepeat event. | ||
#' - If an event is always `NA` for `redcap_repeat_instrument` and filled for `redcap_repeat_instance` | ||
#' it can be assumed to be a repeat_together event | ||
#' - repeat_separate and nonrepeating event types exhibit the same behavior along the | ||
#' primary keys of the data. nonrepeating event types can have data display with | ||
#' `redcap_repeat_instance`values both filled and as `NA`. If this is the case, | ||
#' it can be assumed the event is a repeating separate event. | ||
#' | ||
#' @param data the REDCap data | ||
#' | ||
#' @return | ||
#' A dataframe with unique event names mapped to their corresponding repeat types | ||
#' | ||
#' @keywords internal | ||
|
||
get_repeat_event_types <- function(data) { | ||
out <- data %>% | ||
dplyr::distinct(.data$redcap_event_name, .data$redcap_repeat_instrument, .data$redcap_repeat_instance) %>% | ||
mutate( | ||
repeat_type = case_when( | ||
!is.na(redcap_event_name) & !is.na(redcap_repeat_instrument) & !is.na(redcap_repeat_instance) ~ | ||
"repeat_separate", | ||
!is.na(redcap_event_name) & is.na(redcap_repeat_instrument) & !is.na(redcap_repeat_instance) ~ | ||
"repeat_together", | ||
TRUE ~ "nonrepeating" | ||
) | ||
) %>% | ||
dplyr::distinct(.data$redcap_event_name, .data$repeat_type) | ||
|
||
# Check for instances where the same event is labelled as nonrepeating & repeating separate | ||
# If this is the case, it must be repeating separate (there is just data that qualifies as both) | ||
out %>% | ||
mutate( | ||
is_duplicated = duplicated(.data$repeat_type) | duplicated(.data$repeat_type, fromLast = TRUE) | ||
) %>% | ||
filter(.data$is_duplicated == FALSE | (.data$is_duplicated == TRUE & .data$ repeat_type == "repeat_separate")) %>% | ||
select(-.data$is_duplicated) | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -18,12 +18,13 @@ | |
add_partial_keys <- function(db_data, | ||
var = NULL) { | ||
if (!is.null(enexpr(var))) { | ||
pattern <- "^(\\w+?)_arm_(\\d)$" | ||
# Include handling for instances where REDCap appends with "_1b" or similar | ||
pattern <- "^(\\w+?)_arm_(\\d+\\w?)$" | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Fix regex to address #206 |
||
|
||
db_data <- db_data %>% | ||
mutate( | ||
redcap_event = sub(pattern, "\\1", {{ var }}), | ||
redcap_arm = as.integer(sub(pattern, "\\2", {{ var }})) | ||
redcap_arm = as.factor(sub(pattern, "\\2", {{ var }})) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Convert to factor to capture non-integer arm values as mentioned in #206 There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think |
||
) | ||
} | ||
|
||
|
@@ -64,31 +65,38 @@ create_repeat_instance_vars <- function(db_data) { | |
} | ||
|
||
# Detect if repeat events exist | ||
# Determined by non-NA vals in new "redcap_form_instance" alongside | ||
# NA vals in "redcap_repeat_instrument" | ||
# `has_repeat_forms` will always be TRUE for events to exist | ||
if (has_repeat_forms) { | ||
# First determined if redcap_event_instance added during mixed structure handling | ||
# See: convert_mixed_instrument | ||
has_repeat_events <- "redcap_event_instance" %in% names(out) | ||
|
||
# Next determined by non-NA vals in new "redcap_form_instance" alongside | ||
# NA vals in "redcap_repeat_instrument" | ||
# `has_repeat_forms` will always be TRUE for events to exist | ||
if (has_repeat_forms && !has_repeat_events) { | ||
has_repeat_events <- any( | ||
is.na(out$redcap_repeat_instrument) & !is.na(out$redcap_form_instance) | ||
) | ||
} else { | ||
has_repeat_events <- FALSE | ||
} | ||
|
||
if (has_repeat_events) { | ||
out$redcap_event_instance <- ifelse( | ||
is.na(out$redcap_repeat_instrument) & | ||
!is.na(out$redcap_form_instance), | ||
out$redcap_form_instance, | ||
NA | ||
) | ||
# In cases where there are repeating events but they were not added by | ||
# convert_mixed_instrument(), add an empty redcap_event_instance column | ||
if (!"redcap_event_instance" %in% names(out)) { | ||
out$redcap_event_instance <- NA | ||
} | ||
|
||
out$redcap_form_instance <- ifelse( | ||
is.na(out$redcap_repeat_instrument) & | ||
!is.na(out$redcap_form_instance), | ||
NA, | ||
out$redcap_form_instance | ||
) | ||
out <- out %>% | ||
mutate( | ||
redcap_event_instance = case_when( | ||
is.na(redcap_repeat_instrument) & !is.na(redcap_form_instance) ~ redcap_form_instance, | ||
# Else leave NA or the value given by conver_mixed_instrument() | ||
TRUE ~ redcap_event_instance | ||
), | ||
redcap_form_instance = case_when( | ||
is.na(redcap_repeat_instrument) & !is.na(redcap_form_instance) ~ NA, | ||
TRUE ~ redcap_form_instance | ||
) | ||
) | ||
|
||
out <- relocate(out, | ||
"redcap_event_instance", | ||
|
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
No change, just lintr.