Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Missing Data Fix, Partial Keys Fix #205

Merged
merged 20 commits into from
Oct 15, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ importFrom(dplyr,bind_rows)
importFrom(dplyr,case_when)
importFrom(dplyr,coalesce)
importFrom(dplyr,cur_column)
importFrom(dplyr,distinct)
importFrom(dplyr,filter)
importFrom(dplyr,first)
importFrom(dplyr,group_by)
Expand Down Expand Up @@ -147,6 +148,7 @@ importFrom(tidyselect,any_of)
importFrom(tidyselect,ends_with)
importFrom(tidyselect,eval_select)
importFrom(tidyselect,everything)
importFrom(tidyselect,matches)
importFrom(tidyselect,starts_with)
importFrom(tidyselect,where)
importFrom(vctrs,vec_ptype)
Expand Down
1 change: 1 addition & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
- Added `combine_checkboxes()` analytics function
- Use `combine_checkboxes()` to consolidate multiple checkbox fields in a REDCap data tibble under a single column
- Fixed a bug for mixed structure databases resulting in data loss when some fields had dual repeating-separately/repeating-together behavior
- Fixed a bug where partial keys taken from REDCap arms could be incorrectly specified
- Various improvements and additions with CRAN release of REDCapR 1.2.0:
- `event_name` added as a column to the `redcap_event` column of longitudinal supertibbles
- `guess_max` parameter in `read_redcap()` default updated to `Inf`
Expand Down
4 changes: 2 additions & 2 deletions R/REDCapTidieR-package.R
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
#' @importFrom dplyr %>% across bind_rows case_when filter group_by if_any if_else
#' left_join mutate pull recode relocate rename right_join row_number rowwise
#' select slice summarise ungroup coalesce cur_column bind_cols first nth n_distinct
#' first
#' first distinct
#' @importFrom formattable percent
#' @importFrom lobstr obj_size
#' @importFrom lubridate is.difftime is.period is.POSIXt is.Date
Expand All @@ -27,7 +27,7 @@
#' @importFrom tidyr complete fill pivot_wider nest separate_wider_delim unnest
#' unnest_wider
#' @importFrom tidyselect all_of any_of ends_with eval_select everything
#' starts_with where
#' starts_with where matches
#' @importFrom vctrs vec_ptype_abbr vec_ptype
#' @importFrom pillar tbl_sum
#' @importFrom readr parse_logical parse_integer parse_double parse_date parse_time
Expand Down
40 changes: 36 additions & 4 deletions R/clean_redcap_long.R
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,9 @@ clean_redcap_long <- function(db_data_long,
# Retrieve mixed structure fields and forms in reference df
mixed_structure_ref <- get_mixed_structure_fields(db_data_long) %>%
filter(.data$rep_and_nonrep & !str_ends(.data$field_name, "_form_complete")) %>%
left_join(db_metadata_long %>% select("field_name", "form_name"),
left_join(
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No change, just lintr.

db_metadata_long %>%
select("field_name", "form_name"),
by = "field_name"
)

Expand Down Expand Up @@ -318,7 +320,10 @@ distill_repeat_table_long <- function(form_name,
db_data_long <- db_data_long %>%
add_partial_keys(var = .data$redcap_event_name) %>%
filter(
!is.na(.data$redcap_form_instance) &
(
!is.na(.data$redcap_form_instance) |
if_any(matches("redcap_event_instance"), ~ !is.na(.))
) &
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Filter for when either redcap_form_instance is NA or redcap_event_instance is NA (if it exists).

.data$redcap_repeat_instrument == my_form
)

Expand Down Expand Up @@ -416,14 +421,41 @@ convert_mixed_instrument <- function(db_data_long, mixed_structure_ref) {
)
)

repeat_together_present <- any(
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Since we now want to separate RT events so their instances get captured under redcap_event_instance, we first need to check for the right behavior for RTs (i.e. no repeating instrument but a repeat instance detected).

is.na(db_data_long$redcap_repeat_instrument) &
!is.na(db_data_long$redcap_repeat_instance)
)

if (!"redcap_event_instance" %in% names(db_data_long) && repeat_together_present) {
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If RT detected but no column supplied for redcap_event_instance, add it as an empty column.

db_data_long <- db_data_long %>%
mutate(
redcap_event_instance = NA
) %>%
relocate(.data$redcap_event_instance, .after = .data$redcap_repeat_instance)
}

if (repeat_together_present) {
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Finally, shift the redcap repeat instance values over to the redcap event instance column.

db_data_long <- db_data_long %>%
mutate(
redcap_event_instance = case_when(
# Shift form instances to even instances for repeat-together types
update_mask & is.na(redcap_repeat_instrument) ~ redcap_repeat_instance,
# Otherwise
TRUE ~ redcap_event_instance
)
)
}

# Assign update data based on rules below
db_data_long <- db_data_long %>%
mutate(
redcap_repeat_instance = case_when(
# Add single instance repeat event instance vals when none exist
# This handles nonrepeating data in events set to repeat separately
update_mask & is.na(redcap_repeat_instance) ~ 1,
rsh52 marked this conversation as resolved.
Show resolved Hide resolved
# Keep repeat event instance vals when they already exist
update_mask & !is.na(redcap_repeat_instance) ~ redcap_repeat_instance,
# If repeat-together type, remove values from redcap_repeat_instance
# (shifted and captured in redcap_event_instance)
update_mask & is.na(redcap_repeat_instrument) ~ NA,
TRUE ~ .data$redcap_repeat_instance
),
redcap_repeat_instrument = case_when(
Expand Down
64 changes: 59 additions & 5 deletions R/read_redcap.R
Original file line number Diff line number Diff line change
Expand Up @@ -270,8 +270,11 @@ read_redcap <- function(redcap_uri,
}

if (is_longitudinal) {
repeat_event_types <- get_repeat_event_types(db_data)

linked_arms <- link_arms(
redcap_uri = redcap_uri, token = token,
redcap_uri = redcap_uri,
token = token,
suppress_redcapr_messages = suppress_redcapr_messages
)

Expand All @@ -292,7 +295,7 @@ read_redcap <- function(redcap_uri,
out <- add_metadata(out, db_metadata, redcap_uri, token, suppress_redcapr_messages)

if (is_longitudinal) {
out <- add_event_mapping(out, linked_arms)
out <- add_event_mapping(out, linked_arms, repeat_event_types)
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Add repeat_event_types reference table to add_event_mapping()

}

out <- out %>%
Expand Down Expand Up @@ -440,18 +443,21 @@ add_metadata <- function(supertbl, db_metadata, redcap_uri, token, suppress_redc
#' @param supertbl a supertibble object to supplement with metadata
#' @param linked_arms the tibble with event mappings created by
#' \code{link_arms()}
#' @param repeat_event_types a dataframe output from [get_repeat_event_types()] which
#' specifies NR, RS, and RT types for events
#'
#' @return
#' The original supertibble with an events \code{redcap_events} list column
#' containing arms and events associated with each instrument
#'
#' @keywords internal
#'
add_event_mapping <- function(supertbl, linked_arms) {

add_event_mapping <- function(supertbl, linked_arms, repeat_event_types) {
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

add_event_mapping() now accepts a reference table for the repeating event types designations. This table is joined onto the linked_arms data to make repeat_type available in the eventual redcap_events supertibble column.

event_info <- linked_arms %>%
left_join(repeat_event_types, by = c("unique_event_name" = "redcap_event_name")) %>%
add_partial_keys(.data$unique_event_name) %>%
select(
redcap_form_name = "form", "redcap_event", "event_name", "redcap_arm", "arm_name"
redcap_form_name = "form", "redcap_event", "event_name", "redcap_arm", "arm_name", "repeat_type"
) %>%
nest(redcap_events = !"redcap_form_name")

Expand Down Expand Up @@ -498,3 +504,51 @@ calc_metadata_stats <- function(data) {
form_complete_pct = percent(form_complete_pct, digits = 2, format = "fg")
)
}

#' @title
#' Add identification for repeat event types
#'
#' @description
#' To correctly assign repeat event types a few assumptions must be made:
#'
#' - There are only 3 behaviors: nonrepeating, repeat_separately, and repeat_together
#' - If an event only shows `redcap_repeat_instance` and `redcap_repeat_instrument`
#' as `NA`, it can be considered a nonrepeat event.
#' - If an event is always `NA` for `redcap_repeat_instrument` and filled for `redcap_repeat_instance`
#' it can be assumed to be a repeat_together event
#' - repeat_separate and nonrepeating event types exhibit the same behavior along the
#' primary keys of the data. nonrepeating event types can have data display with
#' `redcap_repeat_instance`values both filled and as `NA`. If this is the case,
#' it can be assumed the event is a repeating separate event.
#'
#' @param data the REDCap data
#'
#' @return
#' A dataframe with unique event names mapped to their corresponding repeat types
#'
#' @keywords internal

get_repeat_event_types <- function(data) {
out <- data %>%
distinct(.data$redcap_event_name, .data$redcap_repeat_instrument, .data$redcap_repeat_instance) %>%
mutate(
repeat_type = case_when(
!is.na(redcap_event_name) & !is.na(redcap_repeat_instrument) & !is.na(redcap_repeat_instance) ~
"repeat_separate",
!is.na(redcap_event_name) & is.na(redcap_repeat_instrument) & !is.na(redcap_repeat_instance) ~
"repeat_together",
TRUE ~ "nonrepeating"
)
) %>%
distinct(.data$redcap_event_name, .data$repeat_type)

# Check for instances where the same event is labelled as nonrepeating & repeating separate
# If this is the case, it must be repeating separate (there is just data that qualifies as both)

out %>%
mutate(
is_duplicated = (duplicated(.data$redcap_event_name) | duplicated(.data$redcap_event_name, fromLast = TRUE))
) %>%
filter(!.data$is_duplicated | (.data$is_duplicated & .data$repeat_type == "repeat_separate")) %>%
select(-.data$is_duplicated)
}
48 changes: 28 additions & 20 deletions R/utils.R
Original file line number Diff line number Diff line change
Expand Up @@ -18,12 +18,13 @@
add_partial_keys <- function(db_data,
var = NULL) {
if (!is.null(enexpr(var))) {
pattern <- "^(\\w+?)_arm_(\\d)$"
# Include handling for instances where REDCap appends with "_1b" or similar
pattern <- "^(\\w+?)_arm_(\\d+\\w?)$"
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fix regex to address #206


db_data <- db_data %>%
mutate(
redcap_event = sub(pattern, "\\1", {{ var }}),
redcap_arm = as.integer(sub(pattern, "\\2", {{ var }}))
redcap_arm = as.character(sub(pattern, "\\2", {{ var }}))
)
}

Expand Down Expand Up @@ -64,31 +65,38 @@ create_repeat_instance_vars <- function(db_data) {
}

# Detect if repeat events exist
# Determined by non-NA vals in new "redcap_form_instance" alongside
# NA vals in "redcap_repeat_instrument"
# `has_repeat_forms` will always be TRUE for events to exist
if (has_repeat_forms) {
# First determined if redcap_event_instance added during mixed structure handling
# See: convert_mixed_instrument
has_repeat_events <- "redcap_event_instance" %in% names(out)

# Next determined by non-NA vals in new "redcap_form_instance" alongside
# NA vals in "redcap_repeat_instrument"
# `has_repeat_forms` will always be TRUE for events to exist
if (has_repeat_forms && !has_repeat_events) {
has_repeat_events <- any(
is.na(out$redcap_repeat_instrument) & !is.na(out$redcap_form_instance)
)
} else {
has_repeat_events <- FALSE
}

if (has_repeat_events) {
out$redcap_event_instance <- ifelse(
is.na(out$redcap_repeat_instrument) &
!is.na(out$redcap_form_instance),
out$redcap_form_instance,
NA
)
# In cases where there are repeating events but they were not added by
# convert_mixed_instrument(), add an empty redcap_event_instance column
if (!"redcap_event_instance" %in% names(out)) {
out$redcap_event_instance <- NA
}

out$redcap_form_instance <- ifelse(
is.na(out$redcap_repeat_instrument) &
!is.na(out$redcap_form_instance),
NA,
out$redcap_form_instance
)
out <- out %>%
mutate(
redcap_event_instance = case_when(
is.na(redcap_repeat_instrument) & !is.na(redcap_form_instance) ~ redcap_form_instance,
# Else leave NA or the value given by conver_mixed_instrument()
TRUE ~ redcap_event_instance
),
redcap_form_instance = case_when(
is.na(redcap_repeat_instrument) & !is.na(redcap_form_instance) ~ NA,
TRUE ~ redcap_form_instance
)
)

out <- relocate(out,
"redcap_event_instance",
Expand Down
5 changes: 4 additions & 1 deletion man/add_event_mapping.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

29 changes: 29 additions & 0 deletions man/get_repeat_event_types.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading
Loading