Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Mixed Structure Data Loss Bug Resolution #201

Merged
merged 8 commits into from
Sep 3, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 7 additions & 3 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,8 +1,12 @@
# REDCapTidieR 1.2.0
# REDCapTidieR 1.2.0 (development version)

# REDCapTidieR 1.1.1 (development version)
- Added `combine_checkboxes()` analytics function
- Use `combine_checkboxes()` to consolidate multiple checkbox fields in a REDCap data tibble under a single column
- Fixed a bug in mixed structure databases that caused data loss when some fields had both repeating-separately and repeating-together behavior

Version 1.1.1
# REDCapTidieR 1.1.1

Version 1.1.1 (Released 2024-04-18)
==========================================================

- `read_redcap(raw_or_label = "haven")` now correctly casts categorical data values to character when their type is not character or numeric.
Expand Down
42 changes: 32 additions & 10 deletions R/clean_redcap_long.R
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,6 @@ clean_redcap_long <- function(db_data_long,
has_mixed_structure_forms <- FALSE # nolint: object_usage_linter

mixed_structure_ref <- data.frame()

if (allow_mixed_structure) {
# Retrieve mixed structure fields and forms in reference df
mixed_structure_ref <- get_mixed_structure_fields(db_data_long) %>%
Expand Down Expand Up @@ -399,17 +398,40 @@ distill_repeat_table_long <- function(form_name,

convert_mixed_instrument <- function(db_data_long, mixed_structure_ref) {
for (i in seq_len(nrow(mixed_structure_ref))) {
field <- mixed_structure_ref$field_name[i]
form <- mixed_structure_ref$form_name[i]

# Create a logical mask for rows needing update
update_mask <- is.na(db_data_long$redcap_repeat_instance) & !is.na(db_data_long[[field]])
field <- mixed_structure_ref$field_name[i] # nolint: object_usage_linter
form <- mixed_structure_ref$form_name[i] # nolint: object_usage_linter

# Update redcap_repeat_instance
db_data_long$redcap_repeat_instance <- if_else(update_mask, 1, db_data_long$redcap_repeat_instance)
# Create an update mask column to identify which mixed structure rows need updates
db_data_long <- db_data_long %>%
mutate(
update_mask = case_when(
# repeat separately instances
!is.na(!!as.symbol(field)) &
is.na(.data$redcap_repeat_instance) ~ TRUE,
# repeat together instances
!is.na(!!as.symbol(field)) &
!is.na(.data$redcap_repeat_instance) &
is.na(.data$redcap_repeat_instrument) ~ TRUE,
TRUE ~ FALSE
)
)

# Update redcap_repeat_instrument
db_data_long$redcap_repeat_instrument <- if_else(update_mask, form, db_data_long$redcap_repeat_instrument)
# Assign update data based on rules below
db_data_long <- db_data_long %>%
mutate(
redcap_repeat_instance = case_when(
# Add single instance repeat event instance vals when none exist
update_mask & is.na(redcap_repeat_instance) ~ 1,
# Keep repeat event instance vals when they already exist
update_mask & !is.na(redcap_repeat_instance) ~ redcap_repeat_instance,
TRUE ~ .data$redcap_repeat_instance
),
redcap_repeat_instrument = case_when(
update_mask ~ form,
TRUE ~ .data$redcap_repeat_instrument
)
) %>%
select(-.data$update_mask)
}

db_data_long
Expand Down
29 changes: 18 additions & 11 deletions tests/testthat/test-clean_redcap_long.R
Original file line number Diff line number Diff line change
Expand Up @@ -290,24 +290,31 @@ test_that("get_mixed_structure_fields works", {

test_that("convert_mixed_instrument works", {
mixed_structure_db <- tibble::tribble(
~record_id, ~redcap_repeat_instrument, ~redcap_repeat_instance, ~mixed_structure_variable, ~repeat_form_variable,
1, NA, NA, "A", NA,
2, "mixed_structure_form", 1, "B", NA,
3, "repeat_form", 1, NA, "C",
4, "repeat_form", 2, NA, "D"
~record_id, ~redcap_repeat_instrument, ~redcap_repeat_instance, ~mixed_structure_variable,
~repeat_form_variable, ~mixed_repeat_var,
1, NA, NA, "A", NA, NA,
2, "mixed_structure_form", 1, "B", NA, NA,
3, "repeat_form", 1, NA, "C", NA,
4, "repeat_form", 2, NA, "D", NA,
5, "mixed_repeat_together", 1, NA, NA, "E",
5, "mixed_repeat_together", 2, NA, NA, "F"
)

mixed_structure_ref <- tibble::tribble(
~field_name, ~rep_and_nonrep, ~form_name,
"mixed_structure_variable", TRUE, "mixed_structure_form"
"mixed_structure_variable", TRUE, "mixed_structure_form",
"mixed_repeat_var", TRUE, "mixed_repeat_together"
)

expected_out <- tibble::tribble(
~record_id, ~redcap_repeat_instrument, ~redcap_repeat_instance, ~mixed_structure_variable, ~repeat_form_variable,
1, "mixed_structure_form", 1, "A", NA,
2, "mixed_structure_form", 1, "B", NA,
3, "repeat_form", 1, NA, "C",
4, "repeat_form", 2, NA, "D"
~record_id, ~redcap_repeat_instrument, ~redcap_repeat_instance, ~mixed_structure_variable,
~repeat_form_variable, ~mixed_repeat_var,
1, "mixed_structure_form", 1, "A", NA, NA,
2, "mixed_structure_form", 1, "B", NA, NA,
3, "repeat_form", 1, NA, "C", NA,
4, "repeat_form", 2, NA, "D", NA,
5, "mixed_repeat_together", 1, NA, NA, "E",
5, "mixed_repeat_together", 2, NA, NA, "F"
)

out <- convert_mixed_instrument(mixed_structure_db, mixed_structure_ref)
Expand Down
Loading