Skip to content

Commit

Permalink
Support multiple values_to, logicals, new checks
Browse files Browse the repository at this point in the history
  • Loading branch information
Richard Hanna authored and Richard Hanna committed Jul 24, 2024
1 parent 2dfac9a commit ed55292
Show file tree
Hide file tree
Showing 6 changed files with 207 additions and 69 deletions.
29 changes: 29 additions & 0 deletions R/checks.R
Original file line number Diff line number Diff line change
Expand Up @@ -712,3 +712,32 @@ check_fields_are_checkboxes <- function(metadata_tbl, call = caller_env()) {
)
}
}

#' @title
#' Check values_to length against detected number of checkbox fields
#'
#' @param col_groups a list of column groups identified by checkbox field detection
#' @param values_to a user defined character vector passed from [combine_checkboxes()]
#' @param call The calling environment to use in the error message
#'
#' @keywords internal
check_values_to_length <- function(col_groups, values_to, call = caller_env()) {
if (length(values_to) < length(names(col_groups))) {
cli_warn(
message = c(
`!` = "Detected fewer {.code values_to} arguments than the number of checkbox fields. Only the first {length(values_to)} will be used."

Check warning on line 728 in R/checks.R

View workflow job for this annotation

GitHub Actions / lint

file=R/checks.R,line=728,col=121,[line_length_linter] Lines should not be more than 120 characters. This line is 143 characters.
),
class = c("checkbox_value_to_length", "REDCapTidieR_cond")
)
}

if (length(values_to) > length(names(col_groups))) {
cli_abort(
message = c(
`x` = "The number of {.code values_to} arguments supplied is greater than the number of checkbox fields detected.",

Check warning on line 737 in R/checks.R

View workflow job for this annotation

GitHub Actions / lint

file=R/checks.R,line=737,col=121,[line_length_linter] Lines should not be more than 120 characters. This line is 123 characters.
`i` = "{length(values_to)} {.code values_to} supplied, {length(names(col_groups))} checkbox fields detected."
),
class = c("checkbox_value_to_length", "REDCapTidieR_cond")
)
}
}
118 changes: 69 additions & 49 deletions R/combine_checkboxes.R
Original file line number Diff line number Diff line change
Expand Up @@ -48,68 +48,81 @@ combine_checkboxes <- function(supertbl,
# Check args ---
check_arg_is_supertbl(supertbl, req_cols = c("redcap_data", "redcap_metadata"))
check_arg_is_character(tbl, len = 1, any.missing = FALSE)
check_arg_is_character(values_to, len = 1, any.missing = FALSE)
check_arg_is_character(values_to, any.missing = FALSE)
check_arg_is_character(multi_value_label, len = 1, any.missing = TRUE)
check_arg_is_character(values_fill, len = 1, any.missing = TRUE)
check_arg_choices(raw_or_label, choices = c("label", "raw"))
check_arg_is_logical(keep, len = 1, any.missing = FALSE)

# Save user cols to enquosure
cols_exp <- enquo(cols)

# Extract tbl from supertbl
data_tbl <- supertbl %>%
extract_tibble(tbl)

# Get field names from cols_exp, check that fields exist
field_names <- names(eval_select(cols_exp, data = data_tbl))
check_fields_exist(fields = field_names, expr = cols_exp)
# Save user cols to quosure
cols_exp <- enquo(cols)

# Define values_to as the count of TRUEs/1s for the given checkbox field
# Assign TRUE if multiple selections made, and FALSE if one or zero made
data_tbl_mod <- data_tbl %>%
mutate(
!!values_to := case_when(
rowSums(select(., eval_tidy(cols_exp))) > 1 ~ TRUE,
TRUE ~ FALSE
)
)
# Evaluate the cols expression to get the selected column names
selected_cols <- names(eval_select(cols_exp, data = data_tbl))
check_fields_exist(fields = selected_cols, expr = cols_exp) # Check supplied fields exist

# Extract the prefix of each selected column
prefixes <- sub("___.*", "", selected_cols)

# Split the selected columns based on their prefixes
col_groups <- split(selected_cols, prefixes)
check_values_to_length(col_groups, values_to) # Check values_to columns match length of fields

# Get metadata reference table, check that chosen fields are checkboxes
metadata_tbl <- supertbl$redcap_metadata[supertbl$redcap_form_name == tbl][[1]]
metadata_ref <- get_metadata_ref(metadata_tbl, field_names)
metadata_ref <- get_metadata_ref(metadata_tbl, selected_cols)

# Define values_to as the count of TRUEs/1s for the given checkbox field
# Assign TRUE if multiple selections made, and FALSE if one or zero made
data_tbl_mod <- data_tbl

for (i in seq_along(values_to)) {
data_tbl_mod <- data_tbl_mod %>%
mutate(
!!values_to[i] := case_when(
rowSums(select(., col_groups[[i]])) > 1 ~ TRUE,
.default = FALSE
)
)
}

# Replace TRUEs/1s with raw/label values from metadata
data_tbl_mod <- data_tbl_mod %>%
mutate(across(
field_names,
selected_cols,
~ replace_true(.x,
cur_column(),
metadata = metadata_ref,
raw_or_label = raw_or_label
cur_column(),

Check warning on line 98 in R/combine_checkboxes.R

View workflow job for this annotation

GitHub Actions / lint

file=R/combine_checkboxes.R,line=98,col=21,[indentation_linter] Indentation should be 8 spaces but is 21 spaces.
metadata = metadata_ref,
raw_or_label = raw_or_label
)
))
),
across(selected_cols, as.character) # enforce to character strings
)

# Convert values_to from TRUE/FALSE to multi_value_label or identified single val
data_tbl_mod <- data_tbl_mod %>%
mutate(
across(field_names, as.character) # enforce to character strings
) %>%
mutate(
!!values_to := ifelse(!!sym(values_to),
multi_value_label,
coalesce(!!!syms(field_names))
),
!!values_to := ifelse(is.na(!!sym(values_to)),
values_fill,
!!sym(values_to)
for (i in seq_along(values_to)) {
metadata_overwrite <- metadata_ref %>% filter(field_name %in% col_groups[[i]]) %>% pull(raw_or_label)

Check warning on line 107 in R/combine_checkboxes.R

View workflow job for this annotation

GitHub Actions / lint

file=R/combine_checkboxes.R,line=107,col=51,[object_usage_linter] no visible binding for global variable 'field_name'

data_tbl_mod <- data_tbl_mod %>%
mutate(
!!values_to[i] := ifelse(!!sym(values_to[i]),
multi_value_label,

Check warning on line 112 in R/combine_checkboxes.R

View workflow job for this annotation

GitHub Actions / lint

file=R/combine_checkboxes.R,line=112,col=33,[indentation_linter] Indentation should be 10 spaces but is 33 spaces.
coalesce(!!!syms(col_groups[[i]]))
),
!!values_to[i] := ifelse(is.na(!!sym(values_to[i])),
values_fill,

Check warning on line 116 in R/combine_checkboxes.R

View workflow job for this annotation

GitHub Actions / lint

file=R/combine_checkboxes.R,line=116,col=33,[indentation_linter] Indentation should be 10 spaces but is 33 spaces.
!!sym(values_to[i])
)
) %>%
mutate(
!!values_to[i] := factor(!!sym(values_to[i]),
levels = c(metadata_overwrite, multi_value_label, values_fill)

Check warning on line 122 in R/combine_checkboxes.R

View workflow job for this annotation

GitHub Actions / lint

file=R/combine_checkboxes.R,line=122,col=33,[indentation_linter] Indentation should be 10 spaces but is 33 spaces.
)
)
) %>%
mutate(
!!values_to := factor(!!sym(values_to),
levels = c(metadata_ref[[raw_or_label]], multi_value_label, values_fill)
)
)
}

final_tbl <- bind_cols(
data_tbl,
Expand All @@ -119,7 +132,7 @@ combine_checkboxes <- function(supertbl,
# Keep or remove original multi columns
if (!keep) {
final_tbl <- final_tbl %>%
select(-field_names)
select(-selected_cols)
}

# Update the supertbl data tibble
Expand All @@ -131,29 +144,36 @@ combine_checkboxes <- function(supertbl,
#' @title Utility function for getting metadata raw and label values for checkboxes
#'
#' @param metadata_tbl A metadata tibble from the supertibble generated by [read_redcap()].
#' @param field_names Character string vector of field names for checkbox combination
#' @param selected_cols Character string vector of field names for checkbox combination
#'
#' @returns a tibble
#'
#' @keywords internal
get_metadata_ref <- function(metadata_tbl,
field_names) {
selected_cols) {
# Create a metadata reference table linking field name to raw and label values
out <- metadata_tbl %>%
filter(.data$field_name %in% field_names) %>%
# TODO: original_field a temporary placeholder for future multi-field and mapping dev
filter(.data$field_name %in% selected_cols) %>%
mutate(
original_field = sub("___.*$", "", .data$field_name)
)

# Make sure selection is checkbox metadata field type
check_fields_are_checkboxes(out)

# TODO: Make more robust for multi-field and mapping, using original_field above
parsed_vals <- parse_labels(first(out$select_choices_or_calculations))
# Bind raw/label values per original field grouping
parsed_vals <- tibble()

for (i in seq_along(unique(out$original_field))) {
index <- unique(out$original_field)[i]
out_filtered <- out %>% filter(original_field == index)

Check warning on line 169 in R/combine_checkboxes.R

View workflow job for this annotation

GitHub Actions / lint

file=R/combine_checkboxes.R,line=169,col=36,[object_usage_linter] no visible binding for global variable 'original_field'

parsed_vals <- rbind(parsed_vals, parse_labels(first(out_filtered$select_choices_or_calculations)))
}

bind_cols(out, parsed_vals) %>%
select(.data$field_name, .data$raw, .data$label)
select(.data$field_name, .data$raw, .data$label, original_field) %>%

Check warning on line 175 in R/combine_checkboxes.R

View workflow job for this annotation

GitHub Actions / lint

file=R/combine_checkboxes.R,line=175,col=54,[object_usage_linter] no visible binding for global variable 'original_field'

Check warning on line 175 in R/combine_checkboxes.R

View workflow job for this annotation

GitHub Actions / lint

file=R/combine_checkboxes.R,line=175,col=54,[object_usage_linter] no visible binding for global variable 'original_field'
relocate(original_field, .after = field_name)
}

#' @noRd
Expand Down
19 changes: 19 additions & 0 deletions man/check_values_to_length.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 2 additions & 2 deletions man/get_metadata_ref.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

19 changes: 19 additions & 0 deletions tests/testthat/test-checks.R
Original file line number Diff line number Diff line change
Expand Up @@ -272,3 +272,22 @@ test_that("check_fields_are_checkboxes works", {
expect_error(check_fields_are_checkboxes(metadata), class = "non_checkbox_fields")
expect_no_error(check_fields_are_checkboxes(metadata_filtered))
})

test_that("check_values_to_length length works", {
col_groups <- list(
checkbox_1 = c("checkbox1___1", "checkbox1___2"),
checkbox_2 = c("checkbox2___1")
)

values_to <- c("new_col1", "new_col2")

expect_no_message(check_values_to_length(col_groups, values_to))

values_to_warn <- "new_col1"

expect_warning(check_values_to_length(col_groups, values_to_warn), class = "checkbox_value_to_length")

values_to_error <- c("new_col1", "new_col2", "new_col3")

expect_error(check_values_to_length(col_groups, values_to_error), class = "checkbox_value_to_length")
})
87 changes: 69 additions & 18 deletions tests/testthat/test-combine_checkboxes.R
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
nonrepeat_data <- tibble::tribble(
~"study_id", ~"multi___1", ~"multi___2", ~"multi___3", ~"extra_data",
1, TRUE, FALSE, FALSE, 1,
2, TRUE, TRUE, FALSE, 2,
3, FALSE, FALSE, FALSE, 3
~"study_id", ~"multi___1", ~"multi___2", ~"multi___3", ~"single_checkbox___1", ~"extra_data",
1, TRUE, FALSE, FALSE, TRUE, 1,
2, TRUE, TRUE, FALSE, TRUE, 2,
3, FALSE, FALSE, FALSE, FALSE, 3
)

nonrepeat_metadata <- tibble::tribble(
Expand All @@ -11,7 +11,8 @@ nonrepeat_metadata <- tibble::tribble(
"multi___1", "checkbox", "1, Red | 2, Yellow | 3, Blue",
"multi___2", "checkbox", "1, Red | 2, Yellow | 3, Blue",
"multi___3", "checkbox", "1, Red | 2, Yellow | 3, Blue",
"extra_data", "dropdown", "1, 1 | 2, 2 | 3,3"
"single_checkbox___1", "checkbox", "4, Green",
"extra_data", "dropdown", "1, 1 | 2, 2 | 3, 3"
)

repeat_data <- tibble::tribble(
Expand Down Expand Up @@ -62,10 +63,10 @@ test_that("combine_checkboxes works for nonrepeat instrument", {
dplyr::first()

expected_out <- tibble::tribble(
~"study_id", ~"multi___1", ~"multi___2", ~"multi___3", ~"extra_data", ~"new_col",
1, TRUE, FALSE, FALSE, 1, "Red",
2, TRUE, TRUE, FALSE, 2, "multiple",
3, FALSE, FALSE, FALSE, 3, "none"
~"study_id", ~"multi___1", ~"multi___2", ~"multi___3", ~"single_checkbox___1", ~"extra_data", ~"new_col",
1, TRUE, FALSE, FALSE, TRUE, 1, "Red",
2, TRUE, TRUE, FALSE, TRUE, 2, "multiple",
3, FALSE, FALSE, FALSE, FALSE, 3, "none"
) %>%
mutate(
new_col = factor(new_col, levels = c("Red", "Yellow", "Blue", "multiple", "none"))
Expand All @@ -86,10 +87,10 @@ test_that("combine_checkboxes works for nonrepeat instrument and drop old values
dplyr::first()

expected_out <- tibble::tribble(
~"study_id", ~"extra_data", ~"new_col",
1, 1, "Red",
2, 2, "Multiple",
3, 3, NA
~"study_id", ~"single_checkbox___1", ~"extra_data", ~"new_col",
1, TRUE, 1, "Red",
2, TRUE, 2, "Multiple",
3, FALSE, 3, NA
) %>%
mutate(
new_col = factor(new_col, levels = c("Red", "Yellow", "Blue", "Multiple"))
Expand Down Expand Up @@ -124,14 +125,14 @@ test_that("combine_checkboxes works for repeat instrument", {
test_that("get_metadata_ref works", {
out <- get_metadata_ref(
metadata_tbl = supertbl$redcap_metadata[[1]],
field_names = c("multi___1", "multi___2", "multi___3")
selected_cols = c("multi___1", "multi___2", "multi___3")
)

expected_out <- tibble::tribble(
~"field_name", ~"raw", ~"label",
"multi___1", "1", "Red",
"multi___2", "2", "Yellow",
"multi___3", "3", "Blue"
~"field_name", ~"original_field", ~"raw", ~"label",
"multi___1", "multi", "1", "Red",
"multi___2", "multi", "2", "Yellow",
"multi___3", "multi", "3", "Blue"
)

expect_equal(out, expected_out)
Expand All @@ -155,3 +156,53 @@ test_that("replace_true works", {

expect_equal(out, expected_out)
})

test_that("combine_checkboxes works for multiple checkbox fields", {
out <- combine_checkboxes(
supertbl = supertbl,
tbl = "nonrepeat_instrument",
cols = c(starts_with("multi"), starts_with("single_checkbox")),
values_to = c("new_col1", "new_col2"),
keep = FALSE
) %>%
pull(redcap_data) %>%
dplyr::first()

expected_out <- tibble::tribble(
~"study_id",~"extra_data", ~"new_col1", ~"new_col2",
1, 1, "Red", "Green",
2, 2, "Multiple", "Green",
3, 3, NA, NA
) %>%
mutate(
new_col1 = factor(new_col1, levels = c("Red", "Yellow", "Blue", "Multiple")),
new_col2 = factor(new_col2, levels = c("Green", "Multiple"))
)

expect_equal(out, expected_out)
})

test_that("combine_checkboxes works for multiple checkbox fields with logicals", {
out <- combine_checkboxes(
supertbl = supertbl,
tbl = "nonrepeat_instrument",
cols = c(starts_with("multi")| starts_with("single_checkbox")),
values_to = c("new_col1", "new_col2"),
keep = FALSE
) %>%
pull(redcap_data) %>%
dplyr::first()

expected_out <- tibble::tribble(
~"study_id",~"extra_data", ~"new_col1", ~"new_col2",
1, 1, "Red", "Green",
2, 2, "Multiple", "Green",
3, 3, NA, NA
) %>%
mutate(
new_col1 = factor(new_col1, levels = c("Red", "Yellow", "Blue", "Multiple")),
new_col2 = factor(new_col2, levels = c("Green", "Multiple"))
)

expect_equal(out, expected_out)
})

0 comments on commit ed55292

Please sign in to comment.