Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

combine_checkboxes #196

Merged
merged 39 commits into from
Aug 13, 2024
Merged
Show file tree
Hide file tree
Changes from 19 commits
Commits
Show all changes
39 commits
Select commit Hold shift + click to select a range
9b0471b
Reduce function initial draft
Jul 10, 2024
859926f
Reduce function fixes
Jul 11, 2024
c42399d
Small fixes
Jul 11, 2024
8862ac1
Draft tests, add no_val param
Jul 12, 2024
96c3309
Add keep param
Jul 12, 2024
218fca4
Fix `keep` param
Jul 15, 2024
54e3a99
Update documentation and API
Jul 15, 2024
ec5c19d
Update combine_checkbox api and docs
Jul 15, 2024
6b1fb48
Add check for if no fields exist in selection
Jul 15, 2024
d55dc00
Add check_fields_are_checkboxes function
Jul 15, 2024
261342d
Minor cleaning
Jul 15, 2024
e1d4eb8
Update version, test recheck workflow
Jul 15, 2024
7207f09
Test recheck workflow file
Jul 15, 2024
4f861e1
Fix linting
Jul 15, 2024
eb11152
Add combine_checkboxes() to pkgdown
Jul 15, 2024
7348324
Remove revdepcheck, update renv
Jul 15, 2024
62080af
Add standard checks for params
Jul 15, 2024
522d01d
Filename update
Jul 16, 2024
3a395cf
Filename change
Jul 16, 2024
347d2a3
Rename test file
rsh52 Jul 17, 2024
cce0d12
Fix record_id_field assign, remove rowwise call
rsh52 Jul 17, 2024
c250eda
Remove instrument_identifiers, use bind_cols
rsh52 Jul 17, 2024
b0a8564
Implement parse_labels, clean code, fix tests
rsh52 Jul 17, 2024
21f8879
Remove record_id field, lint
rsh52 Jul 17, 2024
31797c6
Apply additional cleanup suggestions
rsh52 Jul 18, 2024
2dfac9a
Add extract_metadata fnctn, tests
rsh52 Jul 18, 2024
ed55292
Support multiple values_to, logicals, new checks
Jul 24, 2024
c0b3885
Linting
Jul 24, 2024
7789a22
Update API, clean up, new methods, new docs
Jul 29, 2024
c185e39
Add check_metadata_fields_exist, update details
Aug 2, 2024
abdc512
Consoldiate and rework checkbox value conversion
Aug 2, 2024
50d47d6
Add names_repair strategy support
Aug 5, 2024
a6d150d
Remove names_suffix, restructure prefix/sep
Aug 5, 2024
0f868b8
Add names_glue spec
Aug 5, 2024
abefbee
Add glue support with names_glue
Aug 5, 2024
06d1337
Make glue dependency, remove install check
rsh52 Aug 12, 2024
dcb1029
Update glue spec handling
rsh52 Aug 12, 2024
0295650
check_equal_col_summaries() implementation
rsh52 Aug 12, 2024
127dd46
Update error message check_equal_col_summaries()
rsh52 Aug 13, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 18 additions & 0 deletions .github/workflows/recheck.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
on:
workflow_dispatch:
inputs:
which:
type: choice
description: Which dependents to check
options:
- strong
- most

name: Reverse dependency check

jobs:
revdep_check:
name: Reverse check ${{ inputs.which }} dependents
uses: r-devel/recheck/.github/workflows/recheck.yml@v1
with:
which: ${{ inputs.which }}
4 changes: 2 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Package: REDCapTidieR
Type: Package
Title: Extract 'REDCap' Databases into Tidy 'Tibble's
Version: 1.1.1
Version: 1.2.0
Authors@R: c(
person("Richard", "Hanna", , "[email protected]", role = c("aut", "cre"),
comment = c(ORCID = "0009-0005-6496-8154")),
Expand Down Expand Up @@ -52,5 +52,5 @@ Config/testthat/edition: 3
Encoding: UTF-8
Language: en-US
LazyData: true
RoxygenNote: 7.3.1
RoxygenNote: 7.3.2
Roxygen: list(markdown = TRUE)
13 changes: 13 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ S3method(tbl_sum,redcap_supertbl)
S3method(vec_ptype_abbr,redcap_supertbl)
export(add_skimr_metadata)
export(bind_tibbles)
export(combine_checkboxes)
export(extract_tibble)
export(extract_tibbles)
export(fmt_strip_field_embedding)
Expand Down Expand Up @@ -40,6 +41,8 @@ importFrom(dplyr,"%>%")
importFrom(dplyr,across)
importFrom(dplyr,bind_rows)
importFrom(dplyr,case_when)
importFrom(dplyr,coalesce)
importFrom(dplyr,cur_column)
importFrom(dplyr,filter)
importFrom(dplyr,group_by)
importFrom(dplyr,if_any)
Expand All @@ -50,10 +53,13 @@ importFrom(dplyr,pull)
importFrom(dplyr,recode)
importFrom(dplyr,relocate)
importFrom(dplyr,rename)
importFrom(dplyr,right_join)
importFrom(dplyr,row_number)
importFrom(dplyr,rowwise)
importFrom(dplyr,select)
importFrom(dplyr,slice)
importFrom(dplyr,summarise)
importFrom(dplyr,ungroup)
importFrom(formattable,percent)
importFrom(lobstr,obj_size)
importFrom(lubridate,is.Date)
Expand All @@ -66,6 +72,7 @@ importFrom(purrr,discard)
importFrom(purrr,flatten_chr)
importFrom(purrr,map)
importFrom(purrr,map2)
importFrom(purrr,map2_chr)
importFrom(purrr,map_int)
importFrom(purrr,map_lgl)
importFrom(purrr,pluck)
Expand All @@ -80,6 +87,7 @@ importFrom(readr,parse_integer)
importFrom(readr,parse_logical)
importFrom(readr,parse_time)
importFrom(rlang,"!!!")
importFrom(rlang,":=")
importFrom(rlang,.data)
importFrom(rlang,abort)
importFrom(rlang,as_closure)
Expand All @@ -95,6 +103,7 @@ importFrom(rlang,enexpr)
importFrom(rlang,enquo)
importFrom(rlang,env_poke)
importFrom(rlang,eval_tidy)
importFrom(rlang,expr)
importFrom(rlang,get_env)
importFrom(rlang,global_env)
importFrom(rlang,is_atomic)
Expand All @@ -103,6 +112,9 @@ importFrom(rlang,is_bare_list)
importFrom(rlang,is_installed)
importFrom(rlang,new_environment)
importFrom(rlang,quo_get_expr)
importFrom(rlang,quo_name)
importFrom(rlang,sym)
importFrom(rlang,syms)
importFrom(rlang,try_fetch)
importFrom(rlang,zap)
importFrom(stats,na.omit)
Expand All @@ -121,6 +133,7 @@ importFrom(tidyr,complete)
importFrom(tidyr,fill)
importFrom(tidyr,nest)
importFrom(tidyr,pivot_wider)
importFrom(tidyr,separate_wider_delim)
importFrom(tidyr,unnest)
importFrom(tidyr,unnest_wider)
importFrom(tidyselect,all_of)
Expand Down
2 changes: 2 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
# REDCapTidieR 1.2.0

# REDCapTidieR 1.1.1 (development version)

Version 1.1.1
Expand Down
14 changes: 9 additions & 5 deletions R/REDCapTidieR-package.R
Original file line number Diff line number Diff line change
Expand Up @@ -5,22 +5,26 @@
#' expect_factor expect_logical
#' @importFrom cli cli_abort cli_fmt cli_text cli_vec cli_warn qty
#' @importFrom dplyr %>% across bind_rows case_when filter group_by if_any if_else
#' left_join mutate pull recode relocate rename row_number select slice summarise
#' left_join mutate pull recode relocate rename right_join row_number rowwise
#' select slice summarise ungroup coalesce cur_column
#' @importFrom formattable percent
#' @importFrom lobstr obj_size
#' @importFrom lubridate is.difftime is.period is.POSIXt is.Date
#' @importFrom purrr compose map map2 map_int map_lgl pluck pmap_chr some pmap discard flatten_chr
#' @importFrom purrr compose map map2 map_int map_lgl pluck pmap_chr some pmap
#' discard flatten_chr map2_chr
#' @importFrom REDCapR redcap_arm_export redcap_event_instruments redcap_instruments
#' redcap_metadata_read redcap_read_oneshot sanitize_token
#' @importFrom rlang .data !!! abort as_closure caller_arg caller_env catch_cnd
#' check_installed cnd_muffle current_call current_env enexpr enquo env_poke
#' eval_tidy get_env global_env is_atomic is_bare_formula is_bare_list
#' is_installed new_environment quo_get_expr try_fetch zap as_label
#' eval_tidy get_env global_env is_atomic is_bare_formula is_bare_list quo_name
#' is_installed new_environment quo_get_expr try_fetch zap as_label sym syms expr
#' :=
#' @importFrom stringi stri_split_fixed
#' @importFrom stringr str_detect str_replace str_replace_all str_squish str_trunc
#' str_trim str_ends
#' @importFrom tibble as_tibble is_tibble tibble
#' @importFrom tidyr complete fill pivot_wider nest unnest unnest_wider
#' @importFrom tidyr complete fill pivot_wider nest separate_wider_delim unnest
#' unnest_wider
#' @importFrom tidyselect all_of any_of ends_with eval_select everything
#' starts_with where
#' @importFrom vctrs vec_ptype_abbr vec_ptype
Expand Down
53 changes: 53 additions & 0 deletions R/checks.R
Original file line number Diff line number Diff line change
Expand Up @@ -659,3 +659,56 @@ check_extra_field_values_message <- function(extra_field_values, call = caller_e
values = values
)
}

#' @title
#' Check fields exist for checkbox combination
#'
#' @param fields Vector of character strings to check the length of
#' @param expr A quosure expression
#' @param call The calling environment to use in the error message
#'
#' @keywords internal
check_fields_exist <- function(fields, expr, call = caller_env()) {
expr <- quo_name(expr)

if (length(fields) == 0) {
msg <- c(
x = "No fields detected using `{expr}`.",
i = "Ensure that the column names specified in {.arg cols} match the columns in your data. Check for typos or use {.pkg tidyselect} helpers like {.code starts_with()}, `contains()`, etc." # nolint: line_length_linter
)

cli_abort(
msg,
class = c("missing_checkbox_fields", "REDCapTidieR_cond")
)
}
}


#' @title
#' Check fields are of checkbox field type
#'
#' @param metadata_tbl A metadata tibble from a supertibble
#' @param call The calling environment to use in the error message
#'
#' @keywords internal

check_fields_are_checkboxes <- function(metadata_tbl, call = caller_env()) {
non_checkboxes <- metadata_tbl %>%
filter(.data$field_type != "checkbox")

if (nrow(non_checkboxes) > 0) {
non_checkboxes <- non_checkboxes %>%
pull(.data$field_name)

msg <- c(
x = "Non-checkbox fields selected for {.code form_name}",
`!` = "The following fields returned as non-checkbox field types: {.code {non_checkboxes}}"
)

cli_abort(
msg,
class = c("non_checkbox_fields", "REDCapTidieR_cond")
)
}
}
192 changes: 192 additions & 0 deletions R/combine_checkboxes.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,192 @@
#' @title Combine Checkbox Fields into a Single Column
#'
#' @description
#' [combine_checkboxes()] consolidates multiple checkbox fields in a REDCap data
#' tibble into a single column. This transformation simplifies analysis by
#' combining multiple binary columns into a singular and informative labelled
#' factor column.
#'
#' @param supertbl A supertibble generated by [read_redcap()]. Required.
#' @param tbl The `redcap_form_name` of the data tibble to extract. Required.
#' @param cols <[`tidy-select`][tidyr_tidy_select]> Checbox columns to combine to
#' single column. Required.
#' @param values_to A string specifying the name of the column to combine checkbox
#' values under. Required.
#' @param multi_value_label A string specifying the value to be used when multiple
#' checkbox fields are selected. Default "Multiple".
#' @param values_fill Value to use when no checkboxes are selected. Default `NA`.
#' @param raw_or_label Either 'raw' or 'label' to specify whether to use raw coded
#' values or labels for the options. Default 'label'.
#' @param keep Logical indicating whether to keep the original checkbox fields in
#' the output. Default `TRUE`.
#'
#' @return A modified supertibble.
#'
#' @examples
#' \dontrun{
#' supertbl <- read_redcap(redcap_uri, token)
#' combined_tbl <- combine_checkboxes(
#' supertbl = supertbl,
#' tbl = "demographics",
#' cols = starts_with("race"),
#' values_to = "race_combined",
#' multi_value_label = "Multiple",
#' values_fill = NA
#' )
#' }
#'
#' @export

combine_checkboxes <- function(supertbl,
tbl,
cols,
values_to,
multi_value_label = "Multiple",
values_fill = NA,
raw_or_label = "label",
keep = TRUE) {

# Check args ---
check_arg_is_supertbl(supertbl, req_cols = c("redcap_data", "redcap_metadata"))
check_arg_is_character(tbl, len = 1, any.missing = FALSE)
check_arg_is_character(values_to, len = 1, any.missing = FALSE)
check_arg_is_character(multi_value_label, len = 1, any.missing = TRUE)
check_arg_is_character(values_fill, len = 1, any.missing = TRUE)
check_arg_choices(raw_or_label, choices = c("label", "raw"))
check_arg_is_logical(keep, len = 1, any.missing = FALSE)

# Save user cols to enquosure
cols_exp <- enquo(cols)

# Extract tbl from supertbl
data_tbl <- supertbl %>%
extract_tibble(tbl)

# Get field names from cols_exp, check that fields exist
field_names <- names(eval_select(cols_exp, data = data_tbl))
check_fields_exist(fields = field_names, expr = cols_exp)

# Assume the first instrument in the metadata contains IDs
# REDCap enforces this constraints, we reflect this in read_redcap -> get_field_to_drop
record_id_field <- supertbl$redcap_metadata[supertbl$redcap_form_name == tbl][[1]]$field_name[1]
rsh52 marked this conversation as resolved.
Show resolved Hide resolved

# Combine record identifier with remaining possible project identifiers
instrument_identifiers <- c(
record_id_field,
"redcap_form_instance",
"redcap_form_name",
"redcap_event",
"redcap_event_instance"
)

# Define values_to as the count of TRUEs/1s for the given checkbox field
# Assign TRUE if multiple selections made, and FALSE if one or zero made
data_tbl_mod <- data_tbl %>%
select(any_of(instrument_identifiers), !!!eval_select(cols_exp, data_tbl)) %>%
mutate(
!!values_to := case_when(
rowSums(select(., eval_tidy(cols_exp))) > 1 ~ TRUE,
TRUE ~ FALSE
)
)
rsh52 marked this conversation as resolved.
Show resolved Hide resolved

# Get metadata reference table, check that chosen fields are checkboxes
metadata <- get_metadata_ref(data_tbl_mod, supertbl, tbl, instrument_identifiers)

# Replace TRUEs/1s with raw/label values from metadata
data_tbl_mod <- data_tbl_mod %>%
mutate(across(
-c(any_of(instrument_identifiers), !!values_to),
~ replace_true(.x,
cur_column(),
metadata = metadata,
raw_or_label = raw_or_label
)
))

# Convert values_to from TRUE/FALSE to multi_value_label or identified single val
data_tbl_mod <- data_tbl_mod %>%
mutate(
across(field_names, as.character), # enforce to character strings
across(!!values_to, ~ as.character(.))
rsh52 marked this conversation as resolved.
Show resolved Hide resolved
) %>%
rowwise() %>%
rsh52 marked this conversation as resolved.
Show resolved Hide resolved
mutate(
!!values_to := ifelse(!!sym(values_to) == "TRUE",
multi_value_label,
coalesce(!!!syms(field_names))
),
!!values_to := ifelse(is.na(!!sym(values_to)), values_fill, !!sym(values_to))
) %>%
ungroup() %>%
select(any_of(instrument_identifiers), !!values_to) %>%
mutate(
!!values_to := factor(!!sym(values_to), levels = c(metadata[[raw_or_label]], multi_value_label, values_fill))
)

# Join back onto original data tbl
data_tbl_mod <- data_tbl_mod %>%
right_join(data_tbl, by = intersect(instrument_identifiers, names(data_tbl_mod))) %>%
relocate(!!values_to, .after = everything())

# Keep or remove original multi columns
if (!keep) {
data_tbl_mod <- data_tbl_mod %>%
select(-field_names)
}

# Update the supertbl data tibble
supertbl$redcap_data[supertbl$redcap_form_name == tbl][[1]] <- data_tbl_mod

supertbl
}

#' @title Utility function for getting metadata raw and label values for checkboxes
#'
#' @param data a data tibble
#' @param supertbl A supertibble generated by [read_redcap()].
#' @param tbl The name of the REDCap form (instrument) containing the checkbox
#' fields.
#' @param instrument_identifiers Character string vector of project record identifier vars
#'
#' @returns a tibble
#'
#' @keywords internal
get_metadata_ref <- function(data,
rsh52 marked this conversation as resolved.
Show resolved Hide resolved
supertbl,
tbl,
instrument_identifiers) {
# Create a metadata reference table linking field name to raw and label values
out <- supertbl$redcap_metadata[supertbl$redcap_form_name == tbl][[1]] %>%
rsh52 marked this conversation as resolved.
Show resolved Hide resolved
filter(.data$field_name %in% names(data)[!names(data) %in% instrument_identifiers])

# Make sure selection is checkbox metadata field type
check_fields_are_checkboxes(out)

out <- out %>%
select(.data$field_name, .data$select_choices_or_calculations) %>%
mutate(
original_field = sub("___.*$", "", .data$field_name)
) %>%
mutate(
pairs = strsplit(.data$select_choices_or_calculations, " \\| "),
label_value = NA,
label_value = purrr::map2_chr(pairs, row_number(), \(.x, .y) .x[.y])
)
rsh52 marked this conversation as resolved.
Show resolved Hide resolved

out %>%
separate_wider_delim(.data$label_value, delim = ", ", names = c("raw", "label")) %>%
select(.data$field_name, .data$raw, .data$label)
}

#' @noRd
#' @keywords internal
replace_true <- function(col, col_name, metadata, raw_or_label) {
# Replace TRUEs/1s with the appropriate raw or label value from the metadata
replacement <- metadata %>%
filter(.data$field_name == col_name) %>%
pull(raw_or_label)
col <- ifelse(col == TRUE, replacement, NA) # col == TRUE works for raw or label because TRUE == 1 & 1 == TRUE
# Convert non-TRUEs to NA, since values can be either "FALSE" or "0" for unchecked values
return(col)
}
Loading
Loading