Skip to content

Commit

Permalink
check_equal_col_summaries() implementation
Browse files Browse the repository at this point in the history
  • Loading branch information
rsh52 committed Aug 12, 2024
1 parent dcb1029 commit 0295650
Show file tree
Hide file tree
Showing 6 changed files with 83 additions and 15 deletions.
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ importFrom(dplyr,if_any)
importFrom(dplyr,if_else)
importFrom(dplyr,left_join)
importFrom(dplyr,mutate)
importFrom(dplyr,n_distinct)
importFrom(dplyr,nth)
importFrom(dplyr,pull)
importFrom(dplyr,recode)
Expand Down
2 changes: 1 addition & 1 deletion R/REDCapTidieR-package.R
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
#' @importFrom cli cli_abort cli_fmt cli_text cli_vec cli_warn qty
#' @importFrom dplyr %>% across bind_rows case_when filter group_by if_any if_else
#' left_join mutate pull recode relocate rename right_join row_number rowwise
#' select slice summarise ungroup coalesce cur_column bind_cols first nth
#' select slice summarise ungroup coalesce cur_column bind_cols first nth n_distinct
#' @importFrom formattable percent
#' @importFrom lobstr obj_size
#' @importFrom lubridate is.difftime is.period is.POSIXt is.Date
Expand Down
36 changes: 36 additions & 0 deletions R/checks.R
Original file line number Diff line number Diff line change
Expand Up @@ -741,3 +741,39 @@ check_fields_are_checkboxes <- function(metadata_tbl, call = caller_env()) {
)
}
}

#' @title Check equal distinct values between two columns
#'
#' @description
#' Takes a dataframe and two columns and checks if [n_distinct()] of the second
#' column is all unique based on grouping of the first column.
#'
#' @param data a dataframe
#' @param col1 a column to group by
#' @param col2 a column to check for uniqueness
#'
#' @keywords internal

check_equal_col_summaries <- function(data, col1, col2, call = caller_env()) {
check <- data %>%
summarise(
.by = {{ col1 }},
n = n_distinct({{ col2 }})
) %>%
pull(.data$n)

if (!all(check == 1)) {
values1 <- data[[eval_select(enquo(col1), data)]] # nolint: object_usage_linter
values2 <- data[[eval_select(enquo(col2), data)]] # nolint: object_usage_linter

msg <- c(
x = "Encountered unequal naming outputs.",
`!` = "{.code combine_checkboxes()} call resulted in column output: {.code {values1}} and new column output: {.code {values2}}." # nolint: line_length_linter
)

cli_abort(
msg,
class = c("unequal_col_summary", "REDCapTidieR_cond")
)
}
}
18 changes: 4 additions & 14 deletions R/combine_checkboxes.R
Original file line number Diff line number Diff line change
Expand Up @@ -154,23 +154,10 @@ get_metadata_spec <- function(metadata_tbl,
# Similar to pivot_*, use of `names_glue` overrides use of names_prefix/sep
glue_env <- out %>%
select(.data$.value) %>%
mutate(.new_value = as.character(glue::glue_data(., names_glue))) %>% #nolint: object_usage_linter
mutate(.new_value = as.character(glue::glue_data(., names_glue))) %>% # nolint: object_usage_linter
select(.data$.new_value)

out <- cbind(out, glue_env)

# Enforce .new_value to be the same within each level of .value
values <- factor(out$.value, levels = unique(out$.value))

# Create a mapping of old levels to new levels
level_mapping <- setNames(unique(glue_env$.new_value), levels(values))

# Ensure new_values matches the levels of values
new_values <- factor(level_mapping[as.character(values)],
levels = unique(level_mapping)
)

out$.new_value <- as.character(new_values)
} else {
out <- out %>%
mutate(
Expand All @@ -180,6 +167,9 @@ get_metadata_spec <- function(metadata_tbl,
)
}

# Check that for each unique value of .value there is one unique value of .new_value
check_equal_col_summaries(out, .value, .new_value) # nolint: object_usage_linter

# Make sure selection is checkbox metadata field type
check_fields_are_checkboxes(out)

Expand Down
20 changes: 20 additions & 0 deletions man/check_equal_col_summaries.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

21 changes: 21 additions & 0 deletions tests/testthat/test-checks.R
Original file line number Diff line number Diff line change
Expand Up @@ -293,3 +293,24 @@ test_that("check_fields_are_checkboxes works", {
expect_error(check_fields_are_checkboxes(metadata), class = "non_checkbox_fields")
expect_no_error(check_fields_are_checkboxes(metadata_filtered))
})

test_that("check_equal_col_summaries works", {
data <- tibble::tribble(
~"id", ~"col1", ~"col2",
1, "A", "A1",
2, "B", "B1",
3, "C", "C1"
)

expect_no_error(check_equal_col_summaries(data, col1, col2))

error_data <- tibble::tribble(
~"id", ~"col1", ~"col2",
1, "A", "A1",
2, "B", "B1",
3, "B", "B2"
)

check_equal_col_summaries(error_data, col1, col2) %>%
expect_error(class = "unequal_col_summary")
})

0 comments on commit 0295650

Please sign in to comment.