Skip to content

Commit

Permalink
Merge pull request #505 from OuhscBbmc/checkbox-choices-2
Browse files Browse the repository at this point in the history
Checkbox choices 2
  • Loading branch information
wibeasley authored Jul 15, 2023
2 parents 1bdde82 + 5f75564 commit 34f2154
Show file tree
Hide file tree
Showing 3 changed files with 169 additions and 53 deletions.
2 changes: 1 addition & 1 deletion NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ This will help extract forms from longitudinal & repeating projects.
* `validate_repeat_instance()`
* `validate_no_logical()`
* `redcap_read()` checks the `event` parameter and throws an error if a value is not recognized, or the project is not longitudinal (#493)
* The regex in `regex_named_captures()` is forgiving if there's an unnecessary leading space (@BlairCooper, #495)
* The regex in `regex_named_captures()` is forgiving if there's an unnecessary leading space (@BlairCooper, #495, #501)
Version 1.1.0 (released 2022-08-10)
==========================================================
Expand Down
19 changes: 17 additions & 2 deletions R/metadata-utilities.R
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,7 @@
#' choices_3 <- ds_metadata_3[ds_metadata_3$field_name=="race", "select_choices_or_calculations"]
#' REDCapR::regex_named_captures(pattern = pattern_boxes, text = choices_3)

#' @importFrom magrittr %>%
#' @export
regex_named_captures <- function(pattern, text, perl = TRUE) {
checkmate::assert_character(pattern, any.missing = FALSE, len = 1, min.chars = 0L)
Expand Down Expand Up @@ -110,11 +111,25 @@ regex_named_captures <- function(pattern, text, perl = TRUE) {
}

#' @rdname metadata_utilities
#' @importFrom rlang .data
#' @export
checkbox_choices <- function(select_choices) {
checkmate::assert_character(select_choices, any.missing=FALSE, len=1, min.chars=1)

pattern_checkboxes <- "(?<=\\A| \\| |\\| )(?<id>\\d{1,}), (?<label>[^|]{1,}?)(?= \\| |\\| |\\Z)"
pattern <- "^(.+?),\\s*+(.*)$"

regex_named_captures(pattern = pattern_checkboxes, text = select_choices)
select_choices %>%
strsplit(split = "|", fixed = TRUE) %>%
magrittr::extract2(1) %>%
base::trimws() %>%
tibble::as_tibble() %>% # default column name is `value`
dplyr::filter(.data$value != "") %>%
dplyr::transmute(
id = sub(pattern, "\\1", .data$value, perl = TRUE),
label = sub(pattern, "\\2", .data$value, perl = TRUE),
)

# pattern_checkboxes <- "(?<=\\A| \\| |\\| )(?<id>\\d{1,}), (?<label>[^|]{1,}?)(?= \\| |\\| |\\Z)"
# pattern_checkboxes <- "(?<=\\A| \\| |\\| | \\|)(?<id>\\d{1,}), ?(?<label>[^|]{1,}?)(?= \\| |\\| | \\||\\Z)"
# regex_named_captures(pattern = pattern_checkboxes, text = select_choices)
}
201 changes: 151 additions & 50 deletions tests/testthat/test-metadata-utilities.R
Original file line number Diff line number Diff line change
Expand Up @@ -3,55 +3,157 @@ library(testthat)
test_that("Named Captures", {
pattern_checkboxes <- "(?<=\\A| \\| )(?<id>\\d{1,}), (?<label>[\x20-\x7B\x7D-\x7E]{1,})(?= \\| |\\Z)"

ds_expected <-
tibble::tribble(
~id, ~label,
"1", "American Indian/Alaska Native",
"2", "Asian",
"3", "Native Hawaiian or Other Pacific Islander",
"4", "Black or African American",
"5", "White",
"6", "Unknown / Not Reported"
)

choices_1 <- "1, American Indian/Alaska Native | 2, Asian | 3, Native Hawaiian or Other Pacific Islander | 4, Black or African American | 5, White | 6, Unknown / Not Reported"
ds_boxes <- regex_named_captures(pattern=pattern_checkboxes, text=choices_1)

ds_expected <- structure(
list(
id = c("1", "2", "3", "4", "5", "6"),
label = c("American Indian/Alaska Native", "Asian", "Native Hawaiian or Other Pacific Islander", "Black or African American", "White", "Unknown / Not Reported")
),
class = c("tbl_df", "tbl", "data.frame"),
row.names = c(NA, -6L)
)

expect_equal(ds_boxes, expected=ds_expected, label="The returned data.frame should be correct") #dput(ds_boxes)
expect_s3_class(ds_boxes, "tbl")
})

test_that("checkbox choices", {
choices_1 <- "1, American Indian/Alaska Native | 2, Asian | 3, Native Hawaiian or Other Pacific Islander | 4, Black or African American | 5, White | 6, Unknown / Not Reported"
ds_boxes <- checkbox_choices(select_choices=choices_1)
test_that("checkbox choices -digits", {
ds_expected <-
tibble::tribble(
~id, ~label,
"1", "American Indian/Alaska Native",
"-2", "Asian",
"3", "Native Hawaiian or Other Pacific Islander",
"4", "Black or African American",
"5", "White",
"66", "Unknown / Not Reported"
)

ds_expected <- structure(
list(
id = c("1", "2", "3", "4", "5", "6"),
label = c("American Indian/Alaska Native", "Asian", "Native Hawaiian or Other Pacific Islander", "Black or African American", "White", "Unknown / Not Reported")
),
class = c("tbl_df", "tbl", "data.frame"),
row.names = c(NA, -6L)
)
# well-behaved
"1, American Indian/Alaska Native | -2, Asian | 3, Native Hawaiian or Other Pacific Islander | 4, Black or African American | 5, White | 66, Unknown / Not Reported" |>
checkbox_choices() |>
expect_equal(ds_expected, label = "well-behaved:")

expect_equal(ds_boxes, expected=ds_expected, label="The returned data.frame should be correct") #dput(ds_boxes)
expect_s3_class(ds_boxes, "tbl")
# no leading spaces
"1, American Indian/Alaska Native |-2, Asian |3, Native Hawaiian or Other Pacific Islander |4, Black or African American |5, White |66, Unknown / Not Reported" |>
checkbox_choices() |>
expect_equal(ds_expected, label = "no leading spaces:")

# no trailing spaces
"1, American Indian/Alaska Native| -2, Asian| 3, Native Hawaiian or Other Pacific Islander| 4, Black or African American| 5, White| 66, Unknown / Not Reported" |>
checkbox_choices() |>
expect_equal(ds_expected, label = "no trailing spaces:")

# extra lines
"| | 1, American Indian/Alaska Native | | | -2, Asian | 3, Native Hawaiian or Other Pacific Islander | 4, Black or African American | 5, White | 66, Unknown / Not Reported | | | " |>
checkbox_choices() |>
expect_equal(ds_expected, label = "extra lines:")
})

test_that("checkbox choices -letters", {
ds_expected <- # datapasta::tribble_paste(ds_expected)
tibble::tribble(
~id, ~label,
"a", "American Indian/Alaska Native",
"b", "Asian",
"c", "Native Hawaiian or Other Pacific Islander",
"dd", "Black or African American",
"eee", "White",
"f", "Unknown / Not Reported"
)

# well-behaved
"a, American Indian/Alaska Native | b, Asian | c, Native Hawaiian or Other Pacific Islander | dd, Black or African American | eee, White | f, Unknown / Not Reported" |>
checkbox_choices() |>
expect_equal(ds_expected, label = "well-behaved:")

# no leading spaces
"a, American Indian/Alaska Native |b, Asian |c, Native Hawaiian or Other Pacific Islander |dd, Black or African American |eee, White |f, Unknown / Not Reported" |>
checkbox_choices() |>
expect_equal(ds_expected, label = "no leading spaces:")

# no trailing spaces
"a, American Indian/Alaska Native| b, Asian| c, Native Hawaiian or Other Pacific Islander| dd, Black or African American| eee, White| f, Unknown / Not Reported" |>
checkbox_choices() |>
expect_equal(ds_expected, label = "no trailing spaces:")
})

test_that("checkbox choices -commas in labels", {
ds_expected <- # datapasta::tribble_paste(ds_expected)
tibble::tribble(
~id , ~label,
"a" , "American Indian, Native American, or Alaska Native",
"b" , "Asian",
"c" , "Native Hawaiian, Samoan, or Other Pacific Islander",
"dd" , "Black or African American",
"eee" , "White",
"f" , "Unknown / Not Reported"
)

# well-behaved
"a, American Indian, Native American, or Alaska Native | b, Asian | c, Native Hawaiian, Samoan, or Other Pacific Islander | dd, Black or African American | eee, White | f, Unknown / Not Reported" |>
checkbox_choices() |>
expect_equal(ds_expected, label = "well-behaved:")

# no leading spaces
"a, American Indian, Native American, or Alaska Native |b, Asian |c, Native Hawaiian, Samoan, or Other Pacific Islander |dd, Black or African American |eee, White |f, Unknown / Not Reported" |>
checkbox_choices() |>
expect_equal(ds_expected, label = "no leading spaces:")

# no trailing spaces
"a, American Indian, Native American, or Alaska Native| b, Asian| c, Native Hawaiian, Samoan, or Other Pacific Islander| dd, Black or African American| eee, White| f, Unknown / Not Reported" |>
checkbox_choices() |>
expect_equal(ds_expected, label = "no trailing spaces:")
})

test_that("checkbox choices -digits only", {
ds_expected <- # datapasta::tribble_paste(ds_expected)
tibble::tribble(
~id , ~label,
"1" , "1",
"2" , "2",
"3" , "3",
"4" , "4"
)

# well-behaved
"1, 1 | 2, 2 | 3, 3 | 4, 4" |>
checkbox_choices() |>
expect_equal(ds_expected, label = "well-behaved:")

# missing leading space
"1, 1 | 2,2 | 3, 3 | 4, 4" |>
checkbox_choices() |>
expect_equal(ds_expected, label = "missing leading space:")

# missing trailing spaces
"1, 1 | 2, 2| 3, 3| 4, 4" |>
checkbox_choices() |>
expect_equal(ds_expected, label = "no leading spaces:")

# extra lines
"|1, 1 | 2, 2 | 3, 3 || 4, 4| |" |>
checkbox_choices() |>
expect_equal(ds_expected, label = "well-behaved:")
})

test_that("checkbox choices with special characters", {
choices_1 <- "1, Hospital A | 2, Hospitäl B | 3, Hospital Ç | 4, Hospítal D"
ds_boxes <- checkbox_choices(select_choices=choices_1)

ds_expected <- structure(
list(
id = c("1", "2", "3", "4"),
label = c("Hospital A", "Hospitäl B", "Hospital Ç", "Hospítal D")
),
class = c("tbl_df", "tbl", "data.frame"),
row.names = c(NA, -4L)
)

expect_equal(ds_boxes, expected=ds_expected, label="The returned data.frame should be correct")
expect_s3_class(ds_boxes, "tbl")
ds_expected <- # datapasta::tribble_paste(ds_expected)
tibble::tribble(
~id, ~label,
"1", "Hospital A",
"2", "Hospitäl B",
"3", "Hospital Ç",
"4", "Hospítal D"
)

"1, Hospital A | 2, Hospitäl B | 3, Hospital Ç | 4, Hospítal D" |>
checkbox_choices() |>
expect_equal(ds_expected)
})

###############################################################################
Expand All @@ -72,18 +174,17 @@ test_that("checkbox choices with special characters", {
# REDCap versions
###############################################################################
test_that("checkbox choices with errant space", {
choices_1 <- "1, Depressive mood disorder | 2, Adjustment disorder| 3, Personality disorder | 4, Anxiety | 0, Not Noted"
ds_boxes <- checkbox_choices(select_choices=choices_1)

ds_expected <- structure(
list(
id = c("1", "2", "3", "4", "0"),
label = c("Depressive mood disorder", "Adjustment disorder", "Personality disorder", "Anxiety", "Not Noted")
),
class = c("tbl_df", "tbl", "data.frame"),
row.names = c(NA, -5L)
)

expect_equal(ds_boxes, expected=ds_expected, label="The returned data.frame should be correct")
expect_s3_class(ds_boxes, "tbl")
ds_expected <-
tibble::tribble(
~id, ~label,
"1", "Depressive mood disorder",
"2", "Adjustment disorder",
"3", "Personality disorder",
"4", "Anxiety",
"0", "Not Noted"
)

"1, Depressive mood disorder | 2, Adjustment disorder| 3, Personality disorder | 4, Anxiety | 0, Not Noted" |>
checkbox_choices() |> # datapasta::tribble_paste()
expect_equal(ds_expected)
})

0 comments on commit 34f2154

Please sign in to comment.