-
Notifications
You must be signed in to change notification settings - Fork 4
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #82 from hubverse-org/feature/handle-samples
Feature/ Handle V3 sample specification
- Loading branch information
Showing
83 changed files
with
10,130 additions
and
141 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,5 +1,6 @@ | ||
linters: linters_with_defaults( | ||
line_length_linter = line_length_linter(120L), | ||
commented_code_linter = NULL, | ||
object_length_linter = object_length_linter(length = 50L) | ||
object_length_linter = object_length_linter(length = 50L), | ||
cyclocomp_linter = cyclocomp_linter(complexity_limit = 20L) | ||
) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,6 +1,6 @@ | ||
Package: hubValidations | ||
Title: Testing framework for hubverse hub validations | ||
Version: 0.0.1 | ||
Version: 0.1.0 | ||
Authors@R: c( | ||
person( | ||
given = "Anna", | ||
|
@@ -34,12 +34,12 @@ Imports: | |
checkmate, | ||
cli, | ||
config, | ||
dplyr, | ||
dplyr (>= 1.1.0), | ||
fs, | ||
gh, | ||
hubAdmin (>= 0.1.0), | ||
hubData (>= 0.1.0), | ||
hubUtils (>= 0.0.1), | ||
hubAdmin (>= 1.0.0), | ||
hubData (>= 0.2.0), | ||
hubUtils (>= 0.1.0), | ||
jsonlite, | ||
jsonvalidate, | ||
lubridate, | ||
|
@@ -60,18 +60,18 @@ Suggests: | |
testthis, | ||
withr | ||
Remotes: | ||
Infectious-Disease-Modeling-Hubs/hubUtils, | ||
Infectious-Disease-Modeling-Hubs/hubData, | ||
Infectious-Disease-Modeling-Hubs/hubAdmin, | ||
hubverse-org/hubUtils, | ||
hubverse-org/hubData, | ||
hubverse-org/hubAdmin, | ||
assignUser/octolog, | ||
apache/arrow/[email protected] | ||
Config/testthat/edition: 3 | ||
Config/Needs/website: pkgdown, Infectious-Disease-Modeling-Hubs/hubStyle | ||
Config/Needs/website: pkgdown, hubverse-org/hubStyle | ||
Encoding: UTF-8 | ||
Roxygen: list(markdown = TRUE) | ||
RoxygenNote: 7.3.1 | ||
URL: https://github.com/Infectious-Disease-Modeling-Hubs/hubValidations, | ||
https://infectious-disease-modeling-hubs.github.io/hubValidations/ | ||
BugReports: https://github.com/Infectious-Disease-Modeling-Hubs/hubValidations/issues | ||
URL: https://github.com/hubverse-org/hubValidations, | ||
https://hubverse-org.github.io/hubValidations/ | ||
BugReports: https://github.com/hubverse-org/hubValidations/issues | ||
Depends: | ||
R (>= 3.5.0) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,81 @@ | ||
#' Check model output data tbl samples contain single unique values for each
#' compound task ID within individual samples
#'
#' The check is skipped when `tbl` contains no samples or when the hub's tasks
#' config is not a v3 config.
#' @param tbl a tibble/data.frame of the contents of the file being validated. Column types must **all be character**.
#' @inherit check_tbl_colnames params
#' @inherit check_tbl_colnames return
#' @details Output of the check includes an `errors` element, a list of items,
#' one for each sample failing validation, with the following structure:
#' - `mt_id`: Index identifying the config modeling task the sample is associated with.
#' - `output_type_id`: The output type ID of the sample that does not contain a
#' single, unique value for each compound task ID.
#' - `values`: The unique values of each compound task ID.
#'
#' See [hubverse documentation on samples](https://hubverse.io/en/latest/user-guide/sample-output-type.html)
#' for more details.
#' @export
check_tbl_spl_compound_tid <- function(tbl, round_id, file_path, hub_path) {
  config_tasks <- hubUtils::read_config(hub_path, "tasks")

  if (isFALSE(has_spls_tbl(tbl)) || isFALSE(hubUtils::is_v3_config(config_tasks))) {
    return(skip_v3_spl_check(file_path))
  }

  hash_tbl <- spl_hash_tbl(tbl, round_id, config_tasks)
  # TODO: Currently, samples must strictly match the compound task ID set expectations
  # and cannot handle coarser-grained compound task ID sets.
  # Rows with `n_compound_idx > 1` are the samples reported as failing.
  n_tbl <- hash_tbl[hash_tbl$n_compound_idx > 1L, ]

  check <- nrow(n_tbl) == 0L

  if (check) {
    details <- NULL
    errors <- NULL
  } else {
    errors <- comptid_mismatch(
      n_tbl, tbl, config_tasks, round_id
    )
    # Each error item holds exactly one `output_type_id`, so a single
    # type-stable pass replaces the superseded `purrr::flatten_chr()`.
    # The variable is only referenced inside the cli template below.
    output_type_ids <- purrr::map_chr(errors, ~ as.character(.x$output_type_id)) %>% # nolint: object_usage_linter
      unique() %>%
      sort()

    details <- cli::format_inline(
      "Sample{?s} {.val {output_type_ids}} d{?oes/o} not contain ",
      "unique compound task ID combinations. ",
      "See {.var errors} attribute for details."
    )
  }

  capture_check_cnd(
    check = check,
    file_path = file_path,
    msg_subject = "Each sample compound task ID",
    msg_attribute = "single, unique value.",
    msg_verbs = c("contains", "does not contain"),
    details = details,
    errors = errors,
    error = TRUE
  )
}
|
||
# Build the `errors` list for samples whose compound task IDs take more than
# one value.
#
# n_tbl: rows of the sample hash table flagged as failing; one error item is
#   produced per row, using its `mt_id` and `output_type_id` columns.
# tbl: the table being validated; only `output_type == "sample"` rows are used.
# config_tasks, round_id: passed to `get_round_compound_task_ids()`, whose
#   result is indexed by `mt_id`.
#
# Returns a list of `list(mt_id, output_type_id, values)` items, where `values`
# is a named list of the unique values of each compound task ID column that
# holds more than one value within the sample.
comptid_mismatch <- function(n_tbl, tbl, config_tasks, round_id) {
  # `%in%` never yields NA, so NA cells cannot introduce all-NA rows.
  tbl <- tbl[tbl$output_type %in% "sample", , drop = FALSE]
  # Loop invariant: look the round's compound task IDs up once.
  round_compound_taskids <- get_round_compound_task_ids(config_tasks, round_id)

  purrr::map(
    seq_along(n_tbl$output_type_id),
    ~ {
      x <- n_tbl[.x, ]
      compound_taskids <- round_compound_taskids[[x$mt_id]]
      # `drop = FALSE` keeps a data frame even when a single column is
      # selected from a plain data.frame (tibbles never drop).
      spl <- tbl[
        tbl$output_type_id %in% x$output_type_id,
        compound_taskids,
        drop = FALSE
      ] %>%
        unique()

      multi_valued <- purrr::map_lgl(spl, ~ length(unique(.x)) > 1L)
      values <- spl[, multi_valued, drop = FALSE] %>%
        as.list() %>%
        purrr::map(unique)

      list(
        mt_id = x$mt_id,
        output_type_id = x$output_type_id,
        values = values
      )
    }
  )
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,140 @@ | ||
#' Check that the number of samples submitted for each compound idx falls
#' within the range configured for its modeling task.
#'
#' The check is skipped when `tbl` contains no samples or when the hub's tasks
#' config is not a v3 config.
#' @param tbl a tibble/data.frame of the contents of the file being validated. Column types must **all be character**.
#' @inherit check_tbl_colnames params
#' @inherit check_tbl_col_types return
#' @details Output of the check includes an `errors` element, a list of items,
#' one for each compound_idx failing validation, with the following structure:
#' - `compound_idx`: the compound idx that failed validation of number of samples.
#' - `n`: the number of samples counted for the compound idx.
#' - `min_samples_per_task`: the minimum number of samples required for the compound idx.
#' - `max_samples_per_task`: the maximum number of samples allowed for the compound idx.
#' - `compound_idx_tbl`: a tibble of the expected structure for samples belonging to
#' the compound idx.
#'
#' See [hubverse documentation on samples](https://hubverse.io/en/latest/user-guide/sample-output-type.html)
#' for more details.
#' @export
check_tbl_spl_n <- function(tbl, round_id, file_path, hub_path) {
  config_tasks <- hubUtils::read_config(hub_path, "tasks")

  if (isFALSE(has_spls_tbl(tbl)) || isFALSE(hubUtils::is_v3_config(config_tasks))) {
    return(skip_v3_spl_check(file_path))
  }

  hash_tbl <- spl_hash_tbl(tbl, round_id, config_tasks)
  n_ranges <- get_round_spl_n_ranges(config_tasks, round_id)

  # Count the distinct samples (output type IDs) seen for each compound idx.
  spl_counts <- dplyr::summarise(
    dplyr::group_by(hash_tbl, .data$compound_idx),
    n = dplyr::n_distinct(.data$output_type_id),
    mt_id = unique(.data$mt_id)
  )
  # Attach each modeling task's configured range and flag the violations.
  spl_counts <- dplyr::left_join(spl_counts, n_ranges, by = "mt_id")
  spl_counts <- dplyr::mutate(
    spl_counts,
    less = .data$n < .data$n_min,
    more = .data$n > .data$n_max,
    out_range = .data$less | .data$more
  )
  n_tbl <- dplyr::filter(spl_counts, .data$out_range)

  check <- nrow(n_tbl) == 0L

  # Error payload and message details are only built when the check fails.
  errors <- if (check) NULL else n_mismatch_errors(n_tbl, hash_tbl, tbl)
  details <- if (check) NULL else n_mismatch_details(n_tbl)

  capture_check_cnd(
    check = check,
    file_path = file_path,
    msg_subject = "Required samples per compound idx task",
    msg_attribute = NULL,
    msg_verbs = c("present.", "not present."),
    details = details,
    errors = errors
  )
}
|
||
# Collect the sample count range configured for each modeling task of a round.
#
# For every model task returned by `hubUtils::get_round_model_tasks()`, the
# `output_type$sample$output_type_id_params` entry is looked up. Tasks without
# that entry contribute nothing; the others contribute one row holding `mt_id`
# (the task's index/name as supplied by `purrr::imap()`), `n_min`
# (`min_samples_per_task`) and `n_max` (`max_samples_per_task`). Rows are
# bound into a single tibble.
get_round_spl_n_ranges <- function(config_tasks, round_id) {
  model_tasks <- hubUtils::get_round_model_tasks(config_tasks, round_id)

  mt_ranges <- purrr::imap(
    model_tasks,
    function(model_task, mt_idx) {
      spl_params <- purrr::pluck(
        model_task,
        "output_type", "sample", "output_type_id_params"
      )
      # Guard clause: this modeling task has no sample parameters.
      if (is.null(spl_params)) {
        return(NULL)
      }
      tibble::tibble(
        mt_id = mt_idx,
        n_min = spl_params$min_samples_per_task,
        n_max = spl_params$max_samples_per_task
      )
    }
  )

  purrr::list_rbind(mt_ranges)
}
|
||
# Build the `errors` list for compound idxs whose sample count is out of range.
#
# n_tbl: one row per failing compound idx, with columns `compound_idx`, `n`,
#   `n_min` and `n_max`.
# hash_tbl: sample hash table mapping `output_type_id` to `compound_idx`.
# tbl: the table being validated; only `output_type == "sample"` rows are used.
#
# Returns a list named by compound idx. Each item holds the compound idx, the
# observed sample count, the configured min/max, and `compound_idx_tbl`: the
# rows of the first sample found for that compound idx, without the
# `output_type`, `output_type_id` and `value` columns.
n_mismatch_errors <- function(n_tbl, hash_tbl, tbl) {
  # Loop invariant: the columns kept in `compound_idx_tbl`.
  task_id_cols <- setdiff(names(tbl), c("output_type_id", "value", "output_type"))
  # `%in%` never yields NA, so NA cells cannot introduce all-NA rows.
  spl_tbl <- tbl[tbl$output_type %in% "sample", , drop = FALSE]

  purrr::map(
    purrr::set_names(n_tbl$compound_idx),
    ~ {
      # Use the first sample of the compound idx as the representative one.
      spl_id <- hash_tbl$output_type_id[hash_tbl$compound_idx == .x][1L]
      # `drop = FALSE` keeps a data frame even if a single column remains
      # in a plain data.frame (tibbles never drop).
      compound_idx_tbl <- spl_tbl[
        spl_tbl$output_type_id %in% spl_id,
        task_id_cols,
        drop = FALSE
      ]
      row <- n_tbl[n_tbl$compound_idx == .x, ]
      list(
        compound_idx = .x,
        n = row$n,
        min_samples_per_task = row$n_min,
        max_samples_per_task = row$n_max,
        compound_idx_tbl = compound_idx_tbl
      )
    }
  )
}
|
||
|
||
# Compose the `details` message for a failed sample-count check.
#
# n_tbl: one row per failing compound idx, with logical columns `less` and
#   `more` and a `compound_idx` column.
#
# Produces one sentence for the compound idxs flagged `less` and one for those
# flagged `more` (each omitted when no compound idx is flagged), followed by a
# pointer to the `errors` attribute, all joined with ". ".
n_mismatch_details <- function(n_tbl) {
  templates <- list(
    less = paste0(
      "File contains less than the minimum required number of samples per task ",
      "for compound idx{?s} {.val {compound_idx}}"
    ),
    more = paste0(
      "File contains more than the maximum required number of samples per task ",
      "for compound idx{?s} {.val {compound_idx}}"
    )
  )

  # `compound_idx` is only referenced from inside the cli template, which
  # `cli::format_inline()` evaluates in this function's frame.
  format_msg <- function(compound_idx, type) { # nolint: object_usage_linter
    cli::format_inline(templates[[type]])
  }

  range_msgs <- lapply(
    c("less", "more"),
    function(type) {
      flagged_idx <- n_tbl[n_tbl[[type]], "compound_idx", drop = TRUE]
      if (length(flagged_idx) == 0L) {
        return(NULL)
      }
      format_msg(flagged_idx, type)
    }
  )

  paste(
    c(purrr::compact(range_msgs), "See {.var errors} attribute for details."),
    collapse = ". "
  )
}
Oops, something went wrong.