From 5014c248cb9ecc58f4411db9c9ed385a7e160abf Mon Sep 17 00:00:00 2001 From: Anna Krystalli Date: Mon, 2 Oct 2023 18:11:03 +0300 Subject: [PATCH 1/7] wrap checks in try_check --- R/check_tbl_match_round_id.R | 2 +- R/check_tbl_unique_round_id.R | 2 +- R/try_check.R | 2 +- R/validate_model_data.R | 138 ++++++++++++++++++++-------------- R/validate_model_file.R | 54 +++++++------ R/validate_model_metadata.R | 49 +++++++----- R/validate_submission.R | 6 +- 7 files changed, 153 insertions(+), 100 deletions(-) diff --git a/R/check_tbl_match_round_id.R b/R/check_tbl_match_round_id.R index ae0f3dd1..6dce8957 100644 --- a/R/check_tbl_match_round_id.R +++ b/R/check_tbl_match_round_id.R @@ -10,7 +10,7 @@ check_tbl_match_round_id <- function(tbl, file_path, hub_path, if (is_info(check_round_id_col)) { return(check_round_id_col) } - if (is_failure(check_round_id_col)) { + if (is_failure(check_round_id_col) | is_exec_error(check_round_id_col)) { class(check_round_id_col)[1] <- "check_error" check_round_id_col$call <- rlang::call_name(rlang::current_call()) return(check_round_id_col) diff --git a/R/check_tbl_unique_round_id.R b/R/check_tbl_unique_round_id.R index ba84501d..8cf0851f 100644 --- a/R/check_tbl_unique_round_id.R +++ b/R/check_tbl_unique_round_id.R @@ -28,7 +28,7 @@ check_tbl_unique_round_id <- function(tbl, file_path, hub_path, if (is_info(check_round_id_col)) { return(check_round_id_col) } - if (is_failure(check_round_id_col)) { + if (is_failure(check_round_id_col) | is_exec_error(check_round_id_col)) { class(check_round_id_col)[1] <- "check_error" check_round_id_col$call <- rlang::call_name(rlang::current_call()) return(check_round_id_col) diff --git a/R/try_check.R b/R/try_check.R index 020c3efb..61eccec8 100644 --- a/R/try_check.R +++ b/R/try_check.R @@ -15,7 +15,7 @@ try_check <- function(expr, file_path) { return( capture_exec_error( file_path = file_path, - msg = msg, + msg = paste("EXEC ERROR:", msg), call = get_expr_call_name(expr) ) ) diff --git a/R/validate_model_data.R b/R/validate_model_data.R index 5fb7ef65..14e59712 100644 --- a/R/validate_model_data.R +++ b/R/validate_model_data.R @@ -17,11 +17,13 @@ validate_model_data <- function(hub_path, file_path, round_id_col = NULL, round_id <- file_meta$round_id # -- File parsing checks ---- - checks$file_read <- check_file_read( - file_path = file_path, - hub_path = hub_path + checks$file_read <- try_check( + check_file_read( + file_path = file_path, + hub_path = hub_path + ), file_path ) - if (is_error(checks$file_read)) { + if (is_any_error(checks$file_read)) { return(checks) } @@ -33,93 +35,115 @@ validate_model_data <- function(hub_path, file_path, round_id_col = NULL, # -- File round ID checks ---- # Will be skipped if round config round_id_from_var is FALSE and no round_id_col # value is explicitly specified. - checks$valid_round_id_col <- check_valid_round_id_col( - tbl, - round_id_col = round_id_col, - file_path = file_path, - hub_path = hub_path + checks$valid_round_id_col <- try_check( + check_valid_round_id_col( + tbl, + round_id_col = round_id_col, + file_path = file_path, + hub_path = hub_path + ), file_path ) # check_valid_round_id_col is run at the top of this function and if it does # not explicitly succeed (i.e. either fails or is is skipped), the output of # check_valid_round_id_col() is returned. - checks$unique_round_id <- check_tbl_unique_round_id( - tbl, - round_id_col = round_id_col, - file_path = file_path, - hub_path = hub_path + checks$unique_round_id <- try_check( + check_tbl_unique_round_id( + tbl, + round_id_col = round_id_col, + file_path = file_path, + hub_path = hub_path + ), file_path ) - if (is_error(checks$unique_round_id)) { + if (is_any_error(checks$unique_round_id)) { return(checks) } - checks$match_round_id <- check_tbl_match_round_id( - tbl, - round_id_col = round_id_col, - file_path = file_path, - hub_path = hub_path + checks$match_round_id <- try_check( + check_tbl_match_round_id( + tbl, + round_id_col = round_id_col, + file_path = file_path, + hub_path = hub_path + ), file_path ) - if (is_error(checks$match_round_id)) { + if (is_any_error(checks$match_round_id)) { return(checks) } # -- Column level checks ---- - checks$colnames <- check_tbl_colnames( - tbl, - round_id = round_id, - file_path = file_path, - hub_path = hub_path + checks$colnames <- try_check( + check_tbl_colnames( + tbl, + round_id = round_id, + file_path = file_path, + hub_path = hub_path + ), file_path ) - if (is_error(checks$colnames)) { + if (is_any_error(checks$colnames)) { return(checks) } - checks$col_types <- check_tbl_col_types( - tbl, - file_path = file_path, - hub_path = hub_path + checks$col_types <- try_check( + check_tbl_col_types( + tbl, + file_path = file_path, + hub_path = hub_path + ), file_path ) # -- Row level checks ---- - checks$valid_vals <- check_tbl_values( - tbl, - round_id = round_id, - file_path = file_path, - hub_path = hub_path + checks$valid_vals <- try_check( + check_tbl_values( + tbl, + round_id = round_id, + file_path = file_path, + hub_path = hub_path + ), file_path ) - if (is_error(checks$valid_vals)) { + if (is_any_error(checks$valid_vals)) { return(checks) } - checks$rows_unique <- check_tbl_rows_unique( - tbl, - file_path = file_path, - hub_path = hub_path + checks$rows_unique <- try_check( + check_tbl_rows_unique( + tbl, + file_path = file_path, + hub_path = hub_path + ), file_path ) - checks$req_vals <- check_tbl_values_required( - tbl, - round_id = round_id, - file_path = file_path, - hub_path = hub_path + checks$req_vals <- try_check( + check_tbl_values_required( + tbl, + round_id = round_id, + file_path = file_path, + hub_path = hub_path + ), file_path ) # -- Value column checks ---- - checks$value_col_valid <- check_tbl_value_col( - tbl, - round_id = round_id, - file_path = file_path, - hub_path = hub_path + checks$value_col_valid <- try_check( + check_tbl_value_col( + tbl, + round_id = round_id, + file_path = file_path, + hub_path = hub_path + ), file_path ) - checks$value_col_non_desc <- check_tbl_value_col_ascending( - tbl, - file_path = file_path + checks$value_col_non_desc <- try_check( + check_tbl_value_col_ascending( + tbl, + file_path = file_path + ), file_path ) - checks$value_col_sum1 <- check_tbl_value_col_sum1( - tbl, - file_path = file_path + checks$value_col_sum1 <- try_check( + check_tbl_value_col_sum1( + tbl, + file_path = file_path + ), file_path ) custom_checks <- execute_custom_checks( diff --git a/R/validate_model_file.R b/R/validate_model_file.R index d835738c..06ed9f72 100644 --- a/R/validate_model_file.R +++ b/R/validate_model_file.R @@ -20,47 +20,59 @@ validate_model_file <- function(hub_path, file_path, validations_cfg_path = NULL) { checks <- new_hub_validations() - checks$file_exists <- check_file_exists( - file_path = file_path, - hub_path = hub_path + checks$file_exists <- try_check( + check_file_exists( + file_path = file_path, + hub_path = hub_path + ), file_path ) - if (is_error(checks$file_exists)) { + if (is_any_error(checks$file_exists)) { return(checks) } - checks$file_name <- check_file_name(file_path) - if (is_error(checks$file_name)) { + checks$file_name <- try_check( + check_file_name(file_path), file_path + ) + if (is_any_error(checks$file_name)) { return(checks) } - checks$file_location <- check_file_location(file_path) + checks$file_location <- try_check( + check_file_location(file_path), file_path + ) file_meta <- parse_file_name(file_path) round_id <- file_meta$round_id - checks$round_id_valid <- check_valid_round_id( - round_id = round_id, - file_path = file_path, - hub_path = hub_path + checks$round_id_valid <- try_check( + check_valid_round_id( + round_id = round_id, + file_path = file_path, + hub_path = hub_path + ), file_path ) - if (is_error(checks$round_id_valid)) { + if (is_any_error(checks$round_id_valid)) { return(checks) } - checks$file_format <- check_file_format( - file_path = file_path, - hub_path = hub_path, - round_id = round_id + checks$file_format <- try_check( + check_file_format( + file_path = file_path, + hub_path = hub_path, + round_id = round_id + ), file_path ) - if (is_error(checks$file_format)) { + if (is_any_error(checks$file_format)) { return(checks) } - checks$metadata_exists <- check_submission_metadata_file_exists( - hub_path = hub_path, - file_path = file_path + checks$metadata_exists <- try_check( + check_submission_metadata_file_exists( + hub_path = hub_path, + file_path = file_path + ), file_path ) - if (is_error(checks$metadata_exists)) { + if (is_any_error(checks$metadata_exists)) { return(checks) } diff --git a/R/validate_model_metadata.R b/R/validate_model_metadata.R index f25b2b33..fe2b45a1 100644 --- a/R/validate_model_metadata.R +++ b/R/validate_model_metadata.R @@ -16,37 +16,52 @@ validate_model_metadata <- function(hub_path, file_path, round_id = "default", validations_cfg_path = NULL) { checks <- new_hub_validations() - checks$metadata_schema_exists <- check_metadata_schema_exists(hub_path) - if (is_error(checks$metadata_schema_exists)) { + checks$metadata_schema_exists <- try_check( + check_metadata_schema_exists(hub_path), file_path + ) + if (is_any_error(checks$metadata_schema_exists)) { return(checks) } - checks$metadata_file_exists <- check_metadata_file_exists( - file_path = file_path, - hub_path = hub_path + checks$metadata_file_exists <- try_check( + check_metadata_file_exists( + file_path = file_path, + hub_path = hub_path + ), file_path ) - if (is_error(checks$metadata_file_exists)) { + if (is_any_error(checks$metadata_file_exists)) { return(checks) } - checks$metadata_file_ext <- check_metadata_file_ext(file_path) - checks$metadata_file_location <- check_metadata_file_location(file_path) - if (is_error(checks$metadata_file_location) || - is_error(checks$metadata_file_ext)) { + checks$metadata_file_ext <- try_check( + check_metadata_file_ext(file_path), file_path + ) + checks$metadata_file_location <- try_check( + check_metadata_file_location(file_path), file_path + ) + if (is_any_error(checks$metadata_file_location) || + is_any_error(checks$metadata_file_ext)) { return(checks) } - checks$metadata_matches_schema <- check_metadata_matches_schema( - file_path = file_path, - hub_path = hub_path) - if (is_error(checks$metadata_matches_schema)) { + checks$metadata_matches_schema <- try_check( + check_metadata_matches_schema( + file_path = file_path, + hub_path = hub_path + ), file_path + ) + if (is_any_error(checks$metadata_matches_schema)) { return(checks) } # file name matches model id specified in metadata file - checks$metadata_file_name <- check_metadata_file_name(file_path = file_path, - hub_path = hub_path) - if (is_error(checks$metadata_file_name)) { + checks$metadata_file_name <- try_check( + check_metadata_file_name( + file_path = file_path, + hub_path = hub_path + ), file_path + ) + if (is_any_error(checks$metadata_file_name)) { return(checks) } diff --git a/R/validate_submission.R b/R/validate_submission.R index d1b4f07a..2ceddf99 100644 --- a/R/validate_submission.R +++ b/R/validate_submission.R @@ -16,10 +16,12 @@ validate_submission <- function(hub_path, file_path, round_id_col = NULL, validations_cfg_path = NULL, skip_submit_window_check = FALSE, skip_check_config = FALSE) { - check_hub_config <- new_hub_validations() if (!skip_check_config) { - check_hub_config$valid_config <- check_config_hub_valid(hub_path) + check_hub_config$valid_config <- try_check( + check_config_hub_valid(hub_path), + file_path + ) if (not_pass(check_hub_config$valid_config)) { return(check_hub_config) } From 1bd9584a4425b1aca5e9a360537fa9b02de6585a Mon Sep 17 00:00:00 2001 From: Anna Krystalli Date: Tue, 3 Oct 2023 10:35:52 +0300 Subject: [PATCH 2/7] Add success message to check_for_errors. Resolves #41 --- R/check_for_errors.R | 1 + tests/testthat/test-validate_pr.R | 5 +++-- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/R/check_for_errors.R b/R/check_for_errors.R index 5ea2e2ba..11275a0e 100644 --- a/R/check_for_errors.R +++ b/R/check_for_errors.R @@ -22,5 +22,6 @@ check_for_errors <- function(x) { ) } + cli::cli_alert_success("All validation checks have been successful.") return(invisible(TRUE)) } diff --git a/tests/testthat/test-validate_pr.R b/tests/testthat/test-validate_pr.R index f7de1c58..6d771498 100644 --- a/tests/testthat/test-validate_pr.R +++ b/tests/testthat/test-validate_pr.R @@ -11,8 +11,9 @@ test_that("validate_pr works on valid PR", { skip_submit_window_check = TRUE) expect_snapshot(str(checks)) - expect_invisible(check_for_errors(checks)) - + expect_invisible(suppressMessages(check_for_errors(checks))) + expect_message(check_for_errors(checks), + regexp = "All validation checks have been successful.") }) From 0d62780f16d555270706bf6ecd799d0e346b56d3 Mon Sep 17 00:00:00 2001 From: Anna Krystalli Date: Tue, 3 Oct 2023 10:36:22 +0300 Subject: [PATCH 3/7] Ensure name of hub directory included when hub_path = "." --- R/check_config_hub_valid.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/check_config_hub_valid.R b/R/check_config_hub_valid.R index 5aaed27b..90022fe7 100644 --- a/R/check_config_hub_valid.R +++ b/R/check_config_hub_valid.R @@ -22,7 +22,7 @@ check_config_hub_valid <- function(hub_path) { capture_check_cnd( check = check, - file_path = basename(hub_path), + file_path = basename(fs::path_abs(hub_path)), msg_subject = "All hub config files", msg_attribute = "valid.", msg_verbs = c("are", "must be"), From 2d27b027f198f64173429a1c9eb3f5905e298071 Mon Sep 17 00:00:00 2001 From: Anna Krystalli Date: Tue, 3 Oct 2023 12:20:55 +0300 Subject: [PATCH 4/7] Improve error message surfacing --- R/try_check.R | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/R/try_check.R b/R/try_check.R index 61eccec8..033d5f0b 100644 --- a/R/try_check.R +++ b/R/try_check.R @@ -10,7 +10,15 @@ try_check <- function(expr, file_path) { check <- try(expr, silent = TRUE) if (inherits(check, "try-error")) { - msg <- clean_msg(attr(check, "condition")$message) + message <- attr(check, "condition")$message + parent_msg <- attr(check, "condition")$parent$message + if (is.character(parent_msg)) { + parent_msg <- paste(parent_msg, collapse = " --> ") + msg <- paste(message, parent_msg, sep = " --> ") + } else { + msg <- message + } + msg <- clean_msg(msg) return( capture_exec_error( From cbae79ae3ccb980aa6db15a00d4ff5eb641509c7 Mon Sep 17 00:00:00 2001 From: Anna Krystalli Date: Tue, 3 Oct 2023 12:21:08 +0300 Subject: [PATCH 5/7] Add try_check tests --- tests/testthat/_snaps/try_check.md | 29 +++++++++++++++++++++++++++ tests/testthat/test-try_check.R | 32 ++++++++++++++++++++++++++++++ 2 files changed, 61 insertions(+) create mode 100644 tests/testthat/_snaps/try_check.md create mode 100644 tests/testthat/test-try_check.R diff --git a/tests/testthat/_snaps/try_check.md b/tests/testthat/_snaps/try_check.md new file mode 100644 index 00000000..3addd69f --- /dev/null +++ b/tests/testthat/_snaps/try_check.md @@ -0,0 +1,29 @@ +# try_check works + + Code + try_check(check_config_hub_valid(hub_path), "test_file.csv") + Output + + Message: + All hub config files are valid. + +--- + + Code + try_check(check_config_hub_valid("random_hub"), "test_file.csv") + Output + + Error: + ! EXEC ERROR: In index: 1. --> Assertion on 'hub_path' failed: Directory 'random_hub' does not exist. + +--- + + Code + try_check(opt_check_tbl_horizon_timediff(tbl, file_path, hub_path, t0_colname = "random_col1", + t1_colname = "random_col1", horizon_colname = "horizon", timediff = lubridate::weeks()), + file_path) + Output + + Error: + ! EXEC ERROR: Assertion on 't0_colname' failed: Must be element of set ['forecast_date','target_end_date','horizon','target','location','output_type','output_type_id','value'], but is 'random_col1'. + diff --git a/tests/testthat/test-try_check.R b/tests/testthat/test-try_check.R new file mode 100644 index 00000000..675e3ecf --- /dev/null +++ b/tests/testthat/test-try_check.R @@ -0,0 +1,32 @@ +test_that("try_check works", { + hub_path <- system.file("testhubs/flusight", package = "hubValidations") + + expect_snapshot( + try_check( + check_config_hub_valid(hub_path), + "test_file.csv" + ) + ) + + expect_snapshot( + try_check( + check_config_hub_valid("random_hub"), + "test_file.csv" + ) + ) + + file_path <- "hub-ensemble/2023-05-08-hub-ensemble.parquet" + tbl <- read_model_out_file(file_path, hub_path) + + expect_snapshot( + try_check( + opt_check_tbl_horizon_timediff(tbl, file_path, hub_path, + t0_colname = "random_col1", + t1_colname = "random_col1", + horizon_colname = "horizon", + timediff = lubridate::weeks() + ), + file_path + ) + ) +}) From e837dd3d12725e090062dbe0fd31c36f00a88db6 Mon Sep 17 00:00:00 2001 From: Anna Krystalli Date: Tue, 3 Oct 2023 12:55:17 +0300 Subject: [PATCH 6/7] Bump version, add evan as author --- DESCRIPTION | 7 ++++++- NEWS.md | 4 ++++ 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index e1f04abe..78d6d0db 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: hubValidations Title: Testing framework for hubverse hub validations -Version: 0.0.0.9000 +Version: 0.0.0.9001 Authors@R: c( person( given = "Anna", @@ -8,6 +8,11 @@ Authors@R: c( email = "annakrystalli@googlemail.com", role = c("aut", "cre"), comment = c(ORCID = "0000-0002-2378-4915")), + person( + given = "Evan", + family = "Ray", + email = "elray@umass.edu", + role = c("aut")), person( given = "Hugo", family = "Gruson", diff --git a/NEWS.md b/NEWS.md index ff98710c..9192af99 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,7 @@ +# hubValidations 0.0.0.9001 + +* Release of first draft `hubValidations` package + # hubValidations 0.0.0.9000 * Added a `NEWS.md` file to track changes to the package. From 65d6c8f5951fea889ac9597b67ceb9ef48629012 Mon Sep 17 00:00:00 2001 From: Anna Krystalli Date: Tue, 3 Oct 2023 18:30:35 +0300 Subject: [PATCH 7/7] Ignore READMEs when validating PR. Bullet list of unvalidated files. Resolves #46 --- R/validate_pr.R | 29 +++++++++++++++++++++++------ 1 file changed, 23 insertions(+), 6 deletions(-) diff --git a/R/validate_pr.R b/R/validate_pr.R index 3ae961c9..79c70dec 100644 --- a/R/validate_pr.R +++ b/R/validate_pr.R @@ -108,21 +108,38 @@ validate_pr <- function(hub_path = ".", gh_repo, pr_number, get_pr_dir_files <- function(pr_filenames, dir_name) { pr_filenames[ fs::path_has_parent(pr_filenames, dir_name) - ] %>% fs::path_rel(dir_name) + ] %>% + stringr::str_subset("README", negate = TRUE) %>% + fs::path_rel(dir_name) } inform_unvalidated_files <- function(model_output_files, model_metadata_files, pr_filenames) { validated_files <- c(model_output_files, model_metadata_files) - if (length(pr_filenames) != length(validated_files)) { + if (length(pr_filenames) == length(validated_files)) { + return(invisible(NULL)) + } + if (length(validated_files) == 0L) { + unvalidated_files <- pr_filenames + } else { validated_idx <- purrr::map_int( validated_files, ~ grep(.x, pr_filenames, fixed = TRUE) ) - cli::cli_inform( - "PR contains commits to additional files which have not been checked: - {.val {pr_filenames[-validated_idx]}}." - ) + unvalidated_files <- pr_filenames[-validated_idx] } + + unvalidated_bullets <- purrr::map_chr( + unvalidated_files, + ~ paste0("{.val ", .x, "}") + ) %>% + purrr::set_names(rep("*", length(unvalidated_files))) + + cli::cli_inform( + c( + "i" = "PR contains commits to additional files which have not been checked:", + unvalidated_bullets, "\n" + ) + ) }