Skip to content

Commit

Permalink
Add validate_submission fn
Browse files Browse the repository at this point in the history
  • Loading branch information
annakrystalli committed Sep 18, 2023
1 parent ce530a9 commit 92186df
Show file tree
Hide file tree
Showing 5 changed files with 326 additions and 0 deletions.
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -39,4 +39,5 @@ export(read_model_out_file)
export(validate_model_data)
export(validate_model_file)
export(validate_model_metadata)
export(validate_submission)
importFrom(magrittr,"%>%")
35 changes: 35 additions & 0 deletions R/validate_submission.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
#' Validate a submitted model data file
#'
#' Checks both file level properties like file name, extension, location etc
#' as well as model output data, i.e. the contents of the file. File level
#' checks are run first; if any of them is an error, only the file level
#' checks are returned and the model output data checks are not run.
#'
#' @inherit validate_model_data return params
#' @export
#'
#' @examples
#' hub_path <- system.file("testhubs/simple", package = "hubValidations")
#' file_path <- "team1-goodmodel/2022-10-08-team1-goodmodel.csv"
#' validate_submission(hub_path, file_path)
validate_submission <- function(hub_path, file_path, round_id_col = NULL,
                                validations_cfg_path = NULL) {
  # Stage 1: file level checks.
  file_checks <- validate_model_file(
    hub_path = hub_path,
    file_path = file_path,
    validations_cfg_path = validations_cfg_path
  )

  # Stop here when a file level check errored: the result of
  # validate_model_file() is handed back unchanged.
  has_file_error <- any(purrr::map_lgl(file_checks, is_error))
  if (has_file_error) {
    return(file_checks)
  }

  # Stage 2: checks on the contents of the file.
  data_checks <- validate_model_data(
    hub_path = hub_path,
    file_path = file_path,
    round_id_col = round_id_col,
    validations_cfg_path = validations_cfg_path
  )

  # Combine both stages and (re)apply the class to the combined list.
  structure(
    c(file_checks, data_checks),
    class = c("hub_validations", "list")
  )
}
51 changes: 51 additions & 0 deletions man/validate_submission.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

196 changes: 196 additions & 0 deletions tests/testthat/_snaps/validate_submission.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,196 @@
# validate_submission works

Code
str(validate_submission(hub_path, file_path = "team1-goodmodel/2022-10-08-team1-goodmodel.csv"))
Output
List of 17
$ file_exists :List of 4
..$ message : chr "File exists at path 'model-output/team1-goodmodel/2022-10-08-team1-goodmodel.csv'. \n "
..$ where : chr "team1-goodmodel/2022-10-08-team1-goodmodel.csv"
..$ call : chr "check_file_exists"
..$ use_cli_format: logi TRUE
..- attr(*, "class")= chr [1:5] "check_success" "hub_check" "rlang_message" "message" ...
$ file_name :List of 4
..$ message : chr "File name \"2022-10-08-team1-goodmodel.csv\" is valid. \n "
..$ where : chr "team1-goodmodel/2022-10-08-team1-goodmodel.csv"
..$ call : chr "check_file_name"
..$ use_cli_format: logi TRUE
..- attr(*, "class")= chr [1:5] "check_success" "hub_check" "rlang_message" "message" ...
$ file_location :List of 4
..$ message : chr "File directory name matches `model_id`\n metadata in file name. \n "
..$ where : chr "team1-goodmodel/2022-10-08-team1-goodmodel.csv"
..$ call : chr "check_file_location"
..$ use_cli_format: logi TRUE
..- attr(*, "class")= chr [1:5] "check_success" "hub_check" "rlang_message" "message" ...
$ round_id_valid :List of 4
..$ message : chr "`round_id` is valid. \n "
..$ where : chr "team1-goodmodel/2022-10-08-team1-goodmodel.csv"
..$ call : chr "check_valid_round_id"
..$ use_cli_format: logi TRUE
..- attr(*, "class")= chr [1:5] "check_success" "hub_check" "rlang_message" "message" ...
$ file_format :List of 4
..$ message : chr "File is accepted hub format. \n "
..$ where : chr "team1-goodmodel/2022-10-08-team1-goodmodel.csv"
..$ call : chr "check_file_format"
..$ use_cli_format: logi TRUE
..- attr(*, "class")= chr [1:5] "check_success" "hub_check" "rlang_message" "message" ...
$ metadata_exists :List of 4
..$ message : chr "Metadata file exists at path 'model-metadata/team1-goodmodel.yaml'. \n "
..$ where : chr "team1-goodmodel/2022-10-08-team1-goodmodel.csv"
..$ call : chr "check_submission_metadata_file_exists"
..$ use_cli_format: logi TRUE
..- attr(*, "class")= chr [1:5] "check_success" "hub_check" "rlang_message" "message" ...
$ file_read :List of 4
..$ message : chr "File could be read successfully. \n "
..$ where : chr "team1-goodmodel/2022-10-08-team1-goodmodel.csv"
..$ call : chr "check_file_read"
..$ use_cli_format: logi TRUE
..- attr(*, "class")= chr [1:5] "check_success" "hub_check" "rlang_message" "message" ...
$ valid_round_id_col:List of 4
..$ message : chr "`round_id_col` name is valid. \n "
..$ where : chr "team1-goodmodel/2022-10-08-team1-goodmodel.csv"
..$ call : chr "check_valid_round_id_col"
..$ use_cli_format: logi TRUE
..- attr(*, "class")= chr [1:5] "check_success" "hub_check" "rlang_message" "message" ...
$ unique_round_id :List of 4
..$ message : chr "`round_id` column \"origin_date\" contains a single, unique round ID value. \n "
..$ where : chr "team1-goodmodel/2022-10-08-team1-goodmodel.csv"
..$ call : chr "check_tbl_unique_round_id"
..$ use_cli_format: logi TRUE
..- attr(*, "class")= chr [1:5] "check_success" "hub_check" "rlang_message" "message" ...
$ colnames :List of 4
..$ message : chr "Column names are consistent with expected round task IDs and std column names. \n "
..$ where : chr "team1-goodmodel/2022-10-08-team1-goodmodel.csv"
..$ call : chr "check_tbl_colnames"
..$ use_cli_format: logi TRUE
..- attr(*, "class")= chr [1:5] "check_success" "hub_check" "rlang_message" "message" ...
$ col_types :List of 4
..$ message : chr "Column data types match hub schema. \n "
..$ where : chr "team1-goodmodel/2022-10-08-team1-goodmodel.csv"
..$ call : chr "check_tbl_col_types"
..$ use_cli_format: logi TRUE
..- attr(*, "class")= chr [1:5] "check_success" "hub_check" "rlang_message" "message" ...
$ valid_vals :List of 4
..$ message : chr "Data rows contain valid value combinations \n "
..$ where : chr "team1-goodmodel/2022-10-08-team1-goodmodel.csv"
..$ call : chr "check_tbl_values"
..$ use_cli_format: logi TRUE
..- attr(*, "class")= chr [1:5] "check_success" "hub_check" "rlang_message" "message" ...
$ rows_unique :List of 4
..$ message : chr "All combinations of task ID column/`output_type`/`output_type_id` values are unique. \n "
..$ where : chr "team1-goodmodel/2022-10-08-team1-goodmodel.csv"
..$ call : chr "check_tbl_rows_unique"
..$ use_cli_format: logi TRUE
..- attr(*, "class")= chr [1:5] "check_success" "hub_check" "rlang_message" "message" ...
$ req_vals :List of 5
..$ message : chr "Required task ID/output type/output type ID combinations all present. \n "
..$ where : chr "team1-goodmodel/2022-10-08-team1-goodmodel.csv"
..$ missing : tibble [0 x 6] (S3: tbl_df/tbl/data.frame)
.. ..$ origin_date : chr(0)
.. ..$ target : chr(0)
.. ..$ horizon : chr(0)
.. ..$ location : chr(0)
.. ..$ output_type : chr(0)
.. ..$ output_type_id: chr(0)
..$ call : chr "check_tbl_values_required"
..$ use_cli_format: logi TRUE
..- attr(*, "class")= chr [1:5] "check_success" "hub_check" "rlang_message" "message" ...
$ value_col_valid :List of 4
..$ message : chr "Values in column `value` all valid with respect to modeling task config. \n "
..$ where : chr "team1-goodmodel/2022-10-08-team1-goodmodel.csv"
..$ call : chr "check_tbl_value_col"
..$ use_cli_format: logi TRUE
..- attr(*, "class")= chr [1:5] "check_success" "hub_check" "rlang_message" "message" ...
$ value_col_non_desc:List of 5
..$ message : chr "Values in `value` column are non-decreasing as output_type_ids increase for all unique task ID\n value/outpu"| __truncated__
..$ where : chr "team1-goodmodel/2022-10-08-team1-goodmodel.csv"
..$ error_tbl : NULL
..$ call : chr "check_tbl_value_col_ascending"
..$ use_cli_format: logi TRUE
..- attr(*, "class")= chr [1:5] "check_success" "hub_check" "rlang_message" "message" ...
$ value_col_sum1 :List of 4
..$ message : chr "No pmf output types to check for sum of 1. Check skipped."
..$ where : chr "team1-goodmodel/2022-10-08-team1-goodmodel.csv"
..$ call : chr "check_tbl_value_col_sum1"
..$ use_cli_format: logi TRUE
..- attr(*, "class")= chr [1:5] "check_info" "hub_check" "rlang_message" "message" ...
- attr(*, "class")= chr [1:2] "hub_validations" "list"

---

Code
str(validate_submission(hub_path, file_path = "team1-goodmodel/2022-10-15-team1-goodmodel.csv"))
Output
Classes 'hub_validations', 'list' hidden list of 1
$ file_exists:List of 6
..$ message : chr "File does not exist at path 'model-output/team1-goodmodel/2022-10-15-team1-goodmodel.csv'. \n "
..$ trace : NULL
..$ parent : NULL
..$ where : chr "team1-goodmodel/2022-10-15-team1-goodmodel.csv"
..$ call : chr "check_file_exists"
..$ use_cli_format: logi TRUE
..- attr(*, "class")= chr [1:5] "check_error" "hub_check" "rlang_error" "error" ...

---

Code
str(validate_submission(hub_path, file_path = "team1-goodmodel/2022-10-08-team1-goodmodel.csv",
round_id_col = "random_col"))
Output
List of 9
$ file_exists :List of 4
..$ message : chr "File exists at path 'model-output/team1-goodmodel/2022-10-08-team1-goodmodel.csv'. \n "
..$ where : chr "team1-goodmodel/2022-10-08-team1-goodmodel.csv"
..$ call : chr "check_file_exists"
..$ use_cli_format: logi TRUE
..- attr(*, "class")= chr [1:5] "check_success" "hub_check" "rlang_message" "message" ...
$ file_name :List of 4
..$ message : chr "File name \"2022-10-08-team1-goodmodel.csv\" is valid. \n "
..$ where : chr "team1-goodmodel/2022-10-08-team1-goodmodel.csv"
..$ call : chr "check_file_name"
..$ use_cli_format: logi TRUE
..- attr(*, "class")= chr [1:5] "check_success" "hub_check" "rlang_message" "message" ...
$ file_location :List of 4
..$ message : chr "File directory name matches `model_id`\n metadata in file name. \n "
..$ where : chr "team1-goodmodel/2022-10-08-team1-goodmodel.csv"
..$ call : chr "check_file_location"
..$ use_cli_format: logi TRUE
..- attr(*, "class")= chr [1:5] "check_success" "hub_check" "rlang_message" "message" ...
$ round_id_valid :List of 4
..$ message : chr "`round_id` is valid. \n "
..$ where : chr "team1-goodmodel/2022-10-08-team1-goodmodel.csv"
..$ call : chr "check_valid_round_id"
..$ use_cli_format: logi TRUE
..- attr(*, "class")= chr [1:5] "check_success" "hub_check" "rlang_message" "message" ...
$ file_format :List of 4
..$ message : chr "File is accepted hub format. \n "
..$ where : chr "team1-goodmodel/2022-10-08-team1-goodmodel.csv"
..$ call : chr "check_file_format"
..$ use_cli_format: logi TRUE
..- attr(*, "class")= chr [1:5] "check_success" "hub_check" "rlang_message" "message" ...
$ metadata_exists :List of 4
..$ message : chr "Metadata file exists at path 'model-metadata/team1-goodmodel.yaml'. \n "
..$ where : chr "team1-goodmodel/2022-10-08-team1-goodmodel.csv"
..$ call : chr "check_submission_metadata_file_exists"
..$ use_cli_format: logi TRUE
..- attr(*, "class")= chr [1:5] "check_success" "hub_check" "rlang_message" "message" ...
$ file_read :List of 4
..$ message : chr "File could be read successfully. \n "
..$ where : chr "team1-goodmodel/2022-10-08-team1-goodmodel.csv"
..$ call : chr "check_file_read"
..$ use_cli_format: logi TRUE
..- attr(*, "class")= chr [1:5] "check_success" "hub_check" "rlang_message" "message" ...
$ valid_round_id_col:List of 4
..$ message : chr "`round_id_col` name must be valid. \n Must be one of\n \"origin_date\", \""| __truncated__
..$ where : chr "team1-goodmodel/2022-10-08-team1-goodmodel.csv"
..$ call : chr "check_valid_round_id_col"
..$ use_cli_format: logi TRUE
..- attr(*, "class")= chr [1:5] "check_failure" "hub_check" "rlang_warning" "warning" ...
$ unique_round_id :List of 4
..$ message : chr "`round_id_col` name must be valid. \n Must be one of\n \"origin_date\", \""| __truncated__
..$ where : chr "team1-goodmodel/2022-10-08-team1-goodmodel.csv"
..$ call : chr "check_tbl_unique_round_id"
..$ use_cli_format: logi TRUE
..- attr(*, "class")= chr [1:5] "check_error" "hub_check" "rlang_warning" "warning" ...
- attr(*, "class")= chr [1:2] "hub_validations" "list"

43 changes: 43 additions & 0 deletions tests/testthat/test-validate_submission.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
# Snapshot and class tests for validate_submission(), run against the
# "simple" test hub shipped in the hubValidations package.
# NOTE: each expect_snapshot() stores the call expression together with its
# output, so changing an expression below requires updating the snapshot file.
test_that("validate_submission works", {
hub_path <- system.file("testhubs/simple", package = "hubValidations")

# File that passes validation
expect_snapshot(
str(
validate_submission(hub_path,
file_path = "team1-goodmodel/2022-10-08-team1-goodmodel.csv"
)
)
)
# The full (file + data) result must carry the hub_validations class
expect_s3_class(
validate_submission(hub_path,
file_path = "team1-goodmodel/2022-10-08-team1-goodmodel.csv"
),
c("hub_validations", "list")
)

# File with validation error
# (the recorded snapshot reports this path as not existing, so only the
# file level checks are returned)
expect_snapshot(
str(
validate_submission(hub_path,
file_path = "team1-goodmodel/2022-10-15-team1-goodmodel.csv"
)
)
)
# The early-return result must carry the hub_validations class as well
expect_s3_class(
validate_submission(hub_path,
file_path = "team1-goodmodel/2022-10-15-team1-goodmodel.csv"
),
c("hub_validations", "list")
)

# Existing file validated with a `round_id_col` value ("random_col") that the
# recorded snapshot reports as an invalid `round_id_col` name
expect_snapshot(
str(
validate_submission(
hub_path,
file_path = "team1-goodmodel/2022-10-08-team1-goodmodel.csv",
round_id_col = "random_col"
)
)
)
})

0 comments on commit 92186df

Please sign in to comment.