diff --git a/NAMESPACE b/NAMESPACE
index bd5acea5..b04b619f 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -39,4 +39,5 @@ export(read_model_out_file)
 export(validate_model_data)
 export(validate_model_file)
 export(validate_model_metadata)
+export(validate_submission)
 importFrom(magrittr,"%>%")
diff --git a/R/validate_submission.R b/R/validate_submission.R
new file mode 100644
index 00000000..f5f437d1
--- /dev/null
+++ b/R/validate_submission.R
@@ -0,0 +1,35 @@
+#' Validate a submitted model data file. Checks both file level properties like
+#' file name, extension, location etc as well as model output data, i.e. the contents
+#' of the file.
+#'
+#' @inherit validate_model_data return params
+#' @export
+#'
+#' @examples
+#' hub_path <- system.file("testhubs/simple", package = "hubValidations")
+#' file_path <- "team1-goodmodel/2022-10-08-team1-goodmodel.csv"
+#' validate_submission(hub_path, file_path)
+validate_submission <- function(hub_path, file_path, round_id_col = NULL,
+                                validations_cfg_path = NULL) {
+  # File-level checks run first.
+  file_results <- validate_model_file(
+    hub_path = hub_path, file_path = file_path,
+    validations_cfg_path = validations_cfg_path
+  )
+  # Return early, skipping the data checks, if any file-level check errored.
+  file_check_errored <- purrr::map_lgl(file_results, is_error)
+  if (any(file_check_errored)) {
+    return(file_results)
+  }
+
+  # Checks on the contents of the file.
+  data_results <- validate_model_data(
+    hub_path = hub_path, file_path = file_path,
+    round_id_col = round_id_col, validations_cfg_path = validations_cfg_path
+  )
+
+  # File-level results come first, followed by the data-level results.
+  all_results <- c(file_results, data_results)
+  class(all_results) <- c("hub_validations", "list")
+  all_results
+}
diff --git a/man/validate_submission.Rd b/man/validate_submission.Rd
new file mode 100644
index 00000000..a9c085ff
--- /dev/null
+++ b/man/validate_submission.Rd
@@ -0,0 +1,51 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/validate_submission.R
+\name{validate_submission}
+\alias{validate_submission}
+\title{Validate a submitted model data file. 
Checks both file level properties like +file name, extension, location etc as well as model output data, i.e. the contents +of the file.} +\usage{ +validate_submission( + hub_path, + file_path, + round_id_col = NULL, + validations_cfg_path = NULL +) +} +\arguments{ +\item{hub_path}{Either a character string path to a local Modeling Hub directory +or an object of class \verb{<SubTreeFileSystem>} created using functions \code{\link[hubUtils:s3_bucket]{s3_bucket()}} +or \code{\link[hubUtils:gs_bucket]{gs_bucket()}} by providing a string S3 or GCS bucket name or path to a +Modeling Hub directory stored in the cloud. +For more details consult the +\href{https://arrow.apache.org/docs/r/articles/fs.html}{Using cloud storage (S3, GCS)} +in the \code{arrow} package. +The hub must be fully configured with valid \code{admin.json} and \code{tasks.json} +files within the \code{hub-config} directory.} + +\item{file_path}{character string. Path to the file being validated relative to +the hub's model-output directory.} + +\item{round_id_col}{Character string. The name of the column containing +\code{round_id}s. Usually, the value of round property \code{round_id} in hub \code{tasks.json} +config file.} + +\item{validations_cfg_path}{Path to \code{validations.yml} file. If \code{NULL} +defaults to \code{hub-config/validations.yml}.} +} +\value{ +An object of class \code{hub_validations}. Each named element contains +a \code{hub_check} class object reflecting the result of a given check. Function +will return early if a check returns an error. +} +\description{ +Validate a submitted model data file. Checks both file level properties like +file name, extension, location etc as well as model output data, i.e. the contents +of the file. 
+} +\examples{ +hub_path <- system.file("testhubs/simple", package = "hubValidations") +file_path <- "team1-goodmodel/2022-10-08-team1-goodmodel.csv" +validate_submission(hub_path, file_path) +} diff --git a/tests/testthat/_snaps/validate_submission.md b/tests/testthat/_snaps/validate_submission.md new file mode 100644 index 00000000..205f707c --- /dev/null +++ b/tests/testthat/_snaps/validate_submission.md @@ -0,0 +1,196 @@ +# validate_submission works + + Code + str(validate_submission(hub_path, file_path = "team1-goodmodel/2022-10-08-team1-goodmodel.csv")) + Output + List of 17 + $ file_exists :List of 4 + ..$ message : chr "File exists at path 'model-output/team1-goodmodel/2022-10-08-team1-goodmodel.csv'. \n " + ..$ where : chr "team1-goodmodel/2022-10-08-team1-goodmodel.csv" + ..$ call : chr "check_file_exists" + ..$ use_cli_format: logi TRUE + ..- attr(*, "class")= chr [1:5] "check_success" "hub_check" "rlang_message" "message" ... + $ file_name :List of 4 + ..$ message : chr "File name \"2022-10-08-team1-goodmodel.csv\" is valid. \n " + ..$ where : chr "team1-goodmodel/2022-10-08-team1-goodmodel.csv" + ..$ call : chr "check_file_name" + ..$ use_cli_format: logi TRUE + ..- attr(*, "class")= chr [1:5] "check_success" "hub_check" "rlang_message" "message" ... + $ file_location :List of 4 + ..$ message : chr "File directory name matches `model_id`\n metadata in file name. \n " + ..$ where : chr "team1-goodmodel/2022-10-08-team1-goodmodel.csv" + ..$ call : chr "check_file_location" + ..$ use_cli_format: logi TRUE + ..- attr(*, "class")= chr [1:5] "check_success" "hub_check" "rlang_message" "message" ... + $ round_id_valid :List of 4 + ..$ message : chr "`round_id` is valid. \n " + ..$ where : chr "team1-goodmodel/2022-10-08-team1-goodmodel.csv" + ..$ call : chr "check_valid_round_id" + ..$ use_cli_format: logi TRUE + ..- attr(*, "class")= chr [1:5] "check_success" "hub_check" "rlang_message" "message" ... 
+ $ file_format :List of 4 + ..$ message : chr "File is accepted hub format. \n " + ..$ where : chr "team1-goodmodel/2022-10-08-team1-goodmodel.csv" + ..$ call : chr "check_file_format" + ..$ use_cli_format: logi TRUE + ..- attr(*, "class")= chr [1:5] "check_success" "hub_check" "rlang_message" "message" ... + $ metadata_exists :List of 4 + ..$ message : chr "Metadata file exists at path 'model-metadata/team1-goodmodel.yaml'. \n " + ..$ where : chr "team1-goodmodel/2022-10-08-team1-goodmodel.csv" + ..$ call : chr "check_submission_metadata_file_exists" + ..$ use_cli_format: logi TRUE + ..- attr(*, "class")= chr [1:5] "check_success" "hub_check" "rlang_message" "message" ... + $ file_read :List of 4 + ..$ message : chr "File could be read successfully. \n " + ..$ where : chr "team1-goodmodel/2022-10-08-team1-goodmodel.csv" + ..$ call : chr "check_file_read" + ..$ use_cli_format: logi TRUE + ..- attr(*, "class")= chr [1:5] "check_success" "hub_check" "rlang_message" "message" ... + $ valid_round_id_col:List of 4 + ..$ message : chr "`round_id_col` name is valid. \n " + ..$ where : chr "team1-goodmodel/2022-10-08-team1-goodmodel.csv" + ..$ call : chr "check_valid_round_id_col" + ..$ use_cli_format: logi TRUE + ..- attr(*, "class")= chr [1:5] "check_success" "hub_check" "rlang_message" "message" ... + $ unique_round_id :List of 4 + ..$ message : chr "`round_id` column \"origin_date\" contains a single, unique round ID value. \n " + ..$ where : chr "team1-goodmodel/2022-10-08-team1-goodmodel.csv" + ..$ call : chr "check_tbl_unique_round_id" + ..$ use_cli_format: logi TRUE + ..- attr(*, "class")= chr [1:5] "check_success" "hub_check" "rlang_message" "message" ... + $ colnames :List of 4 + ..$ message : chr "Column names are consistent with expected round task IDs and std column names. 
\n " + ..$ where : chr "team1-goodmodel/2022-10-08-team1-goodmodel.csv" + ..$ call : chr "check_tbl_colnames" + ..$ use_cli_format: logi TRUE + ..- attr(*, "class")= chr [1:5] "check_success" "hub_check" "rlang_message" "message" ... + $ col_types :List of 4 + ..$ message : chr "Column data types match hub schema. \n " + ..$ where : chr "team1-goodmodel/2022-10-08-team1-goodmodel.csv" + ..$ call : chr "check_tbl_col_types" + ..$ use_cli_format: logi TRUE + ..- attr(*, "class")= chr [1:5] "check_success" "hub_check" "rlang_message" "message" ... + $ valid_vals :List of 4 + ..$ message : chr "Data rows contain valid value combinations \n " + ..$ where : chr "team1-goodmodel/2022-10-08-team1-goodmodel.csv" + ..$ call : chr "check_tbl_values" + ..$ use_cli_format: logi TRUE + ..- attr(*, "class")= chr [1:5] "check_success" "hub_check" "rlang_message" "message" ... + $ rows_unique :List of 4 + ..$ message : chr "All combinations of task ID column/`output_type`/`output_type_id` values are unique. \n " + ..$ where : chr "team1-goodmodel/2022-10-08-team1-goodmodel.csv" + ..$ call : chr "check_tbl_rows_unique" + ..$ use_cli_format: logi TRUE + ..- attr(*, "class")= chr [1:5] "check_success" "hub_check" "rlang_message" "message" ... + $ req_vals :List of 5 + ..$ message : chr "Required task ID/output type/output type ID combinations all present. \n " + ..$ where : chr "team1-goodmodel/2022-10-08-team1-goodmodel.csv" + ..$ missing : tibble [0 x 6] (S3: tbl_df/tbl/data.frame) + .. ..$ origin_date : chr(0) + .. ..$ target : chr(0) + .. ..$ horizon : chr(0) + .. ..$ location : chr(0) + .. ..$ output_type : chr(0) + .. ..$ output_type_id: chr(0) + ..$ call : chr "check_tbl_values_required" + ..$ use_cli_format: logi TRUE + ..- attr(*, "class")= chr [1:5] "check_success" "hub_check" "rlang_message" "message" ... + $ value_col_valid :List of 4 + ..$ message : chr "Values in column `value` all valid with respect to modeling task config. 
\n " + ..$ where : chr "team1-goodmodel/2022-10-08-team1-goodmodel.csv" + ..$ call : chr "check_tbl_value_col" + ..$ use_cli_format: logi TRUE + ..- attr(*, "class")= chr [1:5] "check_success" "hub_check" "rlang_message" "message" ... + $ value_col_non_desc:List of 5 + ..$ message : chr "Values in `value` column are non-decreasing as output_type_ids increase for all unique task ID\n value/outpu"| __truncated__ + ..$ where : chr "team1-goodmodel/2022-10-08-team1-goodmodel.csv" + ..$ error_tbl : NULL + ..$ call : chr "check_tbl_value_col_ascending" + ..$ use_cli_format: logi TRUE + ..- attr(*, "class")= chr [1:5] "check_success" "hub_check" "rlang_message" "message" ... + $ value_col_sum1 :List of 4 + ..$ message : chr "No pmf output types to check for sum of 1. Check skipped." + ..$ where : chr "team1-goodmodel/2022-10-08-team1-goodmodel.csv" + ..$ call : chr "check_tbl_value_col_sum1" + ..$ use_cli_format: logi TRUE + ..- attr(*, "class")= chr [1:5] "check_info" "hub_check" "rlang_message" "message" ... + - attr(*, "class")= chr [1:2] "hub_validations" "list" + +--- + + Code + str(validate_submission(hub_path, file_path = "team1-goodmodel/2022-10-15-team1-goodmodel.csv")) + Output + Classes 'hub_validations', 'list' hidden list of 1 + $ file_exists:List of 6 + ..$ message : chr "File does not exist at path 'model-output/team1-goodmodel/2022-10-15-team1-goodmodel.csv'. \n " + ..$ trace : NULL + ..$ parent : NULL + ..$ where : chr "team1-goodmodel/2022-10-15-team1-goodmodel.csv" + ..$ call : chr "check_file_exists" + ..$ use_cli_format: logi TRUE + ..- attr(*, "class")= chr [1:5] "check_error" "hub_check" "rlang_error" "error" ... + +--- + + Code + str(validate_submission(hub_path, file_path = "team1-goodmodel/2022-10-08-team1-goodmodel.csv", + round_id_col = "random_col")) + Output + List of 9 + $ file_exists :List of 4 + ..$ message : chr "File exists at path 'model-output/team1-goodmodel/2022-10-08-team1-goodmodel.csv'. 
\n " + ..$ where : chr "team1-goodmodel/2022-10-08-team1-goodmodel.csv" + ..$ call : chr "check_file_exists" + ..$ use_cli_format: logi TRUE + ..- attr(*, "class")= chr [1:5] "check_success" "hub_check" "rlang_message" "message" ... + $ file_name :List of 4 + ..$ message : chr "File name \"2022-10-08-team1-goodmodel.csv\" is valid. \n " + ..$ where : chr "team1-goodmodel/2022-10-08-team1-goodmodel.csv" + ..$ call : chr "check_file_name" + ..$ use_cli_format: logi TRUE + ..- attr(*, "class")= chr [1:5] "check_success" "hub_check" "rlang_message" "message" ... + $ file_location :List of 4 + ..$ message : chr "File directory name matches `model_id`\n metadata in file name. \n " + ..$ where : chr "team1-goodmodel/2022-10-08-team1-goodmodel.csv" + ..$ call : chr "check_file_location" + ..$ use_cli_format: logi TRUE + ..- attr(*, "class")= chr [1:5] "check_success" "hub_check" "rlang_message" "message" ... + $ round_id_valid :List of 4 + ..$ message : chr "`round_id` is valid. \n " + ..$ where : chr "team1-goodmodel/2022-10-08-team1-goodmodel.csv" + ..$ call : chr "check_valid_round_id" + ..$ use_cli_format: logi TRUE + ..- attr(*, "class")= chr [1:5] "check_success" "hub_check" "rlang_message" "message" ... + $ file_format :List of 4 + ..$ message : chr "File is accepted hub format. \n " + ..$ where : chr "team1-goodmodel/2022-10-08-team1-goodmodel.csv" + ..$ call : chr "check_file_format" + ..$ use_cli_format: logi TRUE + ..- attr(*, "class")= chr [1:5] "check_success" "hub_check" "rlang_message" "message" ... + $ metadata_exists :List of 4 + ..$ message : chr "Metadata file exists at path 'model-metadata/team1-goodmodel.yaml'. \n " + ..$ where : chr "team1-goodmodel/2022-10-08-team1-goodmodel.csv" + ..$ call : chr "check_submission_metadata_file_exists" + ..$ use_cli_format: logi TRUE + ..- attr(*, "class")= chr [1:5] "check_success" "hub_check" "rlang_message" "message" ... + $ file_read :List of 4 + ..$ message : chr "File could be read successfully. 
\n " + ..$ where : chr "team1-goodmodel/2022-10-08-team1-goodmodel.csv" + ..$ call : chr "check_file_read" + ..$ use_cli_format: logi TRUE + ..- attr(*, "class")= chr [1:5] "check_success" "hub_check" "rlang_message" "message" ... + $ valid_round_id_col:List of 4 + ..$ message : chr "`round_id_col` name must be valid. \n Must be one of\n \"origin_date\", \""| __truncated__ + ..$ where : chr "team1-goodmodel/2022-10-08-team1-goodmodel.csv" + ..$ call : chr "check_valid_round_id_col" + ..$ use_cli_format: logi TRUE + ..- attr(*, "class")= chr [1:5] "check_failure" "hub_check" "rlang_warning" "warning" ... + $ unique_round_id :List of 4 + ..$ message : chr "`round_id_col` name must be valid. \n Must be one of\n \"origin_date\", \""| __truncated__ + ..$ where : chr "team1-goodmodel/2022-10-08-team1-goodmodel.csv" + ..$ call : chr "check_tbl_unique_round_id" + ..$ use_cli_format: logi TRUE + ..- attr(*, "class")= chr [1:5] "check_error" "hub_check" "rlang_warning" "warning" ... + - attr(*, "class")= chr [1:2] "hub_validations" "list" + diff --git a/tests/testthat/test-validate_submission.R b/tests/testthat/test-validate_submission.R new file mode 100644 index 00000000..4c4fb5b1 --- /dev/null +++ b/tests/testthat/test-validate_submission.R @@ -0,0 +1,43 @@ +test_that("validate_submission works", { + hub_path <- system.file("testhubs/simple", package = "hubValidations") + + # Existing, valid file: snapshot holds the file-level and data-level check results + expect_snapshot( + str( + validate_submission(hub_path, + file_path = "team1-goodmodel/2022-10-08-team1-goodmodel.csv" + ) + ) + ) + expect_s3_class( + validate_submission(hub_path, + file_path = "team1-goodmodel/2022-10-08-team1-goodmodel.csv" + ), + c("hub_validations", "list") + ) + + # Non-existent file: file_exists errors, so only that check is returned (early return) + expect_snapshot( + str( + validate_submission(hub_path, + file_path = "team1-goodmodel/2022-10-15-team1-goodmodel.csv" + ) + ) + ) + expect_s3_class( + validate_submission(hub_path, + file_path = "team1-goodmodel/2022-10-15-team1-goodmodel.csv" + ), + 
c("hub_validations", "list") + ) + + expect_snapshot( + str( + validate_submission( + hub_path, + file_path = "team1-goodmodel/2022-10-08-team1-goodmodel.csv", + round_id_col = "random_col" + ) + ) + ) +})