diff --git a/R/get_dataset.R b/R/get_dataset.R index 98cef2f..b8b8cf6 100644 --- a/R/get_dataset.R +++ b/R/get_dataset.R @@ -21,6 +21,8 @@ get_dataset <- function(dataset_name, max_resources = NULL, rows = NULL, + row_filters = NULL, + col_select = NULL, include_context = FALSE) { # throw error if name type/format is invalid check_dataset_name(dataset_name) @@ -50,7 +52,9 @@ get_dataset <- function(dataset_name, all_data <- purrr::map( selection_ids, get_resource, - rows = rows + rows = rows, + row_filters = row_filters, + col_select = col_select, ) # resolve class issues diff --git a/R/parse_col_select.R b/R/parse_col_select.R index 82232b2..e45eed8 100644 --- a/R/parse_col_select.R +++ b/R/parse_col_select.R @@ -8,7 +8,9 @@ parse_col_select <- function(col_select) { return(NULL) } - return( - paste0(col_select, collapse = ",") - ) + if (!inherits(col_select, "character")) { + cli::cli_abort("{.arg col_select} must be a {.cls character} vector, not a {.cls {class(col_select)}} vector.") + } + + return(paste0(col_select, collapse = ",")) } diff --git a/R/parse_row_filters.R b/R/parse_row_filters.R index 059d5ef..f099791 100644 --- a/R/parse_row_filters.R +++ b/R/parse_row_filters.R @@ -1,6 +1,6 @@ #' Create JSON 'dict' from named list or vector #' @description Formats a list or named vector into a valid query -#' @param row_filters list or named vectors matching fileds to values +#' @param row_filters list or named vectors matching fields to values #' @return a json as a character string parse_row_filters <- function(row_filters) { # exit function if no filters @@ -8,30 +8,37 @@ parse_row_filters <- function(row_filters) { return(NULL) } + # Check if `row_filters` is a list or a character or numeric vector + if (class(row_filters) != "list" && !is.character(row_filters) && !is.numeric(row_filters)) { + cli::cli_abort( + "{.arg row_filters} must be a named {.cls list} or a named + {.cls character} or {.cls numeric} vector, not a {.cls {class(row_filters)}}." + ) + } + + # Ensure it's elements are named + if (is.null(names(row_filters)) || any(names(row_filters) == "")) { + cli::cli_abort("{.arg row_filters} should be a named {.cls list}.") + } + # check if any filters in list have length > 1 - too_many <- sapply(row_filters, length) > 1 + too_many <- purrr::map_lgl(row_filters, ~ length(.x) > 1) if (any(too_many)) { cli::cli_abort(c( - "Invalid input for {.var row_filters}", - i = "{names(row_filters)[which(too_many)]} in {.var row_filters} has too many values. ", - x = "The {.var row_filters} list must only contain vectors of length 1." + "Invalid input for {.arg row_filters}", + i = "The {.val {names(row_filters)[which(too_many)]}} filter{?s} {?has/have} too many values.", + x = "The {.arg row_filters} list must only contain vectors of length 1." )) } - # check if any items in the list/vector have the same name - # find number of unique names - n_u_row_filters <- length(unique(names(row_filters))) - # find total number of names - n_row_filters <- length(names(row_filters)) - # if same, all names are unique - unique_names <- n_u_row_filters == n_row_filters - - if (!unique_names) { + # check if any items in the list/vector are duplicates + duplicates <- duplicated(names(row_filters)) + if (any(duplicates)) { cli::cli_abort(c( - "Invalid input for {.var row_filters}", - x = "One or more elements in {.var row_filters} have the same name.", - i = "Only one filter per field is currently supported by `get_resource`." + "Invalid input for {.arg row_filters}", + x = "The {.val {names(row_filters)[which(duplicates)]}} filter{?s} {?is/are} duplicated.", + i = "Only one filter per field is currently supported by {.fun get_resource}." )) } diff --git a/man/get_dataset.Rd b/man/get_dataset.Rd index 855f102..2acf66a 100644 --- a/man/get_dataset.Rd +++ b/man/get_dataset.Rd @@ -8,6 +8,8 @@ get_dataset( dataset_name, max_resources = NULL, rows = NULL, + row_filters = NULL, + col_select = NULL, include_context = FALSE ) } @@ -22,6 +24,14 @@ it will return the n latest resources} \item{rows}{(optional) specify the max number of rows to return for each resource.} +\item{row_filters}{(optional) a named list or vector that specifies values of +columns/fields to keep. +e.g. list(Date = 20220216, Sex = "Female").} + +\item{col_select}{(optional) a character vector containing the names of +desired columns/fields. +e.g. c("Date", "Sex").} + \item{include_context}{(optional) If \code{TRUE} additional information about the resource will be added as columns to the data, including the resource ID, the resource name, the creation date and the last modified/updated date.} diff --git a/man/parse_row_filters.Rd b/man/parse_row_filters.Rd index b6ed3d2..eed28c1 100644 --- a/man/parse_row_filters.Rd +++ b/man/parse_row_filters.Rd @@ -7,7 +7,7 @@ parse_row_filters(row_filters) } \arguments{ -\item{row_filters}{list or named vectors matching fileds to values} +\item{row_filters}{list or named vectors matching fields to values} } \value{ a json as a character string diff --git a/tests/testthat/test-get_dataset.R b/tests/testthat/test-get_dataset.R index f1cb8df..34ddbca 100644 --- a/tests/testthat/test-get_dataset.R +++ b/tests/testthat/test-get_dataset.R @@ -1,6 +1,6 @@ skip_if_offline(host = "www.opendata.nhs.scot") -test_that("returns data in the expected format", { +test_that("get_dataset returns data in the expected format", { n_resources <- 2 n_rows <- 2 data <- get_dataset( @@ -15,7 +15,25 @@ test_that("returns data in the expected format", { expect_named(data) }) -test_that("errors properly", { +test_that("get_dataset works properly with filters", { + n_resources <- 3 + n_rows <- 10 + columns <- c("Date", "PracticeCode", "HSCP", "AllAges") + + data <- get_dataset("gp-practice-populations", + max_resources = n_resources, + rows = n_rows, + row_filters = list(HSCP = "S37000026"), + col_select = columns + ) + + expect_s3_class(data, "tbl_df") + expect_equal(nrow(data), n_resources * n_rows) + expect_named(data, columns) + expect_true(all(data[["HSCP"]] == "S37000026")) +}) + +test_that("get_dataset errors properly", { expect_error(get_dataset("Mal-formed-name"), regexp = "The dataset name supplied `Mal-formed-name` is invalid" ) diff --git a/tests/testthat/test-get_resource_dump.R b/tests/testthat/test-get_resource_dump.R index 624e5c3..bd874e6 100644 --- a/tests/testthat/test-get_resource_dump.R +++ b/tests/testthat/test-get_resource_dump.R @@ -15,7 +15,7 @@ test_that("returns full data if rows is set to over 99999", { data <- get_resource( res_id = gp_list_apr_2021, rows = 9999999, - row_filters = c("GPPracticeName" = "The Blue Practice") + row_filters = list("GPPracticeName" = "The Blue Practice") ), regexp = "Can't request over 99,999 rows" ) diff --git a/tests/testthat/test-parse_row_filters.R b/tests/testthat/test-parse_row_filters.R index c9a9918..90558d5 100644 --- a/tests/testthat/test-parse_row_filters.R +++ b/tests/testthat/test-parse_row_filters.R @@ -4,24 +4,95 @@ test_that("returns NULL if `row_filters` = NULL", { ) }) +test_that("throws error for bad types", { + expect_error( + parse_row_filters(mtcars), + regexp = " must be a named .+? not a " + ) + expect_error( + parse_row_filters(c(x = TRUE)), + regexp = " must be a named .+? not a " + ) + expect_error( + parse_row_filters(c(x = NA)), + regexp = " must be a named .+? not a " + ) + expect_error( + parse_row_filters(c(x = as.Date("2000-01-01"))), + regexp = " must be a named .+? not a " + ) +}) + test_that("throws error for length > 1", { expect_error( parse_row_filters(list(x = letters)), - regexp = "(list must only contain vectors of length 1.)" + regexp = " has too many values\\." + ) +}) + +test_that("throws error when un-named", { + expect_error( + parse_row_filters(list(1, 2)), + regexp = " should be a named " + ) + expect_error( + parse_row_filters(list(a = 1, 2)), + regexp = " should be a named " + ) + expect_error( + parse_row_filters(c(1, 2)), + regexp = " should be a named " + ) + expect_error( + parse_row_filters(c(a = 1, 2)), + regexp = " should be a named " ) }) test_that("throws error for non-unique names", { expect_error( parse_row_filters(list(x = 1, x = 2)), - regexp = "Only one filter per field is currently supported by `get_resource`" + regexp = "Only one filter per field is currently supported" + ) + expect_error( + parse_row_filters(c(x = 1, x = 2)), + regexp = "Only one filter per field is currently supported" + ) +}) + + +test_that("returns JSON string from a named vector", { + expect_true( + jsonlite::validate( + parse_row_filters(c(x = 5.0, y = 6.0)) + ) + ) + expect_true( + jsonlite::validate( + parse_row_filters(c(x = 5L, y = 6L)) + ) + ) + expect_true( + jsonlite::validate( + parse_row_filters(c(x = "a", y = "b")) + ) ) }) test_that("returns JSON string from list", { expect_true( jsonlite::validate( - parse_row_filters(list(x = 5, y = 6)) + parse_row_filters(list(x = 5.0, y = 6.0)) + ) + ) + expect_true( + jsonlite::validate( + parse_row_filters(list(x = 5L, y = 6L)) + ) + ) + expect_true( + jsonlite::validate( + parse_row_filters(list(x = "a", y = "b")) ) ) })