From 9b9f6d5a167da0e54052eb13b7bbecdd9ccb76f3 Mon Sep 17 00:00:00 2001 From: James McMahon Date: Wed, 13 Nov 2024 18:07:20 +0000 Subject: [PATCH 1/8] Add `row_filters` and `col_select` to `get_dataset` --- R/get_dataset.R | 6 +++++- man/get_dataset.Rd | 10 ++++++++++ tests/testthat/test-get_dataset.R | 22 ++++++++++++++++++++-- 3 files changed, 35 insertions(+), 3 deletions(-) diff --git a/R/get_dataset.R b/R/get_dataset.R index 98cef2f..b8b8cf6 100644 --- a/R/get_dataset.R +++ b/R/get_dataset.R @@ -21,6 +21,8 @@ get_dataset <- function(dataset_name, max_resources = NULL, rows = NULL, + row_filters = NULL, + col_select = NULL, include_context = FALSE) { # throw error if name type/format is invalid check_dataset_name(dataset_name) @@ -50,7 +52,9 @@ get_dataset <- function(dataset_name, all_data <- purrr::map( selection_ids, get_resource, - rows = rows + rows = rows, + row_filters = row_filters, + col_select = col_select, ) # resolve class issues diff --git a/man/get_dataset.Rd b/man/get_dataset.Rd index 855f102..2acf66a 100644 --- a/man/get_dataset.Rd +++ b/man/get_dataset.Rd @@ -8,6 +8,8 @@ get_dataset( dataset_name, max_resources = NULL, rows = NULL, + row_filters = NULL, + col_select = NULL, include_context = FALSE ) } @@ -22,6 +24,14 @@ it will return the n latest resources} \item{rows}{(optional) specify the max number of rows to return for each resource.} +\item{row_filters}{(optional) a named list or vector that specifies values of +columns/fields to keep. +e.g. list(Date = 20220216, Sex = "Female").} + +\item{col_select}{(optional) a character vector containing the names of +desired columns/fields. +e.g. c("Date", "Sex").} + \item{include_context}{(optional) If \code{TRUE} additional information about the resource will be added as columns to the data, including the resource ID, the resource name, the creation date and the last modified/updated date.} diff --git a/tests/testthat/test-get_dataset.R b/tests/testthat/test-get_dataset.R index f1cb8df..34ddbca 100644 --- a/tests/testthat/test-get_dataset.R +++ b/tests/testthat/test-get_dataset.R @@ -1,6 +1,6 @@ skip_if_offline(host = "www.opendata.nhs.scot") -test_that("returns data in the expected format", { +test_that("get_dataset returns data in the expected format", { n_resources <- 2 n_rows <- 2 data <- get_dataset( @@ -15,7 +15,25 @@ test_that("returns data in the expected format", { expect_named(data) }) -test_that("errors properly", { +test_that("get_dataset works properly with filters", { + n_resources <- 3 + n_rows <- 10 + columns <- c("Date", "PracticeCode", "HSCP", "AllAges") + + data <- get_dataset("gp-practice-populations", + max_resources = n_resources, + rows = n_rows, + row_filters = list(HSCP = "S37000026"), + col_select = columns + ) + + expect_s3_class(data, "tbl_df") + expect_equal(nrow(data), n_resources * n_rows) + expect_named(data, columns) + expect_true(all(data[["HSCP"]] == "S37000026")) +}) + +test_that("get_dataset errors properly", { expect_error(get_dataset("Mal-formed-name"), regexp = "The dataset name supplied `Mal-formed-name` is invalid" ) From 963cb26869c234be2862274fa4f15f4884fc6999 Mon Sep 17 00:00:00 2001 From: James McMahon Date: Wed, 13 Nov 2024 18:39:27 +0000 Subject: [PATCH 2/8] Add additional check to `parse_col_select` --- R/parse_col_select.R | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/R/parse_col_select.R b/R/parse_col_select.R index 82232b2..e45eed8 100644 --- a/R/parse_col_select.R +++ b/R/parse_col_select.R @@ -8,7 +8,9 @@ parse_col_select <- function(col_select) { return(NULL) } - return( - paste0(col_select, collapse = ",") - ) + if (!inherits(col_select, "character")) { + cli::cli_abort("{.arg col_select} must be a {.cls character} vector, not a {.cls {class(col_select)}} vector.") + } + + return(paste0(col_select, collapse = ",")) } From 975b007d34d4dffe55f939218fac5d54babe1302 Mon Sep 17 00:00:00 2001 From: James McMahon Date: Wed, 13 Nov 2024 18:41:38 +0000 Subject: [PATCH 3/8] Add some additional checks to `row_filter` --- R/parse_row_filters.R | 30 ++++++++++++------------- tests/testthat/test-get_resource_dump.R | 2 +- tests/testthat/test-parse_row_filters.R | 2 +- 3 files changed, 16 insertions(+), 18 deletions(-) diff --git a/R/parse_row_filters.R b/R/parse_row_filters.R index 059d5ef..67fa5bb 100644 --- a/R/parse_row_filters.R +++ b/R/parse_row_filters.R @@ -1,6 +1,6 @@ #' Create JSON 'dict' from named list or vector #' @description Formats a list or named vector into a valid query -#' @param row_filters list or named vectors matching fileds to values +#' @param row_filters list or named vectors matching fields to values #' @return a json as a character string parse_row_filters <- function(row_filters) { # exit function if no filters @@ -8,30 +8,28 @@ parse_row_filters <- function(row_filters) { return(NULL) } + if (!inherits(row_filters, "list")) { + cli::cli_abort("{.arg row_filters} must be a {.cls list}, not a {.cls {class(row_filters)}}.") + } + # check if any filters in list have length > 1 too_many <- sapply(row_filters, length) > 1 if (any(too_many)) { cli::cli_abort(c( - "Invalid input for {.var row_filters}", - i = "{names(row_filters)[which(too_many)]} in {.var row_filters} has too many values. ", - x = "The {.var row_filters} list must only contain vectors of length 1." + "Invalid input for {.arg row_filters}", + i = "The {.val {names(row_filters)[which(too_many)]}} filter{?s} {?has/have} too many values.", + x = "The {.arg row_filters} list must only contain vectors of length 1." )) } - # check if any items in the list/vector have the same name - # find number of unique names - n_u_row_filters <- length(unique(names(row_filters))) - # find total number of names - n_row_filters <- length(names(row_filters)) - # if same, all names are unique - unique_names <- n_u_row_filters == n_row_filters - - if (!unique_names) { + # check if any items in the list/vector are duplicates + duplicates <- duplicated(names(row_filters)) + if (any(duplicates)) { cli::cli_abort(c( - "Invalid input for {.var row_filters}", - x = "One or more elements in {.var row_filters} have the same name.", - i = "Only one filter per field is currently supported by `get_resource`." + "Invalid input for {.arg row_filters}", + x = "The {.val {names(row_filters)[which(duplicates)]}} filter{?s} {?is/are} duplicated.", + i = "Only one filter per field is currently supported by {.fun get_resource}." )) } diff --git a/tests/testthat/test-get_resource_dump.R b/tests/testthat/test-get_resource_dump.R index 624e5c3..bd874e6 100644 --- a/tests/testthat/test-get_resource_dump.R +++ b/tests/testthat/test-get_resource_dump.R @@ -15,7 +15,7 @@ test_that("returns full data if rows is set to over 99999", { data <- get_resource( res_id = gp_list_apr_2021, rows = 9999999, - row_filters = c("GPPracticeName" = "The Blue Practice") + row_filters = list("GPPracticeName" = "The Blue Practice") ), regexp = "Can't request over 99,999 rows" ) diff --git a/tests/testthat/test-parse_row_filters.R b/tests/testthat/test-parse_row_filters.R index c9a9918..cf4e3a0 100644 --- a/tests/testthat/test-parse_row_filters.R +++ b/tests/testthat/test-parse_row_filters.R @@ -14,7 +14,7 @@ test_that("throws error for length > 1", { test_that("throws error for non-unique names", { expect_error( parse_row_filters(list(x = 1, x = 2)), - regexp = "Only one filter per field is currently supported by `get_resource`" + regexp = "Only one filter per field is currently supported" ) }) From ae59fb7a00eb879551d5c665fa152bed7dff2bdc Mon Sep 17 00:00:00 2001 From: Moohan Date: Wed, 13 Nov 2024 18:44:09 +0000 Subject: [PATCH 4/8] Update documentation --- man/parse_row_filters.Rd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/man/parse_row_filters.Rd b/man/parse_row_filters.Rd index b6ed3d2..eed28c1 100644 --- a/man/parse_row_filters.Rd +++ b/man/parse_row_filters.Rd @@ -7,7 +7,7 @@ parse_row_filters(row_filters) } \arguments{ -\item{row_filters}{list or named vectors matching fileds to values} +\item{row_filters}{list or named vectors matching fields to values} } \value{ a json as a character string From 0d7ecda8a77c1176f5ff7f6e27cc4497b4dfcfe7 Mon Sep 17 00:00:00 2001 From: James McMahon Date: Wed, 13 Nov 2024 20:56:28 +0000 Subject: [PATCH 5/8] Update parse_row_filters.R --- R/parse_row_filters.R | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/R/parse_row_filters.R b/R/parse_row_filters.R index 67fa5bb..f17e177 100644 --- a/R/parse_row_filters.R +++ b/R/parse_row_filters.R @@ -8,8 +8,21 @@ parse_row_filters <- function(row_filters) { return(NULL) } - if (!inherits(row_filters, "list")) { - cli::cli_abort("{.arg row_filters} must be a {.cls list}, not a {.cls {class(row_filters)}}.") + # Check if `row_filters` is a list or a character vector + if (!is.list(row_filters) && !is.character(row_filters)) { + cli::cli_abort("{.arg row_filters} must be a named {.cls list} or a named {.cls character} vector, not a {.cls {class(row_filters)}}.") + } + + # If it's a list, ensure it's depth 1 and elements are named + if (is.list(row_filters)) { + if (any(lengths(row_filters) > 1) || any(names(row_filters) == "")) { + cli::cli_abort("{.arg row_filters} must be a list of depth 1 with named elements.") + } + } + + # If it's a character vector, ensure it's named + if (is.character(row_filters) && any(names(row_filters) == "")) { + cli::cli_abort("{.arg row_filters} must be a named character vector.") } # check if any filters in list have length > 1 From c83f57ff3104acc64a947352636a3c0d4600ee2b Mon Sep 17 00:00:00 2001 From: Moohan Date: Wed, 13 Nov 2024 20:57:56 +0000 Subject: [PATCH 6/8] Style code (GHA) --- R/parse_row_filters.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/parse_row_filters.R b/R/parse_row_filters.R index f17e177..1749629 100644 --- a/R/parse_row_filters.R +++ b/R/parse_row_filters.R @@ -12,14 +12,14 @@ parse_row_filters <- function(row_filters) { if (!is.list(row_filters) && !is.character(row_filters)) { cli::cli_abort("{.arg row_filters} must be a named {.cls list} or a named {.cls character} vector, not a {.cls {class(row_filters)}}.") } - + # If it's a list, ensure it's depth 1 and elements are named if (is.list(row_filters)) { if (any(lengths(row_filters) > 1) || any(names(row_filters) == "")) { cli::cli_abort("{.arg row_filters} must be a list of depth 1 with named elements.") } } - + # If it's a character vector, ensure it's named if (is.character(row_filters) && any(names(row_filters) == "")) { cli::cli_abort("{.arg row_filters} must be a named character vector.") From 2fe5a2a06964b841340200119b147b1e03eb5477 Mon Sep 17 00:00:00 2001 From: James McMahon Date: Thu, 14 Nov 2024 10:02:51 +0000 Subject: [PATCH 7/8] Improve tests and errors for `parse_row_filter` --- R/parse_row_filters.R | 24 ++++----- tests/testthat/test-parse_row_filters.R | 71 ++++++++++++++++++++++++- 2 files changed, 79 insertions(+), 16 deletions(-) diff --git a/R/parse_row_filters.R b/R/parse_row_filters.R index 1749629..f099791 100644 --- a/R/parse_row_filters.R +++ b/R/parse_row_filters.R @@ -8,25 +8,21 @@ parse_row_filters <- function(row_filters) { return(NULL) } - # Check if `row_filters` is a list or a character vector - if (!is.list(row_filters) && !is.character(row_filters)) { - cli::cli_abort("{.arg row_filters} must be a named {.cls list} or a named {.cls character} vector, not a {.cls {class(row_filters)}}.") + # Check if `row_filters` is a list or a character or numeric vector + if (class(row_filters) != "list" && !is.character(row_filters) && !is.numeric(row_filters)) { + cli::cli_abort( + "{.arg row_filters} must be a named {.cls list} or a named + {.cls character} or {.cls numeric} vector, not a {.cls {class(row_filters)}}." + ) } - # If it's a list, ensure it's depth 1 and elements are named - if (is.list(row_filters)) { - if (any(lengths(row_filters) > 1) || any(names(row_filters) == "")) { - cli::cli_abort("{.arg row_filters} must be a list of depth 1 with named elements.") - } - } - - # If it's a character vector, ensure it's named - if (is.character(row_filters) && any(names(row_filters) == "")) { - cli::cli_abort("{.arg row_filters} must be a named character vector.") + # Ensure it's elements are named + if (is.null(names(row_filters)) || any(names(row_filters) == "")) { + cli::cli_abort("{.arg row_filters} should be a named {.cls list}.") } # check if any filters in list have length > 1 - too_many <- sapply(row_filters, length) > 1 + too_many <- purrr::map_lgl(row_filters, ~ length(.x) > 1) if (any(too_many)) { cli::cli_abort(c( diff --git a/tests/testthat/test-parse_row_filters.R b/tests/testthat/test-parse_row_filters.R index cf4e3a0..e64a81b 100644 --- a/tests/testthat/test-parse_row_filters.R +++ b/tests/testthat/test-parse_row_filters.R @@ -4,10 +4,44 @@ test_that("returns NULL if `row_filters` = NULL", { ) }) +test_that("throws error for bad types", { + expect_error( + parse_row_filters(mtcars), + regexp = " must be a named .+? not a " + ) + expect_error( + parse_row_filters(c(x = TRUE)), + regexp = " must be a named .+? not a " + ) + expect_error( + parse_row_filters(c(x = factor(1))), + regexp = " must be a named .+? not a " + ) +}) + test_that("throws error for length > 1", { expect_error( parse_row_filters(list(x = letters)), - regexp = "(list must only contain vectors of length 1.)" + regexp = " has too many values\\." + ) +}) + +test_that("throws error when un-named", { + expect_error( + parse_row_filters(list(1, 2)), + regexp = " should be a named " + ) + expect_error( + parse_row_filters(list(a = 1, 2)), + regexp = " should be a named " + ) + expect_error( + parse_row_filters(c(1, 2)), + regexp = " should be a named " + ) + expect_error( + parse_row_filters(c(a = 1, 2)), + regexp = " should be a named " ) }) @@ -16,12 +50,45 @@ test_that("throws error for non-unique names", { parse_row_filters(list(x = 1, x = 2)), regexp = "Only one filter per field is currently supported" ) + expect_error( + parse_row_filters(c(x = 1, x = 2)), + regexp = "Only one filter per field is currently supported" + ) +}) + + +test_that("returns JSON string from a named vector", { + expect_true( + jsonlite::validate( + parse_row_filters(c(x = 5.0, y = 6.0)) + ) + ) + expect_true( + jsonlite::validate( + parse_row_filters(c(x = 5L, y = 6L)) + ) + ) + expect_true( + jsonlite::validate( + parse_row_filters(c(x = "a", y = "b")) + ) + ) }) test_that("returns JSON string from list", { expect_true( jsonlite::validate( - parse_row_filters(list(x = 5, y = 6)) + parse_row_filters(list(x = 5.0, y = 6.0)) + ) + ) + expect_true( + jsonlite::validate( + parse_row_filters(list(x = 5L, y = 6L)) + ) + ) + expect_true( + jsonlite::validate( + parse_row_filters(list(x = "a", y = "b")) ) ) }) From 29f88f55744e6935d5324dd333de11e7d4571f23 Mon Sep 17 00:00:00 2001 From: James McMahon Date: Thu, 14 Nov 2024 15:27:19 +0000 Subject: [PATCH 8/8] Fix tests Something must have changed with factors between v4.0.1 and now... --- tests/testthat/test-parse_row_filters.R | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/tests/testthat/test-parse_row_filters.R b/tests/testthat/test-parse_row_filters.R index e64a81b..90558d5 100644 --- a/tests/testthat/test-parse_row_filters.R +++ b/tests/testthat/test-parse_row_filters.R @@ -14,7 +14,11 @@ test_that("throws error for bad types", { regexp = " must be a named .+? not a " ) expect_error( - parse_row_filters(c(x = factor(1))), + parse_row_filters(c(x = NA)), + regexp = " must be a named .+? not a " + ) + expect_error( + parse_row_filters(c(x = as.Date("2000-01-01"))), regexp = " must be a named .+? not a " ) })