From 9b9f6d5a167da0e54052eb13b7bbecdd9ccb76f3 Mon Sep 17 00:00:00 2001
From: James McMahon <james.mcmahon@phs.scot>
Date: Wed, 13 Nov 2024 18:07:20 +0000
Subject: [PATCH 1/8] Add `row_filters` and `col_select` to `get_dataset`

---
 R/get_dataset.R                   |  6 +++++-
 man/get_dataset.Rd                | 10 ++++++++++
 tests/testthat/test-get_dataset.R | 22 ++++++++++++++++++++--
 3 files changed, 35 insertions(+), 3 deletions(-)

diff --git a/R/get_dataset.R b/R/get_dataset.R
index 98cef2f..b8b8cf6 100644
--- a/R/get_dataset.R
+++ b/R/get_dataset.R
@@ -21,6 +21,8 @@
 get_dataset <- function(dataset_name,
                         max_resources = NULL,
                         rows = NULL,
+                        row_filters = NULL,
+                        col_select = NULL,
                         include_context = FALSE) {
   # throw error if name type/format is invalid
   check_dataset_name(dataset_name)
@@ -50,7 +52,9 @@ get_dataset <- function(dataset_name,
   all_data <- purrr::map(
     selection_ids,
     get_resource,
-    rows = rows
+    rows = rows,
+    row_filters = row_filters,
+    col_select = col_select,
   )
 
   # resolve class issues
diff --git a/man/get_dataset.Rd b/man/get_dataset.Rd
index 855f102..2acf66a 100644
--- a/man/get_dataset.Rd
+++ b/man/get_dataset.Rd
@@ -8,6 +8,8 @@ get_dataset(
   dataset_name,
   max_resources = NULL,
   rows = NULL,
+  row_filters = NULL,
+  col_select = NULL,
   include_context = FALSE
 )
 }
@@ -22,6 +24,14 @@ it will return the n latest resources}
 \item{rows}{(optional) specify the max number of rows
 to return for each resource.}
 
+\item{row_filters}{(optional) a named list or vector that specifies values of
+columns/fields to keep.
+e.g. list(Date = 20220216, Sex = "Female").}
+
+\item{col_select}{(optional) a character vector containing the names of
+desired columns/fields.
+e.g. c("Date", "Sex").}
+
 \item{include_context}{(optional) If \code{TRUE} additional information about the
 resource will be added as columns to the data, including the resource ID, the
 resource name, the creation date and the last modified/updated date.}
diff --git a/tests/testthat/test-get_dataset.R b/tests/testthat/test-get_dataset.R
index f1cb8df..34ddbca 100644
--- a/tests/testthat/test-get_dataset.R
+++ b/tests/testthat/test-get_dataset.R
@@ -1,6 +1,6 @@
 skip_if_offline(host = "www.opendata.nhs.scot")
 
-test_that("returns data in the expected format", {
+test_that("get_dataset returns data in the expected format", {
   n_resources <- 2
   n_rows <- 2
   data <- get_dataset(
@@ -15,7 +15,25 @@ test_that("returns data in the expected format", {
   expect_named(data)
 })
 
-test_that("errors properly", {
+test_that("get_dataset works properly with filters", {
+  n_resources <- 3
+  n_rows <- 10
+  columns <- c("Date", "PracticeCode", "HSCP", "AllAges")
+
+  data <- get_dataset("gp-practice-populations",
+    max_resources = n_resources,
+    rows = n_rows,
+    row_filters = list(HSCP = "S37000026"),
+    col_select = columns
+  )
+
+  expect_s3_class(data, "tbl_df")
+  expect_equal(nrow(data), n_resources * n_rows)
+  expect_named(data, columns)
+  expect_true(all(data[["HSCP"]] == "S37000026"))
+})
+
+test_that("get_dataset errors properly", {
   expect_error(get_dataset("Mal-formed-name"),
     regexp = "The dataset name supplied `Mal-formed-name` is invalid"
   )

From 963cb26869c234be2862274fa4f15f4884fc6999 Mon Sep 17 00:00:00 2001
From: James McMahon <james.mcmahon@phs.scot>
Date: Wed, 13 Nov 2024 18:39:27 +0000
Subject: [PATCH 2/8] Add additional check to `parse_col_select`

---
 R/parse_col_select.R | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/R/parse_col_select.R b/R/parse_col_select.R
index 82232b2..e45eed8 100644
--- a/R/parse_col_select.R
+++ b/R/parse_col_select.R
@@ -8,7 +8,9 @@ parse_col_select <- function(col_select) {
     return(NULL)
   }
 
-  return(
-    paste0(col_select, collapse = ",")
-  )
+  if (!inherits(col_select, "character")) {
+    cli::cli_abort("{.arg col_select} must be a {.cls character} vector, not a {.cls {class(col_select)}} vector.")
+  }
+
+  return(paste0(col_select, collapse = ","))
 }

From 975b007d34d4dffe55f939218fac5d54babe1302 Mon Sep 17 00:00:00 2001
From: James McMahon <james.mcmahon@phs.scot>
Date: Wed, 13 Nov 2024 18:41:38 +0000
Subject: [PATCH 3/8] Add some additional checks to `row_filter`

---
 R/parse_row_filters.R                   | 30 ++++++++++++-------------
 tests/testthat/test-get_resource_dump.R |  2 +-
 tests/testthat/test-parse_row_filters.R |  2 +-
 3 files changed, 16 insertions(+), 18 deletions(-)

diff --git a/R/parse_row_filters.R b/R/parse_row_filters.R
index 059d5ef..67fa5bb 100644
--- a/R/parse_row_filters.R
+++ b/R/parse_row_filters.R
@@ -1,6 +1,6 @@
 #' Create JSON 'dict' from named list or vector
 #' @description Formats a list or named vector into a valid query
-#' @param row_filters list or named vectors matching fileds to values
+#' @param row_filters list or named vectors matching fields to values
 #' @return a json as a character string
 parse_row_filters <- function(row_filters) {
   # exit function if no filters
@@ -8,30 +8,28 @@ parse_row_filters <- function(row_filters) {
     return(NULL)
   }
 
+  if (!inherits(row_filters, "list")) {
+    cli::cli_abort("{.arg row_filters} must be a {.cls list}, not a {.cls {class(row_filters)}}.")
+  }
+
   # check if any filters in list have length > 1
   too_many <- sapply(row_filters, length) > 1
 
   if (any(too_many)) {
     cli::cli_abort(c(
-      "Invalid input for {.var row_filters}",
-      i = "{names(row_filters)[which(too_many)]} in {.var row_filters} has too many values. ",
-      x = "The {.var row_filters} list must only contain vectors of length 1."
+      "Invalid input for {.arg row_filters}",
+      i = "The {.val {names(row_filters)[which(too_many)]}} filter{?s} {?has/have} too many values.",
+      x = "The {.arg row_filters} list must only contain vectors of length 1."
     ))
   }
 
-  # check if any items in the list/vector have the same name
-  # find number of unique names
-  n_u_row_filters <- length(unique(names(row_filters)))
-  # find total number of names
-  n_row_filters <- length(names(row_filters))
-  # if same, all names are unique
-  unique_names <- n_u_row_filters == n_row_filters
-
-  if (!unique_names) {
+  # check if any items in the list/vector are duplicates
+  duplicates <- duplicated(names(row_filters))
+  if (any(duplicates)) {
     cli::cli_abort(c(
-      "Invalid input for {.var row_filters}",
-      x = "One or more elements in {.var row_filters} have the same name.",
-      i = "Only one filter per field is currently supported by `get_resource`."
+      "Invalid input for {.arg row_filters}",
+      x = "The {.val {names(row_filters)[which(duplicates)]}} filter{?s} {?is/are} duplicated.",
+      i = "Only one filter per field is currently supported by {.fun get_resource}."
     ))
   }
 
diff --git a/tests/testthat/test-get_resource_dump.R b/tests/testthat/test-get_resource_dump.R
index 624e5c3..bd874e6 100644
--- a/tests/testthat/test-get_resource_dump.R
+++ b/tests/testthat/test-get_resource_dump.R
@@ -15,7 +15,7 @@ test_that("returns full data if rows is set to over 99999", {
     data <- get_resource(
       res_id = gp_list_apr_2021,
       rows = 9999999,
-      row_filters = c("GPPracticeName" = "The Blue Practice")
+      row_filters = list("GPPracticeName" = "The Blue Practice")
     ),
     regexp = "Can't request over 99,999 rows"
   )
diff --git a/tests/testthat/test-parse_row_filters.R b/tests/testthat/test-parse_row_filters.R
index c9a9918..cf4e3a0 100644
--- a/tests/testthat/test-parse_row_filters.R
+++ b/tests/testthat/test-parse_row_filters.R
@@ -14,7 +14,7 @@ test_that("throws error for length > 1", {
 test_that("throws error for non-unique names", {
   expect_error(
     parse_row_filters(list(x = 1, x = 2)),
-    regexp = "Only one filter per field is currently supported by `get_resource`"
+    regexp = "Only one filter per field is currently supported"
   )
 })
 

From ae59fb7a00eb879551d5c665fa152bed7dff2bdc Mon Sep 17 00:00:00 2001
From: Moohan <Moohan@users.noreply.github.com>
Date: Wed, 13 Nov 2024 18:44:09 +0000
Subject: [PATCH 4/8] Update documentation

---
 man/parse_row_filters.Rd | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/man/parse_row_filters.Rd b/man/parse_row_filters.Rd
index b6ed3d2..eed28c1 100644
--- a/man/parse_row_filters.Rd
+++ b/man/parse_row_filters.Rd
@@ -7,7 +7,7 @@
 parse_row_filters(row_filters)
 }
 \arguments{
-\item{row_filters}{list or named vectors matching fileds to values}
+\item{row_filters}{list or named vectors matching fields to values}
 }
 \value{
 a json as a character string

From 0d7ecda8a77c1176f5ff7f6e27cc4497b4dfcfe7 Mon Sep 17 00:00:00 2001
From: James McMahon <james.mcmahon@phs.scot>
Date: Wed, 13 Nov 2024 20:56:28 +0000
Subject: [PATCH 5/8] Update parse_row_filters.R

---
 R/parse_row_filters.R | 17 +++++++++++++++--
 1 file changed, 15 insertions(+), 2 deletions(-)

diff --git a/R/parse_row_filters.R b/R/parse_row_filters.R
index 67fa5bb..f17e177 100644
--- a/R/parse_row_filters.R
+++ b/R/parse_row_filters.R
@@ -8,8 +8,21 @@ parse_row_filters <- function(row_filters) {
     return(NULL)
   }
 
-  if (!inherits(row_filters, "list")) {
-    cli::cli_abort("{.arg row_filters} must be a {.cls list}, not a {.cls {class(row_filters)}}.")
+  # Check if `row_filters` is a list or a character vector
+  if (!is.list(row_filters) && !is.character(row_filters)) {
+    cli::cli_abort("{.arg row_filters} must be a named {.cls list} or a named {.cls character} vector, not a {.cls {class(row_filters)}}.")
+  }
+  
+  # If it's a list, ensure it's depth 1 and elements are named
+  if (is.list(row_filters)) {
+    if (any(lengths(row_filters) > 1) || any(names(row_filters) == "")) {
+      cli::cli_abort("{.arg row_filters} must be a list of depth 1 with named elements.")
+    }
+  }
+  
+  # If it's a character vector, ensure it's named
+  if (is.character(row_filters) && any(names(row_filters) == "")) {
+    cli::cli_abort("{.arg row_filters} must be a named character vector.")
   }
 
   # check if any filters in list have length > 1

From c83f57ff3104acc64a947352636a3c0d4600ee2b Mon Sep 17 00:00:00 2001
From: Moohan <Moohan@users.noreply.github.com>
Date: Wed, 13 Nov 2024 20:57:56 +0000
Subject: [PATCH 6/8] Style code (GHA)

---
 R/parse_row_filters.R | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/R/parse_row_filters.R b/R/parse_row_filters.R
index f17e177..1749629 100644
--- a/R/parse_row_filters.R
+++ b/R/parse_row_filters.R
@@ -12,14 +12,14 @@ parse_row_filters <- function(row_filters) {
   if (!is.list(row_filters) && !is.character(row_filters)) {
     cli::cli_abort("{.arg row_filters} must be a named {.cls list} or a named {.cls character} vector, not a {.cls {class(row_filters)}}.")
   }
-  
+
   # If it's a list, ensure it's depth 1 and elements are named
   if (is.list(row_filters)) {
     if (any(lengths(row_filters) > 1) || any(names(row_filters) == "")) {
       cli::cli_abort("{.arg row_filters} must be a list of depth 1 with named elements.")
     }
   }
-  
+
   # If it's a character vector, ensure it's named
   if (is.character(row_filters) && any(names(row_filters) == "")) {
     cli::cli_abort("{.arg row_filters} must be a named character vector.")

From 2fe5a2a06964b841340200119b147b1e03eb5477 Mon Sep 17 00:00:00 2001
From: James McMahon <james.mcmahon@phs.scot>
Date: Thu, 14 Nov 2024 10:02:51 +0000
Subject: [PATCH 7/8] Improve tests and errors for `parse_row_filter`

---
 R/parse_row_filters.R                   | 24 ++++-----
 tests/testthat/test-parse_row_filters.R | 71 ++++++++++++++++++++++++-
 2 files changed, 79 insertions(+), 16 deletions(-)

diff --git a/R/parse_row_filters.R b/R/parse_row_filters.R
index 1749629..f099791 100644
--- a/R/parse_row_filters.R
+++ b/R/parse_row_filters.R
@@ -8,25 +8,21 @@ parse_row_filters <- function(row_filters) {
     return(NULL)
   }
 
-  # Check if `row_filters` is a list or a character vector
-  if (!is.list(row_filters) && !is.character(row_filters)) {
-    cli::cli_abort("{.arg row_filters} must be a named {.cls list} or a named {.cls character} vector, not a {.cls {class(row_filters)}}.")
+  # Check if `row_filters` is a list or a character or numeric vector
+  if (class(row_filters) != "list" && !is.character(row_filters) && !is.numeric(row_filters)) {
+    cli::cli_abort(
+      "{.arg row_filters} must be a named {.cls list} or a named
+      {.cls character} or {.cls numeric} vector, not a {.cls {class(row_filters)}}."
+    )
   }
 
-  # If it's a list, ensure it's depth 1 and elements are named
-  if (is.list(row_filters)) {
-    if (any(lengths(row_filters) > 1) || any(names(row_filters) == "")) {
-      cli::cli_abort("{.arg row_filters} must be a list of depth 1 with named elements.")
-    }
-  }
-
-  # If it's a character vector, ensure it's named
-  if (is.character(row_filters) && any(names(row_filters) == "")) {
-    cli::cli_abort("{.arg row_filters} must be a named character vector.")
+  # Ensure it's elements are named
+  if (is.null(names(row_filters)) || any(names(row_filters) == "")) {
+    cli::cli_abort("{.arg row_filters} should be a named {.cls list}.")
   }
 
   # check if any filters in list have length > 1
-  too_many <- sapply(row_filters, length) > 1
+  too_many <- purrr::map_lgl(row_filters, ~ length(.x) > 1)
 
   if (any(too_many)) {
     cli::cli_abort(c(
diff --git a/tests/testthat/test-parse_row_filters.R b/tests/testthat/test-parse_row_filters.R
index cf4e3a0..e64a81b 100644
--- a/tests/testthat/test-parse_row_filters.R
+++ b/tests/testthat/test-parse_row_filters.R
@@ -4,10 +4,44 @@ test_that("returns NULL if `row_filters` = NULL", {
   )
 })
 
+test_that("throws error for bad types", {
+  expect_error(
+    parse_row_filters(mtcars),
+    regexp = " must be a named .+? not a "
+  )
+  expect_error(
+    parse_row_filters(c(x = TRUE)),
+    regexp = " must be a named .+? not a "
+  )
+  expect_error(
+    parse_row_filters(c(x = factor(1))),
+    regexp = " must be a named .+? not a "
+  )
+})
+
 test_that("throws error for length > 1", {
   expect_error(
     parse_row_filters(list(x = letters)),
-    regexp = "(list must only contain vectors of length 1.)"
+    regexp = " has too many values\\."
+  )
+})
+
+test_that("throws error when un-named", {
+  expect_error(
+    parse_row_filters(list(1, 2)),
+    regexp = " should be a named "
+  )
+  expect_error(
+    parse_row_filters(list(a = 1, 2)),
+    regexp = " should be a named "
+  )
+  expect_error(
+    parse_row_filters(c(1, 2)),
+    regexp = " should be a named "
+  )
+  expect_error(
+    parse_row_filters(c(a = 1, 2)),
+    regexp = " should be a named "
   )
 })
 
@@ -16,12 +50,45 @@ test_that("throws error for non-unique names", {
     parse_row_filters(list(x = 1, x = 2)),
     regexp = "Only one filter per field is currently supported"
   )
+  expect_error(
+    parse_row_filters(c(x = 1, x = 2)),
+    regexp = "Only one filter per field is currently supported"
+  )
+})
+
+
+test_that("returns JSON string from a named vector", {
+  expect_true(
+    jsonlite::validate(
+      parse_row_filters(c(x = 5.0, y = 6.0))
+    )
+  )
+  expect_true(
+    jsonlite::validate(
+      parse_row_filters(c(x = 5L, y = 6L))
+    )
+  )
+  expect_true(
+    jsonlite::validate(
+      parse_row_filters(c(x = "a", y = "b"))
+    )
+  )
 })
 
 test_that("returns JSON string from list", {
   expect_true(
     jsonlite::validate(
-      parse_row_filters(list(x = 5, y = 6))
+      parse_row_filters(list(x = 5.0, y = 6.0))
+    )
+  )
+  expect_true(
+    jsonlite::validate(
+      parse_row_filters(list(x = 5L, y = 6L))
+    )
+  )
+  expect_true(
+    jsonlite::validate(
+      parse_row_filters(list(x = "a", y = "b"))
     )
   )
 })

From 29f88f55744e6935d5324dd333de11e7d4571f23 Mon Sep 17 00:00:00 2001
From: James McMahon <james.mcmahon@phs.scot>
Date: Thu, 14 Nov 2024 15:27:19 +0000
Subject: [PATCH 8/8] Fix tests

Something must have changed with factors between v4.0.1 and now...
---
 tests/testthat/test-parse_row_filters.R | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/tests/testthat/test-parse_row_filters.R b/tests/testthat/test-parse_row_filters.R
index e64a81b..90558d5 100644
--- a/tests/testthat/test-parse_row_filters.R
+++ b/tests/testthat/test-parse_row_filters.R
@@ -14,7 +14,11 @@ test_that("throws error for bad types", {
     regexp = " must be a named .+? not a "
   )
   expect_error(
-    parse_row_filters(c(x = factor(1))),
+    parse_row_filters(c(x = NA)),
+    regexp = " must be a named .+? not a "
+  )
+  expect_error(
+    parse_row_filters(c(x = as.Date("2000-01-01"))),
     regexp = " must be a named .+? not a "
   )
 })