Merge branch 'master' into issue10

Public-Health-Scotland · Aug 1, 2024 · 68fb34f · 68fb34f
2 parents 312dea4 + 22f73ab
commit 68fb34f
Show file tree

Hide file tree

Showing 7 changed files with 64 additions and 90 deletions.
diff --git a/.github/workflows/pkgdown.yaml b/.github/workflows/pkgdown.yaml
@@ -41,7 +41,7 @@ jobs:
 
       - name: Deploy to GitHub pages 🚀
         if: github.event_name != 'pull_request'
-        uses: JamesIves/github-pages-deploy-action@v4.5.0
+        uses: JamesIves/github-pages-deploy-action@v4.6.3
         with:
           clean: false
           branch: gh-pages

diff --git a/DESCRIPTION b/DESCRIPTION
@@ -16,12 +16,12 @@ BugReports: https://github.com/Public-Health-Scotland/phsopendata/issues
 Imports:
     cli,
     dplyr (>= 1.0.0),
-    glue (>= 1.0.0),
     httr (>= 1.0.0),
     jsonlite (>= 1.0.0),
     magrittr (>= 1.0.0),
     purrr,
     readr (>= 1.0.0),
+    rlang (>= 1.0.0),
     stringdist,
     tibble (>= 3.0.0),
     xml2

diff --git a/R/get_latest_resource.R b/R/get_latest_resource.R
@@ -1,33 +1,22 @@
 #' Get the latest resource from a data set
 #'
-#' `get_dataset_additional_info()` returns the most
-#' recently uploaded resource to a dataset
+#' Returns the latest resource available in a dataset.
 #'
 #' There are some datasets on the open data platform that
 #' keep historic resources instead of updating existing ones.
 #' For these it is useful to be able to retrieve the latest
-#' resource. As of 5.7.2024 these data sets include:
+#' resource. As of 1.8.2024 these data sets include:
 #' * gp-practice-populations
 #' * gp-practice-contact-details-and-list-sizes
 #' * nhsscotland-payments-to-general-practice
 #' * dental-practices-and-patient-registrations
 #' * general-practitioner-contact-details
 #' * prescribed-dispensed
-#' * prescriptions-in-the-community
+#' * dispenser-location-contact-details
 #' * community-pharmacy-contractor-activity
 #'
-#' @param dataset_name name of the dataset as found on
-#' \href{https://www.opendata.nhs.scot/}{NHS Open Data platform}
-#' @param rows (optional) specify the max number of rows to return.
-#' @param row_filters (optional) a named list or vector that specifies values of
-#'  columns/fields to keep.
-#' e.g. list(Date = 20220216, Sex = "Female").
-#' @param col_select (optional) a character vector containing the names of
-#' desired columns/fields.
-#' e.g. c("Date", "Sex").
-#' @param include_context (optional) If `TRUE` additional information about the
-#' resource will be added as columns to the data, including the resource ID, the
-#' resource name, the creation date and the last modified/updated date.
+#' @inheritParams get_dataset
+#' @inheritParams get_resource
 #'
 #' @return a [tibble][tibble::tibble-package] with the data
 #' @export
@@ -50,15 +39,44 @@ get_latest_resource <- function(dataset_name,
                                 rows = NULL,
                                 row_filters = NULL,
                                 col_select = NULL,
-                                include_context = FALSE) {
+                                include_context = TRUE) {
+  applicable_datasets <- c(
+    "community-pharmacy-contractor-activity",
+    "dental-practices-and-patient-registrations",
+    "dispenser-location-contact-details",
+    "general-practitioner-contact-details",
+    "gp-practice-contact-details-and-list-sizes",
+    "gp-practice-populations",
+    "nhsscotland-payments-to-general-practice",
+    "prescribed-dispensed"
+  )
+
+  # check if data set is within applicable datasets
+  # throw error if not
+  if (!dataset_name %in% applicable_datasets) {
+    cli::cli_abort(
+      c(
+        "The dataset name supplied {.val {dataset_name}} is not within the applicable datasets.
+      These are: {.val {applicable_datasets}}",
+        "x" = "Please see {.fun get_latest_resource} documentation.",
+        "i" = "You can find dataset names in the URL
+      of a dataset's page on {.url www.opendata.nhs.scot}."
+      ),
+      call = rlang::caller_env()
+    )
+  }
+
+
   # get the latest resource id
   id <- get_latest_resource_id(dataset_name)
 
-  return_value <- get_resource(
-    id,
-    rows,
-    row_filters,
-    col_select,
-    include_context
+  data <- get_resource(
+    res_id = id,
+    rows = rows,
+    row_filters = row_filters,
+    col_select = col_select,
+    include_context = include_context
   )
+
+  return(data)
 }
diff --git a/R/get_latest_resource_id.R b/R/get_latest_resource_id.R
@@ -16,49 +16,9 @@
 #'
 #' @return a string with the resource id
 get_latest_resource_id <- function(dataset_name) {
-  applicable_datasets <- c(
-    "gp-practice-populations", "gp-practice-contact-details-and-list-sizes",
-    "nhsscotland-payments-to-general-practice", "dental-practices-and-patient-registrations",
-    "general-practitioner-contact-details", "prescribed-dispensed",
-    "prescriptions-in-the-community", "community-pharmacy-contractor-activity"
-  )
-
-  # throw error if name type/format is invalid
-  check_dataset_name(dataset_name)
-
-  # define query and try API call
-  query <- list("id" = dataset_name)
-  content <- try(
-    phs_GET("package_show", query),
-    silent = TRUE
-  )
-
-
-  # check if data set is within applicable datasets
-  # throw error if not
-  if (!dataset_name %in% applicable_datasets) {
-    cli::cli_abort(c(
-      "The dataset name supplied {.var {dataset_name}} is not within the applicable datasets.
-      These are:\n
-      {.var {applicable_datasets}}",
-      "x" = "Please see get_latest_reource documentation.",
-      "i" = "You can find dataset names in the URL
-      of a dataset's page on {.url www.opendata.nhs.scot}."
-    ))
-  }
-
-  # if content contains a 'Not Found Error'
-  # throw error with suggested dataset name
-  if (grepl("Not Found Error", content[1])) {
-    suggest_dataset_name(dataset_name)
-  }
-
   # send the api request
   query <- list("id" = dataset_name)
-  content <- try(
-    phs_GET("package_show", query),
-    silent = TRUE
-  )
+  content <- phs_GET("package_show", query)
 
   # retrieve the resource ids from the returned content
   all_ids <- purrr::map_chr(content$result$resources, ~ .x$id)
@@ -69,10 +29,10 @@ get_latest_resource_id <- function(dataset_name) {
   created_date <- c()
   modified_date <- c()
 
-  for (i in content$result$resources) {
-    id <- append(id, i$id)
-    created_date <- append(created_date, i$created)
-    modified_date <- append(modified_date, i$last_modified)
+  for (res in content$result$resources) {
+    id <- append(id, res$id)
+    created_date <- append(created_date, res$created)
+    modified_date <- append(modified_date, res$last_modified)
   }
   all_id_data <- tibble::tibble(
     id = id,
@@ -81,16 +41,15 @@ get_latest_resource_id <- function(dataset_name) {
   ) %>%
     dplyr::mutate(most_recent_date_created = max(created_date))
 
-  # get the first row of the rources, this will be the same that appears on the top
+  # get the first row of the resources; this is the resource that appears at the top
   # on the open data platform
   all_id_data_first_row <- all_id_data %>%
     dplyr::slice(1)
 
-  # if the resource at the top as appearing on the open data platform also has the most
-  # recent date created, return it. Otherwise return warning
+  # If the resource that appears at the top on the open data platform also has the most
+  # recent creation date, return its id. Otherwise, throw an error
   if (all_id_data_first_row$created_date == all_id_data_first_row$most_recent_date_created) {
     return(all_id_data_first_row$id)
-  } else {
-    (warning("most recent id could not be identified"))
   }
+  cli::cli_abort("The most recent id could not be identified")
 }
diff --git a/man/get_latest_resource.Rd b/man/get_latest_resource.Rd
diff --git a/tests/testthat/test-get_latest_resource.R b/tests/testthat/test-get_latest_resource.R
@@ -1,10 +1,7 @@
-test_that("returns more than 0 datasets", {
-  # select the first row of the tibble and get the
-  # number of rows. If no datasets were returned
-  # this will be 0
-  expect_equal(nrow(dplyr::slice(get_latest_resource("gp-practice-populations"), 1)), 1)
+test_that("returns data for a dataset that is listed", {
+  expect_no_error(get_latest_resource("gp-practice-populations"))
 })
 
-test_that("returns data in the expected format", {
-  expect_s3_class(get_latest_resource("gp-practice-populations"), "tbl_df")
+test_that("returns error for a dataset that is not listed", {
+  expect_error(get_latest_resource("hospital-codes"))
 })
diff --git a/tests/testthat/test-get_latest_resource_id.R b/tests/testthat/test-get_latest_resource_id.R
@@ -1,7 +1,7 @@
 test_that("returns data for a dataset that is listed", {
-  expect_no_error(get_latest_resource_id("gp-practice-populations"))
+  expect_no_error(get_latest_resource("gp-practice-populations"))
 })
 
 test_that("returns error for a dataset that is not listed", {
-  expect_error(get_latest_resource_id("hospital-codes"), "The dataset name supplied `hospital-codes` is not within the applicable datasets")
+  expect_error(get_latest_resource("hospital-codes"))
 })