From 223527d547bd89a1bec0f654de6b77bfbc93236d Mon Sep 17 00:00:00 2001 From: Moohan Date: Thu, 4 Jul 2024 12:35:26 +0000 Subject: [PATCH 01/25] Update documentation --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index f328f7b..2c40164 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -32,4 +32,4 @@ Config/testthat/parallel: true Encoding: UTF-8 LazyData: true Roxygen: list(markdown = TRUE) -RoxygenNote: 7.3.1 +RoxygenNote: 7.3.2 From 05bd5048a764bbfb764c9b660359c4c3f6ee5540 Mon Sep 17 00:00:00 2001 From: James McMahon Date: Mon, 8 Jul 2024 09:52:40 +0100 Subject: [PATCH 02/25] Split `get_latest_resource` to its own branch --- NAMESPACE | 1 + R/get_latest_resource.R | 64 +++++++++++++ R/get_latest_resource_id.R | 96 ++++++++++++++++++++ man/get_latest_resource.Rd | 70 ++++++++++++++ man/get_latest_resource_id.Rd | 29 ++++++ tests/testthat/test-get_latest_resource_id.R | 7 ++ 6 files changed, 267 insertions(+) create mode 100644 R/get_latest_resource.R create mode 100644 R/get_latest_resource_id.R create mode 100644 man/get_latest_resource.Rd create mode 100644 man/get_latest_resource_id.Rd create mode 100644 tests/testthat/test-get_latest_resource_id.R diff --git a/NAMESPACE b/NAMESPACE index 219181b..b42714e 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -2,6 +2,7 @@ export("%>%") export(get_dataset) +export(get_latest_resource) export(get_resource) export(get_resource_sql) importFrom(magrittr,"%>%") diff --git a/R/get_latest_resource.R b/R/get_latest_resource.R new file mode 100644 index 0000000..defbf8b --- /dev/null +++ b/R/get_latest_resource.R @@ -0,0 +1,64 @@ +#' Get the latest resource from a data set +#' +#' `get_dataset_additional_info()` returns the most +#' recently uploaded resource to a dataset +#' +#' There are some datasets on the open data platform that +#' keep historic resources instead of updating existing ones. +#' For these it is useful to be able to retrieve the latest +#' resource. As of 5.7.2024 these data sets include: +#' * gp-practice-populations +#' * gp-practice-contact-details-and-list-sizes +#' * nhsscotland-payments-to-general-practice +#' * dental-practices-and-patient-registrations +#' * general-practitioner-contact-details +#' * prescribed-dispensed +#' * prescriptions-in-the-community +#' * community-pharmacy-contractor-activity +#' +#' @param dataset_name name of the dataset as found on +#' \href{https://www.opendata.nhs.scot/}{NHS Open Data platform} +#' @param rows (optional) specify the max number of rows to return. +#' @param row_filters (optional) a named list or vector that specifies values of +#' columns/fields to keep. +#' e.g. list(Date = 20220216, Sex = "Female"). +#' @param col_select (optional) a character vector containing the names of +#' desired columns/fields. +#' e.g. c("Date", "Sex"). +#' @param include_context (optional) If `TRUE` additional information about the +#' resource will be added as columns to the data, including the resource ID, the +#' resource name, the creation date and the last modified/updated date. +#' +#' @return a [tibble][tibble::tibble-package] with the data +#' @export +#' +#' @examples +#' dataset_name <- "gp-practice-contact-details-and-list-sizes" +#' +#' data <- get_latest_resource(dataset_name) +#' +#' filters <- list("Postcode" = "DD11 1ES") +#' wanted_cols <- c("PracticeCode", "Postcode", "Dispensing") +#' +#' filtered_data <- get_latest_resource( +#' dataset_name = dataset_name, +#' row_filters = filters, +#' col_select = wanted_cols +#' ) +#' +get_latest_resource <- function(dataset_name, + rows = NULL, + row_filters = NULL, + col_select = NULL, + include_context = FALSE) { + # get the latest resource id + id <- get_latest_resource_id(dataset_name) + + return_value <- get_resource( + id, + rows, + row_filters, + col_select, + include_context + ) +} diff --git a/R/get_latest_resource_id.R b/R/get_latest_resource_id.R new file mode 100644 index 0000000..f693810 --- /dev/null +++ b/R/get_latest_resource_id.R @@ -0,0 +1,96 @@ +#' get_latest_resource_id +#' +#' to be confident that the resource returned is the one intended +#' two conditions have to be met. It has to appear at the top of +#' of the resource list as shown on the open data platform. +#' The order they are returned via the api is the same +#' as they appear on the open data platform. It also +#' has to have the most recent date created +#' +#' There are only some datasets that this functionality +#' is relevant to, these are listed within applicable +#' datasets and are the datasets that keep historic +#' resources instead of over writing them. +#' +#' @inheritParams get_dataset +#' +#' @return a string with the resource id +get_latest_resource_id <- function(dataset_name) { + applicable_datasets <- c( + "gp-practice-populations", "gp-practice-contact-details-and-list-sizes", + "nhsscotland-payments-to-general-practice", "dental-practices-and-patient-registrations", + "general-practitioner-contact-details", "prescribed-dispensed", + "prescriptions-in-the-community", "community-pharmacy-contractor-activity" + ) + + # throw error if name type/format is invalid + check_dataset_name(dataset_name) + + # define query and try API call + query <- list("id" = dataset_name) + content <- try( + phs_GET("package_show", query), + silent = TRUE + ) + + + # check if data set is within applicable datasets + # throw error if not + if (!dataset_name %in% applicable_datasets) { + cli::cli_abort(c( + "The dataset name supplied {.var {dataset_name}} is not within the applicable datasets. + These are:\n + {.var {applicable_datasets}}", + "x" = "Please see get_latest_reource documentation.", + "i" = "You can find dataset names in the URL + of a dataset's page on {.url www.opendata.nhs.scot}." + )) + } + + # if content contains a 'Not Found Error' + # throw error with suggested dataset name + if (grepl("Not Found Error", content[1])) { + suggest_dataset_name(dataset_name) + } + + # send the api request + query <- list("id" = dataset_name) + content <- try( + phs_GET("package_show", query), + silent = TRUE + ) + + # retrieve the resource id's from returned contect + all_ids <- purrr::map_chr(content$result$resources, ~ .x$id) + + + # add the id, created date and last_modified to a dataframe + id <- c() + created_date <- c() + modified_date <- c() + + for (i in content$result$resources) { + id <- append(id, i$id) + created_date <- append(created_date, i$created) + modified_date <- append(modified_date, i$last_modified) + } + all_id_data <- tibble::tibble( + id = id, + created_date = strptime(created_date, format = "%FT%X", tz = "UTC"), + modified_date = strptime(modified_date, format = "%FT%X", tz = "UTC") + ) %>% + dplyr::mutate(most_recent_date_created = max(created_date)) + + # get the first row of the rources, this will be the same that appears on the top + # on the open data platform + all_id_data_first_row <- all_id_data %>% + dplyr::slice(1) + + # if the resource at the top as appearing on the open data platform also has the most + # recent date created, return it. Otherwise return warning + if (all_id_data_first_row$created_date == all_id_data_first_row$most_recent_date_created) { + return(all_id_data_first_row$id) + } else { + (warning("most recent id could not be identified")) + } +} diff --git a/man/get_latest_resource.Rd b/man/get_latest_resource.Rd new file mode 100644 index 0000000..26b2202 --- /dev/null +++ b/man/get_latest_resource.Rd @@ -0,0 +1,70 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/get_latest_resource.R +\name{get_latest_resource} +\alias{get_latest_resource} +\title{Get the latest resource from a data set} +\usage{ +get_latest_resource( + dataset_name, + rows = NULL, + row_filters = NULL, + col_select = NULL, + include_context = FALSE +) +} +\arguments{ +\item{dataset_name}{name of the dataset as found on +\href{https://www.opendata.nhs.scot/}{NHS Open Data platform}} + +\item{rows}{(optional) specify the max number of rows to return.} + +\item{row_filters}{(optional) a named list or vector that specifies values of +columns/fields to keep. +e.g. list(Date = 20220216, Sex = "Female").} + +\item{col_select}{(optional) a character vector containing the names of +desired columns/fields. +e.g. c("Date", "Sex").} + +\item{include_context}{(optional) If \code{TRUE} additional information about the +resource will be added as columns to the data, including the resource ID, the +resource name, the creation date and the last modified/updated date.} +} +\value{ +a \link[tibble:tibble-package]{tibble} with the data +} +\description{ +\code{get_dataset_additional_info()} returns the most +recently uploaded resource to a dataset +} +\details{ +There are some datasets on the open data platform that +keep historic resources instead of updating existing ones. +For these it is useful to be able to retrieve the latest +resource. As of 5.7.2024 these data sets include: +\itemize{ +\item gp-practice-populations +\item gp-practice-contact-details-and-list-sizes +\item nhsscotland-payments-to-general-practice +\item dental-practices-and-patient-registrations +\item general-practitioner-contact-details +\item prescribed-dispensed +\item prescriptions-in-the-community +\item community-pharmacy-contractor-activity +} +} +\examples{ +dataset_name <- "gp-practice-contact-details-and-list-sizes" + +data <- get_latest_resource(dataset_name) + +filters <- list("Postcode" = "DD11 1ES") +wanted_cols <- c("PracticeCode", "Postcode", "Dispensing") + +filtered_data <- get_latest_resource( + dataset_name = dataset_name, + row_filters = filters, + col_select = wanted_cols +) + +} diff --git a/man/get_latest_resource_id.Rd b/man/get_latest_resource_id.Rd new file mode 100644 index 0000000..d79e6ae --- /dev/null +++ b/man/get_latest_resource_id.Rd @@ -0,0 +1,29 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/get_latest_resource_id.R +\name{get_latest_resource_id} +\alias{get_latest_resource_id} +\title{get_latest_resource_id} +\usage{ +get_latest_resource_id(dataset_name) +} +\arguments{ +\item{dataset_name}{name of the dataset as found on +\href{https://www.opendata.nhs.scot/}{NHS Open Data platform}} +} +\value{ +a string with the resource id +} +\description{ +to be confident that the resource returned is the one intended +two conditions have to be met. It has to appear at the top of +of the resource list as shown on the open data platform. +The order they are returned via the api is the same +as they appear on the open data platform. It also +has to have the most recent date created +} +\details{ +There are only some datasets that this functionality +is relevant to, these are listed within applicable +datasets and are the datasets that keep historic +resources instead of over writing them. +} diff --git a/tests/testthat/test-get_latest_resource_id.R b/tests/testthat/test-get_latest_resource_id.R new file mode 100644 index 0000000..c2e7569 --- /dev/null +++ b/tests/testthat/test-get_latest_resource_id.R @@ -0,0 +1,7 @@ +test_that("returns data for a dataset that is listed", { + expect_no_error(get_latest_resource_id("gp-practice-populations")) +}) + +test_that("returns error for a dataset that is not listed", { + expect_error(get_latest_resource_id("hospital-codes")) +}) From b0183827cc372790b536872a7f046bcd5409bb4d Mon Sep 17 00:00:00 2001 From: ross-hull <117927497+ross-hull@users.noreply.github.com> Date: Tue, 16 Jul 2024 15:14:36 +0100 Subject: [PATCH 03/25] Update R/get_latest_resource.R Co-authored-by: James McMahon --- R/get_latest_resource.R | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/R/get_latest_resource.R b/R/get_latest_resource.R index defbf8b..04e14d1 100644 --- a/R/get_latest_resource.R +++ b/R/get_latest_resource.R @@ -54,11 +54,13 @@ get_latest_resource <- function(dataset_name, # get the latest resource id id <- get_latest_resource_id(dataset_name) - return_value <- get_resource( + data <- get_resource( id, rows, row_filters, col_select, include_context ) + + return(data) } From 5c25adaaeefdff62fe770ff2bf13acf1956c2fb1 Mon Sep 17 00:00:00 2001 From: ross-hull <117927497+ross-hull@users.noreply.github.com> Date: Tue, 16 Jul 2024 15:15:13 +0100 Subject: [PATCH 04/25] Update documentation R/get_latest_resource.R Co-authored-by: James McMahon --- R/get_latest_resource.R | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/R/get_latest_resource.R b/R/get_latest_resource.R index 04e14d1..c7bed2a 100644 --- a/R/get_latest_resource.R +++ b/R/get_latest_resource.R @@ -1,7 +1,6 @@ #' Get the latest resource from a data set #' -#' `get_dataset_additional_info()` returns the most -#' recently uploaded resource to a dataset +#' Returns the latest resource available in a dataset. #' #' There are some datasets on the open data platform that #' keep historic resources instead of updating existing ones. From 7e8aa1222058c9f4006e657e8bd28b5e6554b3cb Mon Sep 17 00:00:00 2001 From: ross-hull <117927497+ross-hull@users.noreply.github.com> Date: Tue, 16 Jul 2024 15:17:08 +0100 Subject: [PATCH 05/25] Update R/get_latest_resource_id.R Co-authored-by: James McMahon --- R/get_latest_resource_id.R | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/R/get_latest_resource_id.R b/R/get_latest_resource_id.R index f693810..e4eef65 100644 --- a/R/get_latest_resource_id.R +++ b/R/get_latest_resource_id.R @@ -44,7 +44,8 @@ get_latest_resource_id <- function(dataset_name) { "x" = "Please see get_latest_reource documentation.", "i" = "You can find dataset names in the URL of a dataset's page on {.url www.opendata.nhs.scot}." - )) + ), + call = rlang::caller_env()) } # if content contains a 'Not Found Error' From bfb712153053eb1a119db52097db614e0e9c2100 Mon Sep 17 00:00:00 2001 From: ross-hull <117927497+ross-hull@users.noreply.github.com> Date: Tue, 16 Jul 2024 15:17:37 +0100 Subject: [PATCH 06/25] Update R/get_latest_resource_id.R Co-authored-by: James McMahon --- R/get_latest_resource_id.R | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/R/get_latest_resource_id.R b/R/get_latest_resource_id.R index e4eef65..d242aa0 100644 --- a/R/get_latest_resource_id.R +++ b/R/get_latest_resource_id.R @@ -56,10 +56,7 @@ get_latest_resource_id <- function(dataset_name) { # send the api request query <- list("id" = dataset_name) - content <- try( - phs_GET("package_show", query), - silent = TRUE - ) + content <- phs_GET("package_show", query) # retrieve the resource id's from returned contect all_ids <- purrr::map_chr(content$result$resources, ~ .x$id) From 5d07f46a7cd85b606f729c79c554f8e03bd57bca Mon Sep 17 00:00:00 2001 From: ross-hull <117927497+ross-hull@users.noreply.github.com> Date: Tue, 16 Jul 2024 15:17:57 +0100 Subject: [PATCH 07/25] Update R/get_latest_resource_id.R Co-authored-by: James McMahon --- R/get_latest_resource_id.R | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/R/get_latest_resource_id.R b/R/get_latest_resource_id.R index d242aa0..c519b6e 100644 --- a/R/get_latest_resource_id.R +++ b/R/get_latest_resource_id.R @@ -67,10 +67,10 @@ get_latest_resource_id <- function(dataset_name) { created_date <- c() modified_date <- c() - for (i in content$result$resources) { - id <- append(id, i$id) - created_date <- append(created_date, i$created) - modified_date <- append(modified_date, i$last_modified) + for (res in content$result$resources) { + id <- append(id, res$id) + created_date <- append(created_date, res$created) + modified_date <- append(modified_date, res$last_modified) } all_id_data <- tibble::tibble( id = id, From 18172c28f9feefa77779386d04d3c67986e9da00 Mon Sep 17 00:00:00 2001 From: ross-hull <117927497+ross-hull@users.noreply.github.com> Date: Tue, 16 Jul 2024 15:18:45 +0100 Subject: [PATCH 08/25] Update R/get_latest_resource_id.R Co-authored-by: James McMahon --- R/get_latest_resource_id.R | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/R/get_latest_resource_id.R b/R/get_latest_resource_id.R index c519b6e..3f89deb 100644 --- a/R/get_latest_resource_id.R +++ b/R/get_latest_resource_id.R @@ -84,11 +84,10 @@ get_latest_resource_id <- function(dataset_name) { all_id_data_first_row <- all_id_data %>% dplyr::slice(1) - # if the resource at the top as appearing on the open data platform also has the most - # recent date created, return it. Otherwise return warning + # If the resource at the top as appearing on the open data platform also has the most + # recent date created, return it. Otherwise, error if (all_id_data_first_row$created_date == all_id_data_first_row$most_recent_date_created) { return(all_id_data_first_row$id) - } else { - (warning("most recent id could not be identified")) - } + } + cli::cli_abort("The most recent id could not be identified")) } From 010310d39de9cbcf664b1d6530107acaf0f8578b Mon Sep 17 00:00:00 2001 From: ross-hull Date: Tue, 16 Jul 2024 14:20:12 +0000 Subject: [PATCH 09/25] Style code (GHA) --- R/get_latest_resource.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/get_latest_resource.R b/R/get_latest_resource.R index c7bed2a..8aac351 100644 --- a/R/get_latest_resource.R +++ b/R/get_latest_resource.R @@ -60,6 +60,6 @@ get_latest_resource <- function(dataset_name, col_select, include_context ) - + return(data) } From 8721386df1c1ce147d1116952609b48dc322e805 Mon Sep 17 00:00:00 2001 From: ross hull Date: Tue, 16 Jul 2024 16:09:31 +0100 Subject: [PATCH 10/25] move dataset name checks to get_latest_resource --- R/get_latest_resource.R | 40 ++++++++++++++++++++++++++++++++++++++ R/get_latest_resource_id.R | 40 ++------------------------------------ 2 files changed, 42 insertions(+), 38 deletions(-) diff --git a/R/get_latest_resource.R b/R/get_latest_resource.R index 8aac351..2532bdd 100644 --- a/R/get_latest_resource.R +++ b/R/get_latest_resource.R @@ -50,6 +50,46 @@ get_latest_resource <- function(dataset_name, row_filters = NULL, col_select = NULL, include_context = FALSE) { + + + applicable_datasets <- c( + "gp-practice-populations", "gp-practice-contact-details-and-list-sizes", + "nhsscotland-payments-to-general-practice", "dental-practices-and-patient-registrations", + "general-practitioner-contact-details", "prescribed-dispensed", + "prescriptions-in-the-community", "community-pharmacy-contractor-activity" + ) + + # throw error if name type/format is invalid + check_dataset_name(dataset_name) + + # define query and try API call + query <- list("id" = dataset_name) + content <- try( + phs_GET("package_show", query), + silent = TRUE + ) + + # if content contains a 'Not Found Error' + # throw error with suggested dataset name + if (grepl("Not Found Error", content[1])) { + suggest_dataset_name(dataset_name) + } + + # check if data set is within applicable datasets + # throw error if not + if (!dataset_name %in% applicable_datasets) { + cli::cli_abort(c( + "The dataset name supplied {.var {dataset_name}} is not within the applicable datasets. + These are:\n + {.var {applicable_datasets}}", + "x" = "Please see get_latest_reource documentation.", + "i" = "You can find dataset names in the URL + of a dataset's page on {.url www.opendata.nhs.scot}." + ), + call = rlang::caller_env()) + } + + # get the latest resource id id <- get_latest_resource_id(dataset_name) diff --git a/R/get_latest_resource_id.R b/R/get_latest_resource_id.R index 3f89deb..cf353cd 100644 --- a/R/get_latest_resource_id.R +++ b/R/get_latest_resource_id.R @@ -16,43 +16,7 @@ #' #' @return a string with the resource id get_latest_resource_id <- function(dataset_name) { - applicable_datasets <- c( - "gp-practice-populations", "gp-practice-contact-details-and-list-sizes", - "nhsscotland-payments-to-general-practice", "dental-practices-and-patient-registrations", - "general-practitioner-contact-details", "prescribed-dispensed", - "prescriptions-in-the-community", "community-pharmacy-contractor-activity" - ) - # throw error if name type/format is invalid - check_dataset_name(dataset_name) - - # define query and try API call - query <- list("id" = dataset_name) - content <- try( - phs_GET("package_show", query), - silent = TRUE - ) - - - # check if data set is within applicable datasets - # throw error if not - if (!dataset_name %in% applicable_datasets) { - cli::cli_abort(c( - "The dataset name supplied {.var {dataset_name}} is not within the applicable datasets. - These are:\n - {.var {applicable_datasets}}", - "x" = "Please see get_latest_reource documentation.", - "i" = "You can find dataset names in the URL - of a dataset's page on {.url www.opendata.nhs.scot}." - ), - call = rlang::caller_env()) - } - - # if content contains a 'Not Found Error' - # throw error with suggested dataset name - if (grepl("Not Found Error", content[1])) { - suggest_dataset_name(dataset_name) - } # send the api request query <- list("id" = dataset_name) @@ -88,6 +52,6 @@ get_latest_resource_id <- function(dataset_name) { # recent date created, return it. Otherwise, error if (all_id_data_first_row$created_date == all_id_data_first_row$most_recent_date_created) { return(all_id_data_first_row$id) - } - cli::cli_abort("The most recent id could not be identified")) + } + cli::cli_abort("The most recent id could not be identified") } From a22acf3a9b316d352af1d37e5cfe9208b4801666 Mon Sep 17 00:00:00 2001 From: ross hull Date: Tue, 16 Jul 2024 16:10:08 +0100 Subject: [PATCH 11/25] change tests to accomodate moving dataset checks to get_latest_resource --- tests/testthat/test-get_latest_resource.R | 7 +++++++ tests/testthat/test-get_latest_resource_id.R | 4 ++-- 2 files changed, 9 insertions(+), 2 deletions(-) create mode 100644 tests/testthat/test-get_latest_resource.R diff --git a/tests/testthat/test-get_latest_resource.R b/tests/testthat/test-get_latest_resource.R new file mode 100644 index 0000000..6defe90 --- /dev/null +++ b/tests/testthat/test-get_latest_resource.R @@ -0,0 +1,7 @@ +test_that("returns data for a dataset that is listed", { + expect_no_error(get_latest_resource("gp-practice-populations")) +}) + +test_that("returns error for a dataset that is not listed", { + expect_error(get_latest_resource("hospital-codes")) +}) diff --git a/tests/testthat/test-get_latest_resource_id.R b/tests/testthat/test-get_latest_resource_id.R index c2e7569..6defe90 100644 --- a/tests/testthat/test-get_latest_resource_id.R +++ b/tests/testthat/test-get_latest_resource_id.R @@ -1,7 +1,7 @@ test_that("returns data for a dataset that is listed", { - expect_no_error(get_latest_resource_id("gp-practice-populations")) + expect_no_error(get_latest_resource("gp-practice-populations")) }) test_that("returns error for a dataset that is not listed", { - expect_error(get_latest_resource_id("hospital-codes")) + expect_error(get_latest_resource("hospital-codes")) }) From 4e2da8eb1ee0b5845f02dea2c86ad7e383cdac59 Mon Sep 17 00:00:00 2001 From: ross-hull Date: Tue, 16 Jul 2024 15:11:44 +0000 Subject: [PATCH 12/25] Style code (GHA) --- R/get_latest_resource.R | 16 ++++++++-------- R/get_latest_resource_id.R | 2 -- 2 files changed, 8 insertions(+), 10 deletions(-) diff --git a/R/get_latest_resource.R b/R/get_latest_resource.R index 2532bdd..30cbfa7 100644 --- a/R/get_latest_resource.R +++ b/R/get_latest_resource.R @@ -50,8 +50,6 @@ get_latest_resource <- function(dataset_name, row_filters = NULL, col_select = NULL, include_context = FALSE) { - - applicable_datasets <- c( "gp-practice-populations", "gp-practice-contact-details-and-list-sizes", "nhsscotland-payments-to-general-practice", "dental-practices-and-patient-registrations", @@ -78,15 +76,17 @@ get_latest_resource <- function(dataset_name, # check if data set is within applicable datasets # throw error if not if (!dataset_name %in% applicable_datasets) { - cli::cli_abort(c( - "The dataset name supplied {.var {dataset_name}} is not within the applicable datasets. + cli::cli_abort( + c( + "The dataset name supplied {.var {dataset_name}} is not within the applicable datasets. These are:\n {.var {applicable_datasets}}", - "x" = "Please see get_latest_reource documentation.", - "i" = "You can find dataset names in the URL + "x" = "Please see get_latest_reource documentation.", + "i" = "You can find dataset names in the URL of a dataset's page on {.url www.opendata.nhs.scot}." - ), - call = rlang::caller_env()) + ), + call = rlang::caller_env() + ) } diff --git a/R/get_latest_resource_id.R b/R/get_latest_resource_id.R index cf353cd..d9d534b 100644 --- a/R/get_latest_resource_id.R +++ b/R/get_latest_resource_id.R @@ -16,8 +16,6 @@ #' #' @return a string with the resource id get_latest_resource_id <- function(dataset_name) { - - # send the api request query <- list("id" = dataset_name) content <- phs_GET("package_show", query) From 1ea8c6ed50d0710749677f86cf36458c43514fac Mon Sep 17 00:00:00 2001 From: ross hull Date: Wed, 31 Jul 2024 10:23:07 +0100 Subject: [PATCH 13/25] remove unnecisary dataset name checks get_latest_resource --- R/get_latest_resource.R | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/R/get_latest_resource.R b/R/get_latest_resource.R index 30cbfa7..8da4e9a 100644 --- a/R/get_latest_resource.R +++ b/R/get_latest_resource.R @@ -57,21 +57,8 @@ get_latest_resource <- function(dataset_name, "prescriptions-in-the-community", "community-pharmacy-contractor-activity" ) - # throw error if name type/format is invalid - check_dataset_name(dataset_name) - # define query and try API call - query <- list("id" = dataset_name) - content <- try( - phs_GET("package_show", query), - silent = TRUE - ) - # if content contains a 'Not Found Error' - # throw error with suggested dataset name - if (grepl("Not Found Error", content[1])) { - suggest_dataset_name(dataset_name) - } # check if data set is within applicable datasets # throw error if not From 8eaaa78f72e0e66305d61a8e0e7b33247990bdef Mon Sep 17 00:00:00 2001 From: ross-hull Date: Wed, 31 Jul 2024 09:32:50 +0000 Subject: [PATCH 14/25] Update documentation --- man/get_latest_resource.Rd | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/man/get_latest_resource.Rd b/man/get_latest_resource.Rd index 26b2202..02b9f6c 100644 --- a/man/get_latest_resource.Rd +++ b/man/get_latest_resource.Rd @@ -34,8 +34,7 @@ resource name, the creation date and the last modified/updated date.} a \link[tibble:tibble-package]{tibble} with the data } \description{ -\code{get_dataset_additional_info()} returns the most -recently uploaded resource to a dataset +Returns the latest resource available in a dataset. } \details{ There are some datasets on the open data platform that From 2068ad15cd16ef21644272223e9246d93c17d86d Mon Sep 17 00:00:00 2001 From: ross-hull <117927497+ross-hull@users.noreply.github.com> Date: Wed, 31 Jul 2024 13:51:13 +0100 Subject: [PATCH 15/25] Update R/get_latest_resource.R Co-authored-by: James McMahon --- R/get_latest_resource.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/get_latest_resource.R b/R/get_latest_resource.R index 8da4e9a..451e8f9 100644 --- a/R/get_latest_resource.R +++ b/R/get_latest_resource.R @@ -68,7 +68,7 @@ get_latest_resource <- function(dataset_name, "The dataset name supplied {.var {dataset_name}} is not within the applicable datasets. These are:\n {.var {applicable_datasets}}", - "x" = "Please see get_latest_reource documentation.", + "x" = "Please see {.fun get_latest_resource} documentation.", "i" = "You can find dataset names in the URL of a dataset's page on {.url www.opendata.nhs.scot}." ), From dd4a3ae913a3feb3ab9d311d4b910b48043c617b Mon Sep 17 00:00:00 2001 From: ross-hull <117927497+ross-hull@users.noreply.github.com> Date: Wed, 31 Jul 2024 13:51:42 +0100 Subject: [PATCH 16/25] Update R/get_latest_resource.R Co-authored-by: James McMahon --- R/get_latest_resource.R | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/R/get_latest_resource.R b/R/get_latest_resource.R index 451e8f9..296e459 100644 --- a/R/get_latest_resource.R +++ b/R/get_latest_resource.R @@ -81,11 +81,11 @@ get_latest_resource <- function(dataset_name, id <- get_latest_resource_id(dataset_name) data <- get_resource( - id, - rows, - row_filters, - col_select, - include_context + res_id = id, + rows = rows, + row_filters = row_filters, + col_select = col_select, + include_context = include_context ) return(data) From fea33faaeac1abea8e7798ed25308a83921dcaa9 Mon Sep 17 00:00:00 2001 From: James McMahon Date: Thu, 1 Aug 2024 10:19:08 +0100 Subject: [PATCH 17/25] Add `{rlang}` to imports --- DESCRIPTION | 1 + 1 file changed, 1 insertion(+) diff --git a/DESCRIPTION b/DESCRIPTION index 2c40164..443d478 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -21,6 +21,7 @@ Imports: magrittr (>= 1.0.0), purrr, readr (>= 1.0.0), + rlang (>= 1.0.0), stringdist, tibble (>= 3.0.0), xml2 From 663335ead727ef2dcdab2d799a5b858a3b7e1994 Mon Sep 17 00:00:00 2001 From: James McMahon Date: Thu, 1 Aug 2024 10:31:40 +0100 Subject: [PATCH 18/25] Use `@inheritParams` to simplify documentation --- R/get_latest_resource.R | 14 ++------------ man/get_latest_resource.Rd | 3 ++- 2 files changed, 4 insertions(+), 13 deletions(-) diff --git a/R/get_latest_resource.R b/R/get_latest_resource.R index 296e459..bf8d6ac 100644 --- a/R/get_latest_resource.R +++ b/R/get_latest_resource.R @@ -15,18 +15,8 @@ #' * prescriptions-in-the-community #' * community-pharmacy-contractor-activity #' -#' @param dataset_name name of the dataset as found on -#' \href{https://www.opendata.nhs.scot/}{NHS Open Data platform} -#' @param rows (optional) specify the max number of rows to return. -#' @param row_filters (optional) a named list or vector that specifies values of -#' columns/fields to keep. -#' e.g. list(Date = 20220216, Sex = "Female"). -#' @param col_select (optional) a character vector containing the names of -#' desired columns/fields. -#' e.g. c("Date", "Sex"). -#' @param include_context (optional) If `TRUE` additional information about the -#' resource will be added as columns to the data, including the resource ID, the -#' resource name, the creation date and the last modified/updated date. +#' @inheritParams get_dataset +#' @inheritParams get_resource #' #' @return a [tibble][tibble::tibble-package] with the data #' @export diff --git a/man/get_latest_resource.Rd b/man/get_latest_resource.Rd index 02b9f6c..c5e6e7a 100644 --- a/man/get_latest_resource.Rd +++ b/man/get_latest_resource.Rd @@ -16,7 +16,8 @@ get_latest_resource( \item{dataset_name}{name of the dataset as found on \href{https://www.opendata.nhs.scot/}{NHS Open Data platform}} -\item{rows}{(optional) specify the max number of rows to return.} +\item{rows}{(optional) specify the max number of rows +to return for each resource.} \item{row_filters}{(optional) a named list or vector that specifies values of columns/fields to keep. From 912eca4e20447b667ccdd3d014902c27a9e40f2a Mon Sep 17 00:00:00 2001 From: ross hull Date: Thu, 1 Aug 2024 11:22:17 +0100 Subject: [PATCH 19/25] update applicable datasets --- R/get_latest_resource.R | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/R/get_latest_resource.R b/R/get_latest_resource.R index bf8d6ac..734adda 100644 --- a/R/get_latest_resource.R +++ b/R/get_latest_resource.R @@ -5,14 +5,14 @@ #' There are some datasets on the open data platform that #' keep historic resources instead of updating existing ones. #' For these it is useful to be able to retrieve the latest -#' resource. As of 5.7.2024 these data sets include: +#' resource. As of 1.8.2024 these data sets include: #' * gp-practice-populations #' * gp-practice-contact-details-and-list-sizes #' * nhsscotland-payments-to-general-practice #' * dental-practices-and-patient-registrations #' * general-practitioner-contact-details #' * prescribed-dispensed -#' * prescriptions-in-the-community +#' * dispenser-location-contact-details #' * community-pharmacy-contractor-activity #' #' @inheritParams get_dataset @@ -44,7 +44,7 @@ get_latest_resource <- function(dataset_name, "gp-practice-populations", "gp-practice-contact-details-and-list-sizes", "nhsscotland-payments-to-general-practice", "dental-practices-and-patient-registrations", "general-practitioner-contact-details", "prescribed-dispensed", - "prescriptions-in-the-community", "community-pharmacy-contractor-activity" + "dispenser-location-contact-details", "community-pharmacy-contractor-activity" ) From 6e2aee9eeba322c85e94c28270483c38fa1a651c Mon Sep 17 00:00:00 2001 From: ross-hull Date: Thu, 1 Aug 2024 10:24:23 +0000 Subject: [PATCH 20/25] Update documentation --- man/get_latest_resource.Rd | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/man/get_latest_resource.Rd b/man/get_latest_resource.Rd index c5e6e7a..aaa1faa 100644 --- a/man/get_latest_resource.Rd +++ b/man/get_latest_resource.Rd @@ -41,7 +41,7 @@ Returns the latest resource available in a dataset. There are some datasets on the open data platform that keep historic resources instead of updating existing ones. For these it is useful to be able to retrieve the latest -resource. As of 5.7.2024 these data sets include: +resource. As of 1.8.2024 these data sets include: \itemize{ \item gp-practice-populations \item gp-practice-contact-details-and-list-sizes @@ -49,7 +49,7 @@ resource. As of 5.7.2024 these data sets include: \item dental-practices-and-patient-registrations \item general-practitioner-contact-details \item prescribed-dispensed -\item prescriptions-in-the-community +\item dispenser-location-contact-details \item community-pharmacy-contractor-activity } } From 5748bfe068f68a85581188841d73a4c9c16871c1 Mon Sep 17 00:00:00 2001 From: James McMahon Date: Thu, 1 Aug 2024 14:11:27 +0100 Subject: [PATCH 21/25] Present datasets as 'values' --- R/get_latest_resource.R | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/R/get_latest_resource.R b/R/get_latest_resource.R index 734adda..0765fa2 100644 --- a/R/get_latest_resource.R +++ b/R/get_latest_resource.R @@ -47,17 +47,13 @@ get_latest_resource <- function(dataset_name, "dispenser-location-contact-details", "community-pharmacy-contractor-activity" ) - - - # check if data set is within applicable datasets # throw error if not if (!dataset_name %in% applicable_datasets) { cli::cli_abort( c( - "The dataset name supplied {.var {dataset_name}} is not within the applicable datasets. - These are:\n - {.var {applicable_datasets}}", + "The dataset name supplied {.val {dataset_name}} is not within the applicable datasets. + These are: {.val {applicable_datasets}}", "x" = "Please see {.fun get_latest_resource} documentation.", "i" = "You can find dataset names in the URL of a dataset's page on {.url www.opendata.nhs.scot}." From 40d7d3f1d473fd2c6f9446b1c21d5b3be39dda56 Mon Sep 17 00:00:00 2001 From: James McMahon Date: Thu, 1 Aug 2024 14:11:55 +0100 Subject: [PATCH 22/25] Sort the list of 'applicable datasets' for easier maintenance --- R/get_latest_resource.R | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/R/get_latest_resource.R b/R/get_latest_resource.R index 0765fa2..8c04d5b 100644 --- a/R/get_latest_resource.R +++ b/R/get_latest_resource.R @@ -41,10 +41,14 @@ get_latest_resource <- function(dataset_name, col_select = NULL, include_context = FALSE) { applicable_datasets <- c( - "gp-practice-populations", "gp-practice-contact-details-and-list-sizes", - "nhsscotland-payments-to-general-practice", "dental-practices-and-patient-registrations", - "general-practitioner-contact-details", "prescribed-dispensed", - "dispenser-location-contact-details", "community-pharmacy-contractor-activity" + "community-pharmacy-contractor-activity", + "dental-practices-and-patient-registrations", + "dispenser-location-contact-details", + "general-practitioner-contact-details", + "gp-practice-contact-details-and-list-sizes", + "gp-practice-populations", + "nhsscotland-payments-to-general-practice", + "prescribed-dispensed" ) # check if data set is within applicable datasets From 6786084490b9e1539f65772238fc24876cff1ee9 Mon Sep 17 00:00:00 2001 From: James McMahon Date: Thu, 1 Aug 2024 14:14:36 +0100 Subject: [PATCH 23/25] Fix typo in comment --- R/get_latest_resource_id.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/get_latest_resource_id.R b/R/get_latest_resource_id.R index d9d534b..4bdabbe 100644 --- a/R/get_latest_resource_id.R +++ b/R/get_latest_resource_id.R @@ -41,7 +41,7 @@ get_latest_resource_id <- function(dataset_name) { ) %>% dplyr::mutate(most_recent_date_created = max(created_date)) - # get the first row of the rources, this will be the same that appears on the top + # get the first row of the resources, this will be the same that appears on the top # on the open data platform all_id_data_first_row <- all_id_data %>% dplyr::slice(1) From 251835545f8f78f02f1bd2a2de47fd527e73e2ba Mon Sep 17 00:00:00 2001 From: James McMahon Date: Thu, 1 Aug 2024 17:39:42 +0100 Subject: [PATCH 24/25] Update get_latest_resource.R --- R/get_latest_resource.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/get_latest_resource.R b/R/get_latest_resource.R index 8c04d5b..b85d277 100644 --- a/R/get_latest_resource.R +++ b/R/get_latest_resource.R @@ -39,7 +39,7 @@ get_latest_resource <- function(dataset_name, rows = NULL, row_filters = NULL, col_select = NULL, - include_context = FALSE) { + include_context = TRUE) { applicable_datasets <- c( "community-pharmacy-contractor-activity", "dental-practices-and-patient-registrations", From a82b253caef7a32f2e81f63b4909bdecc3567dfb Mon Sep 17 00:00:00 2001 From: Moohan Date: Thu, 1 Aug 2024 16:41:18 +0000 Subject: [PATCH 25/25] Update documentation --- man/get_latest_resource.Rd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/man/get_latest_resource.Rd b/man/get_latest_resource.Rd index aaa1faa..9a10e87 100644 --- a/man/get_latest_resource.Rd +++ b/man/get_latest_resource.Rd @@ -9,7 +9,7 @@ get_latest_resource( rows = NULL, row_filters = NULL, col_select = NULL, - include_context = FALSE + include_context = TRUE ) } \arguments{