-
Notifications
You must be signed in to change notification settings - Fork 3
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Split
get_latest_resource
to its own branch
- Loading branch information
Showing
6 changed files
with
267 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,64 @@ | ||
#' Get the latest resource from a data set
#'
#' `get_latest_resource()` returns the most
#' recently uploaded resource to a dataset
#'
#' There are some datasets on the open data platform that
#' keep historic resources instead of updating existing ones.
#' For these it is useful to be able to retrieve the latest
#' resource. As of 5.7.2024 these data sets include:
#' * gp-practice-populations
#' * gp-practice-contact-details-and-list-sizes
#' * nhsscotland-payments-to-general-practice
#' * dental-practices-and-patient-registrations
#' * general-practitioner-contact-details
#' * prescribed-dispensed
#' * prescriptions-in-the-community
#' * community-pharmacy-contractor-activity
#'
#' @param dataset_name name of the dataset as found on
#' \href{https://www.opendata.nhs.scot/}{NHS Open Data platform}
#' @param rows (optional) specify the max number of rows to return.
#' @param row_filters (optional) a named list or vector that specifies values of
#' columns/fields to keep.
#' e.g. list(Date = 20220216, Sex = "Female").
#' @param col_select (optional) a character vector containing the names of
#' desired columns/fields.
#' e.g. c("Date", "Sex").
#' @param include_context (optional) If `TRUE` additional information about the
#' resource will be added as columns to the data, including the resource ID, the
#' resource name, the creation date and the last modified/updated date.
#'
#' @return a [tibble][tibble::tibble-package] with the data
#' @export
#'
#' @examples
#' dataset_name <- "gp-practice-contact-details-and-list-sizes"
#'
#' data <- get_latest_resource(dataset_name)
#'
#' filters <- list("Postcode" = "DD11 1ES")
#' wanted_cols <- c("PracticeCode", "Postcode", "Dispensing")
#'
#' filtered_data <- get_latest_resource(
#'   dataset_name = dataset_name,
#'   row_filters = filters,
#'   col_select = wanted_cols
#' )
#'
get_latest_resource <- function(dataset_name,
                                rows = NULL,
                                row_filters = NULL,
                                col_select = NULL,
                                include_context = FALSE) {
  # Resolve the dataset name to the ID of its latest resource.
  id <- get_latest_resource_id(dataset_name)

  # The call is the last expression (not an assignment) so that the result is
  # returned visibly and prints when the function is called at the console.
  # Arguments are forwarded positionally, in the same order as before;
  # get_resource() is defined elsewhere in the package.
  get_resource(
    id,
    rows,
    row_filters,
    col_select,
    include_context
  )
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,96 @@ | ||
#' Get the ID of the latest resource in a dataset
#'
#' To be confident that the resource returned is the one intended,
#' two conditions have to be met:
#' * It has to appear at the top of the resource list as shown on the
#'   open data platform. The order resources are returned via the API is
#'   the same as they appear on the open data platform.
#' * It has to have the most recent date created.
#'
#' There are only some datasets that this functionality
#' is relevant to, these are listed within `applicable_datasets`
#' and are the datasets that keep historic
#' resources instead of overwriting them.
#'
#' @inheritParams get_dataset
#'
#' @return a string with the resource id. An error is thrown if the dataset
#' is not one of the applicable datasets, if the API request fails, if the
#' dataset has no resources, or if the first listed resource does not have
#' the most recent creation date.
get_latest_resource_id <- function(dataset_name) {
  applicable_datasets <- c(
    "gp-practice-populations",
    "gp-practice-contact-details-and-list-sizes",
    "nhsscotland-payments-to-general-practice",
    "dental-practices-and-patient-registrations",
    "general-practitioner-contact-details",
    "prescribed-dispensed",
    "prescriptions-in-the-community",
    "community-pharmacy-contractor-activity"
  )

  # throw error if name type/format is invalid
  check_dataset_name(dataset_name)

  # check the dataset is supported before contacting the API, so that
  # unsupported names fail fast without a network request
  if (!dataset_name %in% applicable_datasets) {
    cli::cli_abort(c(
      "The dataset name supplied {.var {dataset_name}} is not within the
      applicable datasets. These are: {.var {applicable_datasets}}",
      "x" = "Please see get_latest_resource documentation.",
      "i" = "You can find dataset names in the URL
        of a dataset's page on {.url www.opendata.nhs.scot}."
    ))
  }

  # define query and try API call (a single request is sufficient)
  query <- list("id" = dataset_name)
  content <- try(
    phs_GET("package_show", query),
    silent = TRUE
  )

  # if content contains a 'Not Found Error'
  # throw error with suggested dataset name
  if (grepl("Not Found Error", content[1])) {
    suggest_dataset_name(dataset_name)
  }

  # any other failure of the request is reported here, rather than
  # surfacing later as an obscure error when `content` is subset
  if (inherits(content, "try-error")) {
    reason <- conditionMessage(attr(content, "condition"))
    cli::cli_abort(c(
      "The API request for dataset {.var {dataset_name}} failed.",
      "x" = "{reason}"
    ))
  }

  resources <- content$result$resources

  if (length(resources) == 0) {
    cli::cli_abort(
      "The dataset {.var {dataset_name}} does not contain any resources."
    )
  }

  # resource ids and creation times, in the order returned by the API
  resource_ids <- purrr::map_chr(resources, ~ .x$id)
  created_dates <- as.POSIXct(
    purrr::map_chr(resources, ~ .x$created),
    # %OS keeps fractional seconds, so resources created within the same
    # second are still ordered correctly
    format = "%Y-%m-%dT%H:%M:%OS",
    tz = "UTC"
  )

  # the first resource must also be the most recently created one.
  # isTRUE() treats an unparseable (NA) date as "could not be identified".
  first_is_latest <- isTRUE(created_dates[1] == max(created_dates))

  if (!first_is_latest) {
    # abort rather than warn: a warning here would make the function return
    # the warning text, which callers would then treat as a resource id
    cli::cli_abort(c(
      "The most recent id could not be identified.",
      "i" = "The first resource listed for {.var {dataset_name}} does not
        have the most recent creation date."
    ))
  }

  resource_ids[[1]]
}
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
# Tests for get_latest_resource_id(). The first test queries the live open
# data API, so it needs network access.

test_that("returns a single resource id for a dataset that is listed", {
  id <- get_latest_resource_id("gp-practice-populations")

  expect_type(id, "character")
  expect_length(id, 1)
})

test_that("returns error for a dataset that is not listed", {
  # Match on the message so that an unrelated failure (e.g. a network error)
  # cannot make this test pass by accident. A single word is used because
  # the formatted message may be line-wrapped.
  expect_error(
    get_latest_resource_id("hospital-codes"),
    regexp = "applicable"
  )
})