Skip to content

Commit

Permalink
Split get_latest_resource to its own branch
Browse files Browse the repository at this point in the history
  • Loading branch information
Moohan committed Jul 8, 2024
1 parent 223527d commit 05bd504
Show file tree
Hide file tree
Showing 6 changed files with 267 additions and 0 deletions.
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

export("%>%")
export(get_dataset)
export(get_latest_resource)
export(get_resource)
export(get_resource_sql)
importFrom(magrittr,"%>%")
64 changes: 64 additions & 0 deletions R/get_latest_resource.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
#' Get the latest resource from a data set
#'
#' `get_latest_resource()` returns the most
#' recently uploaded resource to a dataset
#'
#' There are some datasets on the open data platform that
#' keep historic resources instead of updating existing ones.
#' For these it is useful to be able to retrieve the latest
#' resource. As of 5.7.2024 these data sets include:
#' * gp-practice-populations
#' * gp-practice-contact-details-and-list-sizes
#' * nhsscotland-payments-to-general-practice
#' * dental-practices-and-patient-registrations
#' * general-practitioner-contact-details
#' * prescribed-dispensed
#' * prescriptions-in-the-community
#' * community-pharmacy-contractor-activity
#'
#' @param dataset_name name of the dataset as found on
#' \href{https://www.opendata.nhs.scot/}{NHS Open Data platform}
#' @param rows (optional) specify the max number of rows to return.
#' @param row_filters (optional) a named list or vector that specifies values of
#' columns/fields to keep.
#' e.g. list(Date = 20220216, Sex = "Female").
#' @param col_select (optional) a character vector containing the names of
#' desired columns/fields.
#' e.g. c("Date", "Sex").
#' @param include_context (optional) If `TRUE` additional information about the
#' resource will be added as columns to the data, including the resource ID, the
#' resource name, the creation date and the last modified/updated date.
#'
#' @return a [tibble][tibble::tibble-package] with the data
#' @export
#'
#' @examples
#' dataset_name <- "gp-practice-contact-details-and-list-sizes"
#'
#' data <- get_latest_resource(dataset_name)
#'
#' filters <- list("Postcode" = "DD11 1ES")
#' wanted_cols <- c("PracticeCode", "Postcode", "Dispensing")
#'
#' filtered_data <- get_latest_resource(
#'   dataset_name = dataset_name,
#'   row_filters = filters,
#'   col_select = wanted_cols
#' )
#'
get_latest_resource <- function(dataset_name,
                                rows = NULL,
                                row_filters = NULL,
                                col_select = NULL,
                                include_context = FALSE) {
  # get the latest resource id
  id <- get_latest_resource_id(dataset_name)

  # The call is the last expression (not an assignment) so the data is
  # returned visibly. Optional arguments are forwarded by name so they
  # cannot be mismatched positionally.
  get_resource(
    id,
    rows = rows,
    row_filters = row_filters,
    col_select = col_select,
    include_context = include_context
  )
}
96 changes: 96 additions & 0 deletions R/get_latest_resource_id.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
#' get_latest_resource_id
#'
#' To be confident that the resource returned is the one intended,
#' two conditions have to be met. It has to appear at the top
#' of the resource list as shown on the open data platform.
#' The order they are returned via the api is the same
#' as they appear on the open data platform. It also
#' has to have the most recent date created.
#'
#' There are only some datasets that this functionality
#' is relevant to, these are listed within applicable
#' datasets and are the datasets that keep historic
#' resources instead of overwriting them.
#'
#' An error is thrown if the dataset is not one of the applicable
#' datasets, if the API request fails, or if the first resource listed
#' does not also have the most recent creation date.
#'
#' @inheritParams get_dataset
#'
#' @return a string with the resource id
get_latest_resource_id <- function(dataset_name) {
  applicable_datasets <- c(
    "gp-practice-populations",
    "gp-practice-contact-details-and-list-sizes",
    "nhsscotland-payments-to-general-practice",
    "dental-practices-and-patient-registrations",
    "general-practitioner-contact-details",
    "prescribed-dispensed",
    "prescriptions-in-the-community",
    "community-pharmacy-contractor-activity"
  )

  # throw error if name type/format is invalid
  check_dataset_name(dataset_name)

  # check if data set is within applicable datasets
  # throw error if not (done before any API request is made)
  if (!dataset_name %in% applicable_datasets) {
    cli::cli_abort(c(
      "The dataset name supplied {.var {dataset_name}} is not within
      the applicable datasets.
      These are:\n
      {.var {applicable_datasets}}",
      "x" = "Please see get_latest_resource documentation.",
      "i" = "You can find dataset names in the URL
            of a dataset's page on {.url www.opendata.nhs.scot}."
    ))
  }

  # define query and send the API request (once)
  query <- list("id" = dataset_name)
  content <- try(
    phs_GET("package_show", query),
    silent = TRUE
  )

  # if content contains a 'Not Found Error'
  # throw error with suggested dataset name
  if (grepl("Not Found Error", content[1])) {
    suggest_dataset_name(dataset_name)
  }

  # any other request failure: re-signal the original error rather than
  # failing later with an unrelated message when indexing into `content`
  if (inherits(content, "try-error")) {
    stop(attr(content, "condition"))
  }

  resources <- content$result$resources
  if (length(resources) == 0) {
    cli::cli_abort(
      "No resources were found for the dataset {.var {dataset_name}}."
    )
  }

  # retrieve the id and created date of every resource, in API order
  ids <- purrr::map_chr(resources, ~ .x$id)
  created <- purrr::map_chr(resources, ~ .x$created)

  # explicit, locale-independent format; characters after the seconds
  # are ignored by the parser
  created_date <- as.POSIXct(
    created,
    format = "%Y-%m-%dT%H:%M:%S",
    tz = "UTC"
  )

  # the first resource is the one shown at the top on the open data
  # platform; only return it if it also has the most recent created date.
  # isTRUE() makes an unparseable (NA) date count as "not confirmed".
  first_is_newest <- isTRUE(created_date[1] == max(created_date))
  if (!first_is_newest) {
    cli::cli_abort(c(
      "most recent id could not be identified",
      "i" = "The first resource listed for {.var {dataset_name}} does
            not have the most recent creation date."
    ))
  }

  ids[1]
}
70 changes: 70 additions & 0 deletions man/get_latest_resource.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

29 changes: 29 additions & 0 deletions man/get_latest_resource_id.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

7 changes: 7 additions & 0 deletions tests/testthat/test-get_latest_resource_id.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# Both tests call get_latest_resource_id() directly; the first one relies on
# the open data API being reachable.
test_that("returns a resource id for a dataset that is listed", {
  # an error raised here fails the test, so no separate expect_no_error()
  id <- get_latest_resource_id("gp-practice-populations")

  expect_type(id, "character")
  expect_length(id, 1)
  expect_false(is.na(id))
})

test_that("returns error for a dataset that is not listed", {
  # match on the message so an unrelated failure cannot satisfy the test
  expect_error(
    get_latest_resource_id("hospital-codes"),
    regexp = "not within"
  )
})

0 comments on commit 05bd504

Please sign in to comment.