Skip to content

Commit

Permalink
Merge branch 'master' into issue10
Browse files Browse the repository at this point in the history
  • Loading branch information
Moohan authored Aug 1, 2024
2 parents 312dea4 + 22f73ab commit 68fb34f
Show file tree
Hide file tree
Showing 7 changed files with 64 additions and 90 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/pkgdown.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ jobs:

- name: Deploy to GitHub pages 🚀
if: github.event_name != 'pull_request'
uses: JamesIves/github-pages-deploy-action@v4.5.0
uses: JamesIves/github-pages-deploy-action@v4.6.3
with:
clean: false
branch: gh-pages
Expand Down
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -16,12 +16,12 @@ BugReports: https://github.com/Public-Health-Scotland/phsopendata/issues
Imports:
cli,
dplyr (>= 1.0.0),
glue (>= 1.0.0),
httr (>= 1.0.0),
jsonlite (>= 1.0.0),
magrittr (>= 1.0.0),
purrr,
readr (>= 1.0.0),
rlang (>= 1.0.0),
stringdist,
tibble (>= 3.0.0),
xml2
Expand Down
64 changes: 41 additions & 23 deletions R/get_latest_resource.R
Original file line number Diff line number Diff line change
@@ -1,33 +1,22 @@
#' Get the latest resource from a data set
#'
#' `get_dataset_additional_info()` returns the most
#' recently uploaded resource to a dataset
#' Returns the latest resource available in a dataset.
#'
#' There are some datasets on the open data platform that
#' keep historic resources instead of updating existing ones.
#' For these it is useful to be able to retrieve the latest
#' resource. As of 5.7.2024 these data sets include:
#' resource. As of 1.8.2024 these data sets include:
#' * gp-practice-populations
#' * gp-practice-contact-details-and-list-sizes
#' * nhsscotland-payments-to-general-practice
#' * dental-practices-and-patient-registrations
#' * general-practitioner-contact-details
#' * prescribed-dispensed
#' * prescriptions-in-the-community
#' * dispenser-location-contact-details
#' * community-pharmacy-contractor-activity
#'
#' @param dataset_name name of the dataset as found on
#' \href{https://www.opendata.nhs.scot/}{NHS Open Data platform}
#' @param rows (optional) specify the max number of rows to return.
#' @param row_filters (optional) a named list or vector that specifies values of
#' columns/fields to keep.
#' e.g. list(Date = 20220216, Sex = "Female").
#' @param col_select (optional) a character vector containing the names of
#' desired columns/fields.
#' e.g. c("Date", "Sex").
#' @param include_context (optional) If `TRUE` additional information about the
#' resource will be added as columns to the data, including the resource ID, the
#' resource name, the creation date and the last modified/updated date.
#' @inheritParams get_dataset
#' @inheritParams get_resource
#'
#' @return a [tibble][tibble::tibble-package] with the data
#' @export
Expand All @@ -50,15 +39,44 @@ get_latest_resource <- function(dataset_name,
rows = NULL,
row_filters = NULL,
col_select = NULL,
include_context = FALSE) {
include_context = TRUE) {
applicable_datasets <- c(
"community-pharmacy-contractor-activity",
"dental-practices-and-patient-registrations",
"dispenser-location-contact-details",
"general-practitioner-contact-details",
"gp-practice-contact-details-and-list-sizes",
"gp-practice-populations",
"nhsscotland-payments-to-general-practice",
"prescribed-dispensed"
)

# check if data set is within applicable datasets
# throw error if not
if (!dataset_name %in% applicable_datasets) {
cli::cli_abort(
c(
"The dataset name supplied {.val {dataset_name}} is not within the applicable datasets.
These are: {.val {applicable_datasets}}",
"x" = "Please see {.fun get_latest_resource} documentation.",
"i" = "You can find dataset names in the URL
of a dataset's page on {.url www.opendata.nhs.scot}."
),
call = rlang::caller_env()
)
}


# get the latest resource id
id <- get_latest_resource_id(dataset_name)

return_value <- get_resource(
id,
rows,
row_filters,
col_select,
include_context
data <- get_resource(
res_id = id,
rows = rows,
row_filters = row_filters,
col_select = col_select,
include_context = include_context
)

return(data)
}
59 changes: 9 additions & 50 deletions R/get_latest_resource_id.R
Original file line number Diff line number Diff line change
Expand Up @@ -16,49 +16,9 @@
#'
#' @return a string with the resource id
get_latest_resource_id <- function(dataset_name) {
applicable_datasets <- c(
"gp-practice-populations", "gp-practice-contact-details-and-list-sizes",
"nhsscotland-payments-to-general-practice", "dental-practices-and-patient-registrations",
"general-practitioner-contact-details", "prescribed-dispensed",
"prescriptions-in-the-community", "community-pharmacy-contractor-activity"
)

# throw error if name type/format is invalid
check_dataset_name(dataset_name)

# define query and try API call
query <- list("id" = dataset_name)
content <- try(
phs_GET("package_show", query),
silent = TRUE
)


# check if data set is within applicable datasets
# throw error if not
if (!dataset_name %in% applicable_datasets) {
cli::cli_abort(c(
"The dataset name supplied {.var {dataset_name}} is not within the applicable datasets.
These are:\n
{.var {applicable_datasets}}",
"x" = "Please see get_latest_reource documentation.",
"i" = "You can find dataset names in the URL
of a dataset's page on {.url www.opendata.nhs.scot}."
))
}

# if content contains a 'Not Found Error'
# throw error with suggested dataset name
if (grepl("Not Found Error", content[1])) {
suggest_dataset_name(dataset_name)
}

# send the api request
query <- list("id" = dataset_name)
content <- try(
phs_GET("package_show", query),
silent = TRUE
)
content <- phs_GET("package_show", query)

# retrieve the resource ids from the returned content
all_ids <- purrr::map_chr(content$result$resources, ~ .x$id)

Check warning on line 24 in R/get_latest_resource_id.R

View workflow job for this annotation

GitHub Actions / lint

file=R/get_latest_resource_id.R,line=24,col=3,[object_usage_linter] local variable 'all_ids' assigned but may not be used
Expand All @@ -69,10 +29,10 @@ get_latest_resource_id <- function(dataset_name) {
created_date <- c()
modified_date <- c()

for (i in content$result$resources) {
id <- append(id, i$id)
created_date <- append(created_date, i$created)
modified_date <- append(modified_date, i$last_modified)
for (res in content$result$resources) {
id <- append(id, res$id)
created_date <- append(created_date, res$created)
modified_date <- append(modified_date, res$last_modified)
}
all_id_data <- tibble::tibble(
id = id,
Expand All @@ -81,16 +41,15 @@ get_latest_resource_id <- function(dataset_name) {
) %>%
dplyr::mutate(most_recent_date_created = max(created_date))

# get the first row of the rources, this will be the same that appears on the top
# get the first row of the resources; this is the same resource that appears at the top
# on the open data platform
all_id_data_first_row <- all_id_data %>%
dplyr::slice(1)

# if the resource at the top as appearing on the open data platform also has the most
# recent date created, return it. Otherwise return warning
# If the resource at the top, as it appears on the open data platform, also has the most
# recent created date, return it. Otherwise, throw an error
if (all_id_data_first_row$created_date == all_id_data_first_row$most_recent_date_created) {
return(all_id_data_first_row$id)
} else {
(warning("most recent id could not be identified"))
}
cli::cli_abort("The most recent id could not be identified")
}
12 changes: 6 additions & 6 deletions man/get_latest_resource.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

11 changes: 4 additions & 7 deletions tests/testthat/test-get_latest_resource.R
Original file line number Diff line number Diff line change
@@ -1,10 +1,7 @@
test_that("returns more than 0 datasets", {
# select the first row of the tibble and get the
# number of rows. If no datasets were returned
# this will be 0
expect_equal(nrow(dplyr::slice(get_latest_resource("gp-practice-populations"), 1)), 1)
test_that("returns data for a dataset that is listed", {
expect_no_error(get_latest_resource("gp-practice-populations"))
})

test_that("returns data in the expected format", {
expect_s3_class(get_latest_resource("gp-practice-populations"), "tbl_df")
test_that("returns error for a dataset that is not listed", {
expect_error(get_latest_resource("hospital-codes"))
})
4 changes: 2 additions & 2 deletions tests/testthat/test-get_latest_resource_id.R
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
test_that("returns data for a dataset that is listed", {
expect_no_error(get_latest_resource_id("gp-practice-populations"))
expect_no_error(get_latest_resource("gp-practice-populations"))
})

test_that("returns error for a dataset that is not listed", {
expect_error(get_latest_resource_id("hospital-codes"), "The dataset name supplied `hospital-codes` is not within the applicable datasets")
expect_error(get_latest_resource("hospital-codes"))
})

0 comments on commit 68fb34f

Please sign in to comment.