diff --git a/.Rbuildignore b/.Rbuildignore index b14f90f..f6f9655 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -4,3 +4,4 @@ ^\.travis\.yml$ ^README\.Rmd$ ^README-.*\.png$ +^data-raw$ diff --git a/DESCRIPTION b/DESCRIPTION index 204d2ac..fd929df 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -8,12 +8,15 @@ Authors@R: c( person("Tina", "Fu", email = "tina.fu@nhs.net", role = "aut"), person("Ciara", "Gribben", email = "ciaragribben@nhs.net", role = "aut"), person("Chris", "Deans", email = "chrisdeans@nhs.net", role = "aut"), + person("Jaime", "Villacampa", email = "jaime.villacampa@nhs.net", role = "aut"), person("Graeme", "Gowans", email = "graeme.gowans@nhs.net", role = "aut") ) Description: Bespoke functions for commonly undertaken analytical tasks in Public Health Scotland. License: GPL (>= 2) URL: https://github.com/Health-SocialCare-Scotland/phsmethods BugReports: https://github.com/Health-SocialCare-Scotland/phsmethods/issues +Depends: + R (>= 2.10) Imports: dplyr, gdata, @@ -22,7 +25,6 @@ Imports: magrittr, purrr, rlang, - stringi, stringr, tibble, utils diff --git a/NAMESPACE b/NAMESPACE index a958c55..11a4d56 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -5,10 +5,13 @@ export(chi_check) export(chi_pad) export(file_size) export(fin_year) +export(match_area) export(postcode) export(qtr) export(qtr_end) export(qtr_next) export(qtr_prev) +importFrom(magrittr,"%<>%") importFrom(magrittr,"%>%") importFrom(rlang,.data) +importFrom(tibble,tibble) diff --git a/R/data.R b/R/data.R new file mode 100644 index 0000000..dc6c6ae --- /dev/null +++ b/R/data.R @@ -0,0 +1,22 @@ +#' Codes and names of Scottish geographical and administrative areas. +#' +#' A dataset containing Scotland's geography codes and associated area names. +#' It is used within \code{\link{match_area}}. +#' +#' @details \code{geo_code} contains geography codes pertaining to Health +#' Boards, Council Areas, Health and Social Care Partnerships, Intermediate +#' Zones, Data Zones (2001 and 2011), Electoral Wards, Scottish Parliamentary +#' Constituencies, UK Parliamentary Constituencies, Travel to work areas, +#' National Parks, Commmunity Health Partnerships, Localities (S19), +#' Settlements (S20) and Scotland. +#' +#' @seealso The script used to create the \code{area_lookup} dataset on +#' \href{https://github.com/Health-SocialCare-Scotland/phsmethods/blob/master/data-raw/area_lookup.R}{GitHub}. +#' +#' @format A \code{\link[tibble]{tibble}} with 2 variables and over 17,000 rows: +#' \describe{ +#' \item{geo_code}{Standard geography code - 9 characters} +#' \item{area_name}{Name of the area the code represents} +#' } +#' @source \url{https://statistics.gov.scot/} +"area_lookup" diff --git a/R/file_size.R b/R/file_size.R index f4cfe64..70aab34 100644 --- a/R/file_size.R +++ b/R/file_size.R @@ -59,7 +59,7 @@ #' file_size() #' #' # Name and size of .xlsx files only in working directory -#' file_size(pattern = ".xlsx$") +#' file_size(pattern = "\\.xlsx$") #' #' # Size only of alphabetically first file in working directory #' library(magrittr) diff --git a/R/fin_year.R b/R/fin_year.R index 2376577..0e72a58 100644 --- a/R/fin_year.R +++ b/R/fin_year.R @@ -3,15 +3,16 @@ #' @description \code{fin_year} takes a date and assigns it to the correct #' financial year in the PHS specified format. #' -#' @details The PHS accepted format for financial year is yyyy/yy e.g. 2017/18. +#' @details The PHS accepted format for financial year is YYYY/YY e.g. 2017/18. #' #' @param date A date which must be supplied with \code{Date} or \code{POSIXct} -#' class. The functions as.Date(), lubridate::dmy() or as.POSIXct() are examples -#' of functions that can be used to change a variable to the appropriate class. +#' class. \code{\link[base:as.Date]{as.Date()}}, +#' \code{\link[lubridate:ymd]{lubridate::dmy()}} and +#' \code{\link[base:as.POSIXlt]{as.POSIXct()}} are examples of functions which +#' can be used to store dates as an appropriate class. #' #' @examples #' x <- lubridate::dmy(c(21012017, 04042017, 17112017)) -#' #' fin_year(x) #' #' @export @@ -26,7 +27,6 @@ fin_year <- function(date) { # a vector of unique elements from the input, convert those to financial year # and then match them back on to the original input. This vastly improves # performance for large inputs. - tibble::tibble(dates = unique(date)) %>% dplyr::mutate(fin_year = paste0(ifelse(lubridate::month(.data$dates) >= 4, lubridate::year(.data$dates), diff --git a/R/match_area.R b/R/match_area.R new file mode 100644 index 0000000..bcc8806 --- /dev/null +++ b/R/match_area.R @@ -0,0 +1,95 @@ +#' @title Translate geography codes into area names +#' +#' @description \code{match_area} takes a geography code or vector of geography +#' codes. It matches the input to the corresponding value in the +#' \code{\link{area_lookup}} dataset and returns the corresponding area name. +#' +#' @details \code{match_area} relies predominantly on the standard 9 digit +#' geography codes. The only exceptions are: +#' \itemize{ +#' \item RA2701: No Fixed Abode +#' \item RA2702: Rest of UK (Outside Scotland) +#' \item RA2703: Outside the UK +#' \item RA2704: Unknown Residency +#' } +#' +#' \code{match_area} caters for both current and previous versions of geography +#' codes (e.g 2014 and 2019 Health Boards). +#' +#' It can account for geography codes pertaining to Health Boards, Council +#' Areas, Health and Social Care Partnerships, Intermediate Zones, Data Zones +#' (2001 and 2011), Electoral Wards, Scottish Parliamentary Constituencies, +#' UK Parliamentary Constituencies, Travel to work areas, National Parks, +#' Commmunity Health Partnerships, Localities (S19), Settlements (S20) and +#' Scotland. +#' +#' \code{match_area} returns a non-NA value only when an exact match is present +#' between the input value and the corresponding variable in the +#' \code{\link{area_lookup}} dataset. These exact matches are sensitive to both +#' case and spacing. It is advised to inspect \code{\link{area_lookup}} in the +#' case of unexpected results, as these may be explained by subtle differences +#' in transcription between the input value and the corresponding value in the +#' lookup dataset. +#' +#' @param x A geography code or vector of geography codes. + +#' @return Each geography code within Scotland is unique, and consequently +#' \code{match_area} returns a single area name for each input value. + +#' Any input value without a corresponding value in the +#' \code{\link{area_lookup}} dataset will return an NA output value. +#' +#' @examples +#' match_area("S20000010") +#' +#' library(dplyr) +#' df <- tibble(code = c("S02000656", "S02001042", "S08000020", "S12000013")) +#' df %>% mutate(name = match_area(code)) +#' +#' @export + +match_area <- function(x) { + + # Coerce input to character to prevent any warning messages appearing about + # type conversion in dplyr::left_join + code_var <- as.character(x) + + # Calculate the number of non-NA input geography codes which are not 9 + # characters in length or one of the exceptions + n <- length(x[!is.na(x)][nchar(x[!is.na(x)]) != 9 & + !x[!is.na(x)] %in% sprintf("RA270%d", seq(1:4))]) + + # If n is one, the warning message describing the number of non-NA codes + # which are not length 9 or one of the exceptions should use singular verbs + # Otherwise, use plural ones + singular <- "code is" + multiple <- "codes are" + + if (n > 0) { + warning(glue::glue("{n} non-NA input geography ", + "{ifelse(n == 1, singular, multiple)} not 9 characters ", + "in length and will return an NA. The only allowed ", + "codes with a differing number of characters are:\n", + "\U2022 RA2701: No Fixed Abode\n", + "\U2022 RA2702: Rest of UK (Outside Scotland)\n", + "\U2022 RA2703: Outside the UK\n", + "\U2022 RA2704: Unknown Residency")) + } + + # Reading area code to name lookup + area_lookup <- phsmethods::area_lookup + + # Transforming variable into data frame to allow merging with lookup + code_var <- tibble::enframe(code_var, + name = NULL, + value = "geo_code") + + # Merging lookup with code variable and retrieving only the name + dplyr::left_join(code_var, + area_lookup, + by = "geo_code") %>% + + # dplyr::pull takes the last variable if none is specified + dplyr::pull() + +} diff --git a/R/phsmethods.R b/R/phsmethods.R index 50086d6..f1d2a77 100644 --- a/R/phsmethods.R +++ b/R/phsmethods.R @@ -2,14 +2,18 @@ #' #' Standard Methods for use in PHS. #' -#' See the README on \href{https://github.com/Health-SocialCare-Scotland/phsmethods#readme}{GitHub}. +#' See the README on +#' \href{https://github.com/Health-SocialCare-Scotland/phsmethods#readme}{GitHub}. #' #' @docType package #' @name phsmethods #' @importFrom magrittr %>% +#' @importFrom magrittr %<>% #' @importFrom rlang .data +#' @importFrom tibble tibble NULL # Stops notes from appearing in R CMD check because of undefined global -# variable '.' -if (getRversion() >= "2.15.1") utils::globalVariables(c(".")) +# variable '.' and allows area_lookup dataset to be used inside match_area +# function +utils::globalVariables(c(".", "area_lookup")) diff --git a/R/postcode.R b/R/postcode.R index 9f2d3df..134c556 100644 --- a/R/postcode.R +++ b/R/postcode.R @@ -25,11 +25,11 @@ #' consistent with the above format and, if so, assigns the appropriate amount #' of spacing and capitalises any lower case letters. #' -#' @param string A character string or vector of character strings. Input -#' values which adhere to the standard UK postcode format may be upper or lower -#' case and will be formatted regardless of existing spacing. Any input values -#' which do not adhere to the standard UK postcode format will generate an NA -#' and a warning message - see \strong{Value} section for more information. +#' @param x A character string or vector of character strings. Input values +#' which adhere to the standard UK postcode format may be upper or lower case +#' and will be formatted regardless of existing spacing. Any input values which +#' do not adhere to the standard UK postcode format will generate an NA and a +#' warning message - see \strong{Value} section for more information. #' @param format A character string denoting the desired output format. Valid #' options are `pc7` and `pc8`. The default is `pc7`. See \strong{Value} #' section for more information on the string length of output values. @@ -58,25 +58,25 @@ #' postcode(c("KA89NB", "PA152TY"), format = "pc8") #' #' library(dplyr) -#' x <- tibble(pc = c("G429BA", "G207AL", "DD37JY", "DG98BS")) -#' x %>% mutate(pc = postcode(pc)) +#' df <- tibble(pc = c("G429BA", "G207AL", "DD37JY", "DG98BS")) +#' df %>% mutate(pc = postcode(pc)) #' #' @export -postcode <- function(string, format = c("pc7", "pc8")) { +postcode <- function(x, format = c("pc7", "pc8")) { format <- match.arg(format) # Strip out all spaces from the input, so they can be added in again later at # the appropriate juncture - pc <- gsub("\\s", "", string) + x <- gsub("\\s", "", x) # Calculate the number of non-NA values in the input which do not adhere to # the standard UK postcode format n <- length( - pc[!is.na(pc)][!stringr::str_detect( - pc[!is.na(pc)], - "^[A-Za-z]{1,2}[0-9][A-Za-z0-9]?[0-9]{1}[A-Za-z]{2}$")]) + x[!is.na(x)][!stringr::str_detect( + x[!is.na(x)], + "^[A-Za-z]{1,2}[0-9][A-Za-z0-9]?[0-9][A-Za-z]{2}$")]) # If n is one, the warning message describing the number of values which # do not adhere to the standard format should use singular verbs @@ -87,8 +87,8 @@ postcode <- function(string, format = c("pc7", "pc8")) { if ( !all( stringr::str_detect( - pc[!is.na(pc)], - "^[A-Za-z]{1,2}[0-9][A-Za-z0-9]?[0-9]{1}[A-Za-z]{2}$"))) { + x[!is.na(x)], + "^[A-Za-z]{1,2}[0-9][A-Za-z0-9]?[0-9][A-Za-z]{2}$"))) { warning(glue::glue("{n} non-NA input {ifelse(n == 1, singular, multiple)} ", "not adhere to the standard UK postcode format (with ", "or without spaces) and will be coded as NA. The ", @@ -102,18 +102,18 @@ postcode <- function(string, format = c("pc7", "pc8")) { # Replace postcodes which do not adhere to the standard format with NA (this # will also 'replace' NA with NA) - pc <- replace(pc, - !stringr::str_detect( - pc, - "^[A-Za-z]{1,2}[0-9][A-Za-z0-9]?[0-9]{1}[A-Za-z]{2}$"), - NA_character_) + x <- replace(x, + !stringr::str_detect( + x, + "^[A-Za-z]{1,2}[0-9][A-Za-z0-9]?[0-9][A-Za-z]{2}$"), + NA_character_) - if (any(grepl("[a-z]", pc))) { + if (any(grepl("[a-z]", x))) { warning("Lower case letters in any input value(s) adhering to the ", "standard UK postcode format will be converted to upper case") } - pc <- stringr::str_to_upper(pc) + x <- toupper(x) # pc7 format requires all valid postcodes to be of length 7, meaning: # 5 character postcodes have 2 spaces after the 2nd character; @@ -121,10 +121,10 @@ postcode <- function(string, format = c("pc7", "pc8")) { # 7 character postcodes have no spaces if (format == "pc7") { return(dplyr::case_when( - is.na(pc) ~ NA_character_, - stringr::str_length(pc) == 5 ~ sub("(.{2})", "\\1 ", pc), - stringr::str_length(pc) == 6 ~ sub("(.{3})", "\\1 ", pc), - stringr::str_length(pc) == 7 ~ pc + is.na(x) ~ NA_character_, + nchar(x) == 5 ~ sub("(.{2})", "\\1 ", x), + nchar(x) == 6 ~ sub("(.{3})", "\\1 ", x), + nchar(x) == 7 ~ x )) } else { @@ -133,11 +133,9 @@ postcode <- function(string, format = c("pc7", "pc8")) { # All postcodes, whether 5, 6 or 7 characters, have one space before the # last 3 characters return(dplyr::case_when( - is.na(pc) ~ NA_character_, - stringr::str_length(pc) %in% 5:7 ~ - # Reverse the order of the postcodes to add a space after 3 characters, - # then reverse again to get them back the right way around - stringi::stri_reverse(sub("(.{3})", "\\1 ", stringi::stri_reverse(pc))) + is.na(x) ~ NA_character_, + nchar(x) %in% 5:7 ~ paste(stringr::str_sub(x, end = -4), + stringr::str_sub(x, start = -3)) )) } } diff --git a/R/qtr.R b/R/qtr.R index e751a6a..c7fe496 100644 --- a/R/qtr.R +++ b/R/qtr.R @@ -32,13 +32,9 @@ #' #' @examples #' x <- lubridate::dmy(c(26032012, 04052012, 23092012)) -#' #' qtr(x) -#' #' qtr_end(x, format = "short") -#' #' qtr_next(x) -#' #' qtr_prev(x, format = "short") #' @export diff --git a/data-raw/area_lookup.R b/data-raw/area_lookup.R new file mode 100644 index 0000000..9bb5786 --- /dev/null +++ b/data-raw/area_lookup.R @@ -0,0 +1,102 @@ +### This script downloads and formats data pertaining to geographic area names +### and codes from the Scottish Government open data platform. +### +### The resulting file is used inside the match_area function and is made +### available to users of the package via phsmethods::area_lookup. +### +### This script should be re-run prior to every package release, to ensure the +### most up-to-date information provided by the Scottish Government is used. +### +### Any substantial changes to the data should be noted in the section +### pertaining to the latest release in the NEWS.md file. +### +### This code should run successfully on RStudio server. +### It may time out on RStudio desktop due to network security settings. + + +library(SPARQL) +library(magrittr) + +# API address for SG open data platform +endpoint <- "http://statistics.gov.scot/sparql" + +# Query for the platform API, written in SPARQL +query <- "SELECT ?geo_code ?area_name +WHERE { +?s ?entity; + ?geo_code. +OPTIONAL {?s ?area_name.} +} +ORDER BY ?geo_code " + +qd <- SPARQL::SPARQL(endpoint, query) + +area_lookup <- qd[["results"]] %>% + + # Extract the code only + dplyr::mutate(geo_code = substr(geo_code, 2, 10)) %>% + + # Drop codes with no area name + # Storing them isn't necessary as codes without a corresponding area name + # will generate an NA from match_area regardless of whether the code is + # present in the lookup file + tidyr::drop_na(area_name) + +# A bunch of area names don't parse correctly from the SG open data platform +# This seems like a problem with their platform, rather than with SPARQL +# Most of the problems seem to be with parsing non-ASCII characters, although +# not all of the area names which are parsed incorrectly should even have +# non-ASCII characters in them +# This step identifies the problem area names +area_lookup %>% + dplyr::filter(!xfun::is_ascii(area_name)) + +# I did't see an easier solution than googling the codes of the areas with +# problem names, finding out what the real names are, and manually changing them +area_lookup %<>% + dplyr::mutate(area_name = dplyr::case_when( + geo_code == "S13002605" ~ "Ste\U00F2rnabhagh a Deas", + geo_code == "S13002606" ~ "Ste\U00F2rnabhagh a Tuath", + geo_code == "S13002672" ~ "Eilean a' Ch\U00E8o", + geo_code == "S13002891" ~ "Annandale East and Eskdale", + geo_code == "S13002936" ~ "Bo'ness and Blackness", + geo_code == "S13002999" ~ "Eilean a' Ch\U00E8o", + TRUE ~ area_name + )) + +# Manually add some additional codes which aren't present in the lookup file +other_areas <- tibble::tibble( + area_name = c("Scotland", + "Non-NHS Provider/Location", + "Not applicable", + "Golden Jubilee Hospital", + "The State Hospital", + "No Fixed Abode", + "Rest of UK (Outside Scotland)", + "Outside the UK", + "Unknown residency", + "Rest of UK (Outside Scotland)", + "No Fixed Abode", + "Unknown residency", + "Outside the UK"), + geo_code = c("S00000001", + "S27000001", + "S27000002", + "S08100001", + "S08100008", + sprintf("RA270%d", seq(1:4)), + sprintf("S0820000%d", seq(1:4)))) + +# Should the lookup file ever be updated to include any of the additional codes, +# this will prevent those codes from being duplicated in the final file +if (any(other_areas$geo_code %in% area_lookup$geo_code)) { + other_areas %<>% + dplyr::filter(!geo_code %in% area_lookup$geo_code) +} + +area_lookup %<>% + tibble::as_tibble() %>% + dplyr::bind_rows(other_areas) + +# Save data to data/area_lookup.rda +usethis::use_data(area_lookup, overwrite = TRUE) diff --git a/data/area_lookup.rda b/data/area_lookup.rda new file mode 100644 index 0000000..659ee9d Binary files /dev/null and b/data/area_lookup.rda differ diff --git a/man/area_lookup.Rd b/man/area_lookup.Rd new file mode 100644 index 0000000..701d727 --- /dev/null +++ b/man/area_lookup.Rd @@ -0,0 +1,34 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/data.R +\docType{data} +\name{area_lookup} +\alias{area_lookup} +\title{Codes and names of Scottish geographical and administrative areas.} +\format{A \code{\link[tibble]{tibble}} with 2 variables and over 17,000 rows: +\describe{ + \item{geo_code}{Standard geography code - 9 characters} + \item{area_name}{Name of the area the code represents} +}} +\source{ +\url{https://statistics.gov.scot/} +} +\usage{ +area_lookup +} +\description{ +A dataset containing Scotland's geography codes and associated area names. +It is used within \code{\link{match_area}}. +} +\details{ +\code{geo_code} contains geography codes pertaining to Health +Boards, Council Areas, Health and Social Care Partnerships, Intermediate +Zones, Data Zones (2001 and 2011), Electoral Wards, Scottish Parliamentary +Constituencies, UK Parliamentary Constituencies, Travel to work areas, +National Parks, Commmunity Health Partnerships, Localities (S19), +Settlements (S20) and Scotland. +} +\seealso{ +The script used to create the \code{area_lookup} dataset on +\href{https://github.com/Health-SocialCare-Scotland/phsmethods/blob/master/data-raw/area_lookup.R}{GitHub}. +} +\keyword{datasets} diff --git a/man/file_size.Rd b/man/file_size.Rd index afaa959..0d555a6 100644 --- a/man/file_size.Rd +++ b/man/file_size.Rd @@ -70,7 +70,7 @@ be 1,024 units of the preceding denomination. file_size() # Name and size of .xlsx files only in working directory -file_size(pattern = ".xlsx$") +file_size(pattern = "\\\\.xlsx$") # Size only of alphabetically first file in working directory library(magrittr) diff --git a/man/fin_year.Rd b/man/fin_year.Rd index 15e5f9f..60a170b 100644 --- a/man/fin_year.Rd +++ b/man/fin_year.Rd @@ -8,19 +8,20 @@ fin_year(date) } \arguments{ \item{date}{A date which must be supplied with \code{Date} or \code{POSIXct} -class. The functions as.Date(), lubridate::dmy() or as.POSIXct() are examples -of functions that can be used to change a variable to the appropriate class.} +class. \code{\link[base:as.Date]{as.Date()}}, +\code{\link[lubridate:ymd]{lubridate::dmy()}} and +\code{\link[base:as.POSIXlt]{as.POSIXct()}} are examples of functions which +can be used to store dates as an appropriate class.} } \description{ \code{fin_year} takes a date and assigns it to the correct financial year in the PHS specified format. } \details{ -The PHS accepted format for financial year is yyyy/yy e.g. 2017/18. +The PHS accepted format for financial year is YYYY/YY e.g. 2017/18. } \examples{ x <- lubridate::dmy(c(21012017, 04042017, 17112017)) - fin_year(x) } diff --git a/man/match_area.Rd b/man/match_area.Rd new file mode 100644 index 0000000..219721d --- /dev/null +++ b/man/match_area.Rd @@ -0,0 +1,58 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/match_area.R +\name{match_area} +\alias{match_area} +\title{Translate geography codes into area names} +\usage{ +match_area(x) +} +\arguments{ +\item{x}{A geography code or vector of geography codes.} +} +\value{ +Each geography code within Scotland is unique, and consequently +\code{match_area} returns a single area name for each input value. +Any input value without a corresponding value in the +\code{\link{area_lookup}} dataset will return an NA output value. +} +\description{ +\code{match_area} takes a geography code or vector of geography +codes. It matches the input to the corresponding value in the +\code{\link{area_lookup}} dataset and returns the corresponding area name. +} +\details{ +\code{match_area} relies predominantly on the standard 9 digit +geography codes. The only exceptions are: +\itemize{ +\item RA2701: No Fixed Abode +\item RA2702: Rest of UK (Outside Scotland) +\item RA2703: Outside the UK +\item RA2704: Unknown Residency +} + +\code{match_area} caters for both current and previous versions of geography +codes (e.g 2014 and 2019 Health Boards). + +It can account for geography codes pertaining to Health Boards, Council +Areas, Health and Social Care Partnerships, Intermediate Zones, Data Zones +(2001 and 2011), Electoral Wards, Scottish Parliamentary Constituencies, +UK Parliamentary Constituencies, Travel to work areas, National Parks, +Commmunity Health Partnerships, Localities (S19), Settlements (S20) and +Scotland. + +\code{match_area} returns a non-NA value only when an exact match is present +between the input value and the corresponding variable in the +\code{\link{area_lookup}} dataset. These exact matches are sensitive to both +case and spacing. It is advised to inspect \code{\link{area_lookup}} in the +case of unexpected results, as these may be explained by subtle differences +in transcription between the input value and the corresponding value in the +lookup dataset. +} +\examples{ +match_area("S20000010") + +library(dplyr) +df <- tibble(code = c("S02000656", "S02001042", "S08000020", "S12000013")) +df \%>\% mutate(name = match_area(code)) + +} diff --git a/man/phsmethods.Rd b/man/phsmethods.Rd index d154c38..ea38a6c 100644 --- a/man/phsmethods.Rd +++ b/man/phsmethods.Rd @@ -8,5 +8,6 @@ Standard Methods for use in PHS. } \details{ -See the README on \href{https://github.com/Health-SocialCare-Scotland/phsmethods#readme}{GitHub}. +See the README on +\href{https://github.com/Health-SocialCare-Scotland/phsmethods#readme}{GitHub}. } diff --git a/man/postcode.Rd b/man/postcode.Rd index f44610c..04a08a9 100644 --- a/man/postcode.Rd +++ b/man/postcode.Rd @@ -4,14 +4,14 @@ \alias{postcode} \title{Format a postcode} \usage{ -postcode(string, format = c("pc7", "pc8")) +postcode(x, format = c("pc7", "pc8")) } \arguments{ -\item{string}{A character string or vector of character strings. Input -values which adhere to the standard UK postcode format may be upper or lower -case and will be formatted regardless of existing spacing. Any input values -which do not adhere to the standard UK postcode format will generate an NA -and a warning message - see \strong{Value} section for more information.} +\item{x}{A character string or vector of character strings. Input values +which adhere to the standard UK postcode format may be upper or lower case +and will be formatted regardless of existing spacing. Any input values which +do not adhere to the standard UK postcode format will generate an NA and a +warning message - see \strong{Value} section for more information.} \item{format}{A character string denoting the desired output format. Valid options are `pc7` and `pc8`. The default is `pc7`. See \strong{Value} @@ -69,7 +69,7 @@ postcode("G26QE") postcode(c("KA89NB", "PA152TY"), format = "pc8") library(dplyr) -x <- tibble(pc = c("G429BA", "G207AL", "DD37JY", "DG98BS")) -x \%>\% mutate(pc = postcode(pc)) +df <- tibble(pc = c("G429BA", "G207AL", "DD37JY", "DG98BS")) +df \%>\% mutate(pc = postcode(pc)) } diff --git a/man/qtr.Rd b/man/qtr.Rd index 50d18b7..6c4a81a 100644 --- a/man/qtr.Rd +++ b/man/qtr.Rd @@ -49,12 +49,8 @@ Quarters are defined as: } \examples{ x <- lubridate::dmy(c(26032012, 04052012, 23092012)) - qtr(x) - qtr_end(x, format = "short") - qtr_next(x) - qtr_prev(x, format = "short") } diff --git a/tests/testthat/test-file_size.R b/tests/testthat/test-file_size.R index 65607fe..e051d6f 100644 --- a/tests/testthat/test-file_size.R +++ b/tests/testthat/test-file_size.R @@ -26,7 +26,7 @@ test_that("Returns sizes in alphabetical order", { }) test_that("Errors if supplied with invalid filepath", { - expect_error(file_size(here::here("data"))) + expect_error(file_size(here::here("datatest"))) expect_error(file_size(NA)) expect_error(file_size(NULL)) }) diff --git a/tests/testthat/test-match_area.R b/tests/testthat/test-match_area.R new file mode 100644 index 0000000..51b99c2 --- /dev/null +++ b/tests/testthat/test-match_area.R @@ -0,0 +1,53 @@ +context("test-match_area") + +test_that("Returns the correct area names", { + expect_equal(match_area(c("S20000010", + "S01002363", + "S01004303", + "S02000656", + "S02001042", + "S08000020", + "S12000013", + "S12000048", + "S13002522", + "S13002873", + "S14000020", + "S16000124", + "S22000004")), + c("Eaglesham", + "Marybank to Newvalley", + "Elgin South Lesmurdie", + "Govan and Linthouse", + "Peebles North", + "Grampian", + "Na h-Eileanan Siar", + "Perth and Kinross", + "Dunoon", + "Arbroath East and Lunan", + "East Lothian", + "Hamilton, Larkhall and Stonehouse", + "Banff")) +}) + +test_that("Handles NA input values correctly", { + expect_true(is.na(match_area(NA))) + expect_equal(match_area(c("S13002781", NA, NA, "S13003089")), + c("Ayr North", NA, NA, "Ayr North")) +}) + +test_that("Produces warnings for geography codes of invalid length", { + expect_warning(match_area("tiny changes")) + + # The last entry is only 8 characters + expect_warning(match_area(c(NA, "S01012487", "S0101248"))) +}) + +test_that("Produces no warning for codes of valid length with no match", { + expect_silent(match_area("S01000001")) + expect_silent(match_area(c(NA, "RA2703", "123456789"))) +}) + +test_that("Warns about the appropriate number of entries", { + expect_warning(match_area(123223), "^1") + expect_warning(match_area(c(NA, sprintf("RA270%d", seq(1:7)))), "^3") +})