diff --git a/DESCRIPTION b/DESCRIPTION index 64939d7..df018e7 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,11 +1,12 @@ Package: phsmethods Title: Standard Methods for use in Public Health Scotland -Version: 0.2.2 +Version: 0.2.3 Authors@R: c( - person("David", "Caldwell", , "David.Caldwell@phs.scot", role = c("aut", "cre")), + person("Public Health Scotland", , , "phs.source@phs.scot", role = "cph"), + person("David", "Caldwell", , "David.Caldwell@phs.scot", role = "aut"), person("Lucinda", "Lawrie", , "Lucinda.Lawrie@phs.scot", role = "rev"), person("Jack", "Hannah", , "jack.hannah2@phs.scot", role = "aut"), - person("Tina", "Fu", , "Yuyan.Fu2@phs.scot", role = "aut"), + person("Tina", "Fu", , "Yuyan.Fu2@phs.scot", role = c("aut", "cre")), person("Ciara", "Gribben", , "Ciara.Gribben@phs.scot", role = "aut"), person("Chris", "Deans", , "Chris.Deans2@phs.scot", role = "aut"), person("Jaime", "Villacampa", , "Jaime.Villacampa@phs.scot", role = "aut"), @@ -19,28 +20,27 @@ Authors@R: c( Description: Bespoke functions for commonly undertaken analytical tasks in Public Health Scotland. License: GPL (>= 2) -URL: https://github.com/Health-SocialCare-Scotland/phsmethods, +URL: https://github.com/Public-Health-Scotland/phsmethods, https://public-health-scotland.github.io/phsmethods/ BugReports: - https://github.com/Health-SocialCare-Scotland/phsmethods/issues + https://github.com/Public-Health-Scotland/phsmethods/issues Depends: R (>= 2.10) Imports: cli, dplyr, - gdata, lifecycle, lubridate, magrittr, purrr, readr, rlang, + scales (>= 1.0.0), stringr, tibble, utils Suggests: covr, - here, spelling, testthat (>= 3.0.0) RdMacros: @@ -49,4 +49,4 @@ Config/testthat/edition: 3 Encoding: UTF-8 Language: en-GB LazyData: true -RoxygenNote: 7.2.1 +RoxygenNote: 7.2.3 diff --git a/NEWS.md b/NEWS.md index 446d4ba..fb67176 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,16 +1,26 @@ -# phsmethods 0.2.2 +# phsmethods 0.2.3 (2023-09-11) -- Improve `chi_check()` function to make it more efficient and run faster. +- The [{gdata}](https://github.com/r-gregmisc/gdata) import has been dropped and replaced with [{scales}](https://scales.r-lib.org/). -- Improve "Using phsmethods" section in readme to be shorter and more accessible. +- `extract_fin_year()` is now much faster and will use less memory, especially for smaller vectors (1 to 1,000). -- Update all errors, warnings and messages to use the `cli` package. +- `format_postcode()` is now faster and also gains a `quiet` parameter, the default value is `FALSE` but setting it to `TRUE` will skip some of the checks and messages, this is useful when using `format_postcode()` to 'clean-up' and format a vector of postcodes, rather than wanting to check them. Because of the skipped checks `quiet = TRUE` should also run faster. + +- The installation instructions in the README have been updated. + +# phsmethods 0.2.2 (2022-11-14) + +- Improved `chi_check()` to make it more efficient and run faster. + +- Improved the "Using phsmethods" section in the README to be shorter and more accessible. + +- Update all errors, warnings and messages to use [{cli}](https://cli.r-lib.org/). - Improve errors when giving incorrect types to some functions. # phsmethods 0.2.1 (2022-02-11) -- Three functions renamed to improve code clarity: `postcode()` to `format_postcode()`; `age_group()` to `create_age_groups()`; `fin_year()` to `extract_fin_year()`. The old functions will still work but will produce a warning. After a reasonable amount of time they will be removed completely. +- Three functions renamed to improve code clarity: `postcode()` to `format_postcode()`; `age_group()` to `create_age_groups()`; `fin_year()` to `extract_fin_year()`. The old functions will still work but will produce a warning. After a reasonable amount of time, they will be removed completely. - New functions added: `age_calculate()`([#65](https://github.com/Public-Health-Scotland/phsmethods/issues/65), [@Nic-chr](https://github.com/Nic-Chr)); @@ -21,7 +31,7 @@ # phsmethods 0.2.0 (2020-04-17) -- New functions added: `age_group()`([#23](https://github.com/Health-SocialCare-Scotland/phsmethods/issues/23), [@chrisdeans](https://github.com/chrisdeans)); `chi_check()`([#30](https://github.com/Health-SocialCare-Scotland/phsmethods/issues/30), [@graemegowans](https://github.com/graemegowans)); `chi_pad()`([#30](https://github.com/Health-SocialCare-Scotland/phsmethods/issues/30), [@graemegowans](https://github.com/graemegowans)); and `match_area()`([#13](https://github.com/Health-SocialCare-Scotland/phsmethods/issues/13), [@jvillacampa](https://github.com/jvillacampa)). +- New functions added: `age_group()`([#23](https://github.com/Public-Health-Scotland/phsmethods/issues/23), [@chrisdeans](https://github.com/chrisdeans)); `chi_check()`([#30](https://github.com/Public-Health-Scotland/phsmethods/issues/30), [@graemegowans](https://github.com/graemegowans)); `chi_pad()`([#30](https://github.com/Public-Health-Scotland/phsmethods/issues/30), [@graemegowans](https://github.com/graemegowans)); and `match_area()`([#13](https://github.com/Public-Health-Scotland/phsmethods/issues/13), [@jvillacampa](https://github.com/jvillacampa)). - The first argument of `postcode()` is now `x`, as opposed to `string`. This is unlikely to break much, if any, existing code. `postcode()` is also now slightly faster. diff --git a/R/age_calculate.R b/R/age_calculate.R index d22eae6..06a82be 100644 --- a/R/age_calculate.R +++ b/R/age_calculate.R @@ -30,7 +30,6 @@ #' @export age_calculate <- function(start, end = if (lubridate::is.Date(start)) Sys.Date() else Sys.time(), units = c("years", "months"), round_down = TRUE) { - make_inheritance_checks(list(start = start, end = end), target_classes = c("Date", "POSIXt"), ignore_null = FALSE) units <- match.arg(tolower(units), c("years", "months")) diff --git a/R/data.R b/R/data.R index b66a85e..52a8cdf 100644 --- a/R/data.R +++ b/R/data.R @@ -11,7 +11,7 @@ #' Settlements (S20) and Scotland. #' #' @seealso The script used to create the \code{area_lookup} dataset on -#' \href{https://github.com/Health-SocialCare-Scotland/phsmethods/blob/master/data-raw/area_lookup.R}{GitHub}. +#' \href{https://github.com/Public-Health-Scotland/phsmethods/blob/master/data-raw/area_lookup.R}{GitHub}. #' #' @format A \code{\link[tibble]{tibble}} with 2 variables and over 17,000 rows: #' \describe{ diff --git a/R/dob_from_chi.R b/R/dob_from_chi.R index 811c6f5..a5e928a 100644 --- a/R/dob_from_chi.R +++ b/R/dob_from_chi.R @@ -38,7 +38,6 @@ #' max_date = adm_date #' )) dob_from_chi <- function(chi_number, min_date = NULL, max_date = NULL, chi_check = TRUE) { - # Do type checking on the params if (!inherits(chi_number, "character")) { cli::cli_abort("{.arg chi_number} must be a {.cls character} vector, not a {.cls {class(chi_number)}} vector.") @@ -169,7 +168,6 @@ dob_from_chi <- function(chi_number, min_date = NULL, max_date = NULL, chi_check #' ref_date = dis_date #' )) age_from_chi <- function(chi_number, ref_date = NULL, min_age = 0, max_age = NULL, chi_check = TRUE) { - # Do type checking on the params if (!inherits(chi_number, "character")) { cli::cli_abort("{.arg chi_number} must be a {.cls character} vector, not a {.cls {class(chi_number)}} vector.") diff --git a/R/extract_fin_year.R b/R/extract_fin_year.R index 3f27730..2a74537 100644 --- a/R/extract_fin_year.R +++ b/R/extract_fin_year.R @@ -1,7 +1,7 @@ -#' @title Assign a date to a financial year +#' @title Extract the formatted financial year from a date #' -#' @description \code{extract_fin_year} takes a date and assigns it to the correct -#' financial year in the PHS specified format. +#' @description \code{extract_fin_year} takes a date and extracts the +#' correct financial year in the PHS specified format from it. #' #' @details The PHS accepted format for financial year is YYYY/YY e.g. 2017/18. #' @@ -17,7 +17,8 @@ #' @export extract_fin_year <- function(date) { if (!inherits(date, c("Date", "POSIXct"))) { - cli::cli_abort("{.arg date} must be a {.cls Date} or {.cls POSIXct} vector, not a {.cls {class(date)}} vector.") + cli::cli_abort("{.arg date} must be a {.cls Date} or {.cls POSIXct} vector, + not a {.cls {class(date)}} vector.") } # Simply converting all elements of the input vector resulted in poor @@ -26,29 +27,18 @@ extract_fin_year <- function(date) { # and then match them back on to the original input. This vastly improves # performance for large inputs. - x <- tibble::tibble(dates = unique(date)) %>% - dplyr::mutate( - fyear = paste0( - ifelse(lubridate::month(.data$dates) >= 4, - lubridate::year(.data$dates), - lubridate::year(.data$dates) - 1 - ), - "/", - substr( - ifelse(lubridate::month(.data$dates) >= 4, - lubridate::year(.data$dates) + 1, - lubridate::year(.data$dates) - ), - 3, 4 - ) - ), - fyear = ifelse(is.na(.data$dates), - NA_character_, - .data$fyear - ) - ) + unique_date <- unique(date) - tibble::tibble(dates = date) %>% - dplyr::left_join(x, by = "dates") %>% - dplyr::pull(.data$fyear) + unique_fy_q <- + lubridate::year(unique_date) - (lubridate::month(unique_date) %in% 1:3) + + unique_fy <- ifelse( + is.na(unique_date), + NA_character_, + paste0(unique_fy_q, "/", (unique_fy_q %% 100L) + 1L) + ) + + fin_years <- unique_fy[match(date, unique_date)] + + return(fin_years) } diff --git a/R/file_size.R b/R/file_size.R index f3bd694..4f38bbe 100644 --- a/R/file_size.R +++ b/R/file_size.R @@ -84,41 +84,34 @@ file_size <- function(filepath = getwd(), pattern = NULL) { cli::cli_abort("{.arg pattern} must be a {.cls character}, not a {.cls {class(pattern)}}.") } - x <- dir(path = filepath, pattern = pattern) + file_list <- list.files(path = filepath, pattern = pattern) - if (length(x) == 0) { + if (length(file_list) == 0) { return(NULL) } - y <- x %>% - purrr::map_dbl(~ file.info(paste0(filepath, "/", .))$size) %>% - # The gdata package defines a kilobyte (KB) as 1,000 bytes, and a - # kibibyte (KiB) as 1,024 bytes - # In PHS a kilobyte is normally taken to be 1,024 bytes - # As a workaround, calculate file sizes in kibibytes (or higher), then - # drop the `i` from the output - gdata::humanReadable(standard = "IEC", digits = 0) %>% - gsub("i", "", .) %>% - trimws() + formatted_size <- file.path(filepath, file_list) %>% + file.size() %>% + scales::number_bytes(units = "si") - z <- dplyr::case_when( - stringr::str_detect(x, "\\.xls(b|m|x)?$") ~ "Excel ", - stringr::str_detect(x, "\\.csv$") ~ "CSV ", - stringr::str_detect(x, "\\.z?sav$") ~ "SPSS ", - stringr::str_detect(x, "\\.doc(m|x)?$") ~ "Word ", - stringr::str_detect(x, "\\.rds$") ~ "RDS ", - stringr::str_detect(x, "\\.txt$") ~ "Text ", - stringr::str_detect(x, "\\.fst$") ~ "FST ", - stringr::str_detect(x, "\\.pdf$") ~ "PDF ", - stringr::str_detect(x, "\\.tsv$") ~ "TSV ", - stringr::str_detect(x, "\\.html$") ~ "HTML ", - stringr::str_detect(x, "\\.ppt(m|x)?$") ~ "PowerPoint ", - stringr::str_detect(x, "\\.md$") ~ "Markdown ", - TRUE ~ "" + file_type <- dplyr::case_when( + stringr::str_detect(file_list, "\\.xls(b|m|x)?$") ~ "Excel ", + stringr::str_detect(file_list, "\\.csv$") ~ "CSV ", + stringr::str_detect(file_list, "\\.z?sav$") ~ "SPSS ", + stringr::str_detect(file_list, "\\.doc(m|x)?$") ~ "Word ", + stringr::str_detect(file_list, "\\.rds$") ~ "RDS ", + stringr::str_detect(file_list, "\\.txt$") ~ "Text ", + stringr::str_detect(file_list, "\\.fst$") ~ "FST ", + stringr::str_detect(file_list, "\\.pdf$") ~ "PDF ", + stringr::str_detect(file_list, "\\.tsv$") ~ "TSV ", + stringr::str_detect(file_list, "\\.html$") ~ "HTML ", + stringr::str_detect(file_list, "\\.ppt(m|x)?$") ~ "PowerPoint ", + stringr::str_detect(file_list, "\\.md$") ~ "Markdown ", + .default = "" ) tibble::tibble( - name = list.files(filepath, pattern), - size = paste0(z, y) + name = file_list, + size = paste0(file_type, formatted_size) ) } diff --git a/R/format_postcode.R b/R/format_postcode.R index ccfc48e..d242fd5 100644 --- a/R/format_postcode.R +++ b/R/format_postcode.R @@ -19,9 +19,9 @@ #' \href{https://assets.publishing.service.gov.uk/government/uploads/system/uploads/attachment_data/file/283357/ILRSpecification2013_14Appendix_C_Dec2012_v1.pdf}{UK government regulations} #' mandate which letters and numbers can be used in specific sections of a #' postcode. However, these regulations are liable to change over time. For -#' this reason, \code{format_postcode} does not validate whether a given postcode -#' actually exists, or whether specific numbers and letters are being used in -#' the appropriate places. It only assesses whether the given input is +#' this reason, \code{format_postcode} does not validate whether a given +#' postcode actually exists, or whether specific numbers and letters are being +#' used in the appropriate places. It only assesses whether the given input is #' consistent with the above format and, if so, assigns the appropriate amount #' of spacing and capitalises any lower case letters. #' @@ -33,14 +33,19 @@ #' @param format A character string denoting the desired output format. Valid #' options are `pc7` and `pc8`. The default is `pc7`. See \strong{Value} #' section for more information on the string length of output values. +#' @param quiet (optional) If quiet is `TRUE` all messages and warnings will be +#' suppressed. This is useful in a production context and when you are sure of +#' the data or you are specifically using this function to remove invalid +#' postcodes. This will also make the function a bit quicker as fewer checks +#' are performed. #' #' @return When \code{format} is set equal to \code{pc7}, \code{format_postcode} #' returns a character string of length 7. 5 character postcodes have two #' spaces after the 2nd character; 6 character postcodes have 1 space after the #' 3rd character; and 7 character postcodes have no spaces. #' -#' When \code{format} is set equal to \code{pc8}, \code{format_postcode} returns a -#' character string with maximum length 8. All postcodes, whether 5, 6 or 7 +#' When \code{format} is set equal to \code{pc8}, \code{format_postcode} returns +#' a character string with maximum length 8. All postcodes, whether 5, 6 or 7 #' characters, have one space before the last 3 characters. #' #' Any input values which do not adhere to the standard UK postcode format will @@ -59,68 +64,93 @@ #' #' library(dplyr) #' df <- tibble(postcode = c("G429BA", "G207AL", "DD37JY", "DG98BS")) -#' df %>% mutate(postcode = format_postcode(postcode)) +#' df %>% +#' mutate(postcode = format_postcode(postcode)) #' @export - -format_postcode <- function(x, format = c("pc7", "pc8")) { +format_postcode <- function(x, format = c("pc7", "pc8"), quiet = FALSE) { + if (!inherits(x, "character")) { + cli::cli_abort("The input must be a {.cls character} vector, + not a {.cls {class(x)}} vector.") + } + if (!inherits(quiet, "logical")) { + cli::cli_abort( + "{.arg quiet} must be a {.cls logical}, not a {.cls {class(x)}}." + ) + } format <- match.arg(format) + x_upper <- stringr::str_to_upper(x) + + if (!quiet) { + n_lowercase <- sum(x != x_upper, na.rm = TRUE) + if (n_lowercase > 0) { + cli::cli_warn( + "{n_lowercase} value{?s} {?has/have} lower case letters these will be + converted to upper case." + ) + } + } # The standard regex for a UK postcode - uk_pc_regex <- "^[A-Za-z]{1,2}[0-9][A-Za-z0-9]?[0-9][A-Za-z]{2}$" + uk_pc_regex <- "^[A-Z]{1,2}[0-9][A-Z0-9]?[0-9][A-Z]{2}$" # Strip out all spaces from the input, so they can be added in again later at # the appropriate juncture - x <- gsub("\\s", "", x) + x_upper <- gsub(" ", "", x_upper, fixed = TRUE) # Calculate the number of non-NA values in the input which do not adhere to # the standard UK postcode format - n_bad_format <- sum(!stringr::str_detect(x, uk_pc_regex), na.rm = TRUE) + bad_format <- stringr::str_detect(x_upper, uk_pc_regex, negate = TRUE) + n_bad_format <- sum(bad_format, na.rm = TRUE) if (n_bad_format > 0) { - cli::cli_warn(c( - "{n_bad_format} non-NA input value{?s} {?does/do} not adhere to the standard UK - postcode format (with or without spaces) and will be coded as NA.", - "The standard format is:", - "*" = "1 or 2 letters, followed by", - "*" = "1 number, followed by", - "*" = "1 optional letter or number, followed by", - "*" = "1 number, followed by", - "*" = "2 letters" - )) + if (!quiet) { + cli::cli_warn(c( + "{n_bad_format} non-NA input value{?s} {?does/do} not adhere to the + standard UK postcode format (with or without spaces) and will be + coded as NA.", + "The standard format is:", + "*" = "1 or 2 letters, followed by", + "*" = "1 number, followed by", + "*" = "1 optional letter or number, followed by", + "*" = "1 number, followed by", + "*" = "2 letters" + )) + } - # Replace postcodes which do not adhere to the standard format with NA (this - # will also 'replace' NA with NA) - x <- replace(x, !stringr::str_detect(x, uk_pc_regex), NA_character_) + # Replace postcodes which do not adhere to the standard format with NA + # (this will also 'replace' NA with NA) + x_upper <- replace(x_upper, bad_format, NA_character_) } - - if (any(grepl("[a-z]", x))) { - cli::cli_warn("Lower case letters in any input value(s) adhering to the - standard UK postcode format will be converted to upper case.") - } - - x <- toupper(x) - # pc7 format requires all valid postcodes to be of length 7, meaning: # 5 character postcodes have 2 spaces after the 2nd character; # 6 character postcodes have 1 space after the 3rd character; # 7 character postcodes have no spaces + x_upper_len <- nchar(x_upper) if (format == "pc7") { return(dplyr::case_when( - is.na(x) ~ NA_character_, - nchar(x) == 5 ~ sub("(.{2})", "\\1 ", x), - nchar(x) == 6 ~ sub("(.{3})", "\\1 ", x), - nchar(x) == 7 ~ x + is.na(x_upper) ~ NA_character_, + x_upper_len == 5 ~ paste0( + substr(x_upper, 1, 2), + " ", + substr(x_upper, 3, 5) + ), + x_upper_len == 6 ~ paste0( + substr(x_upper, 1, 3), + " ", + substr(x_upper, 4, 6) + ), + x_upper_len == 7 ~ x_upper )) } else { # pc8 format requires all valid postcodes to be of maximum length 8 # All postcodes, whether 5, 6 or 7 characters, have one space before the # last 3 characters return(dplyr::case_when( - is.na(x) ~ NA_character_, - nchar(x) %in% 5:7 ~ paste( - stringr::str_sub(x, end = -4), - stringr::str_sub(x, start = -3) + is.na(x_upper) ~ NA_character_, + x_upper_len %in% 5:7 ~ paste( + stringr::str_sub(x_upper, end = -4), + stringr::str_sub(x_upper, start = -3) ) )) } diff --git a/R/match_area.R b/R/match_area.R index 5dcf6be..896c272 100644 --- a/R/match_area.R +++ b/R/match_area.R @@ -49,7 +49,6 @@ #' @export match_area <- function(x) { - # Coerce input to character to prevent any warning messages appearing about # type conversion in dplyr::left_join code_var <- as.character(x) diff --git a/R/rename.R b/R/rename.R index 593dc29..b4f053e 100644 --- a/R/rename.R +++ b/R/rename.R @@ -4,9 +4,9 @@ #' `r lifecycle::badge('deprecated')` #' #' phsmethods 0.2.1 renamed a number of functions -#' to improve code clarity. The old functions will still -#' work but will produce a warning. After a reasonable -#' amount of time they will be removed completely. +#' to improve code clarity. The old functions no longer +#' work and will error. At the next update they will be +#' removed completely. #' #' * `postcode()` -> `format_postcode()` #' * `age_group()` -> `create_age_groups()` @@ -21,7 +21,7 @@ NULL #' @rdname rename #' @export postcode <- function(x, format = c("pc7", "pc8")) { - lifecycle::deprecate_warn("0.2.1", "postcode()", "format_postcode()") + lifecycle::deprecate_stop("0.2.1", "postcode()", "format_postcode()") return(format_postcode(x = x, format = format)) } @@ -33,7 +33,7 @@ age_group <- function(x, to = 90, by = 5, as_factor = FALSE) { - lifecycle::deprecate_warn("0.2.1", "age_group()", "create_age_groups()") + lifecycle::deprecate_stop("0.2.1", "age_group()", "create_age_groups()") return(create_age_groups( x = x, @@ -47,7 +47,7 @@ age_group <- function(x, #' @rdname rename #' @export fin_year <- function(date) { - lifecycle::deprecate_warn("0.2.1", "fin_year()", "extract_fin_year()") + lifecycle::deprecate_stop("0.2.1", "fin_year()", "extract_fin_year()") return(extract_fin_year(date = date)) } diff --git a/R/sex_from_chi.R b/R/sex_from_chi.R index 8418ecb..cb4c1e9 100644 --- a/R/sex_from_chi.R +++ b/R/sex_from_chi.R @@ -40,7 +40,6 @@ #' df <- tibble(chi = c("0101011237", "0101336489", NA)) #' df %>% mutate(chi_sex = sex_from_chi(chi)) sex_from_chi <- function(chi_number, male_value = 1L, female_value = 2L, as_factor = FALSE, chi_check = TRUE) { - # Do type checking on male/female values male_class <- class(male_value) female_class <- class(female_value) diff --git a/README.Rmd b/README.Rmd index 65a0e96..9ad1ae7 100644 --- a/README.Rmd +++ b/README.Rmd @@ -16,7 +16,7 @@ knitr::opts_chunk$set( [![GitHub release (latest by date)](https://img.shields.io/github/v/release/Public-Health-Scotland/phsmethods)](https://github.com/Public-Health-Scotland/phsmethods/releases/latest) [![Build Status](https://github.com/Public-Health-Scotland/phsmethods/workflows/R-CMD-check/badge.svg)](https://github.com/Public-Health-Scotland/phsmethods/actions) -[![codecov](https://codecov.io/gh/Public-Health-Scotland/phsmethods/branch/master/graph/badge.svg)](https://codecov.io/gh/Public-Health-Scotland/phsmethods) +[![codecov](https://codecov.io/gh/Public-Health-Scotland/phsmethods/branch/master/graph/badge.svg)](https://app.codecov.io/gh/Public-Health-Scotland/phsmethods) `phsmethods` contains functions for commonly undertaken analytical tasks in [Public Health Scotland (PHS)](https://www.publichealthscotland.scot/): @@ -33,23 +33,33 @@ knitr::opts_chunk$set( - `dob_from_chi()` extracts Date of Birth (DoB) from the CHI number - `age_from_chi()` extracts age from the CHI number -`phsmethods` can be used on both the [server](https://rstudio.nhsnss.scot.nhs.uk/) and desktop versions of RStudio. +`phsmethods` can be used on both the [PHS server](https://pwb.publichealthscotland.org/) and desktop versions of RStudio. ## Installation -To install `phsmethods`, the package `remotes` is required, and can be installed with `install.packages("remotes")`. +If you are using the PHS Posit Workbench the default repository is the PHS Posit Package Manager, the benefit of this is that `phsmethods` is listed there so you can install it with: -You can then install `phsmethods` on RStudio server from GitHub with: +``` r +install.packages("phsmethods") +``` -```{r gh-installation, eval = FALSE} -remotes::install_github("Public-Health-Scotland/phsmethods", - upgrade = "never" -) +To install `phsmethods` directly from GitHub, the package `remotes` is required, and can be +installed with `install.packages("remotes")`. + +You can then install `phsmethods` from GitHub with: + +``` r +remotes::install_github("Public-Health-Scotland/phsmethods") ``` -Network security settings may prevent `remotes::install_github()` from working on RStudio desktop. If this is the case, `phsmethods` can be installed by downloading the [zip of the repository](https://github.com/Public-Health-Scotland/phsmethods/archive/master.zip) and running the following code (replacing the section marked `<>`, including the arrows themselves): +However, network security settings may prevent `remotes::install_github()` from +working on RStudio desktop. If this is the case, `phsmethods` can be +installed by downloading the [zip of the +repository](https://github.com/Public-Health-Scotland/phsmethods/archive/master.zip) +and running the following code (replacing the section marked `<>`, +including the arrows themselves): -```{r source-installation, eval = FALSE} +``` r remotes::install_local("/phsmethods-master.zip", upgrade = "never" ) diff --git a/README.md b/README.md index 65365a1..6ba6466 100644 --- a/README.md +++ b/README.md @@ -7,7 +7,7 @@ date)](https://img.shields.io/github/v/release/Public-Health-Scotland/phsmethods)](https://github.com/Public-Health-Scotland/phsmethods/releases/latest) [![Build Status](https://github.com/Public-Health-Scotland/phsmethods/workflows/R-CMD-check/badge.svg)](https://github.com/Public-Health-Scotland/phsmethods/actions) -[![codecov](https://codecov.io/gh/Public-Health-Scotland/phsmethods/branch/master/graph/badge.svg)](https://codecov.io/gh/Public-Health-Scotland/phsmethods) +[![codecov](https://codecov.io/gh/Public-Health-Scotland/phsmethods/branch/master/graph/badge.svg)](https://app.codecov.io/gh/Public-Health-Scotland/phsmethods) `phsmethods` contains functions for commonly undertaken analytical tasks in [Public Health Scotland @@ -28,26 +28,32 @@ in [Public Health Scotland - `dob_from_chi()` extracts Date of Birth (DoB) from the CHI number - `age_from_chi()` extracts age from the CHI number -`phsmethods` can be used on both the -[server](https://rstudio.nhsnss.scot.nhs.uk/) and desktop versions of +`phsmethods` can be used on both the [PHS +server](https://pwb.publichealthscotland.org/) and desktop versions of RStudio. ## Installation -To install `phsmethods`, the package `remotes` is required, and can be -installed with `install.packages("remotes")`. +If you are using the PHS Posit Workbench the default repository is the +PHS Posit Package Manager, the benefit of this is that `phsmethods` is +listed there so you can install it with: -You can then install `phsmethods` on RStudio server from GitHub with: +``` r +install.packages("phsmethods") +``` + +To install `phsmethods` directly from GitHub, the package `remotes` is +required, and can be installed with `install.packages("remotes")`. + +You can then install `phsmethods` from GitHub with: ``` r -remotes::install_github("Public-Health-Scotland/phsmethods", - upgrade = "never" -) +remotes::install_github("Public-Health-Scotland/phsmethods") ``` -Network security settings may prevent `remotes::install_github()` from -working on RStudio desktop. If this is the case, `phsmethods` can be -installed by downloading the [zip of the +However, network security settings may prevent +`remotes::install_github()` from working on RStudio desktop. If this is +the case, `phsmethods` can be installed by downloading the [zip of the repository](https://github.com/Public-Health-Scotland/phsmethods/archive/master.zip) and running the following code (replacing the section marked `<>`, including the arrows themselves): diff --git a/man/area_lookup.Rd b/man/area_lookup.Rd index 8c84d90..9986703 100644 --- a/man/area_lookup.Rd +++ b/man/area_lookup.Rd @@ -31,6 +31,6 @@ Settlements (S20) and Scotland. } \seealso{ The script used to create the \code{area_lookup} dataset on -\href{https://github.com/Health-SocialCare-Scotland/phsmethods/blob/master/data-raw/area_lookup.R}{GitHub}. +\href{https://github.com/Public-Health-Scotland/phsmethods/blob/master/data-raw/area_lookup.R}{GitHub}. } \keyword{datasets} diff --git a/man/extract_fin_year.Rd b/man/extract_fin_year.Rd index b7037f8..d3e083e 100644 --- a/man/extract_fin_year.Rd +++ b/man/extract_fin_year.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/extract_fin_year.R \name{extract_fin_year} \alias{extract_fin_year} -\title{Assign a date to a financial year} +\title{Extract the formatted financial year from a date} \usage{ extract_fin_year(date) } @@ -14,8 +14,8 @@ class. \code{\link[base:as.Date]{as.Date()}}, can be used to store dates as an appropriate class.} } \description{ -\code{extract_fin_year} takes a date and assigns it to the correct -financial year in the PHS specified format. +\code{extract_fin_year} takes a date and extracts the +correct financial year in the PHS specified format from it. } \details{ The PHS accepted format for financial year is YYYY/YY e.g. 2017/18. diff --git a/man/format_postcode.Rd b/man/format_postcode.Rd index 5747976..b3c6b09 100644 --- a/man/format_postcode.Rd +++ b/man/format_postcode.Rd @@ -4,7 +4,7 @@ \alias{format_postcode} \title{Format a postcode} \usage{ -format_postcode(x, format = c("pc7", "pc8")) +format_postcode(x, format = c("pc7", "pc8"), quiet = FALSE) } \arguments{ \item{x}{A character string or vector of character strings. Input values @@ -16,6 +16,12 @@ warning message - see \strong{Value} section for more information.} \item{format}{A character string denoting the desired output format. Valid options are `pc7` and `pc8`. The default is `pc7`. See \strong{Value} section for more information on the string length of output values.} + +\item{quiet}{(optional) If quiet is `TRUE` all messages and warnings will be +suppressed. This is useful in a production context and when you are sure of +the data or you are specifically using this function to remove invalid +postcodes. This will also make the function a bit quicker as fewer checks +are performed.} } \value{ When \code{format} is set equal to \code{pc7}, \code{format_postcode} @@ -23,8 +29,8 @@ returns a character string of length 7. 5 character postcodes have two spaces after the 2nd character; 6 character postcodes have 1 space after the 3rd character; and 7 character postcodes have no spaces. -When \code{format} is set equal to \code{pc8}, \code{format_postcode} returns a -character string with maximum length 8. All postcodes, whether 5, 6 or 7 +When \code{format} is set equal to \code{pc8}, \code{format_postcode} returns + a character string with maximum length 8. All postcodes, whether 5, 6 or 7 characters, have one space before the last 3 characters. Any input values which do not adhere to the standard UK postcode format will @@ -58,9 +64,9 @@ The standard UK postcode format (without spaces) is: \href{https://assets.publishing.service.gov.uk/government/uploads/system/uploads/attachment_data/file/283357/ILRSpecification2013_14Appendix_C_Dec2012_v1.pdf}{UK government regulations} mandate which letters and numbers can be used in specific sections of a postcode. However, these regulations are liable to change over time. For -this reason, \code{format_postcode} does not validate whether a given postcode -actually exists, or whether specific numbers and letters are being used in -the appropriate places. It only assesses whether the given input is +this reason, \code{format_postcode} does not validate whether a given +postcode actually exists, or whether specific numbers and letters are being +used in the appropriate places. It only assesses whether the given input is consistent with the above format and, if so, assigns the appropriate amount of spacing and capitalises any lower case letters. } @@ -70,5 +76,6 @@ format_postcode(c("KA89NB", "PA152TY"), format = "pc8") library(dplyr) df <- tibble(postcode = c("G429BA", "G207AL", "DD37JY", "DG98BS")) -df \%>\% mutate(postcode = format_postcode(postcode)) +df \%>\% + mutate(postcode = format_postcode(postcode)) } diff --git a/man/rename.Rd b/man/rename.Rd index 567b7f1..a0e14f0 100644 --- a/man/rename.Rd +++ b/man/rename.Rd @@ -16,9 +16,9 @@ fin_year(date) \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#deprecated}{\figure{lifecycle-deprecated.svg}{options: alt='[Deprecated]'}}}{\strong{[Deprecated]}} phsmethods 0.2.1 renamed a number of functions -to improve code clarity. The old functions will still -work but will produce a warning. After a reasonable -amount of time they will be removed completely. +to improve code clarity. The old functions no longer +work and will error. At the next update they will be +removed completely. \itemize{ \item \code{postcode()} -> \code{format_postcode()} \item \code{age_group()} -> \code{create_age_groups()} diff --git a/tests/testthat/_snaps/UNIX/file_size.md b/tests/testthat/_snaps/UNIX/file_size.md new file mode 100644 index 0000000..d2cbe3a --- /dev/null +++ b/tests/testthat/_snaps/UNIX/file_size.md @@ -0,0 +1,28 @@ +# Output is identical over time + + Code + file_size(test_path("files")) + Output + # A tibble: 8 x 2 + name size + + 1 airquality.xls Excel 26 kB + 2 bod.xlsx Excel 5 kB + 3 iris.csv CSV 4 kB + 4 mtcars.sav SPSS 4 kB + 5 plant-growth.rds RDS 316 B + 6 puromycin.txt Text 418 B + 7 stackloss.fst FST 897 B + 8 swiss.tsv TSV 1 kB + +--- + + Code + file_size(test_path("files"), "xlsx?") + Output + # A tibble: 2 x 2 + name size + + 1 airquality.xls Excel 26 kB + 2 bod.xlsx Excel 5 kB + diff --git a/tests/testthat/_snaps/format_postcode.md b/tests/testthat/_snaps/format_postcode.md new file mode 100644 index 0000000..ae6814a --- /dev/null +++ b/tests/testthat/_snaps/format_postcode.md @@ -0,0 +1,67 @@ +# Warning gives true number of values that don't adhere to format + + Code + format_postcode("g2") + Warning + 1 value has lower case letters these will be converted to upper case. + 1 non-NA input value does not adhere to the standard UK postcode format (with or without spaces) and will be coded as NA. + The standard format is: + * 1 or 2 letters, followed by + * 1 number, followed by + * 1 optional letter or number, followed by + * 1 number, followed by + * 2 letters + Output + [1] NA + +--- + + Code + format_postcode(c("DG98BS", "dg98b")) + Warning + 1 value has lower case letters these will be converted to upper case. + 1 non-NA input value does not adhere to the standard UK postcode format (with or without spaces) and will be coded as NA. + The standard format is: + * 1 or 2 letters, followed by + * 1 number, followed by + * 1 optional letter or number, followed by + * 1 number, followed by + * 2 letters + Output + [1] "DG9 8BS" NA + +--- + + Code + format_postcode(c("ML53RB", NA, "ML5", "???", 53, as.factor("ML53RB"))) + Warning + 4 non-NA input values do not adhere to the standard UK postcode format (with or without spaces) and will be coded as NA. + The standard format is: + * 1 or 2 letters, followed by + * 1 number, followed by + * 1 optional letter or number, followed by + * 1 number, followed by + * 2 letters + Output + [1] "ML5 3RB" NA NA NA NA NA + +--- + + Code + format_postcode(c("KY1 1RZ", "ky1rz", "KY11 R", "KY11R!"), quiet = TRUE) + Output + [1] "KY1 1RZ" NA NA NA + Code + format_postcode(c("KY1 1RZ", "ky1rz", "KY11 R", "KY11R!"), quiet = FALSE) + Warning + 1 value has lower case letters these will be converted to upper case. + 3 non-NA input values do not adhere to the standard UK postcode format (with or without spaces) and will be coded as NA. + The standard format is: + * 1 or 2 letters, followed by + * 1 number, followed by + * 1 optional letter or number, followed by + * 1 number, followed by + * 2 letters + Output + [1] "KY1 1RZ" NA NA NA + diff --git a/tests/testthat/_snaps/rename.md b/tests/testthat/_snaps/rename.md index ec1727e..8ddf50b 100644 --- a/tests/testthat/_snaps/rename.md +++ b/tests/testthat/_snaps/rename.md @@ -2,99 +2,126 @@ Code expect_equal(stringr::str_length(postcode("G26QE", format = "pc7")), 7) - Warning - `postcode()` was deprecated in phsmethods 0.2.1. + Error + `postcode()` was deprecated in phsmethods 0.2.1 and is now defunct. i Please use `format_postcode()` instead. Code expect_equal(stringr::str_length(postcode("G26QE", format = "pc8")), 6) - Warning - `postcode()` was deprecated in phsmethods 0.2.1. + Error + `postcode()` was deprecated in phsmethods 0.2.1 and is now defunct. i Please use `format_postcode()` instead. Code expect_equal(stringr::str_length(postcode(c("KA89NB", "PA152TY"), format = "pc7")), c(7, 7)) - Warning - `postcode()` was deprecated in phsmethods 0.2.1. + Error + `postcode()` was deprecated in phsmethods 0.2.1 and is now defunct. i Please use `format_postcode()` instead. Code expect_equal(stringr::str_length(postcode(c("KA89NB", "PA152TY"), format = "pc8")), c(7, 8)) - Warning - `postcode()` was deprecated in phsmethods 0.2.1. + Error + `postcode()` was deprecated in phsmethods 0.2.1 and is now defunct. i Please use `format_postcode()` instead. Code expect_equal(postcode("G36RB"), "G3 6RB") - Warning - `postcode()` was deprecated in phsmethods 0.2.1. + Error + `postcode()` was deprecated in phsmethods 0.2.1 and is now defunct. i Please use `format_postcode()` instead. Code expect_equal(postcode("G432XR"), "G43 2XR") - Warning - `postcode()` was deprecated in phsmethods 0.2.1. + Error + `postcode()` was deprecated in phsmethods 0.2.1 and is now defunct. i Please use `format_postcode()` instead. Code expect_equal(postcode("DG29BA"), "DG2 9BA") - Warning - `postcode()` was deprecated in phsmethods 0.2.1. + Error + `postcode()` was deprecated in phsmethods 0.2.1 and is now defunct. i Please use `format_postcode()` instead. Code expect_equal(postcode("FK101RY"), "FK101RY") - Warning - `postcode()` was deprecated in phsmethods 0.2.1. + Error + `postcode()` was deprecated in phsmethods 0.2.1 and is now defunct. i Please use `format_postcode()` instead. Code expect_equal(postcode("E1W3TJ"), "E1W 3TJ") - Warning - `postcode()` was deprecated in phsmethods 0.2.1. + Error + `postcode()` was deprecated in phsmethods 0.2.1 and is now defunct. i Please use `format_postcode()` instead. Code expect_equal(postcode("EC1Y8SE"), "EC1Y8SE") - Warning - `postcode()` was deprecated in phsmethods 0.2.1. + Error + `postcode()` was deprecated in phsmethods 0.2.1 and is now defunct. i Please use `format_postcode()` instead. Code input_hampden <- c("G429BA", "g429ba", "G42 9BA", "G 4 2 9 B A", "G429b a") formatted_hampden <- suppressWarnings(postcode(input_hampden)) + Error + `postcode()` was deprecated in phsmethods 0.2.1 and is now defunct. + i Please use `format_postcode()` instead. + Code expect_true(length(unique(formatted_hampden)) == 1) + Error + object 'formatted_hampden' not found + Code expect_equal(unique(formatted_hampden), "G42 9BA") + Error + object 'formatted_hampden' not found + Code expect_true(is.na(suppressWarnings(postcode("G2?QE")))) + Error + `postcode()` was deprecated in phsmethods 0.2.1 and is now defunct. + i Please use `format_postcode()` instead. + Code expect_warning(postcode(c("G207AL", "G2O07AL"))) - Warning - 1 non-NA input value does not adhere to the standard UK postcode format (with or without spaces) and will be coded as NA. - The standard format is: - * 1 or 2 letters, followed by - * 1 number, followed by - * 1 optional letter or number, followed by - * 1 number, followed by - * 2 letters + Error + `postcode()` was deprecated in phsmethods 0.2.1 and is now defunct. + i Please use `format_postcode()` instead. Code expect_equal(suppressWarnings(postcode(c("EH7 5QG", NA, "EH11 2NL", "EH5 2HF*"))), c("EH7 5QG", NA, "EH112NL", NA)) + Error + `postcode()` was deprecated in phsmethods 0.2.1 and is now defunct. + i Please use `format_postcode()` instead. + Code input_dens <- c("Dd37Jy", "DD37JY", "D d 337JY") warnings_dens <- capture_warnings(postcode(input_dens)) + Error + `postcode()` was deprecated in phsmethods 0.2.1 and is now defunct. + i Please use `format_postcode()` instead. + Code expect_length(warnings_dens, 2) + Error + object 'warnings_dens' not found + Code input_pittodrie <- c("ab245qh", NA, "ab245q", "A B245QH") warnings_pittodrie <- capture_warnings(postcode(input_pittodrie)) + Error + `postcode()` was deprecated in phsmethods 0.2.1 and is now defunct. + i Please use `format_postcode()` instead. + Code expect_length(warnings_pittodrie, 3) + Error + object 'warnings_pittodrie' not found + Code expect_warning(postcode("g2"), "^1") - Warning - `postcode()` was deprecated in phsmethods 0.2.1. + Error + `postcode()` was deprecated in phsmethods 0.2.1 and is now defunct. i Please use `format_postcode()` instead. Code expect_warning(postcode(c("DG98BS", "dg98b")), "^1") - Warning - `postcode()` was deprecated in phsmethods 0.2.1. + Error + `postcode()` was deprecated in phsmethods 0.2.1 and is now defunct. i Please use `format_postcode()` instead. Code expect_warning(postcode(c("KY1 1RZ", "ky1rz", "KY11 R", "KY11R!")), "^3") - Warning - `postcode()` was deprecated in phsmethods 0.2.1. + Error + `postcode()` was deprecated in phsmethods 0.2.1 and is now defunct. i Please use `format_postcode()` instead. Code expect_warning(postcode(c("ML53RB", NA, "ML5", "???", 53, as.factor("ML53RB"))), "^4") - Warning - `postcode()` was deprecated in phsmethods 0.2.1. + Error + `postcode()` was deprecated in phsmethods 0.2.1 and is now defunct. i Please use `format_postcode()` instead. # age_group is deprecated @@ -102,8 +129,8 @@ Code expect_identical(age_group(c(4, 51, 21, 89), 0, 80, 10, as_factor = FALSE), c( "0-9", "50-59", "20-29", "80+")) - Warning - `age_group()` was deprecated in phsmethods 0.2.1. + Error + `age_group()` was deprecated in phsmethods 0.2.1 and is now defunct. i Please use `create_age_groups()` instead. Code expect_identical(age_group(c(8, 94, 44, 55, 14), 0, 90, 5, as_factor = TRUE), @@ -111,105 +138,82 @@ "10-14", "15-19", "20-24", "25-29", "30-34", "35-39", "40-44", "45-49", "50-54", "55-59", "60-64", "65-69", "70-74", "75-79", "80-84", "85-89", "90+"), ordered = TRUE)) - Warning - `age_group()` was deprecated in phsmethods 0.2.1. + Error + `age_group()` was deprecated in phsmethods 0.2.1 and is now defunct. i Please use `create_age_groups()` instead. Code expect_identical(age_group(c(81, 86, 33, 11), 4, 84, 3, as_factor = FALSE), c( "79-81", "82+", "31-33", "10-12")) - Warning - `age_group()` was deprecated in phsmethods 0.2.1. + Error + `age_group()` was deprecated in phsmethods 0.2.1 and is now defunct. i Please use `create_age_groups()` instead. Code expect_identical(age_group(c(0, 99, 1000, 5, 5), 5, 90, 5, as_factor = FALSE), c(NA, "90+", "90+", "5-9", "5-9")) - Warning - `age_group()` was deprecated in phsmethods 0.2.1. + Error + `age_group()` was deprecated in phsmethods 0.2.1 and is now defunct. i Please use `create_age_groups()` instead. Code expect_identical(age_group(10, as_factor = TRUE), factor(c("10-14"), levels = c( "0-4", "5-9", "10-14", "15-19", "20-24", "25-29", "30-34", "35-39", "40-44", "45-49", "50-54", "55-59", "60-64", "65-69", "70-74", "75-79", "80-84", "85-89", "90+"), ordered = TRUE)) - Warning - `age_group()` was deprecated in phsmethods 0.2.1. + Error + `age_group()` was deprecated in phsmethods 0.2.1 and is now defunct. i Please use `create_age_groups()` instead. Code expect_error(age_group(c("1", "57", "apple", "12"), as_factor = FALSE)) - Warning - `age_group()` was deprecated in phsmethods 0.2.1. - i Please use `create_age_groups()` instead. - Code expect_error(age_group(c("26", "9", "78", "81"), as_factor = FALSE)) - Warning - `age_group()` was deprecated in phsmethods 0.2.1. - i Please use `create_age_groups()` instead. # fin_year is deprecated Code expect_equal(fin_year(as.Date("20120331", "%Y%m%d")), "2011/12") - Warning - `fin_year()` was deprecated in phsmethods 0.2.1. + Error + `fin_year()` was deprecated in phsmethods 0.2.1 and is now defunct. i Please use `extract_fin_year()` instead. Code expect_equal(fin_year(as.Date("20120401", "%Y%m%d")), "2012/13") - Warning - `fin_year()` was deprecated in phsmethods 0.2.1. + Error + `fin_year()` was deprecated in phsmethods 0.2.1 and is now defunct. i Please use `extract_fin_year()` instead. Code expect_equal(fin_year(as.POSIXct("20190104", format = "%Y%m%d")), "2018/19") - Warning - `fin_year()` was deprecated in phsmethods 0.2.1. + Error + `fin_year()` was deprecated in phsmethods 0.2.1 and is now defunct. i Please use `extract_fin_year()` instead. Code expect_equal(fin_year(as.Date("17111993", "%d%m%Y")), "1993/94") - Warning - `fin_year()` was deprecated in phsmethods 0.2.1. + Error + `fin_year()` was deprecated in phsmethods 0.2.1 and is now defunct. i Please use `extract_fin_year()` instead. Code expect_equal(fin_year(as.Date("19980404", "%Y%m%d")), "1998/99") - Warning - `fin_year()` was deprecated in phsmethods 0.2.1. + Error + `fin_year()` was deprecated in phsmethods 0.2.1 and is now defunct. i Please use `extract_fin_year()` instead. Code expect_equal(fin_year(as.Date("21-Jan-2017", "%d-%B-%Y")), "2016/17") - Warning - `fin_year()` was deprecated in phsmethods 0.2.1. + Error + `fin_year()` was deprecated in phsmethods 0.2.1 and is now defunct. i Please use `extract_fin_year()` instead. Code expect_equal(fin_year(as.POSIXct("20181401", format = "%Y%d%m")), "2017/18") - Warning - `fin_year()` was deprecated in phsmethods 0.2.1. + Error + `fin_year()` was deprecated in phsmethods 0.2.1 and is now defunct. i Please use `extract_fin_year()` instead. Code expect_equal(fin_year(lubridate::dmy(29102019)), "2019/20") - Warning - `fin_year()` was deprecated in phsmethods 0.2.1. + Error + `fin_year()` was deprecated in phsmethods 0.2.1 and is now defunct. i Please use `extract_fin_year()` instead. Code expect_error(fin_year("28102019")) - Warning - `fin_year()` was deprecated in phsmethods 0.2.1. - i Please use `extract_fin_year()` instead. - Code expect_error(fin_year("28-Oct-2019")) - Warning - `fin_year()` was deprecated in phsmethods 0.2.1. - i Please use `extract_fin_year()` instead. - Code expect_error(fin_year(as.numeric("28102019"))) - Warning - `fin_year()` was deprecated in phsmethods 0.2.1. - i Please use `extract_fin_year()` instead. - Code expect_error(fin_year(as.factor("28-Oct-2019"))) - Warning - `fin_year()` was deprecated in phsmethods 0.2.1. - i Please use `extract_fin_year()` instead. - Code expect_equal(fin_year(c(lubridate::dmy(5012020), NA)), c("2019/20", NA)) - Warning - `fin_year()` was deprecated in phsmethods 0.2.1. + Error + `fin_year()` was deprecated in phsmethods 0.2.1 and is now defunct. i Please use `extract_fin_year()` instead. diff --git a/tests/testthat/_snaps/windows/file_size.md b/tests/testthat/_snaps/windows/file_size.md new file mode 100644 index 0000000..bb84b21 --- /dev/null +++ b/tests/testthat/_snaps/windows/file_size.md @@ -0,0 +1,28 @@ +# Output is identical over time + + Code + file_size(test_path("files")) + Output + # A tibble: 8 x 2 + name size + + 1 airquality.xls Excel 26 kB + 2 bod.xlsx Excel 5 kB + 3 iris.csv CSV 4 kB + 4 mtcars.sav SPSS 4 kB + 5 plant-growth.rds RDS 316 B + 6 puromycin.txt Text 442 B + 7 stackloss.fst FST 897 B + 8 swiss.tsv TSV 1 kB + +--- + + Code + file_size(test_path("files"), "xlsx?") + Output + # A tibble: 2 x 2 + name size + + 1 airquality.xls Excel 26 kB + 2 bod.xlsx Excel 5 kB + diff --git a/tests/testthat/test-dob_from_chi.R b/tests/testthat/test-dob_from_chi.R index 81ff27b..0645a54 100644 --- a/tests/testthat/test-dob_from_chi.R +++ b/tests/testthat/test-dob_from_chi.R @@ -1,3 +1,5 @@ +# A helper function to generate a 'real' CHI number i.e. one which passes +# `chi_check`, given the first 6 chars i.e. the DoB gen_real_chi <- function(first_6) { for (i in 1111:9999) { chi <- chi_pad(as.character(first_6 * 10000 + i)) @@ -8,6 +10,18 @@ gen_real_chi <- function(first_6) { } } +# A helper function to work out ages as time passes: Given the age at a given +# date, work out what the age would be today. "2022-04-01" was chosen as the +# default arbitrarily. +expected_age <- function( + expected_ages, + expected_at = lubridate::make_date(year = 2022, month = 4, day = 1)) { + expected_ages + floor(lubridate::time_length( + lubridate::interval(expected_at, Sys.Date()), + "years" + )) +} + test_that("Returns correct DoB - no options", { # Some standard CHIs / dates expect_equal( @@ -235,7 +249,7 @@ test_that("Returns correct age - no options", { "0101405073", "0101625707" )), - c(89, 82, 60) + expected_age(c(89, 82, 60)) ) # Leap years @@ -245,12 +259,13 @@ test_that("Returns correct age - no options", { gen_real_chi(290236), gen_real_chi(290296) )), - c(94, 86, 26) + expected_age(c(94, 86, 26)) ) # Century leap year (hard to test as 1900 is a long time ago!) expect_equal( - age_from_chi(gen_real_chi(290200)), 22 + age_from_chi(gen_real_chi(290200)), + expected_age(22) ) }) @@ -267,7 +282,7 @@ test_that("Returns correct age - fixed age supplied", { min_age = 1, max_age = 101 ), - c(89, 82, 60) + expected_age(c(89, 82, 60)) ) }) @@ -284,7 +299,7 @@ test_that("Returns correct age - unusual fixed age", { max_age = 72 ) ), - c(NA_real_, NA_real_, 60) + expected_age(c(NA_real_, NA_real_, 60)) ) }) diff --git a/tests/testthat/test-file_size.R b/tests/testthat/test-file_size.R index d4e2eb3..477fc49 100644 --- a/tests/testthat/test-file_size.R +++ b/tests/testthat/test-file_size.R @@ -1,5 +1,5 @@ test_that("Returns a tibble", { - expect_true(tibble::is_tibble(file_size(test_path("files")))) + expect_s3_class(file_size(test_path("files")), "tbl") }) test_that("Identifies correct number of files", { @@ -10,11 +10,21 @@ test_that("Identifies correct number of files", { }) test_that("Returns sizes with correct prefix", { - expect_true(stringr::str_detect( - file_size(test_path("files"), ".tsv$") %>% + expect_match( + file_size(test_path("files"), "tsv") %>% dplyr::pull(size), - "^TSV\\s[0-9]*\\s[A-Z]?B$" - )) + "^TSV 1 kB$" + ) + expect_match( + file_size(test_path("files"), "csv") %>% + dplyr::pull(size), + "^CSV 4 kB$" + ) + expect_match( + file_size(test_path("files"), "fst") %>% + dplyr::pull(size), + "^FST 897 B$" + ) }) test_that("Returns sizes in alphabetical order", { @@ -27,8 +37,20 @@ test_that("Returns sizes in alphabetical order", { ) }) +test_that("Output is identical over time", { + # Text files report as larger on Windows so snapshot per OS + os <- ifelse( + "windows" %in% tolower(Sys.info()[["sysname"]]), + "windows", + "UNIX" + ) + + expect_snapshot(file_size(test_path("files")), variant = os) + expect_snapshot(file_size(test_path("files"), "xlsx?"), variant = os) +}) + test_that("Errors if supplied with invalid filepath", { - expect_error(file_size(here::here("reference_files"))) + expect_error(file_size(test_path("reference_files"))) expect_error(file_size(NA)) expect_error(file_size(NULL)) }) diff --git a/tests/testthat/test-format_postcode.R b/tests/testthat/test-format_postcode.R index b1c6b85..90df08e 100644 --- a/tests/testthat/test-format_postcode.R +++ b/tests/testthat/test-format_postcode.R @@ -1,12 +1,14 @@ test_that("Creates strings of correct length", { - expect_equal(stringr::str_length(format_postcode("G26QE", format = "pc7")), 7) - expect_equal(stringr::str_length(format_postcode("G26QE", format = "pc8")), 6) - expect_equal(stringr::str_length(format_postcode(c("KA89NB", "PA152TY"), - format = "pc7" - )), c(7, 7)) - expect_equal(stringr::str_length(format_postcode(c("KA89NB", "PA152TY"), - format = "pc8" - )), c(7, 8)) + expect_equal(nchar(format_postcode("G26QE", format = "pc7")), 7) + expect_equal(nchar(format_postcode("G26QE", format = "pc8")), 6) + expect_equal( + nchar(format_postcode(c("KA89NB", "PA152TY"), format = "pc7")), + c(7, 7) + ) + expect_equal( + nchar(format_postcode(c("KA89NB", "PA152TY"), format = "pc8")), + c(7, 8) + ) }) test_that("Handles all valid outcode formats", { @@ -22,7 +24,7 @@ test_that("Parses multiple input formats", { input_hampden <- c("G429BA", "g429ba", "G42 9BA", "G 4 2 9 B A", "G429b a") formatted_hampden <- suppressWarnings(format_postcode(input_hampden)) - expect_true(length(unique(formatted_hampden)) == 1) + expect_length(unique(formatted_hampden), 1) expect_equal(unique(formatted_hampden), "G42 9BA") }) @@ -30,33 +32,121 @@ test_that("Correctly handles values which don't adhere to standard format", { expect_true(is.na(suppressWarnings(format_postcode("G2?QE")))) expect_warning(format_postcode(c("G207AL", "G2O07AL"))) expect_equal( - suppressWarnings(format_postcode(c( - "EH7 5QG", NA, - "EH11 2NL", "EH5 2HF*" - ))), + suppressWarnings(format_postcode( + c("EH7 5QG", NA, "EH11 2NL", "EH5 2HF*") + )), + c("EH7 5QG", NA, "EH112NL", NA) + ) +}) + +test_that("Output is the same with the quiet param set to TRUE", { + # Creates strings of correct length + expect_equal( + nchar(format_postcode("G26QE", format = "pc7", quiet = TRUE)), + 7 + ) + expect_equal( + nchar(format_postcode("G26QE", format = "pc8", quiet = TRUE)), + 6 + ) + expect_equal( + nchar(format_postcode( + c("KA89NB", "PA152TY"), + format = "pc7", + quiet = TRUE + )), + c(7, 7) + ) + expect_equal( + nchar(format_postcode( + c("KA89NB", "PA152TY"), + format = "pc8", + quiet = TRUE + )), + c(7, 8) + ) + + # Handles all valid outcode formats + expect_equal(format_postcode("G36RB", quiet = TRUE), "G3 6RB") + expect_equal(format_postcode("G432XR", quiet = TRUE), "G43 2XR") + expect_equal(format_postcode("DG29BA", quiet = TRUE), "DG2 9BA") + expect_equal(format_postcode("FK101RY", quiet = TRUE), "FK101RY") + expect_equal(format_postcode("E1W3TJ", quiet = TRUE), "E1W 3TJ") + expect_equal(format_postcode("EC1Y8SE", quiet = TRUE), "EC1Y8SE") + + # Parses multiple input formats + input_hampden <- c("G429BA", "g429ba", "G42 9BA", "G 4 2 9 B A", "G429b a") + formatted_hampden <- format_postcode(input_hampden, quiet = TRUE) + + expect_length(unique(formatted_hampden), 1) + expect_equal(unique(formatted_hampden), "G42 9BA") + + # Correctly handles values which don't adhere to standard format + expect_true(is.na(format_postcode("G2?QE", quiet = TRUE))) + expect_no_warning(format_postcode(c("G207AL", "G2O07AL"), quiet = TRUE)) + expect_equal( + format_postcode(c("EH7 5QG", NA, "EH11 2NL", "EH5 2HF*"), quiet = TRUE), c("EH7 5QG", NA, "EH112NL", NA) ) }) test_that("Produces correct number of warning messages", { - input_dens <- c("Dd37Jy", "DD37JY", "D d 337JY") - warnings_dens <- capture_warnings(format_postcode(input_dens)) - expect_length(warnings_dens, 1) + dens_postcodes <- c("Dd37Jy", "DD37JY", "D d 337JY") + format_postcode(dens_postcodes) %>% + expect_warning() - input_pittodrie <- c("ab245qh", NA, "ab245q", "A B245QH") - warnings_pittodrie <- capture_warnings(format_postcode(input_pittodrie)) - expect_length(warnings_pittodrie, 2) + pittodrie_postcodes <- c("ab245qh", NA, "ab245q", "A B245QH") + format_postcode(pittodrie_postcodes) %>% + expect_warning() %>% + expect_warning() }) test_that("Warning gives true number of values that don't adhere to format", { - expect_warning(format_postcode("g2"), "^1") - expect_warning(format_postcode(c("DG98BS", "dg98b")), "^1") - expect_warning(format_postcode(c("KY1 1RZ", "ky1rz", "KY11 R", "KY11R!")), "^3") - expect_warning( - format_postcode(c( - "ML53RB", NA, "ML5", - "???", 53, as.factor("ML53RB") - )), - "^4" + expect_snapshot(format_postcode("g2")) + expect_snapshot(format_postcode(c("DG98BS", "dg98b"))) + expect_snapshot( + format_postcode(c("ML53RB", NA, "ML5", "???", 53, as.factor("ML53RB"))) + ) + + expect_snapshot({ + format_postcode(c("KY1 1RZ", "ky1rz", "KY11 R", "KY11R!"), quiet = TRUE) + format_postcode(c("KY1 1RZ", "ky1rz", "KY11 R", "KY11R!"), quiet = FALSE) + }) +}) + +test_that("The quiet parameter suppresses messages correctly", { + expect_equal( + format_postcode(c("KY1 1RZ", "ky1rz", "KY11 R", "KY11R!"), quiet = TRUE), + c("KY1 1RZ", NA, NA, NA) + ) + expect_equal( + format_postcode(c("KY1 1RZ", "ky1 1rz"), quiet = TRUE), + c("KY1 1RZ", "KY1 1RZ") + ) + expect_equal( + format_postcode(c("KY1 1RZ", "ky1rz", "KY11 R", "KY11R!"), quiet = TRUE), + suppressWarnings(format_postcode(c("KY1 1RZ", "ky1rz", "KY11 R", "KY11R!"))) ) + expect_equal( + format_postcode(c("KY1 1RZ", "ky1 1rz"), quiet = TRUE), + suppressWarnings(format_postcode(c("KY1 1RZ", "ky1 1rz"))) + ) +}) + +test_that("Errors on invalid inputs", { + expect_error(format_postcode(NA)) + expect_error(format_postcode(123)) + expect_error(format_postcode(123)) + + expect_error(format_postcode("G26QE", format = 7)) + expect_error(format_postcode("G26QE", format = "7")) + + expect_error(format_postcode("G26QE", quiet = 1)) + expect_error(format_postcode("G26QE", quiet = "TRUE")) + + expect_error(format_postcode("G26QE", "G26QE")) + expect_error(format_postcode("G26QE", "G26QE", "G26QE")) + + expect_error(format_postcode(NA, NA)) + expect_error(format_postcode(NA, NA, NA)) }) diff --git a/tests/testthat/test-rename.R b/tests/testthat/test-rename.R index 2634293..f9c00a3 100644 --- a/tests/testthat/test-rename.R +++ b/tests/testthat/test-rename.R @@ -1,139 +1,148 @@ test_that("postcode is deprecated", { - expect_snapshot({ - expect_equal(stringr::str_length(postcode("G26QE", format = "pc7")), 7) - expect_equal(stringr::str_length(postcode("G26QE", format = "pc8")), 6) - expect_equal(stringr::str_length(postcode(c("KA89NB", "PA152TY"), - format = "pc7" - )), c(7, 7)) - expect_equal(stringr::str_length(postcode(c("KA89NB", "PA152TY"), - format = "pc8" - )), c(7, 8)) - - expect_equal(postcode("G36RB"), "G3 6RB") - expect_equal(postcode("G432XR"), "G43 2XR") - expect_equal(postcode("DG29BA"), "DG2 9BA") - expect_equal(postcode("FK101RY"), "FK101RY") - expect_equal(postcode("E1W3TJ"), "E1W 3TJ") - expect_equal(postcode("EC1Y8SE"), "EC1Y8SE") - - input_hampden <- c("G429BA", "g429ba", "G42 9BA", "G 4 2 9 B A", "G429b a") - formatted_hampden <- suppressWarnings(postcode(input_hampden)) - - expect_true(length(unique(formatted_hampden)) == 1) - expect_equal(unique(formatted_hampden), "G42 9BA") - - expect_true(is.na(suppressWarnings(postcode("G2?QE")))) - expect_warning(postcode(c("G207AL", "G2O07AL"))) - expect_equal( - suppressWarnings(postcode(c( - "EH7 5QG", NA, - "EH11 2NL", "EH5 2HF*" - ))), - c("EH7 5QG", NA, "EH112NL", NA) - ) - - input_dens <- c("Dd37Jy", "DD37JY", "D d 337JY") - warnings_dens <- capture_warnings(postcode(input_dens)) - expect_length(warnings_dens, 2) - - input_pittodrie <- c("ab245qh", NA, "ab245q", "A B245QH") - warnings_pittodrie <- capture_warnings(postcode(input_pittodrie)) - expect_length(warnings_pittodrie, 3) - - expect_warning(postcode("g2"), "^1") - expect_warning(postcode(c("DG98BS", "dg98b")), "^1") - expect_warning(postcode(c("KY1 1RZ", "ky1rz", "KY11 R", "KY11R!")), "^3") - expect_warning( - postcode(c( - "ML53RB", NA, "ML5", - "???", 53, as.factor("ML53RB") - )), - "^4" - ) - }) + expect_snapshot( + { + expect_equal(stringr::str_length(postcode("G26QE", format = "pc7")), 7) + expect_equal(stringr::str_length(postcode("G26QE", format = "pc8")), 6) + expect_equal(stringr::str_length(postcode(c("KA89NB", "PA152TY"), + format = "pc7" + )), c(7, 7)) + expect_equal(stringr::str_length(postcode(c("KA89NB", "PA152TY"), + format = "pc8" + )), c(7, 8)) + + expect_equal(postcode("G36RB"), "G3 6RB") + expect_equal(postcode("G432XR"), "G43 2XR") + expect_equal(postcode("DG29BA"), "DG2 9BA") + expect_equal(postcode("FK101RY"), "FK101RY") + expect_equal(postcode("E1W3TJ"), "E1W 3TJ") + expect_equal(postcode("EC1Y8SE"), "EC1Y8SE") + + input_hampden <- c("G429BA", "g429ba", "G42 9BA", "G 4 2 9 B A", "G429b a") + formatted_hampden <- suppressWarnings(postcode(input_hampden)) + + expect_true(length(unique(formatted_hampden)) == 1) + expect_equal(unique(formatted_hampden), "G42 9BA") + + expect_true(is.na(suppressWarnings(postcode("G2?QE")))) + expect_warning(postcode(c("G207AL", "G2O07AL"))) + expect_equal( + suppressWarnings(postcode(c( + "EH7 5QG", NA, + "EH11 2NL", "EH5 2HF*" + ))), + c("EH7 5QG", NA, "EH112NL", NA) + ) + + input_dens <- c("Dd37Jy", "DD37JY", "D d 337JY") + warnings_dens <- capture_warnings(postcode(input_dens)) + expect_length(warnings_dens, 2) + + input_pittodrie <- c("ab245qh", NA, "ab245q", "A B245QH") + warnings_pittodrie <- capture_warnings(postcode(input_pittodrie)) + expect_length(warnings_pittodrie, 3) + + expect_warning(postcode("g2"), "^1") + expect_warning(postcode(c("DG98BS", "dg98b")), "^1") + expect_warning(postcode(c("KY1 1RZ", "ky1rz", "KY11 R", "KY11R!")), "^3") + expect_warning( + postcode(c( + "ML53RB", NA, "ML5", + "???", 53, as.factor("ML53RB") + )), + "^4" + ) + }, + error = TRUE + ) }) test_that("age_group is deprecated", { - expect_snapshot({ - expect_identical( - age_group(c(4, 51, 21, 89), - 0, 80, 10, - as_factor = FALSE - ), - c("0-9", "50-59", "20-29", "80+") - ) - - expect_identical( - age_group(c(8, 94, 44, 55, 14), - 0, 90, 5, - as_factor = TRUE - ), - factor(c("5-9", "90+", "40-44", "55-59", "10-14"), - levels = c( - "0-4", "5-9", "10-14", "15-19", "20-24", "25-29", "30-34", - "35-39", "40-44", "45-49", "50-54", "55-59", "60-64", - "65-69", "70-74", "75-79", "80-84", "85-89", "90+" + expect_snapshot( + { + expect_identical( + age_group(c(4, 51, 21, 89), + 0, 80, 10, + as_factor = FALSE + ), + c("0-9", "50-59", "20-29", "80+") + ) + + expect_identical( + age_group(c(8, 94, 44, 55, 14), + 0, 90, 5, + as_factor = TRUE ), - ordered = TRUE + factor(c("5-9", "90+", "40-44", "55-59", "10-14"), + levels = c( + "0-4", "5-9", "10-14", "15-19", "20-24", "25-29", "30-34", + "35-39", "40-44", "45-49", "50-54", "55-59", "60-64", + "65-69", "70-74", "75-79", "80-84", "85-89", "90+" + ), + ordered = TRUE + ) ) - ) - - expect_identical( - age_group(c(81, 86, 33, 11), - 4, 84, 3, - as_factor = FALSE - ), - c("79-81", "82+", "31-33", "10-12") - ) - - expect_identical( - age_group(c(0, 99, 1000, 5, 5), - 5, 90, 5, - as_factor = FALSE - ), - c(NA, "90+", "90+", "5-9", "5-9") - ) - - expect_identical( - age_group(10, as_factor = TRUE), - factor(c("10-14"), - levels = c( - "0-4", "5-9", "10-14", "15-19", "20-24", "25-29", "30-34", - "35-39", "40-44", "45-49", "50-54", "55-59", "60-64", - "65-69", "70-74", "75-79", "80-84", "85-89", "90+" + + expect_identical( + age_group(c(81, 86, 33, 11), + 4, 84, 3, + as_factor = FALSE + ), + c("79-81", "82+", "31-33", "10-12") + ) + + expect_identical( + age_group(c(0, 99, 1000, 5, 5), + 5, 90, 5, + as_factor = FALSE ), - ordered = TRUE + c(NA, "90+", "90+", "5-9", "5-9") + ) + + expect_identical( + age_group(10, as_factor = TRUE), + factor(c("10-14"), + levels = c( + "0-4", "5-9", "10-14", "15-19", "20-24", "25-29", "30-34", + "35-39", "40-44", "45-49", "50-54", "55-59", "60-64", + "65-69", "70-74", "75-79", "80-84", "85-89", "90+" + ), + ordered = TRUE + ) ) - ) - # If x is not numeric cut will error - expect_error(age_group(c("1", "57", "apple", "12"), as_factor = FALSE)) + # If x is not numeric cut will error + expect_error(age_group(c("1", "57", "apple", "12"), as_factor = FALSE)) - # This is true even if all elements are numbers stored as character - expect_error(age_group(c("26", "9", "78", "81"), as_factor = FALSE)) - }) + # This is true even if all elements are numbers stored as character + expect_error(age_group(c("26", "9", "78", "81"), as_factor = FALSE)) + }, + error = TRUE + ) }) test_that("fin_year is deprecated", { - expect_snapshot({ - expect_equal(fin_year(as.Date("20120331", "%Y%m%d")), "2011/12") - expect_equal(fin_year(as.Date("20120401", "%Y%m%d")), "2012/13") - expect_equal(fin_year(as.POSIXct("20190104", format = "%Y%m%d")), "2018/19") - - expect_equal(fin_year(as.Date("17111993", "%d%m%Y")), "1993/94") - expect_equal(fin_year(as.Date("19980404", "%Y%m%d")), "1998/99") - expect_equal(fin_year(as.Date("21-Jan-2017", "%d-%B-%Y")), "2016/17") - expect_equal(fin_year(as.POSIXct("20181401", format = "%Y%d%m")), "2017/18") - expect_equal(fin_year(lubridate::dmy(29102019)), "2019/20") - - expect_error(fin_year("28102019")) - expect_error(fin_year("28-Oct-2019")) - expect_error(fin_year(as.numeric("28102019"))) - expect_error(fin_year(as.factor("28-Oct-2019"))) - - expect_equal(fin_year(c(lubridate::dmy(05012020), NA)), c("2019/20", NA)) - }) + expect_snapshot( + { + expect_equal(fin_year(as.Date("20120331", "%Y%m%d")), "2011/12") + expect_equal(fin_year(as.Date("20120401", "%Y%m%d")), "2012/13") + expect_equal(fin_year(as.POSIXct("20190104", format = "%Y%m%d")), "2018/19") + + expect_equal(fin_year(as.Date("17111993", "%d%m%Y")), "1993/94") + expect_equal(fin_year(as.Date("19980404", "%Y%m%d")), "1998/99") + expect_equal(fin_year(as.Date("21-Jan-2017", "%d-%B-%Y")), "2016/17") + expect_equal(fin_year(as.POSIXct("20181401", format = "%Y%d%m")), "2017/18") + expect_equal(fin_year(lubridate::dmy(29102019)), "2019/20") + + expect_error(fin_year("28102019")) + expect_error(fin_year("28-Oct-2019")) + expect_error(fin_year(as.numeric("28102019"))) + expect_error(fin_year(as.factor("28-Oct-2019"))) + + expect_equal(fin_year(c(lubridate::dmy(05012020), NA)), c("2019/20", NA)) + }, + error = TRUE + ) })