Public-Health-Scotland · Tina815 · Sep 15, 2023 · Feb 1, 2023 · Feb 1, 2023 · Feb 1, 2023
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,11 +1,12 @@
 Package: phsmethods
 Title: Standard Methods for use in Public Health Scotland
-Version: 0.2.2
+Version: 0.2.3
 Authors@R: c(
-    person("David", "Caldwell", , "[email protected]", role = c("aut", "cre")),
+    person("Public Health Scotland", , , "[email protected]", role = "cph"),
+    person("David", "Caldwell", , "[email protected]", role = "aut"),
     person("Lucinda", "Lawrie", , "[email protected]", role = "rev"),
     person("Jack", "Hannah", , "[email protected]", role = "aut"),
-    person("Tina", "Fu", , "[email protected]", role = "aut"),
+    person("Tina", "Fu", , "[email protected]", role = c("aut", "cre")),
     person("Ciara", "Gribben", , "[email protected]", role = "aut"),
     person("Chris", "Deans", , "[email protected]", role = "aut"),
     person("Jaime", "Villacampa", , "[email protected]", role = "aut"),
@@ -19,22 +20,22 @@ Authors@R: c(
 Description: Bespoke functions for commonly undertaken analytical tasks in
     Public Health Scotland.
 License: GPL (>= 2)
-URL: https://github.com/Health-SocialCare-Scotland/phsmethods,
+URL: https://github.com/Public-Health-Scotland/phsmethods,
     https://public-health-scotland.github.io/phsmethods/
 BugReports: 
-    https://github.com/Health-SocialCare-Scotland/phsmethods/issues
+    https://github.com/Public-Health-Scotland/phsmethods/issues
 Depends:
     R (>= 2.10)
 Imports:
     cli,
     dplyr,
-    gdata,
     lifecycle,
     lubridate,
     magrittr,
     purrr,
     readr,
     rlang,
+    scales (>= 1.0.0),
     stringr,
     tibble,
     utils
@@ -49,4 +50,4 @@ Config/testthat/edition: 3
 Encoding: UTF-8
 Language: en-GB
 LazyData: true
-RoxygenNote: 7.2.1
+RoxygenNote: 7.2.3
diff --git a/NEWS.md b/NEWS.md
@@ -1,16 +1,26 @@
-# phsmethods 0.2.2
+# phsmethods 0.2.3 (2023-09-11)
 
-- Improve `chi_check()` function to make it more efficient and run faster.
+- The [{gdata}](https://github.com/r-gregmisc/gdata) import has been dropped and replaced with [{scales}](https://scales.r-lib.org/).
 
-- Improve "Using phsmethods" section in readme to be shorter and more accessible. 
+- `extract_fin_year()` is now much faster and will use less memory, especially for smaller vectors (1 to 1,000).
 
-- Update all errors, warnings and messages to use the `cli` package. 
+- `format_postcode()` is now faster and also gains a `quiet` parameter, the default value is `FALSE` but setting it to `TRUE` will skip some of the checks and messages, this is useful when using `format_postcode()` to 'clean-up' and format a vector of postcodes, rather than wanting to check them. Because of the skipped checks `quiet = TRUE` should also run faster.
+
+- The installation instructions in the README have been updated.
+
+# phsmethods 0.2.2 (2022-11-14)
+
+- Improved `chi_check()` to make it more efficient and run faster.
+
+- Improved the "Using phsmethods" section in the README to be shorter and more accessible. 
+
+- Update all errors, warnings and messages to use [{cli}](https://cli.r-lib.org/). 
 
 - Improve errors when giving incorrect types to some functions.
 
 # phsmethods 0.2.1 (2022-02-11)
 
-- Three functions renamed to improve code clarity: `postcode()` to `format_postcode()`; `age_group()` to `create_age_groups()`; `fin_year()` to `extract_fin_year()`. The old functions will still work but will produce a warning. After a reasonable amount of time they will be removed completely.
+- Three functions renamed to improve code clarity: `postcode()` to `format_postcode()`; `age_group()` to `create_age_groups()`; `fin_year()` to `extract_fin_year()`. The old functions will still work but will produce a warning. After a reasonable amount of time, they will be removed completely.
 
 - New functions added:
 `age_calculate()`([#65](https://github.com/Public-Health-Scotland/phsmethods/issues/65), [@Nic-chr](https://github.com/Nic-Chr));
@@ -21,7 +31,7 @@
 
 # phsmethods 0.2.0 (2020-04-17)
 
-- New functions added: `age_group()`([#23](https://github.com/Health-SocialCare-Scotland/phsmethods/issues/23), [@chrisdeans](https://github.com/chrisdeans)); `chi_check()`([#30](https://github.com/Health-SocialCare-Scotland/phsmethods/issues/30), [@graemegowans](https://github.com/graemegowans)); `chi_pad()`([#30](https://github.com/Health-SocialCare-Scotland/phsmethods/issues/30), [@graemegowans](https://github.com/graemegowans)); and `match_area()`([#13](https://github.com/Health-SocialCare-Scotland/phsmethods/issues/13), [@jvillacampa](https://github.com/jvillacampa)).
+- New functions added: `age_group()`([#23](https://github.com/Public-Health-Scotland/phsmethods/issues/23), [@chrisdeans](https://github.com/chrisdeans)); `chi_check()`([#30](https://github.com/Public-Health-Scotland/phsmethods/issues/30), [@graemegowans](https://github.com/graemegowans)); `chi_pad()`([#30](https://github.com/Public-Health-Scotland/phsmethods/issues/30), [@graemegowans](https://github.com/graemegowans)); and `match_area()`([#13](https://github.com/Public-Health-Scotland/phsmethods/issues/13), [@jvillacampa](https://github.com/jvillacampa)).
 
 - The first argument of `postcode()` is now `x`, as opposed to `string`. This is unlikely to break much, if any, existing code. `postcode()` is also now slightly faster.
 

diff --git a/R/age_calculate.R b/R/age_calculate.R
@@ -1,17 +1,17 @@
 #' Calculate age between two dates
 #'
 #' @description This function calculates the age between two dates using functions in \code{lubridate}.
 #' It calculates age in either years or months.
 #'
 #' @param start A start date (e.g. date of birth) which must be supplied with \code{Date} or \code{POSIXct} or \code{POSIXlt}
 #' class. \code{\link[base:as.Date]{as.Date()}},
 #' \code{\link[lubridate:ymd]{lubridate::dmy()}} and
 #' \code{\link[base:as.POSIXlt]{as.POSIXct()}} are examples of functions which
 #' can be used to store dates as an appropriate class.
 #' @param end An end date which must be supplied with \code{Date} or \code{POSIXct} or \code{POSIXlt} class.
 #' Default is \code{Sys.Date()} or \code{Sys.time()} depending on the class of \code{start}.
 #' @param units Type of units to be used. years and months are accepted. Default is \code{years}.
 #' @param round_down Should returned ages be rounded down to the nearest whole number. Default is \code{TRUE}.
 #'
 #' @examples
 #' library(lubridate)
@@ -20,7 +20,7 @@
 #' age_calculate(birth_date, end_date)
 #' age_calculate(birth_date, end_date, units = "months")
 #'
 #' # If the start day is leap day (February 29th), age increases on 1st March every year.
 #' leap1 <- lubridate::ymd("2020-02-29")
 #' leap2 <- lubridate::ymd("2022-02-28")
 #' leap3 <- lubridate::ymd("2022-03-01")
@@ -28,10 +28,9 @@
 #' age_calculate(leap1, leap2)
 #' age_calculate(leap1, leap3)
 #' @export
 age_calculate <- function(start, end = if (lubridate::is.Date(start)) Sys.Date() else Sys.time(),
                           units = c("years", "months"), round_down = TRUE) {
-
   make_inheritance_checks(list(start = start, end = end), target_classes = c("Date", "POSIXt"), ignore_null = FALSE)
 
   units <- match.arg(tolower(units), c("years", "months"))


diff --git a/R/data.R b/R/data.R
@@ -11,7 +11,7 @@
 #' Settlements (S20) and Scotland.
 #'
 #' @seealso The script used to create the \code{area_lookup} dataset on
-#' \href{https://github.com/Health-SocialCare-Scotland/phsmethods/blob/master/data-raw/area_lookup.R}{GitHub}.
+#' \href{https://github.com/Public-Health-Scotland/phsmethods/blob/master/data-raw/area_lookup.R}{GitHub}.
 #'
 #' @format A \code{\link[tibble]{tibble}} with 2 variables and over 17,000 rows:
 #' \describe{

diff --git a/R/dob_from_chi.R b/R/dob_from_chi.R
@@ -38,7 +38,6 @@
 #'     max_date = adm_date
 #'   ))
 dob_from_chi <- function(chi_number, min_date = NULL, max_date = NULL, chi_check = TRUE) {
-
   # Do type checking on the params
   if (!inherits(chi_number, "character")) {
     cli::cli_abort("{.arg chi_number} must be a {.cls character} vector, not a {.cls {class(chi_number)}} vector.")
@@ -169,7 +168,6 @@ dob_from_chi <- function(chi_number, min_date = NULL, max_date = NULL, chi_check
 #'     ref_date = dis_date
 #'   ))
 age_from_chi <- function(chi_number, ref_date = NULL, min_age = 0, max_age = NULL, chi_check = TRUE) {
-
   # Do type checking on the params
   if (!inherits(chi_number, "character")) {
     cli::cli_abort("{.arg chi_number} must be a {.cls character} vector, not a {.cls {class(chi_number)}} vector.")

diff --git a/R/extract_fin_year.R b/R/extract_fin_year.R
@@ -1,7 +1,7 @@
-#' @title Assign a date to a financial year
+#' @title Extract the formatted financial year from a date
 #'
-#' @description \code{extract_fin_year} takes a date and assigns it to the correct
-#' financial year in the PHS specified format.
+#' @description \code{extract_fin_year} takes a date and extracts the
+#' correct financial year in the PHS specified format from it.
 #'
 #' @details The PHS accepted format for financial year is YYYY/YY e.g. 2017/18.
 #'
@@ -17,7 +17,8 @@
 #' @export
 extract_fin_year <- function(date) {
   if (!inherits(date, c("Date", "POSIXct"))) {
-    cli::cli_abort("{.arg date} must be a {.cls Date} or {.cls POSIXct} vector, not a {.cls {class(date)}} vector.")
+    cli::cli_abort("{.arg date} must be a {.cls Date} or {.cls POSIXct} vector,
+                   not a {.cls {class(date)}} vector.")
   }
 
   # Simply converting all elements of the input vector resulted in poor
@@ -26,29 +27,18 @@ extract_fin_year <- function(date) {
   # and then match them back on to the original input. This vastly improves
   # performance for large inputs.
 
-  x <- tibble::tibble(dates = unique(date)) %>%
-    dplyr::mutate(
-      fyear = paste0(
-        ifelse(lubridate::month(.data$dates) >= 4,
-          lubridate::year(.data$dates),
-          lubridate::year(.data$dates) - 1
-        ),
-        "/",
-        substr(
-          ifelse(lubridate::month(.data$dates) >= 4,
-            lubridate::year(.data$dates) + 1,
-            lubridate::year(.data$dates)
-          ),
-          3, 4
-        )
-      ),
-      fyear = ifelse(is.na(.data$dates),
-        NA_character_,
-        .data$fyear
-      )
-    )
+  unique_date <- unique(date)
 
-  tibble::tibble(dates = date) %>%
-    dplyr::left_join(x, by = "dates") %>%
-    dplyr::pull(.data$fyear)
+  unique_fy_q <-
+    lubridate::year(unique_date) - (lubridate::month(unique_date) %in% 1:3)
+
+  unique_fy <- ifelse(
+    is.na(unique_date),
+    NA_character_,
+    paste0(unique_fy_q, "/", (unique_fy_q %% 100L) + 1L)
+  )
+
+  fin_years <- unique_fy[match(date, unique_date)]
+
+  return(fin_years)
 }
diff --git a/R/file_size.R b/R/file_size.R
@@ -84,41 +84,34 @@ file_size <- function(filepath = getwd(), pattern = NULL) {
     cli::cli_abort("{.arg pattern} must be a {.cls character}, not a {.cls {class(pattern)}}.")
   }
 
-  x <- dir(path = filepath, pattern = pattern)
+  file_list <- list.files(path = filepath, pattern = pattern)
 
-  if (length(x) == 0) {
+  if (length(file_list) == 0) {
     return(NULL)
   }
 
-  y <- x %>%
-    purrr::map_dbl(~ file.info(paste0(filepath, "/", .))$size) %>%
-    # The gdata package defines a kilobyte (KB) as 1,000 bytes, and a
-    # kibibyte (KiB) as 1,024 bytes
-    # In PHS a kilobyte is normally taken to be 1,024 bytes
-    # As a workaround, calculate file sizes in kibibytes (or higher), then
-    # drop the `i` from the output
-    gdata::humanReadable(standard = "IEC", digits = 0) %>%
-    gsub("i", "", .) %>%
-    trimws()
+  formatted_size <- file.path(filepath, file_list) %>%
+    file.size() %>%
+    scales::number_bytes(units = "si")
 
-  z <- dplyr::case_when(
-    stringr::str_detect(x, "\\.xls(b|m|x)?$") ~ "Excel ",
-    stringr::str_detect(x, "\\.csv$") ~ "CSV ",
-    stringr::str_detect(x, "\\.z?sav$") ~ "SPSS ",
-    stringr::str_detect(x, "\\.doc(m|x)?$") ~ "Word ",
-    stringr::str_detect(x, "\\.rds$") ~ "RDS ",
-    stringr::str_detect(x, "\\.txt$") ~ "Text ",
-    stringr::str_detect(x, "\\.fst$") ~ "FST ",
-    stringr::str_detect(x, "\\.pdf$") ~ "PDF ",
-    stringr::str_detect(x, "\\.tsv$") ~ "TSV ",
-    stringr::str_detect(x, "\\.html$") ~ "HTML ",
-    stringr::str_detect(x, "\\.ppt(m|x)?$") ~ "PowerPoint ",
-    stringr::str_detect(x, "\\.md$") ~ "Markdown ",
+  file_type <- dplyr::case_when(
+    stringr::str_detect(file_list, "\\.xls(b|m|x)?$") ~ "Excel ",
+    stringr::str_detect(file_list, "\\.csv$") ~ "CSV ",
+    stringr::str_detect(file_list, "\\.z?sav$") ~ "SPSS ",
+    stringr::str_detect(file_list, "\\.doc(m|x)?$") ~ "Word ",
+    stringr::str_detect(file_list, "\\.rds$") ~ "RDS ",
+    stringr::str_detect(file_list, "\\.txt$") ~ "Text ",
+    stringr::str_detect(file_list, "\\.fst$") ~ "FST ",
+    stringr::str_detect(file_list, "\\.pdf$") ~ "PDF ",
+    stringr::str_detect(file_list, "\\.tsv$") ~ "TSV ",
+    stringr::str_detect(file_list, "\\.html$") ~ "HTML ",
+    stringr::str_detect(file_list, "\\.ppt(m|x)?$") ~ "PowerPoint ",
+    stringr::str_detect(file_list, "\\.md$") ~ "Markdown ",
     TRUE ~ ""
   )
 
   tibble::tibble(
-    name = list.files(filepath, pattern),
-    size = paste0(z, y)
+    name = file_list,
+    size = paste0(file_type, formatted_size)
   )
 }