Skip to content

Commit

Permalink
Updated extract_fin_year.
Browse files Browse the repository at this point in the history
  • Loading branch information
Nic-Chr committed Nov 20, 2023
1 parent 63d6821 commit baec35a
Show file tree
Hide file tree
Showing 2 changed files with 94 additions and 16 deletions.
28 changes: 12 additions & 16 deletions R/extract_fin_year.R
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,8 @@
#' extract_fin_year(x)
#' @export
extract_fin_year <- function(date) {
if (!inherits(date, c("Date", "POSIXct"))) {
cli::cli_abort("{.arg date} must be a {.cls Date} or {.cls POSIXct} vector,
if (!inherits(date, c("Date", "POSIXt"))) {
cli::cli_abort("{.arg date} must be a {.cls Date} or {.cls POSIXt} vector,
not a {.cls {class(date)}} vector.")
}

Expand All @@ -29,18 +29,14 @@ extract_fin_year <- function(date) {
# and then match them back on to the original input. This vastly improves
# performance for large inputs.

unique_date <- unique(date)

unique_fy_q <-
lubridate::year(unique_date) - (lubridate::month(unique_date) %in% 1:3)

unique_fy <- ifelse(
is.na(unique_date),
NA_character_,
paste0(unique_fy_q, "/", (unique_fy_q %% 100L) + 1L)
)

fin_years <- unique_fy[match(date, unique_date)]

return(fin_years)
# Note: lubridate year and month coerce to double
# We only need integers for our purposes
posix <- as.POSIXlt(date, tz = lubridate::tz(date))
y <- posix$year + 1900L
m <- posix$mon + 1L
fy <- y - ( (m - 3L) %/% 1L <= 0L )
next_fy <- (fy + 1L) %% 100L
out <- sprintf("%.4d/%02d", fy, next_fy)
out[is.na(date)] <- NA_character_
out
}
82 changes: 82 additions & 0 deletions tests/testthat/test-extract_fin_year.R
Original file line number Diff line number Diff line change
Expand Up @@ -22,3 +22,85 @@ test_that("Non-date formats produce an error", {
test_that("NAs are handled correctly", {
expect_equal(extract_fin_year(c(lubridate::dmy(05012020), NA)), c("2019/20", NA))
})

test_that("YYYY/YY format applied correctly", {
expect_equal(extract_fin_year(c(lubridate::dmy(NA, "01/April/1999"), NA)),
c(NA, "1999/00", NA))
expect_equal(extract_fin_year(c(lubridate::dmy(NA, "01/April/2000"), NA)),
c(NA, "2000/01", NA))
expect_equal(extract_fin_year(c(lubridate::dmy(NA, "01/April/2001"), NA)),
c(NA, "2001/02", NA))
expect_equal(extract_fin_year(c(lubridate::dmy(NA, "31/March/1999"), NA)),
c(NA, "1998/99", NA))
expect_equal(extract_fin_year(c(lubridate::dmy(NA, "31/March/2000"), NA)),
c(NA, "1999/00", NA))
expect_equal(extract_fin_year(c(lubridate::dmy(NA, "31/March/2001"), NA)),
c(NA, "2000/01", NA))
expect_equal(extract_fin_year(c(lubridate::dmy(NA, "01/December/1999"), NA)),
c(NA, "1999/00", NA))
expect_equal(extract_fin_year(c(lubridate::dmy(NA, "01/December/2000"), NA)),
c(NA, "2000/01", NA))
expect_equal(extract_fin_year(c(lubridate::dmy(NA, "01/December/2999"), NA)),
c(NA, "2999/00", NA))
expect_equal(extract_fin_year(c(lubridate::dmy(NA, "01/December/3000"), NA)),
c(NA, "3000/01", NA))

expect_equal(extract_fin_year(
lubridate::as_datetime(
c(lubridate::dmy(NA, "01/April/1999"), NA)
)
),
c(NA, "1999/00", NA))

expect_equal(extract_fin_year(
lubridate::as_datetime(
c(lubridate::dmy(NA, "01/December/2000"), NA)
)
),
c(NA, "2000/01", NA))

expect_equal(extract_fin_year(
lubridate::as_datetime(
c(lubridate::dmy(NA, "01/April/0001"), NA)
)
),
c(NA, "0001/02", NA))
})

test_that("Correct outputs", {
start <- lubridate::dmy("01/04/1999")
end <- lubridate::dmy("31/03/2024")
x <- seq(start, end, by = "day")
year_breaks <- start + lubridate::years(0:25)
fin_years <- extract_fin_year(x)

df <- dplyr::tibble(x = x, fin_years = fin_years)
out <- dplyr::summarise(df,
start = min(x),
end = max(x),
n = dplyr::n(),
.by = fin_years)

# We'll compare the above output by first creating an expected result
# This will be a tibble of start and end dates for each financial year
# We then use that find the number of days that lie between
# each pair

start_end_df <- dplyr::tibble(start = seq(start, end, by = "year"))
start_end_df$end <- start_end_df$start + lubridate::years(1) - lubridate::days(1)

start_end_df <- start_end_df %>%
dplyr::group_by(start, end) %>%
dplyr::mutate(n = sum(dplyr::between(x, start, end)))

expected_years <- lubridate::year(start_end_df$start)
expected_years2 <- lubridate::year(start_end_df$end) %% 100
expected_years2_padded <- stringr::str_pad(expected_years2, width = 2,
side = "left", pad = "0")
expected_fin_years <- paste0(expected_years, "/", expected_years2_padded)

expect_equal(out$fin_years, expected_fin_years)
expect_equal(out$start, start_end_df$start)
expect_equal(out$end, start_end_df$end)
expect_equal(out$n, start_end_df$n)
})

0 comments on commit baec35a

Please sign in to comment.