Skip to content

Commit

Permalink
Add pivoting of the pop data
Browse files Browse the repository at this point in the history
  • Loading branch information
Moohan committed Jun 19, 2024
1 parent 28b87b8 commit 944762d
Show file tree
Hide file tree
Showing 3 changed files with 171 additions and 21 deletions.
4 changes: 3 additions & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -19,10 +19,12 @@ Imports:
dplyr,
fs,
glue,
janitor,
phsmethods,
readr,
rlang,
tibble
tibble,
tidyr
Suggests:
testthat (>= 3.0.0)
Config/testthat/edition: 3
Expand Down
125 changes: 114 additions & 11 deletions R/get_pop_est.R
Original file line number Diff line number Diff line change
@@ -1,26 +1,77 @@
#' Get population estimates
#'
#' @param level one of "datazone", "intzone", "hscp", "ca" or "hb"
#' @param version default is "latest"
#' @param min_year,max_year (optional) filter years
#' @param age_groups should age groups be used
#' @param ... arguments passed to [phsmethods::create_age_groups()]
#' This function retrieves population estimates based on various parameters.
#' It reads population data from a specified file and filters it based on the
#' input parameters. The function also allows for grouping by age and pivoting
#' the data for wider format.
#' @param level The geographic level for which to retrieve population estimates.
#' One of "datazone", "intzone", "hscp", "ca", or "hb".
#' @param version The version of the population estimates to use
#' (default: "latest").

Check warning on line 9 in R/get_pop_est.R

View workflow job for this annotation

GitHub Actions / lint

file=R/get_pop_est.R,line=9,col=81,[line_length_linter] Lines should not be more than 80 characters. This line is 85 characters.
#' @param min_year,max_year (optional) The minimum and maximum years to
#' include in the results.

Check warning on line 10 in R/get_pop_est.R

View workflow job for this annotation

GitHub Actions / lint

file=R/get_pop_est.R,line=10,col=81,[line_length_linter] Lines should not be more than 80 characters. This line is 95 characters.
#' @param age_groups Logical, indicating whether to aggregate population
#' estimates by age groups.

Check warning on line 11 in R/get_pop_est.R

View workflow job for this annotation

GitHub Actions / lint

file=R/get_pop_est.R,line=11,col=81,[line_length_linter] Lines should not be more than 80 characters. This line is 97 characters.
#' If `TRUE`, [phsmethods::create_age_groups()] is used to build the groups.
#' @param pivot_wider Optionally reshape the data into a wider format,
#' summarising population counts by the specified columns.

Check warning on line 13 in R/get_pop_est.R

View workflow job for this annotation

GitHub Actions / lint

file=R/get_pop_est.R,line=13,col=81,[line_length_linter] Lines should not be more than 80 characters. This line is 126 characters.
#' Allowed values:
#' * `FALSE` (default): Do not pivot.
#' * `TRUE` or `"all"`: Pivot by both sex and age/age group.
#' * `"age"`: Pivot by age/age group only.
#' * `"age-only"`: Pivot by age/age group and aggregate to remove sex.
#' * `"sex"`: Pivot by sex only.
#' * `"sex-only"`: Pivot by sex and aggregate to remove age/age group.
#' @param ... Additional arguments passed to [phsmethods::create_age_groups()].
#'
#' @return A tibble containing the filtered and possibly transformed
#' population data.

Check warning on line 23 in R/get_pop_est.R

View workflow job for this annotation

GitHub Actions / lint

file=R/get_pop_est.R,line=23,col=81,[line_length_linter] Lines should not be more than 80 characters. This line is 85 characters.
#'
#' @note
#' Depending on the values for `age_groups` and `pivot_wider`, the resulting
#' columns in the returned tibble will vary. Refer to the examples below for
#' illustration.
#'
#' @return the pop data as a tibble
#' @export
#'
#' @examples
#' # Basic Usage: Datazone Population Estimates (no filtering)
#' get_pop_est("datazone")
#' get_pop_est("hb", min_year = 1995, max_year = 2020)
#' get_pop_est("ca", age_groups = TRUE, by = 10)
#'
#' # Filter by Year:
#' get_pop_est("ca", min_year = 1995, max_year = 2020)
#'
#' # Age Groups: Health Board (HB) Population Estimates by Age Group
#' get_pop_est("hb", age_groups = TRUE)
#'
#' # Age Groups with Custom Settings:
#' # Aggregate into 5-year age groups, with an open-ended final group "85+"
#' get_pop_est("hb", age_groups = TRUE, by = 5, to = 85)
#'
#' # Pivot Wider (All): CA Population Estimates, Reshaped by Sex and Age Group
#' # The result will have columns for each combination of sex and age group,
#' # e.g., "pop_f_0_4", "pop_m_5_9", etc.
#' get_pop_est("ca", age_groups = TRUE, pivot_wider = "all")
#'
#' # Pivot Wider (Age Only): CA Population Estimates, Reshaped by Age Group Only
#' # This is useful if you only need the total population for each age
#' # group, regardless of sex.

Check warning on line 52 in R/get_pop_est.R

View workflow job for this annotation

GitHub Actions / lint

file=R/get_pop_est.R,line=52,col=81,[line_length_linter] Lines should not be more than 80 characters. This line is 96 characters.
#' get_pop_est("ca", age_groups = TRUE, pivot_wider = "age-only")
#'
#' # Combined Filtering, Age Groups, and Pivoting:
#' # CA population from 2015-2020, aggregated by 10-year age groups, and
#' # pivoted by sex.

Check warning on line 56 in R/get_pop_est.R

View workflow job for this annotation

GitHub Actions / lint

file=R/get_pop_est.R,line=56,col=81,[line_length_linter] Lines should not be more than 80 characters. This line is 87 characters.
#' # The result will have columns for each sex ("pop_f", "pop_m") and a
#' # row per age group.

Check warning on line 57 in R/get_pop_est.R

View workflow job for this annotation

GitHub Actions / lint

file=R/get_pop_est.R,line=57,col=81,[line_length_linter] Lines should not be more than 80 characters. This line is 90 characters.
#' get_pop_est(
#'   "ca",
#'   min_year = 2015,
#'   max_year = 2020,
#'   age_groups = TRUE,
#'   by = 10,
#'   pivot_wider = "sex"
#' )

Check warning on line 58 in R/get_pop_est.R

View workflow job for this annotation

GitHub Actions / lint

file=R/get_pop_est.R,line=58,col=81,[line_length_linter] Lines should not be more than 80 characters. This line is 103 characters.
get_pop_est <- function(

Check warning on line 59 in R/get_pop_est.R

View workflow job for this annotation

GitHub Actions / lint

file=R/get_pop_est.R,line=59,col=1,[cyclocomp_linter] Functions should have cyclomatic complexity of less than 15, this has 18.
level = c("datazone", "intzone", "hscp", "ca", "hb"),
version = "latest",
min_year = NULL,
max_year = NULL,
age_groups = FALSE,
pivot_wider = FALSE,
...) {
level <- rlang::arg_match(level)
if (!inherits(pivot_wider, "logical")) {
pivot_wider <- rlang::arg_match(
pivot_wider,
values = c("all", "age", "age-only", "sex", "sex-only")
)
}

ext <- "rds"
pop_dir <- fs::path(get_lookups_dir(), "Populations", "Estimates")

Expand Down Expand Up @@ -64,14 +115,66 @@ get_pop_est <- function(
}

if (age_groups) {
pop_est <- pop_est %>%
pop_est <- pop_est |>
dplyr::mutate(
age_group = phsmethods::create_age_groups(x = age, ...),
.keep = "unused"
) %>%
dplyr::group_by(dplyr::across(!pop)) %>%
) |>
dplyr::group_by(dplyr::across(!pop)) |>
dplyr::summarise(pop = sum(pop), .groups = "drop")
}

if (pivot_wider %in% list(TRUE, "all")) {
pop_est <- pop_est |>
tidyr::pivot_wider(
id_cols = -"sex",
names_from = c(
"sex_name",
dplyr::if_else(age_groups, "age_group", "age")
),
values_from = "pop",
names_prefix = "pop_",
names_repair = janitor::make_clean_names
)
} else if (pivot_wider == "sex") {
pop_est <- pop_est |>
tidyr::pivot_wider(
id_cols = c(-"sex", dplyr::if_else(age_groups, "age_group", "age")),
names_from = "sex_name",
values_from = "pop",
names_prefix = "pop_",
names_repair = janitor::make_clean_names
)
} else if (pivot_wider == "sex-only") {
pop_est <- pop_est |>
tidyr::pivot_wider(
id_cols = c(-"sex", -dplyr::if_else(age_groups, "age_group", "age")),
names_from = "sex_name",
values_from = "pop",
values_fn = sum,
names_prefix = "pop_",
names_repair = janitor::make_clean_names
)
} else if (pivot_wider == "age") {
pop_est <- pop_est |>
tidyr::pivot_wider(
id_cols = c(-"sex", "sex_name"),
names_from = dplyr::if_else(age_groups, "age_group", "age"),
values_from = "pop",
names_prefix = "pop_",
names_repair = janitor::make_clean_names
)
} else if (pivot_wider == "age-only") {
pop_est <- pop_est |>
tidyr::pivot_wider(
id_cols = c(-"sex", -"sex_name"),
names_from = dplyr::if_else(age_groups, "age_group", "age"),
values_from = "pop",
values_fn = sum,
names_prefix = "pop_",
names_repair = janitor::make_clean_names
)
}

return(pop_est)
}
63 changes: 54 additions & 9 deletions man/get_pop_est.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit 944762d

Please sign in to comment.