Skip to content

Commit

Permalink
some changes to add_keep_population_flag
Browse files Browse the repository at this point in the history
  • Loading branch information
lizihao-anu committed Oct 24, 2023
1 parent ab669d2 commit d8d4d02
Showing 1 changed file with 13 additions and 31 deletions.
44 changes: 13 additions & 31 deletions R/add_keep_population_flag.R
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,8 @@ add_keep_population_flag <- function(individual_file, year, chi_var_name = "chi"
} else {
## Obtain the population estimates for Locality AgeGroup and Gender.
pop_estimates <-
readr::read_rds(get_datazone_pop_path("DataZone2011_pop_est_2011_2021.rds"))
dplyr::select(year, datazone2011, sex, age0:age90plus)
readr::read_rds(get_datazone_pop_path("DataZone2011_pop_est_2011_2021.rds")) %>%

Check warning on line 18 in R/add_keep_population_flag.R

View workflow job for this annotation

GitHub Actions / lint-changed-files

file=R/add_keep_population_flag.R,line=18,col=81,[line_length_linter] Lines should not be more than 80 characters.
dplyr::select(year, datazone2011, sex, age0:age90plus)

Check warning on line 19 in R/add_keep_population_flag.R

View workflow job for this annotation

GitHub Actions / lint-changed-files

file=R/add_keep_population_flag.R,line=19,col=27,[object_usage_linter] no visible binding for global variable 'datazone2011'

Check warning on line 19 in R/add_keep_population_flag.R

View workflow job for this annotation

GitHub Actions / lint-changed-files

file=R/add_keep_population_flag.R,line=19,col=41,[object_usage_linter] no visible binding for global variable 'sex'

Check warning on line 19 in R/add_keep_population_flag.R

View workflow job for this annotation

GitHub Actions / lint-changed-files

file=R/add_keep_population_flag.R,line=19,col=46,[object_usage_linter] no visible binding for global variable 'age0'

Check warning on line 19 in R/add_keep_population_flag.R

View workflow job for this annotation

GitHub Actions / lint-changed-files

file=R/add_keep_population_flag.R,line=19,col=51,[object_usage_linter] no visible binding for global variable 'age90plus'

# Step 1: Obtain the population estimates for Locality, AgeGroup, and Gender
# Select out the estimates for the year of interest.
Expand All @@ -36,10 +36,8 @@ add_keep_population_flag <- function(individual_file, year, chi_var_name = "chi"
pop_estimates <- pop_estimates %>%
# Recode gender to make it match source.
dplyr::mutate(sex = dplyr::if_else(sex == "M", 1, 2)) %>%

Check warning on line 38 in R/add_keep_population_flag.R

View workflow job for this annotation

GitHub Actions / lint-changed-files

file=R/add_keep_population_flag.R,line=38,col=42,[object_usage_linter] no visible binding for global variable 'sex'
dplyr::rename(
"age90" = "age90plus",
"gender" = "sex"
) %>%
dplyr::rename("age90" = "age90plus",
"gender" = "sex") %>%
tidyr::pivot_longer(
names_to = "age",
names_prefix = "age",
Expand All @@ -59,17 +57,6 @@ add_keep_population_flag <- function(individual_file, year, chi_var_name = "chi"

# Step 2: Work out the current population sizes in the SLF for Locality, AgeGroup, and Gender
# Work out the current population sizes in the SLF for Locality AgeGroup and Gender.
individual_file <- slfhelper::read_slf_individual(year,
columns = c(
chi_var_name,
"locality",
"age",
"gender",
"nsu",
"death_date"
)
)

individual_file_1 <- individual_file %>%
dplyr::mutate(age = as.integer(age)) %>%
add_age_group("age")
Expand All @@ -81,28 +68,24 @@ add_keep_population_flag <- function(individual_file, year, chi_var_name = "chi"
# If they don't have a locality, they're no good as we won't have an estimate to match them against.
# Same for age and gender.
nsu_keep_lookup <- individual_file_1 %>%
dplyr::filter(!is.na(locality), !is.na(age)) %>%
dplyr::filter(!is.na(locality),!is.na(age)) %>%
# Remove people who died before the mid-point of the calender year.
# This will make our numbers line up better with the methodology used for the mid-year population estimates.
# anyone who died 5 years before the file shouldn't be in it anyway...
dplyr::filter(death_date > mid_year | nsu != 0) %>%
# Calculate the populations of the whole SLF and of the NSU.
dplyr::group_by(locality, age_group, gender) %>%
dplyr::mutate(
nsu_population = sum(nsu),
total_source_population = dplyr::n()
) %>%
dplyr::mutate(nsu_population = sum(nsu),
total_source_population = dplyr::n()) %>%
dplyr::left_join(pop_estimates,
by = c("locality", "age_group", "gender")
) %>%
by = c("locality", "age_group", "gender")) %>%
dplyr::mutate(
difference = total_source_population - population_estimate,
new_nsu_figure = nsu_population - difference,
scaling_factor = new_nsu_figure / nsu_population,
scaling_factor = dplyr::case_when(scaling_factor < 0 ~ 0,
scaling_factor > 1 ~ 1,
.default = scaling_factor
),
scaling_factor > 1 ~ 1,
.default = scaling_factor),
keep_nsu = rbinom(1, 1, scaling_factor)
) %>%
dplyr::filter(keep_nsu == 1L) %>%
Expand All @@ -111,9 +94,8 @@ add_keep_population_flag <- function(individual_file, year, chi_var_name = "chi"
# step 3: match the flag back onto the slf
individual_file <- individual_file_1 %>%
dplyr::left_join(nsu_keep_lookup,
by = chi_var_name,
suffix = c("", ".y")
) %>%
by = chi_var_name,
suffix = c("", ".y")) %>%
dplyr::select(-contains(".y")) %>%
dplyr::rename("keep_population" = "keep_nsu") %>%
dplyr::mutate(
Expand All @@ -137,7 +119,7 @@ add_keep_population_flag <- function(individual_file, year, chi_var_name = "chi"
}


add_age_group <- function(individual_file, age_var_name) {
add_age_group = function(individual_file, age_var_name) {
individual_file <- individual_file %>%
dplyr::mutate(
age_group = dplyr::case_when(
Expand Down

0 comments on commit d8d4d02

Please sign in to comment.