From ebbca02123a3a127a1565090c27a5f2f98378bf9 Mon Sep 17 00:00:00 2001 From: Jaime Villacampa Date: Fri, 9 Jul 2021 14:42:52 +0100 Subject: [PATCH] adding simd 2020, ugly but works --- Young people in deprived quintile.R | 106 ++++++++++++++++++---------- 1 file changed, 67 insertions(+), 39 deletions(-) diff --git a/Young people in deprived quintile.R b/Young people in deprived quintile.R index 1d4d872..c339f36 100644 --- a/Young people in deprived quintile.R +++ b/Young people in deprived quintile.R @@ -21,80 +21,108 @@ source("1.indicator_analysis.R") #Normal indicator functions #Small function to standarize each years info. Function parameters: #Data is for what basefile to use, list_pos is for the position of the data frame #simd for which simd variables-year to look at, year for what year is the data created. -prepare_file <- function(dz_list) { - raw_data <<- readRDS(paste0(lookups, "Population/DZ11_pop_basefile.rds")) %>% - filter(age<26 & datazone2011 %in% dz_list & year>2010) %>% group_by(year, datazone2011) %>% +prepare_file <- function(dz_list14, dz_list17) { + pop <- readRDS(paste0(lookups, "Population/DZ11_pop_basefile.rds")) %>% + filter(age<26 & year>2010) %>% group_by(year, datazone2011) %>% summarise(numerator = sum(denominator, na.rm= T)) %>% ungroup %>% rename(datazone = datazone2011) + raw_data <<- rbind( + pop %>% filter((datazone %in% dz_list14 & between(year, 2011, 2016))), + pop %>% filter((datazone %in% dz_list17 & year > 2016))) + } ###############################################. ## Part 1 - Format raw data ready for analysis functions ---- ###############################################. 
-simd_data <- readRDS(paste0(cl_out_depr, 'DataZone2011_simd2016.rds')) %>% +simd_data14 <- readRDS(paste0(cl_out_depr, 'DataZone2011_simd2016.rds')) %>% setNames(tolower(names(.))) %>% select(datazone2011, simd2016_crime_rank, simd2016_access_rank, simd2016_inc_rank) +simd_data17 <- readRDS(paste0(cl_out_depr, 'DataZone2011_simd2020v2.rds')) %>% + setNames(tolower(names(.))) %>% + select(datazone2011, simd2020v2_crime_rank, simd2020v2_access_rank, simd2020v2_inc_rank) + # Population 25 or under +# Selecting pop years used to create each simd version 2014 for SIMD2016, 2017 for SIMD2020 pop <- readRDS(paste0(lookups, "Population/DZ11_pop_basefile.rds")) %>% - filter(year == "2014") %>% group_by(year, datazone2011) %>% + filter(year %in% c("2014", "2017") & age <26) %>% + group_by(year, datazone2011) %>% summarise(pop = sum(denominator, na.rm= T)) %>% ungroup -pop_total <- pop %>% group_by(year) %>% summarise(pop = sum(pop, na.rm= T)) %>% - ungroup %>% pull(pop) - -cut_breaks <- c(0, pop_total/5, pop_total/5*2, pop_total/5*3, pop_total/5*4, pop_total) +pop14 <- pop %>% filter(year == "2014") +pop17 <- pop %>% filter(year == "2017") + +pop_total14 <- pop14 %>% group_by(year) %>% + summarise(pop = sum(pop, na.rm= T)) %>% ungroup %>% pull(pop) -simd_data <- left_join(simd_data, pop, by = c("datazone2011")) %>% +pop_total17 <- pop17 %>% group_by(year) %>% + summarise(pop = sum(pop, na.rm= T)) %>% ungroup %>% pull(pop) + +# Creating the population thresholds for each quintile +cut_breaks14 <- c(0, pop_total14/5, pop_total14/5*2, pop_total14/5*3, pop_total14/5*4, pop_total14) +cut_breaks17 <- c(0, pop_total17/5, pop_total17/5*2, pop_total17/5*3, pop_total17/5*4, pop_total17) + +# Preparing files for simd 2016 +simd_data14 <- left_join(simd_data14, pop14, by = c("datazone2011")) %>% arrange(simd2016_crime_rank) %>% # crime pop weighted quintile mutate(cum_pop_crime=cumsum(pop), - crime_quintile = as.numeric(paste(cut(cum_pop_crime, cut_breaks, include.lowest=TRUE, + 
crime_quintile = as.numeric(paste(cut(cum_pop_crime, cut_breaks14, include.lowest=TRUE, labels=c("5", "4", "3", "2", "1"))))) %>% arrange(simd2016_access_rank) %>% # access pop weighted quintile mutate(cum_pop_access=cumsum(pop), - access_quintile = as.numeric(paste(cut(cum_pop_access, cut_breaks, include.lowest=TRUE, + access_quintile = as.numeric(paste(cut(cum_pop_access, cut_breaks14, include.lowest=TRUE, labels=c("5", "4", "3", "2", "1"))))) %>% arrange(simd2016_inc_rank) %>% # income pop weighted quintile mutate(cum_pop_inc=cumsum(pop), - inc_quintile = as.numeric(paste(cut(cum_pop_inc, cut_breaks, include.lowest=TRUE, + inc_quintile = as.numeric(paste(cut(cum_pop_inc, cut_breaks14, include.lowest=TRUE, labels=c("5", "4", "3", "2", "1"))))) %>% select(-starts_with("cum_pop"), -starts_with("simd")) -crime_dz <- simd_data %>% filter(crime_quintile == "5") %>% pull(datazone2011) -inc_dz <- simd_data %>% filter(inc_quintile == "5") %>% pull(datazone2011) -access_dz <- simd_data %>% filter(access_quintile == "5") %>% pull(datazone2011) +# Preparing files for simd 2020 +simd_data17 <- left_join(simd_data17, pop17, by = c("datazone2011")) %>% + arrange(simd2020v2_crime_rank) %>% # crime pop weighted quintile + mutate(cum_pop_crime=cumsum(pop), + crime_quintile = as.numeric(paste(cut(cum_pop_crime, cut_breaks17, include.lowest=TRUE, + labels=c("5", "4", "3", "2", "1"))))) %>% + arrange(simd2020v2_access_rank) %>% # access pop weighted quintile + mutate(cum_pop_access=cumsum(pop), + access_quintile = as.numeric(paste(cut(cum_pop_access, cut_breaks17, include.lowest=TRUE, + labels=c("5", "4", "3", "2", "1"))))) %>% + arrange(simd2020v2_inc_rank) %>% # income pop weighted quintile + mutate(cum_pop_inc=cumsum(pop), + inc_quintile = as.numeric(paste(cut(cum_pop_inc, cut_breaks17, include.lowest=TRUE, + labels=c("5", "4", "3", "2", "1"))))) %>% + select(-starts_with("cum_pop"), -starts_with("simd")) + -saveRDS(prepare_file(inc_dz), paste0(data_folder, "Prepared 
Data/young_people_income_raw.rds")) -saveRDS(prepare_file(crime_dz), paste0(data_folder, "Prepared Data/young_people_crime_raw.rds")) -saveRDS(prepare_file(access_dz), paste0(data_folder, "Prepared Data/young_people_access_raw.rds")) +crime_dz14 <- simd_data14 %>% filter(crime_quintile == "5") %>% pull(datazone2011) +inc_dz14 <- simd_data14 %>% filter(inc_quintile == "5") %>% pull(datazone2011) +access_dz14 <- simd_data14 %>% filter(access_quintile == "5") %>% pull(datazone2011) +crime_dz17 <- simd_data17 %>% filter(crime_quintile == "5") %>% pull(datazone2011) +inc_dz17 <- simd_data17 %>% filter(inc_quintile == "5") %>% pull(datazone2011) +access_dz17 <- simd_data17 %>% filter(access_quintile == "5") %>% pull(datazone2011) + +saveRDS(prepare_file(inc_dz14, inc_dz17), + paste0(data_folder, "Prepared Data/young_people_income_raw.rds")) +saveRDS(prepare_file(crime_dz14, crime_dz17), + paste0(data_folder, "Prepared Data/young_people_crime_raw.rds")) +saveRDS(prepare_file(access_dz14, access_dz17), + paste0(data_folder, "Prepared Data/young_people_access_raw.rds")) ###############################################. ## Part 2 - Calling the analysis functions ---- ###############################################. ###############################################. -# Crime -analyze_first(filename = "young_people_crime", geography = "datazone11", measure = "percent", - yearstart = 2011, yearend = 2018, time_agg = 1, pop = "DZ11_pop_under26") - -analyze_second(filename = "young_people_crime", measure = "percent", time_agg = 1, - ind_id = 13005, year_type = "calendar", qa = F) +filenames <- c("young_people_crime", "young_people_access", "young_people_income") +# Running functions for the three indicators +mapply(analyze_first, filename = filenames, geography = "datazone11", measure = "percent", + yearstart = 2011, yearend = 2019, time_agg = 1, pop = "DZ11_pop_under26") -###############################################. 
-# Access -analyze_first(filename = "young_people_access", geography = "datazone11", measure = "percent", - yearstart = 2011, yearend = 2018, time_agg = 1, pop = "DZ11_pop_under26") - -analyze_second(filename = "young_people_access", measure = "percent", time_agg = 1, - ind_id = 13003, year_type = "calendar", qa = F) - -###############################################. -# Income -analyze_first(filename = "young_people_income", geography = "datazone11", measure = "percent", - yearstart = 2011, yearend = 2018, time_agg = 1, pop = "DZ11_pop_under26") +mapply(analyze_second, filename = filenames, ind_id = c(13005, 13003, 13004), # one call per indicator; ids follow the order of filenames (crime, access, income) + MoreArgs = list(measure = "percent", time_agg = 1, year_type = "calendar", qa = FALSE)) -analyze_second(filename = "young_people_income", measure = "percent", time_agg = 1, - ind_id = 13004, year_type = "calendar", qa = F) ## END