-
Notifications
You must be signed in to change notification settings - Fork 1
/
Young people in deprived quintile.R
138 lines (115 loc) · 7.05 KB
/
Young people in deprived quintile.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
# ScotPHO indicators:
# Young people living in the most access deprived quintile, aged 0-25 years
# Young people living in the most crime deprived quintile, aged 0-25 years
# Young people living in the most income deprived quintile, aged 0-25 years
## Part 1 - Format raw data ready for analysis functions
## Part 2 - calling the analysis functions
###############################################.
## Packages/Filepaths/Functions ----
###############################################.
# The deprivation lookups sit in a different folder depending on whether the
# script runs on the server or not, so the path is picked from the platform
# string reported by the R session.
cl_out_depr <- if (sessionInfo()$platform %in%
                   c("x86_64-redhat-linux-gnu (64-bit)",
                     "x86_64-pc-linux-gnu (64-bit)")) {
  "/conf/linkage/output/lookups/Unicode/Deprivation/"
} else {
  "//stats/linkage/output/lookups/Unicode/Deprivation/"
}

# Normal indicator functions
source("1.indicator_analysis.R")
# Builds the raw numerator data for one indicator: the population aged under 26
# by year and datazone, restricted to a given set of datazones. A different set
# of datazones is applied before and after 2016.
# Function parameters:
#   dz_list14: datazone2011 codes to keep for the years 2011 to 2016.
#   dz_list17: datazone2011 codes to keep for the years after 2016.
# Returns a data frame with the columns year, datazone and numerator.
# The result is handed back to the caller; nothing is written to the global
# environment.
# NOTE: `lookups` (folder path) is not defined in this file - presumably it is
# created by 1.indicator_analysis.R; confirm before running standalone.
prepare_file <- function(dz_list14, dz_list17) {
  # Under 26 population from 2011 onwards, summed by year and datazone
  pop <- readRDS(paste0(lookups, "Population/DZ11_pop_basefile.rds")) %>%
    filter(age < 26 & year > 2010) %>%
    group_by(year, datazone2011) %>%
    summarise(numerator = sum(denominator, na.rm = TRUE)) %>%
    ungroup() %>%
    rename(datazone = datazone2011)

  # Each period keeps only the datazones of its own list
  bind_rows(
    pop %>% filter(datazone %in% dz_list14 & between(year, 2011, 2016)),
    pop %>% filter(datazone %in% dz_list17 & year > 2016)
  )
}
###############################################.
## Part 1 - Format raw data ready for analysis functions ----
###############################################.
# SIMD2016 lookup: column names are lower-cased and only the datazone code and
# the crime, access and income ranks are kept.
simd_data14 <- readRDS(paste0(cl_out_depr, "DataZone2011_simd2016.rds"))
names(simd_data14) <- tolower(names(simd_data14))
simd_data14 <- select(
  simd_data14,
  datazone2011,
  simd2016_crime_rank,
  simd2016_access_rank,
  simd2016_inc_rank
)

# SIMD2020v2 lookup: same treatment as above.
simd_data17 <- readRDS(paste0(cl_out_depr, "DataZone2011_simd2020v2.rds"))
names(simd_data17) <- tolower(names(simd_data17))
simd_data17 <- select(
  simd_data17,
  datazone2011,
  simd2020v2_crime_rank,
  simd2020v2_access_rank,
  simd2020v2_inc_rank
)
# Population aged 25 or under, by year and datazone.
# Only the population years used to create each SIMD version are selected:
# 2014 for SIMD2016 and 2017 for SIMD2020.
pop <- readRDS(paste0(lookups, "Population/DZ11_pop_basefile.rds")) %>%
  filter(year %in% c("2014", "2017"), age < 26) %>%
  group_by(year, datazone2011) %>%
  summarise(pop = sum(denominator, na.rm = TRUE)) %>%
  ungroup()

# Datazone populations for each of the two years
pop14 <- filter(pop, year == "2014")
pop17 <- filter(pop, year == "2017")

# Population totals for each year
pop_total14 <- sum(pop14$pop, na.rm = TRUE)
pop_total17 <- sum(pop17$pop, na.rm = TRUE)

# Population thresholds for each quintile: zero, the four fifths of the total
# and the total itself (kept as the exact value so the last datazone's
# cumulative population falls inside the final interval).
cut_breaks14 <- c(0, pop_total14 / 5 * 1:4, pop_total14)
cut_breaks17 <- c(0, pop_total17 / 5 * 1:4, pop_total17)
# SIMD 2016: population weighted quintiles for each domain.
# For every domain the datazones are sorted by rank, the under 26 population
# (2014) is accumulated in that order and the running total is cut at the 2014
# thresholds. The first fifth of the population is labelled 5 and the last 1.
# The three domains are processed one after the other on the same data frame,
# so each sort starts from the row order left by the previous one.
simd_data14 <- simd_data14 %>%
  left_join(pop14, by = "datazone2011") %>%
  # Crime
  arrange(simd2016_crime_rank) %>%
  mutate(cum_pop_crime = cumsum(pop)) %>%
  mutate(crime_quintile = as.numeric(paste(cut(
    cum_pop_crime, cut_breaks14,
    labels = c("5", "4", "3", "2", "1"), include.lowest = TRUE
  )))) %>%
  # Access
  arrange(simd2016_access_rank) %>%
  mutate(cum_pop_access = cumsum(pop)) %>%
  mutate(access_quintile = as.numeric(paste(cut(
    cum_pop_access, cut_breaks14,
    labels = c("5", "4", "3", "2", "1"), include.lowest = TRUE
  )))) %>%
  # Income
  arrange(simd2016_inc_rank) %>%
  mutate(cum_pop_inc = cumsum(pop)) %>%
  mutate(inc_quintile = as.numeric(paste(cut(
    cum_pop_inc, cut_breaks14,
    labels = c("5", "4", "3", "2", "1"), include.lowest = TRUE
  )))) %>%
  # Ranks and running totals are no longer needed once the quintiles exist
  select(-starts_with("simd"), -starts_with("cum_pop"))
# SIMD 2020: population weighted quintiles for each domain.
# For every domain the datazones are sorted by rank, the under 26 population
# (2017) is accumulated in that order and the running total is cut at the 2017
# thresholds. The first fifth of the population is labelled 5 and the last 1.
# The three domains are processed one after the other on the same data frame,
# so each sort starts from the row order left by the previous one.
simd_data17 <- simd_data17 %>%
  left_join(pop17, by = "datazone2011") %>%
  # Crime
  arrange(simd2020v2_crime_rank) %>%
  mutate(cum_pop_crime = cumsum(pop)) %>%
  mutate(crime_quintile = as.numeric(paste(cut(
    cum_pop_crime, cut_breaks17,
    labels = c("5", "4", "3", "2", "1"), include.lowest = TRUE
  )))) %>%
  # Access
  arrange(simd2020v2_access_rank) %>%
  mutate(cum_pop_access = cumsum(pop)) %>%
  mutate(access_quintile = as.numeric(paste(cut(
    cum_pop_access, cut_breaks17,
    labels = c("5", "4", "3", "2", "1"), include.lowest = TRUE
  )))) %>%
  # Income
  arrange(simd2020v2_inc_rank) %>%
  mutate(cum_pop_inc = cumsum(pop)) %>%
  mutate(inc_quintile = as.numeric(paste(cut(
    cum_pop_inc, cut_breaks17,
    labels = c("5", "4", "3", "2", "1"), include.lowest = TRUE
  )))) %>%
  # Ranks and running totals are no longer needed once the quintiles exist
  select(-starts_with("simd"), -starts_with("cum_pop"))
# Datazone codes in the most deprived quintile (labelled 5) for each topic.
# which() keeps only the rows where the comparison is TRUE, so datazones with a
# missing quintile are left out.
# SIMD 2016
crime_dz14 <- with(simd_data14, datazone2011[which(crime_quintile == "5")])
inc_dz14 <- with(simd_data14, datazone2011[which(inc_quintile == "5")])
access_dz14 <- with(simd_data14, datazone2011[which(access_quintile == "5")])
# SIMD 2020
crime_dz17 <- with(simd_data17, datazone2011[which(crime_quintile == "5")])
inc_dz17 <- with(simd_data17, datazone2011[which(inc_quintile == "5")])
access_dz17 <- with(simd_data17, datazone2011[which(access_quintile == "5")])
# For each topic: build the under 26 population of the most deprived datazones
# and save it as the raw file of the indicator.
# Income
prepare_file(inc_dz14, inc_dz17) %>%
  saveRDS(file = paste0(data_folder, "Prepared Data/young_people_income_raw.rds"))
# Crime
prepare_file(crime_dz14, crime_dz17) %>%
  saveRDS(file = paste0(data_folder, "Prepared Data/young_people_crime_raw.rds"))
# Access
prepare_file(access_dz14, access_dz17) %>%
  saveRDS(file = paste0(data_folder, "Prepared Data/young_people_access_raw.rds"))
###############################################.
## Part 2 - Calling the analysis functions ----
###############################################.
###############################################.
# Indicator file names; the ind_id vector further down follows the same order
# (crime, access, income).
filenames <- c(
  "young_people_crime",
  "young_people_access",
  "young_people_income"
)

# Running functions for the three indicators.
# Arguments shared by all three indicators go through MoreArgs.
mapply(
  analyze_first,
  filename = filenames,
  MoreArgs = list(
    geography = "datazone11", measure = "percent", yearstart = 2011,
    yearend = 2023, time_agg = 1, pop = "DZ11_pop_under26"
  )
)

mapply(
  analyze_second,
  filename = filenames,
  ind_id = c(13005, 13003, 13004),
  MoreArgs = list(
    measure = "percent", time_agg = 1, year_type = "calendar", qa = FALSE
  )
)

# For individual checks
analyze_second(
  filename = "young_people_crime", ind_id = 13005,
  measure = "percent", time_agg = 1, year_type = "calendar"
)

analyze_second(
  filename = "young_people_access", ind_id = 13003,
  measure = "percent", time_agg = 1, year_type = "calendar"
)

analyze_second(
  filename = "young_people_income", ind_id = 13004,
  measure = "percent", time_agg = 1, year_type = "calendar"
)
## END