-
Notifications
You must be signed in to change notification settings - Fork 1
/
Absolute and relative poverty.R
201 lines (156 loc) · 9.11 KB
/
Absolute and relative poverty.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
#########################################################
# SG adult poverty data - data import
#########################################################
### Update ScotPHO indicators on adult poverty
### Author: Liz Richardson, 14 Nov 2024
# 2 indicators:
# Definitions:
# 30035 = absolute poverty = Percentage of adults living in households whose income is below 60% of the inflation-adjusted UK median income in 2010/11. (doesn't specify whether after housing costs, but I've selected AHC to match relpov definition)
# 30031 = relative poverty = Percentage of adults living in private households with an equivalised income of less than 60% of the UK median income in the same year, after housing costs
### Notes on the data source:
# statistics.gov.scot
# Source: Scottish Government analysis of the Family Resources Survey
# https://statistics.gov.scot/resource?uri=http%3A%2F%2Fstatistics.gov.scot%2Fdata%2Fpoverty-adults
# POVERTY DATA FROM STATISTICS.GOV.SCOT:
# National Statistics of the number and proportions of people living in private households with an equivalised household income below various poverty thresholds.
# Relative poverty: Individuals living in households whose equivalised income is below 60% of UK median income in the same year.
# This is a measure of whether those in the lowest income households are keeping pace with the growth of incomes in the economy as a whole.
# Absolute poverty: Individuals living in households whose equivalised income is below 60% of inflation adjusted UK median income in 2010/11.
# This is a measure of whether those in the lowest income households are seeing their incomes rise in real terms.
# Where estimates are suppressed due to small sample size this is marked with an asterisk ("*").
# The income measure used is equivalised net disposable income before and after housing costs.
# The before housing costs measure is income from all sources (including earnings, benefits, tax credits, pensions, and investments) after deductions for
# income tax, national insurance contributions, council tax, pension contributions and maintenance payments.
# The after housing costs measure further deducts housing costs such as rent and/or mortgage payments.
# The data source is the Department for Work and Pensions' Family Resources Survey (Households Below Average Income dataset).
# Adults are defined as all working age and pensionable age adults.
# Coverage:
# 1994/95-1996/97 to 2020/21-2022/23 (some splits don't go back this far though).
# N.B. The pandemic severely affected data collection and as a result, data from 2020/21 was not used to produce any of the three-year-averaged estimates.
# This means, for example, that the three-year periods 2018-21, 2019-22 and 2020-23 only contain data from two financial years each.
# statistics.gov.scot data were downloaded using opendatascot:
# https://scotgovanalysis.github.io/opendatascot/
# How to install:
# download the zipped repository from the opendatascot github page to a UNIX space.
# install.packages("devtools")
# library(devtools)
# devtools::install_local(
# "<FILEPATH OF ZIPPED FILE>/opendatascot-main.zip",
# upgrade = "never",
# build_vignettes = TRUE
# )
### functions/packages -----
source("1.indicator_analysis.R")
library(opendatascot)
### 1. Read in data ----

# Inspect the structure and variables available in this dataset
ods_structure("poverty-adults")

# Extract ratios and sample sizes for both poverty measures, after housing
# costs, keeping only the "all" category of the splits that are not reported
adultpov_raw <- opendatascot::ods_dataset(
  "poverty-adults",
  measureType = c("ratio", "sample-size"),
  housingCosts = "after-housing-costs",
  indicatorpoverty = c("relative-poverty", "absolute-poverty"),
  familyType = "all",
  maritalStatus = "all",
  sexualOrientation = "all"
) %>%
  # drop the columns that are not needed downstream
  select(
    -refArea,
    -housingCosts,
    -familyType,
    -maritalStatus,
    -sexualOrientation
  )
### 2. Prepare data ----
adultpov <- adultpov_raw %>%
  # standardise the column names
  clean_names() %>%
  # reshape so the ratio and its sample size sit side by side on one row
  pivot_wider(names_from = measure_type, values_from = value) %>%
  # switch to the column names used in the output files
  rename(
    samplesize = "sample-size",
    trend_axis = ref_period,
    rate = ratio,
    indicator = indicatorpoverty
  ) %>%
  # 95% confidence intervals (Wald method); rate is a percentage, so it is
  # converted to a proportion for the standard error and scaled back to %
  mutate(
    prop = rate / 100,
    ci_wald = 100 * (1.96 * sqrt((prop * (1 - prop)) / samplesize)),
    lowci = rate - ci_wald,
    upci = rate + ci_wald
  ) %>%
  mutate(
    # single split name column: the first dimension that is not "all" wins
    split_name = case_when(
      age != "all" ~ "Age",
      religion != "all" ~ "Religion",
      gender != "all" ~ "Gender",
      age == "all" & religion == "all" & gender == "all" ~ "Total"
    ),
    # single split value column, taken from the dimension named in split_name
    split_value = case_when(
      split_name == "Total" ~ "Total",
      split_name == "Age" ~ age,
      split_name == "Gender" ~ gender,
      split_name == "Religion" ~ religion
    ),
    # tidy split values: capitalise the first letter, then make the
    # hyphenated codes readable
    split_value = str_to_sentence(split_value),
    split_value = str_replace_all(
      split_value,
      c(
        "-years" = " years",
        "years-and-over" = "years+",
        "Church-of-scotland" = "Church of Scotland",
        "No-religion" = "No religion",
        "Other-christian" = "Other Christian",
        "Roman-catholic" = "Roman Catholic"
      )
    )
  ) %>%
  # new columns
  mutate(
    code = "S00000001", # all are Scotland
    numerator = NA_real_, # placeholder where the numerator would ordinarily be
    # assumes trend_axis looks like "1994/95-1996/97": characters 1-4 are the
    # first start year and characters 9-12 the last start year
    def_period = paste0(
      trend_axis,
      " (",
      as.numeric(substr(trend_axis, 9, 12)) -
        as.numeric(substr(trend_axis, 1, 4)) + 1,
      " year aggregate)"
    ),
    # 3 year average, so the mid point is the first start year plus one
    year = as.numeric(substr(trend_axis, 1, 4)) + 1,
    ind_id = case_when(
      indicator == "absolute-poverty" ~ 30035,
      indicator == "relative-poverty" ~ 30031
    )
  ) %>%
  # drop vars not needed
  select(-c(age, religion, gender, samplesize, prop, ci_wald))
##########################################################
### 3. Prepare final files -----
##########################################################
# Function to prepare final files: main_data and popgroup
#
# Filters `adultpov` to a single indicator and writes two pairs of files
# (.csv and .rds) to "Data to be checked/" under `data_folder`:
#   <ind>_shiny         - Scotland data, total population
#   <ind>_shiny_popgrp  - Scotland data for every split other than "Total"
# Both data frames are also assigned to the global environment
# (main_<ind> and pop_grp_<ind>) so they can be inspected afterwards.
#
# Arguments:
#   ind - a single string matching a value of adultpov$indicator,
#         e.g. "absolute-poverty" or "relative-poverty"
#
# NOTE(review): `data_folder` is not defined in this script; presumably it is
# created by the sourced 1.indicator_analysis.R - confirm.
prepare_final_files <- function(ind) {

  # Fail fast on a bad indicator name: without this check a typo would filter
  # to zero rows and silently write empty files
  stopifnot(is.character(ind), length(ind) == 1L)
  if (!ind %in% adultpov$indicator) {
    stop(
      "Unknown indicator '", ind, "': no matching rows in adultpov",
      call. = FALSE
    )
  }

  # 1 - main data (ie data behind summary/trend/rank tab)
  # Contains Scotland data, total pop
  main_data <- adultpov %>%
    filter(
      indicator == ind,
      split_name == "Total"
    ) %>%
    select(
      code, ind_id, year,
      numerator, rate, upci, lowci,
      def_period, trend_axis
    ) %>%
    unique() %>%
    arrange(code, year)

  write.csv(
    main_data,
    paste0(data_folder, "Data to be checked/", ind, "_shiny.csv"),
    row.names = FALSE
  )
  write_rds(
    main_data,
    paste0(data_folder, "Data to be checked/", ind, "_shiny.rds")
  )

  # 2 - population groups data (ie data behind population groups tab)
  # Contains Scotland data for every split except the "Total" rows
  pop_grp_data <- adultpov %>%
    filter(indicator == ind & !(split_name %in% c("Total"))) %>%
    select(
      code, ind_id, year, numerator, rate, upci,
      lowci, def_period, trend_axis, split_name, split_value
    ) %>%
    arrange(code, year, split_name)

  # Save
  write.csv(
    pop_grp_data,
    paste0(data_folder, "Data to be checked/", ind, "_shiny_popgrp.csv"),
    row.names = FALSE
  )
  write_rds(
    pop_grp_data,
    paste0(data_folder, "Data to be checked/", ind, "_shiny_popgrp.rds")
  )

  # Make data created available outside of function so it can be visually
  # inspected if required
  assign(paste0("main_", ind), main_data, envir = .GlobalEnv)
  assign(paste0("pop_grp_", ind), pop_grp_data, envir = .GlobalEnv)
}
# Run function to create final files: one call per indicator. Each call writes
# that indicator's main and population group files and creates the
# main_<ind> / pop_grp_<ind> objects in the global environment.
prepare_final_files(ind = "absolute-poverty")
prepare_final_files(ind = "relative-poverty")
# Run QA reports
# NOTE(review): run_qa() is not defined in this script; presumably it comes
# from the sourced 1.indicator_analysis.R - confirm.
# Known issue (main data): failing because the data aren't available at HB
# level (fix the .rmd later) "Warning: Error in eval: object 'S08' not found"
run_qa(filename = "absolute-poverty")
run_qa(filename = "relative-poverty")
# Manual plot checks - which include pop groups

# main data: both indicators on one chart, one line and ribbon per indicator
ggplot(
  data = rbind(`main_absolute-poverty`, `main_relative-poverty`),
  mapping = aes(year, rate, group = as.factor(ind_id), colour = as.factor(ind_id))
) +
  geom_point() +
  geom_line() +
  geom_ribbon(aes(ymin = lowci, ymax = upci), alpha = 0.1)

# popgroup data: absolute poverty, one panel per split
ggplot(
  data = `pop_grp_absolute-poverty`,
  mapping = aes(year, rate, group = as.factor(split_value), colour = as.factor(split_value))
) +
  geom_point() +
  geom_line() +
  geom_ribbon(aes(ymin = lowci, ymax = upci), alpha = 0.1) +
  facet_wrap(~split_name)

# popgroup data: relative poverty, one panel per split
ggplot(
  data = `pop_grp_relative-poverty`,
  mapping = aes(year, rate, group = as.factor(split_value), colour = as.factor(split_value))
) +
  geom_point() +
  geom_line() +
  geom_ribbon(aes(ymin = lowci, ymax = upci), alpha = 0.1) +
  facet_wrap(~split_name)