-
Notifications
You must be signed in to change notification settings - Fork 1
/
Unmanageable debt.R
194 lines (157 loc) · 9.02 KB
/
Unmanageable debt.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
#########################################################
# Unmanageable debt indicator from Wealth and Assets Survey - data import
#########################################################
### Update ScotPHO indicator on unmanageable debt sourced from SG Scotland's Labour Market data:
### Author: Liz Richardson, 15 Nov 2024
# Indicator:
# 30044 = Percentage of households where the household is falling behind with bills or credit commitments and either making
# excessive debt repayments or is in arrears on monthly commitments (liquidity problems); or where the household is
# burdened by high debt levels relative to annual income (solvency problems)
# Problem debt is defined as having liquidity problems, solvency problems or both.
# Liquidity problems is defined as "falling behind with bills or credit commitments and (in two or more consecutive
# months arrears on bills or credit commitments or household debt repayment to net monthly income ratio >25%)".
# Solvency problems is defined as "feel debt is a heavy burden and debt to net annual income ratio >20%".
# Source: Wealth and Assets Survey, ONS
# SG have extracted Scottish data for 5 waves into:
# https://data.gov.scot/wealth/
# WAS does not cover the regions north of the Caledonian Canal and the Scottish islands.
# It is therefore not representative of the whole of the Scottish population, and especially leaves out
# some of the most remote areas in Scotland.
# The household head is the person with the highest income, or, for equal incomes, the older person.
# Availability: Scot, M/F/Total, 2010-12 to 2018-20
# CIs derived using Wald method, % and unweighted base (sample size).
### functions/packages -----
source("1.indicator_analysis.R")
# Load additional packages
library(openxlsx)
### 1. Read in data ----
# Identify data folder
was_data_folder <- paste0(data_folder, "Received Data/Unmanageable debt/sg_wealth_in_scotland_report/")
file <- "tables2022.xlsx"
## Read in data
import_wealth_data <- function(sheet, rows, groups, split_name) {
df <- read.xlsx(paste0(was_data_folder, file),
sheet = sheet,
colNames = TRUE,
rows = rows) %>%
filter(Group %in% groups) %>%
cbind(statistic = c(rep("rate", length(groups)), # adding a column giving the statistic type for the imported data. Only % and sample size are required.
rep("NA", 2*length(groups)),
rep("samplesize", length(groups)))) %>%
filter(statistic != "NA") %>%
pivot_longer(-c(statistic, Group), names_to = "trend_axis", values_to = "value") %>%
mutate(value = case_when(statistic=="rate" ~ round(as.numeric(value)*100, 1),
TRUE ~ as.numeric(value))) %>%
pivot_wider(names_from = statistic, values_from = value) %>%
mutate(ci_wald = 100 * (1.96*sqrt(((rate/100)*(1-(rate/100)))/samplesize)), # Wald method.
lowci = rate - ci_wald,
upci = rate + ci_wald) %>%
mutate(split_value = case_when(Group == "All" ~ "Total",
TRUE ~ Group),
split_name = split_name) %>%
select(-c(Group, ci_wald, samplesize))
}
data_sex <- import_wealth_data(sheet="53", rows=c(6:24), groups=c("Female", "Male", "All"), split_name="Sex of household head")
data_ur <- import_wealth_data(sheet="54", rows=c(6:24), groups=c("Rural", "Urban", "All"), split_name="Urban/Rural status")
data_disab <- import_wealth_data(sheet="51", rows=c(6:24), groups=c("Someone disabled", "No-one disabled", "All"), split_name="Disability of household member(s)")
data_ethnicity <- import_wealth_data(sheet="46", rows=c(7:25), groups=c("White British", "Minority ethnic", "All"), split_name="Ethnicity of household head")
data_income <- import_wealth_data(sheet="49", rows=c(6:36),
groups=c("1st (lowest) household income quintile", "2nd", "3rd", "4th", "5th (highest) household income quintile", "All"),
split_name="Household income (equivalised)") %>%
mutate(split_value = case_when(split_value == "1st (lowest) household income quintile" ~ "1 - lowest income",
split_value == "2nd" ~ "2",
split_value == "3rd" ~ "3",
split_value == "4th" ~ "4",
split_value == "5th (highest) household income quintile" ~ "5 - highest income",
split_value == "All" ~ "Total"))
data_age <- import_wealth_data(sheet="47", rows=c(6:40),
groups=c("16-34","35-44","45-54","55-64","65-74","75+","All"), split_name="Age of household head") %>%
mutate(split_value = paste0(split_value, "y"),
split_value = case_when(split_value == "Totaly" ~ "Total",
split_value == "75+y" ~ "75y+",
TRUE ~ split_value))
all_data <- rbind(data_sex,
data_ur,
data_disab,
data_ethnicity,
data_income,
data_age) %>%
mutate(code = "S00000001", #all are Scotland
ind_id = 30044,
numerator = as.numeric(NA), # insert column where numerator would ordinarily be
def_period = paste0(trend_axis, " (3 survey year aggregate)"),
year = as.numeric(substr(trend_axis, 1, 4)) + 1) # 3 year average, so find mid point
##########################################################
### 3. Prepare final files -----
##########################################################
# Function to prepare final files: main_data and popgroup
prepare_final_files <- function(ind){
# 1 - main data (ie data behind summary/trend/rank tab)
# Contains Scotland data, total pop
main_data <- all_data %>%
filter(split_name == "Sex of household head" & split_value=="Total") %>%
select(code, ind_id, year,
numerator, rate, upci, lowci,
def_period, trend_axis) %>%
unique() %>%
arrange(code, year)
write.csv(main_data, paste0(data_folder, "Data to be checked/", ind, "_shiny.csv"), row.names = FALSE)
write_rds(main_data, paste0(data_folder, "Data to be checked/", ind, "_shiny.rds"))
# 2 - population groups data (ie data behind population groups tab)
# Contains Scotland data by population groups (including total)
pop_grp_data <- all_data %>%
filter(!(split_name %in% c("None", "Deprivation (SIMD)"))) %>%
select(code, ind_id, year, numerator, rate, upci,
lowci, def_period, trend_axis, split_name, split_value) %>%
arrange(code, year, split_name)
# Save
write.csv(pop_grp_data, paste0(data_folder, "Data to be checked/", ind, "_shiny_popgrp.csv"), row.names = FALSE)
write_rds(pop_grp_data, paste0(data_folder, "Data to be checked/", ind, "_shiny_popgrp.rds"))
# Make data created available outside of function so it can be visually inspected if required
main_data_result <<- main_data
pop_grp_data_result <<- pop_grp_data
}
# Run function to create final files
prepare_final_files(ind = "unmanageable_debt")
# # Run QA reports
run_qa(filename = "unmanageable_debt")
# Manual plot checks as pop group automated checks not written yet
# Plot the indicator(s)
# =================================================================================================================
# Let's now see what the series and CIs look like:
# Sex of household head
all_data %>%
filter(split_name=="Sex of household head") %>%
ggplot(aes(year, rate, group = split_value, colour = split_value, shape = split_value)) +
geom_point() + geom_line() +
geom_ribbon(aes(ymin = lowci, ymax = upci), alpha = 0.1)
# Age of household head
all_data %>%
filter(split_name=="Age of household head") %>%
ggplot(aes(year, rate, group = split_value, colour = split_value, shape = split_value)) +
geom_point() + geom_line() +
geom_ribbon(aes(ymin = lowci, ymax = upci), alpha = 0.1)
# Urban/rural status
all_data %>%
filter(split_name=="Urban/Rural status") %>%
ggplot(aes(year, rate, group = split_value, colour = split_value, shape = split_value)) +
geom_point() + geom_line() +
geom_ribbon(aes(ymin = lowci, ymax = upci), alpha = 0.1)
# Disability of household member(s)
all_data %>%
filter(split_name=="Disability of household member(s)") %>%
ggplot(aes(year, rate, group = split_value, colour = split_value, shape = split_value)) +
geom_point() + geom_line() +
geom_ribbon(aes(ymin = lowci, ymax = upci), alpha = 0.1)
# Household income
all_data %>%
filter(split_name=="Household income (equivalised)") %>%
ggplot(aes(year, rate, group = split_value, colour = split_value, shape = split_value)) +
geom_point() + geom_line() +
geom_ribbon(aes(ymin = lowci, ymax = upci), alpha = 0.1)
# Ethnicity of household head
all_data %>%
filter(split_name=="Ethnicity of household head") %>%
ggplot(aes(year, rate, group = split_value, colour = split_value, shape = split_value)) +
geom_point() + geom_line() +
geom_ribbon(aes(ymin = lowci, ymax = upci), alpha = 0.1)