-
Notifications
You must be signed in to change notification settings - Fork 1
/
Alcohol exceeding weekly limits.R
136 lines (89 loc) · 4.75 KB
/
Alcohol exceeding weekly limits.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
### notes --------------------------------------------------------------
# 4165: Alcohol consumption exceeding weekly limits, females
# 4163: Alcohol consumption exceeding weekly limits, males
# 4164: Alcohol consumption exceeding weekly limits, all
# source: https://statistics.gov.scot/resource?uri=http%3A%2F%2Fstatistics.gov.scot%2Fdata%2Fscottish-health-survey-local-area-level-data
# data is automatically extacted from statistics.gov.scot when this script is run
# just update the date period below to ensure the latest date period is extracted: -
### 1. load packages/dependencies ----------------------------------------------
source("1.indicator_analysis.R") # sourcing custom indicator functions
library(reactable)
library(janitor) # for cleaning column names
### 2. read in downloaded data -------------------------------------------------
# Set filepath and read in csv file
raw_data_filepath <- paste0(data_folder, "Received Data/Alcohol exceeding weekly limits/Alcohol_exceeding_limits.csv")
raw_data <- read.csv(raw_data_filepath)
# clean the column names
data <- raw_data |>
clean_names()
### 3. tidy up the data --------------------------------------------------------
#select columns
data <- data |>
select(year, geographylevel, location, categories, sex, percent, lower_ci, upper_ci)
#filter out both combined sexes to leave male and female
data <- data |>
filter(sex == "Male" | sex == "Female") |>
filter(categories == "Hazardous/Harmful drinker")
#rename some of the existing columns to match final output expected by profiles tool
data <- data |>
rename(lowci = lower_ci, upci = upper_ci, rate = percent)
### 4. use geography lookup to recode geography names --------------------------
#read in geography lookup named 'codedictionary' from the scotpho lookups folder
geography_lookup <- readRDS(paste0(data_folder, "Lookups/Geography/codedictionary.rds"))
#filter geography lookup for healthboards, council areas and intermediate zones respectively
geography_lookup <- geography_lookup |>
filter(str_detect(code, "S08|S12|S00"))
#add "NHS" to the beginning of the HB names to match lookup
data_correct <- data |>
mutate(location_new = case_when(geographylevel == "Health Board" ~ paste("NHS", location), TRUE ~ location))
#recode City of Edinburgh to Edinburgh city to match lookup
data_correct <- data_correct |>
mutate(location_new = case_when(
location_new == "Edinburgh City" ~ "City of Edinburgh",
TRUE ~ location_new
))
#join the new data to the lookup
data_correct <- data_correct |>
left_join(geography_lookup, by = c("location_new" = "areaname"))
# Remove unnecessary columns from joining the lookup to the data
data_correct <- data_correct |>
select(-c(geographylevel, location, categories))
### 5. format dates ------------------------------------------------------------
#create a def_period column
data_correct <- data_correct |>
mutate(def_period= paste0("4-year aggregate"," (", year, ")")) %>%
mutate(numerator = '') %>%
mutate(trend_axis = year)
#arrange the columns
data_correct <- data_correct %>%
select(code, year, sex, numerator, def_period, trend_axis, upci, lowci, rate)
#get a single year from the given range
data_correct <- data_correct |>
mutate(year = str_sub(year, start = -4))
#make the year variable numeric rather than character
data_correct$year <- as.numeric(data_correct$year)
#change to required year by subtracting 1
data_correct$year = data_correct$year - 1
rows_to_modify <- data_correct$trend_axis == '2017-2021'
data_correct$year[rows_to_modify] <- data_correct$year[rows_to_modify] - 1
### 6. separate male and female data -------------------------------------------
#separate into males and females and create indicator ID col based on tech doc
males <- data_correct |>
filter(sex == "Male") |>
mutate(ind_id="4163") |>
select(-sex)
females <- data_correct |>
filter(sex == "Female")|>
mutate(ind_id = "4165") |>
select(-sex)
### 7. save output -------------------------------------------------------------
# saving the male indicator data
write.csv(males, paste0(data_folder, "Data to be checked/alcohol-exceeding-weekly-limits-male_shiny.csv"), row.names = FALSE)
saveRDS(males, paste0(data_folder, "Data to be checked/alcohol-exceeding-weekly-limits-male_shiny.rds"))
# saving the female indicator data
write.csv(females, paste0(data_folder, "Data to be checked/alcohol-exceeding-weekly-limits-female_shiny.csv"), row.names = FALSE)
saveRDS(females, paste0(data_folder, "Data to be checked/alcohol-exceeding-weekly-limits-female_shiny.rds"))
### 8. run QA functions --------------------------------------------------------
run_qa(filename = "alcohol-exceeding-weekly-limits-female", check_extras=c())
run_qa(filename = "alcohol-exceeding-weekly-limits-male", check_extras=c())
### END