###########################################################
# UNCERTAINTY
#
# Functions for generating and summarising model uncertainty.
#
###########################################################

# ---------------------------------------------------------
# Generate samples from external model results
# ---------------------------------------------------------
extern_uncertainty = function() {

  message(" > Generating uncertainty (",
          o$uncertainty_samples, " samples)")

  # ---- External model uncertainty summary ----

  # Function to load external model uncertainty
  load_fn = function(model) {

    # Load external model uncertainty where available
    model_uncert_dt = try_load(
      pth  = o$pth$extern,
      file = paste1("epi50", model, "uncertainty"),
      throw_error = FALSE)

    return(model_uncert_dt)
  }

  # All external models
  model_dt = table("extern_models")

  # Average uncertainty bounds per d-v-a per year
  bounds_dt = model_dt %>%
    pull(model) %>%
    lapply(load_fn) %>%
    rbindlist() %>%
    left_join(y  = model_dt,
              by = "model") %>%
    left_join(y  = table("d_v_a"),
              by = "disease") %>%
    group_by(d_v_a_id, year) %>%
    summarise(lb = pmin(1, mean(lower)),
              ub = pmax(1, mean(upper))) %>%
    ungroup() %>%
    as.data.table()
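
  # NOTE: Illustrative only - with hypothetical values, a bounds_dt row of
  # d_v_a_id = 1, year = 2000, lb = 0.8, ub = 1.2 says that external models
  # on average bound impact at 80% to 120% of the best estimate for that year.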

  # ---- Generate uncertainty samples ----

  # IDs of samples to generate
  sample_ids = get_sample_ids(1 : o$uncertainty_samples)

  # Load best estimate outcomes (all metrics)
  best_dt = table("extern_estimates")

  # Consider one metric at a time
  for (metric in o$metrics) {

    # Best estimate for this metric
    metric_dt = best_dt %>%
      rename(value = !!paste1(metric, "averted")) %>%
      lazy_dt() %>%
      # Summarise over age...
      group_by(d_v_a_id, country, year) %>%
      summarise(impact = sum(value)) %>%
      ungroup() %>%
      as.data.table()
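
    # (metric_dt now holds one row per d-v-a, country, and year,
    #  with impact summed over all ages)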

    # Apply bounds and generate uncertainty samples
    samples_dt = metric_dt %>%
      left_join(y  = bounds_dt,
                by = c("d_v_a_id", "year")) %>%
      # Assume trivial uncertainty if undefined...
      pivot_longer(cols = c(lb, ub),
                   names_to  = "direction",
                   values_to = "scaler") %>%
      replace_na(list(scaler = 1)) %>%
      # Apply uncertainty bounds...
      mutate(bound = impact * scaler,
             diff  = abs(impact - bound)) %>%
      group_by(d_v_a_id, country, year, impact) %>%
      summarise(diff = mean(diff)) %>%
      ungroup() %>%
      # Interpret average bound as three standard deviations from mean...
      mutate(sd = diff / 3) %>%
      select(d_v_a_id, country, year, mean = impact, sd) %>%
      # Sample from Gaussian uncertainty_samples times...
      expand_grid(sample = sample_ids) %>%
      mutate(impact = rnorm(n(), mean, sd)) %>%
      select(d_v_a_id, country, year, sample, impact) %>%
      as.data.table()

    # Concatenate with best estimate
    uncert_dt = metric_dt %>%
      mutate(sample  = "best",
             .before = impact) %>%
      rbind(samples_dt) %>%
      arrange(d_v_a_id, country, year, sample)

    # Save in tables cache
    save_table(uncert_dt, paste1("extern_uncertainty", metric))
  }
}
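
# A minimal standalone sketch (hypothetical values throughout) of the sampling
# logic above: the mean distance from the best estimate to its bounds is
# interpreted as three standard deviations, and Gaussian samples are drawn
# around the best estimate. Wrapped in if (FALSE) so it never runs on source.
if (FALSE) {
  impact = 1000                  # Best estimate impact (hypothetical)
  lb = 0.8; ub = 1.3             # Averaged bound scalers (hypothetical)
  diff = mean(abs(impact - impact * c(lb, ub)))  # Mean distance to bounds
  sd   = diff / 3                # Bound read as three SDs from the mean
  samples = rnorm(200, mean = impact, sd = sd)   # Gaussian impact samples
  quantile(samples, c(0.025, 0.975))             # Approximate 95% interval
}
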
# ---------------------------------------------------------
# Summarise over posterior samples for lower and upper bounds
# ---------------------------------------------------------
summarise_uncertainty = function(data, cumulative = FALSE) {

  # Cumulative summing must be done before sample summary
  if (cumulative == TRUE) {

    # Cumulatively sum over time first
    data %<>%
      lazy_dt() %>%
      group_by(d_v_a_id, country, sample) %>%
      mutate(impact = cumsum(impact)) %>%
      ungroup() %>%
      as.data.table()
  }

  # Compute quantiles of all samples to obtain bounds
  bounds_dt = data %>%
    lazy_dt() %>%
    filter(sample != "best") %>%
    group_by(d_v_a_id, country, year) %>%
    summarise(lower = quantile(impact, o$quantiles[1]),
              upper = quantile(impact, o$quantiles[2])) %>%
    ungroup() %>%
    as.data.table()
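
  # NOTE: o$quantiles is set elsewhere in the project; for example (hypothetical
  # values), o$quantiles = c(0.025, 0.975) would yield a 95% interval.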

  # Append bounds to best estimate results
  summary_dt = data %>%
    filter(sample == "best") %>%
    left_join(y  = bounds_dt,
              by = c("d_v_a_id", "country", "year")) %>%
    # Bound uncertainty...
    mutate(upper = pmax(upper, impact),
           lower = pmin(lower, impact),
           lower = pmax(lower, 0)) %>%
    # Append evaluation error...
    left_join(y  = read_rds("history", "evaluation_error"),
              by = c("d_v_a_id", "country", "year")) %>%
    replace_na(list(err = 1)) %>%
    # Consider evaluation error in bounds...
    mutate(upper = impact + (upper - impact) * (err ^ 2),
           lower = impact - (impact - lower) * (err ^ 2),
           lower = pmax(lower, 0)) %>%
    select(d_v_a_id, country, year, impact, lower, upper)

  return(summary_dt)
}
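
# Hypothetical usage sketch, assuming the project environment is loaded
# (o$quantiles set and an 'evaluation_error' file cached). The table name is
# illustrative only. Wrapped in if (FALSE) so it never runs on source.
if (FALSE) {
  annual_dt  = table("extern_uncertainty_deaths")  # Hypothetical cached table
  summary_dt = summarise_uncertainty(annual_dt)                     # Annual bounds
  cum_dt     = summarise_uncertainty(annual_dt, cumulative = TRUE)  # Cumulative bounds
}
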
# ---------------------------------------------------------
# Convert sample number to sample ID
# ---------------------------------------------------------
get_sample_ids = function(samples) {

  # For readability, we'll convert from sample numbers to IDs
  sample_names = sprintf("s%04i", seq_along(samples))

  # Construct named vector dictionary for recoding
  sample_dict = setNames(sample_names, samples)

  return(sample_dict)
}
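
# Illustrative check (values are what the code above would produce):
# get_sample_ids(1 : 3) returns c("1" = "s0001", "2" = "s0002", "3" = "s0003"),
# i.e. sample numbers recoded to zero-padded sample IDs.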