-
Notifications
You must be signed in to change notification settings - Fork 3
/
run_epidemiar_demo.R
213 lines (174 loc) · 7.8 KB
/
run_epidemiar_demo.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
# ###############################################################################################
#
# This script does all the data collating and processing before calling the epidemiar function
# to run the modeling, forecasting, and early detection and early warning alert algorithms.
#
# At the end, this script will generate a pdf report using epidemia_report_demo.Rnw
#
# See documentation/walkthrough.pdf for more details on each step.
#
# ###############################################################################################
# 1. Libraries & Functions ------------------------------------------------------
## Load packages necessary for script
#make sure pacman is installed
if (!require("pacman")) install.packages("pacman")
#load packages
#note: data corrals use dplyr, tidyr, lubridate, readr, readxl, epidemiar
#note: pdf creation requires knitr, tinytex
pacman::p_load(dplyr,
knitr,
lubridate,
parallel,
readr,
readxl,
tidyr,
tinytex,
tools)
#load specialized package (https://github.com/EcoGRAPH/epidemiar)
library(epidemiar)
## Locally-defined Functions
source("R/data_corrals.R")
source("R/report_save_create_helpers.R")
#due to experimental dplyr::summarise() parameter
options(dplyr.summarise.inform=F)
# 2. Reading in the Data -----------------------------------------------------
# read in woreda metadata
report_woredas <- read_csv("data/amhara_woredas.csv") %>%
filter(report == 1)
#Note: report woredas must have sufficient epi data, env data, and model cluster information, in appropriate files
# read & process case data needed for report
epi_data <- corral_epidemiological(report_woreda_names = report_woredas$woreda_name)
# read & process environmental data for woredas in report
env_data <- corral_environment(report_woredas = report_woredas)
## Optional: For slight speed increase,
# date filtering to remove older environmental data.
# older env data was included to demo epidemiar::env_daily_to_ref() function.
# in make_date_yw() weekday is always end of the week, 7th day
env_start_date <- epidemiar::make_date_yw(year = 2012, week = 1, weekday = 7)
#filter data
env_data <- env_data %>% filter(obs_date >= env_start_date)
#force garbage collection to free up memory
invisible(gc())
# read in climatology / environmental reference data
env_ref_data <- read_csv("data/env_ref_data_2002_2018.csv", col_types = cols())
# read in environmental info file
env_info <- read_xlsx("data/environ_info.xlsx", na = "NA")
# read in forecast and event detection parameters
source("data/epidemiar_settings_demo.R")
# ## OPTIONAL: Date Filtering for running certain (past) week's report
# req_date <- epidemiar::make_date_yw(year = 2016, week = 24, weekday = 7) #week is always end of the week, 7th day
# epi_data <- epi_data %>%
# filter(obs_date <= req_date)
# env_data <- env_data %>%
# filter(obs_date <= req_date)
# ## OPTIONAL: If instead the forecast should be beyond known epi data,
# then you can set the forecast start date directly
# pfm_report_settings$fc_start_date <- epidemiar::make_date_yw(year = 2020, week = 4, weekday = 7)
# pv_report_settings$fc_start_date <- epidemiar::make_date_yw(2020, week = 4, weekday = 7)
# # OPTIONAL: If you have created cached models to use instead of generating a new model:
# # selects the model per species with latest file created time
# # pfm
# all_pfm_models <- file.info(list.files("data/models/", full.names = TRUE, pattern="^pfm.*\\.RDS$"))
# if (nrow(all_pfm_models) > 0){
# latest_pfm_model <- rownames(all_pfm_models)[which.max(all_pfm_models$ctime)]
# pfm_model_cached <- readRDS(latest_pfm_model)
# }
# ##or select specific file
# #latest_pfm_model <- "data/pfm_model_xxxxxxx.RDS"
# pfm_report_settings$model_cached <- readRDS(latest_pfm_model)
# #pv
# all_pv_models <- file.info(list.files("data/models/", full.names = TRUE, pattern="^pv.*\\.RDS$"))
# if (nrow(all_pv_models) > 0){
# latest_pv_model <- rownames(all_pv_models)[which.max(all_pv_models$ctime)]
# pv_model_cached <- readRDS(latest_pv_model)
# }
# ##or select specific model
# #latest_pv_model <- "data/pv_model_xxxxxxxx.RDS"
# pv_report_settings$model_cached <- readRDS(latest_pv_model)
# 3. Run EPIDEMIA & create report data ---------------------------------------
#Run modeling to get report data
# with check on current epidemiology and environmental data sets
if (exists("epi_data") & exists("env_data")){
# P. falciparum & mixed
message("Running P. falciparum & mixed")
pfm_reportdata <- run_epidemia(
#data
epi_data = epi_data,
env_data = env_data,
env_ref_data = env_ref_data,
env_info = env_info,
#fields
casefield = test_pf_tot,
groupfield = woreda_name,
populationfield = pop_at_risk,
obsfield = environ_var_code,
valuefield = obs_value,
#required settings
fc_model_family = fc_model_family,
#other settings
report_settings = pfm_report_settings)
# P. vivax
message("Running P. vivax")
pv_reportdata <- run_epidemia(
#data
epi_data = epi_data,
env_data = env_data,
env_ref_data = env_ref_data,
env_info = env_info,
#fields
casefield = test_pv_only,
groupfield = woreda_name,
populationfield = pop_at_risk,
obsfield = environ_var_code,
valuefield = obs_value,
#required settings
fc_model_family = fc_model_family,
#other settings
report_settings = pv_report_settings)
#if using cached models:
#append model information to report data metadata
if (exists('pfm_model_cached')){
pfm_reportdata$params_meta$model_used <- latest_pfm_model
}
if (exists('pv_model_cached')){
pv_reportdata$params_meta$model_used <- latest_pv_model
}
} else {
message("Error: Epidemiological and/or environmental datasets are missing.
Check Section 2 for data error messages.")
}
# 4. Merge species data, Save, & Create PDF Report -------------------------------
if (exists("pfm_reportdata") & exists("pv_reportdata")){
#merging pfm & pv data, save out, and create pdf
merge_save_report(rpt_data_main = pfm_reportdata,
rpt_data_secd = pv_reportdata,
#mark sections as P. falciparum and mixed (pfm) or P. vivax (pv)
# used in the epidemia_report_demo.Rnw file for the formatted report
var_labs = c("pfm","pv"),
#save out the report data in the file that the formatting file reads
save_file = "report/report_data.RData",
#save out a second copy of the report data with year and week numbers in the name
second_save = TRUE,
#create the pdf
create_report = TRUE,
#which Rnw file to use to create pdf
formatting_file = "epidemia_report_demo.Rnw",
#append tag to file name (optional)
file_name_postfix = "synthetic_data",
#show the pdf immediately after creating
show_report = TRUE)
} else {
message("Error: Report data for P. falciparum and/or P. vivax
have not been generated and are missing.")
}
# Alternative: Create pdf report ----------------------------------------------------------
# If you want to later recreate a pdf from a saved report_data file:
# Change the input report_data_file to the previously saved version
# And add a file/name for the saved output.
# create_pdf(new_data = "report/report_data_2018W52.RData",
# #file that is loaded in the formatting file
# report_data_file = "report/report_data.RData",
# formatting_file = "epidemia_report_demo.Rnw",
# #specific output file name
# report_save_file = "report/epidemia_report_demo_2018W52.pdf",
# show = TRUE)