Skip to content

Commit

Permalink
Merge branch 'develop'
Browse files Browse the repository at this point in the history
  • Loading branch information
michdn committed Jun 26, 2020
2 parents 88a4415 + 1586ea8 commit bec0ed1
Show file tree
Hide file tree
Showing 63 changed files with 5,269 additions and 3,947 deletions.
33 changes: 18 additions & 15 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Package: epidemiar
Type: Package
Title: epidemiar: Create EPIDEMIA Environmentally-Mediated Disease Forecasts
Version: 2.1.0
Version: 3.1.0
Authors@R: c(
person(given = c("Dawn", "M"), family = "Nekorchuk", email = "[email protected]",
role = c("aut", "cre")),
Expand All @@ -14,25 +14,28 @@ Description: The Epidemic Prognosis Incorporating Disease and Environmental Moni
create short-term forecasts for environmentally-mediated diseases.
This R package contains the functions for modeling, forecasting, validation, and
early detection & early warning alerts.
License: GPL-3 + file LICENSE
License: MIT + file LICENSE
Encoding: UTF-8
LazyData: true
RoxygenNote: 6.1.1
Imports: dplyr (>= 0.8.3),
glue (>= 1.3.1),
lubridate (>= 1.7.4),
RoxygenNote: 7.1.0
Imports: dplyr (>= 1.0.0),
glue (>= 1.4.1),
lubridate (>= 1.7.9),
MASS,
magrittr (>= 1.5),
mgcv (>= 1.8-28),
parallel (>= 3.6.1),
pracma (>= 2.2.5),
rlang (>= 0.4.0),
surveillance (>= 1.17.0),
splines (>= 3.6.1),
tibble (>= 2.1.3),
tidyr (>= 0.8.3),
zoo (>= 1.8-6)
methods,
mgcv (>= 1.8-31),
parallel (>= 4.0.0),
pracma (>= 2.2.9),
readr (>= 1.3.1),
rlang (>= 0.4.6),
surveillance (>= 1.18.0),
splines (>= 4.0.0),
tibble (>= 3.0.1),
tidyr (>= 1.1.0),
zoo (>= 1.8-8)
Suggests:
clusterapply,
knitr,
rmarkdown
VignetteBuilder:
Expand Down
695 changes: 21 additions & 674 deletions LICENSE

Large diffs are not rendered by default.

Binary file modified Meta/vignette.rds
Binary file not shown.
4 changes: 3 additions & 1 deletion NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,12 @@ export(epiwday)
export(get_group_validations)
export(get_overall_validations)
export(make_date_yw)
export(na_approx)
export(run_epidemia)
export(run_validation)
export(save_geog_validations)
export(save_overall_validations)
importFrom(lubridate,"%within%")
importFrom(magrittr,"%>%")
importFrom(rlang,"!!")
importFrom(rlang,":=")
importFrom(rlang,.data)
8 changes: 4 additions & 4 deletions R/add_datefields.R
Original file line number Diff line number Diff line change
Expand Up @@ -18,13 +18,13 @@ add_datefields <- function(df, type = "ISO"){
if (week_type == "ISO"){
df <- df %>%
#add iso wk/yr
dplyr::mutate(week_epidemiar = lubridate::isoweek(obs_date),
year_epidemiar = lubridate::isoyear(obs_date))
dplyr::mutate(week_epidemiar = lubridate::isoweek(.data$obs_date),
year_epidemiar = lubridate::isoyear(.data$obs_date))

} else if (week_type == "CDC"){
#add CDC epi wk/yr
dplyr::mutate(week_epidemiar = lubridate::epiweek(obs_date),
year_epidemiar = lubridate::epiyear(obs_date))
dplyr::mutate(week_epidemiar = lubridate::epiweek(.data$obs_date),
year_epidemiar = lubridate::epiyear(.data$obs_date))
}

df
Expand Down
75 changes: 40 additions & 35 deletions R/cleaners_helpers.R
Original file line number Diff line number Diff line change
Expand Up @@ -4,56 +4,61 @@

#' Interpolates missing epi data.
#'
#' @param epi_data Input data tibble with case counts in casefield, grouping
#' field groupfield, and date column "obs_date".
#' @param quo_casefield Quosure of user given casefield to run_epidemia().
#' @param quo_groupfield Quosure of the user given geographic grouping field to
#' run_epidemia().
#'
#'@inheritParams run_epidemia
#'
#' @return Same data as epi_data with new interpolated case field,
#' cases_epidemiar.
#'
#'
epi_NA_interpolate <- function(epi_data, quo_casefield, quo_groupfield){
epi_data %>%
dplyr::group_by(!!quo_groupfield) %>%
#confirm date sorting
dplyr::arrange(obs_date) %>%
#interpolate
dplyr::mutate(cases_epidemiar = epidemiar::na_approx(!!quo_casefield)) %>%
#confirm geogroup-date sorting
dplyr::arrange(!!quo_groupfield, .data$obs_date) %>%
#interpolate, but not on trailing edge
#dplyr::mutate(cases_epidemiar = epidemiar::na_approx(!!quo_casefield)) %>%
dplyr::mutate(cases_epidemiar = zoo::na.approx(!!quo_casefield, rule=2:1, na.rm = FALSE)) %>%
#force into integer after interpolating (could cause problems with modeling otherwise)
dplyr::mutate(cases_epidemiar = floor(.data$cases_epidemiar)) %>%
#finish by ungrouping
dplyr::ungroup()
}

#' Interpolates missing environmental data.
#'
#' @param env_data Input data tibble with environmental data: geographic
#' groupings in groupfield, environmental variable identified in obsfield, and
#' data values in valuefield. Contains a date column "obs_date".
#' @param quo_obsfield Quosure of the user given field that holds the
#' environmental variable identifiers/names/IDs.
#' @param quo_valuefield Quosure of the user given field that holds the
#' environmental variable observation value.
#' @param quo_groupfield Quosure of the user given geographic grouping field to
#' run_epidemia().
#'
#' @return Same data as env_data, with new interpolated field, val_epidemiar, of
#' the environmental variable data.
#'
env_NA_interpolate <- function(env_data, quo_obsfield, quo_valuefield, quo_groupfield){
env_data %>%
#first, mark which ones are observed versus (will be) interpolated
dplyr::mutate(data_source = ifelse(!is.na(!!quo_valuefield), "Observed", "Interpolated")) %>%
#two levels of group_by
dplyr::group_by(!!quo_groupfield, !!quo_obsfield) %>%
#confirm date sorting
dplyr::arrange(obs_date) %>%
#interpolate
dplyr::mutate(val_epidemiar = !!quo_valuefield,
val_epidemiar = epidemiar::na_approx(val_epidemiar)) %>%
#finish by ungrouping
dplyr::ungroup()
}

#' #' Interpolates missing environmental data.
#' #' Deprecated, no longer used as extend_env_data() will fill any gaps.
#' #'
#' #' @param quo_obsfield Quosure of the user given field that holds the
#' #' environmental variable identifiers/names/IDs.
#' #' @param quo_valuefield Quosure of the user given field that holds the
#' #' environmental variable observation value.
#' #' @param quo_groupfield Quosure of the user given geographic grouping field to
#' #' run_epidemia().
#' #'
#' #'@inheritParams run_epidemia
#' #'
#' #' @return Same data as env_data, with new interpolated field, val_epidemiar, of
#' #' the environmental variable data.
#' #'
#' env_NA_interpolate <- function(env_data, quo_obsfield, quo_valuefield, quo_groupfield){
#' env_data %>%
#' #first, mark which ones are observed versus (will be) interpolated
#' dplyr::mutate(data_source = ifelse(!is.na(!!quo_valuefield), "Observed", "Interpolated")) %>%
#' #two levels of group_by
#' dplyr::group_by(!!quo_groupfield, !!quo_obsfield) %>%
#' #confirm date sorting
#' dplyr::arrange(!!quo_groupfield, !!quo_obsfield, .data$obs_date) %>%
#' #interpolate
#' #dplyr::mutate(val_epidemiar = !!quo_valuefield,
#' # val_epidemiar = epidemiar::na_approx(.data$val_epidemiar)) %>%
#' dplyr::mutate(val_epidemiar = zoo::na.approx(!!quo_valuefield, rule = 2:1, na.rm = FALSE)) %>%
#' #finish by ungrouping
#' dplyr::ungroup()
#' }



Expand Down
15 changes: 11 additions & 4 deletions R/data_to_daily.R
Original file line number Diff line number Diff line change
Expand Up @@ -26,17 +26,24 @@ data_to_daily <- function(data_notdaily, valuefield, interpolate = TRUE){

data_1day <- data_notdaily %>%
#should handle all grouping/categories
dplyr::group_by_at(dplyr::vars(-obs_date, -!!quo_valuefield)) %>%
dplyr::group_by_at(dplyr::vars(-.data$obs_date, -!!quo_valuefield)) %>%
#all explicit missing data - line for every Date for all groupings (above)
tidyr::complete(obs_date = tidyr::full_seq(c(min(data_notdaily$obs_date), max(data_notdaily$obs_date)), 1)) %>%
tidyr::complete(obs_date = tidyr::full_seq(c(min(data_notdaily$obs_date),
max(data_notdaily$obs_date)), 1)) %>%
dplyr::ungroup()

if (interpolate){
data_1day <- data_1day %>%
#should handle all grouping/categories
dplyr::group_by_at(dplyr::vars(-obs_date, -!!quo_valuefield)) %>%
#Likely does not, actually, need to add ... for grouping variables
dplyr::group_by_at(dplyr::vars(-.data$obs_date, -!!quo_valuefield)) %>%
#confirm sorting
#check if group_vars will handle grouping sorting issue
dplyr::arrange(dplyr::group_vars(), .data$obs_date) %>%
#will not extrapolate beyond last known value, that will happen inside run_epidemia()
mutate(!!quo_name(quo_valuefield) := epidemiar::na_approx(!!quo_valuefield)) %>%
#dplyr::mutate(!!rlang::as_name(quo_valuefield) := epidemiar::na_approx(!!quo_valuefield)) %>%
dplyr::mutate(!!rlang::as_name(quo_valuefield) := zoo::na.approx(!!quo_valuefield,
rule = 2:1, na.rm = FALSE)) %>%
#finish by ungrouping
dplyr::ungroup()
}
Expand Down
48 changes: 24 additions & 24 deletions R/environmental_reference.R
Original file line number Diff line number Diff line change
Expand Up @@ -72,47 +72,47 @@ env_daily_to_ref <- function(daily_env_data,
env_weekly <- daily_env_data %>%
#get reference/summarizing method from user supplied env_info
dplyr::inner_join(env_info %>%
dplyr::select(!!quo_obsfield, reference_method),
by = rlang::set_names(rlang::quo_name(quo_obsfield),
rlang::quo_name(quo_obsfield))) %>%
dplyr::select(!!quo_obsfield, .data$reference_method),
by = rlang::set_names(rlang::as_name(quo_obsfield),
rlang::as_name(quo_obsfield))) %>%
#add week, year fields
epidemiar::add_datefields(week_type) %>%
#group by grouping, env var, and date week
dplyr::group_by(!!quo_groupfield, !!quo_obsfield, year_epidemiar, week_epidemiar) %>%
dplyr::group_by(!!quo_groupfield, !!quo_obsfield, .data$year_epidemiar, .data$week_epidemiar) %>%
#calculate with case_when at row level (fx is not vectorized, so can't be used inside summarize)
dplyr::mutate(val_epidemiar = case_when(
reference_method == "sum" ~ sum(!!quo_valuefield, na.rm = TRUE),
reference_method == "mean" ~ mean(!!quo_valuefield, na.rm = TRUE),
dplyr::mutate(val_epidemiar = dplyr::case_when(
.data$reference_method == "sum" ~ sum(!!quo_valuefield, na.rm = TRUE),
.data$reference_method == "mean" ~ mean(!!quo_valuefield, na.rm = TRUE),
#default is mean, but since inner_join with info table should not be invoked
TRUE ~ mean(!!quo_valuefield, na.rm = TRUE))) %>%
#now summarize
#val_epi is the same for the whole grouped set, so just taking the first value
dplyr::summarize(val_epidemiar = first(val_epidemiar)) %>%
dplyr::summarize(val_epidemiar = dplyr::first(.data$val_epidemiar)) %>%
#ungroup to end
dplyr::ungroup()

#Then, summarize weekly values over the years, multiple reference statistics
message("Summarizing weekly values over years")
env_ref <- env_weekly %>%
#dropping year from grouping list
dplyr::group_by(!!quo_groupfield, !!quo_obsfield, week_epidemiar) %>%
dplyr::group_by(!!quo_groupfield, !!quo_obsfield, .data$week_epidemiar) %>%
#summarize over years
dplyr::summarize(ref_value = mean(val_epidemiar, na.rm = TRUE),
ref_sd = sd(val_epidemiar, na.rm = TRUE),
ref_yrcount = n(),
ref_max = max(val_epidemiar, na.rm = TRUE),
ref_uq = quantile(val_epidemiar, probs = 0.75, na.rm = TRUE),
ref_median = median(val_epidemiar, na.rm = TRUE),
ref_lq = quantile(val_epidemiar, probs = 0.25, na.rm = TRUE),
ref_min = min(val_epidemiar, na.rm = TRUE)) %>%
dplyr::summarize(ref_value = mean(.data$val_epidemiar, na.rm = TRUE),
ref_sd = stats::sd(.data$val_epidemiar, na.rm = TRUE),
ref_yrcount = dplyr::n(),
ref_max = max(.data$val_epidemiar, na.rm = TRUE),
ref_uq = stats::quantile(.data$val_epidemiar, probs = 0.75, na.rm = TRUE),
ref_median = stats::median(.data$val_epidemiar, na.rm = TRUE),
ref_lq = stats::quantile(.data$val_epidemiar, probs = 0.25, na.rm = TRUE),
ref_min = min(.data$val_epidemiar, na.rm = TRUE)) %>%
#clean up NaN and Inf values (if entire week missing data)
dplyr::mutate(ref_value = ifelse(is.finite(ref_value), ref_value, NA),
ref_sd = ifelse(is.finite(ref_sd), ref_sd, NA),
ref_max = ifelse(is.finite(ref_max), ref_max, NA),
ref_uq = ifelse(is.finite(ref_uq), ref_uq, NA),
ref_median = ifelse(is.finite(ref_median), ref_median, NA),
ref_lq = ifelse(is.finite(ref_lq), ref_lq, NA),
ref_min = ifelse(is.finite(ref_min), ref_min, NA)) %>%
dplyr::mutate(ref_value = ifelse(is.finite(.data$ref_value), .data$ref_value, NA),
ref_sd = ifelse(is.finite(.data$ref_sd), .data$ref_sd, NA),
ref_max = ifelse(is.finite(.data$ref_max), .data$ref_max, NA),
ref_uq = ifelse(is.finite(.data$ref_uq), .data$ref_uq, NA),
ref_median = ifelse(is.finite(.data$ref_median), .data$ref_median, NA),
ref_lq = ifelse(is.finite(.data$ref_lq), .data$ref_lq, NA),
ref_min = ifelse(is.finite(.data$ref_min), .data$ref_min, NA)) %>%
#ungroup to end
dplyr::ungroup()

Expand Down
Loading

0 comments on commit bec0ed1

Please sign in to comment.