From 0655bbf83b6f3a1e66f89dcaabe1f43ea00258b7 Mon Sep 17 00:00:00 2001 From: GitHub Actions Date: Tue, 8 Oct 2024 20:44:03 +0000 Subject: [PATCH] site deploy Auto-generated via {sandpaper} Source : 7c8d7376ae417c75d7539ee5f4ef37b2d2f9c1f1 Branch : md-outputs Author : GitHub Actions Time : 2024-10-08 20:43:45 +0000 Message : markdown source builds Auto-generated via {sandpaper} Source : 92f3322b53954ee25e57851b2495e441d67a4490 Branch : main Author : Andree Valle Campos Time : 2024-10-08 20:39:58 +0000 Message : move the statistical estimates to a separate entry --- 404.html | 8 +- CODE_OF_CONDUCT.html | 4 +- LICENSE.html | 4 +- aio.html | 12 +- clean-data.html | 4 +- describe-cases.html | 6 +- epikinetics-descriptive.html | 600 +++++++++++++++++++++++ epikinetics-statistics.html | 598 +++++++++++++++++++++++ images.html | 6 +- index.html | 4 +- instructor-notes.html | 6 +- instructor/404.html | 8 +- instructor/CODE_OF_CONDUCT.html | 4 +- instructor/LICENSE.html | 4 +- instructor/aio.html | 10 +- instructor/clean-data.html | 4 +- instructor/describe-cases.html | 6 +- instructor/epikinetics-descriptive.html | 602 ++++++++++++++++++++++++ instructor/epikinetics-statistics.html | 600 +++++++++++++++++++++++ instructor/images.html | 4 + instructor/index.html | 4 +- instructor/instructor-notes.html | 4 + instructor/key-points.html | 4 + instructor/profiles.html | 4 +- instructor/read-cases.html | 8 +- instructor/reference.html | 4 +- instructor/validate.html | 4 +- key-points.html | 6 +- md5sum.txt | 2 + pkgdown.yml | 2 +- profiles.html | 4 +- read-cases.html | 8 +- reference.html | 4 +- sitemap.xml | 12 + validate.html | 4 +- 35 files changed, 2511 insertions(+), 57 deletions(-) create mode 100644 epikinetics-descriptive.html create mode 100644 epikinetics-statistics.html create mode 100644 instructor/epikinetics-descriptive.html create mode 100644 instructor/epikinetics-statistics.html diff --git a/404.html b/404.html index c7d071bc..1ef0e95b 100644 --- a/404.html +++ b/404.html @@ -105,7 +105,9 @@ - Read and clean case data, and make linelist for outbreak analytics with R: Solutions using epikinetics data and package +
+ Read and clean case data, and make linelist for outbreak analytics with R +
+ +
+
+ + + + + +
+
+

Solutions using epikinetics data and package

+

Last updated on 2024-10-08 | + + Edit this page

+ + + +
+ +
+ + +
+

Descriptive analysis

+
+

R +

+
+
+#' goal:
+#' vaccination incidence stratified by vaccine type
+#' observation incidence stratified by censoring status
+
+# load packages -----------------------------------------------------------
+
+library(tidyverse)
+library(cleanepi)
+library(datatagr) # a generalization of {linelist}
+library(incidence2)
+
+# read data ---------------------------------------------------------------
+
+# rawdata <- "data-raw/delta.csv"
+rawdata <- "https://raw.githubusercontent.com/seroanalytics/epikinetics/refs/heads/main/inst/delta_full.rds"
+
+dat <- read_csv(rawdata)
+
+dat %>% glimpse()
+
+# what these columns mean? ------------------------------------------------
+
+#' data dictionary: https://seroanalytics.org/epikinetics/articles/data.html
+#' reference paper: https://www.thelancet.com/journals/laninf/article/PIIS1473-3099(24)00484-5/fulltext
+#' location: https://github.com/seroanalytics/epikinetics/tree/main/inst
+
+# 335 subjects where followed up
+dat %>% count(pid)
+
+## what "titre type" means? -------------------------------------------------
+
+#' In the time series, 
+#' each subject had monthly serum measurements 
+#' for three types of antigens ("titre_type").
+#' 
+#' Serum samples where challenged against Ancestral, Alpha and Delta antigens.
+#' 
+#' The column "value" measures the titre of 
+#' the neutralizing effect of each sample against each antigen 
+
+dat %>%
+  dplyr::filter(pid == 2) %>% 
+  dplyr::arrange(date) %>% 
+  # select time invariant columns
+  dplyr::select(
+    pid, infection_history, exp_num, last_exp_date, last_vax_type,
+    dplyr::everything()
+  )
+
+## what "censored" means? ----------------------------------------------------
+
+# context: censored regression model
+# the "value" as the outcome is censored above or below
+# because the it was measured outside the limits of detection
+# threshold limit below: 5
+# threshold limit above: 2560
+
+dat %>% 
+  ggplot(aes(value, fill = as.factor(censored))) + 
+  geom_histogram()
+
+# datatagr ----------------------------------------------------------------
+
+datatagr::lost_labels_action()
+datatagr::get_lost_labels_action()
+# datatagr::lost_labels_action(action = "error")
+
+# cleanepi ----------------------------------------------------------------
+
+# check sequence of events
+
+dat_clean <- dat %>% 
+  # arrange columns
+  dplyr::select(
+    pid, infection_history, exp_num, last_exp_date, last_vax_type,
+    dplyr::everything()
+  ) %>% 
+  # arrange rows
+  dplyr::arrange(
+    pid, infection_history, exp_num, last_exp_date, last_vax_type, date
+  ) %>% 
+  # cleanepi
+  cleanepi::check_date_sequence(
+    target_columns = c("last_exp_date", "date")
+  ) %>% 
+  # cleanepi::print_report()
+  cleanepi::timespan(
+    target_column = "last_exp_date",
+    end_date = "date",
+    span_unit = "days",
+    span_column_name = "t_since_last_exp",
+    span_remainder_unit = "days"
+    ) %>% 
+  # extra wrangling
+  mutate(
+    last_vax_type = forcats::fct_infreq(last_vax_type),
+    exp_num = forcats::as_factor(exp_num),
+    titre_type = forcats::fct_relevel(titre_type,"Ancestral", "Alpha"),
+    censored = forcats::as_factor(censored)
+  ) %>% 
+  # tag with {datatagr}
+  datatagr::make_datatagr(
+    pid = "subject id",
+    infection_history = "subject infection history",
+    exp_num = "number of vaccine exposures",
+    last_exp_date = "date of last exposure",
+    last_vax_type = "type of vaccine in the last exposure",
+    date = "date of observation of titre in serum sample",
+    titre_type = "type of antigen challenged against serum sample",
+    value = "titre value",
+    censored = "censored titre value out of limit of detection [5 - 2560] bellow (-1) or above (+1)",
+    t_since_last_exp = "time interval between last vaccine exposure and observed serum sample titre"
+  ) %>% 
+  # validate with {datatagr}
+  datatagr::validate_datatagr(
+    pid = "numeric",
+    infection_history = "character",
+    exp_num = "factor",
+    last_exp_date="Date",
+    last_vax_type = "factor",
+    date = "Date",
+    titre_type = "factor",
+    value = "numeric",
+    censored = "factor",
+    t_since_last_exp = "numeric"
+  ) %>% 
+  # datatagr::labels_df() %>% # this extract labels as column names [affects downstream] 
+  identity()
+
+dat_clean
+
+# distribution of the time from the last vaccine to first observation
+dat_clean %>% 
+  group_by(pid) %>% 
+  filter(date == min(date)) %>% 
+  slice(1) %>% 
+  ungroup() %>% 
+  ggplot(aes(t_since_last_exp)) + 
+  geom_histogram()
+
+## subject table -----------------------------------------------------------
+
+# subject time-invariant data
+dat_subject <- dat_clean %>% 
+  # {datatagr} reacts with dplyr::select() but not with dplyr::count() when losing tags
+  dplyr::select(pid, infection_history, exp_num, last_exp_date, last_vax_type) %>% 
+  dplyr::count(pid, infection_history, exp_num, last_exp_date, last_vax_type)
+  
+# table 1: time-invariant columns
+dat_subject %>% 
+  compareGroups::compareGroups(
+    data = .,
+    formula = ~infection_history + exp_num + last_exp_date + last_vax_type 
+  ) %>% 
+  compareGroups::createTable()
+
+# table 2: were vaccine type differently applied between naive and non-naive?
+dat_subject %>% 
+  compareGroups::compareGroups(
+    data = .,
+    formula = last_vax_type~infection_history,
+    byrow = TRUE
+  ) %>% 
+  compareGroups::createTable(show.all = TRUE)
+
+# vaccinations ------------------------------------------------------------
+
+## by vaccine type ---------------------------------------------
+
+dat_subject %>% 
+  # aggregate
+  incidence2::incidence(
+    date_index = "last_exp_date", # change: "date" or "last_exp_date"
+    groups = ("last_vax_type"), # change: "titre_type" or "infection_history" or "last_vax_type" or c("infection_history", "titre_type")
+    interval = "month", # change: "day" or "week" or "epiweek" or "month"
+    # complete_dates = TRUE, # relevant to downstream analysis [time-series data]
+  ) %>% 
+  # transform to cumulative per group (optional display)
+  # incidence2::cumulate() %>% 
+  # plot
+  incidence2:::plot.incidence2(
+    fill = "last_vax_type"
+  )
+
+# observations ------------------------------------------------------------
+
+# by history-variants
+# not required, this reflect the proportion of "infection_history" in the cohort
+
+## by censored -----------------------------------------------
+
+dat_clean %>% count(censored)
+
+dat_clean %>% 
+  incidence2::incidence(
+    date_index = "date", # change: "date" or "last_exp_date"
+    groups = "censored", # change: "censored" or "titre_type" or "infection_history" or "last_vax_type" or c("infection_history", "titre_type")
+    interval = "month", # change: "day", "week", "month"
+    # complete_dates = TRUE # relevant to downstream analysis [time-series data]
+  ) %>% 
+  incidence2:::plot.incidence2(
+    fill = "censored"
+  )
+
+ +
+ + +
+
+ + +
+
+ + + diff --git a/epikinetics-statistics.html b/epikinetics-statistics.html new file mode 100644 index 00000000..076b32e9 --- /dev/null +++ b/epikinetics-statistics.html @@ -0,0 +1,598 @@ + +Read and clean case data, and make linelist for outbreak analytics with R: Solutions using epikinetics data and package +
+ Read and clean case data, and make linelist for outbreak analytics with R +
+ +
+
+ + + + + +
+
+

Solutions using epikinetics data and package

+

Last updated on 2024-10-08 | + + Edit this page

+ + + +
+ +
+ + +
+

Statistical analysis

+
+

R +

+
+
+#' goal:
+#' model the kinetics of neutralising-antibody titres after antigenic SARS-CoV-2 exposure?
+
+# load packages -----------------------------------------------------------
+
+library(tidyverse)
+library(cleanepi)
+library(datatagr) # a generalization of {linelist}
+# library(epikinetics)
+
+# read data ---------------------------------------------------------------
+
+# rawdata <- "data-raw/delta.csv"
+rawdata <- "https://raw.githubusercontent.com/seroanalytics/epikinetics/refs/heads/main/inst/delta_full.rds"
+
+dat <- read_csv(rawdata)
+
+dat %>% glimpse()
+
+# what these columns mean? ------------------------------------------------
+
+#' data dictionary: https://seroanalytics.org/epikinetics/articles/data.html
+#' reference paper: https://www.thelancet.com/journals/laninf/article/PIIS1473-3099(24)00484-5/fulltext
+#' location: https://github.com/seroanalytics/epikinetics/tree/main/inst
+
+# 335 subjects where followed up
+dat %>% count(pid)
+
+## what "titre type" means? -------------------------------------------------
+
+#' In the time series, 
+#' each subject had monthly serum measurements 
+#' for three types of antigens ("titre_type").
+#' 
+#' Serum samples where challenged against Ancestral, Alpha and Delta antigens.
+#' 
+#' The column "value" measures the titre of 
+#' the neutralizing effect of each sample against each antigen 
+
+dat %>%
+  dplyr::filter(pid == 2) %>% 
+  dplyr::arrange(date) %>% 
+  # select time invariant columns
+  dplyr::select(
+    pid, infection_history, exp_num, last_exp_date, last_vax_type,
+    dplyr::everything()
+  )
+
+## what "censored" means? ----------------------------------------------------
+
+# context: censored regression model
+# the "value" as the outcome is censored above or below
+# because the it was measured outside the limits of detection
+# threshold limit below: 5
+# threshold limit above: 2560
+
+dat %>% 
+  ggplot(aes(value, fill = as.factor(censored))) + 
+  geom_histogram()
+
+# datatagr ----------------------------------------------------------------
+
+datatagr::lost_labels_action()
+datatagr::get_lost_labels_action()
+# datatagr::lost_labels_action(action = "error")
+
+# cleanepi ----------------------------------------------------------------
+
+# check sequence of events
+
+dat_clean <- dat %>% 
+  # arrange columns
+  dplyr::select(
+    pid, infection_history, exp_num, last_exp_date, last_vax_type,
+    dplyr::everything()
+  ) %>% 
+  # arrange rows
+  dplyr::arrange(
+    pid, infection_history, exp_num, last_exp_date, last_vax_type, date
+  ) %>% 
+  # cleanepi
+  cleanepi::check_date_sequence(
+    target_columns = c("last_exp_date", "date")
+  ) %>% 
+  # cleanepi::print_report()
+  cleanepi::timespan(
+    target_column = "last_exp_date",
+    end_date = "date",
+    span_unit = "days",
+    span_column_name = "t_since_last_exp",
+    span_remainder_unit = "days"
+  ) %>% 
+  # extra wrangling
+  mutate(
+    last_vax_type = forcats::fct_infreq(last_vax_type),
+    exp_num = forcats::as_factor(exp_num),
+    titre_type = forcats::fct_relevel(titre_type,"Ancestral", "Alpha"),
+    # censored = forcats::as_factor(censored) # in {epikinetics} censored needs to be numeric
+  ) %>% 
+  # tag with {datatagr}
+  datatagr::make_datatagr(
+    pid = "subject id",
+    infection_history = "subject infection history",
+    exp_num = "number of vaccine exposures",
+    last_exp_date = "date of last exposure",
+    last_vax_type = "type of vaccine in the last exposure",
+    date = "date of observation of titre in serum sample",
+    titre_type = "type of antigen challenged against serum sample",
+    value = "titre value",
+    censored = "censored titre value out of limit of detection [5 - 2560] bellow (-1) or above (+1)",
+    t_since_last_exp = "time interval between last vaccine exposure and observed serum sample titre"
+  ) %>% 
+  # validate with {datatagr}
+  datatagr::validate_datatagr(
+    pid = "numeric",
+    infection_history = "character",
+    exp_num = "factor",
+    last_exp_date="Date",
+    last_vax_type = "factor",
+    date = "Date",
+    titre_type = "factor",
+    value = "numeric",
+    censored = "numeric",
+    t_since_last_exp = "numeric"
+  ) %>% 
+  # datatagr::labels_df() %>% # this extract labels as column names [affects downstream] 
+  identity()
+
+dat_clean
+
+
+# visualization: data to model --------------------------------------------
+
+dat_clean %>% 
+  mutate(log2_value = log2(value)) %>% 
+  filter(censored == "0") %>% 
+  # skimr::skim(log2_value)
+  ggplot(aes(x = t_since_last_exp, y = value)) +
+  geom_point() +
+  geom_smooth() +
+  scale_y_continuous(trans = "log2") +
+  facet_wrap(~infection_history+titre_type) +
+  xlim(0,150)
+
+
+# model -------------------------------------------------------------------
+
+#' In this vignette we use a dataset representing the Delta wave 
+#' which is installed with this package, 
+#' specifying a regression model that 
+#' just looks at the effect of infection history.
+#' 
+#' Figure 2 from the paper shows population level fits for each wave, 
+#' disaggregated by infection history and titre type. 
+#' Here we reproduce the facets for the Delta wave. 
+#' Once the model has been fitted, 
+#' simulate population trajectories using the fitted population parameters.
+
+dat_clean %>% class()
+
+mod <- epikinetics::biokinetics$new(
+  data = dat_clean %>% data.table::as.data.table(), 
+  covariate_formula = ~0 + infection_history
+)
+
+# WAIT this takes 14 minutes
+# tictoc::tic()
+delta <- mod$fit(
+  parallel_chains = 4,
+  iter_warmup = 50,
+  iter_sampling = 200,
+  threads_per_chain = 4
+)
+# tictoc::toc()
+
+# this takes 10 seconds
+# tictoc::tic()
+res <- mod$simulate_population_trajectories()
+head(res)
+# tictoc::toc()
+
+# visualize output --------------------------------------------------------
+
+plot_data <- res
+
+plot_data[, titre_type := forcats::fct_relevel(
+  titre_type,
+  c("Ancestral", "Alpha", "Delta"))]
+
+ggplot(data = plot_data) +
+  geom_line(aes(x = t,
+                y = me,
+                colour = titre_type)) +
+  geom_ribbon(aes(x = t,
+                  ymin = lo,
+                  ymax = hi,
+                  fill = titre_type), alpha = 0.65) +
+  scale_y_continuous(trans = "log2") +
+  labs(x = "Time since last exposure (days)",
+       y = expression(paste("Titre (IC"[50], ")"))) +
+  facet_wrap(infection_history ~ titre_type)
+
+ +
+ + +
+
+ + +
+
+ + + diff --git a/images.html b/images.html index 5e440128..1ff19bbf 100644 --- a/images.html +++ b/images.html @@ -128,6 +128,8 @@ More @@ -318,7 +320,9 @@

  • Learner Profiles
  • -
  • Glossary of Terms: Epiverse-TRACE
  • +
  • Solutions using epikinetics data and package
  • +
  • Solutions using epikinetics data and package
  • +
  • Glossary of Terms: Epiverse-TRACE
  • diff --git a/index.html b/index.html index 0b7c4102..5682d107 100644 --- a/index.html +++ b/index.html @@ -105,7 +105,7 @@ - Read and clean case data, and make linelist for outbreak analytics with R: Solutions using epikinetics data and package +
    + Read and clean case data, and make linelist for outbreak analytics with R +
    + +
    +
    + + + + + +
    +
    +

    Solutions using epikinetics data and package

    +

    Last updated on 2024-10-08 | + + Edit this page

    + + + + + +
    + +
    + + +
    +

    Descriptive analysis

    +
    +

    R +

    +
    +
    +#' goal:
    +#' vaccination incidence stratified by vaccine type
    +#' observation incidence stratified by censoring status
    +
    +# load packages -----------------------------------------------------------
    +
    +library(tidyverse)
    +library(cleanepi)
    +library(datatagr) # a generalization of {linelist}
    +library(incidence2)
    +
    +# read data ---------------------------------------------------------------
    +
    +# rawdata <- "data-raw/delta.csv"
    +rawdata <- "https://raw.githubusercontent.com/seroanalytics/epikinetics/refs/heads/main/inst/delta_full.rds"
    +
    +dat <- read_csv(rawdata)
    +
    +dat %>% glimpse()
    +
    +# what these columns mean? ------------------------------------------------
    +
    +#' data dictionary: https://seroanalytics.org/epikinetics/articles/data.html
    +#' reference paper: https://www.thelancet.com/journals/laninf/article/PIIS1473-3099(24)00484-5/fulltext
    +#' location: https://github.com/seroanalytics/epikinetics/tree/main/inst
    +
    +# 335 subjects where followed up
    +dat %>% count(pid)
    +
    +## what "titre type" means? -------------------------------------------------
    +
    +#' In the time series, 
    +#' each subject had monthly serum measurements 
    +#' for three types of antigens ("titre_type").
    +#' 
    +#' Serum samples where challenged against Ancestral, Alpha and Delta antigens.
    +#' 
    +#' The column "value" measures the titre of 
    +#' the neutralizing effect of each sample against each antigen 
    +
    +dat %>%
    +  dplyr::filter(pid == 2) %>% 
    +  dplyr::arrange(date) %>% 
    +  # select time invariant columns
    +  dplyr::select(
    +    pid, infection_history, exp_num, last_exp_date, last_vax_type,
    +    dplyr::everything()
    +  )
    +
    +## what "censored" means? ----------------------------------------------------
    +
    +# context: censored regression model
    +# the "value" as the outcome is censored above or below
    +# because the it was measured outside the limits of detection
    +# threshold limit below: 5
    +# threshold limit above: 2560
    +
    +dat %>% 
    +  ggplot(aes(value, fill = as.factor(censored))) + 
    +  geom_histogram()
    +
    +# datatagr ----------------------------------------------------------------
    +
    +datatagr::lost_labels_action()
    +datatagr::get_lost_labels_action()
    +# datatagr::lost_labels_action(action = "error")
    +
    +# cleanepi ----------------------------------------------------------------
    +
    +# check sequence of events
    +
    +dat_clean <- dat %>% 
    +  # arrange columns
    +  dplyr::select(
    +    pid, infection_history, exp_num, last_exp_date, last_vax_type,
    +    dplyr::everything()
    +  ) %>% 
    +  # arrange rows
    +  dplyr::arrange(
    +    pid, infection_history, exp_num, last_exp_date, last_vax_type, date
    +  ) %>% 
    +  # cleanepi
    +  cleanepi::check_date_sequence(
    +    target_columns = c("last_exp_date", "date")
    +  ) %>% 
    +  # cleanepi::print_report()
    +  cleanepi::timespan(
    +    target_column = "last_exp_date",
    +    end_date = "date",
    +    span_unit = "days",
    +    span_column_name = "t_since_last_exp",
    +    span_remainder_unit = "days"
    +    ) %>% 
    +  # extra wrangling
    +  mutate(
    +    last_vax_type = forcats::fct_infreq(last_vax_type),
    +    exp_num = forcats::as_factor(exp_num),
    +    titre_type = forcats::fct_relevel(titre_type,"Ancestral", "Alpha"),
    +    censored = forcats::as_factor(censored)
    +  ) %>% 
    +  # tag with {datatagr}
    +  datatagr::make_datatagr(
    +    pid = "subject id",
    +    infection_history = "subject infection history",
    +    exp_num = "number of vaccine exposures",
    +    last_exp_date = "date of last exposure",
    +    last_vax_type = "type of vaccine in the last exposure",
    +    date = "date of observation of titre in serum sample",
    +    titre_type = "type of antigen challenged against serum sample",
    +    value = "titre value",
    +    censored = "censored titre value out of limit of detection [5 - 2560] bellow (-1) or above (+1)",
    +    t_since_last_exp = "time interval between last vaccine exposure and observed serum sample titre"
    +  ) %>% 
    +  # validate with {datatagr}
    +  datatagr::validate_datatagr(
    +    pid = "numeric",
    +    infection_history = "character",
    +    exp_num = "factor",
    +    last_exp_date="Date",
    +    last_vax_type = "factor",
    +    date = "Date",
    +    titre_type = "factor",
    +    value = "numeric",
    +    censored = "factor",
    +    t_since_last_exp = "numeric"
    +  ) %>% 
    +  # datatagr::labels_df() %>% # this extract labels as column names [affects downstream] 
    +  identity()
    +
    +dat_clean
    +
    +# distribution of the time from the last vaccine to first observation
    +dat_clean %>% 
    +  group_by(pid) %>% 
    +  filter(date == min(date)) %>% 
    +  slice(1) %>% 
    +  ungroup() %>% 
    +  ggplot(aes(t_since_last_exp)) + 
    +  geom_histogram()
    +
    +## subject table -----------------------------------------------------------
    +
    +# subject time-invariant data
    +dat_subject <- dat_clean %>% 
    +  # {datatagr} reacts with dplyr::select() but not with dplyr::count() when losing tags
    +  dplyr::select(pid, infection_history, exp_num, last_exp_date, last_vax_type) %>% 
    +  dplyr::count(pid, infection_history, exp_num, last_exp_date, last_vax_type)
    +  
    +# table 1: time-invariant columns
    +dat_subject %>% 
    +  compareGroups::compareGroups(
    +    data = .,
    +    formula = ~infection_history + exp_num + last_exp_date + last_vax_type 
    +  ) %>% 
    +  compareGroups::createTable()
    +
    +# table 2: were vaccine type differently applied between naive and non-naive?
    +dat_subject %>% 
    +  compareGroups::compareGroups(
    +    data = .,
    +    formula = last_vax_type~infection_history,
    +    byrow = TRUE
    +  ) %>% 
    +  compareGroups::createTable(show.all = TRUE)
    +
    +# vaccinations ------------------------------------------------------------
    +
    +## by vaccine type ---------------------------------------------
    +
    +dat_subject %>% 
    +  # aggregate
    +  incidence2::incidence(
    +    date_index = "last_exp_date", # change: "date" or "last_exp_date"
    +    groups = ("last_vax_type"), # change: "titre_type" or "infection_history" or "last_vax_type" or c("infection_history", "titre_type")
    +    interval = "month", # change: "day" or "week" or "epiweek" or "month"
    +    # complete_dates = TRUE, # relevant to downstream analysis [time-series data]
    +  ) %>% 
    +  # transform to cumulative per group (optional display)
    +  # incidence2::cumulate() %>% 
    +  # plot
    +  incidence2:::plot.incidence2(
    +    fill = "last_vax_type"
    +  )
    +
    +# observations ------------------------------------------------------------
    +
    +# by history-variants
    +# not required, this reflect the proportion of "infection_history" in the cohort
    +
    +## by censored -----------------------------------------------
    +
    +dat_clean %>% count(censored)
    +
    +dat_clean %>% 
    +  incidence2::incidence(
    +    date_index = "date", # change: "date" or "last_exp_date"
    +    groups = "censored", # change: "censored" or "titre_type" or "infection_history" or "last_vax_type" or c("infection_history", "titre_type")
    +    interval = "month", # change: "day", "week", "month"
    +    # complete_dates = TRUE # relevant to downstream analysis [time-series data]
    +  ) %>% 
    +  incidence2:::plot.incidence2(
    +    fill = "censored"
    +  )
    +
    + +
    + + +
    +
    + + +
    +
    + + + diff --git a/instructor/epikinetics-statistics.html b/instructor/epikinetics-statistics.html new file mode 100644 index 00000000..fd99c304 --- /dev/null +++ b/instructor/epikinetics-statistics.html @@ -0,0 +1,600 @@ + +Read and clean case data, and make linelist for outbreak analytics with R: Solutions using epikinetics data and package +
    + Read and clean case data, and make linelist for outbreak analytics with R +
    + +
    +
    + + + + + +
    +
    +

    Solutions using epikinetics data and package

    +

    Last updated on 2024-10-08 | + + Edit this page

    + + + + + +
    + +
    + + +
    +

    Statistical analysis

    +
    +

    R +

    +
    +
    +#' goal:
    +#' model the kinetics of neutralising-antibody titres after antigenic SARS-CoV-2 exposure?
    +
    +# load packages -----------------------------------------------------------
    +
    +library(tidyverse)
    +library(cleanepi)
    +library(datatagr) # a generalization of {linelist}
    +# library(epikinetics)
    +
    +# read data ---------------------------------------------------------------
    +
    +# rawdata <- "data-raw/delta.csv"
    +rawdata <- "https://raw.githubusercontent.com/seroanalytics/epikinetics/refs/heads/main/inst/delta_full.rds"
    +
    +dat <- read_csv(rawdata)
    +
    +dat %>% glimpse()
    +
    +# what these columns mean? ------------------------------------------------
    +
    +#' data dictionary: https://seroanalytics.org/epikinetics/articles/data.html
    +#' reference paper: https://www.thelancet.com/journals/laninf/article/PIIS1473-3099(24)00484-5/fulltext
    +#' location: https://github.com/seroanalytics/epikinetics/tree/main/inst
    +
    +# 335 subjects where followed up
    +dat %>% count(pid)
    +
    +## what "titre type" means? -------------------------------------------------
    +
    +#' In the time series, 
    +#' each subject had monthly serum measurements 
    +#' for three types of antigens ("titre_type").
    +#' 
    +#' Serum samples where challenged against Ancestral, Alpha and Delta antigens.
    +#' 
    +#' The column "value" measures the titre of 
    +#' the neutralizing effect of each sample against each antigen 
    +
    +dat %>%
    +  dplyr::filter(pid == 2) %>% 
    +  dplyr::arrange(date) %>% 
    +  # select time invariant columns
    +  dplyr::select(
    +    pid, infection_history, exp_num, last_exp_date, last_vax_type,
    +    dplyr::everything()
    +  )
    +
    +## what "censored" means? ----------------------------------------------------
    +
    +# context: censored regression model
    +# the "value" as the outcome is censored above or below
    +# because the it was measured outside the limits of detection
    +# threshold limit below: 5
    +# threshold limit above: 2560
    +
    +dat %>% 
    +  ggplot(aes(value, fill = as.factor(censored))) + 
    +  geom_histogram()
    +
    +# datatagr ----------------------------------------------------------------
    +
    +datatagr::lost_labels_action()
    +datatagr::get_lost_labels_action()
    +# datatagr::lost_labels_action(action = "error")
    +
    +# cleanepi ----------------------------------------------------------------
    +
    +# check sequence of events
    +
    +dat_clean <- dat %>% 
    +  # arrange columns
    +  dplyr::select(
    +    pid, infection_history, exp_num, last_exp_date, last_vax_type,
    +    dplyr::everything()
    +  ) %>% 
    +  # arrange rows
    +  dplyr::arrange(
    +    pid, infection_history, exp_num, last_exp_date, last_vax_type, date
    +  ) %>% 
    +  # cleanepi
    +  cleanepi::check_date_sequence(
    +    target_columns = c("last_exp_date", "date")
    +  ) %>% 
    +  # cleanepi::print_report()
    +  cleanepi::timespan(
    +    target_column = "last_exp_date",
    +    end_date = "date",
    +    span_unit = "days",
    +    span_column_name = "t_since_last_exp",
    +    span_remainder_unit = "days"
    +  ) %>% 
    +  # extra wrangling
    +  mutate(
    +    last_vax_type = forcats::fct_infreq(last_vax_type),
    +    exp_num = forcats::as_factor(exp_num),
    +    titre_type = forcats::fct_relevel(titre_type,"Ancestral", "Alpha"),
    +    # censored = forcats::as_factor(censored) # in {epikinetics} censored needs to be numeric
    +  ) %>% 
    +  # tag with {datatagr}
    +  datatagr::make_datatagr(
    +    pid = "subject id",
    +    infection_history = "subject infection history",
    +    exp_num = "number of vaccine exposures",
    +    last_exp_date = "date of last exposure",
    +    last_vax_type = "type of vaccine in the last exposure",
    +    date = "date of observation of titre in serum sample",
    +    titre_type = "type of antigen challenged against serum sample",
    +    value = "titre value",
    +    censored = "censored titre value out of limit of detection [5 - 2560] bellow (-1) or above (+1)",
    +    t_since_last_exp = "time interval between last vaccine exposure and observed serum sample titre"
    +  ) %>% 
    +  # validate with {datatagr}
    +  datatagr::validate_datatagr(
    +    pid = "numeric",
    +    infection_history = "character",
    +    exp_num = "factor",
    +    last_exp_date="Date",
    +    last_vax_type = "factor",
    +    date = "Date",
    +    titre_type = "factor",
    +    value = "numeric",
    +    censored = "numeric",
    +    t_since_last_exp = "numeric"
    +  ) %>% 
    +  # datatagr::labels_df() %>% # this extract labels as column names [affects downstream] 
    +  identity()
    +
    +dat_clean
    +
    +
    +# visualization: data to model --------------------------------------------
    +
    +dat_clean %>% 
    +  mutate(log2_value = log2(value)) %>% 
    +  filter(censored == "0") %>% 
    +  # skimr::skim(log2_value)
    +  ggplot(aes(x = t_since_last_exp, y = value)) +
    +  geom_point() +
    +  geom_smooth() +
    +  scale_y_continuous(trans = "log2") +
    +  facet_wrap(~infection_history+titre_type) +
    +  xlim(0,150)
    +
    +
    +# model -------------------------------------------------------------------
    +
    +#' In this vignette we use a dataset representing the Delta wave 
    +#' which is installed with this package, 
    +#' specifying a regression model that 
    +#' just looks at the effect of infection history.
    +#' 
    +#' Figure 2 from the paper shows population level fits for each wave, 
    +#' disaggregated by infection history and titre type. 
    +#' Here we reproduce the facets for the Delta wave. 
    +#' Once the model has been fitted, 
    +#' simulate population trajectories using the fitted population parameters.
    +
    +dat_clean %>% class()
    +
    +mod <- epikinetics::biokinetics$new(
    +  data = dat_clean %>% data.table::as.data.table(), 
    +  covariate_formula = ~0 + infection_history
    +)
    +
    +# WAIT this takes 14 minutes
    +# tictoc::tic()
    +delta <- mod$fit(
    +  parallel_chains = 4,
    +  iter_warmup = 50,
    +  iter_sampling = 200,
    +  threads_per_chain = 4
    +)
    +# tictoc::toc()
    +
    +# this takes 10 seconds
    +# tictoc::tic()
    +res <- mod$simulate_population_trajectories()
    +head(res)
    +# tictoc::toc()
    +
    +# visualize output --------------------------------------------------------
    +
    +plot_data <- res
    +
    +plot_data[, titre_type := forcats::fct_relevel(
    +  titre_type,
    +  c("Ancestral", "Alpha", "Delta"))]
    +
    +ggplot(data = plot_data) +
    +  geom_line(aes(x = t,
    +                y = me,
    +                colour = titre_type)) +
    +  geom_ribbon(aes(x = t,
    +                  ymin = lo,
    +                  ymax = hi,
    +                  fill = titre_type), alpha = 0.65) +
    +  scale_y_continuous(trans = "log2") +
    +  labs(x = "Time since last exposure (days)",
    +       y = expression(paste("Titre (IC"[50], ")"))) +
    +  facet_wrap(infection_history ~ titre_type)
    +
    + +
    + + +
    +
    + + +
    +
    + + + diff --git a/instructor/images.html b/instructor/images.html index 62a26346..f00d5621 100644 --- a/instructor/images.html +++ b/instructor/images.html @@ -129,6 +129,8 @@ @@ -320,6 +322,8 @@

    Extract All Images
    +
  • Solutions using epikinetics data and package
  • +
  • Solutions using epikinetics data and package
  • Glossary of Terms: Epiverse-TRACE
  • diff --git a/instructor/index.html b/instructor/index.html index b826c6c1..0ffc8f29 100644 --- a/instructor/index.html +++ b/instructor/index.html @@ -105,7 +105,7 @@ -