cs1.Rmd

---
title: "cs1"
output: pdf_document
---

```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)
```

```{r}

library(serosolver)
library(plyr)
library(data.table)

## Required for this analysis
library(reshape2)
library(foreach)
library(doParallel)
library(bayesplot)
library(coda)
library(ggplot2)
library(viridis)
library(ggpubr)

# set up cluster
set.seed(0)
cl <- makeCluster(5)

## Note that this vignette was generated on a Windows machine,
## and the setup for parallelisation is different on a Linux or Mac:

```

```{r}
filename <- "case_study_1"
resolution <- 4 ## set to 4 for quarterly resolution
sample_years <- 2009:2012

serosolver::describe_priors()
prior_version <- 2

```


```{r}

## Read in titre data
# unvaccinated
input_dat_path <- system.file("extdata", "HKdata_h1n1_unvac.csv", package = "serosolver")
input_dat <- read.csv(file = input_dat_path, header = TRUE)
# vaccinated
# input_dat_path2 <- system.file("extdata", "HKdata_h1n1_vac.csv", package = "serosolver")
# input_dat_vac <- read.csv(file = input_dat_path2, header = TRUE)

indivs <- unique(input_dat$individual) #all individuals

# Subset data for indivs
titre_dat <- input_dat[input_dat$individual %in% indivs,
                       c("individual","virus","titre","samples","DOB")]
titre_dat$individual <- match(titre_dat$individual, indivs)

titre_dat <- unique(titre_dat)
titre_dat <- plyr::ddply(titre_dat,.(individual,virus,samples),
                         function(x) cbind(x,"run"=1:nrow(x),"group"=1))
print(head(titre_dat))
#>   individual virus titre samples  DOB run group
#> 1          1  8037     0    8039 8036   1     1
#> 2          1  8037     0    8040 8036   1     1
#> 3          1  8037     7    8044 8036   1     1
#> 4          1  8037     7    8047 8036   1     1
#> 5          2  8037     0    8039 8036   1     1
#> 6          2  8037     5    8041 8036   1     1

strain_isolation_times <- seq(sample_years[1]*resolution+1, sample_years[4]*resolution, by=1)

```

```{r}

par_tab_path <- system.file("extdata", "par_tab_base.csv", package = "serosolver")
par_tab <- read.csv(par_tab_path, stringsAsFactors=FALSE)

## Set parameters for beta and alpha to 1
par_tab[par_tab$names %in% c("alpha","beta"),"values"] <- c(1/3,1/3)
## Maximum recordable log titre in these data is 9
par_tab[par_tab$names == "MAX_TITRE","values"] <- 9

## Remove phi parameters, as these are integrated out under prior version 2
par_tab <- par_tab[par_tab$names != "phi",]

## Fix cross reactivity and antigenic seniority
par_tab[par_tab$names %in% c("tau","sigma1","sigma2"),"fixed"] <- 1 
## mu, tau, sigma1, and sigma2 are fixed
par_tab[par_tab$names %in% c("tau","sigma1","sigma2"),"values"] <- 0 
## set these values to 0

```

```{r}

## Distinct filename for each chain
no_chains <- 5
filenames <- paste0(filename, "_",1:no_chains)
chain_path <- sub("par_tab_base.csv","",par_tab_path)
chain_path_real <- paste0(chain_path, "cs1_real/")
chain_path_sim <- paste0(chain_path, "cs1_sim/")

## Create the posterior solving function that will be used in the MCMC framework 
par_tab[par_tab$names == "mu_short","lower_bound"] <- 1
model_func <- create_posterior_func(par_tab=par_tab,
                            titre_dat=titre_dat,
                            strain_isolation_times = strain_isolation_times,
                            version=prior_version) # function in posteriors.R
#> Creating posterior solving function...
#> 

```

```{r}

## Generate results in parallel
res <- foreach(x = filenames, .packages = c('serosolver','data.table','plyr')) %dopar% {
  ## Not all random starting conditions return finite likelihood, so for each chain generate random
  ## conditions until we get one with a finite likelihood
  start_prob <- -Inf
  while(!is.finite(start_prob)){
    ## Generating starting antibody kinetics parameters
    start_tab <- generate_start_tab(par_tab)
    
    ## Generate starting infection history
    start_inf <- setup_infection_histories_titre(titre_dat, strain_isolation_times, 
                                                 space=3,titre_cutoff=4)
    start_prob <- sum(model_func(start_tab$values, start_inf)[[1]])
  }
  
  res <- run_MCMC(par_tab = start_tab, 
                  titre_dat = titre_dat,
                  antigenic_map = NULL,
                  strain_isolation_times = strain_isolation_times,
                  start_inf_hist = start_inf, 
                  mcmc_pars = c("iterations"=2000000,"popt"=0.44,"popt_hist"=0.44,
                                "opt_freq"=1000,"thin"=1,"adaptive_period"=500000, 
                                "save_block"=1000, "thin_hist"=100,"hist_sample_prob"=1,
                                "switch_sample"=2, "burnin"=0, "inf_propn"=0.5, 
                                "move_size"=3,"hist_opt"=1,"swap_propn"=0.5,
                                "hist_switch_prob"=0.5,"year_swap_propn"=1),
                  filename = paste0(chain_path_real,x), 
                  CREATE_POSTERIOR_FUNC = create_posterior_func, 
                  version = prior_version)
}

```