forked from seroanalytics/serosolver
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcs1.Rmd
153 lines (120 loc) · 4.95 KB
/
cs1.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
---
title: "cs1"
output: pdf_document
---
```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)
```
```{r}
library(serosolver)
library(plyr)
library(data.table)
## Required for this analysis
library(reshape2)
library(foreach)
library(doParallel)
library(bayesplot)
library(coda)
library(ggplot2)
library(viridis)
library(ggpubr)
# set up cluster
set.seed(0)
cl <- makeCluster(5)
## Note that this vignette was generated on a Windows machine,
## and the setup for parallelisation is different on a Linux or Mac:
```
```{r}
filename <- "case_study_1"
resolution <- 4 ## set to 4 for quarterly resolution
sample_years <- 2009:2012
serosolver::describe_priors()
prior_version <- 2
```
```{r}
## Read in titre data
# unvaccinated
input_dat_path <- system.file("extdata", "HKdata_h1n1_unvac.csv", package = "serosolver")
input_dat <- read.csv(file = input_dat_path, header = TRUE)
# vaccinated
# input_dat_path2 <- system.file("extdata", "HKdata_h1n1_vac.csv", package = "serosolver")
# input_dat_vac <- read.csv(file = input_dat_path2, header = TRUE)
indivs <- unique(input_dat$individual) #all individuals
# Subset data for indivs
titre_dat <- input_dat[input_dat$individual %in% indivs,
c("individual","virus","titre","samples","DOB")]
titre_dat$individual <- match(titre_dat$individual, indivs)
titre_dat <- unique(titre_dat)
titre_dat <- plyr::ddply(titre_dat,.(individual,virus,samples),
function(x) cbind(x,"run"=1:nrow(x),"group"=1))
print(head(titre_dat))
#> individual virus titre samples DOB run group
#> 1 1 8037 0 8039 8036 1 1
#> 2 1 8037 0 8040 8036 1 1
#> 3 1 8037 7 8044 8036 1 1
#> 4 1 8037 7 8047 8036 1 1
#> 5 2 8037 0 8039 8036 1 1
#> 6 2 8037 5 8041 8036 1 1
strain_isolation_times <- seq(sample_years[1]*resolution+1, sample_years[4]*resolution, by=1)
```
```{r}
par_tab_path <- system.file("extdata", "par_tab_base.csv", package = "serosolver")
par_tab <- read.csv(par_tab_path, stringsAsFactors=FALSE)
## Set parameters for beta and alpha to 1
par_tab[par_tab$names %in% c("alpha","beta"),"values"] <- c(1/3,1/3)
## Maximum recordable log titre in these data is 9
par_tab[par_tab$names == "MAX_TITRE","values"] <- 9
## Remove phi parameters, as these are integrated out under prior version 2
par_tab <- par_tab[par_tab$names != "phi",]
## Fix cross reactivity and antigenic seniority
par_tab[par_tab$names %in% c("tau","sigma1","sigma2"),"fixed"] <- 1
## mu, tau, sigma1, and sigma2 are fixed
par_tab[par_tab$names %in% c("tau","sigma1","sigma2"),"values"] <- 0
## set these values to 0
```
```{r}
## Distinct filename for each chain
no_chains <- 5
filenames <- paste0(filename, "_",1:no_chains)
chain_path <- sub("par_tab_base.csv","",par_tab_path)
chain_path_real <- paste0(chain_path, "cs1_real/")
chain_path_sim <- paste0(chain_path, "cs1_sim/")
## Create the posterior solving function that will be used in the MCMC framework
par_tab[par_tab$names == "mu_short","lower_bound"] <- 1
model_func <- create_posterior_func(par_tab=par_tab,
titre_dat=titre_dat,
strain_isolation_times = strain_isolation_times,
version=prior_version) # function in posteriors.R
#> Creating posterior solving function...
#>
```
```{r}
## Generate results in parallel
res <- foreach(x = filenames, .packages = c('serosolver','data.table','plyr')) %dopar% {
## Not all random starting conditions return finite likelihood, so for each chain generate random
## conditions until we get one with a finite likelihood
start_prob <- -Inf
while(!is.finite(start_prob)){
## Generating starting antibody kinetics parameters
start_tab <- generate_start_tab(par_tab)
## Generate starting infection history
start_inf <- setup_infection_histories_titre(titre_dat, strain_isolation_times,
space=3,titre_cutoff=4)
start_prob <- sum(model_func(start_tab$values, start_inf)[[1]])
}
res <- run_MCMC(par_tab = start_tab,
titre_dat = titre_dat,
antigenic_map = NULL,
strain_isolation_times = strain_isolation_times,
start_inf_hist = start_inf,
mcmc_pars = c("iterations"=2000000,"popt"=0.44,"popt_hist"=0.44,
"opt_freq"=1000,"thin"=1,"adaptive_period"=500000,
"save_block"=1000, "thin_hist"=100,"hist_sample_prob"=1,
"switch_sample"=2, "burnin"=0, "inf_propn"=0.5,
"move_size"=3,"hist_opt"=1,"swap_propn"=0.5,
"hist_switch_prob"=0.5,"year_swap_propn"=1),
filename = paste0(chain_path_real,x),
CREATE_POSTERIOR_FUNC = create_posterior_func,
version = prior_version)
}
```