Commit b5e32db (0 parents)
Showing 4 changed files with 338 additions and 0 deletions.
Dockerfile
@@ -0,0 +1,18 @@
# GPU base image: CUDA 10.1 + cuDNN 7 runtime on Ubuntu 18.04
FROM nvidia/cuda:10.1-cudnn7-runtime-ubuntu18.04

WORKDIR /underreport

# Install Python 3.8 and make it the default python/pip
RUN apt update -y
RUN apt upgrade -y
RUN apt install python3.8 python3.8-distutils python3.8-dev python3-pip wget -y
RUN ln -s /usr/bin/pip3 /usr/bin/pip
RUN ln -s /usr/bin/python3.8 /usr/bin/python
RUN wget https://bootstrap.pypa.io/get-pip.py
RUN python3.8 get-pip.py

# Python dependencies and the output directory written to by the script
COPY requirements.txt .
RUN pip install -r requirements.txt
RUN mkdir /underreport/output

# Application code and the baseline CFR table
COPY underreport.py /underreport
COPY adjusted_cfr.csv /underreport
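Assuming this file is committed as Dockerfile at the repository root, the image would typically be built with something like docker build -t underreport . and run under the NVIDIA container runtime so the CUDA 10.1 base can see a GPU; the image name and runtime flags here are assumptions, not part of the commit.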
adjusted_cfr.csv
@@ -0,0 +1,18 @@
"iso3c","cfr_mid","cfr_low","cfr_high"
1,0.971792800536214,0.81365988948165,1.18470876170315
2,0.977803477529202,0.818110179878572,1.19297074259223
3,1.20969819787998,1.01672872047031,1.46003854011842
4,1.400174924698,1.1792411500967,1.68067357560321
5,1.58120251660305,1.33357490240717,1.89049905241268
6,1.41194695458005,1.18953009591815,1.69366665937661
7,1.44679267858955,1.2194686154977,1.73365260703696
8,1.39845908055984,1.17794342806622,1.67822427584619
9,1.47558127122979,1.24356659330543,1.76815316907781
10,1.34347763568619,1.13058459180382,1.61588205184933
11,1.13482149758689,0.952660393284068,1.37383951582361
12,1.3762376047948,1.15857539806476,1.65328313832609
13,1.30409694476306,1.0966113989631,1.57061421498939
14,1.4787506749354,1.24637014448068,1.77140290897546
15,1.27979933399993,1.07747462571867,1.54002099953576
16,1.57328843885159,1.32745609587598,1.88028680872675
17,1.36170929426836,1.14617841134316,1.63656496040202
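underreport.py (below) looks this table up by row position rather than by the iso3c value; a minimal sketch of that lookup, with a hypothetical region number:

import pandas as pd

current_region = 13  # hypothetical example; rows are positional: row 0 = region 1, ..., row 16 = region 17 (national, per the script)
cfr = pd.read_csv('adjusted_cfr.csv')
baseline_cfr_mid = cfr.iloc[current_region - 1]['cfr_mid']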
requirements.txt
@@ -0,0 +1,9 @@
gpflow==2.1.3
pandas
requests
matplotlib
numpy==1.16.4
gast==0.3.3
six==1.12.0
tensorflow==2.3.1
tensorflow_probability==0.11.1
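These pins (tensorflow 2.3.1, gpflow 2.1.3, tensorflow_probability 0.11.1) are presumably chosen to match the CUDA 10.1 / cuDNN 7 base image; inside the container they are installed with pip install -r requirements.txt, as the Dockerfile above shows.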
underreport.py
@@ -0,0 +1,293 @@
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
import tensorflow_probability as tfp
from tensorflow_probability import distributions as tfd
from scipy import special
from scipy.stats import lognorm
import gpflow
from gpflow.ci_utils import ci_niter
from gpflow import set_trainable
import requests
import json
import sys, getopt


gpflow.config.set_default_float(np.float64)
gpflow.config.set_default_jitter(1e-4)
gpflow.config.set_default_summary_fmt("notebook")
# convert to float64 for tfp to play nicely with gpflow in 64
f64 = gpflow.utilities.to_default_float
tf.random.set_seed(123)
# sess = tf.compat.v1.Session(config=tf.compat.v1.ConfigProto(log_device_placement=True))


def underreport_estimate(cases, deaths):
    # Placeholder, not implemented in this commit.
    pass


def get_regional_deaths(region):
    # Fetch confirmed and suspected deaths for one state from the local data API.
    endpointm = requests.get('http://192.168.2.223:5006/getDeathsByState?state=' + str(region))
    deaths = json.loads(endpointm.text)
    deaths = pd.DataFrame(deaths)
    deaths.index = pd.to_datetime(deaths.dates)
    deaths.drop(columns='dates', inplace=True)
    deaths.index = [x.strftime("%Y-%m-%d") for x in deaths.index]
    deaths['total'] = deaths['confirmed'] + deaths['suspected']
    return deaths
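# Example (hypothetical region number): get_regional_deaths(13) returns a DataFrame indexed by
# 'YYYY-MM-DD' strings with columns 'confirmed', 'suspected' and 'total'.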

def get_nacional_deaths():
    # National totals: deaths for state 16 plus states 1-15, summed column-wise.
    endpointm = requests.get('http://192.168.2.223:5006/getDeathsByState?state=16')
    deaths = json.loads(endpointm.text)
    deaths = pd.DataFrame(deaths)
    deaths.index = pd.to_datetime(deaths.dates)
    deaths.drop(columns='dates', inplace=True)
    deaths.index = [x.strftime("%Y-%m-%d") for x in deaths.index]

    for i in range(1, 16):
        endpointm = requests.get('http://192.168.2.223:5006/getDeathsByState?state=' + str(i))
        deaths2 = json.loads(endpointm.text)
        deaths2 = pd.DataFrame(deaths2)
        deaths2.index = pd.to_datetime(deaths2.dates)
        deaths2.drop(columns='dates', inplace=True)
        deaths2.index = [x.strftime("%Y-%m-%d") for x in deaths2.index]
        deaths = deaths + deaths2
    deaths['total'] = deaths['confirmed'] + deaths['suspected']
    return deaths
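# The local API at 192.168.2.223:5006 is assumed to return JSON of the form
#   {'dates': [...], 'confirmed': [...], 'suspected': [...]}
# for getDeathsByState; this shape is inferred from the parsing above, not documented in the commit.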

def delay_correction():
    # Unfinished helper: the same lognormal onset-to-death delay (mean 13 days, sd 12.7 days)
    # is re-created inline in the main loop below.
    mu, sigma = 13, 12.7  # ?
    mean = np.log(mu**2 / np.sqrt(mu**2 + sigma**2))
    std = np.sqrt(np.log(1 + sigma**2 / mu**2))
    f = lognorm(s=std, scale=np.exp(mean))
    days = 15

    pass


def sissor(x):
    # Truncate a timestamp string to 'YYYY-MM-DD' (first 10 characters).
    return x[:10]

def args_parser(argv):
    from_region = ''
    to_region = ''
    ngpu = ''
    try:
        opts, args = getopt.getopt(argv, "hf:t:n:", ["from=", "to=", "gpun="])
    except getopt.GetoptError:
        print('underreport.py -f <regionnumber> -t <regionnumber> -n <gpunumber>')
        sys.exit(2)
    for opt, arg in opts:
        if opt == '-h':
            print('underreport.py -f <regionnumber> -t <regionnumber> -n <gpunumber>')
            sys.exit()
        elif opt in ("-f", "--from"):
            from_region = arg
        elif opt in ("-t", "--to"):
            to_region = arg
        elif opt in ("-n", "--gpun"):
            ngpu = arg

    print('Computing underreporting from region number', from_region)
    print('to region number', to_region)
    return from_region, to_region, ngpu
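# Example invocation (hypothetical values), matching the help string above:
#   python underreport.py -f 1 -t 17 -n 0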

# config = tf.compat.v1.ConfigProto(gpu_options=tf.compat.v1.GPUOptions(allow_growth=True))
# gpflow.reset_default_session(config=config)

# New cases per state from the local data API.
endpointnew = requests.get('http://192.168.2.223:5006/getNewCasesAllStates')
actives = json.loads(endpointnew.text)
dates = pd.to_datetime(actives['dates'])
dates = [x.strftime("%Y-%m-%d") for x in dates]
# Parsing args
print(sys.argv[1:])
from_region, to_region, gpun = args_parser(sys.argv[1:])


ssess = tf.compat.v1.Session(config=tf.compat.v1.ConfigProto(allow_soft_placement=True))


with tf.device('/gpu:' + gpun):

    for current_region in range(int(from_region), int(to_region) + 1):
        # DATA
        # try:
        if current_region == 17:
            # Region 17 = national: cases from the national endpoint, deaths summed over states 1-16.
            endpointnew = requests.get('http://192.168.2.223:5006/getNationalNewCases')
            actives = json.loads(endpointnew.text)
            dates = pd.to_datetime(actives['dates'])
            dates = [x.strftime("%Y-%m-%d") for x in dates]
            reg_active = pd.DataFrame(data={'cases': actives['cases']}, index=dates)

            endpointm = requests.get('http://192.168.2.223:5006/getDeathsByState?state=16')
            deaths = json.loads(endpointm.text)
            deaths = pd.DataFrame(deaths)
            deaths.index = pd.to_datetime(deaths.dates)
            deaths.drop(columns='dates', inplace=True)
            deaths.index = [x.strftime("%Y-%m-%d") for x in deaths.index]

            for i in range(1, 16):
                endpointm = requests.get('http://192.168.2.223:5006/getDeathsByState?state=' + str(i))
                deaths2 = json.loads(endpointm.text)
                deaths2 = pd.DataFrame(deaths2)
                deaths2.index = pd.to_datetime(deaths2.dates)
                deaths2.drop(columns='dates', inplace=True)
                deaths2.index = [x.strftime("%Y-%m-%d") for x in deaths2.index]
                deaths = deaths + deaths2
            deaths['total'] = deaths['confirmed'] + deaths['suspected']
            # deaths['total'] = deaths['total']
            deaths = deaths.query("total > 0")
        else:
            # Single state: new cases keyed by zero-padded state number, deaths from the state endpoint.
            padded_region = '{:02d}'.format(current_region)
            reg_active = pd.DataFrame(data={'cases': actives['data'][padded_region]}, index=dates)

            endpointm = requests.get('http://192.168.2.223:5006/getDeathsByState?state=' + str(current_region))
            deaths = json.loads(endpointm.text)
            deaths = pd.DataFrame(deaths)

            deaths.index = pd.to_datetime(deaths.dates)
            deaths.drop(columns='dates', inplace=True)
            deaths.index = [x.strftime("%Y-%m-%d") for x in deaths.index]

            # deaths['dates'] = deaths['dates'].apply(sissor)
            deaths['total'] = deaths['confirmed'] + deaths['suspected']
            deaths = deaths.query("total > 0")

        # regions = reg_active.index
        # Dates present in both the case and death series; skip regions with fewer than 10 overlapping days.
        common = list(set(deaths.index.to_list()).intersection(reg_active.index.to_list()))
        print(common)
        if len(common) < 10:
            continue
        # reg_active = pd.pivot_table(reg_active, values='cases', index=['region', 'dates'])
        common = sorted(common)
        # print('common', len(common))
        # Lognormal onset-to-death delay (mean 13 days, sd 12.7 days), converted to log-scale parameters.
        mu, sigma = 13, 12.7  # ?
        mean = np.log(mu**2 / np.sqrt(mu**2 + sigma**2))
        std = np.sqrt(np.log(1 + sigma**2 / mu**2))
        f = lognorm(s=std, scale=np.exp(mean))
        days = 15

        # RM_ac = reg_active.loc[current_region]
        RM_ac = reg_active
        RM_ac = RM_ac.loc[common]
        RM_deaths = deaths['total'].loc[common]

        # Delay-adjust the case series: convolve daily cases with the delay density f.
        d_cases = np.empty((RM_ac.shape[0], 1))
        for i in range(RM_ac.shape[0]):  # RM_ac.shape[0]
            until_t_data = RM_ac.values[:i + 1]
            reversed_arr = until_t_data[::-1].reshape(i + 1)
            d_cases[i] = np.sum(f.pdf(np.linspace(0, i, i + 1)) * reversed_arr)
        d_cases = d_cases[11:]

        RM_deaths = RM_deaths[11:]
        dcfr = RM_deaths.values / d_cases.reshape(len(common) - 11)
        estimator_a = pd.read_csv('adjusted_cfr.csv').iloc[current_region - 1]['cfr_mid'] / (dcfr * 100)  # 1.4787
        estimator_a = estimator_a[:-1]
        common = common[:-1]
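        # The block above is a delay-adjusted CFR estimate of the reporting rate:
        #   d_cases[t]     = sum_{s=0..t} f.pdf(s) * cases[t - s]   (cases convolved with the delay)
        #   dcfr[t]        = deaths[t] / d_cases[t]                  (delay-adjusted CFR, as a fraction)
        #   estimator_a[t] = baseline cfr_mid (in %) / (dcfr[t] * 100)
        # so estimator_a is roughly the fraction of cases that are reported; the first 11 days and
        # the final day are dropped above.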

        # Squash the estimate into (0, 1) with the inverse logit before fitting the GP.
        pro_a = special.expit(estimator_a)  # inverse logit
        X = np.linspace(1, estimator_a.shape[0], estimator_a.shape[0])
        X = tf.convert_to_tensor(X.reshape(estimator_a.shape[0], -1), dtype=tf.float64)
        pro_a = pro_a  # *0.9
        pro_a = tf.convert_to_tensor(pro_a.reshape(estimator_a.shape[0], -1))
        data = (X, pro_a)

        # GP regression over the day index with an RBF kernel and a constant mean of 0.5.
        kernel = gpflow.kernels.SquaredExponential()
        mean_function = gpflow.mean_functions.Constant(f64(0.5))  # c=1, mean_function
        model = gpflow.models.GPR(data, kernel, mean_function=mean_function, noise_variance=0.01)
        optimizer = gpflow.optimizers.Scipy()
        optimizer.minimize(model.training_loss, model.trainable_variables)
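        # Optional diagnostic (not called here): gpflow.utilities.print_summary(model) would show the
        # fitted kernel variance/lengthscale and likelihood variance at this point.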

        # if verbose: print(f"log posterior density at optimum: {model.log_posterior_density()}")

        model.kernel.variance.prior = tfd.LogNormal(f64(1.0), f64(1.0))
        model.kernel.lengthscales.prior = tfd.LogNormal(f64(4.0), f64(0.5))

        model.likelihood.variance.prior = tfd.HalfNormal(f64(0.5))
        model.mean_function.c.prior = tfd.Uniform(f64(0.0), f64(1.0))

        num_burnin_steps = ci_niter(1000)
        num_samples = ci_niter(10000)
        # Note that here we need model.trainable_parameters, not trainable_variables - only parameters can have priors!
        hmc_helper = gpflow.optimizers.SamplingHelper(
            model.log_posterior_density, model.trainable_parameters
        )

        hmc = tfp.mcmc.HamiltonianMonteCarlo(
            target_log_prob_fn=hmc_helper.target_log_prob_fn, num_leapfrog_steps=10, step_size=.01
        )
        adaptive_hmc = tfp.mcmc.SimpleStepSizeAdaptation(
            hmc, num_adaptation_steps=10, target_accept_prob=f64(0.75), adaptation_rate=0.1
        )

        @tf.function
        def run_chain_fn():
            return tfp.mcmc.sample_chain(
                num_results=num_samples,
                num_burnin_steps=num_burnin_steps,
                current_state=hmc_helper.current_state,
                kernel=adaptive_hmc,  # hmc
                trace_fn=lambda _, pkr: pkr.inner_results.is_accepted,
            )
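        # Optional diagnostic (not computed here): after sampling, the HMC acceptance rate could be
        # checked with something like float(np.mean(traces.numpy())), since trace_fn records is_accepted.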

        samples, traces = run_chain_fn()
        parameter_samples = hmc_helper.convert_to_constrained_values(samples)

        param_to_name = {param: name for name, param in gpflow.utilities.parameter_dict(model).items()}

        xx = np.linspace(1, estimator_a.shape[0], estimator_a.shape[0])[:, None]
        posterior_samples = []  # np.empty(10000, 185)

        # Draw one posterior function sample per HMC draw (originally hard-coded to 10000 draws).
        for i in range(0, num_samples):
            for var, var_samples in zip(hmc_helper.current_state, samples):
                var.assign(var_samples[i])
            f = model.predict_f_samples(xx, 1)
            posterior_samples.append(f[0, :, :])

        posterior_samples = np.hstack(posterior_samples)  # , axis = 1)
        posterior_samples = posterior_samples.T

        mean = special.logit(np.mean(posterior_samples, 0))
        low = np.percentile(special.logit(posterior_samples), 1, axis=0)
        high = np.percentile(special.logit(posterior_samples), 99, axis=0)
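        # mean/low/high are mapped back with logit() to the scale of estimator_a; the commented
        # plotting code below converts this to an under-reporting percentage as (1 - value) * 100.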

        # fig = plt.figure(figsize=(16, 9))
        # ax = fig.add_subplot(1, 1, 1)
        #
        # xx = np.linspace(1, estimator_a.shape[0], estimator_a.shape[0])[:, None]
        # (line,) = ax.plot(xx, (1 - special.logit(np.mean(posterior_samples, 0))) * 100, lw=2)
        # ax.fill_between(
        #     xx[:, 0],
        #     (1 - np.percentile(special.logit(posterior_samples), 1, axis=0)) * 100,
        #     (1 - np.percentile(special.logit(posterior_samples), 99, axis=0)) * 100,
        #     color=line.get_color(),
        #     alpha=0.2,
        # )
        # ax.plot(xx, (1 - estimator_a) * 100, '.')
        # # ax.set_ylim((0, 2.2))
        #
        # realcommon = common[11:]
        # ticks = [i for i in range(len(common) - 11) if i % 7 == 0]
        # ticks_labels = [realcommon[i].replace('-', '/') for i in range(len(common) - 11) if i % 7 == 0]
        # ticks += [range(len(common) - 11)[-1]]
        # ticks_labels += [realcommon[-1].replace('-', '/')]
        # ax.set_xticks(ticks)
        # plt.xticks(rotation=45)
        # ax.set_xticklabels(ticks_labels, fontdict={'fontsize': '8'})
        # plt.ylabel('% de subreporte')
        # plt.title('Subreporte (%) a Region ' + str(current_region))
        #
        # fig.savefig('subreporte_' + str(current_region) + '_' + common[-1])

        final_data = pd.DataFrame(index=common[11:])
        final_data['mean'] = mean
        final_data['low'] = low
        final_data['high'] = high
        final_data.to_csv('output/' + str(current_region) + '.csv')
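        # Output: one CSV per region, written to output/<region>.csv (i.e. /underreport/output inside the
        # container, the directory created in the Dockerfile), indexed by date with columns mean, low, high.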