From b5e32db41691576f8798634386bd4a793aef12f3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tom=C3=A1s=20Villaseca?=
Date: Wed, 27 Jan 2021 00:11:54 -0300
Subject: [PATCH] Initial commit

---
 Dockerfile       |  18 +++
 adjusted_cfr.csv |  18 +++
 requirements.txt |   9 ++
 underreport.py   | 293 +++++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 338 insertions(+)
 create mode 100644 Dockerfile
 create mode 100644 adjusted_cfr.csv
 create mode 100644 requirements.txt
 create mode 100644 underreport.py

diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 0000000..894094c
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,18 @@
+FROM nvidia/cuda:10.1-cudnn7-runtime-ubuntu18.04
+
+WORKDIR /underreport
+
+RUN apt-get update -y
+RUN apt-get upgrade -y
+RUN apt-get install -y python3.8 python3.8-distutils python3.8-dev python3-pip wget
+RUN ln -s /usr/bin/pip3 /usr/bin/pip
+RUN ln -s /usr/bin/python3.8 /usr/bin/python
+RUN wget https://bootstrap.pypa.io/get-pip.py
+RUN python3.8 get-pip.py
+
+COPY requirements.txt .
+RUN pip install -r requirements.txt
+RUN mkdir /underreport/output
+
+COPY underreport.py /underreport
+COPY adjusted_cfr.csv /underreport
\ No newline at end of file
diff --git a/adjusted_cfr.csv b/adjusted_cfr.csv
new file mode 100644
index 0000000..302fa42
--- /dev/null
+++ b/adjusted_cfr.csv
@@ -0,0 +1,18 @@
+"iso3c","cfr_mid","cfr_low","cfr_high"
+1,0.971792800536214,0.81365988948165,1.18470876170315
+2,0.977803477529202,0.818110179878572,1.19297074259223
+3,1.20969819787998,1.01672872047031,1.46003854011842
+4,1.400174924698,1.1792411500967,1.68067357560321
+5,1.58120251660305,1.33357490240717,1.89049905241268
+6,1.41194695458005,1.18953009591815,1.69366665937661
+7,1.44679267858955,1.2194686154977,1.73365260703696
+8,1.39845908055984,1.17794342806622,1.67822427584619
+9,1.47558127122979,1.24356659330543,1.76815316907781
+10,1.34347763568619,1.13058459180382,1.61588205184933
+11,1.13482149758689,0.952660393284068,1.37383951582361
+12,1.3762376047948,1.15857539806476,1.65328313832609
+13,1.30409694476306,1.0966113989631,1.57061421498939
+14,1.4787506749354,1.24637014448068,1.77140290897546
+15,1.27979933399993,1.07747462571867,1.54002099953576
+16,1.57328843885159,1.32745609587598,1.88028680872675
+17,1.36170929426836,1.14617841134316,1.63656496040202
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..0464ae0
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,9 @@
+gpflow==2.1.3
+pandas
+requests
+matplotlib
+numpy==1.16.4
+gast==0.3.3
+six==1.12.0
+tensorflow==2.3.1
+tensorflow_probability==0.11.1
\ No newline at end of file
diff --git a/underreport.py b/underreport.py
new file mode 100644
index 0000000..ae56e74
--- /dev/null
+++ b/underreport.py
@@ -0,0 +1,293 @@
+import pandas as pd
+import numpy as np
+import matplotlib.pyplot as plt
+import tensorflow as tf
+import tensorflow_probability as tfp
+from tensorflow_probability import distributions as tfd
+from scipy import special
+from scipy.stats import lognorm
+import gpflow
+from gpflow.ci_utils import ci_niter
+from gpflow import set_trainable
+import requests
+import json
+import sys, getopt
+
+gpflow.config.set_default_float(np.float64)
+gpflow.config.set_default_jitter(1e-4)
+gpflow.config.set_default_summary_fmt("notebook")
+# convert to float64 so tfp plays nicely with gpflow in 64-bit
+f64 = gpflow.utilities.to_default_float
+tf.random.set_seed(123)
+# sess = tf.compat.v1.Session(config=tf.compat.v1.ConfigProto(log_device_placement=True))
+
+
+def underreport_estimate(cases, deaths):
+    # Placeholder: the per-region estimate is computed inline in the main loop below.
+    pass
+
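+# A minimal, self-contained sketch of the delay correction applied in the main
+# loop below: daily cases are convolved with a lognormal onset-to-death delay
+# (mean 13 days, sd 12.7 days) to estimate cases with known outcome. The
+# function name and signature are illustrative only; `daily_cases` is assumed
+# to be a 1-D NumPy array of daily case counts.
+def cases_with_known_outcome(daily_cases, mu=13.0, sigma=12.7):
+    # moment-match the lognormal's parameters to the given mean and sd
+    mean = np.log(mu**2 / np.sqrt(mu**2 + sigma**2))
+    std = np.sqrt(np.log(1 + sigma**2 / mu**2))
+    delay = lognorm(s=std, scale=np.exp(mean))
+    out = np.empty(len(daily_cases))
+    for t in range(len(daily_cases)):
+        # weight cases reported i days ago by the density of an i-day delay
+        out[t] = np.sum(delay.pdf(np.arange(t + 1)) * daily_cases[t::-1])
+    return out
+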
+def get_regional_deaths(region):
+    endpointm = requests.get('http://192.168.2.223:5006/getDeathsByState?state=' + str(region))
+    deaths = pd.DataFrame(json.loads(endpointm.text))
+    deaths.index = pd.to_datetime(deaths.dates)
+    deaths.drop(columns='dates', inplace=True)
+    deaths.index = [x.strftime("%Y-%m-%d") for x in deaths.index]
+    deaths['total'] = deaths['confirmed'] + deaths['suspected']
+    return deaths
+
+def get_nacional_deaths():
+    # National series: sum the per-state deaths (states 1..16), aligned by date.
+    deaths = get_regional_deaths(16)
+    for i in range(1, 16):
+        deaths = deaths + get_regional_deaths(i)
+    return deaths
+
+def delay_correction():
+    # Lognormal onset-to-death delay: mean 13 days, sd 12.7 days,
+    # moment-matched to the lognormal's mu/sigma parameters.
+    mu, sigma = 13, 12.7
+    mean = np.log(mu**2 / np.sqrt(mu**2 + sigma**2))
+    std = np.sqrt(np.log(1 + sigma**2 / mu**2))
+    return lognorm(s=std, scale=np.exp(mean))
+
+def sissor(x):
+    # Trim an ISO timestamp string down to its date part.
+    return x[:10]
+
+def args_parser(argv):
+    from_region = ''
+    to_region = ''
+    ngpu = '0'
+    try:
+        opts, args = getopt.getopt(argv, "hf:t:n:", ["from=", "to=", "gpun="])
+    except getopt.GetoptError:
+        print('usage: underreport.py -f <from_region> -t <to_region> -n <gpu>')
+        sys.exit(2)
+    for opt, arg in opts:
+        if opt == '-h':
+            print('usage: underreport.py -f <from_region> -t <to_region> -n <gpu>')
+            sys.exit()
+        elif opt in ("-f", "--from"):
+            from_region = arg
+        elif opt in ("-t", "--to"):
+            to_region = arg
+        elif opt in ("-n", "--gpun"):
+            ngpu = arg
+
+    print('Computing underreporting from region number', from_region)
+    print('to region number', to_region)
+    return from_region, to_region, ngpu
+
+# config = tf.compat.v1.ConfigProto(gpu_options=tf.compat.v1.GPUOptions(allow_growth=True))
+# gpflow.reset_default_session(config=config)
+endpointnew = requests.get('http://192.168.2.223:5006/getNewCasesAllStates')
+actives = json.loads(endpointnew.text)
+dates = pd.to_datetime(actives['dates'])
+dates = [x.strftime("%Y-%m-%d") for x in dates]
+
+# Parse command-line args
+print(sys.argv[1:])
+from_region, to_region, gpun = args_parser(sys.argv[1:])
+
+# TF1-style session kept for soft placement (not otherwise used below).
+ssess = tf.compat.v1.Session(config=tf.compat.v1.ConfigProto(allow_soft_placement=True))
+
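+# Example invocation (regions 1..17 on GPU 0), assuming the API host above is
+# reachable:
+#
+#   python underreport.py -f 1 -t 17 -n 0
+#
+# The per-region computation inside the loop below reduces to the ratio of a
+# baseline adjusted CFR to the delay-adjusted CFR observed in the data. A
+# compact sketch (illustrative name, reusing the helper sketched earlier):
+def raw_reporting_rate(daily_cases, daily_deaths, baseline_cfr_pct):
+    known_outcome = cases_with_known_outcome(daily_cases)
+    dcfr = daily_deaths / known_outcome       # delay-adjusted CFR (fraction)
+    return baseline_cfr_pct / (dcfr * 100.0)  # fraction of cases reported
+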
+with tf.device('/gpu:' + gpun):
+
+    for current_region in range(int(from_region), int(to_region) + 1):
+        # DATA
+        if current_region == 17:
+            # Region 17 is the national aggregate.
+            endpointnew = requests.get('http://192.168.2.223:5006/getNationalNewCases')
+            actives = json.loads(endpointnew.text)
+            dates = pd.to_datetime(actives['dates'])
+            dates = [x.strftime("%Y-%m-%d") for x in dates]
+            reg_active = pd.DataFrame(data={'cases': actives['cases']}, index=dates)
+            deaths = get_nacional_deaths()
+        else:
+            padded_region = '{:02d}'.format(current_region)
+            reg_active = pd.DataFrame(data={'cases': actives['data'][padded_region]}, index=dates)
+            deaths = get_regional_deaths(current_region)
+            # deaths['dates'] = deaths['dates'].apply(sissor)
+        deaths = deaths.query("total > 0")
+
+        # Keep only the dates for which both cases and deaths are available.
+        common = list(set(deaths.index.to_list()).intersection(reg_active.index.to_list()))
+        print(common)
+        if len(common) < 10:
+            continue
+        common = sorted(common)
+
+        # Lognormal onset-to-death delay distribution.
+        f = delay_correction()
+
+        RM_ac = reg_active.loc[common]
+        RM_deaths = deaths['total'].loc[common]
+
+        # Delay-adjusted cases: convolve daily cases with the delay density to
+        # get the expected number of cases with known outcome at each date.
+        d_cases = np.empty((RM_ac.shape[0], 1))
+        for i in range(RM_ac.shape[0]):
+            until_t_data = RM_ac.values[:i + 1]
+            reversed_arr = until_t_data[::-1].reshape(i + 1)
+            d_cases[i] = np.sum(f.pdf(np.linspace(0, i, i + 1)) * reversed_arr)
+        # Drop the first 11 days, where the convolution has too little history.
+        d_cases = d_cases[11:]
+        RM_deaths = RM_deaths[11:]
+
+        # Delay-adjusted CFR, then the raw reporting-rate estimate: baseline
+        # adjusted CFR (%) over the observed delay-adjusted CFR (%).
+        dcfr = RM_deaths.values / d_cases.reshape(len(common) - 11)
+        estimator_a = pd.read_csv('adjusted_cfr.csv').iloc[current_region - 1]['cfr_mid'] / (dcfr * 100)
+        # Drop the last (still incomplete) day.
+        estimator_a = estimator_a[:-1]
+        common = common[:-1]
+
+        # Squash onto (0, 1) with the logistic sigmoid (inverse logit) so the
+        # GP is fitted on a bounded scale. (A damping factor, e.g. *0.9, was
+        # considered but is not applied.)
+        pro_a = special.expit(estimator_a)
+        X = np.linspace(1, estimator_a.shape[0], estimator_a.shape[0])
+        X = tf.convert_to_tensor(X.reshape(estimator_a.shape[0], -1), dtype=tf.float64)
+        pro_a = tf.convert_to_tensor(pro_a.reshape(estimator_a.shape[0], -1))
+        data = (X, pro_a)
+
+        # GP regression on the squashed estimates, with a maximum-likelihood fit.
+        kernel = gpflow.kernels.SquaredExponential()
+        mean_function = gpflow.mean_functions.Constant(f64(0.5))
+        model = gpflow.models.GPR(data, kernel, mean_function=mean_function, noise_variance=0.01)
+        optimizer = gpflow.optimizers.Scipy()
+        optimizer.minimize(model.training_loss, model.trainable_variables)
+        # if verbose: print(f"log posterior density at optimum: {model.log_posterior_density()}")
+
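+        # What follows is full Bayesian inference over the GP hyperparameters:
+        # the maximum-likelihood fit above supplies the chain's starting point,
+        # the priors assigned below make model.log_posterior_density a proper
+        # (unnormalized) log posterior, and adaptive HMC then samples from it.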
+        model.kernel.variance.prior = tfd.LogNormal(f64(1.0), f64(1.0))
+        model.kernel.lengthscales.prior = tfd.LogNormal(f64(4.0), f64(0.5))
+        model.likelihood.variance.prior = tfd.HalfNormal(f64(0.5))
+        model.mean_function.c.prior = tfd.Uniform(f64(0.0), f64(1.0))
+
+        num_burnin_steps = ci_niter(1000)
+        num_samples = ci_niter(10000)
+
+        # Note that here we need model.trainable_parameters, not
+        # trainable_variables - only parameters can have priors!
+        hmc_helper = gpflow.optimizers.SamplingHelper(
+            model.log_posterior_density, model.trainable_parameters
+        )
+
+        hmc = tfp.mcmc.HamiltonianMonteCarlo(
+            target_log_prob_fn=hmc_helper.target_log_prob_fn, num_leapfrog_steps=10, step_size=0.01
+        )
+        adaptive_hmc = tfp.mcmc.SimpleStepSizeAdaptation(
+            hmc, num_adaptation_steps=10, target_accept_prob=f64(0.75), adaptation_rate=0.1
+        )
+
+        @tf.function
+        def run_chain_fn():
+            return tfp.mcmc.sample_chain(
+                num_results=num_samples,
+                num_burnin_steps=num_burnin_steps,
+                current_state=hmc_helper.current_state,
+                kernel=adaptive_hmc,
+                trace_fn=lambda _, pkr: pkr.inner_results.is_accepted,
+            )
+
+        samples, traces = run_chain_fn()
+        parameter_samples = hmc_helper.convert_to_constrained_values(samples)
+
+        param_to_name = {param: name for name, param in gpflow.utilities.parameter_dict(model).items()}
+
+        # Draw one GP function sample per hyperparameter sample.
+        xx = np.linspace(1, estimator_a.shape[0], estimator_a.shape[0])[:, None]
+        posterior_samples = []
+        for i in range(num_samples):
+            for var, var_samples in zip(hmc_helper.current_state, samples):
+                var.assign(var_samples[i])
+            f_sample = model.predict_f_samples(xx, 1)
+            posterior_samples.append(f_sample[0, :, :])
+
+        posterior_samples = np.hstack(posterior_samples)
+        posterior_samples = posterior_samples.T
+
+        # Map back from (0, 1) to the reporting-rate scale with the logit.
+        mean = special.logit(np.mean(posterior_samples, 0))
+        low = np.percentile(special.logit(posterior_samples), 1, axis=0)
+        high = np.percentile(special.logit(posterior_samples), 99, axis=0)
+
+        # fig = plt.figure(figsize=(16, 9))
+        # ax = fig.add_subplot(1, 1, 1)
+        #
+        # xx = np.linspace(1, estimator_a.shape[0], estimator_a.shape[0])[:, None]
+        # (line,) = ax.plot(xx, (1 - special.logit(np.mean(posterior_samples, 0))) * 100, lw=2)
+        # ax.fill_between(
+        #     xx[:, 0],
+        #     (1 - np.percentile(special.logit(posterior_samples), 1, axis=0)) * 100,
+        #     (1 - np.percentile(special.logit(posterior_samples), 99, axis=0)) * 100,
+        #     color=line.get_color(),
+        #     alpha=0.2,
+        # )
+        # ax.plot(xx, (1 - estimator_a) * 100, '.')
+        # # ax.set_ylim((0, 2.2))
+        #
+        # realcommon = common[11:]
+        # ticks = [i for i in range(len(common) - 11) if i % 7 == 0]
+        # ticks_labels = [realcommon[i].replace('-', '/') for i in range(len(common) - 11) if i % 7 == 0]
+        # ticks += [range(len(common) - 11)[-1]]
+        # ticks_labels += [realcommon[-1].replace('-', '/')]
+        # ax.set_xticks(ticks)
+        # plt.xticks(rotation=45)
+        # ax.set_xticklabels(ticks_labels, fontdict={'fontsize': '8'})
+        # plt.ylabel('% underreporting')
+        # plt.title('Underreporting (%), Region ' + str(current_region))
+        #
+        # fig.savefig('subreporte_' + str(current_region) + '_' + common[-1])
+
+        final_data = pd.DataFrame(index=common[11:])
+        final_data['mean'] = mean
+        final_data['low'] = low
+        final_data['high'] = high
+        final_data.to_csv('output/' + str(current_region) + '.csv')
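+
+# Reading the output (illustrative sketch, mirroring the commented plot above):
+# each CSV holds the smoothed reporting-rate curve for one region, so the
+# underreporting percentage is recovered as
+#
+#   est = pd.read_csv('output/7.csv', index_col=0)   # e.g. region 7
+#   underreport_pct = (1 - est['mean']) * 100
+#   band = ((1 - est['high']) * 100, (1 - est['low']) * 100)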