From 327e1159221000f1a39231f148aadd04e5ad080e Mon Sep 17 00:00:00 2001 From: statrita2004 Date: Mon, 28 Jan 2019 20:15:16 +0000 Subject: [PATCH 01/20] Fixing issues with high dimensional random variables --- abcpy/acceptedparametersmanager.py | 9 +++++---- abcpy/continuousmodels.py | 9 +-------- abcpy/inferences.py | 3 ++- 3 files changed, 8 insertions(+), 13 deletions(-) diff --git a/abcpy/acceptedparametersmanager.py b/abcpy/acceptedparametersmanager.py index ea265015..d0400e60 100644 --- a/abcpy/acceptedparametersmanager.py +++ b/abcpy/acceptedparametersmanager.py @@ -92,7 +92,8 @@ def get_mapping(self, models, is_root=True, index=0): Returns ------- list - The first entry corresponds to the mapping of the root model, as well as all its parents. The second entry corresponds to the next index in depth-first search. + The first entry corresponds to the mapping of the root model, as well as all its parents. The second entry + corresponds to the next index in depth-first search. """ # Implement a dfs to discover all nodes of the model @@ -104,9 +105,9 @@ def get_mapping(self, models, is_root=True, index=0): # Only parameters that are neither root nor Hyperparameters are included in the mapping if(not(is_root) and not(isinstance(model, ModelResultingFromOperation))): - for i in range(model.get_output_dimension()): - mapping.append((model, index)) - index+=1 + #for i in range(model.get_output_dimension()): + mapping.append((model, index)) + index+=1 for parent in model.get_input_models(): parent_mapping, index = self.get_mapping([parent], is_root= False, index=index) diff --git a/abcpy/continuousmodels.py b/abcpy/continuousmodels.py index bf8ada5d..fc72d12c 100644 --- a/abcpy/continuousmodels.py +++ b/abcpy/continuousmodels.py @@ -346,20 +346,13 @@ def __init__(self, parameters, name='Multivariate Normal'): raise TypeError('Input for Multivariate Normal has to be of type list.') if len(parameters)<2: raise ValueError('Input for Multivariate Normal has to be of length 2.') - if not isinstance(parameters[0], list): - raise TypeError('Input for mean of MultivarateNormal has to be of type list.') - if not isinstance(parameters[1], list): - raise TypeError('Input for covariance of MultivarateNormal has to be of type list.') - - ## This part confuses me as you say mean may not be list!! mean = parameters[0] if isinstance(mean, list): self._dimension = len(mean) - input_parameters = InputConnector.from_list(parameters) elif isinstance(mean, ProbabilisticModel): self._dimension = mean.get_output_dimension() - input_parameters = parameters + input_parameters = InputConnector.from_list(parameters) super(MultivariateNormal, self).__init__(input_parameters, name) self.visited = False diff --git a/abcpy/inferences.py b/abcpy/inferences.py index 7714f82d..22a73937 100644 --- a/abcpy/inferences.py +++ b/abcpy/inferences.py @@ -481,7 +481,8 @@ def sample(self, observations, steps, epsilon_init, n_samples = 10000, n_samples # The calculation of cov_mats needs the new weights and new parameters self.accepted_parameters_manager.update_broadcast(self.backend, accepted_parameters = new_parameters, accepted_weights=new_weights) - # The parameters relevant to each kernel have to be used to calculate n_sample times. It is therefore more efficient to broadcast these parameters once, instead of collecting them at each kernel in each step + # The parameters relevant to each kernel have to be used to calculate n_sample times. It is therefore more efficient to broadcast these parameters once, + # instead of collecting them at each kernel in each step kernel_parameters = [] for kernel in self.kernel.kernels: kernel_parameters.append( From 6b5d436cd184b0ccdfd6ba81282d4843419083cd Mon Sep 17 00:00:00 2001 From: statrita2004 Date: Tue, 29 Jan 2019 17:45:41 +0000 Subject: [PATCH 02/20] Updating for high dimensional random variables --- abcpy/acceptedparametersmanager.py | 2 + abcpy/continuousmodels.py | 6 +-- abcpy/graphtools.py | 14 ++++--- abcpy/inferences.py | 3 +- abcpy/perturbationkernel.py | 59 ++++++++++++++++++++++-------- abcpy/probabilisticmodels.py | 1 - 6 files changed, 58 insertions(+), 27 deletions(-) diff --git a/abcpy/acceptedparametersmanager.py b/abcpy/acceptedparametersmanager.py index d0400e60..c4517b52 100644 --- a/abcpy/acceptedparametersmanager.py +++ b/abcpy/acceptedparametersmanager.py @@ -140,6 +140,7 @@ def get_accepted_parameters_bds_values(self, models): # The self.accepted_parameters_bds.value() list has dimensions d x n_steps, where d is the number of free parameters accepted_bds_values = [[] for i in range(len(self.accepted_parameters_bds.value()))] + # Add all columns that correspond to desired parameters to the list that is returned for model in models: for prob_model, index in mapping: @@ -147,6 +148,7 @@ def get_accepted_parameters_bds_values(self, models): for i in range(len(self.accepted_parameters_bds.value())): accepted_bds_values[i].append(self.accepted_parameters_bds.value()[i][index]) accepted_bds_values = [np.array(x).reshape(-1, ) for x in accepted_bds_values] + return accepted_bds_values def _reset_flags(self, models=None): diff --git a/abcpy/continuousmodels.py b/abcpy/continuousmodels.py index fc72d12c..ca32a10e 100644 --- a/abcpy/continuousmodels.py +++ b/abcpy/continuousmodels.py @@ -87,7 +87,7 @@ def forward_simulate(self, input_values, k, rng=np.random.RandomState(), mpi_com samples = np.zeros(shape=(k, self.get_output_dimension())) for j in range(0, self.get_output_dimension()): samples[:, j] = rng.uniform(input_values[j], input_values[j+self.get_output_dimension()], k) - return [np.array(x) for x in samples] + return [np.array(x).reshape(-1,) for x in samples] def get_output_dimension(self): @@ -189,7 +189,7 @@ def forward_simulate(self, input_values, k, rng=np.random.RandomState(), mpi_com mu = input_values[0] sigma = input_values[1] result = np.array(rng.normal(mu, sigma, k)) - return [np.array([x]) for x in result] + return [np.array([x]).reshape(-1,) for x in result] def get_output_dimension(self): @@ -270,7 +270,7 @@ def forward_simulate(self, input_values, k, rng=np.random.RandomState(), mpi_com mean = input_values[0] df = input_values[1] result = np.array((rng.standard_t(df,k)+mean)) - return [np.array([x]) for x in result] + return [np.array([x]).reshape(-1,) for x in result] def _check_input(self, input_values): diff --git a/abcpy/graphtools.py b/abcpy/graphtools.py index edfb4de5..0d6d653c 100644 --- a/abcpy/graphtools.py +++ b/abcpy/graphtools.py @@ -134,6 +134,7 @@ def _recursion_pdf_of_prior(self, models, parameters, mapping=None, is_root=True # At the beginning of calculation, obtain the mapping if(is_root): mapping, garbage_index = self._get_mapping() + # The pdf of each root model is first calculated seperately result = [1.]*len(models) @@ -146,9 +147,9 @@ def _recursion_pdf_of_prior(self, models, parameters, mapping=None, is_root=True for mapped_model, model_index in mapping: if(mapped_model==model): parameter_index = model_index - for j in range(model.get_output_dimension()): - relevant_parameters.append(parameters[parameter_index]) - parameter_index+=1 + #for j in range(model.get_output_dimension()): + relevant_parameters.append(parameters[parameter_index]) + #parameter_index+=1 break if(len(relevant_parameters)==1): relevant_parameters = relevant_parameters[0] @@ -210,7 +211,7 @@ def _get_mapping(self, models=None, index=0, is_not_root=False): # If this model corresponds to an unvisited free parameter, add it to the mapping if(is_not_root and not(model.visited) and not(isinstance(model, Hyperparameter)) and not(isinstance(model, ModelResultingFromOperation))): mapping.append((model, index)) - index+=model.get_output_dimension() + index+= 1 #model.get_output_dimension() # Add all parents to the mapping, if applicable for parent in model.get_input_models(): parent_mapping, index = self._get_mapping([parent], index=index, is_not_root=True) @@ -318,10 +319,11 @@ def set_parameters(self, parameters, models=None, index=0, is_root=True): for model in models: # New parameters should only be set in case we are not at the root if not is_root and not isinstance(model, ModelResultingFromOperation): - new_output_values = np.array(parameters[index:index + model.get_output_dimension()]) + #new_output_values = np.array(parameters[index:index + model.get_output_dimension()]) + new_output_values = np.array(parameters[index]).reshape(-1,) if not model.set_output_values(new_output_values): return [False, index] - index += model.get_output_dimension() + index += 1 #model.get_output_dimension() model.visited = True # New parameters for all parents are set using a depth-first search diff --git a/abcpy/inferences.py b/abcpy/inferences.py index 22a73937..b7af75dc 100644 --- a/abcpy/inferences.py +++ b/abcpy/inferences.py @@ -574,9 +574,8 @@ def _resample_parameter(self, rng, npc=None): "Needed {:4d} simulations to reach distance {:e} < epsilon = {:e}". format(counter, distance, float(self.epsilon)) ) - return (theta, distance, counter) - return None + return (theta, distance, counter) def _calculate_weight(self, theta, npc=None): """ diff --git a/abcpy/perturbationkernel.py b/abcpy/perturbationkernel.py index 4d920747..802eb265 100644 --- a/abcpy/perturbationkernel.py +++ b/abcpy/perturbationkernel.py @@ -201,9 +201,9 @@ def update(self, accepted_parameters_manager, row_index, rng=np.random.RandomSta index=0 for model in kernel.models: model_values = [] - for j in range(model.get_output_dimension()): - model_values.append(perturbed_values[kernel_index][index]) - index+=1 + #for j in range(model.get_output_dimension()): + model_values.append(perturbed_values[kernel_index][index]) + index+=1 perturbed_values_including_models.append((model, model_values)) return perturbed_values_including_models @@ -271,8 +271,15 @@ def calculate_cov(self, accepted_parameters_manager, kernel_index): if(accepted_parameters_manager.accepted_weights_bds is not None): weights = accepted_parameters_manager.accepted_weights_bds.value() - cov = np.cov(np.array(accepted_parameters_manager.kernel_parameters_bds.value()[kernel_index]).astype(float), - aweights=weights.reshape(-1).astype(float), rowvar=False) + continuous_model = [[] for i in range(len(accepted_parameters_manager.kernel_parameters_bds.value()[kernel_index]))] + for i in range(len(accepted_parameters_manager.kernel_parameters_bds.value()[kernel_index])): + if isinstance(accepted_parameters_manager.kernel_parameters_bds.value()[kernel_index][i][0], np.float): + continuous_model[i] = accepted_parameters_manager.kernel_parameters_bds.value()[kernel_index][i] + else: + continuous_model[i] = np.concatenate(accepted_parameters_manager.kernel_parameters_bds.value()[kernel_index][i]) + cov = np.cov(np.array(continuous_model).astype(float),aweights=weights.reshape(-1).astype(float), rowvar=False) + #cov = np.cov(np.array(accepted_parameters_manager.kernel_parameters_bds.value()[kernel_index]).astype(float), + # aweights=weights.reshape(-1).astype(float), rowvar=False) else: if(not(accepted_parameters_manager.accepted_parameters_bds.value().shape[1]>1)): cov = np.var(np.array(accepted_parameters_manager.kernel_parameters_bds.value()[kernel_index]).astype(float)) @@ -303,13 +310,31 @@ def update(self, accepted_parameters_manager, kernel_index, row_index, rng=np.ra The perturbed parameter values. """ - # Get all current parameter values relevant for this model + # Get all current parameter values relevant for this model and the structure continuous_model_values = accepted_parameters_manager.kernel_parameters_bds.value()[kernel_index] - # Perturb - continuous_model_values = np.array(continuous_model_values).astype(float) - cov = np.array(accepted_parameters_manager.accepted_cov_mats_bds.value()[kernel_index]).astype(float) - perturbed_continuous_values = rng.multivariate_normal(continuous_model_values[row_index], cov) + if isinstance(continuous_model_values[row_index][0], np.float): + # Perturb + cov = np.array(accepted_parameters_manager.accepted_cov_mats_bds.value()[kernel_index]).astype(float) + continuous_model_values = np.array(continuous_model_values).astype(float) + + # Perturbed values anc split according to the structure + perturbed_continuous_values = rng.multivariate_normal(continuous_model_values[row_index], cov) + else: + # Learn the structure + struct = [[] for i in range(len(continuous_model_values[row_index]))] + for i in range(len(continuous_model_values[row_index])): + struct[i] = continuous_model_values[row_index][i].shape[0] + struct = np.array(struct).cumsum() + continuous_model_values = np.concatenate(continuous_model_values[row_index]) + + # Perturb + cov = np.array(accepted_parameters_manager.accepted_cov_mats_bds.value()[kernel_index]).astype(float) + continuous_model_values = np.array(continuous_model_values).astype(float) + + # Perturbed values anc split according to the structure + perturbed_continuous_values = np.split(rng.multivariate_normal(continuous_model_values, cov), struct)[:-1] + return perturbed_continuous_values @@ -334,12 +359,16 @@ def pdf(self, accepted_parameters_manager, kernel_index, index, x): """ # Gets the relevant accepted parameters from the manager in order to calculate the pdf + continuous_model_values = accepted_parameters_manager.kernel_parameters_bds.value()[kernel_index] - mean = np.array(accepted_parameters_manager.kernel_parameters_bds.value()[kernel_index][index]).astype(float) - - cov = np.array(accepted_parameters_manager.accepted_cov_mats_bds.value()[kernel_index]).astype(float) - - return multivariate_normal(mean, cov, allow_singular=True).pdf(x) + if isinstance(continuous_model_values[index][0], np.float): + mean = np.array(continuous_model_values[index]).astype(float) + cov = np.array(accepted_parameters_manager.accepted_cov_mats_bds.value()[kernel_index]).astype(float) + return multivariate_normal(mean, cov, allow_singular=True).pdf(x) + else: + mean = np.array(np.concatenate(continuous_model_values[index])).astype(float) + cov = np.array(accepted_parameters_manager.accepted_cov_mats_bds.value()[kernel_index]).astype(float) + return multivariate_normal(mean, cov, allow_singular=True).pdf(np.concatenate(x)) class MultivariateStudentTKernel(PerturbationKernel, ContinuousKernel): diff --git a/abcpy/probabilisticmodels.py b/abcpy/probabilisticmodels.py index 07bd956d..92df078c 100644 --- a/abcpy/probabilisticmodels.py +++ b/abcpy/probabilisticmodels.py @@ -394,7 +394,6 @@ def set_output_values(self, values): boolean Returns True if it was possible to set the values, false otherwise. """ - if not isinstance(values, np.ndarray): raise TypeError('Elements of input list are not of type numpy array.') if values.shape[0] != self.get_output_dimension(): From 2bb41c7ab54b99f2206e07757f3ab45eb05911d8 Mon Sep 17 00:00:00 2001 From: statrita2004 Date: Tue, 29 Jan 2019 17:59:02 +0000 Subject: [PATCH 03/20] Making sure all tests pass --- abcpy/perturbationkernel.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/abcpy/perturbationkernel.py b/abcpy/perturbationkernel.py index 802eb265..47ab8446 100644 --- a/abcpy/perturbationkernel.py +++ b/abcpy/perturbationkernel.py @@ -273,7 +273,7 @@ def calculate_cov(self, accepted_parameters_manager, kernel_index): weights = accepted_parameters_manager.accepted_weights_bds.value() continuous_model = [[] for i in range(len(accepted_parameters_manager.kernel_parameters_bds.value()[kernel_index]))] for i in range(len(accepted_parameters_manager.kernel_parameters_bds.value()[kernel_index])): - if isinstance(accepted_parameters_manager.kernel_parameters_bds.value()[kernel_index][i][0], np.float): + if isinstance(accepted_parameters_manager.kernel_parameters_bds.value()[kernel_index][i][0], (np.float, np.float32, np.float64, np.int, np.int32, np.int64)): continuous_model[i] = accepted_parameters_manager.kernel_parameters_bds.value()[kernel_index][i] else: continuous_model[i] = np.concatenate(accepted_parameters_manager.kernel_parameters_bds.value()[kernel_index][i]) @@ -313,7 +313,7 @@ def update(self, accepted_parameters_manager, kernel_index, row_index, rng=np.ra # Get all current parameter values relevant for this model and the structure continuous_model_values = accepted_parameters_manager.kernel_parameters_bds.value()[kernel_index] - if isinstance(continuous_model_values[row_index][0], np.float): + if isinstance(continuous_model_values[row_index][0], (np.float, np.float32, np.float64, np.int, np.int32, np.int64)): # Perturb cov = np.array(accepted_parameters_manager.accepted_cov_mats_bds.value()[kernel_index]).astype(float) continuous_model_values = np.array(continuous_model_values).astype(float) @@ -361,11 +361,12 @@ def pdf(self, accepted_parameters_manager, kernel_index, index, x): # Gets the relevant accepted parameters from the manager in order to calculate the pdf continuous_model_values = accepted_parameters_manager.kernel_parameters_bds.value()[kernel_index] - if isinstance(continuous_model_values[index][0], np.float): + if isinstance(continuous_model_values[index][0], (np.float, np.float32, np.float64, np.int, np.int32, np.int64)): mean = np.array(continuous_model_values[index]).astype(float) cov = np.array(accepted_parameters_manager.accepted_cov_mats_bds.value()[kernel_index]).astype(float) return multivariate_normal(mean, cov, allow_singular=True).pdf(x) else: + print(type(continuous_model_values[index][0])) mean = np.array(np.concatenate(continuous_model_values[index])).astype(float) cov = np.array(accepted_parameters_manager.accepted_cov_mats_bds.value()[kernel_index]).astype(float) return multivariate_normal(mean, cov, allow_singular=True).pdf(np.concatenate(x)) From 00ea66bc9e0976881d1253102541fb1281c986fd Mon Sep 17 00:00:00 2001 From: statrita2004 Date: Tue, 29 Jan 2019 18:42:37 +0000 Subject: [PATCH 04/20] Fixing Multivariate StudentT kernel --- abcpy/perturbationkernel.py | 99 ++++++++++++++++++++++++++++--------- 1 file changed, 75 insertions(+), 24 deletions(-) diff --git a/abcpy/perturbationkernel.py b/abcpy/perturbationkernel.py index 47ab8446..cae23617 100644 --- a/abcpy/perturbationkernel.py +++ b/abcpy/perturbationkernel.py @@ -407,9 +407,20 @@ def calculate_cov(self, accepted_parameters_manager, kernel_index): if(accepted_parameters_manager.accepted_weights_bds is not None): weights = np.array(accepted_parameters_manager.accepted_weights_bds.value()) - cov = np.cov( - np.array(accepted_parameters_manager.kernel_parameters_bds.value()[kernel_index]).astype(float), aweights=weights.reshape(-1).astype(float), - rowvar=False) + continuous_model = [[] for i in + range(len(accepted_parameters_manager.kernel_parameters_bds.value()[kernel_index]))] + for i in range(len(accepted_parameters_manager.kernel_parameters_bds.value()[kernel_index])): + if isinstance(accepted_parameters_manager.kernel_parameters_bds.value()[kernel_index][i][0], + (np.float, np.float32, np.float64, np.int, np.int32, np.int64)): + continuous_model[i] = accepted_parameters_manager.kernel_parameters_bds.value()[kernel_index][i] + else: + continuous_model[i] = np.concatenate( + accepted_parameters_manager.kernel_parameters_bds.value()[kernel_index][i]) + cov = np.cov(np.array(continuous_model).astype(float), aweights=weights.reshape(-1).astype(float), + rowvar=False) + # cov = np.cov( + # np.array(accepted_parameters_manager.kernel_parameters_bds.value()[kernel_index]).astype(float), aweights=weights.reshape(-1).astype(float), + # rowvar=False) else: if(not(accepted_parameters_manager.accepted_parameters_bds.value().shape[1]>1)): cov = np.var(np.array(accepted_parameters_manager.kernel_parameters_bds.value()[kernel_index]).astype(float)) @@ -443,22 +454,46 @@ def update(self, accepted_parameters_manager, kernel_index, row_index, rng=np.ra # Get all parameters relevant to this kernel continuous_model_values = accepted_parameters_manager.kernel_parameters_bds.value()[kernel_index] - # Perturb - continuous_model_values = np.array(continuous_model_values) - cov = np.array(accepted_parameters_manager.accepted_cov_mats_bds.value()[kernel_index]) - p = len(continuous_model_values[row_index]) + if isinstance(continuous_model_values[row_index][0], + (np.float, np.float32, np.float64, np.int, np.int32, np.int64)): + # Perturb + continuous_model_values = np.array(continuous_model_values) + cov = np.array(accepted_parameters_manager.accepted_cov_mats_bds.value()[kernel_index]).astype(float) + p = len(continuous_model_values[row_index]) + + if (self.df == np.inf): + chisq = 1.0 + else: + chisq = rng.chisquare(self.df, 1) / self.df + chisq = chisq.reshape(-1, 1).repeat(p, axis=1) - if(self.df==np.inf): - chisq = 1.0 + mvn = rng.multivariate_normal(np.zeros(p), cov.astype(float), 1) + perturbed_continuous_values = continuous_model_values[row_index] + np.divide(mvn, np.sqrt(chisq))[0] else: - chisq = rng.chisquare(self.df, 1)/self.df - chisq = chisq.reshape(-1,1).repeat(p, axis=1) + # Learn the structure + struct = [[] for i in range(len(continuous_model_values[row_index]))] + for i in range(len(continuous_model_values[row_index])): + struct[i] = continuous_model_values[row_index][i].shape[0] + struct = np.array(struct).cumsum() + continuous_model_values = np.concatenate(continuous_model_values[row_index]) + # Perturb + cov = np.array(accepted_parameters_manager.accepted_cov_mats_bds.value()[kernel_index]).astype(float) + p = len(continuous_model_values[row_index]) - mvn = rng.multivariate_normal(np.zeros(p), cov.astype(float), 1) - perturbed_continuous_values = continuous_model_values[row_index]+np.divide(mvn, np.sqrt(chisq))[0] - return perturbed_continuous_values + if (self.df == np.inf): + chisq = 1.0 + else: + chisq = rng.chisquare(self.df, 1) / self.df + chisq = chisq.reshape(-1, 1).repeat(p, axis=1) + mvn = rng.multivariate_normal(np.zeros(p), cov.astype(float), 1) + perturbed_continuous_values = continuous_model_values[row_index] + np.divide(mvn, np.sqrt(chisq))[0] + + # Perturbed values anc split according to the structure + perturbed_continuous_values = np.split(perturbed_continuous_values, struct)[:-1] + + return perturbed_continuous_values def pdf(self, accepted_parameters_manager, kernel_index, index, x): """Calculates the pdf of the kernel. @@ -479,21 +514,37 @@ def pdf(self, accepted_parameters_manager, kernel_index, index, x): float The pdf evaluated at point x. """ + continuous_model_values = accepted_parameters_manager.kernel_parameters_bds.value()[kernel_index] - mean = np.array(accepted_parameters_manager.kernel_parameters_bds.value()[kernel_index][index]).astype(float) - cov = np.array(accepted_parameters_manager.accepted_cov_mats_bds.value()[kernel_index]).astype(float) + if isinstance(continuous_model_values[index][0], + (np.float, np.float32, np.float64, np.int, np.int32, np.int64)): + mean = np.array(continuous_model_values[index]).astype(float) + cov = np.array(accepted_parameters_manager.accepted_cov_mats_bds.value()[kernel_index]).astype(float) + + v = self.df + p = len(mean) + + numerator = gamma((v + p) / 2) + denominator = gamma(v / 2) * pow(v * np.pi, p / 2.) * np.sqrt(abs(np.linalg.det(cov))) + normalizing_const = numerator / denominator + tmp = 1 + 1 / v * np.dot(np.dot(np.transpose(x - mean), np.linalg.inv(cov)), (x - mean)) + density = normalizing_const * pow(tmp, -((v + p) / 2.)) - v = self.df - p = len(mean) + return density + else: + mean = np.array(np.concatenate(continuous_model_values[index])).astype(float) + cov = np.array(accepted_parameters_manager.accepted_cov_mats_bds.value()[kernel_index]).astype(float) - numerator = gamma((v + p) / 2) - denominator = gamma(v / 2) * pow(v * np.pi, p / 2.) * np.sqrt(abs(np.linalg.det(cov))) - normalizing_const = numerator / denominator - tmp = 1 + 1 / v * np.dot(np.dot(np.transpose(x - mean), np.linalg.inv(cov)), (x - mean)) - density = normalizing_const * pow(tmp, -((v + p) / 2.)) + v = self.df + p = len(mean) - return density + numerator = gamma((v + p) / 2) + denominator = gamma(v / 2) * pow(v * np.pi, p / 2.) * np.sqrt(abs(np.linalg.det(cov))) + normalizing_const = numerator / denominator + tmp = 1 + 1 / v * np.dot(np.dot(np.transpose(np.concatenate(x) - mean), np.linalg.inv(cov)), (np.concatenate(x) - mean)) + density = normalizing_const * pow(tmp, -((v + p) / 2.)) + return density class RandomWalkKernel(PerturbationKernel, DiscreteKernel): def __init__(self, models): From d27eaf16a60c18a2cc0ab13701dfb4500066ccb8 Mon Sep 17 00:00:00 2001 From: statrita2004 Date: Tue, 29 Jan 2019 23:52:29 +0000 Subject: [PATCH 05/20] Correcting the fix for JointPerturbationKernel --- abcpy/acceptedparametersmanager.py | 2 +- abcpy/graphtools.py | 2 ++ abcpy/perturbationkernel.py | 30 +++++++++++++++--------------- 3 files changed, 18 insertions(+), 16 deletions(-) diff --git a/abcpy/acceptedparametersmanager.py b/abcpy/acceptedparametersmanager.py index c4517b52..06045403 100644 --- a/abcpy/acceptedparametersmanager.py +++ b/abcpy/acceptedparametersmanager.py @@ -147,7 +147,7 @@ def get_accepted_parameters_bds_values(self, models): if(model==prob_model): for i in range(len(self.accepted_parameters_bds.value())): accepted_bds_values[i].append(self.accepted_parameters_bds.value()[i][index]) - accepted_bds_values = [np.array(x).reshape(-1, ) for x in accepted_bds_values] + #accepted_bds_values = [np.array(x).reshape(-1, ) for x in accepted_bds_values] return accepted_bds_values diff --git a/abcpy/graphtools.py b/abcpy/graphtools.py index 0d6d653c..2e730066 100644 --- a/abcpy/graphtools.py +++ b/abcpy/graphtools.py @@ -243,6 +243,8 @@ def _get_names_and_parameters(self): for model, index in mapping: return_value.append((model.name, self.accepted_parameters_manager.get_accepted_parameters_bds_values([model]))) + print(return_value) + return return_value diff --git a/abcpy/perturbationkernel.py b/abcpy/perturbationkernel.py index cae23617..c29825aa 100644 --- a/abcpy/perturbationkernel.py +++ b/abcpy/perturbationkernel.py @@ -321,6 +321,7 @@ def update(self, accepted_parameters_manager, kernel_index, row_index, rng=np.ra # Perturbed values anc split according to the structure perturbed_continuous_values = rng.multivariate_normal(continuous_model_values[row_index], cov) else: + #print('Hello') # Learn the structure struct = [[] for i in range(len(continuous_model_values[row_index]))] for i in range(len(continuous_model_values[row_index])): @@ -366,7 +367,6 @@ def pdf(self, accepted_parameters_manager, kernel_index, index, x): cov = np.array(accepted_parameters_manager.accepted_cov_mats_bds.value()[kernel_index]).astype(float) return multivariate_normal(mean, cov, allow_singular=True).pdf(x) else: - print(type(continuous_model_values[index][0])) mean = np.array(np.concatenate(continuous_model_values[index])).astype(float) cov = np.array(accepted_parameters_manager.accepted_cov_mats_bds.value()[kernel_index]).astype(float) return multivariate_normal(mean, cov, allow_singular=True).pdf(np.concatenate(x)) @@ -452,14 +452,14 @@ def update(self, accepted_parameters_manager, kernel_index, row_index, rng=np.ra """ # Get all parameters relevant to this kernel - continuous_model_values = accepted_parameters_manager.kernel_parameters_bds.value()[kernel_index] + continuous_model_values = accepted_parameters_manager.kernel_parameters_bds.value()[kernel_index][row_index] - if isinstance(continuous_model_values[row_index][0], + if isinstance(continuous_model_values[0], (np.float, np.float32, np.float64, np.int, np.int32, np.int64)): # Perturb continuous_model_values = np.array(continuous_model_values) cov = np.array(accepted_parameters_manager.accepted_cov_mats_bds.value()[kernel_index]).astype(float) - p = len(continuous_model_values[row_index]) + p = len(continuous_model_values) if (self.df == np.inf): chisq = 1.0 @@ -468,18 +468,18 @@ def update(self, accepted_parameters_manager, kernel_index, row_index, rng=np.ra chisq = chisq.reshape(-1, 1).repeat(p, axis=1) mvn = rng.multivariate_normal(np.zeros(p), cov.astype(float), 1) - perturbed_continuous_values = continuous_model_values[row_index] + np.divide(mvn, np.sqrt(chisq))[0] + perturbed_continuous_values = continuous_model_values + np.divide(mvn, np.sqrt(chisq))[0] else: # Learn the structure - struct = [[] for i in range(len(continuous_model_values[row_index]))] - for i in range(len(continuous_model_values[row_index])): - struct[i] = continuous_model_values[row_index][i].shape[0] + struct = [[] for i in range(len(continuous_model_values))] + for i in range(len(continuous_model_values)): + struct[i] = continuous_model_values[i].shape[0] struct = np.array(struct).cumsum() - continuous_model_values = np.concatenate(continuous_model_values[row_index]) + continuous_model_values = np.concatenate(continuous_model_values) # Perturb cov = np.array(accepted_parameters_manager.accepted_cov_mats_bds.value()[kernel_index]).astype(float) - p = len(continuous_model_values[row_index]) + p = len(continuous_model_values) if (self.df == np.inf): chisq = 1.0 @@ -488,7 +488,7 @@ def update(self, accepted_parameters_manager, kernel_index, row_index, rng=np.ra chisq = chisq.reshape(-1, 1).repeat(p, axis=1) mvn = rng.multivariate_normal(np.zeros(p), cov.astype(float), 1) - perturbed_continuous_values = continuous_model_values[row_index] + np.divide(mvn, np.sqrt(chisq))[0] + perturbed_continuous_values = continuous_model_values + np.divide(mvn, np.sqrt(chisq))[0] # Perturbed values anc split according to the structure perturbed_continuous_values = np.split(perturbed_continuous_values, struct)[:-1] @@ -514,11 +514,11 @@ def pdf(self, accepted_parameters_manager, kernel_index, index, x): float The pdf evaluated at point x. """ - continuous_model_values = accepted_parameters_manager.kernel_parameters_bds.value()[kernel_index] + continuous_model_values = accepted_parameters_manager.kernel_parameters_bds.value()[kernel_index][index] - if isinstance(continuous_model_values[index][0], + if isinstance(continuous_model_values[0], (np.float, np.float32, np.float64, np.int, np.int32, np.int64)): - mean = np.array(continuous_model_values[index]).astype(float) + mean = np.array(continuous_model_values).astype(float) cov = np.array(accepted_parameters_manager.accepted_cov_mats_bds.value()[kernel_index]).astype(float) v = self.df @@ -532,7 +532,7 @@ def pdf(self, accepted_parameters_manager, kernel_index, index, x): return density else: - mean = np.array(np.concatenate(continuous_model_values[index])).astype(float) + mean = np.array(np.concatenate(continuous_model_values)).astype(float) cov = np.array(accepted_parameters_manager.accepted_cov_mats_bds.value()[kernel_index]).astype(float) v = self.df From 6aa749a4cac8bdde29abd74f2d871512187ab3b1 Mon Sep 17 00:00:00 2001 From: statrita2004 Date: Wed, 30 Jan 2019 13:58:07 +0000 Subject: [PATCH 06/20] Updates in Output journal files --- abcpy/graphtools.py | 2 - abcpy/inferences.py | 48 +++---- abcpy/output.py | 277 ++++++++++++++++++++------------------ tests/inferences_tests.py | 116 +++++++++------- 4 files changed, 238 insertions(+), 205 deletions(-) diff --git a/abcpy/graphtools.py b/abcpy/graphtools.py index 2e730066..0d6d653c 100644 --- a/abcpy/graphtools.py +++ b/abcpy/graphtools.py @@ -243,8 +243,6 @@ def _get_names_and_parameters(self): for model, index in mapping: return_value.append((model.name, self.accepted_parameters_manager.get_accepted_parameters_bds_values([model]))) - print(return_value) - return return_value diff --git a/abcpy/inferences.py b/abcpy/inferences.py index b7af75dc..ddcde60a 100644 --- a/abcpy/inferences.py +++ b/abcpy/inferences.py @@ -227,23 +227,24 @@ def sample(self, observations, n_samples, n_samples_per_param, epsilon, full_out rng_arr = np.array([np.random.RandomState(seed) for seed in seed_arr]) rng_pds = self.backend.parallelize(rng_arr) - accepted_parameters_and_counter_pds = self.backend.map(self._sample_parameter, rng_pds) - accepted_parameters_and_counter = self.backend.collect(accepted_parameters_and_counter_pds) - accepted_parameters, counter = [list(t) for t in zip(*accepted_parameters_and_counter)] + accepted_parameters_distances_counter_pds = self.backend.map(self._sample_parameter, rng_pds) + accepted_parameters_distances_counter = self.backend.collect(accepted_parameters_distances_counter_pds) + accepted_parameters, distances, counter = [list(t) for t in zip(*accepted_parameters_distances_counter)] for count in counter: self.simulation_counter+=count accepted_parameters = np.array(accepted_parameters) + distances = np.array(distances) self.accepted_parameters_manager.update_broadcast(self.backend, accepted_parameters=accepted_parameters) - journal.add_parameters(accepted_parameters) - journal.add_weights(np.ones((n_samples, 1))) + + journal.add_weights(copy.deepcopy(np.ones((n_samples, 1)))) + journal.add_distances(copy.deepcopy(distances)) self.accepted_parameters_manager.update_broadcast(self.backend, accepted_parameters=accepted_parameters) names_and_parameters = self._get_names_and_parameters() journal.add_user_parameters(names_and_parameters) - journal.number_of_simulations.append(self.simulation_counter) return journal @@ -288,7 +289,7 @@ def _sample_parameter(self, rng, npc=None): "Needed {:4d} simulations to reach distance {:e} < epsilon = {:e}". format(counter, distance, float(self.epsilon)) ) - return (theta, counter) + return (theta, distance, counter) class PMCABC(BaseDiscrepancy, InferenceMethod): @@ -452,6 +453,7 @@ def sample(self, observations, steps, epsilon_init, n_samples = 10000, n_samples params_and_dists_and_counter = self.backend.collect(params_and_dists_and_counter_pds) new_parameters, distances, counter = [list(t) for t in zip(*params_and_dists_and_counter)] new_parameters = np.array(new_parameters) + distances = np.array(distances) for count in counter: self.simulation_counter+=count @@ -503,13 +505,12 @@ def sample(self, observations, steps, epsilon_init, n_samples = 10000, n_samples self.logger.info("Save configuration to output journal") if (full_output == 1 and aStep <= steps - 1) or (full_output == 0 and aStep == steps - 1): - journal.add_parameters(accepted_parameters) - journal.add_weights(accepted_weights) + journal.add_distances(copy.deepcopy(distances)) + journal.add_weights(copy.deepcopy(accepted_weights)) self.accepted_parameters_manager.update_broadcast(self.backend, accepted_parameters=accepted_parameters, accepted_weights=accepted_weights) names_and_parameters = self._get_names_and_parameters() journal.add_user_parameters(names_and_parameters) - journal.number_of_simulations.append(self.simulation_counter) # Add epsilon_arr to the journal @@ -864,8 +865,7 @@ def sample(self, observations, steps, n_samples = 10000, n_samples_per_param = 1 self.logger.info("Saving configuration to output journal") if (full_output == 1 and aStep <= steps - 1) or (full_output == 0 and aStep == steps - 1): - journal.add_parameters(accepted_parameters) - journal.add_weights(accepted_weights) + journal.add_weights(copy.deepcopy(accepted_weights)) journal.add_opt_values(approx_likelihood_new_parameters) self.accepted_parameters_manager.update_broadcast(self.backend, accepted_parameters=accepted_parameters, accepted_weights=accepted_weights) @@ -1277,7 +1277,6 @@ def sample(self, observations, steps, epsilon, n_samples = 10000, n_samples_per_ if (full_output == 1 and aStep<= steps-1): ## Saving intermediate configuration to output journal. print('Saving after resampling') - journal.add_parameters(copy.deepcopy(accepted_parameters)) journal.add_weights(copy.deepcopy(accepted_weights)) journal.add_distances(copy.deepcopy(distances)) names_and_parameters = self._get_names_and_parameters() @@ -1306,7 +1305,6 @@ def sample(self, observations, steps, epsilon, n_samples = 10000, n_samples_per_ if (full_output == 1 and aStep <= steps-1): ## Saving intermediate configuration to output journal. - journal.add_parameters(copy.deepcopy(accepted_parameters)) journal.add_weights(copy.deepcopy(accepted_weights)) journal.add_distances(copy.deepcopy(distances)) names_and_parameters = self._get_names_and_parameters() @@ -1316,7 +1314,6 @@ def sample(self, observations, steps, epsilon, n_samples = 10000, n_samples_per_ # Add epsilon_arr, number of final steps and final output to the journal # print("INFO: Saving final configuration to output journal.") if (full_output == 0) or (full_output ==1 and broken_preemptively and aStep<= steps-1): - journal.add_parameters(copy.deepcopy(accepted_parameters)) journal.add_weights(copy.deepcopy(accepted_weights)) journal.add_distances(copy.deepcopy(distances)) self.accepted_parameters_manager.update_broadcast(self.backend,accepted_parameters=accepted_parameters,accepted_weights=accepted_weights) @@ -1680,7 +1677,7 @@ def sample(self, observations, steps, n_samples = 10000, n_samples_per_param = 1 if full_output == 1: self.logger.info("Saving intermediate configuration to output journal") - journal.add_parameters(copy.deepcopy(accepted_parameters)) + journal.add_distances(copy.deepcopy(distances)) journal.add_weights(copy.deepcopy(accepted_weights)) journal.add_opt_values(accepted_cov_mats) self.accepted_parameters_manager.update_broadcast(self.backend, accepted_parameters=accepted_parameters, @@ -1700,7 +1697,7 @@ def sample(self, observations, steps, n_samples = 10000, n_samples_per_param = 1 # Add anneal_parameter, number of final steps and final output to the journal # print("INFO: Saving final configuration to output journal.") if full_output == 0: - journal.add_parameters(copy.deepcopy(accepted_parameters)) + journal.add_distances(copy.deepcopy(distances)) journal.add_weights(copy.deepcopy(accepted_weights)) journal.add_opt_values(accepted_cov_mats) self.accepted_parameters_manager.update_broadcast(self.backend, accepted_parameters=accepted_parameters, @@ -2056,7 +2053,7 @@ def sample(self, observations, steps, n_samples = 10000, n_samples_per_param = 1 if (full_output == 1 and aStep <= steps - 1) or (full_output == 0 and aStep == steps - 1): self.logger.info("Saving configuration to output journal.") - journal.add_parameters(copy.deepcopy(accepted_parameters)) + journal.add_distances(copy.deepcopy(accepted_dist)) journal.add_weights(np.ones(shape=(len(accepted_parameters), 1)) * (1 / len(accepted_parameters))) self.accepted_parameters_manager.update_broadcast(self.backend, accepted_parameters=accepted_parameters) names_and_parameters = self._get_names_and_parameters() @@ -2368,7 +2365,7 @@ def sample(self, observations, steps, n_samples = 10000, n_samples_per_param = 1 # print("INFO: Saving configuration to output journal.") if (full_output == 1 and aStep <= steps - 1) or (full_output == 0 and aStep == steps - 1): - journal.add_parameters(copy.deepcopy(accepted_parameters)) + journal.add_distances(copy.deepcopy(accepted_dist)) journal.add_weights(copy.deepcopy(accepted_weights)) self.accepted_parameters_manager.update_broadcast(self.backend, accepted_parameters=accepted_parameters, @@ -2637,7 +2634,8 @@ def sample(self, observations, steps, n_samples = 10000, n_samples_per_param = 1 self.logger.info("Resampling") # Weighted resampling: index_resampled = self.rng.choice(np.arange(n_samples), n_samples, replace=1, p=new_weights) - accepted_parameters = accepted_parameters[index_resampled, :] + print(accepted_parameters) + accepted_parameters = accepted_parameters[index_resampled] new_weights = np.ones(shape=(n_samples), ) * (1.0 / n_samples) # Update the weights @@ -2676,8 +2674,9 @@ def sample(self, observations, steps, n_samples = 10000, n_samples_per_param = 1 self.logger.info("Resampling parameters") params_and_ysim_pds = self.backend.map(self._accept_parameter, rng_and_index_pds) params_and_ysim = self.backend.collect(params_and_ysim_pds) - new_parameters, new_y_sim, counter = [list(t) for t in zip(*params_and_ysim)] + new_parameters, new_y_sim, distances, counter = [list(t) for t in zip(*params_and_ysim)] new_parameters = np.array(new_parameters) + distances = np.array(distances) for count in counter: self.simulation_counter+=count @@ -2689,7 +2688,8 @@ def sample(self, observations, steps, n_samples = 10000, n_samples_per_param = 1 if (full_output == 1 and aStep <= steps - 1) or (full_output == 0 and aStep == steps - 1): self.logger.info("Saving configuration to output journal") self.accepted_parameters_manager.update_broadcast(self.backend, accepted_parameters=accepted_parameters) - journal.add_parameters(copy.deepcopy(accepted_parameters)) + + journal.add_distances(copy.deepcopy(distances)) journal.add_weights(copy.deepcopy(accepted_weights)) journal.add_opt_values(copy.deepcopy(accepted_y_sim)) @@ -2846,5 +2846,5 @@ def _accept_parameter(self, rng_and_index, npc=None): else: self.set_parameters(self.accepted_parameters_manager.accepted_parameters_bds.value()[index]) y_sim = self.accepted_y_sim_bds.value()[index] - - return (self.get_parameters(), y_sim, counter) \ No newline at end of file + distance = self.distance.distance(self.accepted_parameters_manager.observations_bds.value(), y_sim) + return (self.get_parameters(), y_sim, distance, counter) \ No newline at end of file diff --git a/abcpy/output.py b/abcpy/output.py index cb63aeb8..c34d528b 100644 --- a/abcpy/output.py +++ b/abcpy/output.py @@ -32,13 +32,14 @@ def __init__(self, type): type=1 logs all generated information (reproducibily use). """ - self.parameters = [] + #self.parameters = [] + self.names_and_parameters = [] self.weights = [] self.distances = [] self.opt_values = [] self.configuration = {} - self.names_and_parameters = [] + if type not in [0, 1]: raise ValueError("Parameter type has not valid value.") @@ -47,8 +48,6 @@ def __init__(self, type): self.number_of_simulations =[] - - @classmethod def fromFile(cls, filename): """This method reads a saved journal from disk an returns it as an object. @@ -77,25 +76,6 @@ def fromFile(cls, filename): with open(filename, 'rb') as input: journal = pickle.load(input) return journal - - - - def add_parameters(self, params): - """ - Saves provided parameters by appending them to the journal. If type==0, old parameters get overwritten. - - Parameters - ---------- - params: numpy.array - nxp matrix containing n parameters of dimension p - """ - - if self._type == 0: - self.parameters = [params] - - if self._type == 1: - self.parameters.append(params) - def add_user_parameters(self, names_and_params): """ @@ -111,63 +91,6 @@ def add_user_parameters(self, names_and_params): else: self.names_and_parameters.append(dict(names_and_params)) - - def get_parameters(self, iteration=None): - """ - Returns the parameters from a sampling scheme. - - For intermediate results, pass the iteration. - - Parameters - ---------- - iteration: int - specify the iteration for which to return parameters - """ - - if iteration is None: - endp = len(self.parameters) - 1 - params = self.names_and_parameters[endp] - return params - else: - return self.names_and_parameters[iteration] - - - def _get_parameter_values(self): - """ - Returns the parameters in the order dictated by the graph structure. - - Returns - ------- - numpy.array: - The parameters of the model - """ - - endp = len(self.parameters)-1 - params = self.parameters[endp] - return params - - - - def get_weights(self, iteration=None): - """ - Returns the weights from a sampling scheme. - - For intermediate results, pass the iteration. - - Parameters - ---------- - iteration: int - specify the iteration for which to return weights - """ - - if iteration is None: - end = len(self.weights) - 1 - return self.weights[end] - else: - return self.weights[iteration] - - - def add_weights(self, weights): """ Saves provided weights by appending them to the journal. If type==0, old weights get overwritten. @@ -184,24 +107,6 @@ def add_weights(self, weights): if self._type == 1: self.weights.append(weights) - def get_distances(self, iteration=None): - """ - Returns the distances from a sampling scheme. - - For intermediate results, pass the iteration. - - Parameters - ---------- - iteration: int - specify the iteration for which to return distances - """ - - if iteration is None: - end = len(self.distances) - 1 - return self.distances[end] - else: - return self.distances[iteration] - def add_distances(self, distances): """ Saves provided distances by appending them to the journal. If type==0, old weights get overwritten. @@ -218,7 +123,6 @@ def add_distances(self, distances): if self._type == 1: self.distances.append(distances) - def add_opt_values(self, opt_values): """ Saves provided values of the evaluation of the schemes objective function. If type==0, old values get overwritten @@ -235,7 +139,6 @@ def add_opt_values(self, opt_values): if self._type == 1: self.opt_values.append(opt_values) - def save(self, filename): """ Stores the journal to disk. @@ -245,51 +148,132 @@ def save(self, filename): filename: string the location of the file to store the current object to. """ - + with open(filename, 'wb') as output: pickle.dump(self, output, -1) - + def get_parameters(self, iteration=None): + """ + Returns the parameters from a sampling scheme. + + For intermediate results, pass the iteration. + + Parameters + ---------- + iteration: int + specify the iteration for which to return parameters + + Returns + ------- + names_and_parameters: dictionary + Samples from the specified iteration (last, if not specified) returned as a disctionary with names of the + random variables + """ - def posterior_mean(self): + if iteration is None: + endp = len(self.names_and_parameters) - 1 + params = self.names_and_parameters[endp] + return params + else: + return self.names_and_parameters[iteration] + + def get_weights(self, iteration=None): + """ + Returns the weights from a sampling scheme. + + For intermediate results, pass the iteration. + + Parameters + ---------- + iteration: int + specify the iteration for which to return weights + """ + + if iteration is None: + end = len(self.weights) - 1 + return self.weights[end] + else: + return self.weights[iteration] + + def get_distances(self, iteration=None): + """ + Returns the distances from a sampling scheme. + + For intermediate results, pass the iteration. + + Parameters + ---------- + iteration: int + specify the iteration for which to return distances + """ + + if iteration is None: + end = len(self.distances) - 1 + return self.distances[end] + else: + return self.distances[iteration] + + def posterior_mean(self, iteration=None): """ Computes posterior mean from the samples drawn from posterior distribution + For intermediate results, pass the iteration. + + Parameters + ---------- + iteration: int + specify the iteration for which to return posterior mean + Returns ------- - np.ndarray - posterior mean + posterior mean: dictionary + Posterior mean from the specified iteration (last, if not specified) returned as a disctionary with names of the + random variables """ - endp = len(self.parameters) - 1 - endw = len(self.weights) - 1 - params = self.parameters[endp] - weights = self.weights[endw] + if iteration is None: + endp = len(self.names_and_parameters) - 1 + params = self.names_and_parameters[endp] + weights = self.weights[endp] + else: + params = self.names_and_parameters[iteration] + weights = self.weights[iteration] - return np.average(params, weights = weights.reshape(len(weights),), axis = 0) + return_value = [] + for keyind in params.keys(): + return_value.append((keyind, np.average(np.array(params[keyind]).squeeze(), weights = weights.reshape(len(weights),), axis = 0))) - + return dict(return_value) - def posterior_cov(self): + def posterior_cov(self, iteration=None): """ Computes posterior covariance from the samples drawn from posterior distribution Returns ------- np.ndarray - posterior covariance + posterior covariance + dic + order of the variables in the covariance matrix """ - endp = len(self.parameters) - 1 - endw = len(self.weights) - 1 - params = self.parameters[endp] - weights = self.weights[endw] - - return np.cov(np.transpose(params), aweights = weights.reshape(len(weights),)) + if iteration is None: + endp = len(self.names_and_parameters) - 1 + params = self.names_and_parameters[endp] + weights = self.weights[endp] + else: + params = self.names_and_parameters[iteration] + weights = self.weights[iteration] + + joined_params = [] + for keyind in params.keys(): + joined_params.append(np.array(params[keyind]).squeeze(axis=1)) + + return np.cov(np.transpose(np.hstack(joined_params)), aweights = weights.reshape(len(weights),)), params.keys() - def posterior_histogram(self, n_bins = 10): + def posterior_histogram(self, iteration=None, n_bins = 10): """ Computes a weighted histogram of multivariate posterior samples andreturns histogram H and A list of p arrays describing the bin @@ -299,13 +283,48 @@ def posterior_histogram(self, n_bins = 10): ------- python list containing two elements (H = np.ndarray, edges = list of p arraya) - """ - endp = len(self.parameters) - 1 - endw = len(self.weights) - 1 - - params = self.parameters[endp] - weights = self.weights[endw] - weights.shape - H, edges = np.histogramdd(params, bins = n_bins, weights = weights.reshape(len(weights),)) - + """ + if iteration is None: + endp = len(self.names_and_parameters) - 1 + params = self.names_and_parameters[endp] + weights = self.weights[endp] + else: + params = self.names_and_parameters[iteration] + weights = self.weights[iteration] + + joined_params = [] + for keyind in params.keys(): + joined_params.append(np.array(params[keyind]).squeeze(axis=1)) + + H, edges = np.histogramdd(np.hstack(joined_params), bins = n_bins, weights = weights.reshape(len(weights),)) return [H, edges] + + # def add_parameters(self, params): + # """ + # Saves provided parameters by appending them to the journal. If type==0, old parameters get overwritten. + # + # Parameters + # ---------- + # params: numpy.array + # nxp matrix containing n parameters of dimension p + # """ + # + # if self._type == 0: + # self.parameters = [params] + # + # if self._type == 1: + # self.parameters.append(params) + # + # def _get_parameter_values(self): + # """ + # Returns the parameters in the order dictated by the graph structure. + # + # Returns + # ------- + # numpy.array: + # The parameters of the model + # """ + # + # endp = len(self.parameters) - 1 + # params = self.parameters[endp] + # return params diff --git a/tests/inferences_tests.py b/tests/inferences_tests.py index f49633d9..e0f23dad 100644 --- a/tests/inferences_tests.py +++ b/tests/inferences_tests.py @@ -171,10 +171,12 @@ def test_sample(self): mu_post_sample, sigma_post_sample, post_weights = np.array(journal.get_parameters()['mu']), np.array(journal.get_parameters()['sigma']), np.array(journal.get_weights()) # Compute posterior mean - mu_post_mean, sigma_post_mean = np.average(mu_post_sample, weights=post_weights, axis=0), np.average(sigma_post_sample, weights=post_weights, axis=0) + mu_post_mean, sigma_post_mean = journal.posterior_mean()['mu'], journal.posterior_mean()['sigma'] # test shape of sample - mu_sample_shape, sigma_sample_shape, weights_sample_shape = np.shape(mu_post_sample), np.shape(mu_post_sample), np.shape(post_weights) + mu_sample_shape, sigma_sample_shape, weights_sample_shape = (len(mu_post_sample), mu_post_sample[0].shape[1]), \ + (len(sigma_post_sample), sigma_post_sample[0].shape[1]), post_weights.shape + self.assertEqual(mu_sample_shape, (10,1)) self.assertEqual(sigma_sample_shape, (10,1)) self.assertEqual(weights_sample_shape, (10,1)) @@ -189,12 +191,14 @@ def test_sample(self): sampler.sample_from_prior(rng=np.random.RandomState(1)) journal = sampler.sample([self.observation], T, eps_arr, n_sample, n_simulate, eps_percentile) mu_post_sample, sigma_post_sample, post_weights = np.array(journal.get_parameters()['mu']), np.array(journal.get_parameters()['sigma']), np.array(journal.get_weights()) - + # Compute posterior mean - mu_post_mean, sigma_post_mean = np.average(mu_post_sample, weights=post_weights, axis=0), np.average(sigma_post_sample, weights=post_weights, axis=0) + mu_post_mean, sigma_post_mean = journal.posterior_mean()['mu'], journal.posterior_mean()['sigma'] # test shape of sample - mu_sample_shape, sigma_sample_shape, weights_sample_shape = np.shape(mu_post_sample), np.shape(mu_post_sample), np.shape(post_weights) + mu_sample_shape, sigma_sample_shape, weights_sample_shape = (len(mu_post_sample), mu_post_sample[0].shape[1]), \ + (len(sigma_post_sample), sigma_post_sample[0].shape[1]), post_weights.shape + self.assertEqual(mu_sample_shape, (10,1)) self.assertEqual(sigma_sample_shape, (10,1)) self.assertEqual(weights_sample_shape, (10,1)) @@ -229,12 +233,19 @@ def test_sample(self): sampler = SABC([self.model], [self.dist_calc], self.backend, seed = 1) journal = sampler.sample([self.observation], steps, epsilon, n_samples, n_samples_per_param) mu_post_sample, sigma_post_sample, post_weights = np.array(journal.get_parameters()['mu']), np.array(journal.get_parameters()['sigma']), np.array(journal.get_weights()) - + # Compute posterior mean - mu_post_mean, sigma_post_mean = np.average(mu_post_sample, weights=post_weights, axis=0), np.average(sigma_post_sample, weights=post_weights, axis=0) + mu_post_mean, sigma_post_mean = journal.posterior_mean()['mu'], journal.posterior_mean()['sigma'] + + print(journal.get_parameters()) + print(len(mu_post_sample)) + print(mu_post_sample[0]) + print(mu_post_sample[0].shape) # test shape of sample - mu_sample_shape, sigma_sample_shape, weights_sample_shape = np.shape(mu_post_sample), np.shape(mu_post_sample), np.shape(post_weights) + mu_sample_shape, sigma_sample_shape, weights_sample_shape = (len(mu_post_sample), mu_post_sample[0].shape[0]), \ + (len(sigma_post_sample), sigma_post_sample[0].shape[1]), post_weights.shape + self.assertEqual(mu_sample_shape, (10,1)) self.assertEqual(sigma_sample_shape, (10,1)) self.assertEqual(weights_sample_shape, (10,1)) @@ -245,12 +256,14 @@ def test_sample(self): sampler = SABC([self.model], [self.dist_calc], self.backend, seed = 1) journal = sampler.sample([self.observation], steps, epsilon, n_samples, n_samples_per_param) mu_post_sample, sigma_post_sample, post_weights = np.array(journal.get_parameters()['mu']), np.array(journal.get_parameters()['sigma']), np.array(journal.get_weights()) - + # Compute posterior mean - mu_post_mean, sigma_post_mean = np.average(mu_post_sample, weights=post_weights, axis=0), np.average(sigma_post_sample, weights=post_weights, axis=0) + mu_post_mean, sigma_post_mean = journal.posterior_mean()['mu'], journal.posterior_mean()['sigma'] # test shape of sample - mu_sample_shape, sigma_sample_shape, weights_sample_shape = np.shape(mu_post_sample), np.shape(mu_post_sample), np.shape(post_weights) + mu_sample_shape, sigma_sample_shape, weights_sample_shape = (len(mu_post_sample), mu_post_sample[0].shape[1]), \ + (len(sigma_post_sample), sigma_post_sample[0].shape[1]), post_weights.shape + self.assertEqual(mu_sample_shape, (10,1)) self.assertEqual(sigma_sample_shape, (10,1)) self.assertEqual(weights_sample_shape, (10,1)) @@ -284,14 +297,15 @@ def test_sample(self): steps, n_samples, n_samples_per_param = 1, 10, 1 sampler = ABCsubsim([self.model], [self.dist_calc], self.backend, seed = 1) journal = sampler.sample([self.observation], steps, n_samples, n_samples_per_param) - mu_post_sample, sigma_post_sample, post_weights = np.array(journal.get_parameters()['mu']), np.array( - journal.get_parameters()['sigma']), np.array(journal.get_weights()) - + mu_post_sample, sigma_post_sample, post_weights = np.array(journal.get_parameters()['mu']), np.array(journal.get_parameters()['sigma']), np.array(journal.get_weights()) + # Compute posterior mean - mu_post_mean, sigma_post_mean = np.average(mu_post_sample, weights=post_weights, axis=0), np.average(sigma_post_sample, weights=post_weights, axis=0) + mu_post_mean, sigma_post_mean = journal.posterior_mean()['mu'], journal.posterior_mean()['sigma'] # test shape of sample - mu_sample_shape, sigma_sample_shape, weights_sample_shape = np.shape(mu_post_sample), np.shape(mu_post_sample), np.shape(post_weights) + mu_sample_shape, sigma_sample_shape, weights_sample_shape = (len(mu_post_sample), mu_post_sample[0].shape[1]), \ + (len(sigma_post_sample), sigma_post_sample[0].shape[1]), post_weights.shape + self.assertEqual(mu_sample_shape, (10,1)) self.assertEqual(sigma_sample_shape, (10,1)) self.assertEqual(weights_sample_shape, (10,1)) @@ -302,14 +316,15 @@ def test_sample(self): sampler = ABCsubsim([self.model], [self.dist_calc], self.backend, seed = 1) sampler.sample_from_prior(rng=np.random.RandomState(1)) journal = sampler.sample([self.observation], steps, n_samples, n_samples_per_param) - mu_post_sample, sigma_post_sample, post_weights = np.array(journal.get_parameters()['mu']), np.array( - journal.get_parameters()['sigma']), np.array(journal.get_weights()) - + mu_post_sample, sigma_post_sample, post_weights = np.array(journal.get_parameters()['mu']), np.array(journal.get_parameters()['sigma']), np.array(journal.get_weights()) + # Compute posterior mean - mu_post_mean, sigma_post_mean = np.average(mu_post_sample, weights=post_weights, axis=0), np.average(sigma_post_sample, weights=post_weights, axis=0) + mu_post_mean, sigma_post_mean = journal.posterior_mean()['mu'], journal.posterior_mean()['sigma'] # test shape of sample - mu_sample_shape, sigma_sample_shape, weights_sample_shape = np.shape(mu_post_sample), np.shape(mu_post_sample), np.shape(post_weights) + mu_sample_shape, sigma_sample_shape, weights_sample_shape = (len(mu_post_sample), mu_post_sample[0].shape[1]), \ + (len(sigma_post_sample), sigma_post_sample[0].shape[1]), post_weights.shape + self.assertEqual(mu_sample_shape, (10,1)) self.assertEqual(sigma_sample_shape, (10,1)) self.assertEqual(weights_sample_shape, (10,1)) @@ -344,14 +359,15 @@ def test_sample(self): steps, n_sample, n_simulate = 1, 10, 1 sampler = SMCABC([self.model], [self.dist_calc], self.backend, seed = 1) journal = sampler.sample([self.observation], steps, n_sample, n_simulate) - mu_post_sample, sigma_post_sample, post_weights = np.array(journal.get_parameters()['mu']), np.array( - journal.get_parameters()['sigma']), np.array(journal.get_weights()) - + mu_post_sample, sigma_post_sample, post_weights = np.array(journal.get_parameters()['mu']), np.array(journal.get_parameters()['sigma']), np.array(journal.get_weights()) + # Compute posterior mean - mu_post_mean, sigma_post_mean = np.average(mu_post_sample, weights=post_weights, axis=0), np.average(sigma_post_sample, weights=post_weights, axis=0) + mu_post_mean, sigma_post_mean = journal.posterior_mean()['mu'], journal.posterior_mean()['sigma'] # test shape of sample - mu_sample_shape, sigma_sample_shape, weights_sample_shape = np.shape(mu_post_sample), np.shape(mu_post_sample), np.shape(post_weights) + mu_sample_shape, sigma_sample_shape, weights_sample_shape = (len(mu_post_sample), mu_post_sample[0].shape[1]), \ + (len(sigma_post_sample), sigma_post_sample[0].shape[1]), post_weights.shape + self.assertEqual(mu_sample_shape, (10,1)) self.assertEqual(sigma_sample_shape, (10,1)) self.assertEqual(weights_sample_shape, (10,1)) @@ -362,14 +378,14 @@ def test_sample(self): T, n_sample, n_simulate = 2, 10, 1 sampler = SMCABC([self.model], [self.dist_calc], self.backend, seed = 1) journal = sampler.sample([self.observation], T, n_sample, n_simulate) - mu_post_sample, sigma_post_sample, post_weights = np.array(journal.get_parameters()['mu']), np.array( - journal.get_parameters()['sigma']), np.array(journal.get_weights()) - + mu_post_sample, sigma_post_sample, post_weights = np.array(journal.get_parameters()['mu']), np.array(journal.get_parameters()['sigma']), np.array(journal.get_weights()) + # Compute posterior mean - mu_post_mean, sigma_post_mean = np.average(mu_post_sample, weights=post_weights, axis=0), np.average(sigma_post_sample, weights=post_weights, axis=0) + mu_post_mean, sigma_post_mean = journal.posterior_mean()['mu'], journal.posterior_mean()['sigma'] # test shape of sample - mu_sample_shape, sigma_sample_shape, weights_sample_shape = np.shape(mu_post_sample), np.shape(mu_post_sample), np.shape(post_weights) + mu_sample_shape, sigma_sample_shape, weights_sample_shape = (len(mu_post_sample), mu_post_sample[0].shape[1]), \ + (len(sigma_post_sample), sigma_post_sample[0].shape[1]), post_weights.shape self.assertEqual(mu_sample_shape, (10,1)) self.assertEqual(sigma_sample_shape, (10,1)) self.assertEqual(weights_sample_shape, (10,1)) @@ -403,14 +419,14 @@ def test_sample(self): steps, n_sample, n_simulate = 1, 10, 1 sampler = APMCABC([self.model], [self.dist_calc], self.backend, seed = 1) journal = sampler.sample([self.observation], steps, n_sample, n_simulate) - mu_post_sample, sigma_post_sample, post_weights = np.array(journal.get_parameters()['mu']), np.array( - journal.get_parameters()['sigma']), np.array(journal.get_weights()) + mu_post_sample, sigma_post_sample, post_weights = np.array(journal.get_parameters()['mu']), np.array(journal.get_parameters()['sigma']), np.array(journal.get_weights()) # Compute posterior mean - mu_post_mean, sigma_post_mean = np.average(mu_post_sample, weights=post_weights, axis=0), np.average(sigma_post_sample, weights=post_weights, axis=0) + mu_post_mean, sigma_post_mean = journal.posterior_mean()['mu'], journal.posterior_mean()['sigma'] # test shape of sample - mu_sample_shape, sigma_sample_shape, weights_sample_shape = np.shape(mu_post_sample), np.shape(mu_post_sample), np.shape(post_weights) + mu_sample_shape, sigma_sample_shape, weights_sample_shape = (len(mu_post_sample), mu_post_sample[0].shape[1]), \ + (len(sigma_post_sample), sigma_post_sample[0].shape[1]), post_weights.shape self.assertEqual(mu_sample_shape, (10,1)) self.assertEqual(sigma_sample_shape, (10,1)) self.assertEqual(weights_sample_shape, (10,1)) @@ -422,14 +438,14 @@ def test_sample(self): T, n_sample, n_simulate = 2, 10, 1 sampler = APMCABC([self.model], [self.dist_calc], self.backend, seed = 1) journal = sampler.sample([self.observation], T, n_sample, n_simulate) - mu_post_sample, sigma_post_sample, post_weights = np.array(journal.get_parameters()['mu']), np.array( - journal.get_parameters()['sigma']), np.array(journal.get_weights()) - + mu_post_sample, sigma_post_sample, post_weights = np.array(journal.get_parameters()['mu']), np.array(journal.get_parameters()['sigma']), np.array(journal.get_weights()) + # Compute posterior mean - mu_post_mean, sigma_post_mean = np.average(mu_post_sample, weights=post_weights, axis=0), np.average(sigma_post_sample, weights=post_weights, axis=0) + mu_post_mean, sigma_post_mean = journal.posterior_mean()['mu'], journal.posterior_mean()['sigma'] # test shape of sample - mu_sample_shape, sigma_sample_shape, weights_sample_shape = np.shape(mu_post_sample), np.shape(mu_post_sample), np.shape(post_weights) + mu_sample_shape, sigma_sample_shape, weights_sample_shape = (len(mu_post_sample), mu_post_sample[0].shape[1]), \ + (len(sigma_post_sample), sigma_post_sample[0].shape[1]), post_weights.shape self.assertEqual(mu_sample_shape, (10,1)) self.assertEqual(sigma_sample_shape, (10,1)) self.assertEqual(weights_sample_shape, (10,1)) @@ -463,14 +479,14 @@ def test_sample(self): steps, n_sample, n_simulate = 1, 10, 1 sampler = RSMCABC([self.model], [self.dist_calc], self.backend, seed = 1) journal = sampler.sample([self.observation], steps, n_sample, n_simulate) - mu_post_sample, sigma_post_sample, post_weights = np.array(journal.get_parameters()['mu']), np.array( - journal.get_parameters()['sigma']), np.array(journal.get_weights()) - + mu_post_sample, sigma_post_sample, post_weights = np.array(journal.get_parameters()['mu']), np.array(journal.get_parameters()['sigma']), np.array(journal.get_weights()) + # Compute posterior mean - mu_post_mean, sigma_post_mean = np.average(mu_post_sample, weights=post_weights, axis=0), np.average(sigma_post_sample, weights=post_weights, axis=0) + mu_post_mean, sigma_post_mean = journal.posterior_mean()['mu'], journal.posterior_mean()['sigma'] # test shape of sample - mu_sample_shape, sigma_sample_shape, weights_sample_shape = np.shape(mu_post_sample), np.shape(mu_post_sample), np.shape(post_weights) + mu_sample_shape, sigma_sample_shape, weights_sample_shape = (len(mu_post_sample), mu_post_sample[0].shape[1]), \ + (len(sigma_post_sample), sigma_post_sample[0].shape[1]), post_weights.shape self.assertEqual(mu_sample_shape, (10,1)) self.assertEqual(sigma_sample_shape, (10,1)) self.assertEqual(weights_sample_shape, (10,1)) @@ -484,14 +500,14 @@ def test_sample(self): sampler = RSMCABC([self.model], [self.dist_calc], self.backend, seed = 1) journal = sampler.sample([self.observation], steps, n_sample, n_simulate) sampler.sample_from_prior(rng=np.random.RandomState(1)) - mu_post_sample, sigma_post_sample, post_weights = np.array(journal.get_parameters()['mu']), np.array( - journal.get_parameters()['sigma']), np.array(journal.get_weights()) - + mu_post_sample, sigma_post_sample, post_weights = np.array(journal.get_parameters()['mu']), np.array(journal.get_parameters()['sigma']), np.array(journal.get_weights()) + # Compute posterior mean - mu_post_mean, sigma_post_mean = np.average(mu_post_sample, weights=post_weights, axis=0), np.average(sigma_post_sample, weights=post_weights, axis=0) + mu_post_mean, sigma_post_mean = journal.posterior_mean()['mu'], journal.posterior_mean()['sigma'] # test shape of sample - mu_sample_shape, sigma_sample_shape, weights_sample_shape = np.shape(mu_post_sample), np.shape(mu_post_sample), np.shape(post_weights) + mu_sample_shape, sigma_sample_shape, weights_sample_shape = (len(mu_post_sample), mu_post_sample[0].shape[1]), \ + (len(sigma_post_sample), sigma_post_sample[0].shape[1]), post_weights.shape self.assertEqual(mu_sample_shape, (10,1)) self.assertEqual(sigma_sample_shape, (10,1)) self.assertEqual(weights_sample_shape, (10,1)) From ddad093f90d2052ef2841cf85f4a44d87f3bbd95 Mon Sep 17 00:00:00 2001 From: statrita2004 Date: Wed, 30 Jan 2019 14:46:13 +0000 Subject: [PATCH 07/20] SMCABC fixed for HD variables --- abcpy/inferences.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/abcpy/inferences.py b/abcpy/inferences.py index ddcde60a..ab7bd44c 100644 --- a/abcpy/inferences.py +++ b/abcpy/inferences.py @@ -2636,6 +2636,7 @@ def sample(self, observations, steps, n_samples = 10000, n_samples_per_param = 1 index_resampled = self.rng.choice(np.arange(n_samples), n_samples, replace=1, p=new_weights) print(accepted_parameters) accepted_parameters = accepted_parameters[index_resampled] + print(accepted_parameters) new_weights = np.ones(shape=(n_samples), ) * (1.0 / n_samples) # Update the weights @@ -2675,7 +2676,7 @@ def sample(self, observations, steps, n_samples = 10000, n_samples_per_param = 1 params_and_ysim_pds = self.backend.map(self._accept_parameter, rng_and_index_pds) params_and_ysim = self.backend.collect(params_and_ysim_pds) new_parameters, new_y_sim, distances, counter = [list(t) for t in zip(*params_and_ysim)] - new_parameters = np.array(new_parameters) + #new_parameters = np.array(new_parameters) distances = np.array(distances) for count in counter: @@ -2811,7 +2812,8 @@ def _accept_parameter(self, rng_and_index, npc=None): counter+=1 else: if self.accepted_parameters_manager.accepted_weights_bds.value()[index] > 0: - theta = np.array(self.accepted_parameters_manager.accepted_parameters_bds.value()[index]).reshape(-1,) + theta = self.accepted_parameters_manager.accepted_parameters_bds.value()[index] + #theta = np.array(self.accepted_parameters_manager.accepted_parameters_bds.value()[index]).reshape(-1,) while True: perturbation_output = self.perturb(index, rng=rng) if perturbation_output[0] and self.pdf_of_prior(self.model, perturbation_output[1]) != 0: @@ -2831,12 +2833,14 @@ def _accept_parameter(self, rng_and_index, npc=None): ratio_data_epsilon = 1 else: ratio_data_epsilon = numerator / denominator + ratio_prior_prob = self.pdf_of_prior(self.model, perturbation_output[1]) / self.pdf_of_prior(self.model, theta) kernel_numerator = self.kernel.pdf(mapping_for_kernels, self.accepted_parameters_manager, index, theta) kernel_denominator = self.kernel.pdf(mapping_for_kernels, self.accepted_parameters_manager, index, perturbation_output[1]) ratio_likelihood_prob = kernel_numerator / kernel_denominator + acceptance_prob = min(1, ratio_data_epsilon * ratio_prior_prob * ratio_likelihood_prob) if rng.binomial(1, acceptance_prob) == 1: self.set_parameters(perturbation_output[1]) From 3b405eed80963f1ce373acae092ad048c6eee854 Mon Sep 17 00:00:00 2001 From: statrita2004 Date: Wed, 30 Jan 2019 17:42:21 +0000 Subject: [PATCH 08/20] All inference schemes adapted to the new modification in Graphical Model structure --- abcpy/inferences.py | 88 +++++++++++++++++++++++++-------------- tests/inferences_tests.py | 5 --- 2 files changed, 56 insertions(+), 37 deletions(-) diff --git a/abcpy/inferences.py b/abcpy/inferences.py index ab7bd44c..6b7a839e 100644 --- a/abcpy/inferences.py +++ b/abcpy/inferences.py @@ -1114,7 +1114,7 @@ def sample(self, observations, steps, epsilon, n_samples = 10000, n_samples_per_ else: journal = Journal.fromFile(journal_file) - accepted_parameters = np.zeros(shape=(n_samples, len(self.get_parameters(self.model)))) + accepted_parameters = None distances = np.zeros(shape=(n_samples,)) smooth_distances = np.zeros(shape=(n_samples,)) accepted_weights = np.ones(shape=(n_samples, 1)) @@ -1191,7 +1191,7 @@ def sample(self, observations, steps, epsilon, n_samples = 10000, n_samples_per_ for count in counter: self.simulation_counter+=count - new_parameters = np.array(new_parameters) + #new_parameters = np.array(new_parameters) new_distances = np.array(new_distances) new_all_distances = np.concatenate(new_all_distances) index = np.array(index) @@ -1204,7 +1204,12 @@ def sample(self, observations, steps, epsilon, n_samples = 10000, n_samples_per_ all_distances = new_all_distances # Initialize/Update the accepted parameters and their corresponding distances - accepted_parameters[index[acceptance == 1], :] = new_parameters[acceptance == 1, :] + if accepted_parameters is None: + accepted_parameters = new_parameters + else: + for ind in range(len(acceptance)): + if acceptance[ind] == 1: + accepted_parameters[index[ind]] = new_parameters[ind] distances[index[acceptance == 1]] = new_distances[acceptance == 1] # 2: Smoothing of the distances @@ -1241,7 +1246,7 @@ def sample(self, observations, steps, epsilon, n_samples = 10000, n_samples_per_ weight = np.exp(-smooth_distances * delta / U) weight = weight / sum(weight) index_resampled = self.rng.choice(np.arange(n_samples), n_samples, replace=1, p=weight) - accepted_parameters = accepted_parameters[index_resampled, :] + accepted_parameters = accepted_parameters[index_resampled] smooth_distances = smooth_distances[index_resampled] ## Update U and epsilon: @@ -1256,7 +1261,7 @@ def sample(self, observations, steps, epsilon, n_samples = 10000, n_samples_per_ ## Compute and broadcast accepted parameters, accepted kernel parameters and accepted Covariance matrix # Broadcast Accepted parameters and add to journal - self.accepted_parameters_manager.update_broadcast(self.backend, accepted_parameters=accepted_parameters) + self.accepted_parameters_manager.update_broadcast(self.backend, accepted_weights=accepted_weights, accepted_parameters=accepted_parameters) # Compute Accepetd Kernel parameters and broadcast them kernel_parameters = [] for kernel in self.kernel.kernels: @@ -1285,7 +1290,7 @@ def sample(self, observations, steps, epsilon, n_samples = 10000, n_samples_per_ else: ## Compute and broadcast accepted parameters, accepted kernel parameters and accepted Covariance matrix # Broadcast Accepted parameters - self.accepted_parameters_manager.update_broadcast(self.backend, accepted_parameters=accepted_parameters) + self.accepted_parameters_manager.update_broadcast(self.backend, accepted_weights= accepted_weights, accepted_parameters=accepted_parameters) # Compute Accepetd Kernel parameters and broadcast them kernel_parameters = [] for kernel in self.kernel.kernels: @@ -1294,7 +1299,7 @@ def sample(self, observations, steps, epsilon, n_samples = 10000, n_samples_per_ self.accepted_parameters_manager.update_kernel_values(self.backend, kernel_parameters=kernel_parameters) # Compute Kernel Covariance Matrix and broadcast it new_cov_mats = self.kernel.calculate_cov(self.accepted_parameters_manager) - if accepted_parameters.shape[1] > 1: + if len(accepted_parameters[0]) > 1: accepted_cov_mats = [beta * new_cov_mat + 0.0001 * np.trace(new_cov_mat) * np.eye(len(new_cov_mat)) for new_cov_mat in new_cov_mats] else: @@ -1429,7 +1434,7 @@ def _accept_parameter(self, data, npc=None): while acceptance == 0: self.sample_from_prior(rng=rng) - new_theta = np.array(self.get_parameters()).reshape(-1,) + new_theta = self.get_parameters() all_parameters.append(new_theta) y_sim = self.simulate(self.n_samples_per_param, rng=rng, npc=npc) counter+=1 @@ -1441,12 +1446,12 @@ def _accept_parameter(self, data, npc=None): ## Select one arbitrary particle: index = rng.choice(self.n_samples, size=1)[0] ## Sample proposal parameter and calculate new distance: - theta = self.accepted_parameters_manager.accepted_parameters_bds.value()[index,:] + theta = self.accepted_parameters_manager.accepted_parameters_bds.value()[index] while True: perturbation_output = self.perturb(index, rng=rng) if perturbation_output[0] and self.pdf_of_prior(self.model, perturbation_output[1]) != 0: - new_theta = np.array(perturbation_output[1]).reshape(-1,) + new_theta = perturbation_output[1] break y_sim = self.simulate(self.n_samples_per_param, rng=rng, npc=npc) @@ -1456,7 +1461,7 @@ def _accept_parameter(self, data, npc=None): ## Calculate acceptance probability: ratio_prior_prob = self.pdf_of_prior(self.model, perturbation_output[1]) / self.pdf_of_prior(self.model, - self.accepted_parameters_manager.accepted_parameters_bds.value()[index, :]) + self.accepted_parameters_manager.accepted_parameters_bds.value()[index]) ratio_likelihood_prob = np.exp((self.smooth_distances_bds.value()[index] - smooth_distance) / self.epsilon) acceptance_prob = ratio_prior_prob * ratio_likelihood_prob @@ -1599,7 +1604,7 @@ def sample(self, observations, steps, n_samples = 10000, n_samples_per_param = 1 # 0: update remotely required variables self.logger.info("Broadcasting parameters") - self.accepted_parameters_manager.update_broadcast(self.backend, accepted_parameters=accepted_parameters) + self.accepted_parameters_manager.update_broadcast(self.backend, accepted_weights = accepted_weights, accepted_parameters=accepted_parameters) # 1: Calculate parameters # print("INFO: Initial accepted parameter parameters") @@ -1613,15 +1618,20 @@ def sample(self, observations, steps, n_samples = 10000, n_samples_per_param = 1 for count in counter: self.simulation_counter+=count - accepted_parameters = np.concatenate(new_parameters) + if aStep > 0: + accepted_parameters = [] + for ind in range(len(new_parameters)): + accepted_parameters += new_parameters[ind] + else: + accepted_parameters = new_parameters distances = np.concatenate(new_distances) # 2: Sort and renumber samples self.logger.debug("Sort and renumber samples.") - SortIndex = sorted(range(len(distances)), key=lambda k: distances[k]) - distances = distances[SortIndex] - accepted_parameters = accepted_parameters[SortIndex, :] + accepted_params_and_dist = zip(distances, accepted_parameters) + accepted_params_and_dist = sorted(accepted_params_and_dist, key = lambda x: x[0]) + distances, accepted_parameters = [list(t) for t in zip(*accepted_params_and_dist)] # 3: Calculate and broadcast annealling parameters self.logger.debug("Calculate and broadcast annealling parameters.") @@ -1642,11 +1652,8 @@ def sample(self, observations, steps, n_samples = 10000, n_samples_per_param = 1 for kernel in self.kernel.kernels: kernel_parameters.append( self.accepted_parameters_manager.get_accepted_parameters_bds_values(kernel.models)) - self.accepted_parameters_manager.update_kernel_values(self.backend, kernel_parameters=kernel_parameters) - accepted_cov_mats = self.kernel.calculate_cov(self.accepted_parameters_manager) - else: accepted_cov_mats = pow(2,1)*accepted_cov_mats @@ -1747,10 +1754,10 @@ def _accept_parameter(self, rng_and_index, npc=None): y_sim = self.simulate(self.n_samples_per_param, rng=rng, npc=npc) counter+=1 distance = self.distance.distance(self.accepted_parameters_manager.observations_bds.value(), y_sim) - result_theta.append(self.get_parameters()) + result_theta += self.get_parameters() result_distance.append(distance) else: - theta = np.array(self.accepted_parameters_manager.accepted_parameters_bds.value()[index]).reshape(-1,) + theta = self.accepted_parameters_manager.accepted_parameters_bds.value()[index] self.set_parameters(theta) y_sim = self.simulate(self.n_samples_per_param, rng=rng, npc=npc) counter+=1 @@ -1783,7 +1790,7 @@ def _accept_parameter(self, rng_and_index, npc=None): else: result_theta.append(theta) result_distance.append(distance) - return (result_theta, result_distance, counter) + return result_theta, result_distance, counter def _update_cov_mat(self, rng_t, npc=None): """ @@ -1809,7 +1816,7 @@ def _update_cov_mat(self, rng_t, npc=None): accepted_cov_mats_transformed = [cov_mat*pow(2.0, -2.0 * t) for cov_mat in self.accepted_parameters_manager.accepted_cov_mats_bds.value()] - theta = np.array(self.accepted_parameters_manager.accepted_parameters_bds.value()[0]).reshape(-1,) + theta = self.accepted_parameters_manager.accepted_parameters_bds.value()[0] mapping_for_kernels, garbage_index = self.accepted_parameters_manager.get_mapping( self.accepted_parameters_manager.model) @@ -1965,6 +1972,7 @@ def sample(self, observations, steps, n_samples = 10000, n_samples_per_param = 1 accepted_parameters = None accepted_cov_mat = None accepted_dist = None + accepted_weights = None # main RSMCABC algorithm for aStep in range(steps): @@ -1973,7 +1981,7 @@ def sample(self, observations, steps, n_samples = 10000, n_samples_per_param = 1 if aStep == 0 and journal_file is not None: accepted_parameters=journal.parameters[-1] - self.accepted_parameters_manager.update_broadcast(self.backend, accepted_parameters=accepted_parameters) + self.accepted_parameters_manager.update_broadcast(self.backend, accepted_weights= accepted_weights, accepted_parameters=accepted_parameters) kernel_parameters = [] for kernel in self.kernel.kernels: @@ -1991,6 +1999,7 @@ def sample(self, observations, steps, n_samples = 10000, n_samples_per_param = 1 # 0: Compute epsilon, compute new covariance matrix for Kernel, # and finally Drawing new new/perturbed samples using prior or MCMC Kernel # print("DEBUG: Iteration " + str(aStep) + " of RSMCABC algorithm.") + self.logger.info("Compute epsilon and calculating covariance matrix.") if aStep == 0: n_replenish = n_samples # Compute epsilon @@ -2024,19 +2033,21 @@ def sample(self, observations, steps, n_samples = 10000, n_samples_per_param = 1 rng_pds = self.backend.parallelize(rng_arr) # update remotely required variables + self.logger.info("Broadcasting parameters.") # print("INFO: Broadcasting parameters.") self.epsilon = epsilon self.R = R + self.logger.info("Broadcast updated variable.") # Broadcast updated variable self.accepted_parameters_manager.update_broadcast(self.backend, accepted_cov_mats=accepted_cov_mats) self._update_broadcasts(accepted_dist) # calculate resample parameters + self.logger.info("Resampling parameters") # print("INFO: Resampling parameters") params_and_dist_index_pds = self.backend.map(self._accept_parameter, rng_pds) params_and_dist_index = self.backend.collect(params_and_dist_index_pds) new_parameters, new_dist, new_index, counter = [list(t) for t in zip(*params_and_dist_index)] - new_parameters = np.array(new_parameters) new_dist = np.array(new_dist) new_index = np.array(new_index) @@ -2044,40 +2055,53 @@ def sample(self, observations, steps, n_samples = 10000, n_samples_per_param = 1 self.simulation_counter+=count # 1: Update all parameters, compute acceptance probability, compute epsilon + self.logger.info("Append updated new parameters.") if len(new_dist) == self.n_samples: accepted_parameters = new_parameters accepted_dist = new_dist + accepted_weights = np.ones(shape=(len(accepted_parameters), 1)) * (1 / len(accepted_parameters)) else: - accepted_parameters = np.concatenate((accepted_parameters, new_parameters)) + accepted_parameters += new_parameters accepted_dist = np.concatenate((accepted_dist, new_dist)) + accepted_weights = np.ones(shape=(len(accepted_parameters), 1)) * (1 / len(accepted_parameters)) if (full_output == 1 and aStep <= steps - 1) or (full_output == 0 and aStep == steps - 1): self.logger.info("Saving configuration to output journal.") journal.add_distances(copy.deepcopy(accepted_dist)) - journal.add_weights(np.ones(shape=(len(accepted_parameters), 1)) * (1 / len(accepted_parameters))) - self.accepted_parameters_manager.update_broadcast(self.backend, accepted_parameters=accepted_parameters) + journal.add_weights(accepted_weights) + self.accepted_parameters_manager.update_broadcast(self.backend, accepted_weights=accepted_weights, accepted_parameters=accepted_parameters) names_and_parameters = self._get_names_and_parameters() journal.add_user_parameters(names_and_parameters) journal.number_of_simulations.append(self.simulation_counter) # 2: Compute acceptance probabilty and set R + self.logger.info("Compute acceptance probabilty and set R") prob_acceptance = sum(new_index) / (R * n_replenish) if prob_acceptance == 1 or prob_acceptance == 0: R = 1 else: R = int(np.log(const) / np.log(1 - prob_acceptance)) + self.logger.info("Order accepted parameters and distances") n_replenish = round(n_samples * alpha) accepted_params_and_dist = zip(accepted_dist, accepted_parameters) accepted_params_and_dist = sorted(accepted_params_and_dist, key = lambda x: x[0]) accepted_dist, accepted_parameters = [list(t) for t in zip(*accepted_params_and_dist)] + + self.logger.info("Throw away N_alpha particles with largest dist") # Throw away N_alpha particles with largest dist - accepted_parameters = np.delete(accepted_parameters, np.arange(round(n_samples * alpha)) + ( - self.n_samples - round(n_samples * alpha)), 0) + # accepted_parameters = np.delete(accepted_parameters, np.arange(round(n_samples * alpha)) + ( + # self.n_samples - round(n_samples * alpha)), 0) + + del accepted_parameters[self.n_samples - round(n_samples * alpha):] accepted_dist = np.delete(accepted_dist, np.arange(round(n_samples * alpha)) + (n_samples - round(n_samples * alpha)), 0) - self.accepted_parameters_manager.update_broadcast(self.backend, accepted_parameters=accepted_parameters) + + accepted_weights = np.ones(shape=(len(accepted_parameters), 1)) * (1 / len(accepted_parameters)) + self.logger.info("Update parameters, weights") + self.accepted_parameters_manager.update_broadcast(self.backend, accepted_weights=accepted_weights, + accepted_parameters=accepted_parameters) # Add epsilon_arr to the journal @@ -2129,7 +2153,7 @@ def _accept_parameter(self, rng, npc=None): index_accept = 1 else: index = rng.choice(len(self.accepted_parameters_manager.accepted_parameters_bds.value()), size=1) - theta = np.array(self.accepted_parameters_manager.accepted_parameters_bds.value()[index[0]]).reshape(-1,) + theta = self.accepted_parameters_manager.accepted_parameters_bds.value()[index[0]] index_accept = 0.0 for ind in range(self.R): while True: diff --git a/tests/inferences_tests.py b/tests/inferences_tests.py index e0f23dad..c1243fc8 100644 --- a/tests/inferences_tests.py +++ b/tests/inferences_tests.py @@ -237,11 +237,6 @@ def test_sample(self): # Compute posterior mean mu_post_mean, sigma_post_mean = journal.posterior_mean()['mu'], journal.posterior_mean()['sigma'] - print(journal.get_parameters()) - print(len(mu_post_sample)) - print(mu_post_sample[0]) - print(mu_post_sample[0].shape) - # test shape of sample mu_sample_shape, sigma_sample_shape, weights_sample_shape = (len(mu_post_sample), mu_post_sample[0].shape[0]), \ (len(sigma_post_sample), sigma_post_sample[0].shape[1]), post_weights.shape From fc4eab246c8effa5fe866487f877f48eaf0f1aea Mon Sep 17 00:00:00 2001 From: statrita2004 Date: Wed, 30 Jan 2019 20:13:58 +0000 Subject: [PATCH 09/20] Making inference schemes pass test --- abcpy/inferences.py | 2 +- examples/backends/mpi/pmcabc_gaussian.py | 8 ++-- .../pmcabc_gaussian_summary_selection.py | 23 ++++------- tests/inferences_tests.py | 4 +- tests/output_tests.py | 40 +++++++++---------- 5 files changed, 35 insertions(+), 42 deletions(-) diff --git a/abcpy/inferences.py b/abcpy/inferences.py index 6b7a839e..161dfaab 100644 --- a/abcpy/inferences.py +++ b/abcpy/inferences.py @@ -2241,7 +2241,7 @@ def __init__(self, root_models, distances, backend, kernel=None, seed=None): self.simulation_counter = 0 - def sample(self, observations, steps, n_samples = 10000, n_samples_per_param = 1, alpha = 0.9, acceptance_cutoff = 0.03, covFactor = 2.0, full_output=0, journal_file = None): + def sample(self, observations, steps, n_samples = 10000, n_samples_per_param = 1, alpha = 0.1, acceptance_cutoff = 0.03, covFactor = 2.0, full_output=0, journal_file = None): """Samples from the posterior distribution of the model parameter given the observed data observations. diff --git a/examples/backends/mpi/pmcabc_gaussian.py b/examples/backends/mpi/pmcabc_gaussian.py index 29d5b540..f7474f86 100644 --- a/examples/backends/mpi/pmcabc_gaussian.py +++ b/examples/backends/mpi/pmcabc_gaussian.py @@ -17,12 +17,12 @@ def infer_parameters(): # define prior from abcpy.continuousmodels import Uniform - mu = Uniform([[150], [200]], ) - sigma = Uniform([[5], [25]], ) + mu = Uniform([[150], [200]], name='mu') + sigma = Uniform([[5], [25]], name='sigma') # define the model from abcpy.continuousmodels import Normal - height = Normal([mu, sigma], ) + height = Normal([mu, sigma], name='height') # define statistics from abcpy.statistics import Identity @@ -85,7 +85,7 @@ def setUpModule(): class ExampleGaussianMPITest(unittest.TestCase): def test_example(self): journal = infer_parameters() - test_result = journal.posterior_mean()[0] + test_result = journal.posterior_mean()['mu'] expected_result = 171.4343638312893 self.assertLess(abs(test_result - expected_result), 2) diff --git a/examples/summaryselection/pmcabc_gaussian_summary_selection.py b/examples/summaryselection/pmcabc_gaussian_summary_selection.py index 06ccee0b..4caf70d0 100644 --- a/examples/summaryselection/pmcabc_gaussian_summary_selection.py +++ b/examples/summaryselection/pmcabc_gaussian_summary_selection.py @@ -27,13 +27,15 @@ def infer_parameters(): summary_selection = Semiautomatic([height], statistics_calculator, backend, n_samples=1000,n_samples_per_param=1, seed=1) + print('Hello') + # Redefine the statistics function statistics_calculator.statistics = lambda x, f2=summary_selection.transformation, \ f1=statistics_calculator.statistics: f2(f1(x)) # define distance - from abcpy.distances import LogReg - distance_calculator = LogReg(statistics_calculator) + from abcpy.distances import Euclidean + distance_calculator = Euclidean(statistics_calculator) # define kernel from abcpy.perturbationkernel import DefaultKernel @@ -44,8 +46,8 @@ def infer_parameters(): sampler = PMCABC([height], [distance_calculator], backend, kernel, seed=1) # sample from scheme - T, n_sample, n_samples_per_param = 3, 250, 10 - eps_arr = np.array([.75]) + T, n_sample, n_samples_per_param = 3, 10, 10 + eps_arr = np.array([500]) epsilon_percentile = 10 journal = sampler.sample([height_obs], T, eps_arr, n_sample, n_samples_per_param, epsilon_percentile) @@ -54,7 +56,7 @@ def infer_parameters(): def analyse_journal(journal): # output parameters and weights - print(journal.get_stored_output_values()) + print(journal.opt_values) print(journal.get_weights()) # do post analysis @@ -72,16 +74,7 @@ def analyse_journal(journal): new_journal = Journal.fromFile('experiments.jnl') -# this code is for testing purposes only and not relevant to run the example -import unittest -class ExampleGaussianDummyTest(unittest.TestCase): - def test_example(self): - journal = infer_parameters() - test_result = journal.posterior_mean()[0] - expected_result = 176 - self.assertLess(abs(test_result - expected_result), 2.) - - +# this code is for testing purposes only and not relevant to run the exampl if __name__ == "__main__": journal = infer_parameters() analyse_journal(journal) diff --git a/tests/inferences_tests.py b/tests/inferences_tests.py index c1243fc8..781cc2ec 100644 --- a/tests/inferences_tests.py +++ b/tests/inferences_tests.py @@ -413,7 +413,7 @@ def test_sample(self): # use the APMCABC scheme for T = 1 steps, n_sample, n_simulate = 1, 10, 1 sampler = APMCABC([self.model], [self.dist_calc], self.backend, seed = 1) - journal = sampler.sample([self.observation], steps, n_sample, n_simulate) + journal = sampler.sample([self.observation], steps, n_sample, n_simulate, alpha=.9) mu_post_sample, sigma_post_sample, post_weights = np.array(journal.get_parameters()['mu']), np.array(journal.get_parameters()['sigma']), np.array(journal.get_weights()) # Compute posterior mean @@ -432,7 +432,7 @@ def test_sample(self): # use the APMCABC scheme for T = 2 T, n_sample, n_simulate = 2, 10, 1 sampler = APMCABC([self.model], [self.dist_calc], self.backend, seed = 1) - journal = sampler.sample([self.observation], T, n_sample, n_simulate) + journal = sampler.sample([self.observation], T, n_sample, n_simulate, alpha=.9) mu_post_sample, sigma_post_sample, post_weights = np.array(journal.get_parameters()['mu']), np.array(journal.get_parameters()['sigma']), np.array(journal.get_weights()) # Compute posterior mean diff --git a/tests/output_tests.py b/tests/output_tests.py index 9bd9639d..c9aeacc1 100644 --- a/tests/output_tests.py +++ b/tests/output_tests.py @@ -4,24 +4,24 @@ from abcpy.output import Journal class JournalTests(unittest.TestCase): - def test_add_parameters(self): - params1 = np.zeros((2,4)) - params2 = np.ones((2,4)) - - # test whether production mode only stores the last set of parameters - journal_prod = Journal(0) - journal_prod.add_parameters(params1) - journal_prod.add_parameters(params2) - self.assertEqual(len(journal_prod.parameters), 1) - np.testing.assert_equal(journal_prod.parameters[0], params2) - - # test whether reconstruction mode stores all parameter sets - journal_recon = Journal(1) - journal_recon.add_parameters(params1) - journal_recon.add_parameters(params2) - self.assertEqual(len(journal_recon.parameters), 2) - np.testing.assert_equal(journal_recon.parameters[0], params1) - np.testing.assert_equal(journal_recon.parameters[1], params2) + # def test_add_parameters(self): + # params1 = np.zeros((2,4)) + # params2 = np.ones((2,4)) + # + # # test whether production mode only stores the last set of parameters + # journal_prod = Journal(0) + # journal_prod.add_parameters(params1) + # journal_prod.add_parameters(params2) + # self.assertEqual(len(journal_prod.parameters), 1) + # np.testing.assert_equal(journal_prod.parameters[0], params2) + # + # # test whether reconstruction mode stores all parameter sets + # journal_recon = Journal(1) + # journal_recon.add_parameters(params1) + # journal_recon.add_parameters(params2) + # self.assertEqual(len(journal_recon.parameters), 2) + # np.testing.assert_equal(journal_recon.parameters[0], params1) + # np.testing.assert_equal(journal_recon.parameters[1], params2) @@ -72,12 +72,12 @@ def test_load_and_save(self): weights1 = np.zeros((2,4)) journal = Journal(0) - journal.add_parameters(params1) + #journal.add_parameters(params1) journal.add_weights(weights1) journal.save('journal_tests_testfile.pkl') new_journal = Journal.fromFile('journal_tests_testfile.pkl') - np.testing.assert_equal(journal.parameters, new_journal.parameters) + #np.testing.assert_equal(journal.parameters, new_journal.parameters) np.testing.assert_equal(journal.weights, new_journal.weights) From c9abae638d294503bb79a65e8b91a75d1e9d2a98 Mon Sep 17 00:00:00 2001 From: statrita2004 Date: Wed, 30 Jan 2019 21:55:32 +0000 Subject: [PATCH 10/20] Make reading from a journal file to start in middle is facilitated --- abcpy/graphtools.py | 1 + abcpy/inferences.py | 38 ++++++++++++++--------- abcpy/output.py | 75 +++++++++++++++++++++++++-------------------- 3 files changed, 66 insertions(+), 48 deletions(-) diff --git a/abcpy/graphtools.py b/abcpy/graphtools.py index 0d6d653c..9a276434 100644 --- a/abcpy/graphtools.py +++ b/abcpy/graphtools.py @@ -241,6 +241,7 @@ def _get_names_and_parameters(self): return_value = [] for model, index in mapping: + return_value.append((model.name, self.accepted_parameters_manager.get_accepted_parameters_bds_values([model]))) return return_value diff --git a/abcpy/inferences.py b/abcpy/inferences.py index 161dfaab..95881e7a 100644 --- a/abcpy/inferences.py +++ b/abcpy/inferences.py @@ -417,8 +417,8 @@ def sample(self, observations, steps, epsilon_init, n_samples = 10000, n_samples for aStep in range(steps): self.logger.debug("iteration {} of PMC algorithm".format(aStep)) if(aStep==0 and journal_file is not None): - accepted_parameters = journal.parameters[-1] - accepted_weights = journal.weights[-1] + accepted_parameters = journal.get_accepted_parameters(-1) + accepted_weights = journal.get_weights(-1) self.accepted_parameters_manager.update_broadcast(self.backend, accepted_parameters=accepted_parameters, accepted_weights=accepted_weights) @@ -505,6 +505,7 @@ def sample(self, observations, steps, epsilon_init, n_samples = 10000, n_samples self.logger.info("Save configuration to output journal") if (full_output == 1 and aStep <= steps - 1) or (full_output == 0 and aStep == steps - 1): + journal.add_accepted_parameters(copy.deepcopy(accepted_parameters)) journal.add_distances(copy.deepcopy(distances)) journal.add_weights(copy.deepcopy(accepted_weights)) self.accepted_parameters_manager.update_broadcast(self.backend, accepted_parameters=accepted_parameters, @@ -771,8 +772,8 @@ def sample(self, observations, steps, n_samples = 10000, n_samples_per_param = 1 self.logger.info("Starting pmc iterations") for aStep in range(steps): if(aStep==0 and journal_file is not None): - accepted_parameters = journal.parameters[-1] - accepted_weights = journal.weights[-1] + accepted_parameters = journal.get_accepted_parameters(-1) + accepted_weights = journal.get_weights(-1) approx_likelihood_new_parameters = journal.opt_values[-1] self.accepted_parameters_manager.update_broadcast(self.backend, accepted_parameters=accepted_parameters, accepted_weights=accepted_weights) @@ -865,6 +866,7 @@ def sample(self, observations, steps, n_samples = 10000, n_samples_per_param = 1 self.logger.info("Saving configuration to output journal") if (full_output == 1 and aStep <= steps - 1) or (full_output == 0 and aStep == steps - 1): + journal.add_accepted_parameters(copy.deepcopy(accepted_parameters)) journal.add_weights(copy.deepcopy(accepted_weights)) journal.add_opt_values(approx_likelihood_new_parameters) self.accepted_parameters_manager.update_broadcast(self.backend, accepted_parameters=accepted_parameters, @@ -1139,8 +1141,8 @@ def sample(self, observations, steps, epsilon, n_samples = 10000, n_samples_per_ for aStep in range(0, steps): self.logger.debug("step {}".format(aStep)) if(aStep==0 and journal_file is not None): - accepted_parameters=journal.parameters[-1] - accepted_weights=journal.weights[-1] + accepted_parameters=journal.get_accepted_parameters(-1) + accepted_weights=journal.get_weights(-1) #Broadcast Accepted parameters and Accedpted weights self.accepted_parameters_manager.update_broadcast(self.backend, accepted_parameters=accepted_parameters, accepted_weights=accepted_weights) @@ -1282,6 +1284,7 @@ def sample(self, observations, steps, epsilon, n_samples = 10000, n_samples_per_ if (full_output == 1 and aStep<= steps-1): ## Saving intermediate configuration to output journal. print('Saving after resampling') + journal.get_accepted_parameters(copy.deepcopy(accepted_parameters)) journal.add_weights(copy.deepcopy(accepted_weights)) journal.add_distances(copy.deepcopy(distances)) names_and_parameters = self._get_names_and_parameters() @@ -1310,6 +1313,7 @@ def sample(self, observations, steps, epsilon, n_samples = 10000, n_samples_per_ if (full_output == 1 and aStep <= steps-1): ## Saving intermediate configuration to output journal. + journal.get_accepted_parameters(copy.deepcopy(accepted_parameters)) journal.add_weights(copy.deepcopy(accepted_weights)) journal.add_distances(copy.deepcopy(distances)) names_and_parameters = self._get_names_and_parameters() @@ -1319,6 +1323,7 @@ def sample(self, observations, steps, epsilon, n_samples = 10000, n_samples_per_ # Add epsilon_arr, number of final steps and final output to the journal # print("INFO: Saving final configuration to output journal.") if (full_output == 0) or (full_output ==1 and broken_preemptively and aStep<= steps-1): + journal.add_accepted_parameters(copy.deepcopy(accepted_parameters)) journal.add_weights(copy.deepcopy(accepted_weights)) journal.add_distances(copy.deepcopy(distances)) self.accepted_parameters_manager.update_broadcast(self.backend,accepted_parameters=accepted_parameters,accepted_weights=accepted_weights) @@ -1587,8 +1592,8 @@ def sample(self, observations, steps, n_samples = 10000, n_samples_per_param = 1 for aStep in range(0, steps): self.logger.info("ABCsubsim step {}".format(aStep)) if aStep==0 and journal_file is not None: - accepted_parameters = journal.parameters[-1] - accepted_weights = journal.weights[-1] + accepted_parameters = journal.get_accepted_parameters(-1) + accepted_weights = journal.get_weights(-1) accepted_cov_mats = journal.opt_values[-1] # main ABCsubsim algorithm @@ -1684,6 +1689,7 @@ def sample(self, observations, steps, n_samples = 10000, n_samples_per_param = 1 if full_output == 1: self.logger.info("Saving intermediate configuration to output journal") + journal.add_accepted_parameters(copy.deepcopy(accepted_parameters)) journal.add_distances(copy.deepcopy(distances)) journal.add_weights(copy.deepcopy(accepted_weights)) journal.add_opt_values(accepted_cov_mats) @@ -1704,6 +1710,7 @@ def sample(self, observations, steps, n_samples = 10000, n_samples_per_param = 1 # Add anneal_parameter, number of final steps and final output to the journal # print("INFO: Saving final configuration to output journal.") if full_output == 0: + journal.add_accepted_parameters(copy.deepcopy(accepted_parameters)) journal.add_distances(copy.deepcopy(distances)) journal.add_weights(copy.deepcopy(accepted_weights)) journal.add_opt_values(accepted_cov_mats) @@ -1979,7 +1986,8 @@ def sample(self, observations, steps, n_samples = 10000, n_samples_per_param = 1 self.logger.info("RSMCABC iteration {}".format(aStep)) if aStep == 0 and journal_file is not None: - accepted_parameters=journal.parameters[-1] + accepted_parameters=journal.get_accepted_parameters(-1) + accepted_weights = journal.get_weights(-1) self.accepted_parameters_manager.update_broadcast(self.backend, accepted_weights= accepted_weights, accepted_parameters=accepted_parameters) @@ -2067,6 +2075,7 @@ def sample(self, observations, steps, n_samples = 10000, n_samples_per_param = 1 if (full_output == 1 and aStep <= steps - 1) or (full_output == 0 and aStep == steps - 1): self.logger.info("Saving configuration to output journal.") + journal.add_accepted_parameters(copy.deepcopy(accepted_parameters)) journal.add_distances(copy.deepcopy(accepted_dist)) journal.add_weights(accepted_weights) self.accepted_parameters_manager.update_broadcast(self.backend, accepted_weights=accepted_weights, accepted_parameters=accepted_parameters) @@ -2300,8 +2309,8 @@ def sample(self, observations, steps, n_samples = 10000, n_samples_per_param = 1 for aStep in range(steps): self.logger.info("APMCABC iteration {}".format(aStep)) if(aStep==0 and journal_file is not None): - accepted_parameters=journal.parameters[-1] - accepted_weights=journal.weights[-1] + accepted_parameters=journal.get_accepted_parameters(-1) + accepted_weights=journal.get_weights(-1) self.accepted_parameters_manager.update_broadcast(self.backend, accepted_parameters=accepted_parameters, accepted_weights=accepted_weights) @@ -2389,6 +2398,7 @@ def sample(self, observations, steps, n_samples = 10000, n_samples_per_param = 1 # print("INFO: Saving configuration to output journal.") if (full_output == 1 and aStep <= steps - 1) or (full_output == 0 and aStep == steps - 1): + journal.add_accepted_parameters(copy.deepcopy(accepted_parameters)) journal.add_distances(copy.deepcopy(accepted_dist)) journal.add_weights(copy.deepcopy(accepted_weights)) self.accepted_parameters_manager.update_broadcast(self.backend, @@ -2597,8 +2607,8 @@ def sample(self, observations, steps, n_samples = 10000, n_samples_per_param = 1 self.logger.info("SMCABC iteration {}".format(aStep)) if(aStep==0 and journal_file is not None): - accepted_parameters=journal.parameters[-1] - accepted_weights=journal.weights[-1] + accepted_parameters=journal.get_accepted_parameters(-1) + accepted_weights=journal.get_weights(-1) accepted_y_sim = journal.opt_values[-1] self.accepted_parameters_manager.update_broadcast(self.backend, accepted_parameters=accepted_parameters, @@ -2713,7 +2723,7 @@ def sample(self, observations, steps, n_samples = 10000, n_samples_per_param = 1 if (full_output == 1 and aStep <= steps - 1) or (full_output == 0 and aStep == steps - 1): self.logger.info("Saving configuration to output journal") self.accepted_parameters_manager.update_broadcast(self.backend, accepted_parameters=accepted_parameters) - + journal.add_accepted_parameters(copy.deepcopy(accepted_parameters)) journal.add_distances(copy.deepcopy(distances)) journal.add_weights(copy.deepcopy(accepted_weights)) journal.add_opt_values(copy.deepcopy(accepted_y_sim)) diff --git a/abcpy/output.py b/abcpy/output.py index c34d528b..41bb9775 100644 --- a/abcpy/output.py +++ b/abcpy/output.py @@ -32,7 +32,7 @@ def __init__(self, type): type=1 logs all generated information (reproducibily use). """ - #self.parameters = [] + self.accepted_parameters = [] self.names_and_parameters = [] self.weights = [] self.distances = [] @@ -91,6 +91,21 @@ def add_user_parameters(self, names_and_params): else: self.names_and_parameters.append(dict(names_and_params)) + def add_accepted_parameters(self, accepted_parameters): + """ + Saves provided weights by appending them to the journal. If type==0, old weights get overwritten. + + Parameters + ---------- + accepted_parameters: list + """ + + if self._type == 0: + self.accepted_parameters = [accepted_parameters] + + if self._type == 1: + self.accepted_parameters.append(accepted_parameters) + def add_weights(self, weights): """ Saves provided weights by appending them to the journal. If type==0, old weights get overwritten. @@ -177,6 +192,30 @@ def get_parameters(self, iteration=None): else: return self.names_and_parameters[iteration] + def get_accepted_parameters(self, iteration=None): + """ + Returns the accepted parameters from a sampling scheme. + + For intermediate results, pass the iteration. + + Parameters + ---------- + iteration: int + specify the iteration for which to return parameters + + Returns + ------- + accepted_parameters: dictionary + Samples from the specified iteration (last, if not specified) returned as a disctionary with names of the + random variables + """ + + if iteration is None: + return self.accepted_parameters[-1] + + else: + return self.accepted_parameters[iteration] + def get_weights(self, iteration=None): """ Returns the weights from a sampling scheme. @@ -271,8 +310,6 @@ def posterior_cov(self, iteration=None): return np.cov(np.transpose(np.hstack(joined_params)), aweights = weights.reshape(len(weights),)), params.keys() - - def posterior_histogram(self, iteration=None, n_bins = 10): """ Computes a weighted histogram of multivariate posterior samples @@ -297,34 +334,4 @@ def posterior_histogram(self, iteration=None, n_bins = 10): joined_params.append(np.array(params[keyind]).squeeze(axis=1)) H, edges = np.histogramdd(np.hstack(joined_params), bins = n_bins, weights = weights.reshape(len(weights),)) - return [H, edges] - - # def add_parameters(self, params): - # """ - # Saves provided parameters by appending them to the journal. If type==0, old parameters get overwritten. - # - # Parameters - # ---------- - # params: numpy.array - # nxp matrix containing n parameters of dimension p - # """ - # - # if self._type == 0: - # self.parameters = [params] - # - # if self._type == 1: - # self.parameters.append(params) - # - # def _get_parameter_values(self): - # """ - # Returns the parameters in the order dictated by the graph structure. - # - # Returns - # ------- - # numpy.array: - # The parameters of the model - # """ - # - # endp = len(self.parameters) - 1 - # params = self.parameters[endp] - # return params + return [H, edges] \ No newline at end of file From deab6f6f898a4edf5d8996ba0b276813c0457b0b Mon Sep 17 00:00:00 2001 From: statrita2004 Date: Fri, 1 Feb 2019 14:13:13 +0000 Subject: [PATCH 11/20] Amended SABC to new HD modifications --- abcpy/inferences.py | 61 ++++++++++++++++++++++++--------------------- 1 file changed, 32 insertions(+), 29 deletions(-) diff --git a/abcpy/inferences.py b/abcpy/inferences.py index 95881e7a..7e6b056d 100644 --- a/abcpy/inferences.py +++ b/abcpy/inferences.py @@ -234,12 +234,11 @@ def sample(self, observations, n_samples, n_samples_per_param, epsilon, full_out for count in counter: self.simulation_counter+=count - accepted_parameters = np.array(accepted_parameters) distances = np.array(distances) self.accepted_parameters_manager.update_broadcast(self.backend, accepted_parameters=accepted_parameters) - + journal.add_accepted_parameters(copy.deepcopy(accepted_parameters)) journal.add_weights(copy.deepcopy(np.ones((n_samples, 1)))) journal.add_distances(copy.deepcopy(distances)) self.accepted_parameters_manager.update_broadcast(self.backend, accepted_parameters=accepted_parameters) @@ -735,10 +734,10 @@ def sample(self, observations, steps, n_samples = 10000, n_samples_per_param = 1 # Initialize particles: When not supplied, randomly draw them from prior distribution # Weights of particles: Assign equal weights for each of the particles if iniPoints == None: - accepted_parameters = np.zeros(shape=(n_samples, dim)) + accepted_parameters = [] for ind in range(0, n_samples): self.sample_from_prior(rng=self.rng) - accepted_parameters[ind, :] = self.get_parameters() + accepted_parameters.append(self.get_parameters()) accepted_weights = np.ones((n_samples, 1), dtype=np.float) / n_samples else: accepted_parameters = iniPoints @@ -802,15 +801,15 @@ def sample(self, observations, steps, n_samples = 10000, n_samples_per_param = 1 # 1: calculate resample parameters self.logger.info("Resample parameters") - index = self.rng.choice(accepted_parameters.shape[0], size=n_samples, p=accepted_weights.reshape(-1)) + index = self.rng.choice(len(accepted_parameters), size=n_samples, p=accepted_weights.reshape(-1)) # Choose a new particle using the resampled particle (make the boundary proper) # Initialize new_parameters - new_parameters = np.zeros((n_samples, dim), dtype=np.float) + new_parameters = [] for ind in range(0, self.n_samples): while True: perturbation_output = self.perturb(index[ind], rng=self.rng) if perturbation_output[0] and self.pdf_of_prior(self.model, perturbation_output[1])!= 0: - new_parameters[ind, :] = perturbation_output[1] + new_parameters.append(perturbation_output[1]) break # 2: calculate approximate lieklihood for new parameters self.logger.info("Calculate approximate likelihood") @@ -1156,12 +1155,14 @@ def sample(self, observations, steps, epsilon, n_samples = 10000, n_samples_per_ self.accepted_parameters_manager.update_kernel_values(self.backend, kernel_parameters=kernel_parameters) new_cov_mats = self.kernel.calculate_cov(self.accepted_parameters_manager) - - if accepted_parameters.shape[1] > 1: - accepted_cov_mats = [beta * new_cov_mat + 0.0001 * np.trace(new_cov_mat) * np.eye(len(new_cov_mat)) for - new_cov_mat in new_cov_mats] - else: - accepted_cov_mats = [beta*new_cov_mat + 0.0001*(new_cov_mat)*np.eye(accepted_parameters.shape[1]) for new_cov_mat in new_cov_mats] + accepted_cov_mats = [] + for new_cov_mat in new_cov_mats: + if new_cov_mat.shape[0] > 1: + accepted_cov_mats.append( + beta * new_cov_mat + 0.0001 * np.trace(new_cov_mat) * np.eye(new_cov_mat.shape[0])) + else: + accepted_cov_mats.append( + beta * new_cov_mat + 0.0001 * (new_cov_mat) * np.eye(new_cov_mat.shape[0])) # Broadcast Accepted Covariance Matrix self.accepted_parameters_manager.update_broadcast(self.backend, accepted_cov_mats=accepted_cov_mats) @@ -1272,19 +1273,21 @@ def sample(self, observations, steps, epsilon, n_samples = 10000, n_samples_per_ self.accepted_parameters_manager.update_kernel_values(self.backend, kernel_parameters=kernel_parameters) # Compute Kernel Covariance Matrix and broadcast it new_cov_mats = self.kernel.calculate_cov(self.accepted_parameters_manager) - if accepted_parameters.shape[1] > 1: - accepted_cov_mats = [beta * new_cov_mat + 0.0001 * np.trace(new_cov_mat) * np.eye(len(new_cov_mat)) - for new_cov_mat in new_cov_mats] - else: - accepted_cov_mats = [ - beta * new_cov_mat + 0.0001 * (new_cov_mat) * np.eye(accepted_parameters.shape[1]) - for new_cov_mat in new_cov_mats] + accepted_cov_mats = [] + for new_cov_mat in new_cov_mats: + if new_cov_mat.shape[0] > 1: + accepted_cov_mats.append( + beta * new_cov_mat + 0.0001 * np.trace(new_cov_mat) * np.eye(new_cov_mat.shape[0])) + else: + accepted_cov_mats.append( + beta * new_cov_mat + 0.0001 * (new_cov_mat) * np.eye(new_cov_mat.shape[0])) + self.accepted_parameters_manager.update_broadcast(self.backend, accepted_cov_mats=accepted_cov_mats) if (full_output == 1 and aStep<= steps-1): ## Saving intermediate configuration to output journal. print('Saving after resampling') - journal.get_accepted_parameters(copy.deepcopy(accepted_parameters)) + journal.add_accepted_parameters(copy.deepcopy(accepted_parameters)) journal.add_weights(copy.deepcopy(accepted_weights)) journal.add_distances(copy.deepcopy(distances)) names_and_parameters = self._get_names_and_parameters() @@ -1302,18 +1305,18 @@ def sample(self, observations, steps, epsilon, n_samples = 10000, n_samples_per_ self.accepted_parameters_manager.update_kernel_values(self.backend, kernel_parameters=kernel_parameters) # Compute Kernel Covariance Matrix and broadcast it new_cov_mats = self.kernel.calculate_cov(self.accepted_parameters_manager) - if len(accepted_parameters[0]) > 1: - accepted_cov_mats = [beta * new_cov_mat + 0.0001 * np.trace(new_cov_mat) * np.eye(len(new_cov_mat)) - for new_cov_mat in new_cov_mats] - else: - accepted_cov_mats = [ - beta * new_cov_mat + 0.0001 * (new_cov_mat) * np.eye(accepted_parameters.shape[1]) - for new_cov_mat in new_cov_mats] + accepted_cov_mats = [] + for new_cov_mat in new_cov_mats: + if new_cov_mat.shape[0]>1: + accepted_cov_mats.append(beta * new_cov_mat + 0.0001 * np.trace(new_cov_mat) * np.eye(new_cov_mat.shape[0])) + else: + accepted_cov_mats.append(beta * new_cov_mat + 0.0001 * (new_cov_mat) * np.eye(new_cov_mat.shape[0])) + self.accepted_parameters_manager.update_broadcast(self.backend, accepted_cov_mats=accepted_cov_mats) if (full_output == 1 and aStep <= steps-1): ## Saving intermediate configuration to output journal. - journal.get_accepted_parameters(copy.deepcopy(accepted_parameters)) + journal.add_accepted_parameters(copy.deepcopy(accepted_parameters)) journal.add_weights(copy.deepcopy(accepted_weights)) journal.add_distances(copy.deepcopy(distances)) names_and_parameters = self._get_names_and_parameters() From f144c34e09732954c5d1ef13aab795d4057f2613 Mon Sep 17 00:00:00 2001 From: statrita2004 Date: Fri, 1 Feb 2019 16:25:54 +0000 Subject: [PATCH 12/20] Amended PMC for HD updates --- abcpy/inferences.py | 108 ++++++++++++++------------------------ tests/inferences_tests.py | 28 ++++++---- 2 files changed, 55 insertions(+), 81 deletions(-) diff --git a/abcpy/inferences.py b/abcpy/inferences.py index 7e6b056d..b57d5d63 100644 --- a/abcpy/inferences.py +++ b/abcpy/inferences.py @@ -773,7 +773,6 @@ def sample(self, observations, steps, n_samples = 10000, n_samples_per_param = 1 if(aStep==0 and journal_file is not None): accepted_parameters = journal.get_accepted_parameters(-1) accepted_weights = journal.get_weights(-1) - approx_likelihood_new_parameters = journal.opt_values[-1] self.accepted_parameters_manager.update_broadcast(self.backend, accepted_parameters=accepted_parameters, accepted_weights=accepted_weights) @@ -797,9 +796,10 @@ def sample(self, observations, steps, n_samples = 10000, n_samples_per_param = 1 # 0: update remotely required variables self.logger.info("Broadcasting parameters") - self.accepted_parameters_manager.update_broadcast(self.backend, accepted_parameters=accepted_parameters, accepted_weights=accepted_weights, accepted_cov_mats=accepted_cov_mats) + self.accepted_parameters_manager.update_broadcast(self.backend, accepted_parameters=accepted_parameters, + accepted_weights=accepted_weights, accepted_cov_mats=accepted_cov_mats) - # 1: calculate resample parameters + # 1: Resample parameters self.logger.info("Resample parameters") index = self.rng.choice(len(accepted_parameters), size=n_samples, p=accepted_weights.reshape(-1)) # Choose a new particle using the resampled particle (make the boundary proper) @@ -811,15 +811,26 @@ def sample(self, observations, steps, n_samples = 10000, n_samples_per_param = 1 if perturbation_output[0] and self.pdf_of_prior(self.model, perturbation_output[1])!= 0: new_parameters.append(perturbation_output[1]) break + # 2: calculate approximate lieklihood for new parameters self.logger.info("Calculate approximate likelihood") - merged_sim_data_parameter = self.flat_map(new_parameters, self.n_samples_per_param, self._simulate_data) + seed_arr = self.rng.randint(0, np.iinfo(np.uint32).max, size=self.n_samples, dtype=np.uint32) + rng_arr = np.array([np.random.RandomState(seed) for seed in seed_arr]) + data_arr = [] + for i in range(len(rng_arr)): + data_arr.append([new_parameters[i], rng_arr[i]]) + data_pds = self.backend.parallelize(data_arr) + + approx_likelihood_new_parameters_and_counter_pds = self.backend.map(self._approx_lik_calc, data_pds) + self.logger.debug("collect approximate likelihood from pds") + approx_likelihood_new_parameters_and_counter = self.backend.collect(approx_likelihood_new_parameters_and_counter_pds) + approx_likelihood_new_parameters, counter = [list(t) for t in + zip(*approx_likelihood_new_parameters_and_counter)] - # Compute likelihood for each parameter value - approx_likelihood_new_parameters, counter = self.simple_map(merged_sim_data_parameter, self._approx_calc) approx_likelihood_new_parameters = np.array(approx_likelihood_new_parameters).reshape(-1, 1) + for count in counter: - self.simulation_counter+=count + self.simulation_counter += count # 3: calculate new weights for new parameters self.logger.info("Calculating weights") @@ -831,9 +842,9 @@ def sample(self, observations, steps, n_samples = 10000, n_samples_per_param = 1 for i in range(0, self.n_samples): new_weights[i] = new_weights[i] * approx_likelihood_new_parameters[i] sum_of_weights += new_weights[i] - - #print("new_weights : ", new_weights, ", sum_of_weights : ", sum_of_weights) new_weights = new_weights / sum_of_weights + + self.logger.info("new_weights : ", new_weights, ", sum_of_weights : ", sum_of_weights) accepted_parameters = new_parameters self.accepted_parameters_manager.update_broadcast(self.backend, accepted_parameters=accepted_parameters, accepted_weights=new_weights) @@ -843,8 +854,7 @@ def sample(self, observations, steps, n_samples = 10000, n_samples_per_param = 1 self.logger.info("Calculating covariance matrix") kernel_parameters = [] for kernel in self.kernel.kernels: - kernel_parameters.append( - self.accepted_parameters_manager.get_accepted_parameters_bds_values(kernel.models)) + kernel_parameters.append(self.accepted_parameters_manager.get_accepted_parameters_bds_values(kernel.models)) self.accepted_parameters_manager.update_kernel_values(self.backend, kernel_parameters=kernel_parameters) @@ -867,7 +877,6 @@ def sample(self, observations, steps, n_samples = 10000, n_samples_per_param = 1 if (full_output == 1 and aStep <= steps - 1) or (full_output == 0 and aStep == steps - 1): journal.add_accepted_parameters(copy.deepcopy(accepted_parameters)) journal.add_weights(copy.deepcopy(accepted_weights)) - journal.add_opt_values(approx_likelihood_new_parameters) self.accepted_parameters_manager.update_broadcast(self.backend, accepted_parameters=accepted_parameters, accepted_weights=accepted_weights) names_and_parameters = self._get_names_and_parameters() @@ -876,83 +885,44 @@ def sample(self, observations, steps, n_samples = 10000, n_samples_per_param = 1 return journal - ## Simple_map and Flat_map: Python wrapper for nested parallelization - def simple_map(self, data, map_function): - data_pds = self.backend.parallelize(data) - result_pds = self.backend.map(map_function, data_pds) - result = self.backend.collect(result_pds) - main_result, counter = [list(t) for t in zip(*result)] - return main_result, counter - def flat_map(self, data, n_repeat, map_function): - # Create an array of data, with each data repeated n_repeat many times - repeated_data = np.repeat(data, n_repeat, axis=0) - # Create an see array - n_total = n_repeat * data.shape[0] - seed_arr = self.rng.randint(1, n_total * n_total, size=n_total, dtype=np.int32) - rng_arr = np.array([np.random.RandomState(seed) for seed in seed_arr]) - # Create data and rng array - repeated_data_rng = [[repeated_data[ind,:],rng_arr[ind]] for ind in range(n_total)] - repeated_data_rng_pds = self.backend.parallelize(repeated_data_rng) - # Map the function on the data using the corresponding rng - repeated_data_result_pds = self.backend.map(map_function, repeated_data_rng_pds) - repeated_data_result = self.backend.collect(repeated_data_result_pds) - repeated_data, result = [list(t) for t in zip(*repeated_data_result)] - merged_result_data = [] - for ind in range(0, data.shape[0]): - merged_result_data.append([[[result[np.int(i)][0][0] \ - for i in - np.where(np.mean(repeated_data == data[ind, :], axis=1) == 1)[0]]], - data[ind, :]]) - return merged_result_data - # define helper functions for map step - def _simulate_data(self, data, npc=None): - """ - Simulate n_sample_per_param many datasets for new parameter - Parameters - ---------- - theta: numpy.ndarray - 1xp matrix containing the model parameters, where p is the number of parameters - Returns - ------- - (theta, sim_data) - tehta and simulate data - """ - - # Simulate the fake data from the model given the parameter value theta - # print("DEBUG: Simulate model for parameter " + str(theta)) - theta, rng = data[0], data[1] - self.set_parameters(theta) - y_sim = self.simulate(1, rng, npc=npc) - return (theta, y_sim) - - def _approx_calc(self, sim_data_parameter, npc=None): + def _approx_lik_calc(self, data, npc=None): """ Compute likelihood for new parameters using approximate likelihood function Parameters ---------- - sim_data_parameter: list - First element is the parameter and the second element is the simulated data + data: list + A list containing a parameter value and a random numpy state, e.g. [theta, rng] Returns ------- float The approximated likelihood function """ - # Extract data and parameter - y_sim, theta = sim_data_parameter[0], sim_data_parameter[1] + # Extract theta and rng + theta, rng = data[0], data[1] + + # Simulate the fake data from the model given the parameter value theta + self.logger.debug("Simulate model for parameter " + str(theta)) + self.set_parameters(theta) + y_sim = self.simulate(self.n_samples_per_param, rng=rng, npc=npc) + self.logger.debug("Extracting observation.") obs = self.accepted_parameters_manager.observations_bds.value() + self.logger.debug("Computing likelihood...") total_pdf_at_theta = 1. lhd = self.likfun.likelihood(obs, y_sim) + self.logger.debug("Likelihood is :" + str(lhd)) pdf_at_theta = self.pdf_of_prior(self.model, theta) total_pdf_at_theta *= (pdf_at_theta * lhd) - return (total_pdf_at_theta, 1) + self.logger.debug("Prior pdf evaluated at theta is :" + str(pdf_at_theta)) + + return (total_pdf_at_theta, self.n_samples_per_param) def _calculate_weight(self, theta, npc=None): """ @@ -2101,9 +2071,7 @@ def sample(self, observations, steps, n_samples = 10000, n_samples_per_param = 1 accepted_dist, accepted_parameters = [list(t) for t in zip(*accepted_params_and_dist)] self.logger.info("Throw away N_alpha particles with largest dist") - # Throw away N_alpha particles with largest dist - # accepted_parameters = np.delete(accepted_parameters, np.arange(round(n_samples * alpha)) + ( - # self.n_samples - round(n_samples * alpha)), 0) + # Throw away N_alpha particles with largest distance del accepted_parameters[self.n_samples - round(n_samples * alpha):] accepted_dist = np.delete(accepted_dist, diff --git a/tests/inferences_tests.py b/tests/inferences_tests.py index 781cc2ec..75a2f46a 100644 --- a/tests/inferences_tests.py +++ b/tests/inferences_tests.py @@ -81,18 +81,21 @@ def test_sample(self): T, n_sample, n_samples_per_param = 1, 10, 100 sampler = PMC([self.model], [likfun], backend, seed = 1) journal = sampler.sample([y_obs], T, n_sample, n_samples_per_param, covFactors = np.array([.1,.1]), iniPoints = None) - mu_post_sample, sigma_post_sample, post_weights = np.array(journal.get_parameters()['mu']), np.array(journal.get_parameters()['sigma']), np.array(journal.get_weights()) + mu_post_sample, sigma_post_sample, post_weights = np.array(journal.get_parameters()['mu']), np.array( + journal.get_parameters()['sigma']), np.array(journal.get_weights()) # Compute posterior mean - mu_post_mean, sigma_post_mean = np.average(mu_post_sample, weights=post_weights, axis=0), np.average(sigma_post_sample, weights=post_weights, axis=0) + mu_post_mean, sigma_post_mean = journal.posterior_mean()['mu'], journal.posterior_mean()['sigma'] # test shape of sample - mu_sample_shape, sigma_sample_shape, weights_sample_shape = np.shape(mu_post_sample), np.shape(mu_post_sample), np.shape(post_weights) + mu_sample_shape, sigma_sample_shape, weights_sample_shape = (len(mu_post_sample), mu_post_sample[0].shape[1]), \ + (len(sigma_post_sample), + sigma_post_sample[0].shape[1]), post_weights.shape self.assertEqual(mu_sample_shape, (10,1)) self.assertEqual(sigma_sample_shape, (10,1)) self.assertEqual(weights_sample_shape, (10,1)) - self.assertLess(abs(mu_post_mean - (-3.56042761)), 1e-3) - self.assertLess(abs(sigma_post_mean - 5.7553691), 1e-3) + self.assertLess(abs(mu_post_mean - (-3.3711206204663764)), 1e-3) + self.assertLess(abs(sigma_post_mean - 6.518520667688998), 1e-3) self.assertFalse(journal.number_of_simulations == 0) @@ -101,18 +104,21 @@ def test_sample(self): T, n_sample, n_samples_per_param = 2, 10, 100 sampler = PMC([self.model], [likfun], backend, seed = 1) journal = sampler.sample([y_obs], T, n_sample, n_samples_per_param, covFactors = np.array([.1,.1]), iniPoints = None) - mu_post_sample, sigma_post_sample, post_weights = np.array(journal.get_parameters()['mu']), np.array(journal.get_parameters()['sigma']), np.array(journal.get_weights()) - + mu_post_sample, sigma_post_sample, post_weights = np.array(journal.get_parameters()['mu']), np.array( + journal.get_parameters()['sigma']), np.array(journal.get_weights()) + # Compute posterior mean - mu_post_mean, sigma_post_mean = np.average(mu_post_sample, weights=post_weights, axis=0), np.average(sigma_post_sample, weights=post_weights, axis=0) + mu_post_mean, sigma_post_mean = journal.posterior_mean()['mu'], journal.posterior_mean()['sigma'] # test shape of sample - mu_sample_shape, sigma_sample_shape, weights_sample_shape = np.shape(mu_post_sample), np.shape(mu_post_sample), np.shape(post_weights) + mu_sample_shape, sigma_sample_shape, weights_sample_shape = (len(mu_post_sample), mu_post_sample[0].shape[1]), \ + (len(sigma_post_sample), + sigma_post_sample[0].shape[1]), post_weights.shape self.assertEqual(mu_sample_shape, (10,1)) self.assertEqual(sigma_sample_shape, (10,1)) self.assertEqual(weights_sample_shape, (10,1)) - self.assertLess(abs(mu_post_mean - (-3.25971092) ), 1e-3) - self.assertLess(abs(sigma_post_mean - 7.76172201), 1e-3) + self.assertLess(abs(mu_post_mean - (-2.970827684425406) ), 1e-3) + self.assertLess(abs(sigma_post_mean - 6.82165619013458), 1e-3) self.assertFalse(journal.number_of_simulations == 0) From 425b994b561d54c5075c3f4beb3f51a06b5681af Mon Sep 17 00:00:00 2001 From: statrita2004 Date: Mon, 4 Feb 2019 13:15:58 +0000 Subject: [PATCH 13/20] Some fixes in SABC regarding newcovmat --- abcpy/inferences.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/abcpy/inferences.py b/abcpy/inferences.py index b57d5d63..5016347f 100644 --- a/abcpy/inferences.py +++ b/abcpy/inferences.py @@ -269,13 +269,12 @@ def _sample_parameter(self, rng, npc=None): self.logger.warn("initial epsilon {:e} is larger than dist_max {:e}" .format(float(self.epsilon), distance)) - theta = np.array(self.get_parameters(self.model)).reshape(-1,) counter = 0 while distance > self.epsilon: # Accept new parameter value if the distance is less than epsilon self.sample_from_prior(rng=rng) - theta = np.array(self.get_parameters(self.model)).reshape(-1,) + theta = self.get_parameters(self.model) y_sim = self.simulate(self.n_samples_per_param, rng=rng, npc=npc) counter+=1 if(y_sim is not None): From b33284679417d3cf2dffd6053c22d1ab18947b62 Mon Sep 17 00:00:00 2001 From: statrita2004 Date: Mon, 4 Feb 2019 16:59:38 +0000 Subject: [PATCH 14/20] Amending SABC, RejectionABC for HD --- abcpy/inferences.py | 29 ++++++++----------- abcpy/perturbationkernel.py | 56 ++++++++++++++++--------------------- tests/inferences_tests.py | 7 +++-- 3 files changed, 41 insertions(+), 51 deletions(-) diff --git a/abcpy/inferences.py b/abcpy/inferences.py index 5016347f..1a3849f2 100644 --- a/abcpy/inferences.py +++ b/abcpy/inferences.py @@ -237,7 +237,6 @@ def sample(self, observations, n_samples, n_samples_per_param, epsilon, full_out distances = np.array(distances) self.accepted_parameters_manager.update_broadcast(self.backend, accepted_parameters=accepted_parameters) - journal.add_accepted_parameters(copy.deepcopy(accepted_parameters)) journal.add_weights(copy.deepcopy(np.ones((n_samples, 1)))) journal.add_distances(copy.deepcopy(distances)) @@ -1126,12 +1125,10 @@ def sample(self, observations, steps, epsilon, n_samples = 10000, n_samples_per_ new_cov_mats = self.kernel.calculate_cov(self.accepted_parameters_manager) accepted_cov_mats = [] for new_cov_mat in new_cov_mats: - if new_cov_mat.shape[0] > 1: - accepted_cov_mats.append( - beta * new_cov_mat + 0.0001 * np.trace(new_cov_mat) * np.eye(new_cov_mat.shape[0])) + if not(new_cov_mat.size == 1): + accepted_cov_mats.append(beta * new_cov_mat + 0.0001 * np.trace(new_cov_mat) * np.eye(new_cov_mat.shape[0])) else: - accepted_cov_mats.append( - beta * new_cov_mat + 0.0001 * (new_cov_mat) * np.eye(new_cov_mat.shape[0])) + accepted_cov_mats.append((beta * new_cov_mat + 0.0001 * new_cov_mat).reshape(1,1)) # Broadcast Accepted Covariance Matrix self.accepted_parameters_manager.update_broadcast(self.backend, accepted_cov_mats=accepted_cov_mats) @@ -1152,7 +1149,7 @@ def sample(self, observations, steps, epsilon, n_samples = 10000, n_samples_per_ self._update_broadcasts(smooth_distances, all_distances) # 1: Calculate parameters - self.logger.info("Initial accepted parameter parameters") + self.logger.info("Initial accepted parameters") params_and_dists_pds = self.backend.map(self._accept_parameter, data_pds) params_and_dists = self.backend.collect(params_and_dists_pds) new_parameters, new_distances, new_all_parameters, new_all_distances, index, acceptance, counter = [list(t) for t in @@ -1214,11 +1211,11 @@ def sample(self, observations, steps, epsilon, n_samples = 10000, n_samples_per_ # 5: Resampling if number of accepted particles greater than resample if accept >= resample and U > 1e-100: - ## Weighted resampling: + self.logger.info("Weighted resampling") weight = np.exp(-smooth_distances * delta / U) weight = weight / sum(weight) - index_resampled = self.rng.choice(np.arange(n_samples), n_samples, replace=1, p=weight) - accepted_parameters = accepted_parameters[index_resampled] + index_resampled = self.rng.choice(np.arange(n_samples, dtype=int), n_samples, replace=1, p=weight) + accepted_parameters = [accepted_parameters[i] for i in index_resampled] smooth_distances = smooth_distances[index_resampled] ## Update U and epsilon: @@ -1244,12 +1241,10 @@ def sample(self, observations, steps, epsilon, n_samples = 10000, n_samples_per_ new_cov_mats = self.kernel.calculate_cov(self.accepted_parameters_manager) accepted_cov_mats = [] for new_cov_mat in new_cov_mats: - if new_cov_mat.shape[0] > 1: - accepted_cov_mats.append( - beta * new_cov_mat + 0.0001 * np.trace(new_cov_mat) * np.eye(new_cov_mat.shape[0])) + if not(new_cov_mat.size == 1): + accepted_cov_mats.append(beta * new_cov_mat + 0.0001 * np.trace(new_cov_mat) * np.eye(new_cov_mat.shape[0])) else: - accepted_cov_mats.append( - beta * new_cov_mat + 0.0001 * (new_cov_mat) * np.eye(new_cov_mat.shape[0])) + accepted_cov_mats.append((beta * new_cov_mat + 0.0001 * new_cov_mat).reshape(1,1)) self.accepted_parameters_manager.update_broadcast(self.backend, accepted_cov_mats=accepted_cov_mats) @@ -1276,10 +1271,10 @@ def sample(self, observations, steps, epsilon, n_samples = 10000, n_samples_per_ new_cov_mats = self.kernel.calculate_cov(self.accepted_parameters_manager) accepted_cov_mats = [] for new_cov_mat in new_cov_mats: - if new_cov_mat.shape[0]>1: + if not(new_cov_mat.size == 1): accepted_cov_mats.append(beta * new_cov_mat + 0.0001 * np.trace(new_cov_mat) * np.eye(new_cov_mat.shape[0])) else: - accepted_cov_mats.append(beta * new_cov_mat + 0.0001 * (new_cov_mat) * np.eye(new_cov_mat.shape[0])) + accepted_cov_mats.append((beta * new_cov_mat + 0.0001 * new_cov_mat).reshape(1,1)) self.accepted_parameters_manager.update_broadcast(self.backend, accepted_cov_mats=accepted_cov_mats) diff --git a/abcpy/perturbationkernel.py b/abcpy/perturbationkernel.py index c29825aa..0addc012 100644 --- a/abcpy/perturbationkernel.py +++ b/abcpy/perturbationkernel.py @@ -268,24 +268,22 @@ def calculate_cov(self, accepted_parameters_manager, kernel_index): list The covariance matrix corresponding to this kernel. """ + continuous_model = [[] for i in + range(len(accepted_parameters_manager.kernel_parameters_bds.value()[kernel_index]))] + for i in range(len(accepted_parameters_manager.kernel_parameters_bds.value()[kernel_index])): + if isinstance(accepted_parameters_manager.kernel_parameters_bds.value()[kernel_index][i][0], + (np.float, np.float32, np.float64, np.int, np.int32, np.int64)): + continuous_model[i] = accepted_parameters_manager.kernel_parameters_bds.value()[kernel_index][i] + else: + continuous_model[i] = np.concatenate( + accepted_parameters_manager.kernel_parameters_bds.value()[kernel_index][i]) + continuous_model = np.array(continuous_model).astype(float) if(accepted_parameters_manager.accepted_weights_bds is not None): weights = accepted_parameters_manager.accepted_weights_bds.value() - continuous_model = [[] for i in range(len(accepted_parameters_manager.kernel_parameters_bds.value()[kernel_index]))] - for i in range(len(accepted_parameters_manager.kernel_parameters_bds.value()[kernel_index])): - if isinstance(accepted_parameters_manager.kernel_parameters_bds.value()[kernel_index][i][0], (np.float, np.float32, np.float64, np.int, np.int32, np.int64)): - continuous_model[i] = accepted_parameters_manager.kernel_parameters_bds.value()[kernel_index][i] - else: - continuous_model[i] = np.concatenate(accepted_parameters_manager.kernel_parameters_bds.value()[kernel_index][i]) - cov = np.cov(np.array(continuous_model).astype(float),aweights=weights.reshape(-1).astype(float), rowvar=False) - #cov = np.cov(np.array(accepted_parameters_manager.kernel_parameters_bds.value()[kernel_index]).astype(float), - # aweights=weights.reshape(-1).astype(float), rowvar=False) + cov = np.cov(continuous_model, aweights=weights.reshape(-1).astype(float), rowvar=False) else: - if(not(accepted_parameters_manager.accepted_parameters_bds.value().shape[1]>1)): - cov = np.var(np.array(accepted_parameters_manager.kernel_parameters_bds.value()[kernel_index]).astype(float)) - else: - cov = np.cov(np.array(accepted_parameters_manager.kernel_parameters_bds.value()[kernel_index]).astype(float), rowvar=False) - + cov = np.cov(continuous_model, rowvar=False) return cov @@ -404,28 +402,22 @@ def calculate_cov(self, accepted_parameters_manager, kernel_index): list The covariance matrix corresponding to this kernel. """ + continuous_model = [[] for i in + range(len(accepted_parameters_manager.kernel_parameters_bds.value()[kernel_index]))] + for i in range(len(accepted_parameters_manager.kernel_parameters_bds.value()[kernel_index])): + if isinstance(accepted_parameters_manager.kernel_parameters_bds.value()[kernel_index][i][0], + (np.float, np.float32, np.float64, np.int, np.int32, np.int64)): + continuous_model[i] = accepted_parameters_manager.kernel_parameters_bds.value()[kernel_index][i] + else: + continuous_model[i] = np.concatenate( + accepted_parameters_manager.kernel_parameters_bds.value()[kernel_index][i]) + continuous_model = np.array(continuous_model).astype(float) if(accepted_parameters_manager.accepted_weights_bds is not None): weights = np.array(accepted_parameters_manager.accepted_weights_bds.value()) - continuous_model = [[] for i in - range(len(accepted_parameters_manager.kernel_parameters_bds.value()[kernel_index]))] - for i in range(len(accepted_parameters_manager.kernel_parameters_bds.value()[kernel_index])): - if isinstance(accepted_parameters_manager.kernel_parameters_bds.value()[kernel_index][i][0], - (np.float, np.float32, np.float64, np.int, np.int32, np.int64)): - continuous_model[i] = accepted_parameters_manager.kernel_parameters_bds.value()[kernel_index][i] - else: - continuous_model[i] = np.concatenate( - accepted_parameters_manager.kernel_parameters_bds.value()[kernel_index][i]) - cov = np.cov(np.array(continuous_model).astype(float), aweights=weights.reshape(-1).astype(float), - rowvar=False) - # cov = np.cov( - # np.array(accepted_parameters_manager.kernel_parameters_bds.value()[kernel_index]).astype(float), aweights=weights.reshape(-1).astype(float), - # rowvar=False) + cov = np.cov(continuous_model, aweights=weights.reshape(-1).astype(float),rowvar=False) else: - if(not(accepted_parameters_manager.accepted_parameters_bds.value().shape[1]>1)): - cov = np.var(np.array(accepted_parameters_manager.kernel_parameters_bds.value()[kernel_index]).astype(float)) - else: - cov = np.cov(np.array(accepted_parameters_manager.kernel_parameters_bds.value()[kernel_index]).astype(float), rowvar=False) + cov = np.cov(continuous_model, rowvar=False) return cov diff --git a/tests/inferences_tests.py b/tests/inferences_tests.py index 75a2f46a..56db701c 100644 --- a/tests/inferences_tests.py +++ b/tests/inferences_tests.py @@ -42,8 +42,11 @@ def test_sample(self): sigma_sample = np.array(journal.get_parameters()['sigma']) # test shape of samples - self.assertEqual(np.shape(mu_sample), (10,1)) - self.assertEqual(np.shape(sigma_sample), (10,1)) + mu_shape, sigma_shape = (len(mu_sample), mu_sample[0].shape[1]), \ + (len(sigma_sample), + sigma_sample[0].shape[1]) + self.assertEqual(mu_shape, (10,1)) + self.assertEqual(sigma_shape, (10,1)) # Compute posterior mean #self.assertAlmostEqual(np.average(np.asarray(samples[:,0])),1.22301,10e-2) From 23b6a07d4d276d08ffae4944ec38fdf26806b7f5 Mon Sep 17 00:00:00 2001 From: statrita2004 Date: Wed, 6 Feb 2019 12:50:28 +0000 Subject: [PATCH 15/20] Introduction of r-hit Kernel for SMCABC, bug fixes in perturbation kernel --- abcpy/inferences.py | 193 ++++++++++++++++++++++++------ abcpy/perturbationkernel.py | 31 ++--- tests/perturbationkernel_tests.py | 2 +- 3 files changed, 171 insertions(+), 55 deletions(-) diff --git a/abcpy/inferences.py b/abcpy/inferences.py index 1a3849f2..bd86ebd7 100644 --- a/abcpy/inferences.py +++ b/abcpy/inferences.py @@ -349,7 +349,7 @@ def __init__(self, root_models, distances, backend, kernel=None, seed=None): self.simulation_counter=0 - def sample(self, observations, steps, epsilon_init, n_samples = 10000, n_samples_per_param = 1, epsilon_percentile = 0, covFactor = 2, full_output=0, journal_file = None): + def sample(self, observations, steps, epsilon_init, n_samples = 10000, n_samples_per_param = 1, epsilon_percentile = 10, covFactor = 2, full_output=0, journal_file = None): """Samples from the posterior distribution of the model parameter given the observed data observations. @@ -368,12 +368,15 @@ def sample(self, observations, steps, epsilon_init, n_samples = 10000, n_samples n_samples_per_param : integer, optional Number of data points in each simulated data set. The default value is 1. epsilon_percentile : float, optional - A value between [0, 100]. The default value is 0, meaning the threshold value provided by the user being used. + A value between [0, 100]. The default value is 10. covFactor : float, optional scaling parameter of the covariance matrix. The default value is 2 as considered in [1]. full_output: integer, optional If full_output==1, intermediate results are included in output journal. The default value is 0, meaning the intermediate results are not saved. + journal_file: str, optional + Filename of a journal file to read an already saved journal file, from which the first iteration will start. + The default value is None. Returns ------- @@ -603,7 +606,8 @@ def _calculate_weight(self, theta, npc=None): mapping_for_kernels, garbage_index = self.accepted_parameters_manager.get_mapping(self.accepted_parameters_manager.model) for i in range(0, self.n_samples): - pdf_value = self.kernel.pdf(mapping_for_kernels, self.accepted_parameters_manager, i, theta) + pdf_value = self.kernel.pdf(mapping_for_kernels, self.accepted_parameters_manager, + self.accepted_parameters_manager.accepted_parameters_bds.value()[i], theta) denominator += self.accepted_parameters_manager.accepted_weights_bds.value()[i, 0] * pdf_value return 1.0 * prior_prob / denominator @@ -697,6 +701,10 @@ def sample(self, observations, steps, n_samples = 10000, n_samples_per_param = 1 full_output: integer, optional If full_output==1, intermediate results are included in output journal. The default value is 0, meaning the intermediate results are not saved. + journal_file: str, optional + Filename of a journal file to read an already saved journal file, from which the first iteration will start. + The default value is None. + Returns ------- @@ -951,7 +959,8 @@ def _calculate_weight(self, theta, npc=None): self.accepted_parameters_manager.model) for i in range(0, self.n_samples): - pdf_value = self.kernel.pdf(mapping_for_kernels, self.accepted_parameters_manager, i, theta) + pdf_value = self.kernel.pdf(mapping_for_kernels, self.accepted_parameters_manager, + self.accepted_parameters_manager.accepted_parameters_bds.value()[i], theta) denominator+=self.accepted_parameters_manager.accepted_weights_bds.value()[i,0]*pdf_value return 1.0 * prior_prob / denominator @@ -1019,7 +1028,8 @@ def __init__(self, root_models, distances, backend, kernel=None, seed=None): self.simulation_counter = 0 - def sample(self, observations, steps, epsilon, n_samples = 10000, n_samples_per_param = 1, beta = 2, delta = 0.2, v = 0.3, ar_cutoff = 0.5, resample = None, n_update = None, adaptcov = 1, full_output=0, journal_file = None): + def sample(self, observations, steps, epsilon, n_samples = 10000, n_samples_per_param = 1, beta = 2, delta = 0.2, + v = 0.3, ar_cutoff = 0.1, resample = None, n_update = None, full_output=0, journal_file = None): """Samples from the posterior distribution of the model parameter given the observed data observations. @@ -1036,22 +1046,23 @@ def sample(self, observations, steps, epsilon, n_samples = 10000, n_samples_per_ n_samples_per_param : integer, optional Number of data points in each simulated data set. The default value is 1. beta : numpy.float - Tuning parameter of SABC + Tuning parameter of SABC, default value is 2. delta : numpy.float - Tuning parameter of SABC + Tuning parameter of SABC, default value is 0.2. v : numpy.float, optional Tuning parameter of SABC, The default value is 0.3. ar_cutoff : numpy.float - Acceptance ratio cutoff, The default value is 0.5 + Acceptance ratio cutoff, The default value is 0.1. resample: int, optional - Resample after this many acceptance, The default value if n_samples + Resample after this many acceptance, The default value is None which takes value inside n_samples n_update: int, optional - Number of perturbed parameters at each step, The default value if n_samples - adaptcov : boolean, optional - Whether we adapt the covariance matrix in iteration stage. The default value TRUE. + Number of perturbed parameters at each step, The default value is None which takes value inside n_samples full_output: integer, optional If full_output==1, intermediate results are included in output journal. The default value is 0, meaning the intermediate results are not saved. + journal_file: str, optional + Filename of a journal file to read an already saved journal file, from which the first iteration will start. + The default value is None. Returns ------- @@ -1078,7 +1089,6 @@ def sample(self, observations, steps, epsilon, n_samples = 10000, n_samples_per_ journal.configuration["ar_cutoff"] = ar_cutoff journal.configuration["resample"] = resample journal.configuration["n_update"] = n_update - journal.configuration["adaptcov"] = adaptcov journal.configuration["full_output"] = full_output else: journal = Journal.fromFile(journal_file) @@ -1207,6 +1217,7 @@ def sample(self, observations, steps, epsilon, n_samples = 10000, n_samples_per_ self.logger.debug(msg) if acceptance_rate < ar_cutoff: broken_preemptively = True + self.logger.debug("Stopping as acceptance rate is lower than cutoff") break # 5: Resampling if number of accepted particles greater than resample @@ -1516,12 +1527,21 @@ def sample(self, observations, steps, n_samples = 10000, n_samples_per_param = 1 A list, containing lists describing the observed data sets steps : integer Number of iterations in the sequential algoritm ("generations") + n_samples : integer, optional + Number of samples to generate. The default value is 10000. + n_samples_per_param : integer, optional + Number of data points in each simulated data set. The default value is 1. + chain_length : int, optional + The length of chains, default value is 10. But should be checked such that this is an divisor of n_samples. ap_change_cutoff : float, optional The cutoff value for the percentage change in the anneal parameter. If the change is less than ap_change_cutoff the iterations are stopped. The default value is 10. full_output: integer, optional If full_output==1, intermediate results are included in output journal. The default value is 0, meaning the intermediate results are not saved. + journal_file: str, optional + Filename of a journal file to read an already saved journal file, from which the first iteration will start. + The default value is None. Returns ------- @@ -1749,8 +1769,8 @@ def _accept_parameter(self, rng_and_index, npc=None): ## Calculate acceptance probability: ratio_prior_prob = self.pdf_of_prior(self.model, perturbation_output[1]) / self.pdf_of_prior(self.model, theta) - kernel_numerator = self.kernel.pdf(mapping_for_kernels, self.accepted_parameters_manager,index, theta) - kernel_denominator = self.kernel.pdf(mapping_for_kernels, self.accepted_parameters_manager, index, perturbation_output[1]) + kernel_numerator = self.kernel.pdf(mapping_for_kernels, self.accepted_parameters_manager, perturbation_output[1], theta) + kernel_denominator = self.kernel.pdf(mapping_for_kernels, self.accepted_parameters_manager, theta, perturbation_output[1]) ratio_likelihood_prob = kernel_numerator / kernel_denominator acceptance_prob = min(1, ratio_prior_prob * ratio_likelihood_prob) * ( new_distance < self.anneal_parameter) @@ -1810,8 +1830,10 @@ def _update_cov_mat(self, rng_t, npc=None): self.logger.debug("Calculate acceptance probability.") ## Calculate acceptance probability: ratio_prior_prob = self.pdf_of_prior(self.model, perturbation_output[1]) / self.pdf_of_prior(self.model, theta) - kernel_numerator = self.kernel.pdf(mapping_for_kernels, self.accepted_parameters_manager,0 , theta) - kernel_denominator = self.kernel.pdf(mapping_for_kernels, self.accepted_parameters_manager,0 , perturbation_output[1]) + kernel_numerator = self.kernel.pdf(mapping_for_kernels, self.accepted_parameters_manager, + perturbation_output[1], theta) + kernel_denominator = self.kernel.pdf(mapping_for_kernels, self.accepted_parameters_manager, theta, + perturbation_output[1]) ratio_likelihood_prob = kernel_numerator / kernel_denominator acceptance_prob = min(1, ratio_prior_prob * ratio_likelihood_prob) * (new_distance < self.anneal_parameter) if rng.binomial(1, acceptance_prob) == 1: @@ -1891,7 +1913,8 @@ def __init__(self, root_models, distances, backend, kernel=None, seed=None): self.simulation_counter = 0 - def sample(self, observations, steps, n_samples = 10000, n_samples_per_param = 1, alpha = 0.1, epsilon_init = 100, epsilon_final = 0.1, const = 0.01, covFactor = 2.0, full_output=0, journal_file = None): + def sample(self, observations, steps, n_samples = 10000, n_samples_per_param = 1, alpha = 0.1, epsilon_init = 100, + epsilon_final = 0.1, const = 0.01, covFactor = 2.0, full_output=0, journal_file = None): """ Samples from the posterior distribution of the model parameter given the observed data observations. @@ -1913,12 +1936,15 @@ def sample(self, observations, steps, n_samples = 10000, n_samples_per_param = 1 epsilon_final : float, optional Terminal value of threshold, the default is 0.1 const : float, optional - A constant to compute acceptance probabilty + A constant to compute acceptance probabilty, the default is 0.01. covFactor : float, optional scaling parameter of the covariance matrix. The default value is 2. full_output: integer, optional If full_output==1, intermediate results are included in output journal. The default value is 0, meaning the intermediate results are not saved. + journal_file: str, optional + Filename of a journal file to read an already saved journal file, from which the first iteration will start. + The default value is None. Returns ------- @@ -2138,8 +2164,8 @@ def _accept_parameter(self, rng, npc=None): counter+=1 distance = self.distance.distance(self.accepted_parameters_manager.observations_bds.value(), y_sim) ratio_prior_prob = self.pdf_of_prior(self.model, perturbation_output[1]) / self.pdf_of_prior(self.model, theta) - kernel_numerator = self.kernel.pdf(mapping_for_kernels, self.accepted_parameters_manager, index[0], theta) - kernel_denominator = self.kernel.pdf(mapping_for_kernels, self.accepted_parameters_manager, index[0], perturbation_output[1]) + kernel_numerator = self.kernel.pdf(mapping_for_kernels, self.accepted_parameters_manager, perturbation_output[1], theta) + kernel_denominator = self.kernel.pdf(mapping_for_kernels, self.accepted_parameters_manager, theta, perturbation_output[1]) ratio_kernel_prob = kernel_numerator / kernel_denominator probability_acceptance = min(1, ratio_prior_prob * ratio_kernel_prob) if distance < self.epsilon[-1] and rng.binomial(1, probability_acceptance) == 1: @@ -2232,12 +2258,15 @@ def sample(self, observations, steps, n_samples = 10000, n_samples_per_param = 1 alpha : float, optional A parameter taking values between [0,1], the default value is 0.1. acceptance_cutoff : float, optional - Acceptance ratio cutoff, should be chosen between 0.01 and 0.05 + Acceptance ratio cutoff, should be chosen between 0.01 and 0.03 covFactor : float, optional scaling parameter of the covariance matrix. The default value is 2. full_output: integer, optional If full_output==1, intermediate results are included in output journal. The default value is 0, meaning the intermediate results are not saved. + journal_file: str, optional + Filename of a journal file to read an already saved journal file, from which the first iteration will start. + The default value is None. Returns ------- @@ -2438,7 +2467,8 @@ def _accept_parameter(self, rng, npc=None): prior_prob = self.pdf_of_prior(self.model, perturbation_output[1]) denominator = 0.0 for i in range(len(self.accepted_parameters_manager.accepted_weights_bds.value())): - pdf_value = self.kernel.pdf(mapping_for_kernels, self.accepted_parameters_manager, index[0], perturbation_output[1]) + pdf_value = self.kernel.pdf(mapping_for_kernels, self.accepted_parameters_manager, + self.accepted_parameters_manager.accepted_parameters_bds.value()[index[0]], perturbation_output[1]) denominator += self.accepted_parameters_manager.accepted_weights_bds.value()[i, 0] * pdf_value weight = 1.0 * prior_prob / denominator @@ -2509,7 +2539,7 @@ def __init__(self, root_models, distances, backend, kernel = None, seed=None): def sample(self, observations, steps, n_samples = 10000, n_samples_per_param = 1, epsilon_final = 0.1, alpha = 0.95, - covFactor = 2, resample = None, full_output=0, journal_file=None): + covFactor = 2, resample = None, full_output=0, which_mcmc_kernel = 0, journal_file=None): """Samples from the posterior distribution of the model parameter given the observed data observations. @@ -2519,20 +2549,27 @@ def sample(self, observations, steps, n_samples = 10000, n_samples_per_param = 1 A list, containing lists describing the observed data sets steps : integer Number of iterations in the sequential algoritm ("generations") - epsilon_final : float, optional - The final threshold value of epsilon to be reached. The default value is 0.1. n_samples : integer, optional Number of samples to generate. The default value is 10000. n_samples_per_param : integer, optional Number of data points in each simulated data set. The default value is 1. + epsilon_final : float, optional + The final threshold value of epsilon to be reached. The default value is 0.1. alpha : float, optional A parameter taking values between [0,1], determinining the rate of change of the threshold epsilon. The - default value is 0.5. + default value is 0.95. covFactor : float, optional scaling parameter of the covariance matrix. The default value is 2. full_output: integer, optional If full_output==1, intermediate results are included in output journal. The default value is 0, meaning the intermediate results are not saved. + which_mcmc_kernel: integer, optional + Specifies which MCMC kernel to be used: '0' kernel suggestd in [1], any other value will use + r-hit kernel suggested by Anthony Lee. + The default value is 0. + journal_file: str, optional + Filename of a journal file to read an already saved journal file, from which the first iteration will start. + The default value is None. Returns ------- @@ -2633,9 +2670,7 @@ def sample(self, observations, steps, n_samples = 10000, n_samples_per_param = 1 self.logger.info("Resampling") # Weighted resampling: index_resampled = self.rng.choice(np.arange(n_samples), n_samples, replace=1, p=new_weights) - print(accepted_parameters) accepted_parameters = accepted_parameters[index_resampled] - print(accepted_parameters) new_weights = np.ones(shape=(n_samples), ) * (1.0 / n_samples) # Update the weights @@ -2671,11 +2706,13 @@ def sample(self, observations, steps, n_samples = 10000, n_samples_per_param = 1 self._update_broadcasts(accepted_y_sim) # calculate resample parameters - self.logger.info("Resampling parameters") - params_and_ysim_pds = self.backend.map(self._accept_parameter, rng_and_index_pds) + self.logger.info("Drawing perturbed sampless") + if which_mcmc_kernel == 0: + params_and_ysim_pds = self.backend.map(self._accept_parameter, rng_and_index_pds) + else: + params_and_ysim_pds = self.backend.map(self._accept_parameter_r_hit_kernel, rng_and_index_pds) params_and_ysim = self.backend.collect(params_and_ysim_pds) new_parameters, new_y_sim, distances, counter = [list(t) for t in zip(*params_and_ysim)] - #new_parameters = np.array(new_parameters) distances = np.array(distances) for count in counter: @@ -2812,7 +2849,6 @@ def _accept_parameter(self, rng_and_index, npc=None): else: if self.accepted_parameters_manager.accepted_weights_bds.value()[index] > 0: theta = self.accepted_parameters_manager.accepted_parameters_bds.value()[index] - #theta = np.array(self.accepted_parameters_manager.accepted_parameters_bds.value()[index]).reshape(-1,) while True: perturbation_output = self.perturb(index, rng=rng) if perturbation_output[0] and self.pdf_of_prior(self.model, perturbation_output[1]) != 0: @@ -2835,9 +2871,8 @@ def _accept_parameter(self, rng_and_index, npc=None): ratio_prior_prob = self.pdf_of_prior(self.model, perturbation_output[1]) / self.pdf_of_prior(self.model, theta) - kernel_numerator = self.kernel.pdf(mapping_for_kernels, self.accepted_parameters_manager, index, theta) - kernel_denominator = self.kernel.pdf(mapping_for_kernels, self.accepted_parameters_manager, index, - perturbation_output[1]) + kernel_numerator = self.kernel.pdf(mapping_for_kernels, self.accepted_parameters_manager, perturbation_output[1], theta) + kernel_denominator = self.kernel.pdf(mapping_for_kernels, self.accepted_parameters_manager, theta, perturbation_output[1]) ratio_likelihood_prob = kernel_numerator / kernel_denominator acceptance_prob = min(1, ratio_data_epsilon * ratio_prior_prob * ratio_likelihood_prob) @@ -2850,4 +2885,90 @@ def _accept_parameter(self, rng_and_index, npc=None): self.set_parameters(self.accepted_parameters_manager.accepted_parameters_bds.value()[index]) y_sim = self.accepted_y_sim_bds.value()[index] distance = self.distance.distance(self.accepted_parameters_manager.observations_bds.value(), y_sim) + return (self.get_parameters(), y_sim, distance, counter) + + def _accept_parameter_r_hit_kernel(self, rng_and_index, npc=None): + """ + Samples a single model parameter and simulate from it until + distance between simulated outcome and the observation is + smaller than epsilon. + + Parameters + ---------- + seed_and_index: numpy.ndarray + 2 dimensional array. The first entry specifies the initial seed for the random number generator. + The second entry defines the index in the data set. + + Returns + ------- + Tuple + The first entry of the tuple is the accepted parameters. The second entry is the simulated data set. + """ + + rng = rng_and_index[0] + index = rng_and_index[1] + rng.seed(rng.randint(np.iinfo(np.uint32).max, dtype=np.uint32)) + + # Set value of r for r-hit kernel + r = 3 + mapping_for_kernels, garbage_index = self.accepted_parameters_manager.get_mapping(self.accepted_parameters_manager.model) + + counter=0 + # print("on seed " + str(seed) + " distance: " + str(distance) + " epsilon: " + str(self.epsilon)) + if self.accepted_parameters_manager.accepted_parameters_bds is None: + self.sample_from_prior(rng=rng) + y_sim = self.simulate(self.n_samples_per_param, rng=rng, npc=npc) + counter+=1 + else: + if self.accepted_parameters_manager.accepted_weights_bds.value()[index] > 0: + theta = self.accepted_parameters_manager.accepted_parameters_bds.value()[index] + + # Sample from theta until we get 'r-1' y_sim inside the epsilon ball + self.set_parameters(theta) + accept_old_arr, y_sim_old_arr, N_old = [], [], 0 + while sum(accept_old_arr) < r-1: + y_sim = self.simulate(self.n_samples_per_param, rng=rng, npc=npc) + y_sim_old_arr.append(y_sim) + if self.distance.distance(self.accepted_parameters_manager.observations_bds.value(), + y_sim) < self.epsilon[-1]: + accept_old_arr.append(N_old) + N_old += 1 + counter += 1 + + # Perturb and sample from the perturbed theta until we get 'r' y_sim inside the epsilon ball + while True: + perturbation_output = self.perturb(index, rng=rng) + if perturbation_output[0] and self.pdf_of_prior(self.model, perturbation_output[1]) != 0: + break + accept_new_arr, y_sim_new_arr, N = [], [], 0 + while sum(accept_new_arr) < r: + y_sim = self.simulate(self.n_samples_per_param, rng=rng, npc=npc) + y_sim_new_arr.append(y_sim) + if self.distance.distance(self.accepted_parameters_manager.observations_bds.value(), + y_sim) < self.epsilon[-1]: + accept_new_arr.append(N) + counter += 1 + N += 1 + + #Calculate acceptance probability + ratio_prior_prob = self.pdf_of_prior(self.model, perturbation_output[1]) / self.pdf_of_prior(self.model, + theta) + kernel_numerator = self.kernel.pdf(mapping_for_kernels, self.accepted_parameters_manager, perturbation_output[1], theta) + kernel_denominator = self.kernel.pdf(mapping_for_kernels, self.accepted_parameters_manager, theta, perturbation_output[1]) + ratio_likelihood_prob = kernel_numerator / kernel_denominator + + acceptance_prob = min(1, (N_old/(N-1)) * ratio_prior_prob * ratio_likelihood_prob) + + if rng.binomial(1, acceptance_prob) == 1: + self.set_parameters(perturbation_output[1]) + # Randomly sample index J + J = rng.choice(accept_new_arr).astype(int) + y_sim = y_sim_new_arr[J] + else: + self.set_parameters(theta) + y_sim = self.accepted_y_sim_bds.value()[index] + else: + self.set_parameters(self.accepted_parameters_manager.accepted_parameters_bds.value()[index]) + y_sim = self.accepted_y_sim_bds.value()[index] + distance = self.distance.distance(self.accepted_parameters_manager.observations_bds.value(), y_sim) return (self.get_parameters(), y_sim, distance, counter) \ No newline at end of file diff --git a/abcpy/perturbationkernel.py b/abcpy/perturbationkernel.py index 0addc012..81ebffb3 100644 --- a/abcpy/perturbationkernel.py +++ b/abcpy/perturbationkernel.py @@ -209,7 +209,7 @@ def update(self, accepted_parameters_manager, row_index, rng=np.random.RandomSta return perturbed_values_including_models - def pdf(self, mapping, accepted_parameters_manager, index, x): + def pdf(self, mapping, accepted_parameters_manager, mean, x): """ Calculates the overall pdf of the kernel. Commonly used to calculate weights. @@ -231,16 +231,15 @@ def pdf(self, mapping, accepted_parameters_manager, index, x): """ result = 1. - for kernel_index, kernel in enumerate(self.kernels): # Define a list containing the parameter values relevant to the current kernel - theta = [] + mean_kernel, theta = [], [] for kernel_model in kernel.models: for model, model_output_index in mapping: if(kernel_model==model): theta.append(x[model_output_index]) - theta = np.array(theta) - result*=kernel.pdf(accepted_parameters_manager, kernel_index, index, theta) + mean_kernel.append(mean[model_output_index]) + result*=kernel.pdf(accepted_parameters_manager, kernel_index, mean_kernel, theta) return result @@ -337,7 +336,7 @@ def update(self, accepted_parameters_manager, kernel_index, row_index, rng=np.ra return perturbed_continuous_values - def pdf(self, accepted_parameters_manager, kernel_index, index, x): + def pdf(self, accepted_parameters_manager, kernel_index, mean, x): """Calculates the pdf of the kernel. Commonly used to calculate weights. @@ -357,15 +356,12 @@ def pdf(self, accepted_parameters_manager, kernel_index, index, x): The pdf evaluated at point x. """ - # Gets the relevant accepted parameters from the manager in order to calculate the pdf - continuous_model_values = accepted_parameters_manager.kernel_parameters_bds.value()[kernel_index] - - if isinstance(continuous_model_values[index][0], (np.float, np.float32, np.float64, np.int, np.int32, np.int64)): - mean = np.array(continuous_model_values[index]).astype(float) + if isinstance(mean[0], (np.float, np.float32, np.float64, np.int, np.int32, np.int64)): + mean = np.array(mean).astype(float) cov = np.array(accepted_parameters_manager.accepted_cov_mats_bds.value()[kernel_index]).astype(float) - return multivariate_normal(mean, cov, allow_singular=True).pdf(x) + return multivariate_normal(mean, cov, allow_singular=True).pdf(np.array(x).astype(float)) else: - mean = np.array(np.concatenate(continuous_model_values[index])).astype(float) + mean = np.array(np.concatenate(mean)).astype(float) cov = np.array(accepted_parameters_manager.accepted_cov_mats_bds.value()[kernel_index]).astype(float) return multivariate_normal(mean, cov, allow_singular=True).pdf(np.concatenate(x)) @@ -487,7 +483,7 @@ def update(self, accepted_parameters_manager, kernel_index, row_index, rng=np.ra return perturbed_continuous_values - def pdf(self, accepted_parameters_manager, kernel_index, index, x): + def pdf(self, accepted_parameters_manager, kernel_index, mean, x): """Calculates the pdf of the kernel. Commonly used to calculate weights. @@ -506,11 +502,10 @@ def pdf(self, accepted_parameters_manager, kernel_index, index, x): float The pdf evaluated at point x. """ - continuous_model_values = accepted_parameters_manager.kernel_parameters_bds.value()[kernel_index][index] - if isinstance(continuous_model_values[0], + if isinstance(mean[0], (np.float, np.float32, np.float64, np.int, np.int32, np.int64)): - mean = np.array(continuous_model_values).astype(float) + mean = np.array(mean).astype(float) cov = np.array(accepted_parameters_manager.accepted_cov_mats_bds.value()[kernel_index]).astype(float) v = self.df @@ -524,7 +519,7 @@ def pdf(self, accepted_parameters_manager, kernel_index, index, x): return density else: - mean = np.array(np.concatenate(continuous_model_values)).astype(float) + mean = np.array(np.concatenate(mean)).astype(float) cov = np.array(accepted_parameters_manager.accepted_cov_mats_bds.value()[kernel_index]).astype(float) v = self.df diff --git a/tests/perturbationkernel_tests.py b/tests/perturbationkernel_tests.py index fa8293da..b55498b7 100644 --- a/tests/perturbationkernel_tests.py +++ b/tests/perturbationkernel_tests.py @@ -95,7 +95,7 @@ def test_return_value(self): mapping, mapping_index = Manager.get_mapping(Manager.model) covs = [[[1,0],[0,1]],[]] Manager.update_broadcast(backend, accepted_cov_mats=covs) - pdf = kernel.pdf(mapping, Manager, 1, [2,0.3,0.1]) + pdf = kernel.pdf(mapping, Manager, Manager.accepted_parameters_bds.value()[1], [2,0.3,0.1]) self.assertTrue(isinstance(pdf, float)) From 2e0a1c7a81c48b7c580946cb0f593e511a88dcfd Mon Sep 17 00:00:00 2001 From: Ritabrata Dutta Date: Wed, 13 Feb 2019 11:18:31 +0000 Subject: [PATCH 16/20] Update requirements.txt --- requirements.txt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/requirements.txt b/requirements.txt index e87f6dc1..9fb82a89 100644 --- a/requirements.txt +++ b/requirements.txt @@ -5,3 +5,5 @@ glmnet==2.0.0 sphinx sphinx_rtd_theme coverage +mpi4py +cloudpickle From 6bece5acbcac7670e6085364e614d4094aa2f216 Mon Sep 17 00:00:00 2001 From: statrita2004 Date: Wed, 13 Feb 2019 21:45:53 +0000 Subject: [PATCH 17/20] Updates in discrete random walk kernel --- README.md | 2 +- abcpy/discretemodels.py | 12 +++++++----- abcpy/inferences.py | 5 ++--- abcpy/perturbationkernel.py | 6 +++--- 4 files changed, 13 insertions(+), 12 deletions(-) diff --git a/README.md b/README.md index 5dbe9df9..08a54ac3 100644 --- a/README.md +++ b/README.md @@ -6,7 +6,7 @@ algorithms and other likelihood-free inference schemes. It presently includes: * RejectionABC * PMCABC (Population Monte Carlo ABC) -* SMCABC (Sequential Monte Carlo ABC) +* SMCABC (Sequential Monte Carlo ABC) * RSMCABC (Replenishment SMC-ABC) * APMCABC (Adaptive Population Monte Carlo ABC) * SABC (Simulated Annealing ABC) diff --git a/abcpy/discretemodels.py b/abcpy/discretemodels.py index 61bb3cb4..858ab18c 100644 --- a/abcpy/discretemodels.py +++ b/abcpy/discretemodels.py @@ -369,9 +369,8 @@ def forward_simulate(self, input_values, k, rng=np.random.RandomState()): list: [np.ndarray] A list containing the sampled values as np-array. """ - - result = np.array(rng.randint(input_values[0], input_values[1], size=k, dtype=np.int64)) - return [np.array([x]) for x in result] + result = np.array(rng.randint(input_values[0], input_values[1]+1, size=k, dtype=np.int64)) + return [np.array([x]).reshape(-1,) for x in result] def get_output_dimension(self): return self._dimension @@ -391,8 +390,11 @@ def pmf(self, input_values, x): float: The pmf evaluated at point x. """ - upperbound, lowerbound = input_values[0], input_values[1] - pmf = 1. / (upperbound - lowerbound + 1) + lowerbound, upperbound = input_values[0], input_values[1] + if x >= lowerbound and x <= upperbound: + pmf = 1. / (upperbound - lowerbound + 1) + else: + pmf = 0 self.calculated_pmf = pmf return pmf diff --git a/abcpy/inferences.py b/abcpy/inferences.py index bd86ebd7..d3fc611a 100644 --- a/abcpy/inferences.py +++ b/abcpy/inferences.py @@ -2564,9 +2564,8 @@ def sample(self, observations, steps, n_samples = 10000, n_samples_per_param = 1 If full_output==1, intermediate results are included in output journal. The default value is 0, meaning the intermediate results are not saved. which_mcmc_kernel: integer, optional - Specifies which MCMC kernel to be used: '0' kernel suggestd in [1], any other value will use - r-hit kernel suggested by Anthony Lee. - The default value is 0. + Specifies which MCMC kernel to be used: '0' kernel suggestd in [1], any other value will use r-hit kernel + suggested by Anthony Lee. The default value is 0. journal_file: str, optional Filename of a journal file to read an already saved journal file, from which the first iteration will start. The default value is None. diff --git a/abcpy/perturbationkernel.py b/abcpy/perturbationkernel.py index 81ebffb3..b399450d 100644 --- a/abcpy/perturbationkernel.py +++ b/abcpy/perturbationkernel.py @@ -574,7 +574,7 @@ def update(self, accepted_parameters_manager, kernel_index, row_index, rng=np.ra # Implement a random walk for the discrete parameter values for discrete_value in discrete_model_values: - perturbed_discrete_values.append(rng.randint(discrete_value - 1, discrete_value + 2)) + perturbed_discrete_values.append(np.array([rng.randint(discrete_value - 1, discrete_value + 2)])) return perturbed_discrete_values @@ -585,10 +585,10 @@ def calculate_cov(self, accepted_parameters_manager, kernel_index): random walk, it returns an empty list. """ - return [] + return np.array([0]).reshape(-1,) - def pmf(self, accepted_parameters_manager, kernel_index, index, x): + def pmf(self, accepted_parameters_manager, kernel_index, mean, x): """ Calculates the pmf of the kernel. Commonly used to calculate weights. From 2fc9594b4f1325d42837b4453babfb2998f2df46 Mon Sep 17 00:00:00 2001 From: statrita2004 Date: Mon, 18 Feb 2019 14:42:32 +0000 Subject: [PATCH 18/20] Correcting the inconsistencies in Documentation --- doc/source/getting_started.rst | 22 +++++++++---------- .../pmcabc_gaussian_summary_selection.py | 2 -- 2 files changed, 11 insertions(+), 13 deletions(-) diff --git a/doc/source/getting_started.rst b/doc/source/getting_started.rst index 74c5c9da..dd3df413 100644 --- a/doc/source/getting_started.rst +++ b/doc/source/getting_started.rst @@ -16,7 +16,7 @@ measurement of heights and the probabilistic model would be Gaussian. .. literalinclude:: ../../examples/extensions/models/gaussian_python/pmcabc_gaussian_model_simple.py :language: python - :lines: 72 + :lines: 74-75, 80-82 :dedent: 4 The Gaussian or Normal model has two parameters: the mean, denoted by :math:`\mu`, and the standard deviation, denoted @@ -29,7 +29,7 @@ between random variables or between random variables and observed data. Each of variables (output of another :py:class:`ProbabilisticModel ` object) or constant values and considered known to the user (:py:class:`Hyperparameters `). If you are interested in implementing your own probabilistic model, -please check the implementing a new model section. +please check the :ref:`implementing a new model section `. To define a parameter of a model as a random variable, you start by assigning a *prior* distribution on them. We can utilize *prior* knowledge about these parameters as *prior* distribution. In the absence of prior knowledge, we still @@ -43,7 +43,7 @@ follows: .. literalinclude:: ../../examples/extensions/models/gaussian_python/pmcabc_gaussian_model_simple.py :language: python - :lines: 74-76, 77-79 + :lines: 76-82 :dedent: 4 We have defined the parameter :math:`\mu` and :math:`\sigma` of the Gaussian model as random variables and assigned @@ -68,7 +68,7 @@ first define a way to extract *summary statistics* from the dataset. .. literalinclude:: ../../examples/extensions/models/gaussian_python/pmcabc_gaussian_model_simple.py :language: python - :lines: 82-83 + :lines: 84-86 :dedent: 4 Next we define the discrepancy measure between the datasets, by defining a distance function (LogReg distance is chosen @@ -79,7 +79,7 @@ the datasets, and then compute the distance between the two statistics. .. literalinclude:: ../../examples/extensions/models/gaussian_python/pmcabc_gaussian_model_simple.py :language: python - :lines: 86-87 + :lines: 88-90 :dedent: 4 Algorithms in ABCpy often require a perturbation kernel, a tool to explore the parameter space. Here, we use the default @@ -89,7 +89,7 @@ discrete. For a more involved example, please consult `Complex Perturbation Kern .. literalinclude:: ../../examples/extensions/models/gaussian_python/pmcabc_gaussian_model_simple.py :language: python - :lines: 90-91 + :lines: 92-94 :dedent: 4 Finally, we need to specify a backend that determines the parallelization framework to use. The example code here uses @@ -99,7 +99,7 @@ available in ABCpy, please consult :ref:`Using Parallelization Backends Date: Tue, 19 Feb 2019 14:28:59 +0000 Subject: [PATCH 19/20] Corrections and fixes in documentation --- abcpy/probabilisticmodels.py | 23 +++++++++- doc/source/user_customization.rst | 73 +++++++++++++++++-------------- 2 files changed, 61 insertions(+), 35 deletions(-) diff --git a/abcpy/probabilisticmodels.py b/abcpy/probabilisticmodels.py index 92df078c..7a3d625c 100644 --- a/abcpy/probabilisticmodels.py +++ b/abcpy/probabilisticmodels.py @@ -680,7 +680,7 @@ def _check_output(self, values): @abstractmethod - def forward_simulate(self, input_values, k, rng, mpi_comm): + def forward_simulate(self, input_values, k, rng, mpi_comm=None): """ Provides the output (pseudo data) from a forward simulation of the current model. @@ -701,6 +701,9 @@ def forward_simulate(self, input_values, k, rng, mpi_comm): rng: Random number generator Defines the random number generator to be used. The default value uses a random seed to initialize the generator. + mpi_comm: MPI communicator object + Defines the MPI communicator object for MPI parallelization. The default value is None, + meaning the forward simulation is not MPI-parallelized. Returns ------- @@ -981,6 +984,9 @@ def forward_simulate(self, input_values, k, rng=np.random.RandomState(), mpi_com The number of samples that should be sampled rng: random number generator The random number generator to be used. + mpi_comm: MPI communicator object + Defines the MPI communicator object for MPI parallelization. The default value is None, + meaning the forward simulation is not MPI-parallelized. Returns ------- @@ -1025,6 +1031,9 @@ def forward_simulate(self, input_values, k, rng=np.random.RandomState(), mpi_com The number of samples that should be sampled rng: random number generator The random number generator to be used. + mpi_comm: MPI communicator object + Defines the MPI communicator object for MPI parallelization. The default value is None, + meaning the forward simulation is not MPI-parallelized. Returns ------- @@ -1067,6 +1076,9 @@ def forward_simulate(self, input_values, k, rng=np.random.RandomState(), mpi_com The number of samples that should be sampled rng: random number generator The random number generator to be used. + mpi_comm: MPI communicator object + Defines the MPI communicator object for MPI parallelization. The default value is None, + meaning the forward simulation is not MPI-parallelized. Returns ------- @@ -1109,6 +1121,9 @@ def forward_simulate(self, input_valus, k, rng=np.random.RandomState(), mpi_comm The number of samples that should be sampled rng: random number generator The random number generator to be used. + mpi_comm: MPI communicator object + Defines the MPI communicator object for MPI parallelization. The default value is None, + meaning the forward simulation is not MPI-parallelized. Returns ------- @@ -1171,6 +1186,9 @@ def forward_simulate(self, input_values, k, rng=np.random.RandomState(), mpi_com The number of samples that should be sampled rng: random number generator The random number generator to be used. + mpi_comm: MPI communicator object + Defines the MPI communicator object for MPI parallelization. The default value is None, + meaning the forward simulation is not MPI-parallelized. Returns ------- @@ -1233,6 +1251,9 @@ def forward_simulate(self, input_values, k, rng=np.random.RandomState(), mpi_com The number of samples that should be sampled rng: random number generator The random number generator to be used. + mpi_comm: MPI communicator object + Defines the MPI communicator object for MPI parallelization. The default value is None, + meaning the forward simulation is not MPI-parallelized. Returns ------- diff --git a/doc/source/user_customization.rst b/doc/source/user_customization.rst index c785fd94..887880ba 100644 --- a/doc/source/user_customization.rst +++ b/doc/source/user_customization.rst @@ -11,12 +11,13 @@ through the details of such a scenario using the (already implemented) Gaussian implement it from scratch. There are two scenarios to use a model: First, we want to use our probabilistic model to explain a relationship between -parameters* (considered random variables for inference) and *observed data*. This is for example the case when we want -to do inference on mechanistic models that do not have a PDF. In this case, our model implementation has to derive from -:py:class:`ProbabilisticModel ` and a few abstract methods have to be -defined, as for example :py:meth:`forward_simulate() `. +*parameters* (considered random variables for inference). In the second case, we use them to explain a relationship between +*parameters* and *observed data*, as example when we want +to do inference on mechanistic models that do not have a PDF. In both these case, our model implementation has to derive from +:py:class:`ProbabilisticModel ` class of ABCpy and a few abstract methods have to be +defined, as for example :py:meth:`forward_simulate() `. -In the second scenario, we want to use the model to build a relationship between *different parameters* (between +In the first scenario, we want to use the model to build a relationship between *different parameters* (between different random variables). Then our model is restricted to either output continuous or discrete parameters in form of a vector. Consequently, the model must derive from either from :py:class:`Continuous ` or :py:class:`Discrete ` and implement the @@ -33,24 +34,27 @@ implement at least the following methods: * :py:meth:`ProbabilisticModels.forward_simulate() ` * :py:meth:`ProbabilisticModels.get_output_dimension() ` -We want our model to work in both described scenarios, so our model also has to conform to the API of +If we want our model to work in both the described scenarios, so our model also has to conform to the API of :py:class:`Continuous ` since the model output, which is the resulting data from a -forward simulation, is from a continuous domain. For completeness, here the abstract methods defined by +forward simulation, is from a continuous domain. For completeness, here are the abstract methods defined by :py:class:`Continuous ` and :py:class:`Discrete -`: +` correspondingly: * :py:meth:`Continuous.pdf() ` * :py:meth:`Discrete.pmf() ` +If we want our model to work only for the second scenario (typically the case for mechanistic or simulator models) and not to be +used to build priors on parameters, we do not need to write the above two abstract methods. + Initializing a New Model ^^^^^^^^^^^^^^^^^^^^^^^^ -Since a Gaussian model generates continous numbers, the newly implement class derives from +Since a Gaussian model generates continous numbers, the newly implemented class derives from :py:class:`Continuous ` and the header look as follows: .. literalinclude:: ../../examples/extensions/models/gaussian_python/pmcabc_gaussian_model_simple.py :language: python - :lines: 7 + :lines: 6, 10 A good way to start implementing a new model is to define a convenient way to initialize it with its input parameters. In ABCpy all input parameters are either independent ProbabilisticModels or Hyperparameters. Thus, they should not be @@ -65,12 +69,12 @@ object to it. However, it would be very inconvenient to initialize our Gaussian model with an InputConnector object. We rather like the init function to accept a list of parameters :code:`[mu, sigma]`, where :code:`mu` is the mean and :code:`sigma` is the standard deviation which are the sole two parameters of our generative Gaussian model. So the idea is to take -a convenient input and transform it it an InputConnection object that in turn can be passed to the initializer of +a convenient input and transform it to an InputConnection object that in turn can be passed to the initializer of the super class. This leads to the following implementation: .. literalinclude:: ../../examples/extensions/models/gaussian_python/pmcabc_gaussian_model_simple.py :language: python - :lines: 12-21 + :lines: 15-24 :dedent: 4 :linenos: @@ -94,7 +98,7 @@ hyperparameters like model = Gaussian([0, 1]) the :code:`from_list()` method will automatically create two HyperParameter objects :code:`HyperParameter(0)` and -:code:`HyperParameter(1)` and will link the our current Gaussian model inputs to them. If we initialize :code:`mu` and +:code:`HyperParameter(1)` and will link our current Gaussian model inputs to them. If we initialize :code:`mu` and :code:`sigma` with existing models like .. code-block:: python @@ -123,7 +127,7 @@ This leads to the following implementation: .. literalinclude:: ../../examples/extensions/models/gaussian_python/pmcabc_gaussian_model_simple.py :language: python - :lines: 24-35 + :lines: 27-38 :dedent: 4 :linenos: @@ -140,7 +144,7 @@ A proper implementation look as follows: .. literalinclude:: ../../examples/extensions/models/gaussian_python/pmcabc_gaussian_model_simple.py :language: python - :lines: 50-60 + :lines: 53-63 :dedent: 4 :linenos: @@ -167,13 +171,13 @@ Then, this function should return :code:`False` as soon as values are out of the .. literalinclude:: ../../examples/extensions/models/gaussian_python/pmcabc_gaussian_model_simple.py :language: python - :lines: 38-43 + :lines: 41-46 :dedent: 4 :linenos: Note that implementing this method is particularly important when using the current model as input for other models, -hence in the second scenario described in `Implementing a new Model`_. In case our model should only be used for the -first scenario, it is safe to omit the check and return true. +hence in the first scenario described in `Implementing a new Model`_. In case our model should only be used for the +second scenario, it is safe to omit the check and return true. Getting the Output Dimension @@ -188,13 +192,13 @@ Since our model generates a single float number in one forward simulation, the i .. literalinclude:: ../../examples/extensions/models/gaussian_python/pmcabc_gaussian_model_simple.py :language: python - :lines: 46-47 + :lines: 49-50 :dedent: 4 :linenos: Note that implementing this method is particularly important when using the current model as input for other models, -hence in the second scenario described in `Implementing a new Model`_. In case our model should only be used for the -first scenario, it is safe to return 1. +hence in the first scenario described in `Implementing a new Model`_. In case our model should only be used for the +second scenario, it is safe to return 1. Calculating the Probability Density Function @@ -211,7 +215,7 @@ as follows: .. literalinclude:: ../../examples/extensions/models/gaussian_python/pmcabc_gaussian_model_simple.py :language: python - :lines: 63-68 + :lines: 66-71 :dedent: 4 :linenos: @@ -363,25 +367,26 @@ calculator should be provided. The following header conforms to this idea: .. literalinclude:: ../../abcpy/distances.py :language: python - :lines: 112-118 + :lines: 113-120 :dedent: 4 Then, we need to define how the distance is calculated. First we compute the summary statistics from the datasets and then compute the distance between the summary statistics. Notice, while computing the summary statistics we save the first dataset and the corresponding summary statistics. This is since we always pass the observed dataset first to the distance function. The observed dataset does not change during an inference computation and thus it is efficient to -compute it once and store it internally. +compute it once and store it internally. (Notice, here the first input data is considered to be the observed data. Hence, +to save computation time of summary statistics from observed data, we save the summary from the observed data ad reuse them.) .. literalinclude:: ../../abcpy/distances.py :language: python - :lines: 134-146 + :lines: 122-156 :dedent: 4 Finally, we need to define the maximal distance that can be obtained from this distance measure. .. literalinclude:: ../../abcpy/distances.py :language: python - :lines: 149-150 + :lines: 159-160 :dedent: 4 The newly defined distance class can be used in the same way as the already existing once. The complete example for this @@ -405,13 +410,13 @@ implemented: .. literalinclude:: ../../abcpy/perturbationkernel.py :language: python - :lines: 99 + :lines: 101 On the other hand, if the kernel is a discrete kernel, we would need the following method: .. literalinclude:: ../../abcpy/perturbationkernel.py :language: python - :lines: 107 + :lines: 109 As an example, we will implement a kernel which perturbs continuous parameters using a multivariate normal distribution (which is already implemented within ABCpy). First, we need to define a constructor. @@ -450,9 +455,9 @@ the values relevant to your kernel, for example the current calculated covarianc Let us now look at the implementation of the method: -.. literalinclude:: ../../examples/extensions/perturbationkernels/multivariate_normal_kernel.py +.. literalinclude:: ../../abcpy/perturbationkernel.py :language: python - :lines: 10, 25-30 + :lines: 254-286 :dedent: 4 Some of the implemented inference algorithms weigh different sets of parameters differently. Therefore, if such weights @@ -479,9 +484,9 @@ pass this argument. Here the implementation for our kernel: -.. literalinclude:: ../../examples/extensions/perturbationkernels/multivariate_normal_kernel.py +.. literalinclude:: ../../abcpy/perturbationkernel.py :language: python - :lines: 32, 53, 56-60 + :lines: 289-336 :dedent: 4 The first line shows how you obtain the values of the parameters that your kernel should perturb. These values are @@ -496,9 +501,9 @@ Continuous Kernel or a Discrete Kernel: This method is implemented as follows for the multivariate normal: -.. literalinclude:: ../../examples/extensions/perturbationkernels/multivariate_normal_kernel.py +.. literalinclude:: ../../abcpy/perturbationkernel.py :language: python - :lines: 62, 83-87 + :lines: 339-366 :dedent: 4 We simply obtain the parameter values and covariance matrix for this kernel and calculate the probability density From 8f84efb7d636141c9e17722f36d4e2960ceef933 Mon Sep 17 00:00:00 2001 From: statrita2004 Date: Wed, 27 Feb 2019 10:39:52 +0000 Subject: [PATCH 20/20] Correcting version, readme for release-0.5.6 --- README.md | 8 ++++---- VERSION | 2 +- doc/source/installation.rst | 2 +- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index 08a54ac3..73d0a215 100644 --- a/README.md +++ b/README.md @@ -26,9 +26,9 @@ scientists by providing # Documentation For more information, check out the -* [Documentation](http://abcpy.readthedocs.io/en/v0.5.5) -* [Examples](https://github.com/eth-cscs/abcpy/tree/v0.5.5/examples) directory and -* [Reference](http://abcpy.readthedocs.io/en/v0.5.5/abcpy.html) +* [Documentation](http://abcpy.readthedocs.io/en/v0.5.6) +* [Examples](https://github.com/eth-cscs/abcpy/tree/v0.5.6/examples) directory and +* [Reference](http://abcpy.readthedocs.io/en/v0.5.6/abcpy.html) Further, we provide a @@ -55,7 +55,7 @@ finally CSCS (Swiss National Super Computing Center) for their generous support. There is a paper in the proceedings of the 2017 PASC conference. In case you use ABCpy for your publication, we would appreciate a citation. You can use -[this](https://github.com/eth-cscs/abcpy/blob/v0.5.5/doc/literature/DuttaS-ABCpy-PASC-2017.bib) +[this](https://github.com/eth-cscs/abcpy/blob/v0.5.6/doc/literature/DuttaS-ABCpy-PASC-2017.bib) BibTex reference. diff --git a/VERSION b/VERSION index d1d899fa..b49b2533 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -0.5.5 +0.5.6 diff --git a/doc/source/installation.rst b/doc/source/installation.rst index 24b05a43..97521a93 100644 --- a/doc/source/installation.rst +++ b/doc/source/installation.rst @@ -35,7 +35,7 @@ To create a package and install it, do make package - pip3 install build/dist/abcpy-0.5.5-py3-none-any.whl + pip3 install build/dist/abcpy-0.5.6-py3-none-any.whl Note that ABCpy requires Python3.