diff --git a/docs/api.rst b/docs/api.rst index 98103f6..9180b15 100644 --- a/docs/api.rst +++ b/docs/api.rst @@ -109,6 +109,14 @@ Default PDFs third_moment.third_moment_expansion third_moment.compute_third_moments +Simple PDFs +~~~~~~~~~~~ + +.. autosummary:: + :toctree: _generated/ + + simple_pdf.Poisson + Exceptions ---------- diff --git a/docs/plugins.rst b/docs/plugins.rst index e2da93c..c0d2e76 100644 --- a/docs/plugins.rst +++ b/docs/plugins.rst @@ -283,6 +283,38 @@ Once again, the exclusion limit can be computed as * ``analysis`` (optional): Unique identifier for the analysis. * ``xsection`` (optional): Cross-section value for the signal hypothesis. Units determined by the user. +``'default_pdf.poisson'`` +~~~~~~~~~~~~~~~~~~~~~~~~~ + +Simple Poisson implementation without uncertainties which can be described as follows; + +.. math:: + + \mathcal{L}(\mu) = \prod_{i\in{\rm bins}}{\rm Poiss}(n^i|\mu n_s^i + n_b^i) + +It can take any number of yields. + +.. code-block:: python3 + :linenos: + + >>> pdf_wrapper = spey.get_backend("default_pdf.poisson") + >>> statistical_model = pdf_wrapper( + ... signal_yields=[12.0, 15.0], + ... background_yields=[50.0,48.0], + ... data=[36, 33], + ... analysis="example", + ... xsection=0.123, + ... ) + +**Arguments:** + + * ``signal_yields``: keyword for signal yields. It can take one or more values as a list or NumPy array. + * ``background_yields``: keyword for background-only expectations. It can take one or more values as a list or NumPy array. + * ``data``: keyword for observations. It can take one or more values as a list or NumPy array. + * ``analysis`` (optional): Unique identifier for the analysis. + * ``xsection`` (optional): Cross-section value for the signal hypothesis. Units determined by the user. + + External Plug-ins ----------------- diff --git a/docs/releases/changelog-v0.1.md b/docs/releases/changelog-v0.1.md index afb31ae..3d312fa 100644 --- a/docs/releases/changelog-v0.1.md +++ b/docs/releases/changelog-v0.1.md @@ -1,4 +1,4 @@ -# Release notes v0.1.2 +# Release notes v0.1.3 ## New features since last release @@ -9,6 +9,10 @@ * $\chi^2$ function has been extended to compute background + signal vs background only model. ([#17](https://github.com/SpeysideHEP/spey/pull/17)) +* Poisson based likelihood constructor without uncertainties has been added + (Request by Veronica Sanz for EFT studies). + ([#22](https://github.com/SpeysideHEP/spey/pull/22)) + ## Improvements * Backend inspection has been added for the models that act like intermediate functions. @@ -23,6 +27,9 @@ chi-square computer. See issue [#19](https://github.com/SpeysideHEP/spey/issues/19). ([#20](https://github.com/SpeysideHEP/spey/pull/20)) +* Execution error fix during likelihood computation for models with single nuisance parameter. + ([#22](https://github.com/SpeysideHEP/spey/pull/22)) + ## Contributors This release contains contributions from (in alphabetical order): diff --git a/setup.py b/setup.py index 6e6acce..1a964d1 100644 --- a/setup.py +++ b/setup.py @@ -19,6 +19,7 @@ "default_pdf.correlated_background = spey.backends.default_pdf:CorrelatedBackground", "default_pdf.third_moment_expansion = spey.backends.default_pdf:ThirdMomentExpansion", "default_pdf.effective_sigma = spey.backends.default_pdf:EffectiveSigma", + "default_pdf.poisson = spey.backends.default_pdf.simple_pdf:Poisson", ] setup( diff --git a/src/spey/_version.py b/src/spey/_version.py index 64e8e2a..f4376b6 100644 --- a/src/spey/_version.py +++ b/src/spey/_version.py @@ -1,3 +1,3 @@ """Version number (major.minor.patch[-label])""" -__version__ = "0.1.2" +__version__ = "0.1.3" diff --git a/src/spey/backends/default_pdf/simple_pdf.py b/src/spey/backends/default_pdf/simple_pdf.py new file mode 100644 index 0000000..e0cacb5 --- /dev/null +++ b/src/spey/backends/default_pdf/simple_pdf.py @@ -0,0 +1,289 @@ +"""This file contains basic likelihood implementations""" + +from typing import Callable, List, Optional, Text, Tuple, Union + +from autograd import grad, hessian +from autograd import numpy as np + +from spey._version import __version__ +from spey.backends.distributions import MainModel +from spey.base import BackendBase, ModelConfig +from spey.utils import ExpectationType + +# pylint: disable=E1101,E1120,W0613 + + +class Poisson(BackendBase): + r""" + Poisson distribution without uncertainty implementation. + + .. math:: + + \mathcal{L}(\mu) = \prod_{i\in{\rm bins}}{\rm Poiss}(n^i|\mu n_s^i + n_b^i) + + where :math:`n_{s,b}` are signal and background yields and :math:`n` are the observations. + + Args: + signal_yields (``List[float]``): signal yields + background_yields (``List[float]``): background yields + data (``List[int]``): data + """ + + name: Text = "default_pdf.poisson" + """Name of the backend""" + version: Text = __version__ + """Version of the backend""" + author: Text = "SpeysideHEP" + """Author of the backend""" + spey_requires: Text = __version__ + """Spey version required for the backend""" + + __slots__ = ["_model", "_main_model"] + + def __init__( + self, + signal_yields: List[float], + background_yields: List[float], + data: List[int], + ): + self.data = np.array(data, dtype=np.float64) + self.signal_yields = np.array(signal_yields, dtype=np.float64) + self.background_yields = np.array(background_yields, dtype=np.float64) + + minimum_poi = -np.inf + if self.is_alive: + minimum_poi = -np.min( + self.background_yields[self.signal_yields > 0.0] + / self.signal_yields[self.signal_yields > 0.0] + ) + + self._main_model = None + + self._config = ModelConfig( + poi_index=0, + minimum_poi=minimum_poi, + suggested_init=[1.0], + suggested_bounds=[(minimum_poi, 10)], + ) + + @property + def is_alive(self) -> bool: + """Returns True if at least one bin has non-zero signal yield.""" + return np.any(self.signal_yields > 0.0) + + def config( + self, allow_negative_signal: bool = True, poi_upper_bound: float = 10.0 + ) -> ModelConfig: + r""" + Model configuration. + + Args: + allow_negative_signal (``bool``, default ``True``): If ``True`` :math:`\hat\mu` + value will be allowed to be negative. + poi_upper_bound (``float``, default ``40.0``): upper bound for parameter + of interest, :math:`\mu`. + + Returns: + ~spey.base.ModelConfig: + Model configuration. Information regarding the position of POI in + parameter list, suggested input and bounds. + """ + if allow_negative_signal and poi_upper_bound == 10.0: + return self._config + + return ModelConfig( + self._config.poi_index, + self._config.minimum_poi, + self._config.suggested_init, + [(0, poi_upper_bound)], + ) + + @property + def main_model(self) -> MainModel: + """retreive the main model distribution""" + if self._main_model is None: + + def lam(pars: np.ndarray) -> np.ndarray: + """ + Compute lambda for Main model with third moment expansion. + For details see above eq 2.6 in :xref:`1809.05548` + + Args: + pars (``np.ndarray``): nuisance parameters + + Returns: + ``np.ndarray``: + expectation value of the poisson distribution with respect to + nuisance parameters. + """ + return pars[0] * self.signal_yields + self.background_yields + + self._main_model = MainModel(lam) + + return self._main_model + + def get_objective_function( + self, + expected: ExpectationType = ExpectationType.observed, + data: Optional[np.ndarray] = None, + do_grad: bool = True, + ) -> Callable[[np.ndarray], Union[Tuple[float, np.ndarray], float]]: + r""" + Objective function i.e. negative log-likelihood, :math:`-\log\mathcal{L}(\mu, \theta)` + + Args: + expected (~spey.ExpectationType): Sets which values the fitting algorithm should focus and + p-values to be computed. + + * :obj:`~spey.ExpectationType.observed`: Computes the p-values with via post-fit + prescriotion which means that the experimental data will be assumed to be the truth + (default). + * :obj:`~spey.ExpectationType.aposteriori`: Computes the expected p-values with via + post-fit prescriotion which means that the experimental data will be assumed to be + the truth. + * :obj:`~spey.ExpectationType.apriori`: Computes the expected p-values with via pre-fit + prescription which means that the SM will be assumed to be the truth. + data (``np.ndarray``, default ``None``): input data that to fit + do_grad (``bool``, default ``True``): If ``True`` return objective and its gradient + as ``tuple`` if ``False`` only returns objective function. + + Returns: + ``Callable[[np.ndarray], Union[float, Tuple[float, np.ndarray]]]``: + Function which takes fit parameters (:math:`\mu` and :math:`\theta`) and returns either + objective or objective and its gradient. + """ + current_data = ( + self.background_yields if expected == ExpectationType.apriori else self.data + ) + data = current_data if data is None else data + + def negative_loglikelihood(pars: np.ndarray) -> np.ndarray: + """Compute twice negative log-likelihood""" + return -self.main_model.log_prob(pars, data) + + if do_grad: + grad_negative_loglikelihood = grad(negative_loglikelihood, argnum=0) + return lambda pars: ( + negative_loglikelihood(pars), + grad_negative_loglikelihood(pars), + ) + + return negative_loglikelihood + + def get_logpdf_func( + self, + expected: ExpectationType = ExpectationType.observed, + data: Optional[np.array] = None, + ) -> Callable[[np.ndarray, np.ndarray], float]: + r""" + Generate function to compute :math:`\log\mathcal{L}(\mu, \theta)` where :math:`\mu` is the + parameter of interest and :math:`\theta` are nuisance parameters. + + Args: + expected (~spey.ExpectationType): Sets which values the fitting algorithm should focus and + p-values to be computed. + + * :obj:`~spey.ExpectationType.observed`: Computes the p-values with via post-fit + prescriotion which means that the experimental data will be assumed to be the truth + (default). + * :obj:`~spey.ExpectationType.aposteriori`: Computes the expected p-values with via + post-fit prescriotion which means that the experimental data will be assumed to be + the truth. + * :obj:`~spey.ExpectationType.apriori`: Computes the expected p-values with via pre-fit + prescription which means that the SM will be assumed to be the truth. + data (``np.array``, default ``None``): input data that to fit + + Returns: + ``Callable[[np.ndarray], float]``: + Function that takes fit parameters (:math:`\mu` and :math:`\theta`) and computes + :math:`\log\mathcal{L}(\mu, \theta)`. + """ + current_data = ( + self.background_yields if expected == ExpectationType.apriori else self.data + ) + data = current_data if data is None else data + + return lambda pars: self.main_model.log_prob(pars, data) + + def get_hessian_logpdf_func( + self, + expected: ExpectationType = ExpectationType.observed, + data: Optional[np.ndarray] = None, + ) -> Callable[[np.ndarray], float]: + r""" + Currently Hessian of :math:`\log\mathcal{L}(\mu, \theta)` is only used to compute + variance on :math:`\mu`. This method returns a callable function which takes fit + parameters (:math:`\mu` and :math:`\theta`) and returns Hessian. + + Args: + expected (~spey.ExpectationType): Sets which values the fitting algorithm should focus and + p-values to be computed. + + * :obj:`~spey.ExpectationType.observed`: Computes the p-values with via post-fit + prescriotion which means that the experimental data will be assumed to be the truth + (default). + * :obj:`~spey.ExpectationType.aposteriori`: Computes the expected p-values with via + post-fit prescriotion which means that the experimental data will be assumed to be + the truth. + * :obj:`~spey.ExpectationType.apriori`: Computes the expected p-values with via pre-fit + prescription which means that the SM will be assumed to be the truth. + data (``np.ndarray``, default ``None``): input data that to fit + + Returns: + ``Callable[[np.ndarray], float]``: + Function that takes fit parameters (:math:`\mu` and :math:`\theta`) and + returns Hessian of :math:`\log\mathcal{L}(\mu, \theta)`. + """ + current_data = ( + self.background_yields if expected == ExpectationType.apriori else self.data + ) + data = current_data if data is None else data + + def log_prob(pars: np.ndarray) -> np.ndarray: + """Compute log-probability""" + return self.main_model.log_prob(pars, data) + + return hessian(log_prob, argnum=0) + + def get_sampler(self, pars: np.ndarray) -> Callable[[int], np.ndarray]: + r""" + Retreives the function to sample from. + + Args: + pars (``np.ndarray``): fit parameters (:math:`\mu` and :math:`\theta`) + include_auxiliary (``bool``): wether or not to include auxiliary data + coming from the constraint model. + + Returns: + ``Callable[[int, bool], np.ndarray]``: + Function that takes ``number_of_samples`` as input and draws as many samples + from the statistical model. + """ + + def sampler(sample_size: int, *args, **kwargs) -> np.ndarray: + """ + Fucntion to generate samples. + + Args: + sample_size (``int``): number of samples to be generated. + + Returns: + ``np.ndarray``: + generated samples + """ + return self.main_model.sample(pars, sample_size) + + return sampler + + def expected_data(self, pars: List[float], **kwargs) -> List[float]: + r""" + Compute the expected value of the statistical model + + Args: + pars (``List[float]``): nuisance, :math:`\theta` and parameter of interest, + + Returns: + ``List[float]``: + Expected data of the statistical model + """ + return self.main_model.expected_data(pars) diff --git a/src/spey/base/hypotest_base.py b/src/spey/base/hypotest_base.py index a91d698..ea26ecf 100644 --- a/src/spey/base/hypotest_base.py +++ b/src/spey/base/hypotest_base.py @@ -345,16 +345,17 @@ def chi2( value of the :math:`\chi^2`. """ if poi_test_denominator is None: - denominator = self.maximize_likelihood( + _, denominator = self.maximize_likelihood( expected=expected, allow_negative_signal=allow_negative_signal, **kwargs - )[-1] + ) else: denominator = self.likelihood( poi_test=poi_test_denominator, expected=expected, **kwargs ) return 2.0 * ( - self.likelihood(poi_test=poi_test, expected=expected, **kwargs) - denominator + self.likelihood(poi_test=poi_test, expected=expected, **kwargs) + - denominator ) def _prepare_for_hypotest( @@ -630,7 +631,6 @@ def exclusion_confidence_level( ) elif calculator == "toy": - signal_samples = self.fixed_poi_sampler( poi_test=poi_test, size=self.ntoys, expected=expected, **kwargs ) @@ -707,7 +707,8 @@ def maximize_likelihood( pvalues = [ 1.0 - chi2.cdf( - chi_square, 1 if isinstance(poi_test, (float, int)) else len(poi_test) + chi_square, + 1 if isinstance(poi_test, (float, int)) else len(poi_test), ) ] expected_pvalues = pvalues @@ -769,7 +770,9 @@ def significance( logpdf, maximum_asimov_likelihood, logpdf_asimov, - ) = self._prepare_for_hypotest(expected=expected, test_statistics="q0", **kwargs) + ) = self._prepare_for_hypotest( + expected=expected, test_statistics="q0", **kwargs + ) sqrt_q0, sqrt_q0A, delta_teststat = compute_teststatistics( 0.0, diff --git a/src/spey/interface/statistical_model.py b/src/spey/interface/statistical_model.py index 6f5b564..4b71967 100644 --- a/src/spey/interface/statistical_model.py +++ b/src/spey/interface/statistical_model.py @@ -166,7 +166,8 @@ def excluded_cross_section( raise UnknownCrossSection("Cross-section value has not been initialised.") return ( - self.poi_upper_limit(expected=expected, confidence_level=0.95) * self.xsection + self.poi_upper_limit(expected=expected, confidence_level=0.95) + * self.xsection ) @property @@ -299,7 +300,7 @@ def likelihood( fit_opts["model_configuration"].npar == 1 and fit_opts["model_configuration"].poi_index is not None ): - logpdf = fit_opts["logpdf"](poi_test) + logpdf = fit_opts["logpdf"]([poi_test]) else: logpdf, _ = fit( **fit_opts,