From 2a8f21761f20220d929e8ee8ad90817be51c779c Mon Sep 17 00:00:00 2001 From: Iason Krommydas Date: Mon, 1 Apr 2024 16:19:57 -0500 Subject: [PATCH] feat: add `Novosibirsk` PDF (#68) * add novosibirsk, no tests yet * fix default parameter value * rename Lambda to lambd * add tests, and black formatting * black formatted way too much * undo more incorrect formatting * final formattng undoing * ok I lied, this is final : * Update zfit_physics/models/pdf_novosibirsk.py Co-authored-by: Jonas Eschle * Update zfit_physics/models/pdf_novosibirsk.py Co-authored-by: Jonas Eschle * Update zfit_physics/models/pdf_novosibirsk.py Co-authored-by: Jonas Eschle * typo * revert last two commits * Update zfit_physics/models/pdf_novosibirsk.py * pre-commit run -a fixes * rename parameters in function defs * test tail edge cases --------- Co-authored-by: Jonas Eschle --- CHANGELOG.rst | 1 + tests/test_pdf_erfexp.py | 6 +- tests/test_pdf_novosibirsk.py | 99 ++++++++++++++ tests/test_pdf_tsallis.py | 1 + zfit_physics/models/pdf_novosibirsk.py | 176 +++++++++++++++++++++++++ zfit_physics/pdf.py | 3 +- 6 files changed, 283 insertions(+), 3 deletions(-) create mode 100644 tests/test_pdf_novosibirsk.py create mode 100644 zfit_physics/models/pdf_novosibirsk.py diff --git a/CHANGELOG.rst b/CHANGELOG.rst index be992d4..5b7dc20 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -10,6 +10,7 @@ Major Features and Improvements - added CMSShape PDF - added Cruijff PDF - added ErfExp PDF +- added Novosibirsk PDF - added Tsallis PDF Breaking changes diff --git a/tests/test_pdf_erfexp.py b/tests/test_pdf_erfexp.py index c3b8ef3..d9d09f5 100644 --- a/tests/test_pdf_erfexp.py +++ b/tests/test_pdf_erfexp.py @@ -37,10 +37,12 @@ def test_erfexp_pdf(): # Test PDF here erfexp, _ = create_erfexp(mu=mu_true, beta=beta_true, gamma=gamma_true, n=n_true, limits=(50, 130)) assert erfexp.pdf(90.0, norm=False).numpy().item() == pytest.approx( - erfexp_numpy(90.0, mu=mu_true, beta=beta_true, gamma=gamma_true, n=n_true), rel=1e-8 + erfexp_numpy(90.0, mu=mu_true, beta=beta_true, gamma=gamma_true, n=n_true), + rel=1e-8, ) assert erfexp.pdf(90.0).numpy().item() == pytest.approx( - erfexp_numpy(90.0, mu=mu_true, beta=beta_true, gamma=gamma_true, n=n_true) / 71.18838, rel=1e-8 + erfexp_numpy(90.0, mu=mu_true, beta=beta_true, gamma=gamma_true, n=n_true) / 71.18838, + rel=1e-8, ) np.testing.assert_allclose( erfexp.pdf(tf.range(50.0, 130, 10_000), norm=False), diff --git a/tests/test_pdf_novosibirsk.py b/tests/test_pdf_novosibirsk.py new file mode 100644 index 0000000..27453ec --- /dev/null +++ b/tests/test_pdf_novosibirsk.py @@ -0,0 +1,99 @@ +"""Tests for Novosibirsk PDF.""" + +import numpy as np +import pytest +import ROOT +import tensorflow as tf +import zfit +from zfit.core.testing import tester + +import zfit_physics as zphys + +mu_true = 90.0 +sigma_true = 10.0 +lambd_true = 0.5 + +true_integral_dict = { + 1e-10: 25.06469498570457, + 0.5: 23.021160811717586, + 1.0: 18.148056725398746, + 10.0: 0.6499205017648246, +} + + +def create_novosibirsk(mu, sigma, lambd, limits): + obs = zfit.Space("obs1", limits) + novosibirsk = zphys.pdf.Novosibirsk(mu=mu, sigma=sigma, lambd=lambd, obs=obs) + return novosibirsk, obs + + +def create_and_eval_root_novosibirsk_and_integral(mu, sigma, lambd, limits, x, lower, upper): + obs = ROOT.RooRealVar("obs", "obs", *limits) + peak = ROOT.RooRealVar("peak", "peak", mu) + width = ROOT.RooRealVar("width", "width", sigma) + tail = ROOT.RooRealVar("tail", "tail", lambd) + novosibirsk = ROOT.RooNovosibirsk("novosibirsk", "novosibirsk", obs, peak, width, tail) + + out = np.zeros_like(x) + for i, xi in enumerate(x): + obs.setVal(xi) + out[i] = novosibirsk.getVal(ROOT.RooArgSet(obs)) + + obs.setRange("integrationRange", lower, upper) + integral = novosibirsk.createIntegral(ROOT.RooArgSet(obs), ROOT.RooFit.Range("integrationRange")).getVal() + return out, integral + + +@pytest.mark.parametrize("lambd_true", [1e-10, 0.5, 1.0, 10.0]) +def test_novosibirsk_pdf(lambd_true): + # Teat PDF here + novosibirsk, _ = create_novosibirsk(mu=mu_true, sigma=sigma_true, lambd=lambd_true, limits=(50, 130)) + novosibirsk_root_90 = create_and_eval_root_novosibirsk_and_integral( + mu=mu_true, sigma=sigma_true, lambd=lambd_true, limits=(50, 130), x=np.array([90.0]), lower=50, upper=130 + )[0] + assert novosibirsk.pdf(90.0).numpy() == pytest.approx(novosibirsk_root_90, rel=1e-5) + test_values = tf.range(50.0, 130, 10_000) + novosibirsk_root_test_values = create_and_eval_root_novosibirsk_and_integral( + mu=mu_true, sigma=sigma_true, lambd=lambd_true, limits=(50, 130), x=test_values, lower=50, upper=130 + )[0] + np.testing.assert_allclose(novosibirsk.pdf(test_values).numpy(), novosibirsk_root_test_values, rtol=1e-5) + assert novosibirsk.pdf(test_values) <= novosibirsk.pdf(90.0) + sample = novosibirsk.sample(1000) + assert all(np.isfinite(sample.value())), "Some samples from the Novosibirsk PDF are NaN or infinite" + assert sample.n_events == 1000 + assert all(tf.logical_and(50 <= sample.value(), sample.value() <= 130)) + + +@pytest.mark.parametrize("lambd_true", [1e-10, 0.5, 1.0, 10.0]) +def test_novosibirsk_integral(lambd_true): + # Test CDF and integral here + novosibirsk, obs = create_novosibirsk(mu=mu_true, sigma=sigma_true, lambd=lambd_true, limits=(50, 130)) + full_interval_analytic = novosibirsk.analytic_integrate(obs, norm=False).numpy() + full_interval_numeric = novosibirsk.numeric_integrate(obs, norm=False).numpy() + true_integral = true_integral_dict[lambd_true] + root_integral = create_and_eval_root_novosibirsk_and_integral( + mu=mu_true, sigma=sigma_true, lambd=lambd_true, limits=(50, 130), x=np.array([50, 130]), lower=50, upper=130 + )[1] + assert full_interval_analytic == pytest.approx(true_integral, 1e-5) + assert full_interval_numeric == pytest.approx(true_integral, 1e-3) + assert full_interval_analytic == pytest.approx(root_integral, 1e-5) + assert full_interval_numeric == pytest.approx(root_integral, 1e-3) + + analytic_integral = novosibirsk.analytic_integrate(limits=(80, 100), norm=False).numpy() + numeric_integral = novosibirsk.numeric_integrate(limits=(80, 100), norm=False).numpy() + root_integral = create_and_eval_root_novosibirsk_and_integral( + mu=mu_true, sigma=sigma_true, lambd=lambd_true, limits=(50, 130), x=np.array([80, 100]), lower=80, upper=100 + )[1] + assert analytic_integral == pytest.approx(numeric_integral, 1e-3) + assert analytic_integral == pytest.approx(root_integral, 1e-3) + + +# register the pdf here and provide sets of working parameter configurations +def novosibirsk_params_factory(): + mu = zfit.Parameter("mu", mu_true) + sigma = zfit.Parameter("sigma", sigma_true) + lambd = zfit.Parameter("lambd", lambd_true) + return {"mu": mu, "sigma": sigma, "lambd": lambd} + + +tester.register_pdf(pdf_class=zphys.pdf.Novosibirsk, params_factories=novosibirsk_params_factory) diff --git a/tests/test_pdf_tsallis.py b/tests/test_pdf_tsallis.py index 14ecdd4..2d46bca 100644 --- a/tests/test_pdf_tsallis.py +++ b/tests/test_pdf_tsallis.py @@ -1,4 +1,5 @@ """Tests for CMSShape PDF.""" + import numpy as np import pytest import tensorflow as tf diff --git a/zfit_physics/models/pdf_novosibirsk.py b/zfit_physics/models/pdf_novosibirsk.py new file mode 100644 index 0000000..ef00d58 --- /dev/null +++ b/zfit_physics/models/pdf_novosibirsk.py @@ -0,0 +1,176 @@ +from typing import Optional + +import numpy as np +import tensorflow as tf +import zfit +import zfit.z.numpy as znp +from zfit import z +from zfit.core.space import ANY_LOWER, ANY_UPPER, Space +from zfit.util import ztyping + + +@z.function(wraps="tensor") +def novosibirsk_pdf(x, mu, sigma, lambd): + """Calculate the Novosibirsk PDF. + + Args: + x: value(s) for which the PDF will be calculated. + peak: peak of the distribution. + width: width of the distribution. + tail: tail of the distribution. + + Returns: + `tf.Tensor`: The calculated PDF values. + + Notes: + Function taken from H. Ikeda et al. NIM A441 (2000), p. 401 (Belle Collaboration) + Based on code from `ROOT `_ + """ + # rename parameters to follow roofit implementation + peak = mu + width = sigma + tail = lambd + x = z.unstack_x(x) + + cond1 = znp.less(znp.abs(tail), 1e-7) + arg = 1.0 - (x - peak) * tail / width + + cond2 = znp.less(arg, 1e-7) + log_arg = znp.log(arg) + xi = 2 * np.sqrt(np.log(4.0)) + + width_zero = (2.0 / xi) * znp.arcsinh(tail * xi * 0.5) + width_zero2 = width_zero**2 + exponent = (-0.5 / width_zero2 * log_arg**2) - (width_zero2 * 0.5) + gauss = znp.exp(-0.5 * ((x - peak) / width) ** 2) + + return znp.where(cond1, gauss, znp.where(cond2, 0.0, znp.exp(exponent))) + + +def novosibirsk_integral(limits: ztyping.SpaceType, params: dict, model) -> tf.Tensor: + """Calculates the analytic integral of the Novosibirsk PDF. + + Args: + limits: An object with attribute limit1d. + params: A hashmap from which the parameters that defines the PDF will be extracted. + model: Will be ignored. + + Returns: + The calculated integral. + """ + lower, upper = limits.limit1d + mu = params["mu"] + sigma = params["sigma"] + lambd = params["lambd"] + + return novosibirsk_integral_func(mu=mu, sigma=sigma, lambd=lambd, lower=lower, upper=upper) + + +@z.function(wraps="tensor") +def novosibirsk_integral_func(mu, sigma, lambd, lower, upper): + """Calculate the integral of the Novosibirsk PDF. + + Args: + peak: peak of the distribution. + width: width of the distribution. + tail: tail of the distribution. + lower: lower limit of the integral. + upper: upper limit of the integral. + + Returns: + `tf.Tensor`: The calculated integral. + + Notes: + Based on code from `ROOT `_ + """ + # rename parameters to follow roofit implementation + peak = mu + width = sigma + tail = lambd + sqrt2 = np.sqrt(2) + sqlog2 = np.sqrt(np.log(2)) + sqlog4 = np.sqrt(np.log(4)) + log4 = np.log(4) + rootpiby2 = np.sqrt(np.pi / 2) + sqpibylog2 = np.sqrt(np.pi / np.log(2)) + + cond = znp.less(znp.abs(tail), 1e-7) + + xscale = sqrt2 * width + result_gauss = rootpiby2 * width * (tf.math.erf((upper - peak) / xscale) - tf.math.erf((lower - peak) / xscale)) + + log_argument_A = znp.maximum(((peak - lower) * tail + width) / width, 1e-7) + log_argument_B = znp.maximum(((peak - upper) * tail + width) / width, 1e-7) + + term1 = znp.arcsinh(tail * sqlog4) + term1_2 = term1**2 + erf_termA = (term1_2 - log4 * znp.log(log_argument_A)) / (2 * term1 * sqlog2) + erf_termB = (term1_2 - log4 * znp.log(log_argument_B)) / (2 * term1 * sqlog2) + + result_novosibirsk = 0.5 / tail * width * term1 * (tf.math.erf(erf_termB) - tf.math.erf(erf_termA)) * sqpibylog2 + + return znp.where(cond, result_gauss, result_novosibirsk) + + +class Novosibirsk(zfit.pdf.BasePDF): + _N_OBS = 1 + + def __init__( + self, + mu, + sigma, + lambd, + obs, + *, + extended: Optional[ztyping.ExtendedInputType] = False, + norm: Optional[ztyping.NormInputType] = None, + name: str = "Novosibirsk", + ): + """Novosibirsk PDF. + + The Novosibirsk function is a continuous probability density function (PDF) that is used to model + asymmetric peaks in high-energy physics. It is a theoretical Compton spectrum with a logarithmic Gaussian function. + + .. math:: + f(x;\\sigma, x_0, \\Lambda) = \\exp\\left[ + -\\frac{1}{2} \\frac{\\left( \\ln q_y \\right)^2 }{\\Lambda^2} + \\Lambda^2 \\right] \\\\ + q_y(x;\\sigma,x_0,\\Lambda) = 1 + \\frac{\\Lambda(x-x_0)}{\\sigma} \\times + \\frac{\\sinh \\left( \\Lambda \\sqrt{\\ln 4} \\right)}{\\Lambda \\sqrt{\\ln 4}} + + Args: + mu: The peak of the distribution. + sigma: The width of the distribution. + lambd: The tail of the distribution. + obs: |@doc:pdf.init.obs| Observables of the + model. This will be used as the default space of the PDF and, + if not given explicitly, as the normalization range. + + The default space is used for example in the sample method: if no + sampling limits are given, the default space is used. + + The observables are not equal to the domain as it does not restrict or + truncate the model outside this range. |@docend:pdf.init.obs| + extended: |@doc:pdf.init.extended| The overall yield of the PDF. + If this is parameter-like, it will be used as the yield, + the expected number of events, and the PDF will be extended. + An extended PDF has additional functionality, such as the + ``ext_*`` methods and the ``counts`` (for binned PDFs). |@docend:pdf.init.extended| + norm: |@doc:pdf.init.norm| Normalization of the PDF. + By default, this is the same as the default space of the PDF. |@docend:pdf.init.norm| + name: |@doc:pdf.init.name| Human-readable name + or label of + the PDF for better identification. + Has no programmatical functional purpose as identification. |@docend:pdf.init.name| + """ + params = {"mu": mu, "sigma": sigma, "lambd": lambd} + super().__init__(obs=obs, params=params, name=name, extended=extended, norm=norm) + + def _unnormalized_pdf(self, x): + mu = self.params["mu"] + sigma = self.params["sigma"] + lambd = self.params["lambd"] + return novosibirsk_pdf(x, mu=mu, sigma=sigma, lambd=lambd) + + +novosibirsk_integral_limits = Space(axes=0, limits=(ANY_LOWER, ANY_UPPER)) +Novosibirsk.register_analytic_integral(func=novosibirsk_integral, limits=novosibirsk_integral_limits) diff --git a/zfit_physics/pdf.py b/zfit_physics/pdf.py index b15e200..eaa103e 100644 --- a/zfit_physics/pdf.py +++ b/zfit_physics/pdf.py @@ -2,7 +2,8 @@ from .models.pdf_cmsshape import CMSShape from .models.pdf_cruijff import Cruijff from .models.pdf_erfexp import ErfExp +from .models.pdf_novosibirsk import Novosibirsk from .models.pdf_relbw import RelativisticBreitWigner from .models.pdf_tsallis import Tsallis -__all__ = ["Argus", "RelativisticBreitWigner", "CMSShape", "Cruijff", "ErfExp", "Tsallis"] +__all__ = ["Argus", "RelativisticBreitWigner", "CMSShape", "Cruijff", "ErfExp", "Novosibirsk", "Tsallis"]