feat: add Novosibirsk PDF (#68)

* add novosibirsk, no tests yet * fix default parameter value * rename Lambda to lambd * add tests, and black formatting * black formatted way too much * undo more incorrect formatting * final formattng undoing * ok I lied, this is final : * Update zfit_physics/models/pdf_novosibirsk.py Co-authored-by: Jonas Eschle <[email protected]> * Update zfit_physics/models/pdf_novosibirsk.py Co-authored-by: Jonas Eschle <[email protected]> * Update zfit_physics/models/pdf_novosibirsk.py Co-authored-by: Jonas Eschle <[email protected]> * typo * revert last two commits * Update zfit_physics/models/pdf_novosibirsk.py * pre-commit run -a fixes * rename parameters in function defs * test tail edge cases --------- Co-authored-by: Jonas Eschle <[email protected]>
zfit · Apr 1, 2024 · 2a8f217 · 2a8f217
1 parent b260af3
commit 2a8f217
Show file tree

Hide file tree

Showing 6 changed files with 283 additions and 3 deletions.
diff --git a/CHANGELOG.rst b/CHANGELOG.rst
@@ -10,6 +10,7 @@ Major Features and Improvements
 - added CMSShape PDF
 - added Cruijff PDF
 - added ErfExp PDF
+- added Novosibirsk PDF
 - added Tsallis PDF
 
 Breaking changes

diff --git a/tests/test_pdf_erfexp.py b/tests/test_pdf_erfexp.py
@@ -37,10 +37,12 @@ def test_erfexp_pdf():
     # Test PDF here
     erfexp, _ = create_erfexp(mu=mu_true, beta=beta_true, gamma=gamma_true, n=n_true, limits=(50, 130))
     assert erfexp.pdf(90.0, norm=False).numpy().item() == pytest.approx(
-        erfexp_numpy(90.0, mu=mu_true, beta=beta_true, gamma=gamma_true, n=n_true), rel=1e-8
+        erfexp_numpy(90.0, mu=mu_true, beta=beta_true, gamma=gamma_true, n=n_true),
+        rel=1e-8,
     )
     assert erfexp.pdf(90.0).numpy().item() == pytest.approx(
-        erfexp_numpy(90.0, mu=mu_true, beta=beta_true, gamma=gamma_true, n=n_true) / 71.18838, rel=1e-8
+        erfexp_numpy(90.0, mu=mu_true, beta=beta_true, gamma=gamma_true, n=n_true) / 71.18838,
+        rel=1e-8,
     )
     np.testing.assert_allclose(
         erfexp.pdf(tf.range(50.0, 130, 10_000), norm=False),

diff --git a/tests/test_pdf_novosibirsk.py b/tests/test_pdf_novosibirsk.py
@@ -0,0 +1,99 @@
+"""Tests for Novosibirsk PDF."""
+
+import numpy as np
+import pytest
+import ROOT
+import tensorflow as tf
+import zfit
+from zfit.core.testing import tester
+
+import zfit_physics as zphys
+
+mu_true = 90.0
+sigma_true = 10.0
+lambd_true = 0.5
+
+true_integral_dict = {
+    1e-10: 25.06469498570457,
+    0.5: 23.021160811717586,
+    1.0: 18.148056725398746,
+    10.0: 0.6499205017648246,
+}
+
+
+def create_novosibirsk(mu, sigma, lambd, limits):
+    obs = zfit.Space("obs1", limits)
+    novosibirsk = zphys.pdf.Novosibirsk(mu=mu, sigma=sigma, lambd=lambd, obs=obs)
+    return novosibirsk, obs
+
+
+def create_and_eval_root_novosibirsk_and_integral(mu, sigma, lambd, limits, x, lower, upper):
+    obs = ROOT.RooRealVar("obs", "obs", *limits)
+    peak = ROOT.RooRealVar("peak", "peak", mu)
+    width = ROOT.RooRealVar("width", "width", sigma)
+    tail = ROOT.RooRealVar("tail", "tail", lambd)
+    novosibirsk = ROOT.RooNovosibirsk("novosibirsk", "novosibirsk", obs, peak, width, tail)
+
+    out = np.zeros_like(x)
+    for i, xi in enumerate(x):
+        obs.setVal(xi)
+        out[i] = novosibirsk.getVal(ROOT.RooArgSet(obs))
+
+    obs.setRange("integrationRange", lower, upper)
+    integral = novosibirsk.createIntegral(ROOT.RooArgSet(obs), ROOT.RooFit.Range("integrationRange")).getVal()
+    return out, integral
+
+
+@pytest.mark.parametrize("lambd_true", [1e-10, 0.5, 1.0, 10.0])
+def test_novosibirsk_pdf(lambd_true):
+    # Teat PDF here
+    novosibirsk, _ = create_novosibirsk(mu=mu_true, sigma=sigma_true, lambd=lambd_true, limits=(50, 130))
+    novosibirsk_root_90 = create_and_eval_root_novosibirsk_and_integral(
+        mu=mu_true, sigma=sigma_true, lambd=lambd_true, limits=(50, 130), x=np.array([90.0]), lower=50, upper=130
+    )[0]
+    assert novosibirsk.pdf(90.0).numpy() == pytest.approx(novosibirsk_root_90, rel=1e-5)
+    test_values = tf.range(50.0, 130, 10_000)
+    novosibirsk_root_test_values = create_and_eval_root_novosibirsk_and_integral(
+        mu=mu_true, sigma=sigma_true, lambd=lambd_true, limits=(50, 130), x=test_values, lower=50, upper=130
+    )[0]
+    np.testing.assert_allclose(novosibirsk.pdf(test_values).numpy(), novosibirsk_root_test_values, rtol=1e-5)
+    assert novosibirsk.pdf(test_values) <= novosibirsk.pdf(90.0)
+    sample = novosibirsk.sample(1000)
+    assert all(np.isfinite(sample.value())), "Some samples from the Novosibirsk PDF are NaN or infinite"
+    assert sample.n_events == 1000
+    assert all(tf.logical_and(50 <= sample.value(), sample.value() <= 130))
+
+
+@pytest.mark.parametrize("lambd_true", [1e-10, 0.5, 1.0, 10.0])
+def test_novosibirsk_integral(lambd_true):
+    # Test CDF and integral here
+    novosibirsk, obs = create_novosibirsk(mu=mu_true, sigma=sigma_true, lambd=lambd_true, limits=(50, 130))
+    full_interval_analytic = novosibirsk.analytic_integrate(obs, norm=False).numpy()
+    full_interval_numeric = novosibirsk.numeric_integrate(obs, norm=False).numpy()
+    true_integral = true_integral_dict[lambd_true]
+    root_integral = create_and_eval_root_novosibirsk_and_integral(
+        mu=mu_true, sigma=sigma_true, lambd=lambd_true, limits=(50, 130), x=np.array([50, 130]), lower=50, upper=130
+    )[1]
+    assert full_interval_analytic == pytest.approx(true_integral, 1e-5)
+    assert full_interval_numeric == pytest.approx(true_integral, 1e-3)
+    assert full_interval_analytic == pytest.approx(root_integral, 1e-5)
+    assert full_interval_numeric == pytest.approx(root_integral, 1e-3)
+
+    analytic_integral = novosibirsk.analytic_integrate(limits=(80, 100), norm=False).numpy()
+    numeric_integral = novosibirsk.numeric_integrate(limits=(80, 100), norm=False).numpy()
+    root_integral = create_and_eval_root_novosibirsk_and_integral(
+        mu=mu_true, sigma=sigma_true, lambd=lambd_true, limits=(50, 130), x=np.array([80, 100]), lower=80, upper=100
+    )[1]
+    assert analytic_integral == pytest.approx(numeric_integral, 1e-3)
+    assert analytic_integral == pytest.approx(root_integral, 1e-3)
+
+
+# register the pdf here and provide sets of working parameter configurations
+def novosibirsk_params_factory():
+    mu = zfit.Parameter("mu", mu_true)
+    sigma = zfit.Parameter("sigma", sigma_true)
+    lambd = zfit.Parameter("lambd", lambd_true)
+    return {"mu": mu, "sigma": sigma, "lambd": lambd}
+
+
+tester.register_pdf(pdf_class=zphys.pdf.Novosibirsk, params_factories=novosibirsk_params_factory)
diff --git a/tests/test_pdf_tsallis.py b/tests/test_pdf_tsallis.py
@@ -1,4 +1,5 @@
 """Tests for CMSShape PDF."""
+
 import numpy as np
 import pytest
 import tensorflow as tf

diff --git a/zfit_physics/models/pdf_novosibirsk.py b/zfit_physics/models/pdf_novosibirsk.py
@@ -0,0 +1,176 @@
+from typing import Optional
+
+import numpy as np
+import tensorflow as tf
+import zfit
+import zfit.z.numpy as znp
+from zfit import z
+from zfit.core.space import ANY_LOWER, ANY_UPPER, Space
+from zfit.util import ztyping
+
+
+@z.function(wraps="tensor")
+def novosibirsk_pdf(x, mu, sigma, lambd):
+    """Calculate the Novosibirsk PDF.
+
+    Args:
+        x: value(s) for which the PDF will be calculated.
+        peak: peak of the distribution.
+        width: width of the distribution.
+        tail: tail of the distribution.
+
+    Returns:
+        `tf.Tensor`: The calculated PDF values.
+
+    Notes:
+        Function taken from H. Ikeda et al. NIM A441 (2000), p. 401 (Belle Collaboration)
+        Based on code from `ROOT <https://root.cern.ch/doc/master/RooNovosibirsk_8cxx_source.html>`_
+    """
+    # rename parameters to follow roofit implementation
+    peak = mu
+    width = sigma
+    tail = lambd
+    x = z.unstack_x(x)
+
+    cond1 = znp.less(znp.abs(tail), 1e-7)
+    arg = 1.0 - (x - peak) * tail / width
+
+    cond2 = znp.less(arg, 1e-7)
+    log_arg = znp.log(arg)
+    xi = 2 * np.sqrt(np.log(4.0))
+
+    width_zero = (2.0 / xi) * znp.arcsinh(tail * xi * 0.5)
+    width_zero2 = width_zero**2
+    exponent = (-0.5 / width_zero2 * log_arg**2) - (width_zero2 * 0.5)
+    gauss = znp.exp(-0.5 * ((x - peak) / width) ** 2)
+
+    return znp.where(cond1, gauss, znp.where(cond2, 0.0, znp.exp(exponent)))
+
+
+def novosibirsk_integral(limits: ztyping.SpaceType, params: dict, model) -> tf.Tensor:
+    """Calculates the analytic integral of the Novosibirsk PDF.
+
+    Args:
+        limits: An object with attribute limit1d.
+        params: A hashmap from which the parameters that defines the PDF will be extracted.
+        model: Will be ignored.
+
+    Returns:
+        The calculated integral.
+    """
+    lower, upper = limits.limit1d
+    mu = params["mu"]
+    sigma = params["sigma"]
+    lambd = params["lambd"]
+
+    return novosibirsk_integral_func(mu=mu, sigma=sigma, lambd=lambd, lower=lower, upper=upper)
+
+
+@z.function(wraps="tensor")
+def novosibirsk_integral_func(mu, sigma, lambd, lower, upper):
+    """Calculate the integral of the Novosibirsk PDF.
+
+    Args:
+        peak: peak of the distribution.
+        width: width of the distribution.
+        tail: tail of the distribution.
+        lower: lower limit of the integral.
+        upper: upper limit of the integral.
+
+    Returns:
+        `tf.Tensor`: The calculated integral.
+
+    Notes:
+        Based on code from `ROOT <https://root.cern.ch/doc/master/RooNovosibirsk_8cxx_source.html>`_
+    """
+    # rename parameters to follow roofit implementation
+    peak = mu
+    width = sigma
+    tail = lambd
+    sqrt2 = np.sqrt(2)
+    sqlog2 = np.sqrt(np.log(2))
+    sqlog4 = np.sqrt(np.log(4))
+    log4 = np.log(4)
+    rootpiby2 = np.sqrt(np.pi / 2)
+    sqpibylog2 = np.sqrt(np.pi / np.log(2))
+
+    cond = znp.less(znp.abs(tail), 1e-7)
+
+    xscale = sqrt2 * width
+    result_gauss = rootpiby2 * width * (tf.math.erf((upper - peak) / xscale) - tf.math.erf((lower - peak) / xscale))
+
+    log_argument_A = znp.maximum(((peak - lower) * tail + width) / width, 1e-7)
+    log_argument_B = znp.maximum(((peak - upper) * tail + width) / width, 1e-7)
+
+    term1 = znp.arcsinh(tail * sqlog4)
+    term1_2 = term1**2
+    erf_termA = (term1_2 - log4 * znp.log(log_argument_A)) / (2 * term1 * sqlog2)
+    erf_termB = (term1_2 - log4 * znp.log(log_argument_B)) / (2 * term1 * sqlog2)
+
+    result_novosibirsk = 0.5 / tail * width * term1 * (tf.math.erf(erf_termB) - tf.math.erf(erf_termA)) * sqpibylog2
+
+    return znp.where(cond, result_gauss, result_novosibirsk)
+
+
+class Novosibirsk(zfit.pdf.BasePDF):
+    _N_OBS = 1
+
+    def __init__(
+        self,
+        mu,
+        sigma,
+        lambd,
+        obs,
+        *,
+        extended: Optional[ztyping.ExtendedInputType] = False,
+        norm: Optional[ztyping.NormInputType] = None,
+        name: str = "Novosibirsk",
+    ):
+        """Novosibirsk PDF.
+
+        The Novosibirsk function is a continuous probability density function (PDF) that is used to model
+        asymmetric peaks in high-energy physics. It is a theoretical Compton spectrum with a logarithmic Gaussian function.
+
+        .. math::
+            f(x;\\sigma, x_0, \\Lambda) = \\exp\\left[
+                -\\frac{1}{2} \\frac{\\left( \\ln q_y \\right)^2 }{\\Lambda^2} + \\Lambda^2 \\right] \\\\
+            q_y(x;\\sigma,x_0,\\Lambda) = 1 + \\frac{\\Lambda(x-x_0)}{\\sigma} \\times
+            \\frac{\\sinh \\left( \\Lambda \\sqrt{\\ln 4} \\right)}{\\Lambda \\sqrt{\\ln 4}}
+
+        Args:
+            mu: The peak of the distribution.
+            sigma: The width of the distribution.
+            lambd: The tail of the distribution.
+            obs: |@doc:pdf.init.obs| Observables of the
+               model. This will be used as the default space of the PDF and,
+               if not given explicitly, as the normalization range.
+
+               The default space is used for example in the sample method: if no
+               sampling limits are given, the default space is used.
+
+               The observables are not equal to the domain as it does not restrict or
+               truncate the model outside this range. |@docend:pdf.init.obs|
+            extended: |@doc:pdf.init.extended| The overall yield of the PDF.
+               If this is parameter-like, it will be used as the yield,
+               the expected number of events, and the PDF will be extended.
+               An extended PDF has additional functionality, such as the
+               ``ext_*`` methods and the ``counts`` (for binned PDFs). |@docend:pdf.init.extended|
+            norm: |@doc:pdf.init.norm| Normalization of the PDF.
+               By default, this is the same as the default space of the PDF. |@docend:pdf.init.norm|
+            name: |@doc:pdf.init.name| Human-readable name
+               or label of
+               the PDF for better identification.
+               Has no programmatical functional purpose as identification. |@docend:pdf.init.name|
+        """
+        params = {"mu": mu, "sigma": sigma, "lambd": lambd}
+        super().__init__(obs=obs, params=params, name=name, extended=extended, norm=norm)
+
+    def _unnormalized_pdf(self, x):
+        mu = self.params["mu"]
+        sigma = self.params["sigma"]
+        lambd = self.params["lambd"]
+        return novosibirsk_pdf(x, mu=mu, sigma=sigma, lambd=lambd)
+
+
+novosibirsk_integral_limits = Space(axes=0, limits=(ANY_LOWER, ANY_UPPER))
+Novosibirsk.register_analytic_integral(func=novosibirsk_integral, limits=novosibirsk_integral_limits)
diff --git a/zfit_physics/pdf.py b/zfit_physics/pdf.py
@@ -2,7 +2,8 @@
 from .models.pdf_cmsshape import CMSShape
 from .models.pdf_cruijff import Cruijff
 from .models.pdf_erfexp import ErfExp
+from .models.pdf_novosibirsk import Novosibirsk
 from .models.pdf_relbw import RelativisticBreitWigner
 from .models.pdf_tsallis import Tsallis
 
-__all__ = ["Argus", "RelativisticBreitWigner", "CMSShape", "Cruijff", "ErfExp", "Tsallis"]
+__all__ = ["Argus", "RelativisticBreitWigner", "CMSShape", "Cruijff", "ErfExp", "Novosibirsk", "Tsallis"]