Skip to content

Commit

Permalink
feat: add Novosibirsk PDF (#68)
Browse files Browse the repository at this point in the history
* add novosibirsk, no tests yet

* fix default parameter value

* rename Lambda to lambd

* add tests, and black formatting

* black formatted way too much

* undo more incorrect formatting

* final formattng undoing

* ok I lied, this is final
:

* Update zfit_physics/models/pdf_novosibirsk.py

Co-authored-by: Jonas Eschle <[email protected]>

* Update zfit_physics/models/pdf_novosibirsk.py

Co-authored-by: Jonas Eschle <[email protected]>

* Update zfit_physics/models/pdf_novosibirsk.py

Co-authored-by: Jonas Eschle <[email protected]>

* typo

* revert last two commits

* Update zfit_physics/models/pdf_novosibirsk.py

* pre-commit run -a fixes

* rename parameters in function defs

* test tail edge cases

---------

Co-authored-by: Jonas Eschle <[email protected]>
  • Loading branch information
ikrommyd and jonas-eschle authored Apr 1, 2024
1 parent b260af3 commit 2a8f217
Show file tree
Hide file tree
Showing 6 changed files with 283 additions and 3 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ Major Features and Improvements
- added CMSShape PDF
- added Cruijff PDF
- added ErfExp PDF
- added Novosibirsk PDF
- added Tsallis PDF

Breaking changes
Expand Down
6 changes: 4 additions & 2 deletions tests/test_pdf_erfexp.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,10 +37,12 @@ def test_erfexp_pdf():
# Test PDF here
erfexp, _ = create_erfexp(mu=mu_true, beta=beta_true, gamma=gamma_true, n=n_true, limits=(50, 130))
assert erfexp.pdf(90.0, norm=False).numpy().item() == pytest.approx(
erfexp_numpy(90.0, mu=mu_true, beta=beta_true, gamma=gamma_true, n=n_true), rel=1e-8
erfexp_numpy(90.0, mu=mu_true, beta=beta_true, gamma=gamma_true, n=n_true),
rel=1e-8,
)
assert erfexp.pdf(90.0).numpy().item() == pytest.approx(
erfexp_numpy(90.0, mu=mu_true, beta=beta_true, gamma=gamma_true, n=n_true) / 71.18838, rel=1e-8
erfexp_numpy(90.0, mu=mu_true, beta=beta_true, gamma=gamma_true, n=n_true) / 71.18838,
rel=1e-8,
)
np.testing.assert_allclose(
erfexp.pdf(tf.range(50.0, 130, 10_000), norm=False),
Expand Down
99 changes: 99 additions & 0 deletions tests/test_pdf_novosibirsk.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
"""Tests for Novosibirsk PDF."""

import numpy as np
import pytest
import ROOT
import tensorflow as tf
import zfit
from zfit.core.testing import tester

import zfit_physics as zphys

mu_true = 90.0
sigma_true = 10.0
lambd_true = 0.5

true_integral_dict = {
1e-10: 25.06469498570457,
0.5: 23.021160811717586,
1.0: 18.148056725398746,
10.0: 0.6499205017648246,
}


def create_novosibirsk(mu, sigma, lambd, limits):
obs = zfit.Space("obs1", limits)
novosibirsk = zphys.pdf.Novosibirsk(mu=mu, sigma=sigma, lambd=lambd, obs=obs)
return novosibirsk, obs


def create_and_eval_root_novosibirsk_and_integral(mu, sigma, lambd, limits, x, lower, upper):
obs = ROOT.RooRealVar("obs", "obs", *limits)
peak = ROOT.RooRealVar("peak", "peak", mu)
width = ROOT.RooRealVar("width", "width", sigma)
tail = ROOT.RooRealVar("tail", "tail", lambd)
novosibirsk = ROOT.RooNovosibirsk("novosibirsk", "novosibirsk", obs, peak, width, tail)

out = np.zeros_like(x)
for i, xi in enumerate(x):
obs.setVal(xi)
out[i] = novosibirsk.getVal(ROOT.RooArgSet(obs))

obs.setRange("integrationRange", lower, upper)
integral = novosibirsk.createIntegral(ROOT.RooArgSet(obs), ROOT.RooFit.Range("integrationRange")).getVal()
return out, integral


@pytest.mark.parametrize("lambd_true", [1e-10, 0.5, 1.0, 10.0])
def test_novosibirsk_pdf(lambd_true):
# Teat PDF here
novosibirsk, _ = create_novosibirsk(mu=mu_true, sigma=sigma_true, lambd=lambd_true, limits=(50, 130))
novosibirsk_root_90 = create_and_eval_root_novosibirsk_and_integral(
mu=mu_true, sigma=sigma_true, lambd=lambd_true, limits=(50, 130), x=np.array([90.0]), lower=50, upper=130
)[0]
assert novosibirsk.pdf(90.0).numpy() == pytest.approx(novosibirsk_root_90, rel=1e-5)
test_values = tf.range(50.0, 130, 10_000)
novosibirsk_root_test_values = create_and_eval_root_novosibirsk_and_integral(
mu=mu_true, sigma=sigma_true, lambd=lambd_true, limits=(50, 130), x=test_values, lower=50, upper=130
)[0]
np.testing.assert_allclose(novosibirsk.pdf(test_values).numpy(), novosibirsk_root_test_values, rtol=1e-5)
assert novosibirsk.pdf(test_values) <= novosibirsk.pdf(90.0)
sample = novosibirsk.sample(1000)
assert all(np.isfinite(sample.value())), "Some samples from the Novosibirsk PDF are NaN or infinite"
assert sample.n_events == 1000
assert all(tf.logical_and(50 <= sample.value(), sample.value() <= 130))


@pytest.mark.parametrize("lambd_true", [1e-10, 0.5, 1.0, 10.0])
def test_novosibirsk_integral(lambd_true):
# Test CDF and integral here
novosibirsk, obs = create_novosibirsk(mu=mu_true, sigma=sigma_true, lambd=lambd_true, limits=(50, 130))
full_interval_analytic = novosibirsk.analytic_integrate(obs, norm=False).numpy()
full_interval_numeric = novosibirsk.numeric_integrate(obs, norm=False).numpy()
true_integral = true_integral_dict[lambd_true]
root_integral = create_and_eval_root_novosibirsk_and_integral(
mu=mu_true, sigma=sigma_true, lambd=lambd_true, limits=(50, 130), x=np.array([50, 130]), lower=50, upper=130
)[1]
assert full_interval_analytic == pytest.approx(true_integral, 1e-5)
assert full_interval_numeric == pytest.approx(true_integral, 1e-3)
assert full_interval_analytic == pytest.approx(root_integral, 1e-5)
assert full_interval_numeric == pytest.approx(root_integral, 1e-3)

analytic_integral = novosibirsk.analytic_integrate(limits=(80, 100), norm=False).numpy()
numeric_integral = novosibirsk.numeric_integrate(limits=(80, 100), norm=False).numpy()
root_integral = create_and_eval_root_novosibirsk_and_integral(
mu=mu_true, sigma=sigma_true, lambd=lambd_true, limits=(50, 130), x=np.array([80, 100]), lower=80, upper=100
)[1]
assert analytic_integral == pytest.approx(numeric_integral, 1e-3)
assert analytic_integral == pytest.approx(root_integral, 1e-3)


# register the pdf here and provide sets of working parameter configurations
def novosibirsk_params_factory():
mu = zfit.Parameter("mu", mu_true)
sigma = zfit.Parameter("sigma", sigma_true)
lambd = zfit.Parameter("lambd", lambd_true)
return {"mu": mu, "sigma": sigma, "lambd": lambd}


tester.register_pdf(pdf_class=zphys.pdf.Novosibirsk, params_factories=novosibirsk_params_factory)
1 change: 1 addition & 0 deletions tests/test_pdf_tsallis.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
"""Tests for CMSShape PDF."""

import numpy as np
import pytest
import tensorflow as tf
Expand Down
176 changes: 176 additions & 0 deletions zfit_physics/models/pdf_novosibirsk.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,176 @@
from typing import Optional

import numpy as np
import tensorflow as tf
import zfit
import zfit.z.numpy as znp
from zfit import z
from zfit.core.space import ANY_LOWER, ANY_UPPER, Space
from zfit.util import ztyping


@z.function(wraps="tensor")
def novosibirsk_pdf(x, mu, sigma, lambd):
"""Calculate the Novosibirsk PDF.
Args:
x: value(s) for which the PDF will be calculated.
peak: peak of the distribution.
width: width of the distribution.
tail: tail of the distribution.
Returns:
`tf.Tensor`: The calculated PDF values.
Notes:
Function taken from H. Ikeda et al. NIM A441 (2000), p. 401 (Belle Collaboration)
Based on code from `ROOT <https://root.cern.ch/doc/master/RooNovosibirsk_8cxx_source.html>`_
"""
# rename parameters to follow roofit implementation
peak = mu
width = sigma
tail = lambd
x = z.unstack_x(x)

cond1 = znp.less(znp.abs(tail), 1e-7)
arg = 1.0 - (x - peak) * tail / width

cond2 = znp.less(arg, 1e-7)
log_arg = znp.log(arg)
xi = 2 * np.sqrt(np.log(4.0))

width_zero = (2.0 / xi) * znp.arcsinh(tail * xi * 0.5)
width_zero2 = width_zero**2
exponent = (-0.5 / width_zero2 * log_arg**2) - (width_zero2 * 0.5)
gauss = znp.exp(-0.5 * ((x - peak) / width) ** 2)

return znp.where(cond1, gauss, znp.where(cond2, 0.0, znp.exp(exponent)))


def novosibirsk_integral(limits: ztyping.SpaceType, params: dict, model) -> tf.Tensor:
"""Calculates the analytic integral of the Novosibirsk PDF.
Args:
limits: An object with attribute limit1d.
params: A hashmap from which the parameters that defines the PDF will be extracted.
model: Will be ignored.
Returns:
The calculated integral.
"""
lower, upper = limits.limit1d
mu = params["mu"]
sigma = params["sigma"]
lambd = params["lambd"]

return novosibirsk_integral_func(mu=mu, sigma=sigma, lambd=lambd, lower=lower, upper=upper)


@z.function(wraps="tensor")
def novosibirsk_integral_func(mu, sigma, lambd, lower, upper):
"""Calculate the integral of the Novosibirsk PDF.
Args:
peak: peak of the distribution.
width: width of the distribution.
tail: tail of the distribution.
lower: lower limit of the integral.
upper: upper limit of the integral.
Returns:
`tf.Tensor`: The calculated integral.
Notes:
Based on code from `ROOT <https://root.cern.ch/doc/master/RooNovosibirsk_8cxx_source.html>`_
"""
# rename parameters to follow roofit implementation
peak = mu
width = sigma
tail = lambd
sqrt2 = np.sqrt(2)
sqlog2 = np.sqrt(np.log(2))
sqlog4 = np.sqrt(np.log(4))
log4 = np.log(4)
rootpiby2 = np.sqrt(np.pi / 2)
sqpibylog2 = np.sqrt(np.pi / np.log(2))

cond = znp.less(znp.abs(tail), 1e-7)

xscale = sqrt2 * width
result_gauss = rootpiby2 * width * (tf.math.erf((upper - peak) / xscale) - tf.math.erf((lower - peak) / xscale))

log_argument_A = znp.maximum(((peak - lower) * tail + width) / width, 1e-7)
log_argument_B = znp.maximum(((peak - upper) * tail + width) / width, 1e-7)

term1 = znp.arcsinh(tail * sqlog4)
term1_2 = term1**2
erf_termA = (term1_2 - log4 * znp.log(log_argument_A)) / (2 * term1 * sqlog2)
erf_termB = (term1_2 - log4 * znp.log(log_argument_B)) / (2 * term1 * sqlog2)

result_novosibirsk = 0.5 / tail * width * term1 * (tf.math.erf(erf_termB) - tf.math.erf(erf_termA)) * sqpibylog2

return znp.where(cond, result_gauss, result_novosibirsk)


class Novosibirsk(zfit.pdf.BasePDF):
_N_OBS = 1

def __init__(
self,
mu,
sigma,
lambd,
obs,
*,
extended: Optional[ztyping.ExtendedInputType] = False,
norm: Optional[ztyping.NormInputType] = None,
name: str = "Novosibirsk",
):
"""Novosibirsk PDF.
The Novosibirsk function is a continuous probability density function (PDF) that is used to model
asymmetric peaks in high-energy physics. It is a theoretical Compton spectrum with a logarithmic Gaussian function.
.. math::
f(x;\\sigma, x_0, \\Lambda) = \\exp\\left[
-\\frac{1}{2} \\frac{\\left( \\ln q_y \\right)^2 }{\\Lambda^2} + \\Lambda^2 \\right] \\\\
q_y(x;\\sigma,x_0,\\Lambda) = 1 + \\frac{\\Lambda(x-x_0)}{\\sigma} \\times
\\frac{\\sinh \\left( \\Lambda \\sqrt{\\ln 4} \\right)}{\\Lambda \\sqrt{\\ln 4}}
Args:
mu: The peak of the distribution.
sigma: The width of the distribution.
lambd: The tail of the distribution.
obs: |@doc:pdf.init.obs| Observables of the
model. This will be used as the default space of the PDF and,
if not given explicitly, as the normalization range.
The default space is used for example in the sample method: if no
sampling limits are given, the default space is used.
The observables are not equal to the domain as it does not restrict or
truncate the model outside this range. |@docend:pdf.init.obs|
extended: |@doc:pdf.init.extended| The overall yield of the PDF.
If this is parameter-like, it will be used as the yield,
the expected number of events, and the PDF will be extended.
An extended PDF has additional functionality, such as the
``ext_*`` methods and the ``counts`` (for binned PDFs). |@docend:pdf.init.extended|
norm: |@doc:pdf.init.norm| Normalization of the PDF.
By default, this is the same as the default space of the PDF. |@docend:pdf.init.norm|
name: |@doc:pdf.init.name| Human-readable name
or label of
the PDF for better identification.
Has no programmatical functional purpose as identification. |@docend:pdf.init.name|
"""
params = {"mu": mu, "sigma": sigma, "lambd": lambd}
super().__init__(obs=obs, params=params, name=name, extended=extended, norm=norm)

def _unnormalized_pdf(self, x):
mu = self.params["mu"]
sigma = self.params["sigma"]
lambd = self.params["lambd"]
return novosibirsk_pdf(x, mu=mu, sigma=sigma, lambd=lambd)


novosibirsk_integral_limits = Space(axes=0, limits=(ANY_LOWER, ANY_UPPER))
Novosibirsk.register_analytic_integral(func=novosibirsk_integral, limits=novosibirsk_integral_limits)
3 changes: 2 additions & 1 deletion zfit_physics/pdf.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,8 @@
from .models.pdf_cmsshape import CMSShape
from .models.pdf_cruijff import Cruijff
from .models.pdf_erfexp import ErfExp
from .models.pdf_novosibirsk import Novosibirsk
from .models.pdf_relbw import RelativisticBreitWigner
from .models.pdf_tsallis import Tsallis

__all__ = ["Argus", "RelativisticBreitWigner", "CMSShape", "Cruijff", "ErfExp", "Tsallis"]
__all__ = ["Argus", "RelativisticBreitWigner", "CMSShape", "Cruijff", "ErfExp", "Novosibirsk", "Tsallis"]

0 comments on commit 2a8f217

Please sign in to comment.