Skip to content

Commit

Permalink
Merge pull request #63 from ikrommyd/feat-cmsshape
Browse files Browse the repository at this point in the history
feat: add CMSShape PDF
  • Loading branch information
jonas-eschle authored Mar 15, 2024
2 parents 251f4e2 + 4779dca commit 1b431ca
Show file tree
Hide file tree
Showing 5 changed files with 224 additions and 1 deletion.
1 change: 1 addition & 0 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ Develop

Major Features and Improvements
-------------------------------
- added CMSShape PDF

Breaking changes
------------------
Expand Down
1 change: 1 addition & 0 deletions requirements_dev.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ bumpversion>=0.5.3
coverage>=4.5.1
coverage>=4.5.1
flake8>=3.5.0
numba-stats @ git+https://github.com/HDembinski/numba-stats.git # CMSShape not yet released (expected 1.8.0)
pip>=9.0.1
pre-commit
pytest>=3.4.2
Expand Down
76 changes: 76 additions & 0 deletions tests/test_pdf_cmsshape.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
"""Tests for CMSShape PDF."""
import numpy as np
import pytest
import tensorflow as tf
import zfit
from numba_stats import cmsshape as cmsshape_numba

# Important, do the imports below
from zfit.core.testing import tester

import zfit_physics as zphys

# specify globals here. Do NOT add any TensorFlow but just pure python
m_true = 90.0
beta_true = 0.2
gamma_true = 0.3


def create_cmsshape(m, beta, gamma, limits):
obs = zfit.Space("obs1", limits)
cmsshape = zphys.pdf.CMSShape(m=m, beta=beta, gamma=gamma, obs=obs)
return cmsshape, obs


def test_cmsshape_pdf():
# Test PDF here
cmsshape, _ = create_cmsshape(m=m_true, beta=beta_true, gamma=gamma_true, limits=(50, 130))
assert zfit.run(cmsshape.pdf(90.0)) == pytest.approx(
cmsshape_numba.pdf(90.0, beta=beta_true, gamma=gamma_true, loc=m_true).item(), rel=1e-5
)
np.testing.assert_allclose(
cmsshape.pdf(tf.range(50.0, 130, 10_000)),
cmsshape_numba.pdf(tf.range(50.0, 130, 10_000).numpy(), beta=beta_true, gamma=gamma_true, loc=m_true),
rtol=1e-5,
)
assert cmsshape.pdf(tf.range(50.0, 130, 10_000)) <= cmsshape.pdf(90.0)

sample = cmsshape.sample(1000)
tf.debugging.assert_all_finite(sample.value(), "Some samples from the cmsshape PDF are NaN or infinite")
assert sample.n_events == 1000
assert all(tf.logical_and(50 <= sample.value(), sample.value() <= 130))


def test_cmsshape_integral():
# Test CDF and integral here
cmsshape, obs = create_cmsshape(m=m_true, beta=beta_true, gamma=gamma_true, limits=(50, 130))
full_interval_analytic = zfit.run(cmsshape.analytic_integrate(obs, norm_range=False))
full_interval_numeric = zfit.run(cmsshape.numeric_integrate(obs, norm_range=False))
true_integral = 0.99999
numba_stats_full_integral = cmsshape_numba.cdf(
130, beta=beta_true, gamma=gamma_true, loc=m_true
) - cmsshape_numba.cdf(50, beta=beta_true, gamma=gamma_true, loc=m_true)
assert full_interval_analytic == pytest.approx(true_integral, 1e-5)
assert full_interval_numeric == pytest.approx(true_integral, 1e-5)
assert full_interval_analytic == pytest.approx(numba_stats_full_integral, 1e-8)
assert full_interval_numeric == pytest.approx(numba_stats_full_integral, 1e-8)

analytic_integral = zfit.run(cmsshape.analytic_integrate(limits=(80, 100), norm_range=False))
numeric_integral = zfit.run(cmsshape.numeric_integrate(limits=(80, 100), norm_range=False))
numba_stats_integral = cmsshape_numba.cdf(100, beta=beta_true, gamma=gamma_true, loc=m_true) - cmsshape_numba.cdf(
80, beta=beta_true, gamma=gamma_true, loc=m_true
)
assert analytic_integral == pytest.approx(numeric_integral, 1e-8)
assert analytic_integral == pytest.approx(numba_stats_integral, 1e-8)


# register the pdf here and provide sets of working parameter configurations
def cmsshape_params_factory():
m = zfit.Parameter("m", m_true)
beta = zfit.Parameter("beta", beta_true)
gamma = zfit.Parameter("gamma", gamma_true)

return {"m": m, "beta": beta, "gamma": gamma}


tester.register_pdf(pdf_class=zphys.pdf.CMSShape, params_factories=cmsshape_params_factory)
144 changes: 144 additions & 0 deletions zfit_physics/models/pdf_cmsshape.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,144 @@
from typing import Optional

import tensorflow as tf
import zfit
from zfit import z
from zfit.core.space import ANY_LOWER, ANY_UPPER, Space
from zfit.util import ztyping


@z.function(wraps="tensor")
def cmsshape_pdf_func(x, m, beta, gamma):
"""Calculate the CMSShape PDF.
Args:
x: value(s) for which the PDF will be calculated.
m: approximate center of the disribution.
beta: steepness of the error function.
gamma: steepness of the exponential distribution.
Returns:
`tf.Tensor`: The calculated PDF values.
Notes:
Based on code from `spark_tnp <https://gitlab.cern.ch/cms-muonPOG/spark_tnp/-/blob/Spark3/RooCMSShape.cc>`_ and
`numba-stats <https://github.com/HDembinski/numba-stats/blob/main/src/numba_stats/cmsshape.py>`_.
"""
x = z.unstack_x(x)
half = 0.5
two = 2.0
t1 = tf.math.exp(-gamma * (x - m))
t2 = tf.math.erfc(-beta * (x - m))
t3 = half * gamma * tf.math.exp(-((half * gamma / beta) ** two))
return t1 * t2 * t3


@z.function(wraps="tensor")
def cmsshape_cdf_func(x, m, beta, gamma):
"""Analtical function for the CDF of the CMSShape distribution.
Args:
x: value(s) for which the CDF will be calculated.
m: approximate center of the distribution.
beta: steepness of the error function.
gamma: steepness of the exponential distribution.
Returns:
`tf.Tensor`: The calculated CDF values.
Notes:
Based on code from `spark_tnp <https://gitlab.cern.ch/cms-muonPOG/spark_tnp/-/blob/Spark3/RooCMSShape.cc>`_ and
`numba-stats <https://github.com/HDembinski/numba-stats/blob/main/src/numba_stats/cmsshape.py>`_
"""
half = 0.5
two = 2.0
y = x - m
t1 = tf.math.erf(gamma / (two * beta) + beta * y)
t2 = tf.math.exp(-((gamma / (two * beta)) ** two) - gamma * y)
t3 = tf.math.erfc(-beta * y)
return half * (t1 - t2 * t3) + half


def cmsshape_integral(limits: ztyping.SpaceType, params: dict, model) -> tf.Tensor:
"""Calculates the analytic integral of the CMSShape PDF.
Args:
limits: An object with attribute limit1d.
params: A hashmap from which the parameters that defines the PDF will be extracted.
model: Will be ignored.
Returns:
The calculated integral.
"""
lower, upper = limits.limit1d
m = params["m"]
beta = params["beta"]
gamma = params["gamma"]
lower_cdf = cmsshape_cdf_func(x=lower, m=m, beta=beta, gamma=gamma)
upper_cdf = cmsshape_cdf_func(x=upper, m=m, beta=beta, gamma=gamma)
return upper_cdf - lower_cdf


class CMSShape(zfit.pdf.BasePDF):
_N_OBS = 1

def __init__(
self,
m: ztyping.ParamTypeInput,
beta: ztyping.ParamTypeInput,
gamma: ztyping.ParamTypeInput,
obs: ztyping.ObsTypeInput,
*,
extended: Optional[ztyping.ExtendedInputType] = None,
norm: Optional[ztyping.NormInputType] = None,
name: str = "CMSShape",
):
"""CMSShape PDF.
The distribution consists of an exponential decay suppressed at small values by the
complementary error function. The product is an asymmetric peak with a bell shape on the
left-hand side at low mass due to threshold effect and an exponential tail on the right-hand side.
This shape is used by the CMS experiment to model the background in the invariant mass distribution
of Z to ll decay candidates.
Formula for the PDF and CDF are based on code from
`spark_tnp <https://gitlab.cern.ch/cms-muonPOG/spark_tnp/-/blob/Spark3/RooCMSShape.cc>`_ and
`numba-stats <https://github.com/HDembinski/numba-stats/blob/main/src/numba_stats/cmsshape.py>`_
Args:
m: Approximate center of the distribution.
beta: Steepness of the error function.
gamma: Steepness of the exponential distribution.
obs: |@doc:pdf.init.obs| Observables of the
model. This will be used as the default space of the PDF and,
if not given explicitly, as the normalization range.
The default space is used for example in the sample method: if no
sampling limits are given, the default space is used.
The observables are not equal to the domain as it does not restrict or
truncate the model outside this range. |@docend:pdf.init.obs|
extended: |@doc:pdf.init.extended| The overall yield of the PDF.
If this is parameter-like, it will be used as the yield,
the expected number of events, and the PDF will be extended.
An extended PDF has additional functionality, such as the
``ext_*`` methods and the ``counts`` (for binned PDFs). |@docend:pdf.init.extended|
norm: |@doc:pdf.init.norm| Normalization of the PDF.
By default, this is the same as the default space of the PDF. |@docend:pdf.init.norm|
name: |@doc:pdf.init.name| Human-readable name
or label of
the PDF for better identification.
Has no programmatical functional purpose as identification. |@docend:pdf.init.name|
"""
params = {"m": m, "beta": beta, "gamma": gamma}
super().__init__(obs=obs, params=params, name=name, extended=extended, norm=norm)

def _unnormalized_pdf(self, x: tf.Tensor) -> tf.Tensor:
m = self.params["m"]
beta = self.params["beta"]
gamma = self.params["gamma"]
return cmsshape_pdf_func(x=x, m=m, beta=beta, gamma=gamma)


cmsshape_integral_limits = Space(axes=(0,), limits=(((ANY_LOWER,),), ((ANY_UPPER,),)))
CMSShape.register_analytic_integral(func=cmsshape_integral, limits=cmsshape_integral_limits)
3 changes: 2 additions & 1 deletion zfit_physics/pdf.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from .models.pdf_argus import Argus
from .models.pdf_cmsshape import CMSShape
from .models.pdf_relbw import RelativisticBreitWigner

__all__ = ["Argus", "RelativisticBreitWigner"]
__all__ = ["Argus", "RelativisticBreitWigner", "CMSShape"]

0 comments on commit 1b431ca

Please sign in to comment.