Merge pull request #63 from ikrommyd/feat-cmsshape

feat: add CMSShape PDF
zfit · Mar 15, 2024 · 1b431ca · 1b431ca
2 parents 251f4e2 + 4779dca
commit 1b431ca
Show file tree

Hide file tree

Showing 5 changed files with 224 additions and 1 deletion.
diff --git a/CHANGELOG.rst b/CHANGELOG.rst
@@ -7,6 +7,7 @@ Develop
 
 Major Features and Improvements
 -------------------------------
+- added CMSShape PDF
 
 Breaking changes
 ------------------

diff --git a/requirements_dev.txt b/requirements_dev.txt
@@ -2,6 +2,7 @@ bumpversion>=0.5.3
 coverage>=4.5.1
 coverage>=4.5.1
 flake8>=3.5.0
+numba-stats @ git+https://github.com/HDembinski/numba-stats.git  # CMSShape not yet released (expected 1.8.0)
 pip>=9.0.1
 pre-commit
 pytest>=3.4.2

diff --git a/tests/test_pdf_cmsshape.py b/tests/test_pdf_cmsshape.py
@@ -0,0 +1,76 @@
+"""Tests for CMSShape PDF."""
+import numpy as np
+import pytest
+import tensorflow as tf
+import zfit
+from numba_stats import cmsshape as cmsshape_numba
+
+# Important, do the imports below
+from zfit.core.testing import tester
+
+import zfit_physics as zphys
+
+# specify globals here. Do NOT add any TensorFlow but just pure python
+m_true = 90.0
+beta_true = 0.2
+gamma_true = 0.3
+
+
+def create_cmsshape(m, beta, gamma, limits):
+    obs = zfit.Space("obs1", limits)
+    cmsshape = zphys.pdf.CMSShape(m=m, beta=beta, gamma=gamma, obs=obs)
+    return cmsshape, obs
+
+
+def test_cmsshape_pdf():
+    # Test PDF here
+    cmsshape, _ = create_cmsshape(m=m_true, beta=beta_true, gamma=gamma_true, limits=(50, 130))
+    assert zfit.run(cmsshape.pdf(90.0)) == pytest.approx(
+        cmsshape_numba.pdf(90.0, beta=beta_true, gamma=gamma_true, loc=m_true).item(), rel=1e-5
+    )
+    np.testing.assert_allclose(
+        cmsshape.pdf(tf.range(50.0, 130, 10_000)),
+        cmsshape_numba.pdf(tf.range(50.0, 130, 10_000).numpy(), beta=beta_true, gamma=gamma_true, loc=m_true),
+        rtol=1e-5,
+    )
+    assert cmsshape.pdf(tf.range(50.0, 130, 10_000)) <= cmsshape.pdf(90.0)
+
+    sample = cmsshape.sample(1000)
+    tf.debugging.assert_all_finite(sample.value(), "Some samples from the cmsshape PDF are NaN or infinite")
+    assert sample.n_events == 1000
+    assert all(tf.logical_and(50 <= sample.value(), sample.value() <= 130))
+
+
+def test_cmsshape_integral():
+    # Test CDF and integral here
+    cmsshape, obs = create_cmsshape(m=m_true, beta=beta_true, gamma=gamma_true, limits=(50, 130))
+    full_interval_analytic = zfit.run(cmsshape.analytic_integrate(obs, norm_range=False))
+    full_interval_numeric = zfit.run(cmsshape.numeric_integrate(obs, norm_range=False))
+    true_integral = 0.99999
+    numba_stats_full_integral = cmsshape_numba.cdf(
+        130, beta=beta_true, gamma=gamma_true, loc=m_true
+    ) - cmsshape_numba.cdf(50, beta=beta_true, gamma=gamma_true, loc=m_true)
+    assert full_interval_analytic == pytest.approx(true_integral, 1e-5)
+    assert full_interval_numeric == pytest.approx(true_integral, 1e-5)
+    assert full_interval_analytic == pytest.approx(numba_stats_full_integral, 1e-8)
+    assert full_interval_numeric == pytest.approx(numba_stats_full_integral, 1e-8)
+
+    analytic_integral = zfit.run(cmsshape.analytic_integrate(limits=(80, 100), norm_range=False))
+    numeric_integral = zfit.run(cmsshape.numeric_integrate(limits=(80, 100), norm_range=False))
+    numba_stats_integral = cmsshape_numba.cdf(100, beta=beta_true, gamma=gamma_true, loc=m_true) - cmsshape_numba.cdf(
+        80, beta=beta_true, gamma=gamma_true, loc=m_true
+    )
+    assert analytic_integral == pytest.approx(numeric_integral, 1e-8)
+    assert analytic_integral == pytest.approx(numba_stats_integral, 1e-8)
+
+
+# register the pdf here and provide sets of working parameter configurations
+def cmsshape_params_factory():
+    m = zfit.Parameter("m", m_true)
+    beta = zfit.Parameter("beta", beta_true)
+    gamma = zfit.Parameter("gamma", gamma_true)
+
+    return {"m": m, "beta": beta, "gamma": gamma}
+
+
+tester.register_pdf(pdf_class=zphys.pdf.CMSShape, params_factories=cmsshape_params_factory)
diff --git a/zfit_physics/models/pdf_cmsshape.py b/zfit_physics/models/pdf_cmsshape.py
@@ -0,0 +1,144 @@
+from typing import Optional
+
+import tensorflow as tf
+import zfit
+from zfit import z
+from zfit.core.space import ANY_LOWER, ANY_UPPER, Space
+from zfit.util import ztyping
+
+
+@z.function(wraps="tensor")
+def cmsshape_pdf_func(x, m, beta, gamma):
+    """Calculate the CMSShape PDF.
+
+    Args:
+        x: value(s) for which the PDF will be calculated.
+        m: approximate center of the disribution.
+        beta: steepness of the error function.
+        gamma: steepness of the exponential distribution.
+
+    Returns:
+        `tf.Tensor`: The calculated PDF values.
+
+    Notes:
+        Based on code from `spark_tnp <https://gitlab.cern.ch/cms-muonPOG/spark_tnp/-/blob/Spark3/RooCMSShape.cc>`_ and
+        `numba-stats <https://github.com/HDembinski/numba-stats/blob/main/src/numba_stats/cmsshape.py>`_.
+    """
+    x = z.unstack_x(x)
+    half = 0.5
+    two = 2.0
+    t1 = tf.math.exp(-gamma * (x - m))
+    t2 = tf.math.erfc(-beta * (x - m))
+    t3 = half * gamma * tf.math.exp(-((half * gamma / beta) ** two))
+    return t1 * t2 * t3
+
+
+@z.function(wraps="tensor")
+def cmsshape_cdf_func(x, m, beta, gamma):
+    """Analtical function for the CDF of the CMSShape distribution.
+
+    Args:
+        x: value(s) for which the CDF will be calculated.
+        m: approximate center of the distribution.
+        beta: steepness of the error function.
+        gamma: steepness of the exponential distribution.
+
+    Returns:
+        `tf.Tensor`: The calculated CDF values.
+
+    Notes:
+        Based on code from `spark_tnp <https://gitlab.cern.ch/cms-muonPOG/spark_tnp/-/blob/Spark3/RooCMSShape.cc>`_ and
+        `numba-stats <https://github.com/HDembinski/numba-stats/blob/main/src/numba_stats/cmsshape.py>`_
+    """
+    half = 0.5
+    two = 2.0
+    y = x - m
+    t1 = tf.math.erf(gamma / (two * beta) + beta * y)
+    t2 = tf.math.exp(-((gamma / (two * beta)) ** two) - gamma * y)
+    t3 = tf.math.erfc(-beta * y)
+    return half * (t1 - t2 * t3) + half
+
+
+def cmsshape_integral(limits: ztyping.SpaceType, params: dict, model) -> tf.Tensor:
+    """Calculates the analytic integral of the CMSShape PDF.
+
+    Args:
+        limits: An object with attribute limit1d.
+        params: A hashmap from which the parameters that defines the PDF will be extracted.
+        model: Will be ignored.
+
+    Returns:
+        The calculated integral.
+    """
+    lower, upper = limits.limit1d
+    m = params["m"]
+    beta = params["beta"]
+    gamma = params["gamma"]
+    lower_cdf = cmsshape_cdf_func(x=lower, m=m, beta=beta, gamma=gamma)
+    upper_cdf = cmsshape_cdf_func(x=upper, m=m, beta=beta, gamma=gamma)
+    return upper_cdf - lower_cdf
+
+
+class CMSShape(zfit.pdf.BasePDF):
+    _N_OBS = 1
+
+    def __init__(
+        self,
+        m: ztyping.ParamTypeInput,
+        beta: ztyping.ParamTypeInput,
+        gamma: ztyping.ParamTypeInput,
+        obs: ztyping.ObsTypeInput,
+        *,
+        extended: Optional[ztyping.ExtendedInputType] = None,
+        norm: Optional[ztyping.NormInputType] = None,
+        name: str = "CMSShape",
+    ):
+        """CMSShape PDF.
+
+        The distribution consists of an exponential decay suppressed at small values by the
+        complementary error function. The product is an asymmetric peak with a bell shape on the
+        left-hand side at low mass due to threshold effect and an exponential tail on the right-hand side.
+        This shape is used by the CMS experiment to model the background in the invariant mass distribution
+        of Z to ll decay candidates.
+
+        Formula for the PDF and CDF are based on code from
+        `spark_tnp <https://gitlab.cern.ch/cms-muonPOG/spark_tnp/-/blob/Spark3/RooCMSShape.cc>`_ and
+        `numba-stats <https://github.com/HDembinski/numba-stats/blob/main/src/numba_stats/cmsshape.py>`_
+
+        Args:
+            m: Approximate center of the distribution.
+            beta: Steepness of the error function.
+            gamma: Steepness of the exponential distribution.
+            obs: |@doc:pdf.init.obs| Observables of the
+               model. This will be used as the default space of the PDF and,
+               if not given explicitly, as the normalization range.
+
+               The default space is used for example in the sample method: if no
+               sampling limits are given, the default space is used.
+
+               The observables are not equal to the domain as it does not restrict or
+               truncate the model outside this range. |@docend:pdf.init.obs|
+            extended: |@doc:pdf.init.extended| The overall yield of the PDF.
+               If this is parameter-like, it will be used as the yield,
+               the expected number of events, and the PDF will be extended.
+               An extended PDF has additional functionality, such as the
+               ``ext_*`` methods and the ``counts`` (for binned PDFs). |@docend:pdf.init.extended|
+            norm: |@doc:pdf.init.norm| Normalization of the PDF.
+               By default, this is the same as the default space of the PDF. |@docend:pdf.init.norm|
+            name: |@doc:pdf.init.name| Human-readable name
+               or label of
+               the PDF for better identification.
+               Has no programmatical functional purpose as identification. |@docend:pdf.init.name|
+        """
+        params = {"m": m, "beta": beta, "gamma": gamma}
+        super().__init__(obs=obs, params=params, name=name, extended=extended, norm=norm)
+
+    def _unnormalized_pdf(self, x: tf.Tensor) -> tf.Tensor:
+        m = self.params["m"]
+        beta = self.params["beta"]
+        gamma = self.params["gamma"]
+        return cmsshape_pdf_func(x=x, m=m, beta=beta, gamma=gamma)
+
+
+cmsshape_integral_limits = Space(axes=(0,), limits=(((ANY_LOWER,),), ((ANY_UPPER,),)))
+CMSShape.register_analytic_integral(func=cmsshape_integral, limits=cmsshape_integral_limits)
diff --git a/zfit_physics/pdf.py b/zfit_physics/pdf.py
@@ -1,4 +1,5 @@
 from .models.pdf_argus import Argus
+from .models.pdf_cmsshape import CMSShape
 from .models.pdf_relbw import RelativisticBreitWigner
 
-__all__ = ["Argus", "RelativisticBreitWigner"]
+__all__ = ["Argus", "RelativisticBreitWigner", "CMSShape"]
-Original file line number
+Diff line change
@@ Expand Up / @@ -7,6 +7,7 @@ Develop @@
     Major Features and Improvements
     -------------------------------
+    - added CMSShape PDF
     Breaking changes
     ------------------
@@ Expand Down @@