CMS-Shape distribution (#99)

Closes #97
HDembinski · Feb 21, 2024 · 33dea2b · 33dea2b
1 parent b6b44be
commit 33dea2b
Show file tree

Hide file tree

Showing 4 changed files with 110 additions and 0 deletions.
diff --git a/README.md b/README.md
@@ -17,6 +17,7 @@ We provide `numba`-accelerated implementations of statistical distributions for
 * Q-Gaussian
 * Bernstein density (not normalized to unity, use this in extended likelihood fits)
 * Cruijff density (not normalized to unity, use this in extended likelihood fits)
+* CMS-Shape
 
 with more to come. The speed gains are huge, up to a factor of 100 compared to `scipy`. Benchmarks are included in the repository and are run by `pytest`.
 

diff --git a/src/numba_stats/_util.py b/src/numba_stats/_util.py
@@ -1,5 +1,6 @@
 """Utilities for code and docs generation to reduce boilerplate code."""
 
+import math
 import numba as nb
 import numpy as np
 from numba.types import Array
@@ -78,6 +79,18 @@ def _trans(x, loc, scale):
     return (x - loc) * inv_scale
 
 
+@nb.njit(cache=True, inline="always", error_model="numpy")
+def _erf_inplace(x):
+    for i in _prange(len(x)):
+        x[i] = math.erf(x[i])
+
+
+@nb.njit(cache=True, inline="always", error_model="numpy")
+def _erfc_inplace(x):
+    for i in _prange(len(x)):
+        x[i] = math.erfc(x[i])
+
+
 def _type_check(first, *rest):
     if not (isinstance(first, Array) and first.dtype in _Floats):
         raise TypingError("first argument must be an array of floating point type")

diff --git a/src/numba_stats/cmsshape.py b/src/numba_stats/cmsshape.py
@@ -0,0 +1,65 @@
+"""
+CMS-Shape distribution (for lack of a better name).
+
+The distribution consists of an exponential decay suppressed at small values by the
+complementary error function. The product is an asymmetric peak with a bell shape on the
+left-hand side and an exponential tail on the right-hand side. This shape is used by the
+CMS experiment to model the background in the invariant mass distribution of Z to ll
+decay candidates.
+
+Notes
+-----
+This implementation was modeled after
+https://gitlab.cern.ch/cms-muonPOG/spark_tnp/-/blob/Spark3/RooCMSShape.cc, but heavily
+modified. An analytical normalization and an analytical cdf were added. The parameters
+"alpha" and "peak" in the original implementation turned out to be redundant and have
+been replaced with a single parameter "loc", which is the approximate center of the
+distribution.
+"""
+
+import numpy as np
+from ._util import _jit, _generate_wrappers, _erf_inplace, _erfc_inplace
+
+_doc_par = """
+beta : float
+    Steepness of the error function. Must be positive.
+gamma : float
+    Steepness of the exponential distribution. Must be positive.
+loc: float
+    Approximate center of the distribution.
+"""
+
+
+@_jit(3)
+def _logpdf(x, beta, gamma, loc):
+    T = type(beta)
+    two = T(2)
+    half = T(0.5)
+    v = -(x - loc) * beta
+    _erfc_inplace(v)
+    u = (x - loc) * gamma
+    T = type(beta)
+    log_t = (half * gamma / beta) ** two
+    return np.log(v) - u + np.log(half * gamma) - log_t
+
+
+@_jit(3)
+def _pdf(x, beta, gamma, loc):
+    return np.exp(_logpdf(x, beta, gamma, loc))
+
+
+@_jit(3)
+def _cdf(x, beta, gamma, loc):
+    T = type(beta)
+    y = x - loc
+    two = T(2)
+    half = T(0.5)
+    t1 = gamma / (two * beta) + beta * y
+    _erf_inplace(t1)
+    t2 = np.exp(-((gamma / (two * beta)) ** two) - gamma * y)
+    t3 = -beta * y
+    _erfc_inplace(t3)
+    return half * (t1 - t2 * t3) + half
+
+
+_generate_wrappers(globals())
diff --git a/tests/test_cmsshape.py b/tests/test_cmsshape.py
@@ -0,0 +1,31 @@
+from numba_stats import cmsshape, expon
+from scipy.integrate import quad
+import pytest
+from numpy.testing import assert_allclose
+import numpy as np
+
+
+def test_pdf_1():
+    par = 1.1, 2.2, 3.3
+    assert quad(lambda x: cmsshape.pdf(x, *par), -10, 10)[0] == pytest.approx(1)
+
+
+def test_pdf_2():
+    par = 1e3, 1.5, 0
+    x = np.linspace(-3, 3, 1000)
+    got = cmsshape.pdf(x, *par)
+    expected = expon.pdf(x, 0, 2 / 3)
+    assert_allclose(got, expected, atol=1e-3)
+
+
+def test_cdf():
+    par = 1.1, 2.2, 3.3
+    x = np.linspace(-3, 3, 20)
+
+    @np.vectorize
+    def num_cdf(x):
+        return quad(lambda x: cmsshape.pdf(x, *par), -10, x)[0]
+
+    expected = num_cdf(x)
+    got = cmsshape.cdf(x, *par)
+    assert_allclose(got, expected, atol=1e-10)