Skip to content

Commit

Permalink
CMS-Shape distribution (#99)
Browse files Browse the repository at this point in the history
Closes #97
  • Loading branch information
HDembinski authored Feb 21, 2024
1 parent b6b44be commit 33dea2b
Show file tree
Hide file tree
Showing 4 changed files with 110 additions and 0 deletions.
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ We provide `numba`-accelerated implementations of statistical distributions for
* Q-Gaussian
* Bernstein density (not normalized to unity, use this in extended likelihood fits)
* Cruijff density (not normalized to unity, use this in extended likelihood fits)
* CMS-Shape

with more to come. The speed gains are huge, up to a factor of 100 compared to `scipy`. Benchmarks are included in the repository and are run by `pytest`.

Expand Down
13 changes: 13 additions & 0 deletions src/numba_stats/_util.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
"""Utilities for code and docs generation to reduce boilerplate code."""

import math
import numba as nb
import numpy as np
from numba.types import Array
Expand Down Expand Up @@ -78,6 +79,18 @@ def _trans(x, loc, scale):
return (x - loc) * inv_scale


@nb.njit(cache=True, inline="always", error_model="numpy")
def _erf_inplace(x):
for i in _prange(len(x)):
x[i] = math.erf(x[i])


@nb.njit(cache=True, inline="always", error_model="numpy")
def _erfc_inplace(x):
for i in _prange(len(x)):
x[i] = math.erfc(x[i])


def _type_check(first, *rest):
if not (isinstance(first, Array) and first.dtype in _Floats):
raise TypingError("first argument must be an array of floating point type")
Expand Down
65 changes: 65 additions & 0 deletions src/numba_stats/cmsshape.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
"""
CMS-Shape distribution (for lack of a better name).
The distribution consists of an exponential decay suppressed at small values by the
complementary error function. The product is an asymmetric peak with a bell shape on the
left-hand side and an exponential tail on the right-hand side. This shape is used by the
CMS experiment to model the background in the invariant mass distribution of Z to ll
decay candidates.
Notes
-----
This implementation was modeled after
https://gitlab.cern.ch/cms-muonPOG/spark_tnp/-/blob/Spark3/RooCMSShape.cc, but heavily
modified. An analytical normalization and an analytical cdf were added. The parameters
"alpha" and "peak" in the original implementation turned out to be redundant and have
been replaced with a single parameter "loc", which is the approximate center of the
distribution.
"""

import numpy as np
from ._util import _jit, _generate_wrappers, _erf_inplace, _erfc_inplace

_doc_par = """
beta : float
Steepness of the error function. Must be positive.
gamma : float
Steepness of the exponential distribution. Must be positive.
loc: float
Approximate center of the distribution.
"""


@_jit(3)
def _logpdf(x, beta, gamma, loc):
T = type(beta)
two = T(2)
half = T(0.5)
v = -(x - loc) * beta
_erfc_inplace(v)
u = (x - loc) * gamma
T = type(beta)
log_t = (half * gamma / beta) ** two
return np.log(v) - u + np.log(half * gamma) - log_t


@_jit(3)
def _pdf(x, beta, gamma, loc):
return np.exp(_logpdf(x, beta, gamma, loc))


@_jit(3)
def _cdf(x, beta, gamma, loc):
T = type(beta)
y = x - loc
two = T(2)
half = T(0.5)
t1 = gamma / (two * beta) + beta * y
_erf_inplace(t1)
t2 = np.exp(-((gamma / (two * beta)) ** two) - gamma * y)
t3 = -beta * y
_erfc_inplace(t3)
return half * (t1 - t2 * t3) + half


_generate_wrappers(globals())
31 changes: 31 additions & 0 deletions tests/test_cmsshape.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
from numba_stats import cmsshape, expon
from scipy.integrate import quad
import pytest
from numpy.testing import assert_allclose
import numpy as np


def test_pdf_1():
par = 1.1, 2.2, 3.3
assert quad(lambda x: cmsshape.pdf(x, *par), -10, 10)[0] == pytest.approx(1)


def test_pdf_2():
par = 1e3, 1.5, 0
x = np.linspace(-3, 3, 1000)
got = cmsshape.pdf(x, *par)
expected = expon.pdf(x, 0, 2 / 3)
assert_allclose(got, expected, atol=1e-3)


def test_cdf():
par = 1.1, 2.2, 3.3
x = np.linspace(-3, 3, 20)

@np.vectorize
def num_cdf(x):
return quad(lambda x: cmsshape.pdf(x, *par), -10, x)[0]

expected = num_cdf(x)
got = cmsshape.cdf(x, *par)
assert_allclose(got, expected, atol=1e-10)

0 comments on commit 33dea2b

Please sign in to comment.