Source code for emgfit.emg_funcs

################################################################################
##### Module with numerically robust implementation of the hyper-exponentially-
##### modified Gaussian probability density function
##### Author: Stefan Paul

##### Import packages
import numpy as np
import lmfit as fit
from numpy import exp
from math import sqrt
import scipy.special.cython_special
from scipy.special import erfc, erfcx
from numba.extending import get_cython_function_address
from numba import vectorize, njit
import ctypes
import mpmath as mp
erfc_mp = np.frompyfunc(mp.erfc,1,1)
exp_mp = np.frompyfunc(mp.exp,1,1)

norm_precision = 1e-06 # level on which eta parameters must agree with unity

################################################################################
##### Define numba versions of scipy.special's erfc and erfcx functions using
##### the corresponding C functions from scipy.special.cython_special
erfc_addr = get_cython_function_address("scipy.special.cython_special",
                                        "__pyx_fuse_1erfc")
erfc_functype = ctypes.CFUNCTYPE(ctypes.c_double, ctypes.c_double)
c_erfc = erfc_functype(erfc_addr)

@vectorize('float64(float64)')
def _vec_erfc(x):
    return c_erfc(x)

@njit
def _erfc_jit(arg):
    return _vec_erfc(arg)

erfcx_addr = get_cython_function_address("scipy.special.cython_special",
                                         "__pyx_fuse_1erfcx")
erfcx_functype = ctypes.CFUNCTYPE(ctypes.c_double, ctypes.c_double)
c_erfcx = erfcx_functype(erfcx_addr)

@vectorize('float64(float64)')
def _vec_erfcx(x):
    return c_erfcx(x)

@njit
def _erfcx_jit(arg):
    return _vec_erfcx(arg)


################################################################################
##### Define general Hyper-EMG functions

[docs]@njit
def h_m_i(x,mu,sigma,eta_m,tau_m):
    """Internal helper function to calculate single negative EMG tail for
    h_m_emg."""
    erfcarg = np.atleast_1d(sigma/(sqrt(2)*tau_m) + (x-mu)/(sqrt(2)*sigma))
    mask = (erfcarg < 0)
    ret = np.empty_like(x)
    # Use Gauss*erfcx formulation to avoid overflow of exp and underflow of
    # erfc at larger pos. arguments:
    Gauss_erfcx = exp( -0.5*((x[~mask]-mu)/sigma)**2 )*_erfcx_jit(erfcarg[~mask])
    ret[~mask] = eta_m/(2*tau_m)*Gauss_erfcx
    # Use exp*erfc formulation to avoid overflow of erfcx at larger neg.
    # arguments:
    exp_erfc = exp(0.5*(sigma/tau_m)**2 + (x[mask]-mu)/tau_m)*_erfc_jit(erfcarg[mask])
    ret[mask] = 0.5*eta_m/tau_m*exp_erfc

    return ret


[docs]def h_m_i_prec(x,mu,sigma,eta_m,tau_m):
    """Arbitrary precision version of internal helper function for h_m_emg."""
    expval = exp_mp( 0.5*(sigma/tau_m)**2 + (x-mu)/tau_m )
    erfcval = erfc_mp( sigma/(sqrt(2)*tau_m) + (x-mu)/(sqrt(2)*sigma) )
    ret = 0.5*eta_m/tau_m*expval*erfcval
    return ret.astype(float)


[docs]@njit
def h_m_emg(x, mu, sigma, li_eta_m,li_tau_m):
    """Negative skewed exponentially-modified Gaussian (EMG) distribution.

    The lengths of `li_eta_m` & `li_tau_m` must match and define the order of
    negative tails.

    Parameters
    ----------
    x  : float >= 0
        Abscissa data (mass data).
    mu : float >= 0
        Mean value of underlying Gaussian distribution.
    sigma : float >= 0
        Standard deviation of underlying Gaussian distribution.
    li_eta_m : tuple
        Tuple containing the neg. tail weights with the signature:
        ``(eta_m1, eta_m2, ...)``.
    li_tau_m : tuple
        Tuple containing the neg. tail decay constants with the signature:
        ``(tau_m1, tau_m2, ...)``.

    Returns
    -------
    float
        Ordinate values of the negative skewed EMG distribution.

    Notes
    -----
    The Hyper-EMG probability distribution function was first introduced in
    `this publication`_ by Purushothaman et al. [#]_. The basic definitions and
    notations used here are adapted from this work.

    Each negative tail of a Hyper-EMG function can be expressed in two
    equivalent ways:

    .. math::

        h_\mathrm{emg,-i} = \\frac{\\eta_{-i}}{2\\tau_{-i}} \\exp{(-\\left(\\frac{x-\\mu}{\\sqrt{2}\\sigma}\\right)^2)} \mathrm{erfcx}(v)
        = \\frac{\\eta_{-i}}{2\\tau_{-i}} \\exp{(u)} \mathrm{erfc}(v),

    where :math:`u = \\frac{\\sigma}{\\sqrt{2}\\tau_{-i}} + \\frac{x-\mu}{\\sqrt{2}\\tau_{-i}}`
    and :math:`v = \\frac{\\sigma}{\\sqrt{2}\\tau_{-i}} + \\frac{x-\mu}{\\sqrt{2}\\sigma}`.
    In double float precision, the `exp(u)`_ routine overflows if u > 709.78. The
    complementary error function `erfc(v)`_ underflows to 0.0 if v > 26.54. The
    scaled complementary error function `erfcx(v)`_ overflows if v < -26.62. To
    circumvent those scenarios and always ensure an exact result, the underlying
    helper function for the calculation of a negative EMG tail :func:`h_m_i`
    uses the formulation in terms of `erfcx` whenever v >= 0 and switches to the
    `erfc`-formulation when v < 0.

    .. _`exp(u)`: https://numpy.org/doc/stable/reference/generated/numpy.exp.html#numpy.exp
    .. _`erfc(v)`: https://docs.scipy.org/doc/scipy/reference/generated/scipy.special.erfc.html
    .. _`erfcx(v)`: https://docs.scipy.org/doc/scipy/reference/generated/scipy.special.erfcx.html
    .. _`this publication`: https://www.sciencedirect.com/science/article/abs/pii/S1387380616302913

    References
    ----------
    .. [#] Purushothaman, S., et al. "Hyper-EMG: A new probability distribution
       function composed of Exponentially Modified Gaussian distributions to
       analyze asymmetric peak shapes in high-resolution time-of-flight mass
       spectrometry." International Journal of Mass Spectrometry 421 (2017):
       245-254.

    """
    li_eta_m = np.array(li_eta_m).astype(np.float_)
    li_tau_m = np.array(li_tau_m).astype(np.float_)
    t_order_m = len(li_eta_m) # order of negative tail exponentials
    sum_eta_m = 0.
    for i in range(t_order_m):
        sum_eta_m += li_eta_m[i]
    if abs(sum_eta_m - 1) > norm_precision:  # check normalization of eta_m's
        raise Exception("eta_m's don't add up to 1.")
    if len(li_tau_m) != t_order_m:  # check if all arguments match tail order
        raise Exception("orders of eta_m and tau_m do not match!")

    h_m = np.zeros_like(x)
    for i in range(t_order_m):
        eta_m = li_eta_m[i]
        tau_m = li_tau_m[i]
        h_m += h_m_i(x,mu,sigma,eta_m,tau_m)
    return h_m


[docs]@njit
def h_p_i(x,mu,sigma,eta_p,tau_p):
    """Internal helper function to calculate single positive EMG tail for
    h_p_emg."""
    erfcarg = np.atleast_1d(sigma/(sqrt(2)*tau_p) - (x-mu)/(sqrt(2)*sigma))
    mask = (erfcarg < 0)
    ret = np.empty_like(x)
    # Use Gauss*erfcx formulation to avoid overflow of exp and underflow of
    # erfc at larger pos. arguments:
    Gauss_erfcx = exp( -0.5*((x[~mask]-mu)/sigma)**2 )*_erfcx_jit(erfcarg[~mask])
    ret[~mask] = eta_p/(2*tau_p)*Gauss_erfcx
    # Use exp*erfc formulation to avoid overflow of erfcx at larger neg.
    # arguments:
    exp_erfc = exp(0.5*(sigma/tau_p)**2 - (x[mask]-mu)/tau_p)*_erfc_jit(erfcarg[mask])
    ret[mask] = 0.5*eta_p/tau_p*exp_erfc

    return ret


[docs]def h_p_i_prec(x,mu,sigma,eta_p,tau_p):
    """Arbitrary precision version of internal helper function for h_p_emg."""
    expval = exp_mp( 0.5*(sigma/tau_p)**2 - (x-mu)/tau_p )
    erfcval = erfc_mp( sigma/(sqrt(2)*tau_p) - (x-mu)/(sqrt(2)*sigma) )
    ret = 0.5*eta_p/tau_p*expval*erfcval
    return ret.astype(float)


[docs]@njit
def h_p_emg(x, mu, sigma, li_eta_p, li_tau_p):
    """Positive skewed exponentially-modified Gaussian (EMG) distribution.

    The lengths of `li_eta_p` & `li_tau_p` must match and define the order of
    positive tails.

    Parameters
    ----------
    x  : float >= 0
        Abscissa data (mass data).
    mu : float >= 0
        Mean value of underlying Gaussian distribution.
    sigma : float >= 0
        Standard deviation of underlying Gaussian distribution.
    li_eta_p : tuple
        Tuple containing the pos. tail weights with the signature:
        ``(eta_p1, eta_p2, ...)``.
    li_tau_p : tuple
        Tuple containing the pos. tail decay constants with the signature:
        ``(tau_p1, tau_p2, ...)``.

    Returns
    -------
    float
        Ordinate values of the positive skewed EMG distribution.

    Notes
    -----
    The Hyper-EMG probability distribution function was first introduced in
    `this publication`_ by Purushothaman et al. [#]_. The basic definitions and
    notations used here are adapted from this work.

    Each positive tail of a Hyper-EMG function can be expressed in two
    equivalent ways:

    .. math::

        h_\mathrm{emg,+i} = \\frac{\\eta_{+i}}{2\\tau_{+i}} \\exp{(-\\left(\\frac{x-\\mu}{\\sqrt{2}\\sigma}\\right)^2)} \mathrm{erfcx}(v)
        = \\frac{\\eta_{+i}}{2\\tau_{+i}} \\exp{(u)} \mathrm{erfc}(v),

    where :math:`u = \\frac{\\sigma}{\\sqrt{2}\\tau_{+i}} - \\frac{x-\mu}{\\sqrt{2}\\tau_{+i}}`
    and :math:`v = \\frac{\\sigma}{\\sqrt{2}\\tau_{+i}} - \\frac{x-\mu}{\\sqrt{2}\\sigma}`.
    In double precision, the `exp(u)`_ routine overflows if u > 709.78. The
    complementary error function `erfc(v)`_ underflows to 0.0 if v > 26.54. The
    scaled complementary error function `erfcx(v)`_ overflows if v < -26.62. To
    circumvent those scenarios and always ensure an exact result, the underlying
    helper function for the calculation of a negative EMG tail :func:`h_m_i`
    uses the formulation in terms of `erfcx` whenever v >= 0 and switches to the
    `erfc`-formulation when v < 0.

    .. _`exp(u)`: https://numpy.org/doc/stable/reference/generated/numpy.exp.html#numpy.exp
    .. _`erfc(v)`: https://docs.scipy.org/doc/scipy/reference/generated/scipy.special.erfc.html
    .. _`erfcx(v)`: https://docs.scipy.org/doc/scipy/reference/generated/scipy.special.erfcx.html
    .. _`this publication`: https://www.sciencedirect.com/science/article/abs/pii/S1387380616302913

    References
    ----------
    .. [#] Purushothaman, S., et al. "Hyper-EMG: A new probability distribution
       function composed of Exponentially Modified Gaussian distributions to
       analyze asymmetric peak shapes in high-resolution time-of-flight mass
       spectrometry." International Journal of Mass Spectrometry 421 (2017):
       245-254.

    """
    li_eta_p = np.array(li_eta_p).astype(np.float_)
    li_tau_p = np.array(li_tau_p).astype(np.float_)
    t_order_p = len(li_eta_p) # order of positive tails
    sum_eta_p = 0.
    for i in range(t_order_p):
        sum_eta_p += li_eta_p[i]
    if abs(sum_eta_p - 1) > norm_precision:  # check normalization of eta_p's
        raise Exception("eta_p's don't add up to 1.")
    if len(li_tau_p) != t_order_p:  # check if all arguments match tail order
        raise Exception("orders of eta_p and tau_p do not match!")

    h_p = np.zeros_like(x)
    for i in range(t_order_p):
        eta_p = li_eta_p[i]
        tau_p = li_tau_p[i]
        h_p += h_p_i(x,mu,sigma,eta_p,tau_p)
    return h_p


[docs]@njit
def h_emg(x, mu, sigma , theta, li_eta_m, li_tau_m, li_eta_p, li_tau_p):
    """Hyper-exponentially-modified Gaussian distribution (hyper-EMG).

    The lengths of `li_eta_m` & `li_tau_m` must match and define the order of
    negative tails. Likewise, the lengths of `li_eta_p` & `li_tau_p` must match
    and define the order of positive tails.

    Parameters
    ----------
    x  : float >= 0
        Abscissa data (mass data).
    mu : float >= 0
        Mean value of underlying Gaussian distribution.
    sigma : float >= 0
        Standard deviation of underlying Gaussian distribution.
    theta : float, 0 <= theta <= 1
        Left-right-weight factor (negative-skewed EMG weight: theta;
        positive-skewed EMG weight: 1 - theta).
    li_eta_m : tuple
        Tuple containing the neg. tail weights with the signature:
        ``(eta_m1, eta_m2, ...)``.
    li_tau_m : tuple
        Tuple containing the neg. tail decay constants with the signature:
        ``(tau_m1, tau_m2, ...)``.
    li_eta_p : tuple
        Tuple containing the pos. tail weights with the signature:
        ``(eta_p1, eta_p2, ...)``.
    li_tau_p : tuple
        Tuple containing the pos. tail decay constants with the signature:
        ``(tau_p1, tau_p2, ...)``.

    Returns
    -------
    float
        Ordinate of hyper-EMG distribution

    See also
    --------
    :func:`h_m_emg`
    :func:`h_p_emg`

    Notes
    -----
    The Hyper-EMG probability distribution function was first introduced in
    `this publication`_ by Purushothaman et al. [#]_. The basic definitions and
    notations used here are adapted from this work.

    The total hyper-EMG distribution `h_m_emg` is comprised of the negative- and
    positive-skewed EMG distributions `h_m_emg` and `h_p_emg` respectively and
    is calculated as:
    ``h_emg(x, mu, sigma, theta, li_eta_m, li_tau_m, li_eta_p, li_tau_p) =``
    ``theta*h_m_emg(x, mu, sigma, li_eta_m, li_tau_m) +
    (1-theta)*h_p_emg(x, mu, sigma, li_eta_p, li_tau_p)``.

    For algorithmic details, see `Notes` of :func:`h_m_emg` and :func:`h_p_emg`.

    .. _`this publication`: https://www.sciencedirect.com/science/article/abs/pii/S1387380616302913

    References
    ----------
    .. [#] Purushothaman, S., et al. "Hyper-EMG: A new probability distribution
       function composed of Exponentially Modified Gaussian distributions to
       analyze asymmetric peak shapes in high-resolution time-of-flight mass
       spectrometry." International Journal of Mass Spectrometry 421 (2017):
       245-254.

    """
    if theta == 1:
        h = h_m_emg(x, mu, sigma, li_eta_m, li_tau_m)
    elif theta == 0:
        h = h_p_emg(x, mu, sigma, li_eta_p, li_tau_p)
    else:
        neg_tail = h_m_emg(x, mu, sigma, li_eta_m, li_tau_m)
        pos_tail = h_p_emg(x, mu, sigma, li_eta_p, li_tau_p)
        h = theta*neg_tail + (1-theta)*pos_tail
    return h


[docs]def mu_emg(mu, theta, li_eta_m, li_tau_m, li_eta_p, li_tau_p):
    """Calculate mean of hyper-EMG distribution.

    The lengths of `li_eta_m` & `li_tau_m` must match and define the order of
    negative tails. Likewise, the lengths of `li_eta_p` & `li_tau_p` must match
    and define the order of positive tails.

    Parameters
    ----------
    mu : float >= 0
        Mean value of underlying Gaussian distribution.
    theta : float, 0 <= theta <= 1
        Left-right-weight factor (negative-skewed EMG weight: theta;
        positive-skewed EMG weight: 1 - theta).
    li_eta_m : tuple
        Tuple containing the neg. tail weights with the signature:
        ``(eta_m1, eta_m2, ...)``.
    li_tau_m : tuple
        Tuple containing the neg. tail decay constants with the signature:
        ``(tau_m1, tau_m2, ...)``.
    li_eta_p : tuple
        Tuple containing the pos. tail weights with the signature:
        ``(eta_p1, eta_p2, ...)``.
    li_tau_p : tuple
        Tuple containing the pos. tail decay constants with the signature:
        ``(tau_p1, tau_p2, ...)``.

    Returns
    -------
    float
        Mean of hyper-EMG distribution.

    Notes
    -----
    The Hyper-EMG probability distribution function was first introduced in
    `this publication`_ by Purushothaman et al. [#]_. The basic definitions and
    notations used here are adapted from this work.

    .. _`this publication`: https://www.sciencedirect.com/science/article/abs/pii/S1387380616302913

    References
    ----------
    .. [#] Purushothaman, S., et al. "Hyper-EMG: A new probability distribution
       function composed of Exponentially Modified Gaussian distributions to
       analyze asymmetric peak shapes in high-resolution time-of-flight mass
       spectrometry." International Journal of Mass Spectrometry 421 (2017):
       245-254.

    """
    if abs(sum(li_eta_m) - 1) > norm_precision:  # check normalization of eta_m's
        raise Exception("eta_m's don't add up to 1.")
    t_order_m = len(li_eta_m)
    sum_M_mh = 0
    for i in range(t_order_m):
        sum_M_mh += li_eta_m[i]*li_tau_m[i]

    if abs(sum(li_eta_p) - 1) > norm_precision:  # check normalization of eta_p's
        raise Exception("eta_p's don't add up to 1.")
    t_order_p = len(li_eta_p)
    sum_M_ph = 0
    for i in range(t_order_p):
        sum_M_ph += li_eta_p[i]*li_tau_p[i]

    return mu - theta*sum_M_mh + (1-theta)*sum_M_ph


[docs]def sigma_emg(sigma, theta, li_eta_m, li_tau_m, li_eta_p, li_tau_p):
    """Calculate standard deviation of hyper-EMG distribution.

    The lengths of `li_eta_m` & `li_tau_m` must match and define the order of
    negative tails. Likewise, the lengths of `li_eta_p` & `li_tau_p` must match
    and define the order of positive tails.

    Parameters
    ----------
    sigma : float >= 0
        Standard deviation of underlying Gaussian distribution.
    theta : float, 0 <= theta <= 1
        Left-right-weight factor (negative-skewed EMG weight: theta;
        positive-skewed EMG weight: 1 - theta).
    li_eta_m : tuple
        Tuple containing the neg. tail weights with the signature:
        ``(eta_m1, eta_m2, ...)``.
    li_tau_m : tuple
        Tuple containing the neg. tail decay constants with the signature:
        ``(tau_m1, tau_m2, ...)``.
    li_eta_p : tuple
        Tuple containing the pos. tail weights with the signature:
        ``(eta_p1, eta_p2, ...)``.
    li_tau_p : tuple
        Tuple containing the pos. tail decay constants with the signature:
        ``(tau_p1, tau_p2, ...)``.

    Returns
    -------
    float
        Standard deviation of hyper-EMG distribution.

    Notes
    -----
    The Hyper-EMG probability distribution function was first introduced in
    `this publication`_ by Purushothaman et al. [#]_. The basic definitions and
    notations used here are adapted from this work.

    .. _`this publication`: https://www.sciencedirect.com/science/article/abs/pii/S1387380616302913

    References
    ----------
    .. [#] Purushothaman, S., et al. "Hyper-EMG: A new probability distribution
       function composed of Exponentially Modified Gaussian distributions to
       analyze asymmetric peak shapes in high-resolution time-of-flight mass
       spectrometry." International Journal of Mass Spectrometry 421 (2017):
       245-254.

    """
    if abs(sum(li_eta_m) - 1) > norm_precision:  # check normalization of eta_m's
        raise Exception("eta_m's don't add up to 1.")
    t_order_m = len(li_eta_m)

    sum_M_mh = 0
    sum_S_mh = 0
    for i in range(t_order_m):
        sum_M_mh += li_eta_m[i]* li_tau_m[i]
        sum_S_mh += (li_eta_m[i] + li_eta_m[i]*(1.-li_eta_m[i])**2)*li_tau_m[i]**2

    if abs(sum(li_eta_p) - 1) > norm_precision:  # check normalization of eta_p's
        raise Exception("eta_p's don't add up to 1.")
    t_order_p = len(li_eta_p)
    sum_M_ph = 0
    sum_S_ph = 0
    for i in range(t_order_p):
        sum_M_ph += li_eta_p[i]* li_tau_p[i]
        sum_S_ph += (li_eta_p[i] + li_eta_p[i]*(1.-li_eta_p[i])**2)*li_tau_p[i]**2

    S_h = sigma**2 + theta*sum_S_mh + (1-theta)*sum_S_ph + theta*(1.-theta)*(sum_M_mh + sum_M_ph)**2
    return np.sqrt(S_h)


################################################################################