Source code for emgfit.emg_funcs

################################################################################
##### Module with numerically robust implementation of the hyper-exponentially-
##### modified Gaussian probability density function
##### Author: Stefan Paul

##### Import packages
import numpy as np
import lmfit as fit
from numpy import exp
from math import sqrt
import scipy.special.cython_special
from scipy.special import erfc, erfcx
from numba.extending import get_cython_function_address
from numba import vectorize, njit
import ctypes
import mpmath as mp
erfc_mp = np.frompyfunc(mp.erfc,1,1)
exp_mp = np.frompyfunc(mp.exp,1,1)

norm_precision = 1e-06 # level on which eta parameters must agree with unity

################################################################################
##### Define numba versions of scipy.special's erfc and erfcx functions using
##### the corresponding C functions from scipy.special.cython_special
erfc_addr = get_cython_function_address("scipy.special.cython_special",
                                        "__pyx_fuse_1erfc")
erfc_functype = ctypes.CFUNCTYPE(ctypes.c_double, ctypes.c_double)
c_erfc = erfc_functype(erfc_addr)

@vectorize('float64(float64)')
def _vec_erfc(x):
    return c_erfc(x)

@njit
def _erfc_jit(arg):
    return _vec_erfc(arg)

erfcx_addr = get_cython_function_address("scipy.special.cython_special",
                                         "__pyx_fuse_1erfcx")
erfcx_functype = ctypes.CFUNCTYPE(ctypes.c_double, ctypes.c_double)
c_erfcx = erfcx_functype(erfcx_addr)

@vectorize('float64(float64)')
def _vec_erfcx(x):
    return c_erfcx(x)

@njit
def _erfcx_jit(arg):
    return _vec_erfcx(arg)


################################################################################
##### Define general Hyper-EMG functions

[docs]@njit def h_m_i(x,mu,sigma,eta_m,tau_m): """Internal helper function to calculate single negative EMG tail for h_m_emg.""" erfcarg = np.atleast_1d(sigma/(sqrt(2)*tau_m) + (x-mu)/(sqrt(2)*sigma)) mask = (erfcarg < 0) ret = np.empty_like(x) # Use Gauss*erfcx formulation to avoid overflow of exp and underflow of # erfc at larger pos. arguments: Gauss_erfcx = exp( -0.5*((x[~mask]-mu)/sigma)**2 )*_erfcx_jit(erfcarg[~mask]) ret[~mask] = eta_m/(2*tau_m)*Gauss_erfcx # Use exp*erfc formulation to avoid overflow of erfcx at larger neg. # arguments: exp_erfc = exp(0.5*(sigma/tau_m)**2 + (x[mask]-mu)/tau_m)*_erfc_jit(erfcarg[mask]) ret[mask] = 0.5*eta_m/tau_m*exp_erfc return ret
[docs]def h_m_i_prec(x,mu,sigma,eta_m,tau_m): """Arbitrary precision version of internal helper function for h_m_emg.""" expval = exp_mp( 0.5*(sigma/tau_m)**2 + (x-mu)/tau_m ) erfcval = erfc_mp( sigma/(sqrt(2)*tau_m) + (x-mu)/(sqrt(2)*sigma) ) ret = 0.5*eta_m/tau_m*expval*erfcval return ret.astype(float)
[docs]@njit def h_m_emg(x, mu, sigma, li_eta_m,li_tau_m): """Negative skewed exponentially-modified Gaussian (EMG) distribution. The lengths of `li_eta_m` & `li_tau_m` must match and define the order of negative tails. Parameters ---------- x : float >= 0 Abscissa data (mass data). mu : float >= 0 Mean value of underlying Gaussian distribution. sigma : float >= 0 Standard deviation of underlying Gaussian distribution. li_eta_m : tuple Tuple containing the neg. tail weights with the signature: ``(eta_m1, eta_m2, ...)``. li_tau_m : tuple Tuple containing the neg. tail decay constants with the signature: ``(tau_m1, tau_m2, ...)``. Returns ------- float Ordinate values of the negative skewed EMG distribution. Notes ----- The Hyper-EMG probability distribution function was first introduced in `this publication`_ by Purushothaman et al. [#]_. The basic definitions and notations used here are adapted from this work. Each negative tail of a Hyper-EMG function can be expressed in two equivalent ways: .. math:: h_\mathrm{emg,-i} = \\frac{\\eta_{-i}}{2\\tau_{-i}} \\exp{(-\\left(\\frac{x-\\mu}{\\sqrt{2}\\sigma}\\right)^2)} \mathrm{erfcx}(v) = \\frac{\\eta_{-i}}{2\\tau_{-i}} \\exp{(u)} \mathrm{erfc}(v), where :math:`u = \\frac{\\sigma}{\\sqrt{2}\\tau_{-i}} + \\frac{x-\mu}{\\sqrt{2}\\tau_{-i}}` and :math:`v = \\frac{\\sigma}{\\sqrt{2}\\tau_{-i}} + \\frac{x-\mu}{\\sqrt{2}\\sigma}`. In double float precision, the `exp(u)`_ routine overflows if u > 709.78. The complementary error function `erfc(v)`_ underflows to 0.0 if v > 26.54. The scaled complementary error function `erfcx(v)`_ overflows if v < -26.62. To circumvent those scenarios and always ensure an exact result, the underlying helper function for the calculation of a negative EMG tail :func:`h_m_i` uses the formulation in terms of `erfcx` whenever v >= 0 and switches to the `erfc`-formulation when v < 0. .. _`exp(u)`: https://numpy.org/doc/stable/reference/generated/numpy.exp.html#numpy.exp .. _`erfc(v)`: https://docs.scipy.org/doc/scipy/reference/generated/scipy.special.erfc.html .. _`erfcx(v)`: https://docs.scipy.org/doc/scipy/reference/generated/scipy.special.erfcx.html .. _`this publication`: https://www.sciencedirect.com/science/article/abs/pii/S1387380616302913 References ---------- .. [#] Purushothaman, S., et al. "Hyper-EMG: A new probability distribution function composed of Exponentially Modified Gaussian distributions to analyze asymmetric peak shapes in high-resolution time-of-flight mass spectrometry." International Journal of Mass Spectrometry 421 (2017): 245-254. """ li_eta_m = np.array(li_eta_m).astype(np.float_) li_tau_m = np.array(li_tau_m).astype(np.float_) t_order_m = len(li_eta_m) # order of negative tail exponentials sum_eta_m = 0. for i in range(t_order_m): sum_eta_m += li_eta_m[i] if abs(sum_eta_m - 1) > norm_precision: # check normalization of eta_m's raise Exception("eta_m's don't add up to 1.") if len(li_tau_m) != t_order_m: # check if all arguments match tail order raise Exception("orders of eta_m and tau_m do not match!") h_m = np.zeros_like(x) for i in range(t_order_m): eta_m = li_eta_m[i] tau_m = li_tau_m[i] h_m += h_m_i(x,mu,sigma,eta_m,tau_m) return h_m
[docs]@njit def h_p_i(x,mu,sigma,eta_p,tau_p): """Internal helper function to calculate single positive EMG tail for h_p_emg.""" erfcarg = np.atleast_1d(sigma/(sqrt(2)*tau_p) - (x-mu)/(sqrt(2)*sigma)) mask = (erfcarg < 0) ret = np.empty_like(x) # Use Gauss*erfcx formulation to avoid overflow of exp and underflow of # erfc at larger pos. arguments: Gauss_erfcx = exp( -0.5*((x[~mask]-mu)/sigma)**2 )*_erfcx_jit(erfcarg[~mask]) ret[~mask] = eta_p/(2*tau_p)*Gauss_erfcx # Use exp*erfc formulation to avoid overflow of erfcx at larger neg. # arguments: exp_erfc = exp(0.5*(sigma/tau_p)**2 - (x[mask]-mu)/tau_p)*_erfc_jit(erfcarg[mask]) ret[mask] = 0.5*eta_p/tau_p*exp_erfc return ret
[docs]def h_p_i_prec(x,mu,sigma,eta_p,tau_p): """Arbitrary precision version of internal helper function for h_p_emg.""" expval = exp_mp( 0.5*(sigma/tau_p)**2 - (x-mu)/tau_p ) erfcval = erfc_mp( sigma/(sqrt(2)*tau_p) - (x-mu)/(sqrt(2)*sigma) ) ret = 0.5*eta_p/tau_p*expval*erfcval return ret.astype(float)
[docs]@njit def h_p_emg(x, mu, sigma, li_eta_p, li_tau_p): """Positive skewed exponentially-modified Gaussian (EMG) distribution. The lengths of `li_eta_p` & `li_tau_p` must match and define the order of positive tails. Parameters ---------- x : float >= 0 Abscissa data (mass data). mu : float >= 0 Mean value of underlying Gaussian distribution. sigma : float >= 0 Standard deviation of underlying Gaussian distribution. li_eta_p : tuple Tuple containing the pos. tail weights with the signature: ``(eta_p1, eta_p2, ...)``. li_tau_p : tuple Tuple containing the pos. tail decay constants with the signature: ``(tau_p1, tau_p2, ...)``. Returns ------- float Ordinate values of the positive skewed EMG distribution. Notes ----- The Hyper-EMG probability distribution function was first introduced in `this publication`_ by Purushothaman et al. [#]_. The basic definitions and notations used here are adapted from this work. Each positive tail of a Hyper-EMG function can be expressed in two equivalent ways: .. math:: h_\mathrm{emg,+i} = \\frac{\\eta_{+i}}{2\\tau_{+i}} \\exp{(-\\left(\\frac{x-\\mu}{\\sqrt{2}\\sigma}\\right)^2)} \mathrm{erfcx}(v) = \\frac{\\eta_{+i}}{2\\tau_{+i}} \\exp{(u)} \mathrm{erfc}(v), where :math:`u = \\frac{\\sigma}{\\sqrt{2}\\tau_{+i}} - \\frac{x-\mu}{\\sqrt{2}\\tau_{+i}}` and :math:`v = \\frac{\\sigma}{\\sqrt{2}\\tau_{+i}} - \\frac{x-\mu}{\\sqrt{2}\\sigma}`. In double precision, the `exp(u)`_ routine overflows if u > 709.78. The complementary error function `erfc(v)`_ underflows to 0.0 if v > 26.54. The scaled complementary error function `erfcx(v)`_ overflows if v < -26.62. To circumvent those scenarios and always ensure an exact result, the underlying helper function for the calculation of a negative EMG tail :func:`h_m_i` uses the formulation in terms of `erfcx` whenever v >= 0 and switches to the `erfc`-formulation when v < 0. .. _`exp(u)`: https://numpy.org/doc/stable/reference/generated/numpy.exp.html#numpy.exp .. _`erfc(v)`: https://docs.scipy.org/doc/scipy/reference/generated/scipy.special.erfc.html .. _`erfcx(v)`: https://docs.scipy.org/doc/scipy/reference/generated/scipy.special.erfcx.html .. _`this publication`: https://www.sciencedirect.com/science/article/abs/pii/S1387380616302913 References ---------- .. [#] Purushothaman, S., et al. "Hyper-EMG: A new probability distribution function composed of Exponentially Modified Gaussian distributions to analyze asymmetric peak shapes in high-resolution time-of-flight mass spectrometry." International Journal of Mass Spectrometry 421 (2017): 245-254. """ li_eta_p = np.array(li_eta_p).astype(np.float_) li_tau_p = np.array(li_tau_p).astype(np.float_) t_order_p = len(li_eta_p) # order of positive tails sum_eta_p = 0. for i in range(t_order_p): sum_eta_p += li_eta_p[i] if abs(sum_eta_p - 1) > norm_precision: # check normalization of eta_p's raise Exception("eta_p's don't add up to 1.") if len(li_tau_p) != t_order_p: # check if all arguments match tail order raise Exception("orders of eta_p and tau_p do not match!") h_p = np.zeros_like(x) for i in range(t_order_p): eta_p = li_eta_p[i] tau_p = li_tau_p[i] h_p += h_p_i(x,mu,sigma,eta_p,tau_p) return h_p
[docs]@njit def h_emg(x, mu, sigma , theta, li_eta_m, li_tau_m, li_eta_p, li_tau_p): """Hyper-exponentially-modified Gaussian distribution (hyper-EMG). The lengths of `li_eta_m` & `li_tau_m` must match and define the order of negative tails. Likewise, the lengths of `li_eta_p` & `li_tau_p` must match and define the order of positive tails. Parameters ---------- x : float >= 0 Abscissa data (mass data). mu : float >= 0 Mean value of underlying Gaussian distribution. sigma : float >= 0 Standard deviation of underlying Gaussian distribution. theta : float, 0 <= theta <= 1 Left-right-weight factor (negative-skewed EMG weight: theta; positive-skewed EMG weight: 1 - theta). li_eta_m : tuple Tuple containing the neg. tail weights with the signature: ``(eta_m1, eta_m2, ...)``. li_tau_m : tuple Tuple containing the neg. tail decay constants with the signature: ``(tau_m1, tau_m2, ...)``. li_eta_p : tuple Tuple containing the pos. tail weights with the signature: ``(eta_p1, eta_p2, ...)``. li_tau_p : tuple Tuple containing the pos. tail decay constants with the signature: ``(tau_p1, tau_p2, ...)``. Returns ------- float Ordinate of hyper-EMG distribution See also -------- :func:`h_m_emg` :func:`h_p_emg` Notes ----- The Hyper-EMG probability distribution function was first introduced in `this publication`_ by Purushothaman et al. [#]_. The basic definitions and notations used here are adapted from this work. The total hyper-EMG distribution `h_m_emg` is comprised of the negative- and positive-skewed EMG distributions `h_m_emg` and `h_p_emg` respectively and is calculated as: ``h_emg(x, mu, sigma, theta, li_eta_m, li_tau_m, li_eta_p, li_tau_p) =`` ``theta*h_m_emg(x, mu, sigma, li_eta_m, li_tau_m) + (1-theta)*h_p_emg(x, mu, sigma, li_eta_p, li_tau_p)``. For algorithmic details, see `Notes` of :func:`h_m_emg` and :func:`h_p_emg`. .. _`this publication`: https://www.sciencedirect.com/science/article/abs/pii/S1387380616302913 References ---------- .. [#] Purushothaman, S., et al. "Hyper-EMG: A new probability distribution function composed of Exponentially Modified Gaussian distributions to analyze asymmetric peak shapes in high-resolution time-of-flight mass spectrometry." International Journal of Mass Spectrometry 421 (2017): 245-254. """ if theta == 1: h = h_m_emg(x, mu, sigma, li_eta_m, li_tau_m) elif theta == 0: h = h_p_emg(x, mu, sigma, li_eta_p, li_tau_p) else: neg_tail = h_m_emg(x, mu, sigma, li_eta_m, li_tau_m) pos_tail = h_p_emg(x, mu, sigma, li_eta_p, li_tau_p) h = theta*neg_tail + (1-theta)*pos_tail return h
[docs]def mu_emg(mu, theta, li_eta_m, li_tau_m, li_eta_p, li_tau_p): """Calculate mean of hyper-EMG distribution. The lengths of `li_eta_m` & `li_tau_m` must match and define the order of negative tails. Likewise, the lengths of `li_eta_p` & `li_tau_p` must match and define the order of positive tails. Parameters ---------- mu : float >= 0 Mean value of underlying Gaussian distribution. theta : float, 0 <= theta <= 1 Left-right-weight factor (negative-skewed EMG weight: theta; positive-skewed EMG weight: 1 - theta). li_eta_m : tuple Tuple containing the neg. tail weights with the signature: ``(eta_m1, eta_m2, ...)``. li_tau_m : tuple Tuple containing the neg. tail decay constants with the signature: ``(tau_m1, tau_m2, ...)``. li_eta_p : tuple Tuple containing the pos. tail weights with the signature: ``(eta_p1, eta_p2, ...)``. li_tau_p : tuple Tuple containing the pos. tail decay constants with the signature: ``(tau_p1, tau_p2, ...)``. Returns ------- float Mean of hyper-EMG distribution. Notes ----- The Hyper-EMG probability distribution function was first introduced in `this publication`_ by Purushothaman et al. [#]_. The basic definitions and notations used here are adapted from this work. .. _`this publication`: https://www.sciencedirect.com/science/article/abs/pii/S1387380616302913 References ---------- .. [#] Purushothaman, S., et al. "Hyper-EMG: A new probability distribution function composed of Exponentially Modified Gaussian distributions to analyze asymmetric peak shapes in high-resolution time-of-flight mass spectrometry." International Journal of Mass Spectrometry 421 (2017): 245-254. """ if abs(sum(li_eta_m) - 1) > norm_precision: # check normalization of eta_m's raise Exception("eta_m's don't add up to 1.") t_order_m = len(li_eta_m) sum_M_mh = 0 for i in range(t_order_m): sum_M_mh += li_eta_m[i]*li_tau_m[i] if abs(sum(li_eta_p) - 1) > norm_precision: # check normalization of eta_p's raise Exception("eta_p's don't add up to 1.") t_order_p = len(li_eta_p) sum_M_ph = 0 for i in range(t_order_p): sum_M_ph += li_eta_p[i]*li_tau_p[i] return mu - theta*sum_M_mh + (1-theta)*sum_M_ph
[docs]def sigma_emg(sigma, theta, li_eta_m, li_tau_m, li_eta_p, li_tau_p): """Calculate standard deviation of hyper-EMG distribution. The lengths of `li_eta_m` & `li_tau_m` must match and define the order of negative tails. Likewise, the lengths of `li_eta_p` & `li_tau_p` must match and define the order of positive tails. Parameters ---------- sigma : float >= 0 Standard deviation of underlying Gaussian distribution. theta : float, 0 <= theta <= 1 Left-right-weight factor (negative-skewed EMG weight: theta; positive-skewed EMG weight: 1 - theta). li_eta_m : tuple Tuple containing the neg. tail weights with the signature: ``(eta_m1, eta_m2, ...)``. li_tau_m : tuple Tuple containing the neg. tail decay constants with the signature: ``(tau_m1, tau_m2, ...)``. li_eta_p : tuple Tuple containing the pos. tail weights with the signature: ``(eta_p1, eta_p2, ...)``. li_tau_p : tuple Tuple containing the pos. tail decay constants with the signature: ``(tau_p1, tau_p2, ...)``. Returns ------- float Standard deviation of hyper-EMG distribution. Notes ----- The Hyper-EMG probability distribution function was first introduced in `this publication`_ by Purushothaman et al. [#]_. The basic definitions and notations used here are adapted from this work. .. _`this publication`: https://www.sciencedirect.com/science/article/abs/pii/S1387380616302913 References ---------- .. [#] Purushothaman, S., et al. "Hyper-EMG: A new probability distribution function composed of Exponentially Modified Gaussian distributions to analyze asymmetric peak shapes in high-resolution time-of-flight mass spectrometry." International Journal of Mass Spectrometry 421 (2017): 245-254. """ if abs(sum(li_eta_m) - 1) > norm_precision: # check normalization of eta_m's raise Exception("eta_m's don't add up to 1.") t_order_m = len(li_eta_m) sum_M_mh = 0 sum_S_mh = 0 for i in range(t_order_m): sum_M_mh += li_eta_m[i]* li_tau_m[i] sum_S_mh += (li_eta_m[i] + li_eta_m[i]*(1.-li_eta_m[i])**2)*li_tau_m[i]**2 if abs(sum(li_eta_p) - 1) > norm_precision: # check normalization of eta_p's raise Exception("eta_p's don't add up to 1.") t_order_p = len(li_eta_p) sum_M_ph = 0 sum_S_ph = 0 for i in range(t_order_p): sum_M_ph += li_eta_p[i]* li_tau_p[i] sum_S_ph += (li_eta_p[i] + li_eta_p[i]*(1.-li_eta_p[i])**2)*li_tau_p[i]**2 S_h = sigma**2 + theta*sum_S_mh + (1-theta)*sum_S_ph + theta*(1.-theta)*(sum_M_mh + sum_M_ph)**2 return np.sqrt(S_h)
################################################################################