"""Parent classes used by IsoCor to define specific *correctors* sub-classes.
There should be no need to instanciate any of those classes directly.
However, all *correctors* classes should inherit from the classes:
* :py:class:`~LabelledChemical` that stores data related to the metabolite
to correct and checks the inputs,
* :py:class:`~InterfaceMSCorrector` that contractualize what any corrector should be able to do.
"""
import re
import collections
import math
from decimal import Decimal as D
[docs]class LabelledChemical(object):
"""A labeled chemical considered for isotope correction.
This class defines and check the parameters (formulas, tracer, etc.). needed
to model a chemical undergoing MS correction.
Default isotopic data come from:
Isotopic Compositions of the Elements 2013, Pure Appl. Chem.,
2016, Vol. 88, No. 3, pp. 293-306, https://doi.org/10.1515/pac-2015-0503
Warning:
Except specified otherwise, you should never set any attribute at runtime.
Always make a new instance if you wish to change the parameters.
Args:
formula (str): elemental formula of the metabolite moiety (e.g. "C3H7O6P")
inchi (str): InChI of the metabolite (e.g. "InChI=1/C6H12O6/c7-1-2-3(8)4(9)5(10)6(11)12-2/
h2-11H,1H2/t2-,3-,4+,5-,6+/m1/s1" for alpha- D -glucopyranose).
Note that the InChI might represents the metabolite moiety (e.g. a fragment
ion) or the metabolite, hence its formula may differ from :py:attr:`~formula`.
tracer (str): the isotopic tracer (e.g. "13C")
charge (int): charge of the detected ion
label (str): metabolite abbreviation (e.g. "G3P")
data_isotopes (dict): isotopic data with mass and abundance
as in :py:attr:`~LabelledChemical.DEFAULT_ISODATA`.
If set to `None`, default isotopic data are used.
derivative_formula (str): elemental formula of the derivative moiety
tracer_purity (list): proportion of each isotope of the tracer element (metabolite moiety)
The list must have the same length as the list of isotopes for the relevant
isotope in :py:attr:`~data_isotopes`. Must be normalized to one.
If set to `None`, a perfect purity is assumed.
correct_NA_tracer (bool): flag to correct tracer natural abundance or not.
If set to True, tracer elements of the metabolite moiety will be
corrected for the natural isotopic abundance of the tracer.
Note that tracer elements from the derivative moiety will always be
corrected at natural abundance (by definition of a derivative moiety).
"""
DEFAULT_ISODATA = {"C": {"abundance": [0.9893, 0.0107],
"mass": [D('12.0'), D('13.003354835')]},
"H": {"abundance": [0.999885, 0.000115],
"mass": [D('1.0078250322'), D('2.0141017781')]},
"N": {"abundance": [0.99636, 0.00364],
"mass": [D('14.003074004'), D('15.000108899')]},
"P": {"abundance": [1.],
"mass": [D('30.973761998')]},
"O": {"abundance": [0.99757, 0.00038, 0.00205],
"mass": [D('15.99491462'), D('16.999131757'), D('17.999159613')]},
"S": {"abundance": [0.9499, 0.0075, 0.0425, 0.0, 0.0001],
"mass": [D('31.972071174'), D('32.971458910'), D('33.9678670'), D('35.0'), D('35.967081')]},
"Si": {"abundance": [0.92223, 0.04685, 0.03092],
"mass": [D('27.976926535'), D('28.976494665'), D('29.9737701')]}}
def __init__(self, formula, tracer, derivative_formula, tracer_purity,
correct_NA_tracer, data_isotopes, charge=None, label=None, inchi=None):
"""Initialize a new LabelledChemical with its associated data."""
# Load data_isotope first as it is critical for the other attributes
self._data_isotopes = self.DEFAULT_ISODATA if data_isotopes is None else data_isotopes
self._check_data_isotopes(self._data_isotopes)
# Pseudo-private attributes (mostly to work with properties)
self._molecular_weight = None
self._tracer_purity = tracer_purity
self._correct_NA_tracer = correct_NA_tracer
self._formula = None
self._inchi = inchi if inchi is not None else ""
self._isotopic_inchi = None
self._derivative_formula = None
self._correction_formula = None
self._mzshift_tracer = None
self._str_formula = formula
self._str_derivative_formula = derivative_formula if derivative_formula is not None else ""
self._str_tracer_code = tracer
try:
self._charge = None if charge is None else abs(int(charge))
if self._charge == 0:
raise ValueError(
"'charge' parameter should not be 0 ({})".format(charge))
except:
raise ValueError("'charge' parameter should be a non-null integer ({})".format(charge))
# Protected attributes (user should not see them, but must stay available in sub-class)
self._tracer_el, self._idx_tracer = self._parse_strtracer(
self._str_tracer_code)
# Public attributes that could be modified at runtime (not safe)
# NB: self.data_isotopes is in this category
self.label = label if label is not None else "|".join([self._str_formula,
self._str_derivative_formula,
self._str_tracer_code])
# Check a few things
# NB: in the future those checks should be in the setters
if len(self.data_isotopes[self._tracer_el]["mass"]) != len(self.tracer_purity):
raise ValueError("Unexpected length of tracer purity vector.")
if not self.formula:
raise ValueError("The elemental formula ({}) is empty.".format(self.label))
if self._tracer_el not in self.formula:
raise ValueError("The isotopic tracer ({}) must be present in the"
" metabolite {}.".format(self._tracer_el, self._str_formula))
@property
def data_isotopes(self):
"""dict of dicts: isotopic data with mass and abundance
See :py:attr:`~LabelledChemical.DEFAULT_ISODATA` for an example.
"""
return self._data_isotopes
@property
def formula(self):
"""Counter: elemental formula of the metabolite moiety"""
if self._formula is None:
self._formula = self._parse_strformula(self._str_formula)
return self._formula
@property
def isotopic_inchi(self):
"""Generate isotopic inchis of the corrected fractions, or just the isotopic layer if no
InChI has been provided.
Standard proposed by the InChI Isotopologue and Isotopomer Development Team:
Simple Definition: /a(Ee#<+|->#...)
Complete Definition:
/a(<element><isotope_count><isotope_designation>[,<atom_number>])
<element> - one or two letter Element code (Ee).
<isotope_count> - number of atoms with the designated isotope (#).
<isotope_designation> - isotope designation indicated by a sign (+ or -) and number
indicating the unit mass difference from the rounded average atomic mass of the
element. For example, the average atomic mass of Sn (118.710) is rounded to 119.
We specify two 118 Sn atoms as “/a(Sn2-1)”.
Example:
13C2 isotopologue of alpha-D-glucopyranose:
InChI=1/C6H12O6/c7-1-2-3(8)4(9)5(10)6(11)12-2/h2-11H,1H2/t2-,3-,4+,5-,6+/m1/s1 /a(C2+1),(C4+0)
Returns:
list: isotopic inchis
"""
if self._isotopic_inchi is None:
tracer_info = self.data_isotopes[self._tracer_el]
average_iso_mass = round(sum([D(tracer_info["abundance"][i]) * tracer_info["mass"][i] for i in range(len(tracer_info["abundance"]))]))
tracer_mass = tracer_info["mass"][self._idx_tracer]
tracer_isotope_designation = round(tracer_mass - average_iso_mass)
isotope_designation_0 = round(tracer_info["mass"][0] - average_iso_mass)
# generate isotopic inchis
self._isotopic_inchi = []
for j in range(self.formula[self._tracer_el] + 1):
if j == 0:
tmp = '{}/a({}{}{:+d})'.format(self._inchi, self._tracer_el, self.formula[self._tracer_el], isotope_designation_0)
elif j == self.formula[self._tracer_el]:
tmp = '{}/a({}{}{:+d})'.format(self._inchi, self._tracer_el, self.formula[self._tracer_el], tracer_isotope_designation)
else:
tmp = '{}/a({}{}{:+d}),({}{}{:+d})'.format(self._inchi, self._tracer_el, j, tracer_isotope_designation, self._tracer_el, self.formula[self._tracer_el] - j, isotope_designation_0)
self._isotopic_inchi.append(tmp)
return self._isotopic_inchi
@property
def derivative_formula(self):
"""Counter: elemental formula of the derivative moiety."""
if self._derivative_formula is None:
self._derivative_formula = self._parse_strformula(
self._str_derivative_formula)
return self._derivative_formula
@property
def charge(self):
"""int: absolute value of the charge of the metabolite."""
return self._charge
@property
def inchi(self):
"""str: inchi of the metabolite."""
return self._inchi
@property
def tracer_purity(self):
"""list: proportion of each isotope for the tracer element of the metabolite"""
if self._tracer_purity is None: # silently replace None by default value
default_purity = [0.0] * \
len(self.data_isotopes[self._tracer_el]["mass"])
default_purity[self._idx_tracer] = 1.0 # perfect purity
self._tracer_purity = default_purity
return self._tracer_purity
@property
def correct_NA_tracer(self):
"""bool: correct tracer natural abundance if set to True"""
return self._correct_NA_tracer
@property
def molecular_weight(self):
"""Decimal: exact molecular weight of the metabolite.
Including the derivative and the tracer.
"""
if self._molecular_weight is None:
tmp = [n*self.data_isotopes[el]["mass"][0]
for el, n in self.formula.items()]
if self._str_derivative_formula is not None:
tmp += [n*self.data_isotopes[el]["mass"][0]
for el, n in self.derivative_formula.items()]
self._molecular_weight = sum(tmp)
return self._molecular_weight
@property
def mzshift_tracer(self):
"""Decimal: mass shift of the tracer
Warning:
Mass shift is computed from the first isotope of the list in :py:attr:`~data_isotopes`.
"""
if self._mzshift_tracer is None:
tracer_mass = self.data_isotopes[self._tracer_el]["mass"][self._idx_tracer]
m0_mass = self.data_isotopes[self._tracer_el]["mass"][0]
self._mzshift_tracer = tracer_mass - m0_mass
assert self._mzshift_tracer > 0, "Unexpected negative tracer mass shift: {}".format(
self._mzshift_tracer)
return self._mzshift_tracer
@property
def correction_formula(self):
"""Counter: molecular formula on which the correction will be applied
This formula is the one on which the correction vector is based.
Remember that for the correction vector we need **non-tracers atoms**
from the measured metabolite (all elements excluding the tracer element)
and **all atoms** from the derivative (including the tracer element).
"""
if self._correction_formula is None:
self._correction_formula = {}
for element, n_atoms in self.formula.items():
if element != self._tracer_el:
self._correction_formula[element] = n_atoms
if self.derivative_formula is not None:
for element, n_atoms in self.derivative_formula.items():
self._correction_formula[element] = self._correction_formula.get(
element, 0) + n_atoms
return self._correction_formula
@staticmethod
def _parse_strformula(str_formula):
"""Parse the molecular formula.
Args:
str_formula (str): molecular formula (e.g. "H2O").
Returns:
dict: the number of each element in a dictionnary-like
counter (e.g. {'H':2,'O':1}).
"""
if str_formula is None:
return None
pformula = re.findall(r'([A-Z][a-z]*)(\d*)', str_formula)
counter = collections.Counter()
for element, cnt in pformula:
if cnt == '':
cnt = 1
counter[element] += int(cnt)
return counter
def _parse_strtracer(self, str_tracer):
"""Parse the tracer code.
Args:
str_tracer (str): tracer code (e.g. "13C")
Returns:
tuple
- (str) tracer element (e.g. "C")
- (int) tracer index in :py:attr:`~data_isotopes`
"""
try:
tracer = re.search(r'(\d*)([A-Z][a-z]*)', str_tracer)
count = int(tracer.group(1))
tracer_el = tracer.group(2)
except (ValueError, AttributeError):
raise ValueError("Invalid tracer code: '{}'."
" Please check your inputs.".format(str_tracer))
best_diff = float("inf")
idx_tracer = None
unexpected_msg = "Unexpected tracer code. Are you sure this isotope is "\
"in data_isotopes? '{}'".format(str_tracer)
assert tracer_el in self.data_isotopes, unexpected_msg
for i, mass in enumerate(self.data_isotopes[tracer_el]["mass"]):
test_diff = abs(mass - count)
if test_diff < best_diff:
best_diff = test_diff
idx_tracer = i
assert best_diff < 0.5, unexpected_msg
return (tracer_el, idx_tracer)
@staticmethod
def _check_data_isotopes(data_isotopes):
"""Check :py:attr:`~data_isotopes` validity
Raises:
ValueError: :py:attr:`~data_isotopes` is corrupted in some way.
"""
tol_isomass = 1.2 # arbitrary max mass allowed between two isotope masses (in u)
for element, data_el in data_isotopes.items():
if "mass" not in data_el or "abundance" not in data_el:
raise ValueError(
"Invalid data_isotopes. Please check your inputs.")
if len(data_el["mass"]) != len(data_el["abundance"]):
raise ValueError("There should ALWAYS be the same number of"
" isotopes mass and abundance in data_isotopes."
" This is not the case for {}.".format(element))
# Mass specific checks
if sorted(data_el["mass"]) != data_el["mass"]:
raise ValueError("Isotopes masses in data_isotopes should"
" ALWAYS be in increasing number. This is not"
" the case for {}.".format(element))
previous_mass = None
for mass in data_el["mass"]:
if previous_mass and mass - previous_mass > tol_isomass:
raise ValueError("It seems that data_isotopes is incomplete"
" and that we are missing data for an isotope"
" between masses {} Da and {} Da"
" for {}.".format(previous_mass, mass, element))
if mass <= 0:
raise ValueError("One or several masses are negatives in"
" data_isotopes for element {}.".format(element))
previous_mass = mass
# Abundance specific checks
if math.fsum(data_el["abundance"]) != 1.:
raise ValueError("The sum of the natural abundance of each isotope"
" should ALWAYS equal 1."
" This is not the case for {}.".format(element))
for abundance in data_el["abundance"]:
if not (0. <= abundance <= 1.):
raise ValueError("One or several natural abundance are invalid"
" probabilities for isotopic data object for element"
" {}.".format(element))
[docs]class InterfaceMSCorrector(object):
"""Interface for Mass Spectrometry correctors.
This class defines the minimal methods that a corrector should implement.
Keep in mind that a corrector must inherit from both :py:class:`~InterfaceMSCorrector`
and :py:class:`~LabelledChemical`.
"""
[docs] class ImproperUsageError(Exception):
"""Raised when the corrector is used incorrectly."""
pass
def __init__(self):
self._correction_matrix = None
@property
def correction_matrix(self):
"""Correction matrix.
The correction matrix will be set once and for all using
:py:meth:`~compute_correction_matrix` during the first access.
Use `del x.correction_matrix` if you wish to reset it.
"""
if self._correction_matrix is None:
self._correction_matrix = self.compute_correction_matrix()
return self._correction_matrix
@correction_matrix.deleter
def correction_matrix(self):
self._correction_matrix = None
[docs] def compute_correction_matrix(self):
"""Returns the correction matrix taking into account all parameters.
Warning:
Does not set :py:attr:`~correction_matrix` if called directly.
Returns:
array: the correction matrix
"""
raise NotImplementedError(
"This method must be overloaded in a child class.")
[docs] def correct(self, measurement):
"""Return corrected measurement vector.
Args:
measurement (list): measured areas
Returns:
tuple:
- corrected_area (array): Corrected area for each peak.
- isotopologue_fraction (list): The abundance of each tracer
isotopologue (corrected area normalized to 1).
- residuum
- mean enrichment
"""
raise NotImplementedError(
"This method must be overloaded in a child class.")