Source code for isocor.tests.test_isotopic_cluster_HighRes

"""Test the calculation of high-resolution isotopic clusters (for metabolites with
different number of elements and isotopes) against a brute force implementation.
"""

import math
import itertools as it
import pytest
import numpy as np
import isocor as hrcor


[docs]def get_isoclust_bruteforce(formula, data_iso): """Return the isotopic cluster for a compound using a brute-force method. This method is based on systematic computation of all isotopic species of the molecule. It should be easier to understand than the optimized version but is much slower. """ def npermutations(v): dem = 1 for x in set(v): dem *= math.factorial(v.count(x)) return math.factorial(len(v)) / dem isotopic_cluster = {} # {isotopologue_mass: isotopologue_proba, ...} # For each element, gather the possible combinations of its isotopes element_blocks = [] # list of iterators over isotopes combinations for one element elements_order = [] # e.g.: ["C", "N", "H"] states thats "C" block is at index 0 for element in formula: # number of natural isotopes n_isotopes = len(data_iso[element]["mass"]) iter_iso_combi = it.combinations_with_replacement(range(n_isotopes), # allowed isotopes (index of data_iso) formula[element]) # number of this element in formula elements_order.append(element) element_blocks.append(iter_iso_combi) # Make an iterator over all isotopologues # NB: simply take one combination/block for each element iter_isotopologues = it.product(*element_blocks) # Process each isotopologue for isotopologue in iter_isotopologues: # Compute the mass of the isotopologue mass_by_block = [] for idx_element, combi_isotopes in enumerate(isotopologue): element = elements_order[idx_element] isotope_mass = [data_iso[element]["mass"][idx_iso] for idx_iso in combi_isotopes] mass_by_block.append(sum(isotope_mass)) mass_isotopologue = sum(mass_by_block) # Compute the expected abundance for this isotopologue n_isotopomers_by_block = [] # number of isotopomers caused by each element_block listof_block_proba = [] # proba of each element_block for idx_element, combi_isotopes in enumerate(isotopologue): n_isotopomers_by_block.append(npermutations(combi_isotopes)) element = elements_order[idx_element] # e.g. "C" block_proba = np.prod( [data_iso[element]["abundance"][idx_iso] for idx_iso in combi_isotopes]) listof_block_proba.append(block_proba) proba_isotopologue = np.prod( n_isotopomers_by_block) * np.prod(listof_block_proba) # Save assert mass_isotopologue not in isotopic_cluster isotopic_cluster[mass_isotopologue] = proba_isotopologue return isotopic_cluster
[docs]@pytest.mark.parametrize("formula", [{"C": 1, "N": 1}, {"C": 1, "O": 1}, {"C": 1, "N": 2}, {"C": 1, "O": 2}, {"C": 1, "N": 20, "H": 20, "O": 20, "P": 4}, {"C": 20, "N": 20, "H": 20, "O": 20, "P": 4}]) def test_isoclust_against_bruteforce(formula, data_iso, usr_tolerance): """Check the high-resolution isotopic clusters generation against a brute force implementation.""" str_formula = "".join(["{}{}".format(k, v) for k, v in formula.items()]) # Get the clusters (without tracers) try: del formula["C"] except: pass ic_bruteforce = get_isoclust_bruteforce(formula, data_iso) metabolite = hrcor.HighResMetaboliteCorrector(str_formula, '13C', 1e42, 400, resolution_formula_code="orbitrap", derivative_formula=None, tracer_purity=None, data_isotopes=data_iso, correct_NA_tracer=False, charge=1) metabolite.threshold_p = None ic_optimized = metabolite.get_isotopic_cluster() # Check that all isotopomeres are taken into account np.testing.assert_allclose( sum(ic_bruteforce.values()), 1., rtol=usr_tolerance) np.testing.assert_allclose( sum(ic_optimized.values()), 1., rtol=usr_tolerance) # Check that both methods have the same number of isotopologues assert len(ic_bruteforce) == len(ic_optimized) # Finally, check each mass (key) and each abundance (value) mass_optimized = [float(x) for x in sorted(ic_optimized.keys())] mass_bruteforce = [float(x) for x in sorted(ic_bruteforce.keys())] np.testing.assert_allclose( mass_optimized, mass_bruteforce, rtol=usr_tolerance) abun_optimized = [float(x) for x in sorted(ic_optimized.values())] abun_bruteforce = [float(x) for x in sorted(ic_bruteforce.values())] np.testing.assert_allclose( abun_optimized, abun_bruteforce, rtol=usr_tolerance)