# Source code for cvanmf.models

"""Load existing Enterosignature models."""
import logging
from importlib.resources import files
from typing import NamedTuple, List, Optional, Dict, Union

import pandas as pd

# Module-level logger, named after this module.
logger: logging.Logger = logging.getLogger(__name__)

# Hex colour for each signature of the five Enterosignature model, keyed by
# signature name. Passed as the ``colors`` of the model built by ``five_es``.
FIVE_ES_COLORS = dict(
    ES_Bact="#E69F00",
    ES_Firm="#023e8a",
    ES_Prev="#D55E00",
    ES_Bifi="#009E73",
    ES_Esch="#483838",
)


class Signatures(NamedTuple):
    """Definition of an existing signature model.

    This provides the definition of existing signatures required to reapply
    the signature model to new data. Where Decomposition stores the input and
    H matrix, these are not necessary for transforming new data. Rather, we
    only need the W matrix, the colors associated with each signature (for
    consistency of representation), and the preprocessing steps (to match
    features in the new data with those in the W matrix).
    """

    w: pd.DataFrame
    """Feature weights (W matrix) for this model."""
    colors: Union[List[str], Dict[str, str]]
    """Color for each signature in the model."""
    feature_match: 'FeatureMatch'
    """Function to map features in new data to those in the model W matrix."""
    input_validation: 'InputValidation' = lambda x: x
    """Function to validate and potentially transform input table. Defaults to
    identity function."""
    citation: Optional[str] = None
    """Citation when using this model."""

    def reapply(self, y: pd.DataFrame, **kwargs) -> 'Decomposition':
        """Transform new data using this signature model.

        All fields of this model are forwarded as keyword arguments to
        ``cvanmf.reapply._reapply_model``; any entry in ``kwargs`` with the
        same name as a field takes precedence over the stored value.

        :param y: New data of same type as the existing model.
        :param kwargs: Extra keyword arguments, or overrides for this
            model's fields, passed on to ``_reapply_model``.
        :return: Whatever ``_reapply_model`` returns (annotated as a
            Decomposition).
        """
        # Imported here rather than at module level, presumably to avoid a
        # circular import between cvanmf.models and cvanmf.reapply.
        from cvanmf import reapply

        return reapply._reapply_model(
            y=y,
            # Right-hand operand wins, so caller kwargs override the fields.
            **(self._asdict() | kwargs)
        )
def five_es() -> Signatures:
    """The 5 Enterosignature model of Frioux et al.
    (2023, https://doi.org/10.1016/j.chom.2023.05.024).

    A summary of this model can also be found on the website
    https://enterosignatures.quadram.ac.uk. The `reapply` method for this
    model will normalise (total-sum-scale) input data after applying filters
    to match model format, so data provided does not need to be normalised.

    Each call reads the packaged ``ES5_W.tsv`` W matrix and logs a warning
    reminding the user to cite the model.

    :return: 5 Enterosignature model
    :rtype: Signatures
    """
    # Open the resource through the Traversable API instead of converting it
    # to a string path: the package data is not guaranteed to live on the
    # filesystem (e.g. when imported from a zip archive).
    resource = files("cvanmf.data").joinpath("ES5_W.tsv")
    with resource.open("r", encoding="utf-8") as handle:
        w: pd.DataFrame = pd.read_csv(handle, sep="\t", index_col=0)

    # Single source of truth for the citation: used for both the log
    # message and the returned model.
    citation: str = (
        "Frioux, C. et al. Enterosignatures define common bacterial guilds in "
        "the human gut microbiome. Cell Host & Microbe 31, 1111-1125.e6 ("
        "2023). https://doi.org/10.1016/j.chom.2023.05.024")
    logger.warning("If you use the 5ES model please cite %s", citation)

    # Local import, as in Signatures.reapply.
    from cvanmf import reapply

    return Signatures(
        w=w,
        colors=FIVE_ES_COLORS,
        feature_match=reapply.match_genera,
        input_validation=reapply.validate_genus_table,
        citation=citation,
    )