Source code for f2xba.rba_model.rba_model

"""Implementation of RbaModel class used in f2xba.

Extend the XbaModel to become a resource balance constraint model (RBA).
RBA implementation is based on RbaPy (Bulović et al., 2019)

Peter Schubert, CCB, HHU Duesseldorf, December 2022
"""

import os
import re
import pandas as pd
from collections import defaultdict

from .rba_macromolecules import RbaMacromolecules
from .rba_metabolism import RbaMetabolism
from .rba_parameters import RbaParameters, RbaFunction
from .rba_processes import RbaProcesses
from .rba_enzymes import RbaEnzymes
from .rba_densities import RbaDensities
from .rba_targets import RbaTargets
from .rba_medium import RbaMedium
from ..utils.calc_mw import protein_mw_from_aa_comp, rna_mw_from_nt_comp, ssdna_mw_from_dnt_comp, get_seq_composition
from ..utils.mapping_utils import valid_sbml_sid, load_parameter_file
from .initital_assignments import InitialAssignments
import f2xba.prefixes as pf

DEFAULT_GROWTH_RATE = 1.0
"""Growth rate used to configure growth rate dependent parameters in the SBML model."""

DEFAULT_ENZ_SATURATION = 0.5
"""Default enzyme saturation level, unless adjusted by `avg_enz_sat` in table `general`."""

DEFAULT_MICHAELIS_CONSTANT = 1.0
"""Default Michaelis constant KM for medium uptake reactions in mmol/l"""

MAX_ENZ_CONC = 10
"""Upper bound enzyme concentration in mmol/gDW"""

MAX_MM_PROCESSING_FLUX = 10
"""Upper bound for macromolecular synthesis and degradation fluxes in µmol/gDWh"""

MAX_DENSITY_SLACK = 10
"""Upper bound on compartmental density in mmolAA/gDW"""

XML_SPECIES_NS = 'http://www.hhu.de/ccb/rba/species/ns'
XML_REACTION_NS = 'http://www.hhu.de/ccb/rba/reaction/ns'
XML_COMPARTMENT_NS = 'http://www.hhu.de/ccb/rba/compartment/ns'

components = {'parameters', 'dna', 'rnas', 'proteins', 'metabolism',
              'processes', 'enzymes', 'densities', 'targets', 'medium'}



[docs]
class RbaModel:
    """Create a resource balance constraint model (RBA or TRBA) by extending a XbaModel instance.

    RBA mplementation is based on the Python package RbaPy (Bulović et al., 2019).

    Ref: Bulović, A., Fischer, S., Dinh, M., Golib, F., Liebermeister, W., Poirier, C., Tournier,
    L., Klipp, E., Fromion, V., & Goelzer, A. (2019). Automated generation of bacterial resource
    allocation models. Metabolic Engineering, 55, 12-22.
    https://doi.org/https://doi.org/10.1016/j.ymben.2019.06.001

    Example: Create a RBA model by extending an existing genome scale metabolic model.
    The spreadsheet files `xba_parameters.xlsx` and `rba_parameters.xlsx`
    contain configuration data. See tutorials.

    .. code-block:: python

        xba_model = XbaModel('iML1515.xml')
        xba_model.configure('xba_parameters.xlsx')

        rba_model = RbaModel(xba_model)
        rba_model.configure('rba_parameters.xlsx')
        if rba_model.validate():
            rba_model.export('iML1515_RBA.xml')
    """

    def __init__(self, xba_model):
        """Instantiate the RbaModel instance.

        :param xba_model: a reference to a XbaModel instance
        :type xba_model: :class:`f2xba.XbaModel`
        """
        self.model = xba_model
        """Reference to the XbaModel instance."""

        self.dna = RbaMacromolecules('dna')
        """Reference to DNA macromolecule configuration."""

        self.rnas = RbaMacromolecules('rna')
        """Reference to RNA macromolecule configurations."""

        self.proteins = RbaMacromolecules('protein')
        """Reference to protein macromolecule configurations."""

        self.metabolism = RbaMetabolism()
        """Reference to RBA compartment, species and reaction configurations."""

        self.parameters = RbaParameters()
        """Reference to RBA function configurations."""

        self.processes = RbaProcesses()
        """Reference to RBA processes configurations."""

        self.densities = RbaDensities()
        """Reference to RBA compartment density configurations."""

        self.targets = RbaTargets()
        """Reference to RBA targets configurations."""

        self.enzymes = RbaEnzymes()
        """Reference to RBA enzyme configurations."""

        self.medium = RbaMedium()
        """Reference to RBA medium configuration."""

        self.cid_mappings = {}
        """Map compartment identifiers to compartment specific roles."""

        self.mmid2mm = {}
        """Map macromolecules to configuration data."""

        self.initial_assignments = InitialAssignments(self.parameters)
        """Reference to initial assignments used in the SBML model."""

        self.parameter_values = {}
        """Parameter values configured in the model."""

        self.avg_enz_sat = None

    def _get_dummy_translation_targets(self, rba_params):
        """Extract translation targets for dummy proteins.

        Extract translation targets from rba_params['compartments'] and
        return target data for dummy protein translation targets

        :param dict(str, pandas.DataFrame) rba_params: rba configuration data loaded from file
        :return: target configuration data related to dummy proteins
        :rtype: pandas.DataFrame
        """
        df_c_data = rba_params['compartments']
        dummy_proteins = self.cid_mappings['dummy_proteins']

        df = df_c_data.loc[dummy_proteins.values()]
        df.rename(columns=lambda x: re.sub(r'^translation_', '', x), inplace=True)
        df['target_group'] = 'translation_targets'
        df['target_type'] = 'concentrations'
        df['target'] = [f'dummy_protein_{c_name}' for c_name in df.index]
        if 'target_value_type' not in df.columns:
            df['target_value_type'] = 'value'

        cols = [col for col in df.columns if re.match('target', col)]
        df = df[cols]
        df.set_index('target_group', inplace=True)
        return df

    def _add_dummy_proteins(self):
        """Add dummy proteins to different compartments based on configuration.

        Use average protein composition in terms of amino acids.
        """
        dummy_proteins = self.cid_mappings['dummy_proteins']

        # determine average protein composition
        aa_count = defaultdict(int)
        for p in self.model.proteins.values():
            for cmp_id, count in get_seq_composition(p.aa_sequence).items():
                aa_count[cmp_id] += count
        total_count = sum(aa_count.values())
        avg_aa_len = total_count / len(self.model.proteins)
        composition = {aa: avg_aa_len * count / total_count for aa, count in aa_count.items()}

        # Amino acids with low frequency (here, selenocysteine), set stoic coefficient to zero
        for aa in composition:
            if composition[aa] < 0.01:
                composition[aa] = 0.0

        for p_name, cid in dummy_proteins.items():
            self.proteins.add_macromolecule(p_name, cid, composition)

        # update average protein length parameter (assuming this parameter is used for dummy targets)
        f_name = 'inverse_average_protein_length'
        self.parameters.functions[f_name] = RbaFunction(f_name, f_type='constant',
                                                        f_params={'CONSTANT': 1.0 / avg_aa_len})

        print(f'{len(dummy_proteins):4d} dummy proteins added')

    @staticmethod
    def _get_cid_mappings(rba_params):
        """Extract specific compartment functions.

        Depending on 'type' column in sheet 'compartments'
        - 'cytoplasm': default compartment for unassigned gene products, e.g. rRNA proteins
        - 'uptake': compartment with transporters for medium uptake, e.g. 'outer_membrane'
        - 'medium': compartment in which species for uptake are located, e.g. 'external'

        Mapping of XbaModel compartment ids to RBA compartment names
        Each reaction is connected to a compartment (based on its substrates)
        Transport reactions are connected to a compartment composed of a sorted concatenation
        of substrate cids, e.g. 'c-p'.

        returns model specific compartment id mappings required for RBA model. A dict with keywords
        - rcid2cid: mapping of reaction CIDs to CIDs, dict
        - uptake_rcids: reaction CIDs for transport reactions involved in medium uptake, set of str
        - medium_cid: CID where medium is located, str
        - dummy_proteins: name of dummy proteins and their CID location, dict

        e.g.  {'rcid2cid': {'c': 'c', 'e': 'e', 'p': 'p', 'c-p': 'im', 'c-e-p': 'im', 'c-e': 'im', 'e-p': 'om'},
               'uptake_rcids': {'e-p'},
               'cytoplasm_cid': 'c',
               'medium_cid': 'e',
               'dummy_proteins': {'dummy_protein_c': 'c', 'dummy_protein_e': 'e', 'dummy_protein_p': 'p',
               'dummy_protein_im': 'im', 'dummy_protein_om': 'om'}}

        :param dict(str, pandas.DataFrame) rba_params: RBA specific parameters loaded from parameter Excel spreadsheet
        :return: model specific compartment id mappings
        :rtype: dict
        """
        df_c_data = rba_params['compartments']

        # map compartment ids (cids) to reaction cids - mainly required for membrane compartments
        cid_mappings = {}
        cid2rcids = {}
        for cid, row in df_c_data.iterrows():
            if type(row['reaction_cids']) is str:
                cid2rcids[cid] = {rcid.strip() for rcid in row['reaction_cids'].split(',')}

        cid_mappings['rcid2cid'] = {}
        for cid, rcids in cid2rcids.items():
            for rcid in rcids:
                cid_mappings['rcid2cid'][rcid] = cid

        cid_mappings['uptake_rcids'] = set()
        for cid, row in df_c_data.iterrows():
            if row['keyword'] == 'uptake':
                cid_mappings['uptake_rcids'] |= cid2rcids[cid]
            elif row['keyword'] == 'medium':
                cid_mappings['medium_cid'] = cid
            elif row['keyword'] == 'cytoplasm':
                cid_mappings['cytoplasm_cid'] = cid
            elif row['keyword'] == 'total_density':
                cid_mappings['total_density_cid'] = cid

        # identify compartments with translation targets for dummy proteins
        cid_mappings['dummy_proteins'] = {}
        for col in df_c_data.columns:
            if re.match('translation_target', col):
                cids = df_c_data[df_c_data[col].notna()].index
                for cid in cids:
                    p_name = f'dummy_protein_{cid}'
                    if p_name not in cid_mappings['dummy_proteins']:
                        cid_mappings['dummy_proteins'][p_name] = cid
        return cid_mappings

    def _prepare_xba_model(self, rba_params):
        """Prepare XbaModel instance for conversion to RBA.

        extract data from:
            rba_params['machineries']
            rba_params['compartments']

        Add membrane compartment to the XbaModel based on 'compartments' sheet.
        Add gene products required for RBA machineries based on 'machineries' sheet.
        Create relevant proteins in the XbaModel instance and map cofactors.
        Split reactions into (reversible) iso-reactions, each catalyzed by a single enzyme.

        :param dict(str, pandas.DataFrame) rba_params: RBA specific parameters loaded from parameter spreadsheet
        """
        # add (membrane) compartments to the XbaModel
        cs_config = {}
        for cid, row in rba_params['compartments'].iterrows():
            if cid not in self.model.compartments:
                cs_config[cid] = {'name': row['name']}
        self.model.add_compartments(cs_config)
        if len(cs_config) > 0:
            print(f'{len(cs_config):4d} compartment(s) added')

        # add gene products for coenzymes
        if 'coenzymes' in rba_params:
            count = self.model.add_gps(rba_params['coenzymes'].set_index('gpid', drop=False))
            print(f'{count:4d} gene product(s) added for coenzymes')
            self.model.update_gp_mappings()

        # add gene products for process machineries
        df_mach_data = rba_params['machineries']
        count = self.model.add_gps(df_mach_data[df_mach_data['set'] == 'protein'].set_index('gpid'))
        if count > 0:
            print(f'{count:4d} gene product(s) added for process machineries')
            self.model.update_gp_mappings()

        # create model proteins for newly added gene products
        count = self.model.create_proteins()
        if count > 0:
            print(f'{count:4d} protein(s) created with UniProt information')

        if self.model.cofactor_flag is True:
            count = self.model.map_protein_cofactors()
            if count > 0:
                print(f'{count:4d} cofactor(s) mapped to species ids for added protein')

        # split reactions into (reversible) iso-reactions
        n_r = len(self.model.reactions)
        self.model.add_isoenzyme_reactions()
        n_isor = len(self.model.reactions)
        print(f'{n_r} reactions -> {n_isor} iso-reactions, including pseudo reactions')


[docs]
    def configure(self, fname):
        """Configure the RbaModel instance with information provided in the RBA configuration file.

        The RBA configuration spreadsheet must contain following sheets:
        'general', 'trna2locus', 'compartments', 'targets', 'functions', 'processing_maps',
        'processes' and 'machineries'. The sheet 'coenzymes' is optional.

        Example: Create a RBA model by extending an existing genome scale metabolic model.
        The spreadsheet files `xba_parameters.xlsx` and `rba_parameters.xlsx`
        contain configuration data. See tutorials.

        .. code-block:: python

            xba_model = XbaModel('iML1515.xml')
            xba_model.configure('xba_parameters.xlsx')

            rba_model = RbaModel(xba_model)
            rba_model.configure('rba_parameters.xlsx')

        :param str fname: filename of RBA configuration file (.xlsx)
        :return: success status
        :rtype: bool
        """
        sheet_names = ['general', 'trna2locus', 'coenzymes', 'compartments', 'targets',
                       'functions', 'processing_maps', 'processes', 'machineries']
        required_sheets = {'general', 'trna2locus', 'compartments', 'targets',
                           'functions', 'processing_maps', 'processes', 'machineries'}
        rba_params = load_parameter_file(fname, sheet_names)
        missing_sheets = required_sheets.difference(set(rba_params.keys()))
        if len(missing_sheets) > 0:
            print(f'missing required tables {missing_sheets}')
            raise ValueError

        if not self._check_rba_params_functions(rba_params):
            print('ERRORs in RBA Parameter file')
            return False
        if not self._check_rba_params_labels(rba_params):
            print('ERRORs in RBA Parameter file')
            return False

        general_params = rba_params['general']['value'].to_dict()
        self.avg_enz_sat = general_params.get('avg_enz_sat', DEFAULT_ENZ_SATURATION)

        self._prepare_xba_model(rba_params)

        self.cid_mappings = self._get_cid_mappings(rba_params)
        # check compartment mapping
        used_rcids = set()
        for p in self.model.proteins.values():
            used_rcids.add(p.compartment)
        ok_flag = True
        for rcid in used_rcids:
            if rcid not in self.cid_mappings['rcid2cid']:
                print(f'no mapping of "{rcid}" to RBA compartment. Assign mapping in RBA compartments table!')
                ok_flag = False
        if not ok_flag:
            print('ERRORs in RBA Parameter file')
            return False

        self.parameters.from_xba(rba_params)
        self.densities.from_xba(rba_params, self.parameters)
        self.targets.from_xba(rba_params, self.model, self.parameters)
        df_dummy_targets = self._get_dummy_translation_targets(rba_params)
        self.targets.from_xba({'targets': df_dummy_targets}, self.model, self.parameters)
        self.metabolism.from_xba(rba_params, self.model)

        self.medium.from_xba(general_params, self.model)

        macromolecules = set(rba_params['processes']['set'].values)
        if 'dna' in macromolecules:
            self.dna.from_xba(rba_params, self.model, self.cid_mappings)
        if 'rna' in macromolecules:
            self.rnas.from_xba(rba_params, self.model, self.cid_mappings)
        self.proteins.from_xba(rba_params, self.model, self.cid_mappings)
        self._add_dummy_proteins()
        self.enzymes.from_xba(self.avg_enz_sat, self.model, self.parameters, self.cid_mappings, self.medium,
                              DEFAULT_MICHAELIS_CONSTANT)
        self.processes.from_xba(rba_params, self)

        print(f'{len(self.parameters.functions):4d} functions, {len(self.parameters.aggregates):4d} aggregates')
        print(f'>>> RBA model created')

        self._update_xba_model()
        return True


    def _update_xba_model(self):
        """Based on configured RBA model update the corresponding XbaModel.

        RbaModel can be exported in RBA proprietary format.
        XbaModel can be exported in standardized SBML format.

        values of growth_rate and configured medium are used to calculate
        function values, like enzyme efficiencies, enzyme/process concentration
        requirements, targets

        Note: some XbaModel updates already performed in _prepare_xba_model()
        """
        print('update XbaModel with RBA parameters')

        growth_rate = DEFAULT_GROWTH_RATE
        protect_ids = set()

        # create units required for RBA
        rba_units = {'hour': {'name': 'hour', 'units': 'kind=second, exp=1.0, scale=0, mult=3600.0'},
                     'per_h': {'name': 'per hour', 'units': 'kind=second, exp=-1.0, scale=0, mult=3600.0'},
                     'mmol_per_gDW': {'name': 'Millimoles', 'units': 'kind=mole, exp=1.0, scale=-3, mult=1.0; '
                                                                     'kind=gram, exp=-1.0, scale=0, mult=1.0'}
                     }
        for unit_id, data in rba_units.items():
            if unit_id not in self.model.unit_defs:
                data['id'] = unit_id
                self.model.add_unit_def(data)
            protect_ids.add(unit_id)

        pid = 'growth_rate'
        p_data = {'value': growth_rate, 'units': 'per_h', 'name': 'growth rate for initial parametrization'}
        self.model.add_parameter(pid, p_data)
        protect_ids.add(pid)

        # identify compartment where RBA medium is located
        xml_annot = f'ns_uri={XML_COMPARTMENT_NS}, prefix=rba, token=compartment, medium=True'
        add_annot = [['compartment', 'xml_annotation', xml_annot]]
        cols = ['component', 'attribute', 'value']
        df_modify_attrs = pd.DataFrame(add_annot, columns=cols, index=[self.cid_mappings['medium_cid']])
        self.model.modify_attributes(df_modify_attrs, 'compartment')

        # mapping of macromolecule ids to RbaMacromolecule instances
        # TODO: do we actually need self.mmid2mm ?
        mm_types = {'protein': self.proteins, 'dna': self.dna, 'rna': self.rnas}
        for mm_type, mm_data in mm_types.items():
            for mm_id, mm in mm_data.macromolecules.items():
                self.mmid2mm[mm_id] = mm

        # remove biomass reactions for the model
        rids = [rid for rid, r in self.model.reactions.items() if r.kind == 'biomass']
        self.model.del_components('reactions', rids)

        self._xba_add_macromolecule_species()
        self._xba_add_rba_constraints()
        self._xba_couple_reactions()
        self._xba_add_macromolecule_processing_reactions()
        self.parameter_values = self._xba_get_parameter_values(growth_rate)
        self._xba_add_enzyme_concentration_variables(growth_rate)
        self._xba_add_pm_concentration_variables(growth_rate)
        self._xba_add_macromolecule_target_conc_variables(growth_rate)
        self._xba_add_metabolite_target_conc_variables(growth_rate)
        self._xba_add_target_density_variables()
        self._xba_add_flux_targets()
        self._xba_set_dummy_fba_objective()
        # self._xba_unblock_exchange_reactions()
        self.initial_assignments.xba_implementation(self.model)
        self.model.clean(protect_ids)

        # add additional units required for additional parameters
        unit_id = 'mmol_per_l'
        u_dict = {'id': unit_id, 'name': 'millimole per liter',
                  'units': 'kind=mole, exp=1.0, scale=-3, mult=1.0; kind=litre, exp=-1.0, scale=0, mult=1.0'}
        if unit_id not in self.model.parameters:
            self.model.add_unit_def(u_dict)

        # add some parameter values for reference (after model.clean() so they are not removed)
        add_params = {'avg_enz_sat': {'value': self.avg_enz_sat, 'name': 'average enzyme saturation level'},
                      'default_importer_km_value': {'value': DEFAULT_MICHAELIS_CONSTANT, 'name': 'importer Km value',
                                                    'units': 'mmol_per_l'}}
        for pid, p_data in add_params.items():
            self.model.add_parameter(pid, p_data)

        # modify some model attributes and create L3V2 SBML model -
        #  set fbcStrict to False, which is required to supports immediate assignments
        self.model.model_attrs['fbcStrict'] = False
        self.model.model_attrs['id'] += f'_RBA'
        if 'name' in self.model.model_attrs:
            self.model.model_attrs['name'] = f'RBA model of ' + self.model.model_attrs['name']
        self.model.sbml_container['level'] = 3
        self.model.sbml_container['version'] = 2

        print('XbaModel updated with RBA configuration')

    @staticmethod
    def _check_rba_params_functions(rba_params):
        """Check functions used in RBA parameters file.

        Functions used in aggregates of 'compartments', 'targets'
        of 'processes' sheets must also be defined in 'functions'

        :param dict(str, pandas.DataFrame) rba_params:
        :return: success status
        :rtype: bool
        """
        ok_flag = True

        # check defined functions
        defined_functions = set()
        if 'functions' in rba_params:
            defined_functions = set(rba_params['functions'].index)

        # collect functions used in 'compartments', 'targets' sheet
        used_functions = defaultdict(set)
        if 'compartments' in rba_params:
            for cid, row in rba_params['compartments'].iterrows():
                agg = row.get('translation_target_aggregate')
                if type(agg) is str:
                    for fid in [item.strip() for item in agg.split(',')]:
                        used_functions[fid].add(f'compartments: {cid}')
                agg = row.get('density_constraint_aggregate')
                if type(agg) is str:
                    for fid in [item.strip() for item in agg.split(',')]:
                        used_functions[fid].add(f'compartments: {cid}')

        if 'targets' in rba_params:
            for tgid, row in rba_params['targets'].iterrows():
                agg = row.get('target_aggregate')
                if type(agg) is str:
                    for fid in [item.strip() for item in agg.split(',')]:
                        used_functions[fid].add(f'targets: {tgid}')

        if 'processes' in rba_params:
            for pid, row in rba_params['processes'].iterrows():
                agg = row.get('capacity_aggregate')
                if type(agg) is str:
                    for fid in [item.strip() for item in agg.split(',')]:
                        used_functions[fid].add(f'processes: {pid}')

        # check that functions have been defined
        for fid, xids in used_functions.items():
            if fid not in defined_functions:
                print(f'{fid:35} function not defined, but used in {xids}')
                ok_flag = False

        default_fids = {'zero', 'default_spontaneous'}
        for fid in defined_functions:
            if fid not in used_functions and fid not in default_fids:
                print(f'{fid:35s} function defined, but not used')
        return ok_flag

    def _check_rba_params_labels(self, rba_params):
        """Check gene labels used in RBA Parameters file.

        Labels used 'trna2locus' and 'machineries' must exist
        in model UniProt data or NCBI data (for rnas)

        :param dict(str, pandas.DataFrame) rba_params:
        :return: success status
        :rtype: bool
        """
        ok_flag = True
        if 'trna2locus' in rba_params:
            for rna_id, row in rba_params['trna2locus'].iterrows():
                if row['label'] not in self.model.ncbi_data.locus2record:
                    print(f"{row['label']:35s} not found in ncbi data")
                    ok_flag = False

        if 'machineries' in rba_params:
            for pid, row in rba_params['machineries'].iterrows():
                if row.get('set', '') == 'protein':
                    if (row['label'] not in self.model.uniprot_data.locus2uid and
                            row['label'] not in self.model.ncbi_data.locus2protein):
                        print(f"{row['label']:35s} not found in UniProt nor NCBI data")
                        ok_flag = False
                elif row.get('set', '') == 'rna':
                    if row['label'] not in self.model.ncbi_data.locus2record:
                        print(f"{row['label']:35s} not found in ncbi data")
                        ok_flag = False
        return ok_flag

    def _xba_add_macromolecule_species(self):
        """Add macromolecules to XbaModel species (proteins, dna, rna).

        Constraint ID: 'MM_<mmid>

        Macromolecules (except for small macromolecules, i.e. single dna, mrna single components)
        are scaled at a fixed factor of 1000, i.e. µmol instead of mmol (improves LP problem) for large
        macromolecules, i.e. > 10 amino acids weight.
        The scale factor is set on the macromolecule instance.

        MIRIAM annotation and Uniport id is added
        XML annotation with equivalent amino acid weight and scale factor is added

        Note: production / degradation reaction and target concentration variables are scaled accordingly.
        """
        xml_prefix = f'ns_uri={XML_SPECIES_NS}, prefix=rba, token=macromolecule'

        mm_species = {}
        # create protein species and configure scale
        for mm_id, mm in self.proteins.macromolecules.items():
            mm.sid = pf.MM_ + valid_sbml_sid(mm_id)
            mm.scale = 1000.0 if mm.weight > 10 else 1.0
            # for indiviually modeled proteins (UniProt data is collected)
            if mm_id in self.model.locus2uid:
                uid = self.model.locus2uid[mm_id]
                name = f'macromolecule {mm_id} ({uid})'
                miriam_annot = f'bqbiol:is, uniprot/{uid}'
                mw_kda = self.model.proteins[uid].mw / 1000.0
            # for dummy proteins, we calculate molecular weight based on aa sequence
            else:
                mw_kda = protein_mw_from_aa_comp(mm.composition) / 1000.0
                miriam_annot = None
                name = f'macromolecule {mm_id}'
            xml_annot = (f'{xml_prefix}, type={self.proteins.type}, '
                         f'weight_aa={mm.weight}, weight_kDa={mw_kda:.4f}, scale={mm.scale}')
            mm_species[mm.sid] = [name, mm.compartment, False, False, False,
                                  f'meta_{mm.sid}', miriam_annot, xml_annot]

        for mm_id, mm in self.rnas.macromolecules.items():
            mm.sid = pf.MM_ + valid_sbml_sid(mm_id)
            mm.scale = 1000.0 if mm.weight > 10 else 1.0
            mw_kda = rna_mw_from_nt_comp(mm.composition) / 1000.0
            name = f'macromolecule {mm_id}'
            xml_annot = (f'{xml_prefix}, type={self.rnas.type}, '
                         f'weight_aa={mm.weight}, weight_kDa={mw_kda:.4f}, scale={mm.scale}')
            mm_species[mm.sid] = [name, mm.compartment, False, False, False, f'meta_{mm.sid}', None, xml_annot]

        for mm_id, mm in self.dna.macromolecules.items():
            mm.sid = pf.MM_ + valid_sbml_sid(mm_id)
            mm.scale = 1000.0 if mm.weight > 10 else 1.0
            mw_kda = ssdna_mw_from_dnt_comp(mm.composition) / 1000.0
            name = f'macromolecule {mm_id}'
            xml_annot = (f'{xml_prefix}, type={self.dna.type}, '
                         f'weight_aa={mm.weight}, weight_kDa={mw_kda:.4f}, scale={mm.scale}')
            mm_species[mm.sid] = [name, mm.compartment, False, False, False, f'meta_{mm.sid}', None, xml_annot]

        cols = ['name', 'compartment', 'hasOnlySubstanceUnits', 'boundaryCondition', 'constant',
                'meta_id', 'miriamAnnotation', 'xmlAnnotation']
        df_add_species = pd.DataFrame(mm_species.values(), index=list(mm_species), columns=cols)
        print(f"{len(df_add_species):4d} macromoledules to add")
        self.model.add_species(df_add_species)

    def _xba_add_rba_constraints(self):
        """Add RBA constraints to XbaModel instance.

        constrains are added as pseudo species (similar to mass balance constraints of metabolites)
        'C_PMC_<prod_id>: process machine capacities
        'C_EF_<eid>: forward enzyme capacities
        'C_ER_<eid>: reverse enzyme capacities
        'C_D_<cid>: compartment density constraints
        """
        constraints = {}
        for proc_id, proc in self.processes.processes.items():
            if 'capacity' in proc.machinery:
                constr_id = pf.C_PMC_ + valid_sbml_sid(proc_id)
                constraints[constr_id] = [f'capacity {proc_id}', self.cid_mappings['cytoplasm_cid'],
                                          False, False, False]
                proc.constr_id = constr_id

        for eid in sorted(self.enzymes.enzymes):
            e = self.enzymes.enzymes[eid]
            if len(e.mach_reactants) > 0 or len(e.mach_products) > 0:
                eidx = re.sub(r'_enzyme$', '', eid)
                if e.forward_eff != self.parameters.f_name_zero:
                    constr_id = pf.C_EF_ + valid_sbml_sid(eidx)
                    constraints[constr_id] = [f'fwd efficiency {eid}', self.cid_mappings['cytoplasm_cid'],
                                              False, False, False]
                    e.constr_id_fwd = constr_id
                if e.backward_eff != self.parameters.f_name_zero:
                    constr_id = pf.C_ER_ + valid_sbml_sid(eidx)
                    constraints[constr_id] = [f'rev efficiency {eid}', self.cid_mappings['cytoplasm_cid'],
                                              False, False, False]
                    e.constr_id_rev = constr_id

        for cid, d in self.densities.densities.items():
            constr_id = pf.C_D_ + valid_sbml_sid(cid)
            constraints[constr_id] = [f'compartment density for {cid}', cid, False, False, False]
            d.constr_id = constr_id

        cols = ['name', 'compartment', 'hasOnlySubstanceUnits', 'boundaryCondition', 'constant']
        df_add_species = pd.DataFrame(constraints.values(), index=list(constraints), columns=cols)
        print(f"{len(df_add_species):4d} RBA constraints to add")
        self.model.add_species(df_add_species)

    def _xba_couple_reactions(self):
        """Couple reactions to enzyme efficiencies.

        i.e. respective forward enzyme efficiency added as product to enzyme catalyzed reaction
             respective reverse enzyme efficiency added as substrate to reversible enzyme catalyzed reaction

        Note: for reverse reaction enzyme coupling RBA and TRBA reactions have to be
          coupled differently to the enzyme.
            - RBA: C_ER_<ridx> coupled with -1 to reaction rid
            - TFBA: C_ER_<ridx> coupled with +1 to reaction rid_REV

        Enzyme coupling constraint:
            C_EF_<ridx>_enzyme: R_<ridx> - kcat * V_EC_<ridx> ≤ 0
            C_ER_<ridx>_enzyme: -1 R_<ridx> - kcat * V_EC_<ridx> ≤ 0
        """
        modify_attrs = []
        for eid in sorted(list(self.enzymes.enzymes)):
            e = self.enzymes.enzymes[eid]
            if len(e.mach_reactants) > 0 or len(e.mach_products) > 0:
                rid = e.reaction
                if e.forward_eff != self.parameters.f_name_zero:
                    modify_attrs.append([e.reaction, 'reaction', 'product', f'{e.constr_id_fwd}=1.0'])
                if e.backward_eff != self.parameters.f_name_zero:
                    if e.rev_reaction == rid:
                        modify_attrs.append([rid, 'reaction', 'reactant', f'{e.constr_id_rev}=1.0'])
                    else:
                        modify_attrs.append([e.rev_reaction, 'reaction', 'product', f'{e.constr_id_rev}=1.0'])

        cols = ['id', 'component', 'attribute', 'value']
        df_modify_attrs = pd.DataFrame(modify_attrs, columns=cols)
        df_modify_attrs.set_index('id', inplace=True)
        print(f'{len(df_modify_attrs):4d} fwd/rev reactions to couple with enzyme efficiency constraints')
        self.model.modify_attributes(df_modify_attrs, 'reaction')

    def _xba_add_macromolecule_processing_reactions(self):
        """Create production and degradation reactions for macromolecules.

        Here, macromolecule production/degradation reactions are implemented in detail, whereas these
        are lumped in the original RBA formulation, making it difficult to follow the implementation in RBApy.

        Though this will add more variables to the linear problem, the advantages of separating the
        individual cost terms (to better understand the underlying mechanism) could improve
        understanding of the model construction.

        for specified macromolecules we add up production requirements and create a 'R_PROC_<mm_id> reaction
        for specified macromolecules we add up degradation requirements and create a 'R_DEGR_<mm_id> reaction

        processing costs are identified and added to processing constraints 'C_PMC_<pm_id>', actually
        adding respective products with calculated costs as stoic coefficients.

        reaction fluxes are in units of µmol/gDWh (note: metabolic reaction fluxes are in mmol/gDWh)
        """
        # get mapping of macromolecule to related processing reactions for production and degradation:
        mm_productions = defaultdict(list)
        mm_degradations = defaultdict(list)
        for proc_id, proc in self.processes.processes.items():
            for mm_id in proc.productions.get('inputs', []):
                mm_productions[mm_id].append(proc_id)
            for mm_id in proc.degradations.get('inputs', []):
                mm_degradations[mm_id].append(proc_id)

        # determine processing cost for productions / degradations and create processing reactions
        lb_pid_mmol = self.model.fbc_shared_pids[self.model.flux_uid][0.0]
        ub_pid_mmol = self.model.get_fbc_bnd_pid(MAX_MM_PROCESSING_FLUX, self.model.flux_uid,
                                                 'max_processing_flux_mmol')
        lb_pid_umol = self.model.get_fbc_bnd_pid(0.0, 'umol_per_gDWh', 'zero_processing_flux_umol')
        ub_pid_umol = self.model.get_fbc_bnd_pid(MAX_MM_PROCESSING_FLUX, 'umol_per_gDWh', 'max_processing_flux_umol')

        prod_reactions = {}
        degr_reactions = {}
        for mm_id in sorted(self.mmid2mm):
            mm = self.mmid2mm[mm_id]
            # separately check production processes and degradation processes
            for proc_type, mm2maps in {'PROD': mm_productions, 'DEGR': mm_degradations}.items():
                reactants = defaultdict(float)
                products = defaultdict(float)
                if mm2maps.get(mm_id):
                    # if there is any production/degradation process create combined production/degradation fluxes
                    for pm_id in mm2maps.get(mm_id):
                        pm = self.processes.processes[pm_id]
                        pm_map_id = pm.productions['processingMap'] if proc_type == 'PROD' \
                            else pm.degradations['processingMap']
                        pm_map = self.processes.processing_maps[pm_map_id]

                        # constant processing requirements
                        for sid, s_stoic in pm_map.constant_processing.get('reactants', {}).items():
                            reactants[sid] += float(s_stoic)/mm.scale
                        for sid, s_stoic in pm_map.constant_processing.get('products', {}).items():
                            products[sid] += float(s_stoic)/mm.scale

                        # component processing requirements
                        for comp_id, comp_stoic in mm.composition.items():
                            if comp_id in pm_map.component_processings:
                                comp_proc = pm_map.component_processings[comp_id]
                                for sid, s_stoic in comp_proc.get('reactants', {}).items():
                                    reactants[sid] += comp_stoic * float(s_stoic)/mm.scale
                                for sid, s_stoic in comp_proc.get('products', {}).items():
                                    products[sid] += comp_stoic * float(s_stoic)/mm.scale

                        # processing cost, wrt processing machine capacity, if any
                        if pm.constr_id:
                            pm_costs = 0.0
                            for comp_id, comp_stoic in mm.composition.items():
                                if comp_id in pm_map.component_processings:
                                    comp_proc = pm_map.component_processings[comp_id]
                                    pm_costs += comp_stoic * comp_proc.get('cost', 0.0)
                            products[pm.constr_id] = round(pm_costs / mm.scale, 8)

                    if proc_type == 'PROD':
                        rid = pf.R_PROD_ + valid_sbml_sid(mm_id)
                        products[mm.sid] += 1.0
                        if mm.scale == 1.0:
                            prod_reactions[rid] = [f'production reaction for {mm_id} (mmol)', False,
                                                   dict(reactants), dict(products), mm.scale,
                                                   lb_pid_mmol, ub_pid_mmol,
                                                   'RBA_pm_reaction', 'RBA macromolecule processing']
                        else:
                            prod_reactions[rid] = [f'production reaction for {mm_id} (µmol)', False,
                                                   dict(reactants), dict(products), mm.scale,
                                                   lb_pid_umol, ub_pid_umol,
                                                   'RBA_pm_reaction', 'RBA macromolecule processing']
                    else:
                        rid = pf.R_DEGR_ + valid_sbml_sid(mm_id)
                        reactants[mm.sid] += 1.0
                        if mm.scale == 1.0:
                            degr_reactions[rid] = [f'degradation reaction for {mm_id} (mmol)', False,
                                                   dict(reactants), dict(products), mm.scale,
                                                   lb_pid_mmol, ub_pid_mmol,
                                                   'RBA_pm_reaction', 'RBA macromolecule processing']
                        else:
                            degr_reactions[rid] = [f'degradation reaction for {mm_id} (µmol)', False,
                                                   dict(reactants), dict(products), mm.scale,
                                                   lb_pid_umol, ub_pid_umol,
                                                   'RBA_pm_reaction', 'RBA macromolecule processing']

        pm_reactions = prod_reactions | degr_reactions
        cols = ['name', 'reversible', 'reactants', 'products', 'scale',
                'fbcLowerFluxBound', 'fbcUpperFluxBound', 'kind', 'notes']
        df_add_rids = pd.DataFrame(pm_reactions.values(), index=list(pm_reactions), columns=cols)
        print(f'{len(df_add_rids):4d} processing reactions to add')
        self.model.add_reactions(df_add_rids)

    def _xba_get_parameter_values(self, growth_rate):
        """Calculate parameter values based on given growth rate.

        :param float growth_rate: growth rate in h-1 for which to determine values
        :return: parameter values determined based on params
        :rtype: dict(str, float)
        """
        medium_cid = self.cid_mappings['medium_cid']
        mid2sid = {}
        for mid in self.medium.concentrations:
            if f'{mid}_{medium_cid}' in self.model.species:
                mid2sid[mid] = f'{mid}_{medium_cid}'

        params = {mid2sid[mid]: val for mid, val in self.medium.concentrations.items() if val != 0.0}
        params['growth_rate'] = growth_rate
        values = self.parameters.get_values(params)
        print(f"{len(values):4d} parameter values calulated based on {growth_rate:.2f} h-1 growth rate and medium")
        return values

    def _couple_weights(self, srefs):
        """Calculate enzymes/process machines weight and couple to density constraints.

        :param dict(str, float) srefs: species reference with reactants and stoic coefficient of composition
        :return: density constraints affected by weight
        :rtype: dict(str, float)
        """
        weights = defaultdict(float)
        for comp_id, stoic in srefs.items():
            if comp_id in self.mmid2mm:
                mm = self.mmid2mm[comp_id]
                weight = stoic * mm.weight/mm.scale
                weights[mm.compartment] += weight
                if 'total_density_cid' in self.cid_mappings:
                    weights[self.cid_mappings['total_density_cid']] += weight

        weight_coupling = defaultdict(float)
        for cid, weight in weights.items():
            if cid in self.densities.densities:
                weight_coupling[self.densities.densities[cid].constr_id] = weight

        return dict(weight_coupling)

    def _get_dilution_srefs(self, var_id, growth_rate, composition_srefs):
        """Determine reactant or product species references for concentration variables.

        These variables are in units of µmol/gDW. Coefficients need to be scaled

        RBA macromolecule ids are converted to respective XbaModel species id
        Reactants/products are based on stoichiometry of machinery composition
        Reactants/products are diluted as per growth rate
        Reactants/products are scaled to match units of concentration variable (µmol_per_gDW)
        As stoichiometry is based on growth rate, data for initial assignment is collected
        as well as species reference ids

        SBML Validation:
          When the value of the attribute variable in an InitialAssignment object refers to a SpeciesReference object,
          the unit of measurement associated with the InitialAssignment's math expression should be consistent with
          the unit of stoichiometry, that is, dimensionless.

        :param str var_id: id of variable, e.g. 'V_EC_<eid>', required for sref id construction
        :param float growth_rate: growth rate in h-1, dilute molecule accordingly
        :param dict(str, float) composition_srefs: reactants or product species refs
        :return: srefs with stoichiometry
        :rtype: dict(str, float)
        """
        dilution_srefs = {}
        for m_sid, m_stoic in composition_srefs.items():
            if m_sid in self.mmid2mm:
                sid = self.mmid2mm[m_sid].sid
                scale = self.mmid2mm[m_sid].scale
            else:
                sid = m_sid
                scale = 1.0
            if scale == 1000.0:
                dilution_srefs[sid] = growth_rate * m_stoic
                self.initial_assignments.add_sref_ia(var_id, sid, math_var=f'growth_rate * {m_stoic} hour')
            else:
                dilution_srefs[sid] = growth_rate * m_stoic / 1000.0
                self.initial_assignments.add_sref_ia(var_id, sid,
                                                     math_var=f'growth_rate * {m_stoic/ 1000.0} hour')
        return dilution_srefs

    def _xba_add_enzyme_concentration_variables(self, growth_rate):
        """Add enzyme concentration variables to the XbaModel instance.

        Enzyme concentration variables: V_EC_<ridx>
        Couple variables to macromolecule mass balances.
        Couple these variables to enzyme efficiency and process machinery
        capacity constraints.
        Couple the variables to compartment density constraints.

        Enzyme concentrations are in µmol/gDW

        :param float growth_rate: growth rate (h-1) required for macromolecule dilution
        """
        xml_prefix = f'ns_uri={XML_SPECIES_NS}, prefix=rba, token=macromolecule'

        lb_pid = self.model.get_fbc_bnd_pid(0.0, 'umol_per_gDW', 'zero_enzyme_umol_conc')
        ub_pid = self.model.get_fbc_bnd_pid(MAX_ENZ_CONC, 'umol_per_gDW', 'max_enzyme_umol_conc')
        scale = 1000.0

        conc_vars = {}
        for eid, e in self.enzymes.enzymes.items():
            if len(e.mach_reactants) > 0 or len(e.mach_products) > 0:
                eidx = re.sub(r'_enzyme$', '', valid_sbml_sid(eid))
                var_id = pf.V_EC_ + eidx
                name = f'{eid} concentration (µmol)'

                # Determine reactants/products and sref ids for enzyme concentration variable
                reactants = self._get_dilution_srefs(var_id, growth_rate, e.mach_reactants)
                products = self._get_dilution_srefs(var_id, growth_rate, e.mach_products)

                # get gene product association from proteins used in machinery
                gps = [self.model.locus2gp[locus] for locus in e.mach_reactants if locus in self.model.locus2gp]
                gpa = 'assoc=(' + ' and '.join(gps) + ')' if len(gps) > 0 else None

                # enzyme efficiency constraints coupling
                if e.constr_id_fwd:
                    reactants[e.constr_id_fwd] = self.parameter_values[e.forward_eff] / scale
                    self.initial_assignments.add_sref_ia(var_id, e.constr_id_fwd, rba_pid=e.forward_eff,
                                                         math_const=f'{1.0/scale} dimensionless')
                if e.constr_id_rev:
                    reactants[e.constr_id_rev] = self.parameter_values[e.backward_eff] / scale
                    self.initial_assignments.add_sref_ia(var_id, e.constr_id_rev, rba_pid=e.backward_eff,
                                                         math_const=f'{1.0/scale} dimensionless')

                # couple weights of enzyme composition to density constraints
                products.update(self._couple_weights(e.mach_reactants))

                # determine enzyme molecular weight
                enz_mw_kda = 0.0
                for comp_id, stoic in e.mach_reactants.items():
                    if comp_id in self.proteins.macromolecules:
                        if comp_id in self.model.locus2uid:
                            uid = self.model.locus2uid[comp_id]
                            mw_kda = self.model.proteins[uid].mw / 1000.0
                            enz_mw_kda += stoic * mw_kda
                xml_annot = f'{xml_prefix}, weight_kDa={enz_mw_kda:.4f}, scale={scale}'

                conc_vars[var_id] = [name, False, reactants, products, scale, gpa, lb_pid, ub_pid,
                                     'RBA_enz_conc', xml_annot]

        cols = ['name', 'reversible', 'reactants', 'products', 'scale', 'fbcGeneProdAssoc',
                'fbcLowerFluxBound', 'fbcUpperFluxBound', 'kind', 'xmlAnnotation']
        df_add_rids = pd.DataFrame(conc_vars.values(), index=list(conc_vars), columns=cols)
        print(f'{len(df_add_rids):4d} enzyme concentration variables to add')
        self.model.add_reactions(df_add_rids)

    def _xba_add_pm_concentration_variables(self, growth_rate):
        """Add process machine concentration variables to the XbaModel instance.

        see _xba_add_enzyme_concentration_variables
        Process machinery concentration variables: V_PMC_<pm_id>
        Couple variables to macromolecule mass balances.
        Couple these variables to enzyme efficiency and process machinery
        capacity constraints.
        Couple the variables to compartment density constraints.

        Process machinery concentrations are in µmol/gDW

        :param float growth_rate: growth rate (h-1) required for macromolecule dilution
        """
        xml_prefix = f'ns_uri={XML_SPECIES_NS}, prefix=rba, token=macromolecule'

        lb_pid = self.model.get_fbc_bnd_pid(0.0, 'umol_per_gDW', 'zero_enzyme_umol_conc')
        ub_pid = self.model.get_fbc_bnd_pid(MAX_ENZ_CONC, 'umol_per_gDW', 'max_enzyme_umol_conc')
        scale = 1000.0

        conc_vars = {}
        for pm_id, pm in self.processes.processes.items():
            if 'capacity' in pm.machinery:
                var_id = pf.V_PMC_ + valid_sbml_sid(pm_id)
                name = f'{pm_id} concentration'

                # Determine reactants/products and sref ids for PM concentration variable
                reactants = self._get_dilution_srefs(var_id, growth_rate, pm.machinery['reactants'])
                products = self._get_dilution_srefs(var_id, growth_rate, pm.machinery['products'])

                # get gene product association
                gps = [self.model.locus2gp[locus] for locus in pm.machinery['reactants']
                       if locus in self.model.locus2gp]
                gpa = 'assoc=(' + ' and '.join(gps) + ')' if len(gps) > 0 else None

                # process machinery capacity coupling
                reactants[pm.constr_id] = self.parameter_values[pm.machinery['capacity'].value] / scale
                self.initial_assignments.add_sref_ia(var_id, pm.constr_id, rba_pid=pm.machinery['capacity'].value,
                                                     math_const=f'{1.0 / scale} dimensionless')

                # couple weights of process machine composition to density constraints
                products.update(self._couple_weights(pm.machinery['reactants']))

                # determine molecular weight of process machine, consisting of proteins and/or RNA
                pm_mw_kda = 0.0
                for comp_id, stoic in pm.machinery['reactants'].items():
                    if comp_id in self.proteins.macromolecules:
                        if comp_id in self.model.locus2uid:
                            uid = self.model.locus2uid[comp_id]
                            mw_kda = self.model.proteins[uid].mw / 1000.0
                            pm_mw_kda += stoic * mw_kda
                    elif comp_id in self.rnas.macromolecules:
                        mm = self.rnas.macromolecules[comp_id]
                        mw_kda = rna_mw_from_nt_comp(mm.composition) / 1000.0
                        pm_mw_kda += stoic * mw_kda
                xml_annot = f'{xml_prefix}, weight_kDa={pm_mw_kda:.4f}, scale={scale}'

                conc_vars[var_id] = [name, False, reactants, products, scale, gpa, lb_pid, ub_pid,
                                     'RBA_pm_conc', xml_annot]

        cols = ['name', 'reversible', 'reactants', 'products', 'scale', 'fbcGeneProdAssoc',
                'fbcLowerFluxBound', 'fbcUpperFluxBound', 'kind', 'xmlAnnotation']
        df_add_rids = pd.DataFrame(conc_vars.values(), index=list(conc_vars), columns=cols)
        print(f'{len(df_add_rids):4d} process machine concentration variables to add')
        self.model.add_reactions(df_add_rids)

    def _xba_add_macromolecule_target_conc_variables(self, growth_rate):
        """Add variables for macromolecule target concentrations to the XbaModel instance.

        V_TMMC_<mid>: For each target concentration of a macromolecule a separate variable is introduced.
        Units: µmol/gDW or mmol/gDW
        This variable is fixed (lower/upper bound) to target concentration in respective unit (this can be
        a function of growth rate). Note: targets concentrations are defined in mmol/gDW and need to be rescaled
        Reactant coefficient for macromolecule is 1 times growth rate (i.e. macromolecular dilution),
        Product coefficient / Weight coupling is macromolecular weight divided by scale (weight in mmol AA/gDW)
        Variable bounds need to be updated during bisection optimization.

        :param float growth_rate: growth rate (h-1) required for macromolecule dilution
        """
        conc_targets = {}
        for tg_id, tg in self.targets.target_groups.items():
            for tid, t in tg.concentrations.items():
                if tid in self.mmid2mm:
                    var_id = pf.V_TMMC_ + valid_sbml_sid(tid)
                    mm = self.mmid2mm[tid]

                    # macromolecule dilution with growth rate
                    reactants = {mm.sid: growth_rate}
                    self.initial_assignments.add_sref_ia(var_id, mm.sid, math_var='growth_rate * 1 hour')

                    # add compartment density constraint
                    weight_coupling = {}
                    cids = [mm.compartment]
                    if 'total_density_cid' in self.cid_mappings:
                        cids.append(self.cid_mappings['total_density_cid'])
                    for cid in cids:
                        if cid in self.densities.densities:
                            weight_coupling[self.densities.densities[cid].constr_id] = mm.weight / mm.scale
                    products = weight_coupling

                    # get gene product association
                    gpid = self.model.locus2gp.get(tid)
                    gpa = f'assoc={gpid}' if gpid else None

                    # target concentration implemented as fixed variable bound
                    units_id = 'mmol_per_gDW' if mm.scale == 1.0 else 'umol_per_gDW'
                    var_bnd_val = self.parameter_values[t.value] * mm.scale
                    var_bnd_pid = self.model.get_fbc_bnd_pid(var_bnd_val, units_id, f'target_conc_{tid}', reuse=False)
                    conc_targets[var_id] = [f'{tid} concentration target', False, reactants, products,
                                            mm.scale, gpa, var_bnd_pid, var_bnd_pid, 'RBA_mm_conc']
                    self.initial_assignments.add_var_bnd_ia(var_bnd_pid, rba_pid=t.value,
                                                            math_const=f'{mm.scale} {units_id}')

        cols = ['name', 'reversible', 'reactants', 'products', 'scale', 'fbcGeneProdAssoc',
                'fbcLowerFluxBound', 'fbcUpperFluxBound', 'kind']
        df_add_rids = pd.DataFrame(conc_targets.values(), index=list(conc_targets), columns=cols)
        print(f'{len(df_add_rids):4d} macromolecule concentration target variables to add')
        self.model.add_reactions(df_add_rids)

    def _xba_add_metabolite_target_conc_variables(self, growth_rate):
        """Add variables for metabolite (small molecules) target concentrations to the XbaModel instance.

        V_TSMC: Target concentrations for small molecules are collected in a single variable
        Unit: µmol/gDW
        The variable is fixed at the growth rate (lower/upper bound)
        Reactant coefficients are the target concentrations (these can be functions of growth rate by itself)
        Growth rate dependent coefficients and variable bounds need to be updated during bisection optimization.
           NOTE: THIS will be changed to target = 1 and dilution terms for molecules

        :param float growth_rate: growth rate (h-1) required for macromolecule dilution
        """
        # target concentration variables are constants, i.e. fixed at value 1
        var_id = pf.V_TSMC
        one_umol_pid = self.model.get_fbc_bnd_pid(1.0e-3, 'mmol_per_gDW', 'one_umol_per_gDW', reuse=False)
        scale = 1000.0

        conc_targets = {}
        reactants_tsmc = {}
        for tg_id, tg in self.targets.target_groups.items():
            for sid, t in tg.concentrations.items():
                sbml_sid = valid_sbml_sid(sid)
                if sid not in self.mmid2mm:
                    assert sid in self.model.species, f'targets: metabolite {sid} is not included in model species'
                    reactants_tsmc[sbml_sid] = growth_rate * self.parameter_values[t.value] * scale
                    self.initial_assignments.add_sref_ia(var_id, sbml_sid, rba_pid=t.value,
                                                         math_var=f'growth_rate * {scale} hour')
        conc_targets[var_id] = [f'small molecules concentration targets (mmol)', False, reactants_tsmc, {},
                                scale, one_umol_pid, one_umol_pid, 'RBA_sm_conc']

        cols = ['name', 'reversible', 'reactants', 'products', 'scale',
                'fbcLowerFluxBound', 'fbcUpperFluxBound', 'kind']
        df_add_rids = pd.DataFrame(conc_targets.values(), index=list(conc_targets), columns=cols)
        print(f'{len(df_add_rids):4d} metabolites concentration target variable to add')
        self.model.add_reactions(df_add_rids)

    def _xba_add_target_density_variables(self):
        """Add variables for target compartment density and slack variable to XbaModel.

        V_TCD: Target density concentrations
        Units: mmol AA/gDW
        The variable is fixed at 1.
        Reactant coefficients are the target densities for the various compartments
        Target concentrations that depend on growth rate need to be updated during bisection optimization

        V_SLACK_<cid>: positive slack on compartment density constraints
        Units: mmol AA/gDW
        Variable is bounded by upper value of MAX_DENSITY_SLACK (here 10 mmol AA/gDW)
        Implemented, so we can report on maximum compartment capacity utilization.
        """
        # add compartment density targets
        var_id = pf.V_TCD
        one_mmol_pid = self.model.get_fbc_bnd_pid(1.0, 'mmol_per_gDW', 'one_mmol_aa_per_gDW', reuse=False)
        density_vars = {}
        reactants = {}
        reactants_vars = {}
        for cid, d in self.densities.densities.items():
            reactants[d.constr_id] = self.parameter_values[d.target_value.upper_bound]
            self.initial_assignments.add_sref_ia(var_id, d.constr_id, rba_pid=d.target_value.upper_bound)

            fid = d.target_value.upper_bound
            if not (fid in self.parameters.functions and self.parameters.functions[fid].type == 'constant'):
                reactants_vars[d.constr_id] = fid
        density_vars[var_id] = [f'max compartment density target (mmol AA)', False, reactants, {},
                                one_mmol_pid, one_mmol_pid, 'RBA_max_density']

        # add slack variables for density constraints
        lb_pid = self.model.get_fbc_bnd_pid(0.0, 'mmol_per_gDW', 'density_slack_min', reuse=False)
        ub_pid = self.model.get_fbc_bnd_pid(MAX_DENSITY_SLACK, 'mmol_per_gDW', 'density_slack_max', reuse=False)
        for cid, d in self.densities.densities.items():
            products = {d.constr_id: 1.0}
            density_vars[pf.V_SLACK_ + cid] = [f'Positive Slack on {cid} density', False, {}, products,
                                               lb_pid, ub_pid, 'slack_variable']

        cols = ['name', 'reversible', 'reactants', 'products',
                'fbcLowerFluxBound', 'fbcUpperFluxBound', 'kind']
        df_add_rids = pd.DataFrame(density_vars.values(), index=list(density_vars), columns=cols)
        print(f'{len(df_add_rids):4d} density target and slack variables to add')
        self.model.add_reactions(df_add_rids)

    def _xba_add_flux_targets(self):
        """Add RBA reaction/production/degradation flux targets to the XbaModel instance.

        Such targets are implemented as reaction/variable bounds (fbc_bounds)

        reactionFlux targets are applied to model reactions
        productionFLux and degradatationFlux are applied on macromolecule production/degradation reactions.
        Note: a productionFlux target is implemented as a lower bound.

        reaction/variable bound parameters are created with 'correct' units.

        We also collect the flux bounds depending on growth rate, to be applied during optimization.
        """
        non_const_fids = defaultdict(dict)

        modify_attrs = []
        for tg_id, tg in self.targets.target_groups.items():

            flux_targets = {'': tg.reaction_fluxes, 'R_PROD_': tg.production_fluxes, 'R_DEGR_': tg.degradation_fluxes}
            for rid_prefix, targets in flux_targets.items():
                for tid, t in targets.items():
                    rid = f'{rid_prefix}{tid}'
                    r = self.model.reactions[rid]
                    scale = r.scale
                    ia_units = 'umol_per_gDWh' if scale == 1000.0 else 'mmol_per_gDW_per_hr'
                    unit_id = self.model.parameters[r.fbc_lower_bound].units

                    if t.value:
                        var_bnd_val = self.parameter_values[t.value] * scale
                        var_bnd_pid = self.model.get_fbc_bnd_pid(var_bnd_val, unit_id, f'{rid}_rba_bnd', reuse=False)
                        modify_attrs.append([rid, 'reaction', 'fbc_lower_bound',    var_bnd_pid])
                        self.initial_assignments.add_var_bnd_ia(var_bnd_pid, rba_pid=t.value,
                                                                math_const=f'{scale} {ia_units}')
                        fid = t.value
                        if self.parameters.functions[fid].type != 'constant':
                            non_const_fids[rid]['lb'] = fid

                        if rid_prefix != 'R_PROD_':
                            modify_attrs.append([rid, 'reaction', 'fbc_upper_bound', var_bnd_pid])
                            if self.parameters.functions[fid].type != 'constant':
                                non_const_fids[rid]['ub'] = fid
                    else:
                        if t.lower_bound:
                            var_bnd_val = self.parameter_values[t.lower_bound] * scale
                            var_bnd_pid = self.model.get_fbc_bnd_pid(var_bnd_val, unit_id, f'{rid}_rba_lb', reuse=False)
                            modify_attrs.append([rid, 'reaction', 'fbc_lower_bound', var_bnd_pid])
                            self.initial_assignments.add_var_bnd_ia(var_bnd_pid, rba_pid=t.lower_bound,
                                                                    math_const=f'{scale} {ia_units}')
                            fid = t.lower_bound
                            if self.parameters.functions[fid].type != 'constant':
                                non_const_fids[rid]['lb'] = fid

                        if t.upper_bound:
                            var_bnd_val = self.parameter_values[t.upper_bound] * scale
                            var_bnd_pid = self.model.get_fbc_bnd_pid(var_bnd_val, unit_id, f'{rid}_rba_ub', reuse=False)
                            modify_attrs.append([rid, 'reaction', 'fbc_upper_bound', var_bnd_pid])
                            self.initial_assignments.add_var_bnd_ia(var_bnd_pid, rba_pid=t.upper_bound,
                                                                    math_const=f'{scale} {ia_units}')
                            fid = t.upper_bound
                            if self.parameters.functions[fid].type != 'constant':
                                non_const_fids[rid]['ub'] = fid

        df_modify_attrs = pd.DataFrame(modify_attrs, columns=['id', 'component', 'attribute', 'value'])
        df_modify_attrs.set_index('id', inplace=True)
        print(f"{len(df_modify_attrs):4d} Flux/variable bounds need to be updated.")
        self.model.modify_attributes(df_modify_attrs, 'reaction')

    def _xba_set_dummy_fba_objective(self):
        """Add optimization objective to the XbaModel instance.

        RBA is an LP feasibility problem where, where coefficients and bounds are reconfigured
        depending on a selected growth rate.
        The optimization algorithm of RBA selects for the highest growth rate, which still
        results in a feasible problem. FBA objective is not relevant, though
        in the contexts of FBA optimization (here, just feasibility checking) an (arbitrary)
        FBA objective is defined, so problem can be solved using LP solvers
        """
        # remove any existing objectives
        obj_ids = [obj_id for obj_id in self.model.objectives]
        self.model.del_components('objectives', obj_ids)

        # Configure a dummy FBA objective:
        objectives_config = {'rba_obj': {'type': 'maximize', 'active': True, 'coefficients': {pf.V_TSMC: 1.0}}}
        self.model.add_objectives(objectives_config)
        print(f'Dummy FBA objective configured: maximize {pf.V_TSMC}')

    def to_excel(self, fname):
        """Export RBA model in RBApy format to Microsoft Excel spreadsheet

        :meta private:
        :param str fname: file name of spreadsheet
        :return: success status
        :rtype: bool
        """
        m_dict = {}
        for component in components:
            m_dict |= getattr(self, component).to_df()

        # add target value information strings
        for idx, row in m_dict['enzymes'].iterrows():
            m_dict['enzymes'].at[idx, 'fwd_eff_info'] = self.parameters.get_value_info(row['forwardEfficiency'])
            m_dict['enzymes'].at[idx, 'bwd_eff_info'] = self.parameters.get_value_info(row['backwardEfficiency'])
        for idx, row in m_dict['processes'].iterrows():
            if '=' in row['machineryCapacity']:
                first_value = row['machineryCapacity'].split('=')[1]
                m_dict['processes'].at[idx, 'capacity_info'] = self.parameters.get_value_info(first_value)
        for idx, row in m_dict['densities'].iterrows():
            first_value = row['targetValue'].split('=')[1]
            m_dict['densities'].at[idx, 'value_info'] = self.parameters.get_value_info(first_value)
        # we temporarily create a unique index for table update
        m_dict['targets'].reset_index(inplace=True)
        for idx, row in m_dict['targets'].iterrows():
            first_value = row['targetValue'].split('=')[1]
            m_dict['targets'].at[idx, 'value_info'] = self.parameters.get_value_info(first_value)
        m_dict['targets'].set_index('targetGroup', inplace=True)

        with pd.ExcelWriter(fname) as writer:
            for name, df in m_dict.items():
                df.to_excel(writer, sheet_name=name)
        print(f'RBA model exported to {fname}')
        return True

    def check_unused(self):

        molecules = (set(self.metabolism.species) | set(self.dna.macromolecules) |
                     set(self.rnas.macromolecules) | set(self.proteins.macromolecules))
        parameters = set(self.parameters.functions) | set(self.parameters.aggregates)

        ref_parameters = set()
        ref_parameters |= self.processes.ref_parameters()
        ref_parameters |= self.densities.ref_parameters()
        ref_parameters |= self.targets.ref_parameters()
        ref_parameters |= self.enzymes.ref_parameters()
        functions_used_in_aggs = self.parameters.ref_functions(ref_parameters)
        ref_parameters |= functions_used_in_aggs
        #ref_parameters |= self.parameters.ref_functions(ref_parameters)

        ref_molecules = set()
        ref_molecules |= self.metabolism.ref_molecules()
        ref_molecules |= self.processes.ref_molecules()
        ref_molecules |= self.enzymes.ref_molecules()
        ref_molecules |= self.targets.ref_molecules()

        unused_parameters = parameters.difference(ref_parameters)
        unused_molecules = molecules.difference(ref_molecules)
        unused = 0
        if len(unused_parameters) > 0:
            print(f'{len(unused_parameters)} unused parameters:', unused_parameters)
            unused += len(unused_parameters)
        if len(unused_molecules) > 0:
            print(f'{len(unused_molecules)} unused molecules:', unused_molecules)
            unused += len(unused_molecules)
        if unused == 0:
            print('no unused parameters/molecules')


[docs]
    def validate(self, validate_sbml=True):
        """Validate compliance with SBML standards and completeness wrt RBA.

        The validation with SBML standards can be suppressed using the optional
        parameter `validate_sbml`.

        Validation against SBML standards is an optional task taking time.
        Validation could be skipped once model configuration is stable.

        Information on non-compliance is printed. Details are written to `tmp/tmp.txt`.
        In case of an unsuccessful validation, it is recommended to review `tmp/tmp.txt` and
        improve on the model configuration.

        Example: Ensure completeness of RbaModel and ensure compliance with SBML standards prior to export.

        .. code-block:: python

            if rba_model.validate():
                rba_model.export('iML1515_RBA.xml')

        :param bool validate_sbml: validate SBML compliance (default: True)
        :return: success status
        :rtype: bool
        """
        component_ids = {'species': set(self.metabolism.species),
                         'dna': set(self.dna.macromolecules),
                         'rna': set(self.rnas.macromolecules),
                         'protein': set(self.proteins.macromolecules),
                         'functions': set(self.parameters.functions),
                         'aggregates': set(self.parameters.aggregates)}
        valid = True
        _components = {'parameters', 'metabolism', 'processes', 'enzymes', 'densities', 'targets'}
        for component in _components:
            valid = valid and getattr(self, component).validate(component_ids)
        print(f'RBA model valid status: {valid}')

        if valid is True and validate_sbml is True:
            print(f'checking SBML compliance ...')
            return self.model.validate()
        else:
            return valid



[docs]
    def export(self, fname, export_format='SBML'):
        """Export RbaModel instance to SBML encoded file, RBApy encoded format (directory) or spreadsheet (.xlsx).

        Setting the optional parameter `export_format` to `RBApy` and providing a directory name in
        `fname` exports the file in proprietary RBApy 1.0 format. The SBML encoded model
        may include additional features and functionality.

        The spreadsheet (.xlsx) is helpful to inspect model configuration for of SBML and RBApy formats.

        Example: Export RbaModel instance to SBML encoded file and to spreadsheet.

        .. code-block:: python

            xba_model = XbaModel('iML1515.xml')
            xba_model.configure('xba_parameters.xlsx')

            rba_model = RbaModel(xba_model)
            rba_model.configure('rba_parameters.xlsx')
            if rba_model.validate():
                rba_model.export('iML1515_RBA.xml')
                rba_model.export('iML1515_RBA.xlsx')

        :param str fname: filename with extension '.xml' or '.xlsx', or a directory name for RBApy format
        :param str export_format: export format: 'SBML' or 'RBApy' (default: 'SBML')
        :return: success status
        :rtype: bool
        """
        assert export_format in {'SBML', 'RBApy'}, 'argument format must be "SBML" or "RBApy")'
        if export_format == 'SBML':
            return self.model.export(fname)
        elif export_format == 'RBApy':
            if fname.endswith('.xlsx'):
                return self.to_excel(fname)
            else:
                # export RBA model in RBA proprietary format
                if not os.path.exists(fname):
                    os.makedirs(fname)
                    print(f'RBA directory {fname} created')
                for component in components:
                    getattr(self, component).export_xml(fname)
                print(f'RBA model exported to: {fname}')
        return True
Source code for f2xba.rba_model.rba_model

f2xba

Navigation

Related Topics