Source code for f2xba.optimization.rba_opt

"""Implementation of RbaOptimization class.

Support both COBRApy and gurobipy interfaces

Note: RBA is a feasibility problem. Using a bisection algorithm as implemented in RbaPy
(Bulović et al., 2019), the highest growth rate is retrieved where the LP is still feasible.
RBA model includes variable bounds and stoichiometric coefficients that
are dependent on selected growth rate and medium.

Optimization problem related parameters need to be updated during optimization. This is performed
in according to SBML specifications using initial assignment procedures.

The class takes care of
- initial variable and constraints configuration
- extraction data configured in the SBML model, such as function definitions, initial assignments,
species references, XML annotations, Miriam annotations
- reconfiguration of optimization problem related parameters prior to each feasibility loop

Peter Schubert, HHU Duesseldorf, January 2024
"""

import re
import math
import pandas as pd
from collections import defaultdict
import tqdm


import sbmlxdf
import f2xba.prefixes as pf
from .rba_initial_assignments import InitialAssignments
from .optimize import Optimize, extract_net_fluxes
from .rba_results import RbaResults


XML_SPECIES_NS = 'http://www.hhu.de/ccb/rba/species/ns'
XML_COMPARTMENT_NS = 'http://www.hhu.de/ccb/rba/compartment/ns'



[docs]
class RbaOptimization(Optimize):
    """Support optimization of SBML encoded RBA based models generated by f2xba.

    Optimization support is provided via COBRApy and guropipy interfaces for RBA models
    created by f2xba, utilizing the bisection algorithm of RbaPy (Bulović et al., 2019).

    Ref: Bulović, A., Fischer, S., Dinh, M., Golib, F., Liebermeister, W., Poirier, C., Tournier,
    L., Klipp, E., Fromion, V., & Goelzer, A. (2019). Automated generation of bacterial resource
    allocation models. Metabolic Engineering, 55, 12-22.
    https://doi.org/https://doi.org/10.1016/j.ymben.2019.06.001

    Using the COBRApy interface for optimization of RBA and TRBA models:

    .. code-block:: python

        import cobra

        cobra_model = cobra.io.read_sbml_model('iML1515_RBA.xml')
        ro = RbaOptimization('iML1515_RBA.xml', cobra_model)

        sigma = ro.avg_enz_saturation
        importer_km = ro.importer_km
        rel_mmol_per_l =  (sigma / (1.0-min(sigma, .99))) * importer_km
        ex_sidx2mmol_per_l = {re.sub('EX_', '', ex_ridx): rel_mmol_per_l for ex_ridx in lb_medium}
        ex_sidx2mmol_per_l['h_e'] = 100.0 * sigma
        ro.medium = ex_sidx2mmol_per_l

        solution = ro.solve(gr_min=0.01, gr_max=1.2, bisection_tol=1e-3)

    Using the gurobipy interface for optimization of RBA and TRBA models, providing significant speed improvements:
    Note: GUROBI optimizer with gurobipy (https://www.gurobi.com) needs to be installed on your system.

    .. code-block:: python

        ro = RbaOptimization('iML1515_RBA.xml')

        sigma = ro.avg_enz_saturation
        importer_km = ro.importer_km
        rel_mmol_per_l =  (sigma / (1.0-min(sigma, .99))) * importer_km
        ex_sidx2mmol_per_l = {re.sub('EX_', '', ex_ridx): rel_mmol_per_l for ex_ridx in lb_medium}
        ex_sidx2mmol_per_l['h_e'] = 100.0 * sigma
        ro.medium = ex_sidx2mmol_per_l

        solution = ro.solve(gr_min=0.01, gr_max=1.2, bisection_tol=1e-3)
    """

    def __init__(self, fname, cobra_model=None):
        """Instantiation of RbaOptimization instance.

        :param str fname: filename of the SBML coded extended model
        :param cobra_model: reference to cobra model (default: None)
        :type cobra_model: cobra.Model
        """
        super().__init__('RBA', fname, cobra_model)

        required = {'species', 'reactions', 'unitDefs', 'funcDefs', 'initAssign', 'parameters', 'compartments'}
        missing = required.difference(set(self.m_dict))
        if len(missing) > 0:
            print(f'Missing components {missing} in SBML document!')
            raise AttributeError

        self.df_mm_data = self._get_macromolecule_data()
        """Pandas DataFrame with macromolecule related information."""

        self.df_enz_data = self._get_enzyme_data()
        """Pandas DataFramw with enzyme molecular weights."""

        self.enz_mm_composition = self._get_enzyme_mm_composition()
        """Mapping enzyme identifiers to enzyme composition."""

        self.initial_assignments = InitialAssignments(self)
        """Initial assignments configured in the RBA model."""

        self._configure_rba_model_constraints()

        # for COBRAPy models using IBM CPLEX, switch off scaling
        if cobra_model and 'cplex' in self.model.solver.interface.__name__:
            self.model.solver.problem.parameters.read.scale.set(-1)

    @property
    def medium(self):
        """Mimic medium property of COBRApy to set and retrieve medium wrt concentrations.

        Medium concentrations returned in units of mmol/l.

        :return: metabolite concentrations in external medium in mmol/l
        :rtype: dict
        """
        ex_midx_mmol_per_l = {}
        for sid in self.ex_rid2sid.values():
            mmol_per_l = self.initial_assignments.local_env.get(sid, 0.0)
            if mmol_per_l > 0.0:
                ex_midx_mmol_per_l[re.sub(f'^{pf.M_}', '', sid)] = mmol_per_l
        return ex_midx_mmol_per_l

    @medium.setter
    def medium(self, ex_sids_mmol_per_l):
        """Mimic medium property of COBRApy for medium assignments wrt concentrations.

        Allow assignment of medium using:
          RbaOptimization.medium = ex_sids_mmol_per_l

        :param dict ex_sids_mmol_per_l: external metabolite concentrations (mmol/l)
        """
        self.set_medium_conc(ex_sids_mmol_per_l)

    def _configure_rba_model_constraints(self):
        """Configure constraints related to RBA modelling

            I.e. Enzyme forward and reverse efficiencies configured at ≤ 0.0
        """
        if self.is_gpm:
            return self._gp_configure_rba_model_constraints()
        else:
            return self._cp_configure_rba_model_constraints()

    def _gp_configure_rba_model_constraints(self):
        """Configure constraints related to RBA modelling for GuropiPy interface"""
        for constr in self.gpm.getConstrs():
            if re.match(pf.C_EF_, constr.ConstrName) or re.match(pf.C_ER_, constr.ConstrName):
                constr.sense = '<'
        print(f'RBA enzyme efficiency constraints configured (C_EF_xxx, C_ER_xxx) ≤ 0')

    def _cp_configure_rba_model_constraints(self):
        """Configure constraints related to RBA modelling for COBRApy interface"""
        modify_constr_bounds = {pf.C_EF_: [None, 0.0], pf.C_ER_: [None, 0.0]}
        for constr in self.model.constraints:
            for cprefix, bounds in modify_constr_bounds.items():
                if re.match(cprefix, constr.name):
                    constr.lb, constr.ub = bounds
        print(f'RBA enzyme efficiency constraints configured (C_EF_xxx, C_ER_xxx) ≤ 0')

    # INITIAL DATA COLLECTION PART
    def _get_macromolecule_data(self):
        mms = {}
        for sid, row in self.m_dict['species'].iterrows():
            if re.match(pf.MM_, sid):
                mm_id = re.sub(f'^{pf.MM_}', '', sid)
                refs = sbmlxdf.misc.get_miriam_refs(row.get('miriamAnnotation'), 'uniprot', 'bqbiol:is')
                uniprot = refs[0] if len(refs) > 0 else None
                xml_attrs = sbmlxdf.misc.extract_xml_attrs(row.get('xmlAnnotation'), ns=XML_SPECIES_NS)
                mw_aa = float(xml_attrs['weight_aa']) if 'weight_aa' in xml_attrs else None
                mw_kda = float(xml_attrs['weight_kDa']) if 'weight_kDa' in xml_attrs else None
                scale = float(xml_attrs['scale']) if 'scale' in xml_attrs else 1.0
                mms[mm_id] = [xml_attrs['type'], uniprot, mw_kda, mw_aa, scale]
        cols = ['type', 'uniprot', 'mw_kDa', 'mw_aa', 'scale']
        df_mms = pd.DataFrame(mms.values(), index=list(mms), columns=cols)
        df_mms.index.name = 'mm_id'
        return df_mms

    def _get_enzyme_data(self):
        """Extract molecular weights of enzymes and process machines.

        From xmlAnnotation of reactions corresponding of related concentration variables

        :return: enzyme data with name, molecular weight and scaling.
        :rtype: pandas.DataFrame
        """
        enz_data = {}
        for vid, row in self.m_dict['reactions'].iterrows():
            if re.match(pf.V_EC_, vid) or re.match(pf.V_PMC_, vid):
                xml_attrs = sbmlxdf.misc.extract_xml_attrs(row.get('xmlAnnotation'), ns=XML_SPECIES_NS)
                mw_kda = float(xml_attrs['weight_kDa']) if 'weight_kDa' in xml_attrs else None
                scale = float(xml_attrs['scale']) if 'scale' in xml_attrs else 1.0
                enz_data[vid] = [mw_kda, scale]
        cols = ['mw_kDa', 'scale']
        df_enz_data = pd.DataFrame(enz_data.values(), index=list(enz_data), columns=cols)
        df_enz_data.index.name = 'id'
        return df_enz_data

    def _get_enzyme_mm_composition(self):
        """Determine Enzyme stoichiometry wrt gene products from model stoichiometry.

        Note: stoichiometry wrt enzyme and process machine concentrations
        are configured in initial model considering a given model growth rate (e.g. 1.0 h-1)
        From stoichiometric coefficients we can derive enzyme/process machine composition
        wrt to macro molecules

        :return: enzyme/process machine composition wrt macromolecules
        :rtype: dict
        """
        model_gr = self.m_dict['parameters'].loc['growth_rate'].value
        enz_mm_composition = defaultdict(dict)
        for rid, row in self.m_dict['reactions'].iterrows():
            if re.match(pf.V_EC_, rid) or re.match(pf.V_PMC_, rid):
                for sref_str in sbmlxdf.record_generator(row['reactants']):
                    params = sbmlxdf.extract_params(sref_str)
                    if re.match(pf.MM_, params['species']):
                        mm_id = re.sub(f'^{pf.MM_}', '', params['species'])
                        enz_mm_composition[rid][mm_id] = float(params['stoic']) / model_gr
        return dict(enz_mm_composition)

    def set_medium_conc(self, ex_sids_mmol_per_l):
        """Configure external metabolite concentrations (mmol/l).

        In the RBA, the growth medium is defined by external metabolite concentrations (mmol/L).
        These concentrations are used to determine the media-dependent transporter saturation
        levels of the uptake reactions. This determination is made using a Michaelis-Menten type
        saturation function with a default Michaelis constant (Km) of 1.0 mmol/L.
        External media concentration should be scaled to the default Km value.
        This implementation of RBA still uses uptake reactions, which will be opened for
        the specified medium.

        :meta private:
        :param ex_sids_mmol_per_l: external metabolite concentrations (mmol/l)
        :type ex_sids_mmol_per_l: dict (key: metabolite id/str, val: concentration/float)
        """
        # remove any trailing 'M_'
        ex_sidx_mmol_per_l = {re.sub(f'^{pf.M_}', '', sid): mmol_per_l for sid, mmol_per_l in
                              ex_sids_mmol_per_l.items()}

        # unlimited uptake for metabolites in external medium
        self.set_medium({self.sidx2ex_ridx.get(sidx, 0.0): 1000.0 for sidx in ex_sidx_mmol_per_l})

        # configure RBA related medium concentrations (and set concentrations of not available medium to zero)
        medium_conc = {sid: ex_sidx_mmol_per_l.get(re.sub(f'^{pf.M_}', '', sid), 0.0)
                       for sid in self.ex_rid2sid.values()}
        self.initial_assignments.set_medium_conc(medium_conc)

    def _set_growth_rate(self, growth_rate):
        """Set growth rate dependent model parameters in RBA models

        :param float growth_rate: selected growth rate in h-1
        :return:
        """
        self.initial_assignments.set_growth_rate(growth_rate)


[docs]
    def scale_efficiencies(self, scale_effs):
        """Scale enzyme efficiencies in RBA models to support manual parameter tuning.

        Use unscale_efficiencies() to return to original values.

        .. code-block:: python

            ro = RbaOptimization('iML1515_RBA.xml)
            scale_effs = {'PDH': {'fwd': 10.0}, 'ACt2rpp': {'fwd': 2.0, 'rev': 5.0}}
            ro.scale_efficiencies(scale_effs)
            solution = ro.solve(gr_min=0.02, gr_max=1.2, bisection_tol=1e-3)
            ro.unscale_efficiencies()

        :param dict of dict scale_effs: reaction ids (without prefix `R_`) and dict with reactions 'fwd' and/or 'rev' with float
        """
        if self.is_gpm:
            self._gp_scale_efficiencies(scale_effs)
        else:
            self._cp_scale_efficiencies(scale_effs)



[docs]
    def unscale_efficiencies(self):
        """Reset previously scaled efficiencies.

        see scale_efficiencies().
        """
        if self.is_gpm:
            self._gp_unscale_efficiencies()
        else:
            self._cp_unscale_efficiencies()


    def _cp_scale_efficiencies(self, scale_effs):
        """Scale efficiencies values for COBRApy interface, by updating coupling coefficients.

        :param scale_effs: reaction identifiers (without prefix `R_`) and scaling factor
        :type scale_effs: dict (key: reaction id/str, val: dict with directions 'fwd' and/or 'rev' and factor/float)
        """
        assert self.is_gpm is False, 'applicable to COBRApy interface only'
        orig_coupling = defaultdict(dict)
        for ridx, data, in scale_effs.items():
            var_id = f'{pf.V_EC_}{ridx}'
            if var_id in self.model.reactions:
                var = self.model.reactions.get_by_id(var_id)
                for direction, factor in data.items():
                    constr_id = f'{pf.C_EF_}{ridx}' if direction == 'fwd' else f'{pf.C_ER_}{ridx}'
                    try:
                        old_val = var.get_coefficient(constr_id)
                    except KeyError:
                        print(f'Enzyme constraint not found for {ridx} in {direction} direction')
                    else:
                        orig_coupling[ridx][direction] = old_val
                        var.add_metabolites({constr_id: old_val * factor}, combine=False)
            else:
                print(f'Enzyme constraint variable not found for reaction {ridx}')
        self.orig_coupling = dict(orig_coupling)

    def _cp_unscale_efficiencies(self):
        """Unscale kcat values for COBRApy interface, by resetting original coupling coefficients
        """
        assert self.is_gpm is False, 'applicable to COBRApy interface only'
        for ridx, data, in self.orig_coupling.items():
            var = self.model.reactions.get_by_id(f'{pf.V_EC_}{ridx}')
            for direction, old_val in data.items():
                constr_id = f'{pf.C_EF_}{ridx}' if direction == 'fwd' else f'{pf.C_ER_}{ridx}'
                var.add_metabolites({constr_id: old_val}, combine=False)
        self.orig_coupling = {}

    def _gp_scale_efficiencies(self, scale_effs):
        """Scale enzyme efficiency values for gurobipy interface, by updating coupling coefficients.

        :param scale_effs: reaction identifiers (without prefix `R_`) and scaling factor
        :type scale_effs: dict (key: reaction id/str, val: dict with directions 'fwd' and/or 'rev' and factor/float)
        """
        assert self.is_gpm is True, 'applicable to gurobipy interface only'
        orig_coupling = defaultdict(dict)
        self.gpm.update()
        for ridx, data, in scale_effs.items():
            var = self.gpm.getVarByName(f'{pf.V_EC_}{ridx}')
            if var:
                for direction, factor in data.items():
                    constr_id = f'{pf.C_EF_}{ridx}' if direction == 'fwd' else f'{pf.C_ER_}{ridx}'
                    constr = self.gpm.getConstrByName(constr_id)
                    if constr:
                        old_val = self.gpm.getCoeff(constr, var)
                        orig_coupling[ridx][direction] = old_val
                        self.gpm.chgCoeff(constr, var, old_val * factor)
                    else:
                        print(f'Enzyme constraint not found for {ridx} in {direction} direction')
            else:
                print(f'Enzyme constraint variable not found for reaction {ridx}')
        self.gpm.update()
        self.orig_coupling = dict(orig_coupling)

    def _gp_unscale_efficiencies(self):
        """Reset enzyme efficiencies to original values for gurobipy models.

        Used in manually tuning model efficiencies
        - call scale_efficiencies(scale_effs) prior to optimization
        - call unscale_efficiencies() after optimization to reset old efficiency values
        """
        assert self.is_gpm is True, 'applicable to gurobipy interface only'
        for ridx, data, in self.orig_coupling.items():
            var = self.gpm.getVarByName(f'{pf.V_EC_}{ridx}')
            for direction, old_val in data.items():
                constr_id = f'{pf.C_EF_}{ridx}' if direction == 'fwd' else f'{pf.C_ER_}{ridx}'
                constr = self.gpm.getConstrByName(constr_id)
                if var and constr:
                    self.gpm.chgCoeff(constr, var, old_val)
        self.gpm.update()
        self.orig_coeffs = {}


[docs]
    def get_init_assign_math(self, ia_refs):
        """Get expanded math string used in initial assignments of RBA models.

        RBA model uses initial assignments for model update during optimization.
        Model parameter values can be set depending on external media concentrations and growth rate.
        This method retrieves the expanded math for a list of initial assignment references,
        each reference contains a dictionary with the attributes:
        `var_id`: variable identifier,
        `constr_id`: constraint identifier, and
        `constr_type`: `reactants`, `products`, or `flux_bounds`.

        .. code-block:: python

            ro.get_init_assign_math([{'var_id': 'V_TSMC', 'constr_type': 'reactants', 'constr_id': 'M_hemeA_c'}])


        :param list(dict) ia_refs: reference to specific initial assignment
        :return: extended math strings
        :rtype: list(dict), like input with additional attribute 'math_str'
        """
        ia_data = []
        for data in ia_refs:
            var_id = data.get('var_id', '')
            constr_id = data.get('constr_id', '')
            constr_type = data.get('constr_type', '')

            if (var_id not in self.initial_assignments.target_rids or
                    constr_type not in ('reactants', 'products', 'flux_bounds') or
                    constr_id not in getattr(self.initial_assignments.target_rids[var_id], constr_type)):
                print(f'Invalid reference. var_id: {var_id}, constr_type: {constr_type}, constr_id: {constr_id}')
            else:
                trid = self.initial_assignments.target_rids[var_id]
                symbol_id = getattr(trid, constr_type)[constr_id]
                ia_func = self.initial_assignments.ia_functions[symbol_id]
                ia_data.append({'var_id': var_id, 'constr_type': constr_type, 'constr_id': constr_id,
                                 'math_str': ia_func.expanded_math})
        return ia_data



[docs]
    def set_init_assign_math(self, ia_data):
        """Set expanded math string used in initial assignments of RBA models.

         RBA model uses initial assignments for model update during optimization.
         Model parameter values can be set depending on external media concentrations and growth rate.
         This method sets the expanded math for a list of initial assignment references,
         each reference contains a dictionary with the attributes:
         `var_id`: variable identifier,
         `constr_id`: constraint identifier,
         `constr_type`: `reactants`, `products`, or `flux_bounds`, and
         `math`: math string, e.g. `growth_rate * 0.002` or a fixed value as 0.0.

        .. code-block:: python

            ia_data_anaerobe = [{'var_id': 'V_TSMC', 'constr_type': 'reactants', 'constr_id': 'M_hemeA_c', 'math_str': '0.0'}]
            if 'EX_o2_e' not in medium:
                orig_ia_data = ro.set_init_assign_math(ia_data_anaerobe)
            solution = ro.solve(gr_min=0.01, gr_max=0.7, bisection_tol=1e-3)
            if 'EX_o2_e' not in medium:
                ro.set_init_assign_math(orig_ia_data)

         :param list(dict) ia_data: reference to specific initial assignment
         :return: the original math string, prior to modification
         :rtype: list(dict), like input, but with original math string
         """
        orig_ia_data = []
        for data in ia_data:
            var_id = data.get('var_id', '')
            constr_id = data.get('constr_id', '')
            constr_type = data.get('constr_type', '')
            new_math = str(data.get('math_str', ''))

            if (var_id not in self.initial_assignments.target_rids or
                    constr_type not in ('reactants', 'products', 'flux_bounds') or
                    constr_id not in getattr(self.initial_assignments.target_rids[var_id], constr_type) or
                    len(new_math) == 0):
                print(
                    f'Invalid record. var_id: {var_id}, constr_type: {constr_type}, constr_id: {constr_id}, math_str: {new_math}')
            else:
                trid = self.initial_assignments.target_rids[var_id]
                symbol_id = getattr(trid, constr_type)[constr_id]
                ia_func = self.initial_assignments.ia_functions[symbol_id]
                org_math = ia_func.expanded_math
                ia_func.expanded_math = new_math
                orig_ia_data.append(
                    {'var_id': var_id, 'constr_type': constr_type, 'constr_id': constr_id, 'math_str': org_math})
        return orig_ia_data



[docs]
    def solve(self, gr_min=0.0, gr_max=1.5, bisection_tol=1e-5, max_iter=40):
        """Solve RBA feasibility problem using bisection algorithm.

        Support both COBRApy and gurobipy interfaces.

        :param float gr_min: minimum growth rate in h-1
        :param float gr_max: maximum growth rate in h-1
        :param float bisection_tol: stop criteria based on growth rate tolerances (default: 1e-3)
        :param int max_iter:  stop criteria based on iteration number (default: 40)
        :return: optimization solution
        :rtype: class:`Solution`
        """
        if self.is_gpm:
            return self._gp_solve(gr_min, gr_max, bisection_tol, max_iter)
        else:
            return self._cp_solve(gr_min, gr_max, bisection_tol, max_iter)


    def _gp_solve(self, gr_min, gr_max, bisection_tol, max_iter):
        """Solve RBA feasibility problem using bisection algorithm of RbaPy 1.0.

        Optionally we can modify the model to non-negative variables only.

        :param float gr_min: minimum growth rate to test in h-1, might have to be > 0.0 depending on targets
        :param float gr_max: maximum growth rate to test in h-1
        :param float bisection_tol: (optional, default 1e-3) stop criteria based on growth rate tolerances
        :param int max_iter: (optional, default 40) stop criteria based on number of iterations
        :return: optimization solution
        :rtype: class:`Solution`
        """
        # bisection algorithm to narrow in on maximum growth rate
        solution = None
        self._set_growth_rate(gr_min)
        if self.optimize().status == 'optimal':
            n_iter = 1
            while ((gr_max - gr_min) > bisection_tol) and (n_iter < max_iter):
                gr_test = gr_min + 0.5 * (gr_max - gr_min)
                self._set_growth_rate(gr_test)
                # self.gpm.reset()
                if self.optimize().status == 'optimal':
                    gr_min = gr_test
                else:
                    gr_max = gr_test
                n_iter += 1

            # collect optimization solution at optimum growth rate
            if (gr_max - gr_min) < bisection_tol:
                gr_opt = gr_min
                self._set_growth_rate(gr_opt)
                solution = self.optimize()

                # if solution with gr_min is no longer optimal, lower gr_opt slightly and try again
                if solution.status != 'optimal':
                    gr_opt = max(0.0, gr_opt - 0.5 * bisection_tol)
                    self._set_growth_rate(gr_opt)
                    solution = self.optimize()

                solution.fluxes = extract_net_fluxes(solution.fluxes)
                solution.objective_value = gr_opt
                solution.n_iter = n_iter
                # determine final density constraint
                col = self.gpm.getCol(self.gpm.getVarByName('V_TCD'))
                solution.density_constraints = {col.getConstr(idx).constrname: -col.getCoeff(idx)
                                                for idx in range(col.size())}
            else:
                print('no optimal growth rate')

        return solution

    def _cp_solve(self, gr_min, gr_max, bisection_tol, max_iter):
        """Solve RBA feasibility problem using bisection algorithm of RbaPy 1.0.

        Use COBRApy model contexts, so we can support outer model contexts

        :param float gr_min: minimum growth rate to test in h-1, might have to be > 0.0 depending on targets
        :param float gr_max: maximum growth rate to test in h-1
        :param float bisection_tol: (optional, default 1e-3) stop criteria based on growth rate tolerances
        :param int max_iter: (optional, default 40) stop criteria based on number of iterations
        :return: optimization solution
        :rtype: class:`Solution`
        """
        # first, check feasibility a minimal (or zero) growth
        # with self.model:
        self._set_growth_rate(gr_min)
        opt_value = self.model.slim_optimize()

        # bisection algorithm to narrow in on maximum growth rate
        if math.isfinite(opt_value):
            n_iter = 1
            while ((gr_max - gr_min) > bisection_tol) and (n_iter < max_iter):
                gr_test = gr_min + 0.5 * (gr_max - gr_min)
                # with self.model:
                self._set_growth_rate(gr_test)
                opt_value = self.model.slim_optimize()
                if math.isfinite(opt_value):
                    gr_min = gr_test
                else:
                    gr_max = gr_test
                n_iter += 1

            # collect optimization solution at optimum growth rate
            if (gr_max - gr_min) < bisection_tol:
                gr_opt = gr_min
                self._set_growth_rate(gr_opt)
                solution = self.model.optimize()

                # if solution with gr_min is no longer optimal, lower gr_opt slightly and try again
                if solution.status != 'optimal':
                    gr_opt = max(0.0, gr_opt - 0.5 * bisection_tol)
                    solution = self.model.optimize()

                solution.objective_value = gr_opt
                solution.n_iter = n_iter
                solution.density_constraints = {met.id: -val for met, val
                                                in self.model.reactions.get_by_id(pf.V_TCD).metabolites.items()}
                return solution
            else:
                print('no optimal growth rate')
                return None
        else:
            print(f'Problem infeasible at minimum growth of {gr_min}')
            return None


[docs]
    def single_gene_deletion(self, genes=None, solution=None, **kwargs):
        """Perform a single gene deletion analysis for RBA based models using gurobipy interface.

        Interface aligned to COBRApy single_gene_deletion() method.
        Perform single gene deletion simulations for provided list of gene in `genes`.
        If `genes` is not provided, perform gene deletion for all genes that may be
        active in the wild type solution. In case a gene is not required in the wild type solution, a knockout
        simulation is not performed for this gene. Its growth rate value is set to the wild type value and
        its optimization status is set to `wt_solution`.
        A wild type solution can be provided with parameter `solution`, alternatively a wild type solution
        is determined automatically.

        Following keyword arguments can be added to the list of parameters:
        `gr_min`: (default 0.0) minimum growth rate to test in h-1,
        `gr_min_alt`: (optional) alternative minimum growth rate in h-1 (used, if infeasible solution with gr_min),
        `gr_max`: (default 1.5) maximum growth rate to test in h-1,
        `bisection_tol`: (default 1e-5) stop criteria based on growth rate tolerance,
        `max_iter`: (default 40) stop criteria based on number of iterations.

        :param list or set genes: (optional) gene ids,
        :param solution: (optional) wild type RBA solution
        :type solution: :class:`Solution`
        :param kwargs: keyword arguments used for RBA bisectional optimization
        :return: single gene deletion results per gene with growth rate in h-1, optimization status and fitness value
        :rtype: pandas.DataFrame
        """
        if self.is_gpm is None:
            print('Method implemented for gurobipy interface only.')
            return pd.DataFrame()

        alt_kwargs = kwargs.copy()
        if 'gr_min_alt' in kwargs:
            alt_kwargs['gr_min'] = kwargs.pop('gr_min_alt')
            del alt_kwargs['gr_min_alt']

        # determine wild type growth rate and fluxes, if solution not provided
        if solution is None:
            solution = self.solve(**kwargs)
        wt_gr = solution.objective_value
        wt_fluxes = dict(RbaResults(self, {'wt': solution}).collect_fluxes()['wt'])

        all_genes = set(self.m_dict['fbcGeneProducts'].label.values)
        tx_genes, metab_genes = self.get_tx_metab_genes()
        pm_genes = all_genes.difference(tx_genes.union(metab_genes))

        # determine gene list, if not provided
        if genes is None:
            genes = self.get_active_genes(wt_fluxes)
        else:
            all_genes = genes

        # optimization loop for single gene deletions
        cols = ['growth_rate', 'status', 'fitness']
        sgko_results = {'wt': [wt_gr, 'wt_solution', 1.0]}
        for gene in tqdm.tqdm(sorted(all_genes)):
            if gene in genes:
                # simulate a single gene deletion
                orig_rid_bounds = self.gene_knock_outs(gene)
                solution = self.solve(**kwargs)
                #if (solution is not None) and solution.status == 'infeasible':
                #    print(f'first solution infeasible - reset model {gene}')
                #    self.gpm.reset()
                #    solution = self.solve(**alt_kwargs)
                #    if solution is None:
                #        print(f'second solution infeasible {gene}')
                #    else:
                #        print(f'solution status second attempt for {gene}: {solution.status}')
                self.set_variable_bounds(orig_rid_bounds)

                # process simulation result
                if solution is None:
                    sgko_results[gene] = [0.0, 'infeasible', 0.0]
                elif solution.status in {'optimal'}:
                    mutant_gr = solution.objective_value
                    sgko_results[gene] = [mutant_gr, solution.status, mutant_gr / wt_gr]
                else:
                    sgko_results[gene] = [0.0, solution.status, 0.0]
            elif gene in pm_genes:
                sgko_results[gene] = [0.0, 'process_machine']
            else:
                # genes not in gene_list are assumed to not impact the wild type solution
                sgko_results[gene] = [wt_gr, 'wt_solution', 1.0]

        df_sgko = pd.DataFrame(sgko_results.values(), index=list(sgko_results), columns=cols)
        df_sgko.index.name = 'gene'

        return df_sgko
Source code for f2xba.optimization.rba_opt

f2xba

Navigation

Related Topics