"""Implementation of RbaOptimization class.
Support both COBRApy and gurobipy interfaces
Note: RBA is a feasibility problem. Using a bisection algorithm as implemented in RbaPy
(Bulović et al., 2019), the highest growth rate is retrieved where the LP is still feasible.
RBA model includes variable bounds and stoichiometric coefficients that
are dependent on selected growth rate and medium.
Optimization problem related parameters need to be updated during optimization. This is performed
in according to SBML specifications using initial assignment procedures.
The class takes care of
- initial variable and constraints configuration
- extraction data configured in the SBML model, such as function definitions, initial assignments,
species references, XML annotations, Miriam annotations
- reconfiguration of optimization problem related parameters prior to each feasibility loop
Peter Schubert, HHU Duesseldorf, January 2024
"""
import re
import math
import pandas as pd
from collections import defaultdict
import tqdm
import sbmlxdf
import f2xba.prefixes as pf
from .rba_initial_assignments import InitialAssignments
from .optimize import Optimize, extract_net_fluxes
from .rba_results import RbaResults
XML_SPECIES_NS = 'http://www.hhu.de/ccb/rba/species/ns'
XML_COMPARTMENT_NS = 'http://www.hhu.de/ccb/rba/compartment/ns'
[docs]
class RbaOptimization(Optimize):
"""Support optimization of SBML encoded RBA based models generated by f2xba.
Optimization support is provided via COBRApy and guropipy interfaces for RBA models
created by f2xba, utilizing the bisection algorithm of RbaPy (Bulović et al., 2019).
Ref: Bulović, A., Fischer, S., Dinh, M., Golib, F., Liebermeister, W., Poirier, C., Tournier,
L., Klipp, E., Fromion, V., & Goelzer, A. (2019). Automated generation of bacterial resource
allocation models. Metabolic Engineering, 55, 12-22.
https://doi.org/https://doi.org/10.1016/j.ymben.2019.06.001
Using the COBRApy interface for optimization of RBA and TRBA models:
.. code-block:: python
import cobra
cobra_model = cobra.io.read_sbml_model('iML1515_RBA.xml')
ro = RbaOptimization('iML1515_RBA.xml', cobra_model)
sigma = ro.avg_enz_saturation
importer_km = ro.importer_km
rel_mmol_per_l = (sigma / (1.0-min(sigma, .99))) * importer_km
ex_sidx2mmol_per_l = {re.sub('EX_', '', ex_ridx): rel_mmol_per_l for ex_ridx in lb_medium}
ex_sidx2mmol_per_l['h_e'] = 100.0 * sigma
ro.medium = ex_sidx2mmol_per_l
solution = ro.solve(gr_min=0.01, gr_max=1.2, bisection_tol=1e-3)
Using the gurobipy interface for optimization of RBA and TRBA models, providing significant speed improvements:
Note: GUROBI optimizer with gurobipy (https://www.gurobi.com) needs to be installed on your system.
.. code-block:: python
ro = RbaOptimization('iML1515_RBA.xml')
sigma = ro.avg_enz_saturation
importer_km = ro.importer_km
rel_mmol_per_l = (sigma / (1.0-min(sigma, .99))) * importer_km
ex_sidx2mmol_per_l = {re.sub('EX_', '', ex_ridx): rel_mmol_per_l for ex_ridx in lb_medium}
ex_sidx2mmol_per_l['h_e'] = 100.0 * sigma
ro.medium = ex_sidx2mmol_per_l
solution = ro.solve(gr_min=0.01, gr_max=1.2, bisection_tol=1e-3)
"""
def __init__(self, fname, cobra_model=None):
"""Instantiation of RbaOptimization instance.
:param str fname: filename of the SBML coded extended model
:param cobra_model: reference to cobra model (default: None)
:type cobra_model: cobra.Model
"""
super().__init__('RBA', fname, cobra_model)
required = {'species', 'reactions', 'unitDefs', 'funcDefs', 'initAssign', 'parameters', 'compartments'}
missing = required.difference(set(self.m_dict))
if len(missing) > 0:
print(f'Missing components {missing} in SBML document!')
raise AttributeError
self.df_mm_data = self._get_macromolecule_data()
"""Pandas DataFrame with macromolecule related information."""
self.df_enz_data = self._get_enzyme_data()
"""Pandas DataFramw with enzyme molecular weights."""
self.enz_mm_composition = self._get_enzyme_mm_composition()
"""Mapping enzyme identifiers to enzyme composition."""
self.initial_assignments = InitialAssignments(self)
"""Initial assignments configured in the RBA model."""
self._configure_rba_model_constraints()
# for COBRAPy models using IBM CPLEX, switch off scaling
if cobra_model and 'cplex' in self.model.solver.interface.__name__:
self.model.solver.problem.parameters.read.scale.set(-1)
@property
def medium(self):
"""Mimic medium property of COBRApy to set and retrieve medium wrt concentrations.
Medium concentrations returned in units of mmol/l.
:return: metabolite concentrations in external medium in mmol/l
:rtype: dict
"""
ex_midx_mmol_per_l = {}
for sid in self.ex_rid2sid.values():
mmol_per_l = self.initial_assignments.local_env.get(sid, 0.0)
if mmol_per_l > 0.0:
ex_midx_mmol_per_l[re.sub(f'^{pf.M_}', '', sid)] = mmol_per_l
return ex_midx_mmol_per_l
@medium.setter
def medium(self, ex_sids_mmol_per_l):
"""Mimic medium property of COBRApy for medium assignments wrt concentrations.
Allow assignment of medium using:
RbaOptimization.medium = ex_sids_mmol_per_l
:param dict ex_sids_mmol_per_l: external metabolite concentrations (mmol/l)
"""
self.set_medium_conc(ex_sids_mmol_per_l)
def _configure_rba_model_constraints(self):
"""Configure constraints related to RBA modelling
I.e. Enzyme forward and reverse efficiencies configured at ≤ 0.0
"""
if self.is_gpm:
return self._gp_configure_rba_model_constraints()
else:
return self._cp_configure_rba_model_constraints()
def _gp_configure_rba_model_constraints(self):
"""Configure constraints related to RBA modelling for GuropiPy interface"""
for constr in self.gpm.getConstrs():
if re.match(pf.C_EF_, constr.ConstrName) or re.match(pf.C_ER_, constr.ConstrName):
constr.sense = '<'
print(f'RBA enzyme efficiency constraints configured (C_EF_xxx, C_ER_xxx) ≤ 0')
def _cp_configure_rba_model_constraints(self):
"""Configure constraints related to RBA modelling for COBRApy interface"""
modify_constr_bounds = {pf.C_EF_: [None, 0.0], pf.C_ER_: [None, 0.0]}
for constr in self.model.constraints:
for cprefix, bounds in modify_constr_bounds.items():
if re.match(cprefix, constr.name):
constr.lb, constr.ub = bounds
print(f'RBA enzyme efficiency constraints configured (C_EF_xxx, C_ER_xxx) ≤ 0')
# INITIAL DATA COLLECTION PART
def _get_macromolecule_data(self):
mms = {}
for sid, row in self.m_dict['species'].iterrows():
if re.match(pf.MM_, sid):
mm_id = re.sub(f'^{pf.MM_}', '', sid)
refs = sbmlxdf.misc.get_miriam_refs(row.get('miriamAnnotation'), 'uniprot', 'bqbiol:is')
uniprot = refs[0] if len(refs) > 0 else None
xml_attrs = sbmlxdf.misc.extract_xml_attrs(row.get('xmlAnnotation'), ns=XML_SPECIES_NS)
mw_aa = float(xml_attrs['weight_aa']) if 'weight_aa' in xml_attrs else None
mw_kda = float(xml_attrs['weight_kDa']) if 'weight_kDa' in xml_attrs else None
scale = float(xml_attrs['scale']) if 'scale' in xml_attrs else 1.0
mms[mm_id] = [xml_attrs['type'], uniprot, mw_kda, mw_aa, scale]
cols = ['type', 'uniprot', 'mw_kDa', 'mw_aa', 'scale']
df_mms = pd.DataFrame(mms.values(), index=list(mms), columns=cols)
df_mms.index.name = 'mm_id'
return df_mms
def _get_enzyme_data(self):
"""Extract molecular weights of enzymes and process machines.
From xmlAnnotation of reactions corresponding of related concentration variables
:return: enzyme data with name, molecular weight and scaling.
:rtype: pandas.DataFrame
"""
enz_data = {}
for vid, row in self.m_dict['reactions'].iterrows():
if re.match(pf.V_EC_, vid) or re.match(pf.V_PMC_, vid):
xml_attrs = sbmlxdf.misc.extract_xml_attrs(row.get('xmlAnnotation'), ns=XML_SPECIES_NS)
mw_kda = float(xml_attrs['weight_kDa']) if 'weight_kDa' in xml_attrs else None
scale = float(xml_attrs['scale']) if 'scale' in xml_attrs else 1.0
enz_data[vid] = [mw_kda, scale]
cols = ['mw_kDa', 'scale']
df_enz_data = pd.DataFrame(enz_data.values(), index=list(enz_data), columns=cols)
df_enz_data.index.name = 'id'
return df_enz_data
def _get_enzyme_mm_composition(self):
"""Determine Enzyme stoichiometry wrt gene products from model stoichiometry.
Note: stoichiometry wrt enzyme and process machine concentrations
are configured in initial model considering a given model growth rate (e.g. 1.0 h-1)
From stoichiometric coefficients we can derive enzyme/process machine composition
wrt to macro molecules
:return: enzyme/process machine composition wrt macromolecules
:rtype: dict
"""
model_gr = self.m_dict['parameters'].loc['growth_rate'].value
enz_mm_composition = defaultdict(dict)
for rid, row in self.m_dict['reactions'].iterrows():
if re.match(pf.V_EC_, rid) or re.match(pf.V_PMC_, rid):
for sref_str in sbmlxdf.record_generator(row['reactants']):
params = sbmlxdf.extract_params(sref_str)
if re.match(pf.MM_, params['species']):
mm_id = re.sub(f'^{pf.MM_}', '', params['species'])
enz_mm_composition[rid][mm_id] = float(params['stoic']) / model_gr
return dict(enz_mm_composition)
def set_medium_conc(self, ex_sids_mmol_per_l):
"""Configure external metabolite concentrations (mmol/l).
In the RBA, the growth medium is defined by external metabolite concentrations (mmol/L).
These concentrations are used to determine the media-dependent transporter saturation
levels of the uptake reactions. This determination is made using a Michaelis-Menten type
saturation function with a default Michaelis constant (Km) of 1.0 mmol/L.
External media concentration should be scaled to the default Km value.
This implementation of RBA still uses uptake reactions, which will be opened for
the specified medium.
:meta private:
:param ex_sids_mmol_per_l: external metabolite concentrations (mmol/l)
:type ex_sids_mmol_per_l: dict (key: metabolite id/str, val: concentration/float)
"""
# remove any trailing 'M_'
ex_sidx_mmol_per_l = {re.sub(f'^{pf.M_}', '', sid): mmol_per_l for sid, mmol_per_l in
ex_sids_mmol_per_l.items()}
# unlimited uptake for metabolites in external medium
self.set_medium({self.sidx2ex_ridx.get(sidx, 0.0): 1000.0 for sidx in ex_sidx_mmol_per_l})
# configure RBA related medium concentrations (and set concentrations of not available medium to zero)
medium_conc = {sid: ex_sidx_mmol_per_l.get(re.sub(f'^{pf.M_}', '', sid), 0.0)
for sid in self.ex_rid2sid.values()}
self.initial_assignments.set_medium_conc(medium_conc)
def _set_growth_rate(self, growth_rate):
"""Set growth rate dependent model parameters in RBA models
:param float growth_rate: selected growth rate in h-1
:return:
"""
self.initial_assignments.set_growth_rate(growth_rate)
[docs]
def scale_efficiencies(self, scale_effs):
"""Scale enzyme efficiencies in RBA models to support manual parameter tuning.
Use unscale_efficiencies() to return to original values.
.. code-block:: python
ro = RbaOptimization('iML1515_RBA.xml)
scale_effs = {'PDH': {'fwd': 10.0}, 'ACt2rpp': {'fwd': 2.0, 'rev': 5.0}}
ro.scale_efficiencies(scale_effs)
solution = ro.solve(gr_min=0.02, gr_max=1.2, bisection_tol=1e-3)
ro.unscale_efficiencies()
:param dict of dict scale_effs: reaction ids (without prefix `R_`) and dict with reactions 'fwd' and/or 'rev' with float
"""
if self.is_gpm:
self._gp_scale_efficiencies(scale_effs)
else:
self._cp_scale_efficiencies(scale_effs)
[docs]
def unscale_efficiencies(self):
"""Reset previously scaled efficiencies.
see scale_efficiencies().
"""
if self.is_gpm:
self._gp_unscale_efficiencies()
else:
self._cp_unscale_efficiencies()
def _cp_scale_efficiencies(self, scale_effs):
"""Scale efficiencies values for COBRApy interface, by updating coupling coefficients.
:param scale_effs: reaction identifiers (without prefix `R_`) and scaling factor
:type scale_effs: dict (key: reaction id/str, val: dict with directions 'fwd' and/or 'rev' and factor/float)
"""
assert self.is_gpm is False, 'applicable to COBRApy interface only'
orig_coupling = defaultdict(dict)
for ridx, data, in scale_effs.items():
var_id = f'{pf.V_EC_}{ridx}'
if var_id in self.model.reactions:
var = self.model.reactions.get_by_id(var_id)
for direction, factor in data.items():
constr_id = f'{pf.C_EF_}{ridx}' if direction == 'fwd' else f'{pf.C_ER_}{ridx}'
try:
old_val = var.get_coefficient(constr_id)
except KeyError:
print(f'Enzyme constraint not found for {ridx} in {direction} direction')
else:
orig_coupling[ridx][direction] = old_val
var.add_metabolites({constr_id: old_val * factor}, combine=False)
else:
print(f'Enzyme constraint variable not found for reaction {ridx}')
self.orig_coupling = dict(orig_coupling)
def _cp_unscale_efficiencies(self):
"""Unscale kcat values for COBRApy interface, by resetting original coupling coefficients
"""
assert self.is_gpm is False, 'applicable to COBRApy interface only'
for ridx, data, in self.orig_coupling.items():
var = self.model.reactions.get_by_id(f'{pf.V_EC_}{ridx}')
for direction, old_val in data.items():
constr_id = f'{pf.C_EF_}{ridx}' if direction == 'fwd' else f'{pf.C_ER_}{ridx}'
var.add_metabolites({constr_id: old_val}, combine=False)
self.orig_coupling = {}
def _gp_scale_efficiencies(self, scale_effs):
"""Scale enzyme efficiency values for gurobipy interface, by updating coupling coefficients.
:param scale_effs: reaction identifiers (without prefix `R_`) and scaling factor
:type scale_effs: dict (key: reaction id/str, val: dict with directions 'fwd' and/or 'rev' and factor/float)
"""
assert self.is_gpm is True, 'applicable to gurobipy interface only'
orig_coupling = defaultdict(dict)
self.gpm.update()
for ridx, data, in scale_effs.items():
var = self.gpm.getVarByName(f'{pf.V_EC_}{ridx}')
if var:
for direction, factor in data.items():
constr_id = f'{pf.C_EF_}{ridx}' if direction == 'fwd' else f'{pf.C_ER_}{ridx}'
constr = self.gpm.getConstrByName(constr_id)
if constr:
old_val = self.gpm.getCoeff(constr, var)
orig_coupling[ridx][direction] = old_val
self.gpm.chgCoeff(constr, var, old_val * factor)
else:
print(f'Enzyme constraint not found for {ridx} in {direction} direction')
else:
print(f'Enzyme constraint variable not found for reaction {ridx}')
self.gpm.update()
self.orig_coupling = dict(orig_coupling)
def _gp_unscale_efficiencies(self):
"""Reset enzyme efficiencies to original values for gurobipy models.
Used in manually tuning model efficiencies
- call scale_efficiencies(scale_effs) prior to optimization
- call unscale_efficiencies() after optimization to reset old efficiency values
"""
assert self.is_gpm is True, 'applicable to gurobipy interface only'
for ridx, data, in self.orig_coupling.items():
var = self.gpm.getVarByName(f'{pf.V_EC_}{ridx}')
for direction, old_val in data.items():
constr_id = f'{pf.C_EF_}{ridx}' if direction == 'fwd' else f'{pf.C_ER_}{ridx}'
constr = self.gpm.getConstrByName(constr_id)
if var and constr:
self.gpm.chgCoeff(constr, var, old_val)
self.gpm.update()
self.orig_coeffs = {}
[docs]
def get_init_assign_math(self, ia_refs):
"""Get expanded math string used in initial assignments of RBA models.
RBA model uses initial assignments for model update during optimization.
Model parameter values can be set depending on external media concentrations and growth rate.
This method retrieves the expanded math for a list of initial assignment references,
each reference contains a dictionary with the attributes:
`var_id`: variable identifier,
`constr_id`: constraint identifier, and
`constr_type`: `reactants`, `products`, or `flux_bounds`.
.. code-block:: python
ro.get_init_assign_math([{'var_id': 'V_TSMC', 'constr_type': 'reactants', 'constr_id': 'M_hemeA_c'}])
:param list(dict) ia_refs: reference to specific initial assignment
:return: extended math strings
:rtype: list(dict), like input with additional attribute 'math_str'
"""
ia_data = []
for data in ia_refs:
var_id = data.get('var_id', '')
constr_id = data.get('constr_id', '')
constr_type = data.get('constr_type', '')
if (var_id not in self.initial_assignments.target_rids or
constr_type not in ('reactants', 'products', 'flux_bounds') or
constr_id not in getattr(self.initial_assignments.target_rids[var_id], constr_type)):
print(f'Invalid reference. var_id: {var_id}, constr_type: {constr_type}, constr_id: {constr_id}')
else:
trid = self.initial_assignments.target_rids[var_id]
symbol_id = getattr(trid, constr_type)[constr_id]
ia_func = self.initial_assignments.ia_functions[symbol_id]
ia_data.append({'var_id': var_id, 'constr_type': constr_type, 'constr_id': constr_id,
'math_str': ia_func.expanded_math})
return ia_data
[docs]
def set_init_assign_math(self, ia_data):
"""Set expanded math string used in initial assignments of RBA models.
RBA model uses initial assignments for model update during optimization.
Model parameter values can be set depending on external media concentrations and growth rate.
This method sets the expanded math for a list of initial assignment references,
each reference contains a dictionary with the attributes:
`var_id`: variable identifier,
`constr_id`: constraint identifier,
`constr_type`: `reactants`, `products`, or `flux_bounds`, and
`math`: math string, e.g. `growth_rate * 0.002` or a fixed value as 0.0.
.. code-block:: python
ia_data_anaerobe = [{'var_id': 'V_TSMC', 'constr_type': 'reactants', 'constr_id': 'M_hemeA_c', 'math_str': '0.0'}]
if 'EX_o2_e' not in medium:
orig_ia_data = ro.set_init_assign_math(ia_data_anaerobe)
solution = ro.solve(gr_min=0.01, gr_max=0.7, bisection_tol=1e-3)
if 'EX_o2_e' not in medium:
ro.set_init_assign_math(orig_ia_data)
:param list(dict) ia_data: reference to specific initial assignment
:return: the original math string, prior to modification
:rtype: list(dict), like input, but with original math string
"""
orig_ia_data = []
for data in ia_data:
var_id = data.get('var_id', '')
constr_id = data.get('constr_id', '')
constr_type = data.get('constr_type', '')
new_math = str(data.get('math_str', ''))
if (var_id not in self.initial_assignments.target_rids or
constr_type not in ('reactants', 'products', 'flux_bounds') or
constr_id not in getattr(self.initial_assignments.target_rids[var_id], constr_type) or
len(new_math) == 0):
print(
f'Invalid record. var_id: {var_id}, constr_type: {constr_type}, constr_id: {constr_id}, math_str: {new_math}')
else:
trid = self.initial_assignments.target_rids[var_id]
symbol_id = getattr(trid, constr_type)[constr_id]
ia_func = self.initial_assignments.ia_functions[symbol_id]
org_math = ia_func.expanded_math
ia_func.expanded_math = new_math
orig_ia_data.append(
{'var_id': var_id, 'constr_type': constr_type, 'constr_id': constr_id, 'math_str': org_math})
return orig_ia_data
[docs]
def solve(self, gr_min=0.0, gr_max=1.5, bisection_tol=1e-5, max_iter=40):
"""Solve RBA feasibility problem using bisection algorithm.
Support both COBRApy and gurobipy interfaces.
:param float gr_min: minimum growth rate in h-1
:param float gr_max: maximum growth rate in h-1
:param float bisection_tol: stop criteria based on growth rate tolerances (default: 1e-3)
:param int max_iter: stop criteria based on iteration number (default: 40)
:return: optimization solution
:rtype: class:`Solution`
"""
if self.is_gpm:
return self._gp_solve(gr_min, gr_max, bisection_tol, max_iter)
else:
return self._cp_solve(gr_min, gr_max, bisection_tol, max_iter)
def _gp_solve(self, gr_min, gr_max, bisection_tol, max_iter):
"""Solve RBA feasibility problem using bisection algorithm of RbaPy 1.0.
Optionally we can modify the model to non-negative variables only.
:param float gr_min: minimum growth rate to test in h-1, might have to be > 0.0 depending on targets
:param float gr_max: maximum growth rate to test in h-1
:param float bisection_tol: (optional, default 1e-3) stop criteria based on growth rate tolerances
:param int max_iter: (optional, default 40) stop criteria based on number of iterations
:return: optimization solution
:rtype: class:`Solution`
"""
# bisection algorithm to narrow in on maximum growth rate
solution = None
self._set_growth_rate(gr_min)
if self.optimize().status == 'optimal':
n_iter = 1
while ((gr_max - gr_min) > bisection_tol) and (n_iter < max_iter):
gr_test = gr_min + 0.5 * (gr_max - gr_min)
self._set_growth_rate(gr_test)
# self.gpm.reset()
if self.optimize().status == 'optimal':
gr_min = gr_test
else:
gr_max = gr_test
n_iter += 1
# collect optimization solution at optimum growth rate
if (gr_max - gr_min) < bisection_tol:
gr_opt = gr_min
self._set_growth_rate(gr_opt)
solution = self.optimize()
# if solution with gr_min is no longer optimal, lower gr_opt slightly and try again
if solution.status != 'optimal':
gr_opt = max(0.0, gr_opt - 0.5 * bisection_tol)
self._set_growth_rate(gr_opt)
solution = self.optimize()
solution.fluxes = extract_net_fluxes(solution.fluxes)
solution.objective_value = gr_opt
solution.n_iter = n_iter
# determine final density constraint
col = self.gpm.getCol(self.gpm.getVarByName('V_TCD'))
solution.density_constraints = {col.getConstr(idx).constrname: -col.getCoeff(idx)
for idx in range(col.size())}
else:
print('no optimal growth rate')
return solution
def _cp_solve(self, gr_min, gr_max, bisection_tol, max_iter):
"""Solve RBA feasibility problem using bisection algorithm of RbaPy 1.0.
Use COBRApy model contexts, so we can support outer model contexts
:param float gr_min: minimum growth rate to test in h-1, might have to be > 0.0 depending on targets
:param float gr_max: maximum growth rate to test in h-1
:param float bisection_tol: (optional, default 1e-3) stop criteria based on growth rate tolerances
:param int max_iter: (optional, default 40) stop criteria based on number of iterations
:return: optimization solution
:rtype: class:`Solution`
"""
# first, check feasibility a minimal (or zero) growth
# with self.model:
self._set_growth_rate(gr_min)
opt_value = self.model.slim_optimize()
# bisection algorithm to narrow in on maximum growth rate
if math.isfinite(opt_value):
n_iter = 1
while ((gr_max - gr_min) > bisection_tol) and (n_iter < max_iter):
gr_test = gr_min + 0.5 * (gr_max - gr_min)
# with self.model:
self._set_growth_rate(gr_test)
opt_value = self.model.slim_optimize()
if math.isfinite(opt_value):
gr_min = gr_test
else:
gr_max = gr_test
n_iter += 1
# collect optimization solution at optimum growth rate
if (gr_max - gr_min) < bisection_tol:
gr_opt = gr_min
self._set_growth_rate(gr_opt)
solution = self.model.optimize()
# if solution with gr_min is no longer optimal, lower gr_opt slightly and try again
if solution.status != 'optimal':
gr_opt = max(0.0, gr_opt - 0.5 * bisection_tol)
solution = self.model.optimize()
solution.objective_value = gr_opt
solution.n_iter = n_iter
solution.density_constraints = {met.id: -val for met, val
in self.model.reactions.get_by_id(pf.V_TCD).metabolites.items()}
return solution
else:
print('no optimal growth rate')
return None
else:
print(f'Problem infeasible at minimum growth of {gr_min}')
return None
[docs]
def single_gene_deletion(self, genes=None, solution=None, **kwargs):
"""Perform a single gene deletion analysis for RBA based models using gurobipy interface.
Interface aligned to COBRApy single_gene_deletion() method.
Perform single gene deletion simulations for provided list of gene in `genes`.
If `genes` is not provided, perform gene deletion for all genes that may be
active in the wild type solution. In case a gene is not required in the wild type solution, a knockout
simulation is not performed for this gene. Its growth rate value is set to the wild type value and
its optimization status is set to `wt_solution`.
A wild type solution can be provided with parameter `solution`, alternatively a wild type solution
is determined automatically.
Following keyword arguments can be added to the list of parameters:
`gr_min`: (default 0.0) minimum growth rate to test in h-1,
`gr_min_alt`: (optional) alternative minimum growth rate in h-1 (used, if infeasible solution with gr_min),
`gr_max`: (default 1.5) maximum growth rate to test in h-1,
`bisection_tol`: (default 1e-5) stop criteria based on growth rate tolerance,
`max_iter`: (default 40) stop criteria based on number of iterations.
:param list or set genes: (optional) gene ids,
:param solution: (optional) wild type RBA solution
:type solution: :class:`Solution`
:param kwargs: keyword arguments used for RBA bisectional optimization
:return: single gene deletion results per gene with growth rate in h-1, optimization status and fitness value
:rtype: pandas.DataFrame
"""
if self.is_gpm is None:
print('Method implemented for gurobipy interface only.')
return pd.DataFrame()
alt_kwargs = kwargs.copy()
if 'gr_min_alt' in kwargs:
alt_kwargs['gr_min'] = kwargs.pop('gr_min_alt')
del alt_kwargs['gr_min_alt']
# determine wild type growth rate and fluxes, if solution not provided
if solution is None:
solution = self.solve(**kwargs)
wt_gr = solution.objective_value
wt_fluxes = dict(RbaResults(self, {'wt': solution}).collect_fluxes()['wt'])
all_genes = set(self.m_dict['fbcGeneProducts'].label.values)
tx_genes, metab_genes = self.get_tx_metab_genes()
pm_genes = all_genes.difference(tx_genes.union(metab_genes))
# determine gene list, if not provided
if genes is None:
genes = self.get_active_genes(wt_fluxes)
else:
all_genes = genes
# optimization loop for single gene deletions
cols = ['growth_rate', 'status', 'fitness']
sgko_results = {'wt': [wt_gr, 'wt_solution', 1.0]}
for gene in tqdm.tqdm(sorted(all_genes)):
if gene in genes:
# simulate a single gene deletion
orig_rid_bounds = self.gene_knock_outs(gene)
solution = self.solve(**kwargs)
#if (solution is not None) and solution.status == 'infeasible':
# print(f'first solution infeasible - reset model {gene}')
# self.gpm.reset()
# solution = self.solve(**alt_kwargs)
# if solution is None:
# print(f'second solution infeasible {gene}')
# else:
# print(f'solution status second attempt for {gene}: {solution.status}')
self.set_variable_bounds(orig_rid_bounds)
# process simulation result
if solution is None:
sgko_results[gene] = [0.0, 'infeasible', 0.0]
elif solution.status in {'optimal'}:
mutant_gr = solution.objective_value
sgko_results[gene] = [mutant_gr, solution.status, mutant_gr / wt_gr]
else:
sgko_results[gene] = [0.0, solution.status, 0.0]
elif gene in pm_genes:
sgko_results[gene] = [0.0, 'process_machine']
else:
# genes not in gene_list are assumed to not impact the wild type solution
sgko_results[gene] = [wt_gr, 'wt_solution', 1.0]
df_sgko = pd.DataFrame(sgko_results.values(), index=list(sgko_results), columns=cols)
df_sgko.index.name = 'gene'
return df_sgko