Skip to content

Commit

Permalink
Update #126
Browse files Browse the repository at this point in the history
  • Loading branch information
cb-Hades committed Aug 21, 2024
1 parent cd4e354 commit 21ada1a
Show file tree
Hide file tree
Showing 6 changed files with 29 additions and 27 deletions.
2 changes: 1 addition & 1 deletion dev/gapfill-testing.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -1258,7 +1258,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.13"
"version": "3.10.14"
}
},
"nbformat": 4,
Expand Down
4 changes: 2 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -45,11 +45,11 @@ dependencies = [
"gffutils>=0.10.1",
"libchebipy>=1.0.10",
"markupsafe>=2.0.1",
"masschargecuration@git+https://github.com/Biomathsys/MassChargeCuration@installation-fix", # @TODO wait for pull request
"masschargecuration@git+https://github.com/Biomathsys/MassChargeCuration@installation-fix", # @NOTE wait for pull request
'matplotlib >= 3.8.2',
"memote>=0.17.0",
"multiprocess>=0.70.16",
"numpy>=1.20.3,<2.0.0", # @TODO wait for update
"numpy>=1.20.3,<2.0.0", # @TODO wait for update? >2.0.0 not working properly
"ols-client>=0.1.3",
"pandas>=1.2.4",
'pyyaml>=6.0.1',
Expand Down
20 changes: 13 additions & 7 deletions src/refinegems/classes/gapfill.py
Original file line number Diff line number Diff line change
Expand Up @@ -289,7 +289,10 @@ def _map_ec_to_reac_kegg(unmapped_reacs:pd.DataFrame) -> pd.DataFrame:
"""

# get KEGG EC number information
kegg_mapped = pd.DataFrame.from_dict(list(unmapped_reacs.progress_apply(parse_KEGG_ec)))
eccodes = unmapped_reacs['ec-code']
kegg_mapped = pd.DataFrame.from_dict(list(eccodes.progress_apply(parse_KEGG_ec)))
kegg_mapped = unmapped_reacs.merge(kegg_mapped, on='ec-code')

kegg_mapped['is_transport'] = None
kegg_mapped['via'] = kegg_mapped['id'].apply(lambda x: 'KEGG' if x else None)
kegg_mapped.explode(column='id')
Expand All @@ -316,6 +319,7 @@ def _map_ec_to_reac_kegg(unmapped_reacs:pd.DataFrame) -> pd.DataFrame:
table = pd.concat([to_map,table[~table['id'].isna()]])
else:
table = _map_ec_to_reac_bigg(table)


# map to KEGG
if use_KEGG:
Expand Down Expand Up @@ -368,7 +372,7 @@ def __init__(self) -> None:
self.missing_reactions = None # missing reacs, that have not yet been sorted into any category

# general information
self.geneid_type = 'ncbi' # @TODO
self.geneid_type = 'ncbi' # @TODO more options?

# collect stats & Co, can be extended by subclasses
self._statistics = {
Expand Down Expand Up @@ -680,6 +684,7 @@ def add_reactions_from_table(self, model:cobra.Model,

# @TODO : logging
# @TODO : save stuff for report / manual curation
# @DISCUSSION: Check for futile cycles after each addition of a new reaction?
# @TEST - only tested in debugging mode with GeneGapFiller
def fill_model(self, model:Union[cobra.Model,libModel],
**kwargs) -> libModel:
Expand Down Expand Up @@ -749,7 +754,7 @@ def fill_model(self, model:Union[cobra.Model,libModel],
cobramodel = load_model(tmp.name,'cobra')

# .......................
# @DEBUGGING
# @DEBUG
# if len(self.missing_reacs) > 10:
# self.missing_reacs = self.missing_reacs.sample(10)
# print('fill_model: Running in debugging mode')
Expand Down Expand Up @@ -883,7 +888,7 @@ def get_model_genes(model: libModel) -> pd.DataFrame:

# Step 5: map to EC via KEGG
# --------------------------
# @DEBUGGING ...................
# @DEBUG .......................
# genes_not_in_model = genes_not_in_model.iloc[330:350,:]
# print(UserWarning('Running in debugging mode.'))
# ..............................
Expand Down Expand Up @@ -936,7 +941,9 @@ def find_missing_reacs(self,model:cobra.Model):
# ----------------------
# Gapfilling with BioCyc
# ----------------------

# @TODO: Possibility to add KEGG Reaction & MetaNetX IDs to SmartTable
# -> So far these are empty for my test cases
# -> Could be added in a future update
class BioCycGapFiller(GapFiller):
"""
| @TODO: Write correct doc string
Expand Down Expand Up @@ -1348,7 +1355,7 @@ def find_missing_reacs(self, model:cobra.Model,
# Case 2: still no EC but ncbiprotein
# -----------------------------------
# -> access ncbi for ec (optional)
# @DEBUGGING ...................
# @DEBUG .......................
# mapped_reacs = mapped_reacs.iloc[300:350,:]
# print(UserWarning('Running in debugging mode.'))
# ..............................
Expand Down Expand Up @@ -1412,7 +1419,6 @@ def find_missing_reacs(self, model:cobra.Model,
# For filtering
############################################################################
# Evtl hier:

# Inspired by Dr. Reihaneh Mostolizadeh's function to add BioCyc reactions to a model
def replace_reaction_direction_with_fluxes(missing_reacs: pd.DataFrame) -> pd.DataFrame:
"""Extracts the flux lower & upper bounds for each reaction through the entries in column 'Reaction-Direction'
Expand Down
2 changes: 1 addition & 1 deletion src/refinegems/curation/curate.py
Original file line number Diff line number Diff line change
Expand Up @@ -323,7 +323,7 @@ def resolve_duplicate_reactions(model:cobra.Model, based_on:str='reaction', remo
keep_reac.gene_reaction_rule = gpr_to_add # add the one existing the other
else:
pass # nothing to add
model.reactions.get_by_id(r_id).delete()
model.reactions.get_by_id(r_id).remove_from_model() # @NOTE throws warning, but seems to be a cobra-internal issue
print(F'\tDuplicate reaction {r_id} found. Combined to {keep_reac.id} and deleted.')
else:
print(f'\tDuplicate reactions {", ".join(mnx[1]["id"].tolist())} found.')
Expand Down
10 changes: 7 additions & 3 deletions src/refinegems/curation/polish.py
Original file line number Diff line number Diff line change
Expand Up @@ -1469,11 +1469,15 @@ def check_direction(model:cobra.Model,data:Union[pd.DataFrame,str]) -> cobra.Mod
if not pd.isnull(direction):
if 'REVERSIBLE' in direction:
# set reaction as reversible by setting default values for upper and lower bounds
r.lower_bound = -1000.
r.lower_bound = cobra.Configuration().lower_bound
elif 'RIGHT-TO-LEFT' in direction:
# invert the default values for the boundaries
r.lower_bound = -1000.
r.upper_bound = 0.
r.lower_bound = cobra.Configuration().lower_bound
r.upper_bound = 0.0
elif 'LEFT-To-RIGHT' in direction:
# In case direction was already wrong
r.lower_bound = 0.0
r.upper_bound = cobra.Configuration().upper_bound
else:
# left to right case is the standart for adding reactions
# = nothing left to do
Expand Down
18 changes: 5 additions & 13 deletions src/refinegems/utility/entities.py
Original file line number Diff line number Diff line change
Expand Up @@ -1111,9 +1111,9 @@ def build_reaction_mnx(model:cobra.Model, id:str,

# set reversibility
if rev:
new_reac.bounds = (1000.0,1000.0)
new_reac.bounds = cobra.Configuration().bounds
else:
new_reac.bounds = (0.0,1000.0)
new_reac.bounds = (0.0,cobra.Configuration().upper_bound)

# get annotations
# ---------------
Expand All @@ -1125,7 +1125,7 @@ def build_reaction_mnx(model:cobra.Model, id:str,
new_reac.annotation[db] = [m.split(':',1)[1] for m in db_matches['source'].tolist()]
# update reactions direction, if MetaCyc has better information
if db == 'metacyc.reaction' and len(db_matches[db_matches['source'].str.contains('-->')]):
new_reac.bounds = (0.0,1000.0)
new_reac.bounds = (0.0,cobra.Configuration().upper_bound)
# add additional references from the parameter
_add_annotations_from_dict_cobra(references,new_reac)

Expand Down Expand Up @@ -1223,11 +1223,12 @@ def build_reaction_kegg(model:cobra.Model, id:str=None, reac_str:str=None,
# Reaction reconstruction from equation
# -------------------------------------

# skip, if not reaction string is available
# skip, if no reaction string is available
if not reac_str:
return None # reconstruction not possible

# parse reaction string
# @TODO filter out not useable reaction strings
reactants,products,comparts,rev = parse_reac_str(reac_str,'KEGG')

# ..............................................
Expand Down Expand Up @@ -1356,15 +1357,6 @@ def build_reaction_bigg(model:cobra.Model, id:str,
list:
List of matching reaction IDs (in model).
"""
# @TODO:
# 1. If no Reaction-Direction is provided set direction to reversible!
# 2. No compartment provided -> Set compartment to cytosol
# 3. Set charge to zero?
# 4. Get information on formula with requests? biocyc api seems not helpful!
# 5. In Reihaneh's/Finn Mier's code in py4gems all reactants have stoichiometry -1 and all products 1...
# -> Found example that stoichiometry in equation! Split at spaces to get?
# @DISCUSSION: Still no clue on how to map the names in the equations to actual biocyc compound IDs...
# @DISCUSSION: Check for futile cycles after each addition of a new reaction?

# ---------------------
# check, if ID in model
Expand Down

0 comments on commit 21ada1a

Please sign in to comment.