From 5532912c1bdc4c7e6caf4bfadfa80f324116bdce Mon Sep 17 00:00:00 2001 From: Jackson Burns Date: Tue, 3 Oct 2023 11:17:04 -0400 Subject: [PATCH 01/15] switch rdkit to the conda-forge channel --- environment.yml | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/environment.yml b/environment.yml index 4596f580ee0..e3ac74d620e 100644 --- a/environment.yml +++ b/environment.yml @@ -44,6 +44,7 @@ dependencies: - conda-forge::mopac - conda-forge::cclib >=1.6.3,!=1.8.0 - conda-forge::openbabel >= 3 + - conda-forge::rdkit >=2020.03.3.0 # general-purpose external software tools - conda-forge::julia=1.9.1 @@ -99,10 +100,6 @@ dependencies: # Our build of this is version 0.0.1 (!!) and we are using parts # of the API that are now gone. Need a serious PR to fix this. - - rmg::rdkit >=2020.03.3.0 - # We should use the official channel, not sure how difficult this - # change will be. - # conda mutex metapackage - nomkl From 9346621f36e10d7d4f01f77e8154ebd7705b86e1 Mon Sep 17 00:00:00 2001 From: Xiaorui Dong Date: Wed, 4 Oct 2023 00:30:25 -0400 Subject: [PATCH 02/15] Update RDKit version to >=2022.09.1 --- environment.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/environment.yml b/environment.yml index e3ac74d620e..38b84329229 100644 --- a/environment.yml +++ b/environment.yml @@ -44,7 +44,7 @@ dependencies: - conda-forge::mopac - conda-forge::cclib >=1.6.3,!=1.8.0 - conda-forge::openbabel >= 3 - - conda-forge::rdkit >=2020.03.3.0 + - conda-forge::rdkit >=2022.09.1 # general-purpose external software tools - conda-forge::julia=1.9.1 From ea1772e0551f2ec3bd7da61f2f5ae04fd4178cd2 Mon Sep 17 00:00:00 2001 From: Xiaorui Dong Date: Wed, 4 Oct 2023 00:33:43 -0400 Subject: [PATCH 03/15] Update the ref inchi used in translatorTest for ch2o2 In some early versions of RDKit, RDKit generated a different (but valid) InChI compared to OpenBabel.
Somewhere between versions 2020.03.3 and 2022.09.1, RDKit changed (and potentially improved) its InChI handling, so RDKit and OpenBabel now return the same InChI, which is the one used for the unit tests in this commit. --- test/rmgpy/molecule/translatorTest.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/test/rmgpy/molecule/translatorTest.py b/test/rmgpy/molecule/translatorTest.py index 70cd62619df..b711fc37430 100644 --- a/test/rmgpy/molecule/translatorTest.py +++ b/test/rmgpy/molecule/translatorTest.py @@ -238,7 +238,7 @@ def test_ch2o2(self): 3 O 1 {1,S} """ - aug_inchi = "InChI=1/CH2O2/c2-1-3/h1H,(H,2,3)/u1,2" + aug_inchi = "InChI=1/CH2O2/c2-1-3/h1-2H/u1,3" self.compare(adjlist, aug_inchi) def test_c7h10(self): @@ -1411,8 +1411,8 @@ def test_c3h3o3(self): self.compare(inchi, u_indices) def test_ch2o2(self): - inchi = "CH2O2/c2-1-3/h1H,(H,2,3)" - u_indices = [1, 2] + inchi = "CH2O2/c2-1-3/h1-2H" + u_indices = [1, 3] self.compare(inchi, u_indices) def test_c2h2o3(self): From 1ec9ebaee43d4e779dec47fccab8bcc73cd66bbf Mon Sep 17 00:00:00 2001 From: Xiaorui Dong Date: Wed, 4 Oct 2023 00:38:40 -0400 Subject: [PATCH 04/15] Update molecule and species object inchi conversion to enable backend Previously, the backend could rarely be changed when converting a molecule to its InChI/InChIKey. An argument is now added to let the user choose which backend to use. --- rmgpy/molecule/molecule.pxd | 10 +++++----- rmgpy/molecule/molecule.py | 40 ++++++++++++++++++++++++------------- rmgpy/qm/molecule.py | 4 ++-- rmgpy/species.py | 8 ++++---- 4 files changed, 37 insertions(+), 25 deletions(-) diff --git a/rmgpy/molecule/molecule.pxd b/rmgpy/molecule/molecule.pxd index 4590bbed6be..f227e5d5334 100644 --- a/rmgpy/molecule/molecule.pxd +++ b/rmgpy/molecule/molecule.pxd @@ -224,14 +224,14 @@ cdef class Molecule(Graph): bint raise_charge_exception=?, bint check_consistency=?) 
cpdef from_xyz(self, np.ndarray atomic_nums, np.ndarray coordinates, float critical_distance_factor=?, bint raise_atomtype_exception=?) - - cpdef str to_inchi(self) - cpdef str to_augmented_inchi(self) + cpdef str to_inchi(self, str backend=?) + + cpdef str to_augmented_inchi(self, str backend=?) - cpdef str to_inchi_key(self) + cpdef str to_inchi_key(self, str backend=?) - cpdef str to_augmented_inchi_key(self) + cpdef str to_augmented_inchi_key(self, str backend=?) cpdef str to_smiles(self) diff --git a/rmgpy/molecule/molecule.py b/rmgpy/molecule/molecule.py index e93f0092ebd..3bea18e699f 100644 --- a/rmgpy/molecule/molecule.py +++ b/rmgpy/molecule/molecule.py @@ -1863,62 +1863,74 @@ def to_single_bonds(self, raise_atomtype_exception=True): new_mol.update_atomtypes(raise_exception=raise_atomtype_exception) return new_mol - def to_inchi(self): + def to_inchi(self, backend='rdkit-first'): """ Convert a molecular structure to an InChI string. Uses `RDKit `_ to perform the conversion. Perceives aromaticity. - + or - + Convert a molecular structure to an InChI string. Uses `OpenBabel `_ to perform the conversion. + + Available options for InChI backend: 'rdkit-first' (default), + 'try-all', 'rdkit', or 'openbabel'. """ try: - return translator.to_inchi(self) + return translator.to_inchi(self, backend=backend) except: logging.exception(f"Error for molecule \n{self.to_adjacency_list()}") raise - def to_augmented_inchi(self): + def to_augmented_inchi(self, backend='rdkit-first'): """ Adds an extra layer to the InChI denoting the multiplicity of the molecule. - + Separate layer with a forward slash character. + + Available options for InChI backend: 'rdkit-first' (default), + 'try-all', 'rdkit', or 'openbabel'. 
""" try: - return translator.to_inchi(self, aug_level=2) + return translator.to_inchi(self, backend=backend, aug_level=2) except: logging.exception(f"Error for molecule \n{self.to_adjacency_list()}") raise - def to_inchi_key(self): + def to_inchi_key(self, backend='rdkit-first'): """ Convert a molecular structure to an InChI Key string. Uses `OpenBabel `_ to perform the conversion. - - or - + + or + Convert a molecular structure to an InChI Key string. Uses `RDKit `_ to perform the conversion. + + Available options for InChI backend: 'rdkit-first' (default), + 'try-all', 'rdkit', or 'openbabel'. """ try: - return translator.to_inchi_key(self) + return translator.to_inchi_key(self, backend=backend) except: logging.exception(f"Error for molecule \n{self.to_adjacency_list()}") raise - def to_augmented_inchi_key(self): + def to_augmented_inchi_key(self, backend='rdkit-first'): """ Adds an extra layer to the InChIKey denoting the multiplicity of the molecule. Simply append the multiplicity string, do not separate by a character like forward slash. + + Available options for InChI backend: 'rdkit-first' (default), + 'try-all', 'rdkit', or 'openbabel'. 
""" try: - return translator.to_inchi_key(self, aug_level=2) + return translator.to_inchi_key(self, backend=backend, aug_level=2) except: logging.exception(f"Error for molecule \n{self.to_adjacency_list()}") raise diff --git a/rmgpy/qm/molecule.py b/rmgpy/qm/molecule.py index c3a9a9fae46..81d771781d0 100644 --- a/rmgpy/qm/molecule.py +++ b/rmgpy/qm/molecule.py @@ -520,11 +520,11 @@ def load_thermo_data(self): self.qm_data = local_context["qmData"] return thermo - def get_augmented_inchi_key(self): + def get_augmented_inchi_key(self, backend='rdkit-first'): """ Returns the augmented InChI from self.molecule """ - return self.molecule.to_augmented_inchi_key() + return self.molecule.to_augmented_inchi_key(backend=backend) def get_mol_file_path_for_calculation(self, attempt): """ diff --git a/rmgpy/species.py b/rmgpy/species.py index de0c3a2d9b1..e0ae40a2f0a 100644 --- a/rmgpy/species.py +++ b/rmgpy/species.py @@ -740,17 +740,17 @@ def copy(self, deep=False): return other - def get_augmented_inchi(self): + def get_augmented_inchi(self, backend='rdkit-first'): if self.aug_inchi is None: - self.aug_inchi = self.generate_aug_inchi() + self.aug_inchi = self.generate_aug_inchi(backend=backend) return self.aug_inchi - def generate_aug_inchi(self): + def generate_aug_inchi(self, backend='rdkit-first'): candidates = [] self.generate_resonance_structures() for mol in self.molecule: try: - cand = [mol.to_augmented_inchi(), mol] + cand = [mol.to_augmented_inchi(backend=backend), mol] except ValueError: pass # not all resonance structures can be parsed into InChI (e.g. if containing a hypervalance atom) else: From 1b9039098c95106f310aff65d5b70e75e03a2164 Mon Sep 17 00:00:00 2001 From: Xiaorui Dong Date: Wed, 4 Oct 2023 07:48:00 -0400 Subject: [PATCH 05/15] Skip the unit test for failure inchi translation The error was due to an bug of RDKit, returning an InChI with less atoms for CH2NN. This bug has been fixed in the newer version of RDKit. 
Temporarily skip this test until we find a new problematic example. --- test/rmgpy/molecule/translatorTest.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/test/rmgpy/molecule/translatorTest.py b/test/rmgpy/molecule/translatorTest.py index b711fc37430..cb62d9dc97e 100644 --- a/test/rmgpy/molecule/translatorTest.py +++ b/test/rmgpy/molecule/translatorTest.py @@ -57,7 +57,8 @@ def test_empty_molecule(self): assert mol.to_smiles() == "" assert mol.to_inchi() == "" - @patch("rmgpy.molecule.translator.logging") + @pytest.mark.skip(reason='This unit test checks for a bug which has been ' + 'patched in version of RDKit >= 2022.9.1.') def test_failure_message(self, mock_logging): """Test that we log the molecule adjlist upon failure.""" mol = Molecule(smiles="[CH2-][N+]#N") From 3706001ea1effff5b07abf768a319a86e1b5deca Mon Sep 17 00:00:00 2001 From: Jackson Burns <33505528+JacksonBurns@users.noreply.github.com> Date: Mon, 16 Oct 2023 10:58:13 -0500 Subject: [PATCH 06/15] remove rdkit logger before making the checkModels logger see the comments in the source code for more details --- scripts/checkModels.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/scripts/checkModels.py b/scripts/checkModels.py index 2ebd814d1e8..7d62b99e832 100644 --- a/scripts/checkModels.py +++ b/scripts/checkModels.py @@ -284,6 +284,18 @@ def initialize_log(verbose, log_file_name='checkModels.log'): `verbose` parameter is an integer specifying the amount of log text seen at the console; the levels correspond to those of the :data:`logging` module. """ + # since RDKit 2022.03.1, logging is done using the Python logger instead of the + # Cout streams. 
This does not affect running RMG normally, but this testing file + # only works properly if it is the only logger + # see https://github.com/rdkit/rdkit/pull/4846 for the changes in RDKit + + # clear all other existing loggers + # https://stackoverflow.com/a/12158233 + for handler in logging.root.handlers[:]: + logging.root.removeHandler(handler) + + # once moved to a more recent python (at least 3.8), just add force=true to this statement + # and remove the above logging.basicConfig( filename=log_file_name, filemode='w', From 55d39b6a49d5c184908452c209b413854f295879 Mon Sep 17 00:00:00 2001 From: Jackson Burns <33505528+JacksonBurns@users.noreply.github.com> Date: Mon, 16 Oct 2023 14:40:25 -0400 Subject: [PATCH 07/15] explicitly pull descriptastorus from conda-forge instead of rmg --- environment.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/environment.yml b/environment.yml index 38b84329229..3dc6b3175df 100644 --- a/environment.yml +++ b/environment.yml @@ -45,6 +45,7 @@ dependencies: - conda-forge::cclib >=1.6.3,!=1.8.0 - conda-forge::openbabel >= 3 - conda-forge::rdkit >=2022.09.1 + - conda-forge::descriptastorus # requirement of chemprop # general-purpose external software tools - conda-forge::julia=1.9.1 From 72886d168fd1d791c0d5cac42ac07f9b7b0cfcba Mon Sep 17 00:00:00 2001 From: Jackson Burns <33505528+JacksonBurns@users.noreply.github.com> Date: Mon, 16 Oct 2023 15:37:42 -0400 Subject: [PATCH 08/15] update changelog, limit descriptastorus version for scipy compatibility --- environment.yml | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/environment.yml b/environment.yml index 3dc6b3175df..0c18ef1f22d 100644 --- a/environment.yml +++ b/environment.yml @@ -14,6 +14,7 @@ # Changelog: # - May 15, 2023 Added this changelog, added inline documentation, # made dependency list more explicit (@JacksonBurns). 
+# - October 16, 2023 Switched RDKit and descripatastorus to conda-forge # name: rmg_env channels: @@ -45,7 +46,12 @@ dependencies: - conda-forge::cclib >=1.6.3,!=1.8.0 - conda-forge::openbabel >= 3 - conda-forge::rdkit >=2022.09.1 - - conda-forge::descriptastorus # requirement of chemprop + # requirement of chemprop + # change this to >=2.6.1 upon moving to Python 3.11+, see + # https://github.com/chemprop/chemprop/issues/428 for why + # TL;DR: scipy introduced incompatible API changes with 2.6.1+ of + # descriptastorus + - conda-forge::descriptastorus <2.6.1 # general-purpose external software tools - conda-forge::julia=1.9.1 @@ -56,7 +62,7 @@ dependencies: - coverage - cython >=0.25.2 - scikit-learn - - scipy + - scipy <1.11 - numpy >=1.10.0 - pydot - jinja2 From ffa6d8ff326083a68f9552ed1d74a3b9fb918dcc Mon Sep 17 00:00:00 2001 From: Jackson Burns <33505528+JacksonBurns@users.noreply.github.com> Date: Tue, 17 Oct 2023 10:56:12 -0500 Subject: [PATCH 09/15] temporarily remove `chemprop` support completely --- .conda/meta.yaml | 1 - environment.yml | 7 ++----- rmgpy/ml/estimator.py | 14 ++++++++++++-- 3 files changed, 14 insertions(+), 8 deletions(-) diff --git a/.conda/meta.yaml b/.conda/meta.yaml index b1df7678e67..7cd02969fe7 100644 --- a/.conda/meta.yaml +++ b/.conda/meta.yaml @@ -30,7 +30,6 @@ requirements: - cairocffi - cantera >=2.3.0 - cclib >=1.6.3 - - chemprop - coolprop - coverage - cython >=0.25.2 diff --git a/environment.yml b/environment.yml index 0c18ef1f22d..b13d9cc50fd 100644 --- a/environment.yml +++ b/environment.yml @@ -14,7 +14,8 @@ # Changelog: # - May 15, 2023 Added this changelog, added inline documentation, # made dependency list more explicit (@JacksonBurns). 
-# - October 16, 2023 Switched RDKit and descripatastorus to conda-forge +# - October 16, 2023 Switched RDKit and descripatastorus to conda-forge, +# moved diffeqpy to pip and (temporarily) removed chemprop # name: rmg_env channels: @@ -103,10 +104,6 @@ dependencies: # It is only on pip, so we will need to do something like: # https://stackoverflow.com/a/35245610 - - rmg::chemprop - # Our build of this is version 0.0.1 (!!) and we are using parts - # of the API that are now gone. Need a serious PR to fix this. - # conda mutex metapackage - nomkl diff --git a/rmgpy/ml/estimator.py b/rmgpy/ml/estimator.py index 84cafb651cc..81e1d9e701e 100644 --- a/rmgpy/ml/estimator.py +++ b/rmgpy/ml/estimator.py @@ -32,10 +32,10 @@ from argparse import Namespace from typing import Callable, Union +chemprop = None try: import chemprop except ImportError as e: - chemprop = None chemprop_exception = e import numpy as np @@ -43,6 +43,16 @@ from rmgpy.species import Species from rmgpy.thermo import ThermoData +ADMONITION = """ +Support for predicting thermochemistry using chemprop has been temporarily removed +from RMG, pending official chemprop support for Python 3.11 and newer. + +To use chemprop and RMG, install a previous version of RMG (3.1.1 or earlier). + +See the link below for status of re-integration of chemprop: +https://github.com/ReactionMechanismGenerator/RMG-Py/issues/2559 +""" + class MLEstimator: """ @@ -118,7 +128,7 @@ def load_estimator(model_dir: str) -> Callable[[str], np.ndarray]: if chemprop is None: # Delay chemprop ImportError until we actually try to use it # so that RMG can load successfully without chemprop. 
- raise chemprop_exception + raise RuntimeError(ADMONITION + "\nOriginal Exception:\n" + str(chemprop_exception)) args = Namespace() # Simple class to hold attributes From d3359aadb38fe04bd03912f97f73d796f6be927b Mon Sep 17 00:00:00 2001 From: Jackson Burns <33505528+JacksonBurns@users.noreply.github.com> Date: Tue, 17 Oct 2023 10:57:09 -0500 Subject: [PATCH 10/15] remove descriptastorus since chemprop is gone --- environment.yml | 6 ------ 1 file changed, 6 deletions(-) diff --git a/environment.yml b/environment.yml index b13d9cc50fd..4ef4b97a7b9 100644 --- a/environment.yml +++ b/environment.yml @@ -47,12 +47,6 @@ dependencies: - conda-forge::cclib >=1.6.3,!=1.8.0 - conda-forge::openbabel >= 3 - conda-forge::rdkit >=2022.09.1 - # requirement of chemprop - # change this to >=2.6.1 upon moving to Python 3.11+, see - # https://github.com/chemprop/chemprop/issues/428 for why - # TL;DR: scipy introduced incompatible API changes with 2.6.1+ of - # descriptastorus - - conda-forge::descriptastorus <2.6.1 # general-purpose external software tools - conda-forge::julia=1.9.1 From 07d7a17a5641ae75501955917d9bc7d9921882f7 Mon Sep 17 00:00:00 2001 From: Jackson Burns Date: Thu, 7 Mar 2024 12:47:37 -0500 Subject: [PATCH 11/15] skip tests which require chemprop --- test/rmgpy/data/thermoTest.py | 14 ++++++++------ test/rmgpy/ml/estimatorTest.py | 4 +++- 2 files changed, 11 insertions(+), 7 deletions(-) diff --git a/test/rmgpy/data/thermoTest.py b/test/rmgpy/data/thermoTest.py index f4628100073..7360b6f21ea 100644 --- a/test/rmgpy/data/thermoTest.py +++ b/test/rmgpy/data/thermoTest.py @@ -53,7 +53,7 @@ split_bicyclic_into_single_rings, ) from rmgpy.exceptions import DatabaseError -from rmgpy.ml.estimator import MLEstimator +from rmgpy.ml.estimator import MLEstimator, ADMONITION from rmgpy.molecule.molecule import Molecule from rmgpy.quantity import Quantity from rmgpy.species import Species @@ -123,11 +123,11 @@ def setup_class(cls): ) 
cls.databaseWithoutLibraries.set_binding_energies("Pt111") - # Set up ML estimator - models_path = os.path.join(settings["database.directory"], "thermo", "ml", "main") - hf298_path = os.path.join(models_path, "hf298") - s298_cp_path = os.path.join(models_path, "s298_cp") - cls.ml_estimator = MLEstimator(hf298_path, s298_cp_path) + # Set up ML estimator - temporarily removed, see rmgpy.ml.estimator + # models_path = os.path.join(settings["database.directory"], "thermo", "ml", "main") + # hf298_path = os.path.join(models_path, "hf298") + # s298_cp_path = os.path.join(models_path, "s298_cp") + # cls.ml_estimator = MLEstimator(hf298_path, s298_cp_path) def test_pickle(self): """ @@ -602,6 +602,7 @@ def test_species_thermo_generation_library(self): assert arom.is_isomorphic(spec.molecule[0]) # The aromatic structure should now be the first one assert "library" in thermo.comment, "Thermo not found from library, test purpose not fulfilled." + @pytest.mark.skip(reason=ADMONITION) def test_species_thermo_generation_ml(self): """Test thermo generation for species objects based on ML estimation.""" @@ -652,6 +653,7 @@ def test_species_thermo_generation_ml(self): assert thermo1 is None assert thermo2 is None + @pytest.mark.skip(reason=ADMONITION) def test_thermo_generation_ml_settings(self): """Test that thermo generation with ML correctly respects settings""" diff --git a/test/rmgpy/ml/estimatorTest.py b/test/rmgpy/ml/estimatorTest.py index bd2213dc1be..bd3fc9574b6 100644 --- a/test/rmgpy/ml/estimatorTest.py +++ b/test/rmgpy/ml/estimatorTest.py @@ -31,9 +31,11 @@ from rmgpy import settings -from rmgpy.ml.estimator import MLEstimator +from rmgpy.ml.estimator import MLEstimator, ADMONITION +import pytest +@pytest.mark.skip(reason=ADMONITION) class TestMLEstimator: """ Contains unit tests for rmgpy.ml.estimator From e47a23f9a3cfc6080fb44a454cd4a17717c384c4 Mon Sep 17 00:00:00 2001 From: Jackson Burns Date: Thu, 7 Mar 2024 13:57:46 -0500 Subject: [PATCH 12/15] skip input 
mlestimator test --- test/rmgpy/rmg/inputTest.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/test/rmgpy/rmg/inputTest.py b/test/rmgpy/rmg/inputTest.py index ef69ab6ade0..95f53addea3 100644 --- a/test/rmgpy/rmg/inputTest.py +++ b/test/rmgpy/rmg/inputTest.py @@ -31,6 +31,7 @@ import rmgpy.rmg.input as inp from rmgpy.rmg.main import RMG +from rmgpy.ml.estimator import ADMONITION import pytest @@ -92,7 +93,7 @@ def test_importing_database_reaction_libraries_from_true_tuple(self): assert isinstance(rmg.reaction_libraries[0], tuple) assert rmg.reaction_libraries[0][1] - +@pytest.mark.skip(reason=ADMONITION) class TestInputMLEstimator: """ Contains unit tests rmgpy.rmg.input.mlEstimator From a23b5f303ca73567c241b6139e418cee53b7b7a1 Mon Sep 17 00:00:00 2001 From: Xiaorui Dong Date: Thu, 7 Mar 2024 18:31:34 -0500 Subject: [PATCH 13/15] Update docstring of translator to reflect the default backend usage --- rmgpy/molecule/translator.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/rmgpy/molecule/translator.py b/rmgpy/molecule/translator.py index 1fd0cd22205..2bedf68abc6 100644 --- a/rmgpy/molecule/translator.py +++ b/rmgpy/molecule/translator.py @@ -169,7 +169,7 @@ def to_inchi(mol, backend='rdkit-first', aug_level=0): Uses RDKit or OpenBabel for conversion. Args: - backend choice of backend, 'try-all', 'rdkit', or 'openbabel' + backend choice of backend, 'rdkit-first' (default), 'try-all', 'rdkit', or 'openbabel' aug_level level of augmentation, 0, 1, or 2 """ cython.declare(inchi=str, ulayer=str, player=str, mlayer=str) @@ -205,7 +205,7 @@ def to_inchi_key(mol, backend='rdkit-first', aug_level=0): Uses RDKit or OpenBabel for conversion. 
Args: - backend choice of backend, 'try-all', 'rdkit', or 'openbabel' + backend choice of backend, 'rdkit-first' (default), 'try-all', 'rdkit', or 'openbabel' aug_level level of augmentation, 0, 1, or 2 """ cython.declare(key=str, ulayer=str, player=str, mlayer=str) @@ -277,8 +277,8 @@ def to_smiles(mol, backend='default'): def from_inchi(mol, inchistr, backend='try-all', raise_atomtype_exception=True): """ Convert an InChI string `inchistr` to a molecular structure. Uses - a user-specified backend for conversion, currently supporting - rdkit (default) and openbabel. + a user-specified backend for conversion, currently supporting 'try-all' (default), rdkit-first, + rdkit, and openbabel. """ if inchiutil.INCHI_PREFIX in inchistr: return _read(mol, inchistr, 'inchi', backend, raise_atomtype_exception=raise_atomtype_exception) @@ -328,7 +328,7 @@ def from_smarts(mol, smartsstr, backend='rdkit', raise_atomtype_exception=True): def from_smiles(mol, smilesstr, backend='try-all', raise_atomtype_exception=True): """ Convert a SMILES string `smilesstr` to a molecular structure. Uses - a user-specified backend for conversion, currently supporting + a user-specified backend for conversion, currently supporting try-all (default), rdkit-first, rdkit (default) and openbabel. 
""" return _read(mol, smilesstr, 'smi', backend, raise_atomtype_exception=raise_atomtype_exception) From d8d430f6c2eb9bf6b787a22f5f53375674eca34f Mon Sep 17 00:00:00 2001 From: Jackson Burns Date: Wed, 13 Mar 2024 10:21:14 -0400 Subject: [PATCH 14/15] remove scipy version restriction, clarify diffeqpy removal issue former is not needed since the descriptastorus (which was incompatible with latest scipy and required this limitation) is no longer in the dep list --- environment.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/environment.yml b/environment.yml index 4ef4b97a7b9..97b22265fe2 100644 --- a/environment.yml +++ b/environment.yml @@ -97,6 +97,8 @@ dependencies: # rather than ours (which is only made so that we can get it from conda) # It is only on pip, so we will need to do something like: # https://stackoverflow.com/a/35245610 + # Note that _some other_ dep. in this list requires diffeqpy in its recipe + # which will cause it to be downloaded from the rmg conda channel # conda mutex metapackage - nomkl From 7496009830e1db9b809acdde4ff96d630b44c114 Mon Sep 17 00:00:00 2001 From: Xiaorui Dong Date: Wed, 13 Mar 2024 10:45:19 -0400 Subject: [PATCH 15/15] Renaming InChI/SMILES backend option try-all to openbabel-first try-all is a bit of confusing, while its actual behavior is using openbabel-first whenever possible. Although at rmgpy.molecule.translator line 41-46, there is a try/except to check if openbabel is available, and sometimes only RDKit is included in the BACKEND; Given openbabel is by default installed in the RMG-Py environment, it should be reasonable to call it openbabel-first. 
--- arkane/common.py | 4 ++-- rmgpy/molecule/molecule.py | 32 ++++++++++++++++++++++---------- rmgpy/molecule/translator.py | 20 ++++++++++---------- 3 files changed, 34 insertions(+), 22 deletions(-) diff --git a/arkane/common.py b/arkane/common.py index 5227298895c..a706417b886 100644 --- a/arkane/common.py +++ b/arkane/common.py @@ -191,11 +191,11 @@ def update_species_attributes(self, species=None): self.multiplicity = species.molecule[0].multiplicity self.formula = species.molecule[0].get_formula() try: - inchi = to_inchi(species.molecule[0], backend='try-all', aug_level=0) + inchi = to_inchi(species.molecule[0], backend='openbabel-first', aug_level=0) except ValueError: inchi = '' try: - inchi_key = to_inchi_key(species.molecule[0], backend='try-all', aug_level=0) + inchi_key = to_inchi_key(species.molecule[0], backend='openbabel-first', aug_level=0) except ValueError: inchi_key = '' self.inchi = inchi diff --git a/rmgpy/molecule/molecule.py b/rmgpy/molecule/molecule.py index 3bea18e699f..21e5007c4d2 100644 --- a/rmgpy/molecule/molecule.py +++ b/rmgpy/molecule/molecule.py @@ -1768,9 +1768,13 @@ def _repr_png_(self): os.unlink(temp_file_name) return png - def from_inchi(self, inchistr, backend='try-all', raise_atomtype_exception=True): + def from_inchi(self, inchistr, backend='openbabel-first', raise_atomtype_exception=True): """ Convert an InChI string `inchistr` to a molecular structure. + + RDKit and Open Babel are the two backends used in RMG. It is possible to use a + single backend or try different backends in sequence. The available options for the ``backend`` + argument: 'openbabel-first'(default), 'rdkit-first', 'rdkit', or 'openbabel'. 
""" translator.from_inchi(self, inchistr, backend, raise_atomtype_exception=raise_atomtype_exception) return self @@ -1782,9 +1786,13 @@ def from_augmented_inchi(self, aug_inchi, raise_atomtype_exception=True): translator.from_augmented_inchi(self, aug_inchi, raise_atomtype_exception=raise_atomtype_exception) return self - def from_smiles(self, smilesstr, backend='try-all', raise_atomtype_exception=True): + def from_smiles(self, smilesstr, backend='openbabel-first', raise_atomtype_exception=True): """ Convert a SMILES string `smilesstr` to a molecular structure. + + RDKit and Open Babel are the two backends used in RMG. It is possible to use a + single backend or try different backends in sequence. The available options for the ``backend`` + argument: 'openbabel-first'(default), 'rdkit-first', 'rdkit', or 'openbabel'. """ translator.from_smiles(self, smilesstr, backend, raise_atomtype_exception=raise_atomtype_exception) return self @@ -1874,8 +1882,9 @@ def to_inchi(self, backend='rdkit-first'): Convert a molecular structure to an InChI string. Uses `OpenBabel `_ to perform the conversion. - Available options for InChI backend: 'rdkit-first' (default), - 'try-all', 'rdkit', or 'openbabel'. + It is possible to use a single backend or try different backends in sequence. + The available options for the ``backend`` argument: 'rdkit-first'(default), + 'openbabel-first', 'rdkit', or 'openbabel'. """ try: return translator.to_inchi(self, backend=backend) @@ -1890,8 +1899,9 @@ def to_augmented_inchi(self, backend='rdkit-first'): Separate layer with a forward slash character. - Available options for InChI backend: 'rdkit-first' (default), - 'try-all', 'rdkit', or 'openbabel'. + RDKit and Open Babel are the two backends used in RMG. It is possible to use a + single backend or try different backends in sequence. The available options for the ``backend`` + argument: 'rdkit-first'(default), 'openbabel-first', 'rdkit', or 'openbabel'. 
""" try: return translator.to_inchi(self, backend=backend, aug_level=2) @@ -1909,8 +1919,9 @@ def to_inchi_key(self, backend='rdkit-first'): Convert a molecular structure to an InChI Key string. Uses `RDKit `_ to perform the conversion. - Available options for InChI backend: 'rdkit-first' (default), - 'try-all', 'rdkit', or 'openbabel'. + It is possible to use a single backend or try different backends in sequence. + The available options for the ``backend`` argument: 'rdkit-first'(default), + 'openbabel-first', 'rdkit', or 'openbabel'. """ try: return translator.to_inchi_key(self, backend=backend) @@ -1926,8 +1937,9 @@ def to_augmented_inchi_key(self, backend='rdkit-first'): Simply append the multiplicity string, do not separate by a character like forward slash. - Available options for InChI backend: 'rdkit-first' (default), - 'try-all', 'rdkit', or 'openbabel'. + RDKit and Open Babel are the two backends used in RMG. It is possible to use a + single backend or try different backends in sequence. The available options for the ``backend`` + argument: 'rdkit-first'(default), 'openbabel-first', 'rdkit', or 'openbabel'. """ try: return translator.to_inchi_key(self, backend=backend, aug_level=2) diff --git a/rmgpy/molecule/translator.py b/rmgpy/molecule/translator.py index 2bedf68abc6..731d8d8d8e3 100644 --- a/rmgpy/molecule/translator.py +++ b/rmgpy/molecule/translator.py @@ -169,7 +169,7 @@ def to_inchi(mol, backend='rdkit-first', aug_level=0): Uses RDKit or OpenBabel for conversion. Args: - backend choice of backend, 'rdkit-first' (default), 'try-all', 'rdkit', or 'openbabel' + backend choice of backend, 'rdkit-first' (default), 'openbabel-first', 'rdkit', or 'openbabel' aug_level level of augmentation, 0, 1, or 2 """ cython.declare(inchi=str, ulayer=str, player=str, mlayer=str) @@ -205,7 +205,7 @@ def to_inchi_key(mol, backend='rdkit-first', aug_level=0): Uses RDKit or OpenBabel for conversion. 
Args: - backend choice of backend, 'rdkit-first' (default), 'try-all', 'rdkit', or 'openbabel' + backend choice of backend, 'rdkit-first' (default), 'openbabel-first', 'rdkit', or 'openbabel' aug_level level of augmentation, 0, 1, or 2 """ cython.declare(key=str, ulayer=str, player=str, mlayer=str) @@ -274,10 +274,10 @@ def to_smiles(mol, backend='default'): return output -def from_inchi(mol, inchistr, backend='try-all', raise_atomtype_exception=True): +def from_inchi(mol, inchistr, backend='openbabel-first', raise_atomtype_exception=True): """ Convert an InChI string `inchistr` to a molecular structure. Uses - a user-specified backend for conversion, currently supporting 'try-all' (default), rdkit-first, + a user-specified backend for conversion, currently supporting 'openbabel-first' (default), rdkit-first, rdkit, and openbabel. """ if inchiutil.INCHI_PREFIX in inchistr: @@ -325,11 +325,11 @@ def from_smarts(mol, smartsstr, backend='rdkit', raise_atomtype_exception=True): return _read(mol, smartsstr, 'sma', backend, raise_atomtype_exception=raise_atomtype_exception) -def from_smiles(mol, smilesstr, backend='try-all', raise_atomtype_exception=True): +def from_smiles(mol, smilesstr, backend='openbabel-first', raise_atomtype_exception=True): """ Convert a SMILES string `smilesstr` to a molecular structure. Uses - a user-specified backend for conversion, currently supporting try-all (default), rdkit-first, - rdkit (default) and openbabel. + a user-specified backend for conversion, currently supporting openbabel-first (default), rdkit-first, + rdkit and openbabel. """ return _read(mol, smilesstr, 'smi', backend, raise_atomtype_exception=raise_atomtype_exception) @@ -569,9 +569,9 @@ def _get_backend_list(backend): """ if not isinstance(backend, str): raise ValueError("The backend argument should be a string. 
" - "Accepted values are 'try-all', 'rdkit-first', 'rdkit', and 'openbabel'") + "Accepted values are 'openbabel-first', 'rdkit-first', 'rdkit', and 'openbabel'") backend = backend.strip().lower() - if backend == 'try-all': + if backend == 'openbabel-first': return BACKENDS elif backend == 'rdkit-first': return reversed(BACKENDS) @@ -579,4 +579,4 @@ def _get_backend_list(backend): return [backend] else: raise ValueError("Unrecognized value for backend argument. " - "Accepted values are 'try-all', 'rdkit-first', 'rdkit', and 'openbabel'") + "Accepted values are 'openbabel-first', 'rdkit-first', 'rdkit', and 'openbabel'")