From 4732aa1980d76f9f9c48e34a91031530a6a69a49 Mon Sep 17 00:00:00 2001 From: Sevy Harris Date: Wed, 7 Feb 2024 13:53:08 -0500 Subject: [PATCH 1/4] use Ar workaround to construct surface molecules --- rmgpy/molecule/molecule.py | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/rmgpy/molecule/molecule.py b/rmgpy/molecule/molecule.py index 03727be792..034ee70654 100644 --- a/rmgpy/molecule/molecule.py +++ b/rmgpy/molecule/molecule.py @@ -984,7 +984,20 @@ def __init__(self, atoms=None, symmetry=-1, multiplicity=-187, reactive=True, pr self.from_inchi(inchi) self._inchi = inchi elif smiles: - self.from_smiles(smiles) + if 'X' in smiles: + self.from_smiles(smiles.replace('X', 'Ar')) + lines = self.to_adjacency_list().split('\n') + for i, line in enumerate(lines): + if 'Ar' in line: + lines[i] = lines[i].replace('Ar', 'X') + # remove any extra electron pairs + lines[i] = lines[i].replace('p3', 'p0') + lines[i] = lines[i].replace('p2', 'p0') + lines[i] = lines[i].replace('p1', 'p0') + adj_list = '\n'.join(lines) + self = self.from_adjacency_list(adj_list) + else: + self.from_smiles(smiles) self._smiles = smiles if multiplicity != -187: # it was set explicitly, so re-set it (from_smiles etc may have changed it) From ade423cdaa4d69cb201f5b4eb0c17d537cedbe92 Mon Sep 17 00:00:00 2001 From: Sevy Harris Date: Fri, 18 Oct 2024 16:04:43 -0400 Subject: [PATCH 2/4] add surface SMILES test strings to unit tests --- test/rmgpy/molecule/moleculeTest.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/test/rmgpy/molecule/moleculeTest.py b/test/rmgpy/molecule/moleculeTest.py index 33e75dec58..37f5f27b84 100644 --- a/test/rmgpy/molecule/moleculeTest.py +++ b/test/rmgpy/molecule/moleculeTest.py @@ -1632,6 +1632,15 @@ def test_smiles(self): "CCCC", "O=C=O", "[C]#N", + "[X]", + "[X]C=C[X]", + "O[X]", + "CO[X]", + "[XH]", + "C=C[X]", + "CO.[X]", + "C#[X]", + "CCC(C)[X]" ] for s in test_strings: molecule = Molecule(smiles=s) From 16aad3055c107d4ea23620bcaa46e65d417ba0b3 Mon Sep 17 00:00:00 2001 From: Sevy Harris Date: Mon, 21 Oct 2024 15:41:58 -0400 Subject: [PATCH 3/4] add Pt atom type --- rmgpy/molecule/adjlist.py | 2 +- rmgpy/molecule/atomtype.py | 27 +++++++++++++++++----- rmgpy/molecule/element.py | 8 +++---- rmgpy/molecule/molecule.py | 35 +++++++++++++++++------------ test/rmgpy/molecule/moleculeTest.py | 11 ++++++++- 5 files changed, 57 insertions(+), 26 deletions(-) diff --git a/rmgpy/molecule/adjlist.py b/rmgpy/molecule/adjlist.py index 1ebe0ff110..265d24ebd0 100644 --- a/rmgpy/molecule/adjlist.py +++ b/rmgpy/molecule/adjlist.py @@ -92,7 +92,7 @@ def check_partial_charge(atom): the theoretical one: """ - if atom.symbol in {'X','L','R'}: + if atom.symbol in {'X','L','R'} or 'X' in [z.label for z in get_atomtype(atom, atom.bonds).generic]: return # because we can't check it. valence = PeriodicSystem.valence_electrons[atom.symbol] diff --git a/rmgpy/molecule/atomtype.py b/rmgpy/molecule/atomtype.py index 7ee6a1fe98..0f4ed1d44f 100644 --- a/rmgpy/molecule/atomtype.py +++ b/rmgpy/molecule/atomtype.py @@ -257,7 +257,7 @@ def get_features(self): 'Cl','Cl1s', 'Br','Br1s', 'I','I1s', - 'F','F1s','X','Xv','Xo']) + 'F','F1s','X','Xv','Xo','Pt','Ptv','Pto']) ATOMTYPES['Rx!H'] = AtomType(label='Rx!H', generic=['Rx'], specific=[ 'R!H', @@ -273,20 +273,29 @@ def get_features(self): 'Cl','Cl1s', 'Br','Br1s', 'I','I1s', - 'F','F1s','X','Xv','Xo']) + 'F','F1s','X','Xv','Xo','Pt','Ptv','Pto']) # Surface sites: -ATOMTYPES['X'] = AtomType(label='X', generic=['Rx', 'Rx!H'], specific=['Xv', 'Xo']) +ATOMTYPES['X'] = AtomType(label='X', generic=['Rx', 'Rx!H'], specific=['Xv', 'Xo', 'Pt']) # Vacant surface site: -ATOMTYPES['Xv'] = AtomType('Xv', generic=['X','Rx', 'Rx!H'], specific=[], +ATOMTYPES['Xv'] = AtomType('Xv', generic=['X','Rx', 'Rx!H'], specific=['Ptv'], single=[0], all_double=[0], r_double=[], o_double=[], s_double=[], triple=[0], quadruple=[0], benzene=[0], lone_pairs=[0]) # Occupied surface site: -ATOMTYPES['Xo'] = AtomType('Xo', generic=['X','Rx', 'Rx!H'], specific=[], +ATOMTYPES['Xo'] = AtomType('Xo', generic=['X','Rx', 'Rx!H'], specific=['Pto'], single=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18], all_double=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18], r_double=[], o_double=[], s_double=[], triple=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18], quadruple=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18], benzene=[0], lone_pairs=[0]) +ATOMTYPES['Pt'] = AtomType(label='Pt', generic=['Rx', 'Rx!H', 'X'], specific=['Ptv', 'Pto']) + # single=[0], all_double=[0], r_double=[], o_double=[], s_double=[], triple=[0], quadruple=[0], + # benzene=[0], lone_pairs=[0]) +ATOMTYPES['Ptv'] = AtomType(label='Ptv', generic=['Rx', 'Rx!H', 'X', 'Pt', 'Xv'], specific=[], + single=[0], all_double=[0], r_double=[], o_double=[], s_double=[], triple=[0], quadruple=[0], + benzene=[0], lone_pairs=[0]) +ATOMTYPES['Pto'] = AtomType(label='Pto', generic=['Rx', 'Rx!H', 'X', 'Pt', 'Xo'], specific=[], + single=[0], all_double=[0], r_double=[], o_double=[], s_double=[], triple=[0], quadruple=[0], + benzene=[0], lone_pairs=[0]) # Non-surface atomTypes, R being the most generic: ATOMTYPES['R'] = AtomType(label='R', generic=['Rx'], specific=[ @@ -675,6 +684,12 @@ def get_features(self): ATOMTYPES['X'].set_actions(increment_bond=['X'], decrement_bond=['X'], form_bond=['X'], break_bond=['X'], increment_radical=[], decrement_radical=[], increment_lone_pair=[], decrement_lone_pair=[]) ATOMTYPES['Xv'].set_actions(increment_bond=[], decrement_bond=[], form_bond=['Xo'], break_bond=[], increment_radical=[], decrement_radical=[], increment_lone_pair=[], decrement_lone_pair=[]) ATOMTYPES['Xo'].set_actions(increment_bond=['Xo'], decrement_bond=['Xo'], form_bond=[], break_bond=['Xv'], increment_radical=[], decrement_radical=[], increment_lone_pair=[], decrement_lone_pair=[]) +ATOMTYPES['Pt'].set_actions(increment_bond=['Pt'], decrement_bond=['Pt'], form_bond=['Pt'], break_bond=['Pt'], increment_radical=[], decrement_radical=[], increment_lone_pair=[], decrement_lone_pair=[]) +ATOMTYPES['Ptv'].set_actions(increment_bond=[], decrement_bond=[], form_bond=['Pto'], break_bond=[], increment_radical=[], decrement_radical=[], increment_lone_pair=[], decrement_lone_pair=[]) +ATOMTYPES['Pto'].set_actions(increment_bond=['Pto'], decrement_bond=['Pto'], form_bond=[], break_bond=['Ptv'], increment_radical=[], decrement_radical=[], increment_lone_pair=[], decrement_lone_pair=[]) + + + ATOMTYPES['R'].set_actions(increment_bond=['R'], decrement_bond=['R'], form_bond=['R'], break_bond=['R'], increment_radical=['R'], decrement_radical=['R'], increment_lone_pair=['R'], decrement_lone_pair=['R']) ATOMTYPES['R!H'].set_actions(increment_bond=['R!H'], decrement_bond=['R!H'], form_bond=['R!H'], break_bond=['R!H'], increment_radical=['R!H'], decrement_radical=['R!H'], increment_lone_pair=['R!H'], decrement_lone_pair=['R!H']) @@ -812,7 +827,7 @@ def get_features(self): ATOMTYPES['F1s'].set_actions(increment_bond=[], decrement_bond=[], form_bond=['F1s'], break_bond=['F1s'], increment_radical=['F1s'], decrement_radical=['F1s'], increment_lone_pair=[], decrement_lone_pair=[]) # these are ordered in priority of picking if a more general atomtype is encountered -allElements = ['H', 'C', 'O', 'N', 'S', 'P', 'Si', 'F', 'Cl', 'Br', 'I', 'Ne', 'Ar', 'He', 'X'] +allElements = ['H', 'C', 'O', 'N', 'S', 'P', 'Si', 'F', 'Cl', 'Br', 'I', 'Ne', 'Ar', 'He', 'X', 'Pt'] # list of elements that do not have more specific atomTypes nonSpecifics = ['H', 'He', 'Ne', 'Ar',] diff --git a/rmgpy/molecule/element.py b/rmgpy/molecule/element.py index ad713d5ce6..9d45e215d6 100644 --- a/rmgpy/molecule/element.py +++ b/rmgpy/molecule/element.py @@ -123,13 +123,13 @@ class PeriodicSystem(object): isotopes of the same element may have slight different electronegativities, which is not reflected below """ valences = {'H': 1, 'He': 0, 'C': 4, 'N': 3, 'O': 2, 'F': 1, 'Ne': 0, - 'Si': 4, 'P': 3, 'S': 2, 'Cl': 1, 'Br': 1, 'Ar': 0, 'I': 1, 'X': 4} + 'Si': 4, 'P': 3, 'S': 2, 'Cl': 1, 'Br': 1, 'Ar': 0, 'I': 1, 'X': 4, 'Pt': 4} valence_electrons = {'H': 1, 'He': 2, 'C': 4, 'N': 5, 'O': 6, 'F': 7, 'Ne': 8, - 'Si': 4, 'P': 5, 'S': 6, 'Cl': 7, 'Br': 7, 'Ar': 8, 'I': 7, 'X': 4} + 'Si': 4, 'P': 5, 'S': 6, 'Cl': 7, 'Br': 7, 'Ar': 8, 'I': 7, 'X': 4, 'Pt': 4} lone_pairs = {'H': 0, 'He': 1, 'C': 0, 'N': 1, 'O': 2, 'F': 3, 'Ne': 4, - 'Si': 0, 'P': 1, 'S': 2, 'Cl': 3, 'Br': 3, 'Ar': 4, 'I': 3, 'X': 0} + 'Si': 0, 'P': 1, 'S': 2, 'Cl': 3, 'Br': 3, 'Ar': 4, 'I': 3, 'X': 0, 'Pt': 0} electronegativity = {'H': 2.20, 'D': 2.20, 'T': 2.20, 'C': 2.55, 'C13': 2.55, 'N': 3.04, 'O': 3.44, 'O18': 3.44, - 'F': 3.98, 'Si': 1.90, 'P': 2.19, 'S': 2.58, 'Cl': 3.16, 'Br': 2.96, 'I': 2.66, 'X': 0.0} + 'F': 3.98, 'Si': 1.90, 'P': 2.19, 'S': 2.58, 'Cl': 3.16, 'Br': 2.96, 'I': 2.66, 'X': 0.0, 'Pt': 0} ################################################################################ diff --git a/rmgpy/molecule/molecule.py b/rmgpy/molecule/molecule.py index 034ee70654..f0991ff1b2 100644 --- a/rmgpy/molecule/molecule.py +++ b/rmgpy/molecule/molecule.py @@ -407,7 +407,7 @@ def is_surface_site(self): """ Return ``True`` if the atom represents a surface site or ``False`` if not. """ - return self.symbol == 'X' + return self.symbol == 'X' or self.symbol in [z.label for z in ATOMTYPES['X'].specific] def is_bonded_to_surface(self): """ @@ -984,18 +984,23 @@ def __init__(self, atoms=None, symmetry=-1, multiplicity=-187, reactive=True, pr self.from_inchi(inchi) self._inchi = inchi elif smiles: - if 'X' in smiles: - self.from_smiles(smiles.replace('X', 'Ar')) - lines = self.to_adjacency_list().split('\n') - for i, line in enumerate(lines): - if 'Ar' in line: - lines[i] = lines[i].replace('Ar', 'X') - # remove any extra electron pairs - lines[i] = lines[i].replace('p3', 'p0') - lines[i] = lines[i].replace('p2', 'p0') - lines[i] = lines[i].replace('p1', 'p0') - adj_list = '\n'.join(lines) - self = self.from_adjacency_list(adj_list) + for surface_site_symbol in ['X', 'Pt']: + if surface_site_symbol in smiles: + assert 'Ar' not in smiles + self.from_smiles(smiles.replace(surface_site_symbol, 'Ar')) + lines = self.to_adjacency_list().split('\n') + for i, line in enumerate(lines): + if 'Ar' in line: # The adjacency list needs to use the identified 'X' for a site + lines[i] = lines[i].replace('Ar', surface_site_symbol) + # remove any extra electron pairs + lines[i] = lines[i].replace('p3', 'p0') + lines[i] = lines[i].replace('p2', 'p0') + lines[i] = lines[i].replace('p1', 'p0') + adj_list = '\n'.join(lines) + self = self.from_adjacency_list(adj_list) + # but now we have to change the symbol back to 'Pt or 'X' for the smiles + # self.smiles = self.smiles.replace('X', surface_site_symbol) + break else: self.from_smiles(smiles) self._smiles = smiles @@ -1166,9 +1171,11 @@ def contains_surface_site(self): Returns ``True`` iff the molecule contains an 'X' surface site. """ cython.declare(atom=Atom) + cython.declare(z=AtomType) for atom in self.atoms: - if atom.symbol == 'X': + if atom.symbol == 'X' or atom.symbol in [z.label for z in ATOMTYPES['X'].specific]: return True + # atom_type = get_atomtype(atom, atom.bonds) return False def number_of_surface_sites(self): diff --git a/test/rmgpy/molecule/moleculeTest.py b/test/rmgpy/molecule/moleculeTest.py index 37f5f27b84..0fcf3cc311 100644 --- a/test/rmgpy/molecule/moleculeTest.py +++ b/test/rmgpy/molecule/moleculeTest.py @@ -1640,7 +1640,16 @@ def test_smiles(self): "C=C[X]", "CO.[X]", "C#[X]", - "CCC(C)[X]" + "CCC(C)[X]", + "[Pt]", + "[Pt]C=C[Pt]", + "O[Pt]", + "CO[Pt]", + "[PtH]", + "C=C[Pt]", + "CO.[Pt]", + "C#[Pt]", + "CCC(C)[Pt]" ] for s in test_strings: molecule = Molecule(smiles=s) From be6e89a23c88fa905c6fc1fd8e31d88545d9ac1c Mon Sep 17 00:00:00 2001 From: Sevy Harris Date: Mon, 21 Oct 2024 16:07:32 -0400 Subject: [PATCH 4/4] update unit test for surface site to work with Pt --- rmgpy/molecule/adjlist.py | 4 +++- rmgpy/molecule/atomtype.py | 4 ++-- rmgpy/molecule/translator.py | 7 +++++++ test/rmgpy/molecule/moleculeTest.py | 2 +- 4 files changed, 13 insertions(+), 4 deletions(-) diff --git a/rmgpy/molecule/adjlist.py b/rmgpy/molecule/adjlist.py index 265d24ebd0..75275e45ff 100644 --- a/rmgpy/molecule/adjlist.py +++ b/rmgpy/molecule/adjlist.py @@ -92,7 +92,9 @@ def check_partial_charge(atom): the theoretical one: """ - if atom.symbol in {'X','L','R'} or 'X' in [z.label for z in get_atomtype(atom, atom.bonds).generic]: + # if atom.symbol in {'X','L','R'} or 'X' in [z.label for z in get_atomtype(atom, atom.bonds).generic]: + # TODO handle this in a more generic way so we don't have to add more metals here + if atom.symbol in {'X','L','R','Pt'}: return # because we can't check it. valence = PeriodicSystem.valence_electrons[atom.symbol] diff --git a/rmgpy/molecule/atomtype.py b/rmgpy/molecule/atomtype.py index 0f4ed1d44f..455fe9b577 100644 --- a/rmgpy/molecule/atomtype.py +++ b/rmgpy/molecule/atomtype.py @@ -294,8 +294,8 @@ def get_features(self): single=[0], all_double=[0], r_double=[], o_double=[], s_double=[], triple=[0], quadruple=[0], benzene=[0], lone_pairs=[0]) ATOMTYPES['Pto'] = AtomType(label='Pto', generic=['Rx', 'Rx!H', 'X', 'Pt', 'Xo'], specific=[], - single=[0], all_double=[0], r_double=[], o_double=[], s_double=[], triple=[0], quadruple=[0], - benzene=[0], lone_pairs=[0]) + single=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18], all_double=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18], r_double=[], o_double=[], s_double=[], triple=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18], + quadruple=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18], benzene=[0], lone_pairs=[0]) # Non-surface atomTypes, R being the most generic: ATOMTYPES['R'] = AtomType(label='R', generic=['Rx'], specific=[ diff --git a/rmgpy/molecule/translator.py b/rmgpy/molecule/translator.py index 731d8d8d8e..fab32ba44a 100644 --- a/rmgpy/molecule/translator.py +++ b/rmgpy/molecule/translator.py @@ -370,6 +370,11 @@ def _rdkit_translator(input_object, identifier_type, mol=None): output = from_rdkit_mol(mol, rdkitmol) elif isinstance(input_object, mm.Molecule): # We are converting from a molecule to a string identifier + generic_X = False # keep track of whether this is generic 'X' or specific 'Pt' + for atom in input_object.vertices: + if atom.element.symbol == 'X': + generic_X = True + break if identifier_type == 'smi': rdkitmol = to_rdkit_mol(input_object, sanitize=False) else: @@ -391,6 +396,8 @@ def _rdkit_translator(input_object, identifier_type, mol=None): else: raise ValueError('Unexpected input format. Should be a Molecule or a string.') + if generic_X: + output = output.replace('Pt', 'X') return output diff --git a/test/rmgpy/molecule/moleculeTest.py b/test/rmgpy/molecule/moleculeTest.py index 0fcf3cc311..61b863abe2 100644 --- a/test/rmgpy/molecule/moleculeTest.py +++ b/test/rmgpy/molecule/moleculeTest.py @@ -262,7 +262,7 @@ def test_is_surface_site(self): """ for element in element_list: atom = Atom(element=element, radical_electrons=0, charge=0, label="*1", lone_pairs=0) - if element.symbol == "X": + if element.symbol in ["X", "Pt"]: assert atom.is_surface_site() else: assert not atom.is_surface_site()