From 6230bd927e4edd974abbcca8b3ad84edaa70f833 Mon Sep 17 00:00:00 2001 From: Co Quach <43968221+daico007@users.noreply.github.com> Date: Thu, 7 Sep 2023 12:29:14 -0500 Subject: [PATCH] Turn off identify_connected_components default (#760) * turn off identify_connected_components * rename option to better reflect its purpose and shorten assert_x_params * address remaining failed tests * address comments by Cal and Chris * quick fix to a docstring * Update gmso/parameterization/parameterize.py Co-authored-by: CalCraven <54594941+CalCraven@users.noreply.github.com> * Update gmso/parameterization/topology_parameterizer.py Co-authored-by: CalCraven <54594941+CalCraven@users.noreply.github.com> --------- Co-authored-by: chrisjonesbsu Co-authored-by: CalCraven <54594941+CalCraven@users.noreply.github.com> --- gmso/parameterization/parameterize.py | 57 ++++++---------- .../topology_parameterizer.py | 68 +++++++------------ .../test_impropers_parameterization.py | 4 +- gmso/tests/parameterization/test_opls_gmso.py | 2 +- .../test_parameterization_options.py | 28 ++++---- .../parameterization/test_trappe_gmso.py | 2 +- gmso/utils/specific_ff_to_residue.py | 4 +- 7 files changed, 68 insertions(+), 97 deletions(-) diff --git a/gmso/parameterization/parameterize.py b/gmso/parameterization/parameterize.py index bf09d857c..581ca9e22 100644 --- a/gmso/parameterization/parameterize.py +++ b/gmso/parameterization/parameterize.py @@ -12,12 +12,9 @@ def apply( forcefields, match_ff_by="molecule", identify_connections=False, - identify_connected_components=True, - use_molecule_info=False, - assert_bond_params=True, - assert_angle_params=True, - assert_dihedral_params=True, - assert_improper_params=False, + speedup_by_molgraph=False, + speedup_by_moltag=False, + ignore_params=["improper"], remove_untyped=False, fast_copy=True, ): @@ -25,10 +22,10 @@ def apply( Parameters ---------- - top: gmso.core.topology.Topology, required + top : gmso.core.topology.Topology, required The GMSO topology on which to apply forcefields - forcefields: ForceField or dict, required + forcefields : ForceField or dict, required The forcefield to apply. If a dictionary is used the keys are labels that match the molecule name (specified as a label of site), and the values are gmso ForceField objects that gets applied to the specified molecule. @@ -36,39 +33,31 @@ def apply( a ForceField object. If a dictionary of ForceFields is provided, this method will fail. - match_ff_by: str, optional, default="molecule" + match_ff_by : str, optional, default="molecule" They site's tag used to match the forcefields provided above to the Topology. Options include "molecule" and "group". This option is only valid if forcefields are provided as a dict. - identify_connections: bool, optional, default=False + identify_connections : bool, optional, default=False If true, add connections identified using networkx graph matching to match the topology's bonding graph to smaller sub-graphs that correspond to an angle, dihedral, improper etc - identify_connected_components: bool, optional, default=True + speedup_by_molgraph: bool, optional, default=False A flag to determine whether or not to search the topology for repeated disconnected structures, otherwise known as molecules and type each molecule only once. + This option will be usefult to handle systems with many repeated small molecules, + but may slow down system with large molecule, e.g., monolayer. - use_molecule_info: bool, optional, default=False - A flag to determine whether or not to look at site.residue_name to look parameterize - each molecule only once. Currently unused. + speedup_by_moltag : bool, optional, default=False + A flag to determine whether or not to look at site.molecule_name to try to parameterize + each molecule only once. This option provides speedup for topologies with properly + assigned molecule and residue labels. - assert_bond_params : bool, optional, default=True - If True, an error is raised if parameters are not found for all system - bonds. - - assert_angle_params : bool, optional, default=True - If True, an error is raised if parameters are not found for all system - angles. - - assert_dihedral_params : bool, optional, default=True - If True, an error is raised if parameters are not found for all system - proper dihedrals. - - assert_improper_params : bool, optional, default=False - If True, an error is raised if parameters are not found for all system - improper dihedrals. + ignore_params : set or list or tuple, optional, default=["impropers"] + Skipping the checks that make sure all connections (in the list) have a connection types. + Available options includes "bonds", "angles", "dihedrals", and "impropers". + If you wish to have all connection types checks, provides an empty set/list/tuple. remove_untyped : bool, optional, default=False If True, after the atomtyping and parameterization step, remove all connection @@ -83,16 +72,14 @@ def apply( this should be changed to False if further modification of expressions are necessary post parameterization. """ + ignore_params = set([option.lower() for option in ignore_params]) config = TopologyParameterizationConfig.parse_obj( dict( match_ff_by=match_ff_by, identify_connections=identify_connections, - identify_connected_components=identify_connected_components, - use_molecule_info=use_molecule_info, - assert_bond_params=assert_bond_params, - assert_angle_params=assert_angle_params, - assert_dihedral_params=assert_dihedral_params, - assert_improper_params=assert_improper_params, + speedup_by_molgraph=speedup_by_molgraph, + speedup_by_moltag=speedup_by_moltag, + ignore_params=ignore_params, remove_untyped=remove_untyped, fast_copy=fast_copy, ) diff --git a/gmso/parameterization/topology_parameterizer.py b/gmso/parameterization/topology_parameterizer.py index b33df3d61..f336326fb 100644 --- a/gmso/parameterization/topology_parameterizer.py +++ b/gmso/parameterization/topology_parameterizer.py @@ -58,44 +58,24 @@ class TopologyParameterizationConfig(GMSOBase): "angle, dihedral, improper etc", ) - identify_connected_components: bool = Field( + speedup_by_molgraph: bool = Field( default=False, description="A flag to determine whether or not to search the topology" " for repeated disconnected structures, otherwise known as " "molecules and type each molecule only once.", ) - use_molecule_info: bool = Field( + speedup_by_moltag: bool = Field( default=False, description="A flag to determine whether or not to look at site.molecule " "to look parameterize each molecule only once. Will only be used if " - "identify_connected_components=False", - ) # Unused - - assert_bond_params: bool = Field( - default=True, - description="If True, an error is raised if parameters are not found for " - "all system bonds.", + "speedup_by_molgraph=True", ) - assert_angle_params: bool = Field( - default=True, - description="If True, an error is raised if parameters are not found for " - "all system angles", - ) - - assert_dihedral_params: bool = ( - Field( - default=True, - description="If True, an error is raised if parameters are not found for " - "all system dihedrals.", - ), - ) - - assert_improper_params: bool = Field( - default=False, - description="If True, an error is raised if parameters are not found for " - "all system impropers.", + ignore_params: list = Field( + default=[], + description="Skipping the checks that make sure all connections (in the list) " + "have a connection types.", ) remove_untyped: bool = Field( @@ -134,7 +114,7 @@ def get_ff(self, key=None): else: return self.forcefields - def _parameterize_sites(self, sites, typemap, ff, use_molecule_info=None): + def _parameterize_sites(self, sites, typemap, ff, speedup_by_moltag=None): """Parameterize sites with appropriate atom-types from the forcefield.""" for j, site in enumerate(sites): site.atom_type = ff.get_potential( @@ -170,16 +150,20 @@ def _parameterize_connections( impropers = top.impropers self._apply_connection_parameters( - bonds, ff, self.config.assert_bond_params + bonds, ff, False if "bond" in self.config.ignore_params else True ) self._apply_connection_parameters( - angles, ff, self.config.assert_angle_params + angles, ff, False if "angle" in self.config.ignore_params else True ) self._apply_connection_parameters( - dihedrals, ff, self.config.assert_dihedral_params + dihedrals, + ff, + False if "dihedral" in self.config.ignore_params else True, ) self._apply_connection_parameters( - impropers, ff, self.config.assert_improper_params + impropers, + ff, + False if "improper" in self.config.ignore_params else True, ) def _apply_connection_parameters( @@ -231,7 +215,7 @@ def _apply_connection_parameters( ) def _parameterize( - self, top, typemap, label_type=None, label=None, use_molecule_info=False + self, top, typemap, label_type=None, label=None, speedup_by_moltag=False ): """Parameterize a topology/subtopology based on an atomtype map.""" if label and label_type: @@ -242,7 +226,7 @@ def _parameterize( sites = top.sites self._parameterize_sites( - sites, typemap, forcefield, use_molecule_info=use_molecule_info + sites, typemap, forcefield, speedup_by_moltag=speedup_by_moltag ) self._parameterize_connections( top, @@ -353,27 +337,27 @@ def run_parameterization(self): self.topology, self.config.match_ff_by, label, - self.config.use_molecule_info, - self.config.identify_connected_components, + self.config.speedup_by_moltag, + self.config.speedup_by_molgraph, ) self._parameterize( self.topology, typemap, label_type=self.config.match_ff_by, label=label, - use_molecule_info=self.config.use_molecule_info, # This will be removed from the future iterations + speedup_by_moltag=self.config.speedup_by_moltag, # This will be removed from the future iterations ) else: typemap = self._get_atomtypes( self.get_ff(), self.topology, - use_molecule_info=self.config.use_molecule_info, - use_isomorphic_checks=self.config.identify_connected_components, + speedup_by_moltag=self.config.speedup_by_moltag, + use_isomorphic_checks=self.config.speedup_by_molgraph, ) self._parameterize( self.topology, typemap, - use_molecule_info=self.config.use_molecule_info, + speedup_by_moltag=self.config.speedup_by_moltag, ) self._set_scaling_factors() # Set global or per molecule scaling factors @@ -417,7 +401,7 @@ def _get_atomtypes( topology, label_type=None, label=None, - use_molecule_info=False, + speedup_by_moltag=False, use_isomorphic_checks=False, ): """Run atom-typing in foyer and return the typemap.""" @@ -428,7 +412,7 @@ def _get_atomtypes( label, ) - if use_molecule_info: + if speedup_by_moltag: # Iterate through foyer_topology_graph, which is a subgraph of label_type typemap, reference = dict(), dict() for connected_component in nx.connected_components( diff --git a/gmso/tests/parameterization/test_impropers_parameterization.py b/gmso/tests/parameterization/test_impropers_parameterization.py index 922b2ebf0..c5952da37 100644 --- a/gmso/tests/parameterization/test_impropers_parameterization.py +++ b/gmso/tests/parameterization/test_impropers_parameterization.py @@ -17,7 +17,7 @@ class TestImpropersParameterization(ParameterizationBaseTest): def test_improper_parameterization(self, fake_improper_ff_gmso, ethane): ethane.identify_connections() - apply(ethane, fake_improper_ff_gmso, assert_improper_params=True) + apply(ethane, fake_improper_ff_gmso, ignore_params=list()) lib = PotentialTemplateLibrary() template_improper_type = lib["PeriodicImproperPotential"] @@ -61,7 +61,7 @@ def test_improper_parameterization(self, fake_improper_ff_gmso, ethane): def test_improper_assertion_error(self, ethane_methane_top, oplsaa_gmso): with pytest.raises(ParameterizationError): - apply(ethane_methane_top, oplsaa_gmso, assert_improper_params=True) + apply(ethane_methane_top, oplsaa_gmso, ignore_params=list()) @pytest.mark.parametrize( "mol2_loc", diff --git a/gmso/tests/parameterization/test_opls_gmso.py b/gmso/tests/parameterization/test_opls_gmso.py index c5ed7639f..20e02a5ad 100644 --- a/gmso/tests/parameterization/test_opls_gmso.py +++ b/gmso/tests/parameterization/test_opls_gmso.py @@ -48,7 +48,7 @@ def test_foyer_oplsaa_files( gmso_top_from_pmd = from_parmed(struct, refer_type=True) gmso_top = from_parmed(struct, refer_type=False) - apply(gmso_top, oplsaa_gmso, identify_connected_components=False) + apply(gmso_top, oplsaa_gmso, speedup_by_molgraph=False) assert_same_atom_params(gmso_top, gmso_top_from_pmd) assert_same_connection_params(gmso_top, gmso_top_from_pmd) diff --git a/gmso/tests/parameterization/test_parameterization_options.py b/gmso/tests/parameterization/test_parameterization_options.py index 64f2fd2a2..6615032f8 100644 --- a/gmso/tests/parameterization/test_parameterization_options.py +++ b/gmso/tests/parameterization/test_parameterization_options.py @@ -137,23 +137,23 @@ def test_empty_top_parameterization(self, oplsaa_gmso): apply(top=Topology(), forcefields=oplsaa_gmso) @pytest.mark.parametrize( - "identify_connected_components, use_molecule_info", + "speedup_by_molgraph, speedup_by_moltag", [(False, False), (True, False), (False, True), (True, True)], ) def test_speedup_options( self, ethane_box_with_methane, oplsaa_gmso, - identify_connected_components, - use_molecule_info, + speedup_by_molgraph, + speedup_by_moltag, ): ethane_box_with_methane.identify_connections() apply( ethane_box_with_methane, oplsaa_gmso, identify_connections=False, - identify_connected_components=identify_connected_components, - use_molecule_info=use_molecule_info, + speedup_by_molgraph=speedup_by_molgraph, + speedup_by_moltag=speedup_by_moltag, ) molecule_labels = ethane_box_with_methane.unique_site_labels("molecule") @@ -214,8 +214,8 @@ def test_match_ff_by_molecule(self, ethane_box_with_methane, oplsaa_gmso): ff_dict, match_ff_by="molecule", identify_connections=False, - identify_connected_components=True, - use_molecule_info=True, + speedup_by_molgraph=True, + speedup_by_moltag=True, ) assert ethane_box_with_methane.atom_types is not None @@ -231,13 +231,13 @@ def test_match_ff_by_group(self, ethane_box_with_methane, oplsaa_gmso): ff_dict, match_ff_by="group", identify_connections=False, - identify_connected_components=True, - use_molecule_info=True, + speedup_by_molgraph=True, + speedup_by_moltag=True, ) assert ethane_box_with_methane.atom_types is not None @pytest.mark.parametrize( - "identify_connected_components, use_molecule_info, match_ff_by", + "speedup_by_molgraph, speedup_by_moltag, match_ff_by", [ (False, False, "group"), (True, False, "group"), @@ -253,8 +253,8 @@ def test_hierarchical_mol_structure( self, oplsaa_gmso, hierarchical_top, - identify_connected_components, - use_molecule_info, + speedup_by_molgraph, + speedup_by_moltag, match_ff_by, ): top = deepcopy(hierarchical_top) @@ -274,7 +274,7 @@ def test_hierarchical_mol_structure( apply( top, ff_dict, - identify_connected_components=identify_connected_components, - use_molecule_info=use_molecule_info, + speedup_by_molgraph=speedup_by_molgraph, + speedup_by_moltag=speedup_by_moltag, match_ff_by=match_ff_by, ) diff --git a/gmso/tests/parameterization/test_trappe_gmso.py b/gmso/tests/parameterization/test_trappe_gmso.py index 7442a5135..cb448b6d0 100644 --- a/gmso/tests/parameterization/test_trappe_gmso.py +++ b/gmso/tests/parameterization/test_trappe_gmso.py @@ -48,7 +48,7 @@ def test_foyer_trappe_files( apply( gmso_top, trappe_ua_gmso, - identify_connected_components=False, + speedup_by_molgraph=False, identify_connections=True, ) gmso_top_from_parmeterized_pmd = from_parmed(struct_pmd) diff --git a/gmso/utils/specific_ff_to_residue.py b/gmso/utils/specific_ff_to_residue.py index 0b5976d04..7e6577446 100644 --- a/gmso/utils/specific_ff_to_residue.py +++ b/gmso/utils/specific_ff_to_residue.py @@ -284,10 +284,10 @@ def specific_ff_to_residue( gmso_apply( new_gmso_topology, gmso_compatible_forcefield_selection, - identify_connected_components=True, + speedup_by_molgraph=True, identify_connections=True, match_ff_by=gmso_match_ff_by, - use_molecule_info=True, + speedup_by_moltag=True, remove_untyped=True, ) new_gmso_topology.update_topology()