From 392b7a2c22dffb76546e8d4e7c215d4eb18ab661 Mon Sep 17 00:00:00 2001 From: Brad Crawford Date: Sun, 24 Sep 2023 10:55:58 -0400 Subject: [PATCH] added unit tests for user input and other checks. Added Guasian 16 and GOMC >=2.75 compatible. --- .../installation/installation.rst | 20 +- .../quick_start/quick_start.rst | 2 +- docs/index.rst | 14 +- docs/overview/general_info.rst | 12 +- .../citing_mosdef_dihedral_fit_python.rst | 2 +- docs/reference/credits.rst | 8 +- .../dihedral_fit/fit_dihedral_with_gomc.py | 278 +++- .../tests/test_fit_dihedral_with_gomc.py | 1299 ++++++++++++++++- .../utils/file_read_and_write.py | 5 +- mosdef_dihedral_fit/utils/io.py | 17 +- 10 files changed, 1554 insertions(+), 103 deletions(-) diff --git a/docs/getting_started/installation/installation.rst b/docs/getting_started/installation/installation.rst index e34fc9e..d15cecc 100644 --- a/docs/getting_started/installation/installation.rst +++ b/docs/getting_started/installation/installation.rst @@ -3,11 +3,11 @@ Installation ============ .. note:: - The GOMC software need to be installed manually, outside of this Python install, + The GOMC >= v2.75 software needs to be installed manually, outside of this Python install, with it's directory/path specified in the dihedral fit function. -Recommended installation is with `mamba `_ ------------------------------------------------------------------------------- +Installation with `mamba `_ (Recommended) +----------------------------------------------------------------------------- :: $ mamba install -c conda-forge mosdef-dihedral-fit @@ -18,15 +18,9 @@ Install with `conda `_ $ conda install -c conda-forge mosdef-dihedral-fit -There is an issue building MoSDeF-GOMC version 1.0.0 with ``conda`` or ``conda-forge`` -not extracting the latest ``conda`` build version. 
Therefore, the user can conduct -the additional command below or install using ``mamba`` because ``mamba`` is using the correct build.:: - $ conda install -c conda-forge sympy=1.10 garnett gsd pycifrw - - -Install an editable version from the source code ------------------------------------------------- +Install an editable version via the source code +----------------------------------------------- It is common practice to utilize a pre-packaged ``Python`` distribution like `Miniconda `_ to @@ -49,7 +43,7 @@ Install pre-commit To maintain uniform coding, this software utilizes the `pre-commit `_ package. -To check all the files, you can run:: +To check all the files using pre-commit, run:: $ pre-commit run --all-files @@ -57,7 +51,7 @@ To check all the files, you can run:: Supported Python Versions ------------------------- -``Python 3.9`` is currently the only officially supported and tested version during the +``Python 3.9 and 3.10`` are currently the only officially supported and tested versions during the software development. Older versions of ``Python`` may work, but they are not guaranteed to work. Testing your installation diff --git a/docs/getting_started/quick_start/quick_start.rst b/docs/getting_started/quick_start/quick_start.rst index 2e12b38..dc21a1b 100644 --- a/docs/getting_started/quick_start/quick_start.rst +++ b/docs/getting_started/quick_start/quick_start.rst @@ -5,7 +5,7 @@ Quick Start ----------- .. note:: - The GOMC software need to be installed manually, outside of this Python install, + The GOMC >= v2.75 software needs to be installed manually, outside of this Python install, with it's directory/path specified in the dihedral fit function. These examples are provided as a python files, which the user can run and modifiy. 
diff --git a/docs/index.rst b/docs/index.rst index cf665ea..157ccf7 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -9,9 +9,9 @@ dihedrals, then also analytically converts them to the periodic/`CHARMM `_ and +`GPU Optimized Monte Carlo (GOMC) >= v2.75 `_ and `Molecular Simulation Design Framework (MoSDeF) `_, with only tens of lines of python code, -the `Gaussian `_ log files, and a **mol2** file. +the `Gaussian 16 `_ log files, and a **mol2** file. .. note:: Currently, this means that only the fourth (4th) cosine multiple or power is utilized in the dihedral fit. @@ -24,21 +24,21 @@ the `Gaussian `_ log files, and a **mol2** file. The `MoSDeF-GOMC `_ software package is used for the Molecular Mechanics (MM) calculation, which utilizes -`GPU Optimized Monte Carlo (GOMC) `_, the +`GPU Optimized Monte Carlo (GOMC) >= v2.75 `_, the `Molecular Simulation Design Framework (MoSDeF) `_, and the `vmd-python `_ core software packages. For the Quantum Mechanics calculations, -the **Gaussian** software is used, reading the **Gaussian** log files. -From the **Gaussian** log file, a user created **mol2** file, and a few user inputs, this software automatically +the **Gaussian 16** software is used, reading the **Gaussian 16** log files. +From the **Gaussian 16** log file, a user created **mol2** file, and a few user inputs, this software automatically fits the desired dihedral, accounting for multiple dihedrals simultaneously. The software output provides a wide range of allowable dihedral fits with different cosine term combinations, including plots for visual reference; from this information, the user can then select the best dihedral fit for the specific application. Additionally, the 1-4 interactions for the force fields can be explicitly set in the force field XML file, allowing the flexibility that some other dihedral fitters lack. 
Lastly, the dihedral fits are compared by recalculating -the dihedral in **GOMC** and comparing it to the original **Gaussian** energies, ensuring a correct dihedral fit. +the dihedral in **GOMC >= v2.75** and comparing it to the original **Gaussian 16** energies, ensuring a correct dihedral fit. **MoSDeF-dihedral-fit Highlights**: - #. With a **Gaussian** log file and a few user inputs, the user can easily fit a dihedral. + #. With a **Gaussian 16** log file and a few user inputs, the user can easily fit a dihedral. #. **MoSDeF-dihedral-fit** is designed to automate the dihedral fit diff --git a/docs/overview/general_info.rst b/docs/overview/general_info.rst index 0322c41..a749c6a 100644 --- a/docs/overview/general_info.rst +++ b/docs/overview/general_info.rst @@ -12,9 +12,9 @@ dihedrals, then also analytically converts them to the periodic/`CHARMM `_ and +`GPU Optimized Monte Carlo (GOMC) >= v2.75 `_ and `Molecular Simulation Design Framework (MoSDeF) `_, with only tens of lines of python code, -the `Gaussian `_ log files, and a **mol2** file. +the `Gaussian 16 `_ log files, and a **mol2** file. .. note:: Currently, this means that only the fourth (4th) cosine multiple or power is utilized in the dihedral fit. @@ -30,14 +30,14 @@ for the Molecular Mechanics (MM) calculation, which utilizes `GPU Optimized Monte Carlo (GOMC) `_, the `Molecular Simulation Design Framework (MoSDeF) `_, and the `vmd-python `_ core software packages. For the Quantum Mechanics calculations, -the **Gaussian** software is used, reading the **Gaussian** log files. -From the **Gaussian** log file, a user created **mol2** file, and a few user inputs, this software automatically +the **Gaussian 16** software is used, reading the **Gaussian 16** log files. +From the **Gaussian 16** log file, a user created **mol2** file, and a few user inputs, this software automatically fits the desired dihedral, accounting for multiple dihedrals simultaneously. 
The software output provides a wide range of allowable dihedral fits with different cosine term combinations, including plots for visual reference; from this information, the user can then select the best dihedral fit for the specific application. Additionally, the 1-4 interactions for the force fields can be explicitly set in the force field XML file, allowing the flexibility that some other dihedral fitters lack. Lastly, the dihedral fits are compared by recalculating -the dihedral in **GOMC** and comparing it to the original **Gaussian** energies, ensuring a correct dihedral fit. +the dihedral in **GOMC >= v2.75** and comparing it to the original **Gaussian 16** energies, ensuring a correct dihedral fit. @@ -73,4 +73,4 @@ The **vmd-python** software is licensed by the Theoretical and Computational Bio The vmd-python website is https://github.com/Eigenstate/vmd-python -The official VMD web page is http://www.ks.uiuc.edu/Research/vmd \ No newline at end of file +The official VMD web page is http://www.ks.uiuc.edu/Research/vmd diff --git a/docs/reference/citing_mosdef_dihedral_fit_python.rst b/docs/reference/citing_mosdef_dihedral_fit_python.rst index 84eb812..754e560 100644 --- a/docs/reference/citing_mosdef_dihedral_fit_python.rst +++ b/docs/reference/citing_mosdef_dihedral_fit_python.rst @@ -47,7 +47,7 @@ Download as :download:`BibTeX ` or :download:`RI -Please also cite the **MoSDeF** software: +Please also cite the following software: * `mBuild `_ -- A hierarchical, component based molecule builder * `foyer `_ -- A package for atom-typing as well as applying and disseminating forcefields diff --git a/docs/reference/credits.rst b/docs/reference/credits.rst index 8fe4bff..e9a7818 100644 --- a/docs/reference/credits.rst +++ b/docs/reference/credits.rst @@ -10,7 +10,7 @@ MoSDeF-dihedral-fit developers: **Nicholas Craven** - Developer -**Christopher R. Iacovella** - Developer +**Christopher R. 
Iacovella** - Advisor **Clare McCabe** - Advisor @@ -19,6 +19,10 @@ MoSDeF-dihedral-fit developers: **Jeffrey Potoff** - Advisor and maintainer -`MoSDeF-dihedral-fit `_ was developed and created as a collaborative effort between **Wayne State University** and **Vanderbilt University**, which included the `GPU Optimized Monte Carlo (GOMC) `_,`Molecular Simulation Design Framework (MoSDeF) `_, and `MoSDeF-GOMC `_ teams. +`MoSDeF-dihedral-fit `_ was developed and created as a collaborative effort between **Wayne State University**, **Vanderbilt University**, and **Atomfold LLC** (`Atomfold LLC website `_). + +**Wayne State University** developed the `GPU Optimized Monte Carlo (GOMC) `_ engine, which is utilized in this software. + +Other collaborative efforts between **Wayne State University** and **Vanderbilt University** include the `Molecular Simulation Design Framework (MoSDeF) `_ and `MoSDeF-GOMC `_ software, which are utilized in this software. The **MoSDeF-dihedral-fit** software was supported via National Science Foundation (**NSF**) grants OAC-1835713, OAC-1835874, and CBET-2052438. Any statements stated or expressed are solely those of the **MoSDeF-dihedral-fit** software or developers and not the **NSF**. 
diff --git a/mosdef_dihedral_fit/dihedral_fit/fit_dihedral_with_gomc.py b/mosdef_dihedral_fit/dihedral_fit/fit_dihedral_with_gomc.py index 3cc31fe..9ad79a7 100755 --- a/mosdef_dihedral_fit/dihedral_fit/fit_dihedral_with_gomc.py +++ b/mosdef_dihedral_fit/dihedral_fit/fit_dihedral_with_gomc.py @@ -5,6 +5,7 @@ import mbuild as mb import subprocess import unyt as u +from unyt.dimensions import angle, energy, length, temperature import mosdef_gomc.formats.gmso_charmm_writer as mf_charmm import mosdef_gomc.formats.gmso_gomc_conf_writer as gomc_control import matplotlib.pyplot as plt @@ -26,17 +27,17 @@ def fit_dihedral_with_gomc( fit_dihedral_atom_types, mol2_selection, forcefield_selection, - temperature, + temperature_unyt_units, gomc_binary_path, qm_log_files_and_entries_to_remove_dict, manual_dihedral_atom_numbers_list=None, zeroed_dihedral_atom_types=None, qm_engine="gaussian", - override_VDWGeometricSigma=None, + VDWGeometricSigma=None, atom_type_naming_style='general', gomc_cpu_cores=1, - fit_min_validated_r_squared=0.99, - fit_validation_r_squared_rtol=5e-03 + fit_min_validated_r_squared=0.98, + fit_validation_r_squared_rtol=2.5e-02 ): """Fit the desired dihedral to a MM force field, based on QM data. @@ -72,6 +73,12 @@ def fit_dihedral_with_gomc( fit_dihedral_atom_types: list of four (4) strings (Example: ['HC', 'CT, 'CT, 'HC']) The atom types/classes (strings in the list) of the dihedral which is being fitted with non-zero k-values. + + NOTE: The extracted atom types/classes can be determined also by + looking at the 'GOMC_pdb_psf_ff_files_dihedrals_per_xml.inp' and + 'GOMC_pdb_psf_ff_files_dihedrals_zeroed.inp' files in the + 'GOMC_simulations' folder. These files can also be checked to + confirm it is zeroing the correct dihedrals. mol2_selection: str The mol2 file which matches the element, atom type, bonded connnections, the 'EXACT ATOM ORDER AND CONFIGURATION AS IN THE QM SIMULATION INPUT FILES'. 
@@ -86,23 +93,24 @@ def fit_dihedral_with_gomc( * Example str for FF file: 'path_to file/trappe-ua.xml' - temperature: unyt.unyt_quantity + temperature_unyt_units: unyt.unyt_quantity The temperature of the system that was performed for the Quantum Mechanics (QM) simulation. gomc_binary_path: str - The path or directory of the GOMC binary file "GOMC_CPU_NVT", which is used to - perform the Molecular Mechanics (MM) energy calculations. This does not include - the "GOMC_CPU_NVT" in this variable. + The path or directory of the GOMC binary file "GOMC_CPU_NVT" (GOMC >= v2.75), + which is used to perform the Molecular Mechanics (MM) energy calculations. + This does not include the "GOMC_CPU_NVT" in this variable. Example: '/home/brad/Programs/GOMC/GOMC_2_76/bin' - qm_log_files_and_entries_to_remove_dict: dict, {str: [int, ..., int]} + qm_log_files_and_entries_to_remove_dict: dict, {str: [int>=0, ..., int>=0]} * qm_engine="gaussian" - This is a dictionary comprised of a key (string) of the QM log file path and name, - and a list of integers, which are the QM optimization parameters to remove from - the written data, in order of reading from each file. These can be seen in the - order of the dictionary file name (strings). These removed parameters allow - users to remove any bad or repeated data points for the QM log file when needed. + This is a dictionary comprised of a key (string) of the QM log file path and name + (Gaussian 16 log file only), and a list of integers, which are the QM optimization + parameters to remove from the written data, in order of reading from each file. + These can be seen in the order of the dictionary file name (strings). + These removed parameters allow users to remove any bad or repeated data + points for the QM log file when needed. 
Example 1: {'path/guassian_log_file.log': []} @@ -114,7 +122,7 @@ def fit_dihedral_with_gomc( * qm_engine="gaussian_style_final_files" This is a dictionary comprised of a key (string) of the file paths to the - Gaussian style final formatted files, and a list of integers, which are the + Gaussian 16 style final formatted files, and a list of integers, which are the QM optimization parameters to remove from the written data, in order of reading from each folder. These can be seen in the order of the dictionary file name (strings). These removed parameters allow users to remove any bad or repeated data points @@ -170,10 +178,16 @@ def fit_dihedral_with_gomc( Example: [['CT', 'CT, 'CT, 'HC'], ['NT', 'CT, 'CT, 'HC']] + NOTE: The extracted atom types/classes can be determined also by + looking at the 'GOMC_pdb_psf_ff_files_dihedrals_per_xml.inp' and + 'GOMC_pdb_psf_ff_files_dihedrals_zeroed.inp' files in the + 'GOMC_simulations' folder. These files can also be checked to + confirm it is zeroing the correct dihedrals. + qm_engine: str (currently only 'guassian'), default='guassian' The Quantum Mechanics (QM) simulation engine utilized to produce the files listed in the 'qm_log_files_and_entries_to_remove_dict' variable(s). - override_VDWGeometricSigma: boolean, default = None + VDWGeometricSigma: boolean, default = None Override the VDWGeometricSigma in the foyer or GMSO XML file. If this is None, it will use whatever is specified in the XML file, or the default foyer or GMSO values. BEWARE, if it is not specified XML file, it has a default. @@ -246,10 +260,10 @@ def fit_dihedral_with_gomc( --- If the general CHARMM style atom type in any residue/molecule's gomc_fix_bonds_angles, gomc_fix_bonds, or gomc_fix_angles are IN any other residue/molecule. - gomc_cpu_cores: int, default=1 + gomc_cpu_cores: int>0, default=1 The number of CPU-cores that are used to perform the GOMC simulations, required for the Molecular Mechanics (MM) energy calulations. 
- fit_min_validated_r_squared: float (0 <= fit_min_validated_r_squared <= 1), default=0.99 + fit_min_validated_r_squared: float (0 < fit_min_validated_r_squared < 1), default=0.98 The minimum R**2 (R-squared) value to test the validity of the fit with the new dihedral fitted constants, as fitted in the QM - MM energy data vs. the dihedral function fit, mentioned below. @@ -270,8 +284,10 @@ def fit_dihedral_with_gomc( same dihedrals being fit simultaneously, and the 'zeroed_dihedral_atom_types' are dihedral energies are set to zero. - fit_validation_r_squared_rtol: float, default=5e-03 - Where the QM data is defined as the actual data; this is the fractional difference + NOTE: This value may need adjusted to get the dihedral fit to solve correctly. + + fit_validation_r_squared_rtol: float>0, default=2.5e-02 + Where the QM data is defined as the actual data; this is the difference of the dihedral's calculated R-squared values between: * The QM-MM fitting process, where the fit MM dihedral k-values are zero (0). * The MM calculations where the fit k-value are entered in the MM data and @@ -280,11 +296,13 @@ def fit_dihedral_with_gomc( fit_dihedral_atom_types, mol2_selection, forcefield_selection, - temperature, + temperature_unyt_units, gomc_binary_path, qm_log_files_and_entries_to_remove_dict, zeroed_dihedral_atom_types=None, + NOTE: This value may need adjusted to get the dihedral fit to solve correctly. + Returns ------- Files containing the following information in the following relative locations: @@ -474,34 +492,189 @@ def fit_dihedral_with_gomc( or more of the nearly perfect R-squared fitted values, or fitting procedure itself. 
""" + # check if 'mol2_selection' file is correct format + if not isinstance(mol2_selection, str): + raise TypeError("ERROR: Please enter mol2 file ('mol2_selection') as a string.") + + extension_ff_name = os.path.splitext(mol2_selection)[-1] + if extension_ff_name != ".mol2": + raise ValueError( + "ERROR: Please enter enter mol2 file ('mol2_selection') name with the .mol2 extension.") + + if not os.path.exists(mol2_selection): + raise ValueError(f"ERROR: The {mol2_selection} file ('mol2_selection') does not exists.") + + # check if 'forcefield_selection' file is correct format + if not isinstance(forcefield_selection, str): + raise TypeError("ERROR: Please enter xml file ('forcefield_selection') as a string.") + + extension_ff_name = os.path.splitext(forcefield_selection)[-1] + if extension_ff_name != ".xml": + raise ValueError( + "ERROR: Please enter enter xml file ('forcefield_selection') name with the .xml extension.") + + if not os.path.exists(forcefield_selection): + raise ValueError(f"ERROR: The {forcefield_selection} file ('forcefield_selection') does not exists.") + if qm_engine == "gaussian" and manual_dihedral_atom_numbers_list is not None: warn( - "WARNING: The 'dihedral_atom_numbers_list' is set to None, and will not be used, " - "but read from the file directly." + "WARNING: When reading the qm_engine = 'gaussian' files, the " + "'manual_dihedral_atom_numbers_list' is set to None, and will not be used, " + "because the the gaussian log files already contain this information." ) manual_dihedral_atom_numbers_list = None - # write the qm data files data out - if qm_engine == "gaussian": - mdf_frw.write_qm_data_files( - qm_log_files_and_entries_to_remove_dict, - manual_dihedral_atom_numbers_list=manual_dihedral_atom_numbers_list, - qm_engine=qm_engine + # test the temperature_unyt_units input + print_error_value = f"ERROR: The 'temperature_unyt_units' is not temperature of type {type(u.unyt_quantity)}." 
+ if isinstance(temperature_unyt_units, u.unyt_quantity): + if temperature == temperature_unyt_units.units.dimensions: + temperature_unyt_units =temperature_unyt_units.to("K") + + else: + raise ValueError(print_error_value) + + else: + raise TypeError(print_error_value) + + # test the qm_log_files_and_entries_to_remove_dict input + print_error_value = ( + "ERROR: The 'qm_log_files_and_entries_to_remove_dict' is not a dict " + "with a string keys and list of int>=0 as the values. Example: " + "{'path/HC_CT_CT_HC_part_1.log'): [], 'path/HC_CT_CT_HC_part_2.log'): [0, 5]}" + ) + if isinstance(qm_log_files_and_entries_to_remove_dict, dict): + for key_j, value_j in qm_log_files_and_entries_to_remove_dict.items(): + + if isinstance(key_j, str): + if not os.path.exists(key_j): + raise ValueError( + f"ERROR: The {key_j} file ('qm_log_files_and_entries_to_remove_dict') does not exists." + ) + + else: + raise TypeError(print_error_value) + + print('*****************') + print(f'value_j = {str(value_j)}') + if isinstance(value_j, list): + for int_j in value_j: + if not isinstance(int_j, int) or int_j < 0: + raise TypeError(print_error_value) + + else: + raise TypeError(print_error_value) + + else: + raise TypeError(print_error_value) + + # check if 'gomc_binary_path' leads to the file is correct format GOMC_CPU_NVT + if not isinstance(gomc_binary_path, str): + raise TypeError("ERROR: Please enter the 'gomc_binary_path' file as a string.") + + if not os.path.exists(f"{gomc_binary_path}/{'GOMC_CPU_NVT'}"): + raise ValueError( + f"ERROR: The 'gomc_binary_path' file does not exist or contain the GOMC 'GOMC_CPU_NVT' file." 
) - elif qm_engine == "gaussian_style_final_files": - mdf_frw.write_qm_data_files( - qm_log_files_and_entries_to_remove_dict, - manual_dihedral_atom_numbers_list=manual_dihedral_atom_numbers_list, - qm_engine=qm_engine + + # test the 'zeroed_dihedral_atom_types' input + print_error_value = ( + "ERROR: The 'zeroed_dihedral_atom_types' is not None or a list containing " + "lists with 4 strings each. Example: " + "[['CT', 'CT, 'CT, 'HC'], ['NT', 'CT, 'CT, 'HC']]." + ) + if isinstance(zeroed_dihedral_atom_types, (list, type(None))): + if isinstance(zeroed_dihedral_atom_types, list): + for list_j in zeroed_dihedral_atom_types: + if isinstance(list_j, list) and len(list_j)==4: + for str_j in list_j: + if not isinstance(str_j, str): + raise TypeError(print_error_value) + + else: + raise TypeError(print_error_value) + + else: + raise TypeError(print_error_value) + + # test the 'atom_type_naming_style' input + if isinstance(atom_type_naming_style, str): + if not atom_type_naming_style in ["general", "all_unique"]: + raise ValueError( + f"ERROR: The 'atom_type_naming_style' = {atom_type_naming_style}, which is not " + f"any of the available options. " + f"The options are 'general' or 'all_unique'." + ) + + else: + raise TypeError( + f"ERROR: The 'atom_type_naming_style' is a {type(atom_type_naming_style)}, but it needs to be a str." ) + # test the 'fit_min_validated_r_squared' input + if isinstance(fit_min_validated_r_squared, float): + if not (fit_min_validated_r_squared>0 and fit_min_validated_r_squared<1): + raise ValueError( + f"ERROR: The 'fit_min_validated_r_squared'= {fit_min_validated_r_squared}, " + f"but it must be a 0 0 : + raise ValueError( + f"ERROR: The 'fit_validation_r_squared_rtol' = {fit_validation_r_squared_rtol}, " + f"but it must be a float>0.") + + else: + raise TypeError( + f"ERROR: The 'fit_validation_r_squared_rtol' is a {type( fit_validation_r_squared_rtol)}, " + f"but it must be a float." 
+ ) + + # test the 'gomc_cpu_cores' input + if isinstance(gomc_cpu_cores, int): + if gomc_cpu_cores<=0: + raise ValueError( + f"ERROR: The 'gomc_cpu_cores' = {gomc_cpu_cores}, and it must be an int > 0." + ) + + else: + raise TypeError( + f"ERROR: The 'gomc_cpu_cores' is a {type(gomc_cpu_cores)}, but it needs to be a int." + ) + + # check values write the qm data files data out + if isinstance(qm_engine , str): + if qm_engine == "gaussian": + mdf_frw.write_qm_data_files( + qm_log_files_and_entries_to_remove_dict, + manual_dihedral_atom_numbers_list=manual_dihedral_atom_numbers_list, + qm_engine=qm_engine + ) + elif qm_engine == "gaussian_style_final_files": + mdf_frw.write_qm_data_files( + qm_log_files_and_entries_to_remove_dict, + manual_dihedral_atom_numbers_list=manual_dihedral_atom_numbers_list, + qm_engine=qm_engine + ) + + else: + raise ValueError( + f"ERROR: The 'qm_engine' = {qm_engine}, which is not " + f"any of the available options. " + f"The options are 'gaussian' or 'gaussian_style_final_files'." + ) + + else: + raise TypeError( + f"ERROR: The 'qm_engine' is a {type(qm_engine)}, but it needs to be a str." 
) # ************************************************************** @@ -703,7 +876,7 @@ def fit_dihedral_with_gomc( f'{gomc_runs_folder_name}/{control_file_name_str}', 'NVT', MC_steps, - temperature, + temperature_unyt_units, ff_psf_pdb_file_directory=None, check_input_files_exist=False, Parameters=f"{output_gomc_pdb_psf_ff_file_name_str}_dihedrals_zeroed.inp", @@ -725,7 +898,7 @@ def fit_dihedral_with_gomc( "Pressure": None, "Ewald": True, "ElectroStatic": True, - "VDWGeometricSigma": override_VDWGeometricSigma, + "VDWGeometricSigma": VDWGeometricSigma, "Rcut": Rcut, "RcutLow": RcutLow, "LRC": LRC, @@ -2062,7 +2235,7 @@ def fit_dihedral_with_gomc( f'{gomc_runs_folder_name}/{control_file_name_fitted_str}', 'NVT', MC_steps, - temperature, + temperature_unyt_units, ff_psf_pdb_file_directory=None, check_input_files_exist=False, Parameters=f"{output_gomc_pdb_psf_ff_file_name_str}_OPLS_fit_{opls_fit_q}_dihedral.inp", @@ -2084,7 +2257,7 @@ def fit_dihedral_with_gomc( "Pressure": None, "Ewald": True, "ElectroStatic": True, - "VDWGeometricSigma": override_VDWGeometricSigma, + "VDWGeometricSigma": VDWGeometricSigma, "Rcut": Rcut, "RcutLow": RcutLow, "LRC": LRC, @@ -2317,25 +2490,26 @@ def fit_dihedral_with_gomc( ): raise ValueError( f"ERROR: The calculated R-squared energy values from the fit type " - f"{opls_fit_data_non_zero_k_constants_list[opls_q]} does not match " - f"the validated case for " - f"'fit_min_validated_r_squared' >= {fit_min_validated_r_squared}, " - f"within the relative tolerance or " - f"'fit_validation_r_squared_rtol' = {fit_validation_r_squared_rtol}. \n" + f"{opls_fit_data_non_zero_k_constants_list[opls_q]} " + f"does not match the validated case for 'fit_min_validated_r_squared' >= " \ + f"{mdf_math.round_to_sig_figs(fit_min_validated_r_squared,sig_figs=8)}, " + f"within the relative tolerance or 'fit_validation_r_squared_rtol' = " + f"{mdf_math.round_to_sig_figs(fit_validation_r_squared_rtol,sig_figs=8)}. 
\n" f"- Fit via the individual or multi-dihedral fit, when " f"Gaussian minus GOMC with the selected dihedral set to zero \n" - f"--> R-squared = {opls_fit_data_r_squared_list[opls_q]} \n" + f"--> R-squared = " + f"{mdf_math.round_to_sig_figs(opls_fit_data_r_squared_list[opls_q],sig_figs=8)} \n" f"- Fit via the validation test case, when " f"Gaussian minus GOMC with the selected individual dihedral added in GOMC \n" - f"-- >R-squared = {opls_r_squared_fitted_data_via_gomc_list[opls_q]} \n" + f"-- >R-squared = " + f"{mdf_math.round_to_sig_figs(opls_r_squared_fitted_data_via_gomc_list[opls_q],sig_figs=8)} \n" f"The 'fit_min_validated_r_squared' and 'fit_validation_r_squared_rtol' " f"variables may need to be adjusted, \n" f"there is likely something wrong with the fitting procedure, the " f"software parameters need tuned, or there is a bug in the software. \n\n " f"NOTE: Since the R-squared values are calculated via different parameters, \n" f"the compared R-squared values could be very different if they are not nearly \n" - f"a perfect fit (R-squared --> 0.98 to 1)." - f"" + f"a perfect fit (R-squared --> ~0.98 to 0.99999999)." 
) gomc_fitted_gaussian_kcal_mol_energy_data_txt_file.close() diff --git a/mosdef_dihedral_fit/tests/test_fit_dihedral_with_gomc.py b/mosdef_dihedral_fit/tests/test_fit_dihedral_with_gomc.py index 9dd65dd..863658b 100644 --- a/mosdef_dihedral_fit/tests/test_fit_dihedral_with_gomc.py +++ b/mosdef_dihedral_fit/tests/test_fit_dihedral_with_gomc.py @@ -7,7 +7,7 @@ import mosdef_dihedral_fit.utils.math_operations as mdf_math # user changable variable, as it needs to be run locally -gomc_binary_directory = "/Users/brad/Programs/GOMC/GOMC_2_75/bin" +gomc_binary_directory = "/home/brad/Programs/GOMC/GOMC_2_75/bin" class TestFitDihedralWithGomc(BaseTest): def test_gaussian_log_file_fit_oplsaa_fit_ethane_HC_CT_CT_HC(self): @@ -22,7 +22,7 @@ def test_gaussian_log_file_fit_oplsaa_fit_ethane_HC_CT_CT_HC(self): }, zeroed_dihedral_atom_types=None, qm_engine="gaussian", - override_VDWGeometricSigma=True, + VDWGeometricSigma=False, atom_type_naming_style='general', gomc_cpu_cores=1, fit_min_validated_r_squared=0.99, @@ -229,7 +229,7 @@ def test_gaussian_log_file_fit_oplsaa_fit_ethane_HC_CT_CT_HC_with_2_log_files(se }, zeroed_dihedral_atom_types=None, qm_engine="gaussian", - override_VDWGeometricSigma=True, + VDWGeometricSigma=True, atom_type_naming_style='general', gomc_cpu_cores=1, fit_min_validated_r_squared=0.99, @@ -436,7 +436,7 @@ def test_gaussian_log_file_fit_oplsaa_protonated_fragment_CT_CT_C_OH_in_COOH(sel }, zeroed_dihedral_atom_types=[['CT', 'CT', 'C', 'O_3']], qm_engine="gaussian", - override_VDWGeometricSigma=True, + VDWGeometricSigma=True, atom_type_naming_style='general', gomc_cpu_cores=1, fit_min_validated_r_squared=0.99, @@ -1013,7 +1013,7 @@ def test_gaussian_style_files_fit_oplsaa_fit_CT_CT_C_OH_in_COOH_missing_1st_poin manual_dihedral_atom_numbers_list=[3, 2, 1, 4], zeroed_dihedral_atom_types=None, qm_engine="gaussian_style_final_files", - override_VDWGeometricSigma=True, + VDWGeometricSigma=True, atom_type_naming_style='general', gomc_cpu_cores=1, 
fit_min_validated_r_squared=0.99, @@ -1591,7 +1591,7 @@ def test_gaussian_style_files_fit_oplsaa_fit_CT_CT_C_OH_in_COOH(self): manual_dihedral_atom_numbers_list=[3, 2, 1, 4], zeroed_dihedral_atom_types=None, qm_engine="gaussian_style_final_files", - override_VDWGeometricSigma=True, + VDWGeometricSigma=True, atom_type_naming_style='general', gomc_cpu_cores=1, fit_min_validated_r_squared=0.99, @@ -2170,7 +2170,7 @@ def test_gaussian_style_files_fit_oplsaa_fit_CT_CT_C_OH_in_COOH_2_files_missing_ manual_dihedral_atom_numbers_list=[3, 2, 1, 4], zeroed_dihedral_atom_types=[['CT', 'CT', 'C', 'O_3']], qm_engine="gaussian_style_final_files", - override_VDWGeometricSigma=True, + VDWGeometricSigma=True, atom_type_naming_style='general', gomc_cpu_cores=1, fit_min_validated_r_squared=0.99, @@ -2758,7 +2758,7 @@ def test_gaussian_log_file_fit_oplsaa_protonated_fragment_CT_CT_C_OH_in_COOH_bad }, zeroed_dihedral_atom_types=[['CT', 'CT', 'C', 'O_3']], qm_engine="gaussian", - override_VDWGeometricSigma=True, + VDWGeometricSigma=True, atom_type_naming_style='general', gomc_cpu_cores=1, fit_min_validated_r_squared=0.99, @@ -2789,9 +2789,1290 @@ def test_gaussian_style_files_fit_oplsaa_protonated_fragment_CT_CT_C_OH_in_COOH_ manual_dihedral_atom_numbers_list=[3, 2, 1, 4], zeroed_dihedral_atom_types=None, qm_engine="gaussian_style_final_files", - override_VDWGeometricSigma=True, + VDWGeometricSigma=True, atom_type_naming_style='general', gomc_cpu_cores=1, fit_min_validated_r_squared=0.99, fit_validation_r_squared_rtol=5e-03 + ) + + def test_gaussian_log_file_variable_VDWGeometricSigma_default(self): + fit_dihedral_with_gomc( + ['HC', 'CT', 'CT', 'HC'], + get_mosdef_dihedral_fit_fn('gaussian/HC_CT_CT_HC/input/starting_coords/ethane_aa.mol2'), + get_mosdef_dihedral_fit_fn('oplsaa_ethane_HC_CT_CT_HC.xml'), + 298.15 * u.Kelvin, + gomc_binary_directory, + { + get_mosdef_dihedral_fit_fn('gaussian/HC_CT_CT_HC/output/HC_CT_CT_HC_multiplicity_1.log'): [0], + }, + zeroed_dihedral_atom_types=None, 
+ qm_engine="gaussian", + VDWGeometricSigma=None, + atom_type_naming_style='general', + gomc_cpu_cores=1, + fit_min_validated_r_squared=0.99, + fit_validation_r_squared_rtol=1e-03 + ) + + with open("GOMC_simulations/GOMC_OPLS_fit_3_dihedral_coords_1.conf", "r") as fp: + variables_read_dict = { + "VDWGeometricSigma": False, + } + out_gomc = fp.readlines() + for i, line in enumerate(out_gomc): + if line.startswith("VDWGeometricSigma "): + variables_read_dict["VDWGeometricSigma"] = True + split_line = line.split() + assert split_line[1] == "True" + + pass + + assert variables_read_dict == { + "VDWGeometricSigma": True, + } + + def test_gaussian_log_file_variable_VDWGeometricSigma_True(self): + fit_dihedral_with_gomc( + ['HC', 'CT', 'CT', 'HC'], + get_mosdef_dihedral_fit_fn('gaussian/HC_CT_CT_HC/input/starting_coords/ethane_aa.mol2'), + get_mosdef_dihedral_fit_fn('oplsaa_ethane_HC_CT_CT_HC.xml'), + 298.15 * u.Kelvin, + gomc_binary_directory, + { + get_mosdef_dihedral_fit_fn('gaussian/HC_CT_CT_HC/output/HC_CT_CT_HC_multiplicity_1.log'): [0], + }, + zeroed_dihedral_atom_types=None, + qm_engine="gaussian", + VDWGeometricSigma=None, + atom_type_naming_style='general', + gomc_cpu_cores=1, + fit_min_validated_r_squared=0.99, + fit_validation_r_squared_rtol=1e-03 + ) + + with open("GOMC_simulations/GOMC_OPLS_fit_3_dihedral_coords_1.conf", "r") as fp: + variables_read_dict = { + "VDWGeometricSigma": False, + } + out_gomc = fp.readlines() + for i, line in enumerate(out_gomc): + if line.startswith("VDWGeometricSigma "): + variables_read_dict["VDWGeometricSigma"] = True + split_line = line.split() + assert split_line[1] == "True" + + pass + + assert variables_read_dict == { + "VDWGeometricSigma": True, + } + + def test_gaussian_log_file_variable_VDWGeometricSigma_False(self): + fit_dihedral_with_gomc( + ['HC', 'CT', 'CT', 'HC'], + get_mosdef_dihedral_fit_fn('gaussian/HC_CT_CT_HC/input/starting_coords/ethane_aa.mol2'), + get_mosdef_dihedral_fit_fn('oplsaa_ethane_HC_CT_CT_HC.xml'), 
+ 298.15 * u.Kelvin, + gomc_binary_directory, + { + get_mosdef_dihedral_fit_fn('gaussian/HC_CT_CT_HC/output/HC_CT_CT_HC_multiplicity_1.log'): [0], + }, + zeroed_dihedral_atom_types=None, + qm_engine="gaussian", + VDWGeometricSigma=False, + atom_type_naming_style='general', + gomc_cpu_cores=1, + fit_min_validated_r_squared=0.99, + fit_validation_r_squared_rtol=1e-03 + ) + + with open("GOMC_simulations/GOMC_OPLS_fit_3_dihedral_coords_1.conf", "r") as fp: + variables_read_dict = { + "VDWGeometricSigma": False, + } + out_gomc = fp.readlines() + for i, line in enumerate(out_gomc): + if line.startswith("VDWGeometricSigma "): + variables_read_dict["VDWGeometricSigma"] = True + split_line = line.split() + assert split_line[1] == "False" + + pass + + assert variables_read_dict == { + "VDWGeometricSigma": True, + } + + def test_bad_fit_dihedral_atom_types_input_list_of_3(self): + with pytest.raises( + TypeError, + match=r"ERROR: The input 'fit_dihedral_atom_types' variable = \['HC', 'CT', 'CT'\], " + r"but it needs to be a list of 4 strings, " + r"where the strings are the atom types/classes. Example: \['HC', 'CT', 'CT', 'HC'\]." 
+ ): + fit_dihedral_with_gomc( + ['HC', 'CT', 'CT'], + get_mosdef_dihedral_fit_fn('gaussian/HC_CT_CT_HC/input/starting_coords/ethane_aa.mol2'), + get_mosdef_dihedral_fit_fn('oplsaa_ethane_HC_CT_CT_HC.xml'), + 298.15 * u.Kelvin, + gomc_binary_directory, + { + get_mosdef_dihedral_fit_fn('gaussian/HC_CT_CT_HC/output/HC_CT_CT_HC_multiplicity_1.log'): [0], + }, + zeroed_dihedral_atom_types=None, + qm_engine="gaussian", + VDWGeometricSigma=False, + atom_type_naming_style='general', + gomc_cpu_cores=1, + fit_min_validated_r_squared=0.99, + fit_validation_r_squared_rtol=1e-03 + ) + + def test_bad_fit_dihedral_atom_types_input_list_of_4_with_int_at_0(self): + with pytest.raises( + TypeError, + match=r"ERROR: The input 'fit_dihedral_atom_types' variable = \[0, 'CT', 'CT', 'HC'\], " + r"but it needs to be a list of 4 strings, " + r"where the strings are the atom types/classes. Example: \['HC', 'CT', 'CT', 'HC'\]." + ): + fit_dihedral_with_gomc( + [0, 'CT', 'CT', 'HC'], + get_mosdef_dihedral_fit_fn('gaussian/HC_CT_CT_HC/input/starting_coords/ethane_aa.mol2'), + get_mosdef_dihedral_fit_fn('oplsaa_ethane_HC_CT_CT_HC.xml'), + 298.15 * u.Kelvin, + gomc_binary_directory, + { + get_mosdef_dihedral_fit_fn('gaussian/HC_CT_CT_HC/output/HC_CT_CT_HC_multiplicity_1.log'): [0], + }, + zeroed_dihedral_atom_types=None, + qm_engine="gaussian", + VDWGeometricSigma=False, + atom_type_naming_style='general', + gomc_cpu_cores=1, + fit_min_validated_r_squared=0.99, + fit_validation_r_squared_rtol=1e-03 + ) + + def test_bad_fit_dihedral_atom_types_input_list_of_4_with_int_at_1(self): + with pytest.raises( + TypeError, + match=r"ERROR: The input 'fit_dihedral_atom_types' variable = \['HC', 1, 'CT', 'HC'\], " + r"but it needs to be a list of 4 strings, " + r"where the strings are the atom types/classes. Example: \['HC', 'CT', 'CT', 'HC'\]." 
+ ): + fit_dihedral_with_gomc( + ['HC', 1, 'CT', 'HC'], + get_mosdef_dihedral_fit_fn('gaussian/HC_CT_CT_HC/input/starting_coords/ethane_aa.mol2'), + get_mosdef_dihedral_fit_fn('oplsaa_ethane_HC_CT_CT_HC.xml'), + 298.15 * u.Kelvin, + gomc_binary_directory, + { + get_mosdef_dihedral_fit_fn('gaussian/HC_CT_CT_HC/output/HC_CT_CT_HC_multiplicity_1.log'): [0], + }, + zeroed_dihedral_atom_types=None, + qm_engine="gaussian", + VDWGeometricSigma=False, + atom_type_naming_style='general', + gomc_cpu_cores=1, + fit_min_validated_r_squared=0.99, + fit_validation_r_squared_rtol=1e-03 + ) + + def test_bad_fit_dihedral_atom_types_input_list_of_4_with_int_at_2(self): + with pytest.raises( + TypeError, + match=r"ERROR: The input 'fit_dihedral_atom_types' variable = \['HC', 'CT', 2, 'HC'\], " + r"but it needs to be a list of 4 strings, " + r"where the strings are the atom types/classes. Example: \['HC', 'CT', 'CT', 'HC'\]." + ): + fit_dihedral_with_gomc( + ['HC', 'CT', 2, 'HC'], + get_mosdef_dihedral_fit_fn('gaussian/HC_CT_CT_HC/input/starting_coords/ethane_aa.mol2'), + get_mosdef_dihedral_fit_fn('oplsaa_ethane_HC_CT_CT_HC.xml'), + 298.15 * u.Kelvin, + gomc_binary_directory, + { + get_mosdef_dihedral_fit_fn('gaussian/HC_CT_CT_HC/output/HC_CT_CT_HC_multiplicity_1.log'): [0], + }, + zeroed_dihedral_atom_types=None, + qm_engine="gaussian", + VDWGeometricSigma=False, + atom_type_naming_style='general', + gomc_cpu_cores=1, + fit_min_validated_r_squared=0.99, + fit_validation_r_squared_rtol=1e-03 + ) + + def test_bad_fit_dihedral_atom_types_input_list_of_4_with_int_at_3(self): + with pytest.raises( + TypeError, + match=r"ERROR: The input 'fit_dihedral_atom_types' variable = \['HC', 'CT', 'CT', 3\], " + r"but it needs to be a list of 4 strings, " + r"where the strings are the atom types/classes. Example: \['HC', 'CT', 'CT', 'HC'\]." 
+ ): + fit_dihedral_with_gomc( + ['HC','CT', 'CT', 3], + get_mosdef_dihedral_fit_fn('gaussian/HC_CT_CT_HC/input/starting_coords/ethane_aa.mol2'), + get_mosdef_dihedral_fit_fn('oplsaa_ethane_HC_CT_CT_HC.xml'), + 298.15 * u.Kelvin, + gomc_binary_directory, + { + get_mosdef_dihedral_fit_fn('gaussian/HC_CT_CT_HC/output/HC_CT_CT_HC_multiplicity_1.log'): [0], + }, + zeroed_dihedral_atom_types=None, + qm_engine="gaussian", + VDWGeometricSigma=False, + atom_type_naming_style='general', + gomc_cpu_cores=1, + fit_min_validated_r_squared=0.99, + fit_validation_r_squared_rtol=1e-03 + ) + + def test_mol2_selection_file_does_not_exist(self): + value_path_mol2='bad_mol2_path.mol2' + with pytest.raises( + ValueError, + match=f"ERROR: The {value_path_mol2} file " + r"\('mol2_selection'\) does not exists."): + + fit_dihedral_with_gomc( + ['HC', 'CT', 'CT', 'HC'], + value_path_mol2, + get_mosdef_dihedral_fit_fn('oplsaa_ethane_HC_CT_CT_HC.xml'), + 298.15 * u.Kelvin, + gomc_binary_directory, + { + get_mosdef_dihedral_fit_fn('gaussian/HC_CT_CT_HC/output/HC_CT_CT_HC_multiplicity_1.log'): [0], + }, + zeroed_dihedral_atom_types=None, + qm_engine="gaussian", + VDWGeometricSigma=False, + atom_type_naming_style='general', + gomc_cpu_cores=1, + fit_min_validated_r_squared=0.99, + fit_validation_r_squared_rtol=1e-03 + ) + + def test_mol2_selection_file_no_mol2_extention(self): + value_path_mol2='bad_mol2_path' + with pytest.raises( + ValueError, + match=r"ERROR: Please enter enter mol2 file \('mol2_selection'\) name with the .mol2 extension."): + + fit_dihedral_with_gomc( + ['HC', 'CT', 'CT', 'HC'], + value_path_mol2, + get_mosdef_dihedral_fit_fn('oplsaa_ethane_HC_CT_CT_HC.xml'), + 298.15 * u.Kelvin, + gomc_binary_directory, + { + get_mosdef_dihedral_fit_fn('gaussian/HC_CT_CT_HC/output/HC_CT_CT_HC_multiplicity_1.log'): [0], + }, + zeroed_dihedral_atom_types=None, + qm_engine="gaussian", + VDWGeometricSigma=False, + atom_type_naming_style='general', + gomc_cpu_cores=1, + 
fit_min_validated_r_squared=0.99, + fit_validation_r_squared_rtol=1e-03 + ) + + def test_mol2_selection_file_not_a_string(self): + value_path_mol2=1 + with pytest.raises( + TypeError, + match=r"ERROR: Please enter mol2 file \('mol2_selection'\) as a string."): + + fit_dihedral_with_gomc( + ['HC', 'CT', 'CT', 'HC'], + value_path_mol2, + get_mosdef_dihedral_fit_fn('oplsaa_ethane_HC_CT_CT_HC.xml'), + 298.15 * u.Kelvin, + gomc_binary_directory, + { + get_mosdef_dihedral_fit_fn('gaussian/HC_CT_CT_HC/output/HC_CT_CT_HC_multiplicity_1.log'): [0], + }, + zeroed_dihedral_atom_types=None, + qm_engine="gaussian", + VDWGeometricSigma=False, + atom_type_naming_style='general', + gomc_cpu_cores=1, + fit_min_validated_r_squared=0.99, + fit_validation_r_squared_rtol=1e-03 + ) + + def test_xml_selection_file_does_not_exist(self): + value_path_xml='bad_xml_path.xml' + with pytest.raises( + ValueError, + match=f"ERROR: The {value_path_xml} file " + r"\('forcefield_selection'\) does not exists."): + + fit_dihedral_with_gomc( + ['HC', 'CT', 'CT', 'HC'], + get_mosdef_dihedral_fit_fn('gaussian/HC_CT_CT_HC/input/starting_coords/ethane_aa.mol2'), + value_path_xml, + 298.15 * u.Kelvin, + gomc_binary_directory, + { + get_mosdef_dihedral_fit_fn('gaussian/HC_CT_CT_HC/output/HC_CT_CT_HC_multiplicity_1.log'): [0], + }, + zeroed_dihedral_atom_types=None, + qm_engine="gaussian", + VDWGeometricSigma=False, + atom_type_naming_style='general', + gomc_cpu_cores=1, + fit_min_validated_r_squared=0.99, + fit_validation_r_squared_rtol=1e-03 + ) + + def test_xml_selection_file_no_xml_extention(self): + value_path_xml='bad_xml_path' + with pytest.raises( + ValueError, + match=r"ERROR: Please enter enter xml file " + r"\('forcefield_selection'\) name with the .xml extension."): + + fit_dihedral_with_gomc( + ['HC', 'CT', 'CT', 'HC'], + get_mosdef_dihedral_fit_fn('gaussian/HC_CT_CT_HC/input/starting_coords/ethane_aa.mol2'), + value_path_xml, + 298.15 * u.Kelvin, + gomc_binary_directory, + { + 
get_mosdef_dihedral_fit_fn('gaussian/HC_CT_CT_HC/output/HC_CT_CT_HC_multiplicity_1.log'): [0], + }, + zeroed_dihedral_atom_types=None, + qm_engine="gaussian", + VDWGeometricSigma=False, + atom_type_naming_style='general', + gomc_cpu_cores=1, + fit_min_validated_r_squared=0.99, + fit_validation_r_squared_rtol=1e-03 + ) + + def test_xml_selection_file_not_a_string(self): + value_path_xml=1 + with pytest.raises( + TypeError, + match=r"ERROR: Please enter xml file \('forcefield_selection'\) as a string."): + + fit_dihedral_with_gomc( + ['HC', 'CT', 'CT', 'HC'], + get_mosdef_dihedral_fit_fn('gaussian/HC_CT_CT_HC/input/starting_coords/ethane_aa.mol2'), + value_path_xml, + 298.15 * u.Kelvin, + gomc_binary_directory, + { + get_mosdef_dihedral_fit_fn('gaussian/HC_CT_CT_HC/output/HC_CT_CT_HC_multiplicity_1.log'): [0], + }, + zeroed_dihedral_atom_types=None, + qm_engine="gaussian", + VDWGeometricSigma=False, + atom_type_naming_style='general', + gomc_cpu_cores=1, + fit_min_validated_r_squared=0.99, + fit_validation_r_squared_rtol=1e-03 + ) + + def test_temperature_unyt_units_not_a_temperture_but_pressure(self): + with pytest.raises( + ValueError, + match=f"ERROR: The 'temperature_unyt_units' is not temperature of type {type(u.unyt_quantity)}."): + fit_dihedral_with_gomc( + ['HC', 'CT', 'CT', 'HC'], + get_mosdef_dihedral_fit_fn('gaussian/HC_CT_CT_HC/input/starting_coords/ethane_aa.mol2'), + get_mosdef_dihedral_fit_fn('oplsaa_ethane_HC_CT_CT_HC.xml'), + 298.15 * u.bar, + gomc_binary_directory, + { + get_mosdef_dihedral_fit_fn('gaussian/HC_CT_CT_HC/output/HC_CT_CT_HC_multiplicity_1.log'): [0], + }, + zeroed_dihedral_atom_types=None, + qm_engine="gaussian", + VDWGeometricSigma=False, + atom_type_naming_style='general', + gomc_cpu_cores=1, + fit_min_validated_r_squared=0.99, + fit_validation_r_squared_rtol=1e-03 + ) + + def test_temperature_unyt_units_not_in_unyt_units(self): + with pytest.raises( + TypeError, + match=f"ERROR: The 'temperature_unyt_units' is not temperature of 
type {type(u.unyt_quantity)}."): + fit_dihedral_with_gomc( + ['HC', 'CT', 'CT', 'HC'], + get_mosdef_dihedral_fit_fn('gaussian/HC_CT_CT_HC/input/starting_coords/ethane_aa.mol2'), + get_mosdef_dihedral_fit_fn('oplsaa_ethane_HC_CT_CT_HC.xml'), + 298.15, + gomc_binary_directory, + { + get_mosdef_dihedral_fit_fn('gaussian/HC_CT_CT_HC/output/HC_CT_CT_HC_multiplicity_1.log'): [0], + }, + zeroed_dihedral_atom_types=None, + qm_engine="gaussian", + VDWGeometricSigma=False, + atom_type_naming_style='general', + gomc_cpu_cores=1, + fit_min_validated_r_squared=0.99, + fit_validation_r_squared_rtol=1e-03 + ) + + def test_qm_log_files_and_entries_not_a_dict(self): + with pytest.raises( + TypeError, + match=r"ERROR: The 'qm_log_files_and_entries_to_remove_dict' is not a dict " + r"with a string keys and list of int>=0 as the values. Example: " + r"\{'path/HC_CT_CT_HC_part_1.log'\): \[\], 'path/HC_CT_CT_HC_part_2.log'\): \[0, 5\]\}" + ): + + fit_dihedral_with_gomc( + ['HC', 'CT', 'CT', 'HC'], + get_mosdef_dihedral_fit_fn('gaussian/HC_CT_CT_HC/input/starting_coords/ethane_aa.mol2'), + get_mosdef_dihedral_fit_fn('oplsaa_ethane_HC_CT_CT_HC.xml'), + 298.15 * u.Kelvin, + gomc_binary_directory, + ['x'], + zeroed_dihedral_atom_types=None, + qm_engine="gaussian", + VDWGeometricSigma=False, + atom_type_naming_style='general', + gomc_cpu_cores=1, + fit_min_validated_r_squared=0.99, + fit_validation_r_squared_rtol=1e-03 + ) + + def test_qm_log_files_and_entries_key_1_not_a_string(self): + with pytest.raises( + TypeError, + match=r"ERROR: The 'qm_log_files_and_entries_to_remove_dict' is not a dict " + r"with a string keys and list of int>=0 as the values. 
Example: " + r"\{'path/HC_CT_CT_HC_part_1.log'\): \[\], 'path/HC_CT_CT_HC_part_2.log'\): \[0, 5\]\}" + ): + + fit_dihedral_with_gomc( + ['HC', 'CT', 'CT', 'HC'], + get_mosdef_dihedral_fit_fn('gaussian/HC_CT_CT_HC/input/starting_coords/ethane_aa.mol2'), + get_mosdef_dihedral_fit_fn('oplsaa_ethane_HC_CT_CT_HC.xml'), + 298.15 * u.Kelvin, + gomc_binary_directory, + { + 1: [], + get_mosdef_dihedral_fit_fn( + 'gaussian/HC_CT_CT_HC/output/HC_CT_CT_HC_multiplicity_1_copy_for_test.log'): [0,1], + }, + zeroed_dihedral_atom_types=None, + qm_engine="gaussian", + VDWGeometricSigma=False, + atom_type_naming_style='general', + gomc_cpu_cores=1, + fit_min_validated_r_squared=0.99, + fit_validation_r_squared_rtol=1e-03 + ) + + def test_qm_log_files_and_entries_key_2_not_a_string(self): + with pytest.raises( + TypeError, + match=r"ERROR: The 'qm_log_files_and_entries_to_remove_dict' is not a dict " + r"with a string keys and list of int>=0 as the values. Example: " + r"\{'path/HC_CT_CT_HC_part_1.log'\): \[\], 'path/HC_CT_CT_HC_part_2.log'\): \[0, 5\]\}" + ): + + fit_dihedral_with_gomc( + ['HC', 'CT', 'CT', 'HC'], + get_mosdef_dihedral_fit_fn('gaussian/HC_CT_CT_HC/input/starting_coords/ethane_aa.mol2'), + get_mosdef_dihedral_fit_fn('oplsaa_ethane_HC_CT_CT_HC.xml'), + 298.15 * u.Kelvin, + gomc_binary_directory, + { + get_mosdef_dihedral_fit_fn('gaussian/HC_CT_CT_HC/output/HC_CT_CT_HC_multiplicity_1.log'): [], + 2: [0,1], + }, + zeroed_dihedral_atom_types=None, + qm_engine="gaussian", + VDWGeometricSigma=False, + atom_type_naming_style='general', + gomc_cpu_cores=1, + fit_min_validated_r_squared=0.99, + fit_validation_r_squared_rtol=1e-03 + ) + + def test_qm_log_files_and_entries_value_1_not_a_list(self): + with pytest.raises( + TypeError, + match=r"ERROR: The 'qm_log_files_and_entries_to_remove_dict' is not a dict " + r"with a string keys and list of int>=0 as the values. 
Example: " + r"\{'path/HC_CT_CT_HC_part_1.log'\): \[\], 'path/HC_CT_CT_HC_part_2.log'\): \[0, 5\]\}" + ): + + fit_dihedral_with_gomc( + ['HC', 'CT', 'CT', 'HC'], + get_mosdef_dihedral_fit_fn('gaussian/HC_CT_CT_HC/input/starting_coords/ethane_aa.mol2'), + get_mosdef_dihedral_fit_fn('oplsaa_ethane_HC_CT_CT_HC.xml'), + 298.15 * u.Kelvin, + gomc_binary_directory, + { + get_mosdef_dihedral_fit_fn('gaussian/HC_CT_CT_HC/output/HC_CT_CT_HC_multiplicity_1.log'): 's', + get_mosdef_dihedral_fit_fn( + 'gaussian/HC_CT_CT_HC/output/HC_CT_CT_HC_multiplicity_1_copy_for_test.log'): [0,1], + }, + zeroed_dihedral_atom_types=None, + qm_engine="gaussian", + VDWGeometricSigma=False, + atom_type_naming_style='general', + gomc_cpu_cores=1, + fit_min_validated_r_squared=0.99, + fit_validation_r_squared_rtol=1e-03 + ) + + def test_qm_log_files_and_entries_value_2_not_a_list(self): + with pytest.raises( + TypeError, + match=r"ERROR: The 'qm_log_files_and_entries_to_remove_dict' is not a dict " + r"with a string keys and list of int>=0 as the values. 
Example: "
+                  r"\{'path/HC_CT_CT_HC_part_1.log'\): \[\], 'path/HC_CT_CT_HC_part_2.log'\): \[0, 5\]\}"
+        ):
+
+            fit_dihedral_with_gomc(
+                ['HC', 'CT', 'CT', 'HC'],
+                get_mosdef_dihedral_fit_fn('gaussian/HC_CT_CT_HC/input/starting_coords/ethane_aa.mol2'),
+                get_mosdef_dihedral_fit_fn('oplsaa_ethane_HC_CT_CT_HC.xml'),
+                298.15 * u.Kelvin,
+                gomc_binary_directory,
+                {
+                    get_mosdef_dihedral_fit_fn('gaussian/HC_CT_CT_HC/output/HC_CT_CT_HC_multiplicity_1.log'): [],
+                    get_mosdef_dihedral_fit_fn(
+                        'gaussian/HC_CT_CT_HC/output/HC_CT_CT_HC_multiplicity_1_copy_for_test.log'): 'x',
+                },
+                zeroed_dihedral_atom_types=None,
+                qm_engine="gaussian",
+                VDWGeometricSigma=False,
+                atom_type_naming_style='general',
+                gomc_cpu_cores=1,
+                fit_min_validated_r_squared=0.99,
+                fit_validation_r_squared_rtol=1e-03
+            )
+
+    def test_qm_log_files_and_entries_list_1_not_all_int(self):
+        with pytest.raises(
+            TypeError,
+            match=r"ERROR: The 'qm_log_files_and_entries_to_remove_dict' is not a dict "
+                  r"with a string keys and list of int>=0 as the values. Example: "
+                  r"\{'path/HC_CT_CT_HC_part_1.log'\): \[\], 'path/HC_CT_CT_HC_part_2.log'\): \[0, 5\]\}"
+        ):
+
+            fit_dihedral_with_gomc(
+                ['HC', 'CT', 'CT', 'HC'],
+                get_mosdef_dihedral_fit_fn('gaussian/HC_CT_CT_HC/input/starting_coords/ethane_aa.mol2'),
+                get_mosdef_dihedral_fit_fn('oplsaa_ethane_HC_CT_CT_HC.xml'),
+                298.15 * u.Kelvin,
+                gomc_binary_directory,
+                {
+                    get_mosdef_dihedral_fit_fn('gaussian/HC_CT_CT_HC/output/HC_CT_CT_HC_multiplicity_1.log'): [0, 's'],  # the only invalid entry: non-int 's'
+                    get_mosdef_dihedral_fit_fn(
+                        'gaussian/HC_CT_CT_HC/output/HC_CT_CT_HC_multiplicity_1_copy_for_test.log'): [0, 5],  # kept valid so only list 1 can trigger the error
+                },
+                zeroed_dihedral_atom_types=None,
+                qm_engine="gaussian",
+                VDWGeometricSigma=False,
+                atom_type_naming_style='general',
+                gomc_cpu_cores=1,
+                fit_min_validated_r_squared=0.99,
+                fit_validation_r_squared_rtol=1e-03
+            )
+
+    def test_qm_log_files_and_entries_list_2_not_all_int(self):
+        with pytest.raises(
+            TypeError,
+            match=r"ERROR: The 'qm_log_files_and_entries_to_remove_dict' is not a dict "
+                  r"with a string keys and list of int>=0 as the values. 
Example: " + r"\{'path/HC_CT_CT_HC_part_1.log'\): \[\], 'path/HC_CT_CT_HC_part_2.log'\): \[0, 5\]\}" + ): + + fit_dihedral_with_gomc( + ['HC', 'CT', 'CT', 'HC'], + get_mosdef_dihedral_fit_fn('gaussian/HC_CT_CT_HC/input/starting_coords/ethane_aa.mol2'), + get_mosdef_dihedral_fit_fn('oplsaa_ethane_HC_CT_CT_HC.xml'), + 298.15 * u.Kelvin, + gomc_binary_directory, + { + get_mosdef_dihedral_fit_fn('gaussian/HC_CT_CT_HC/output/HC_CT_CT_HC_multiplicity_1.log'): [], + get_mosdef_dihedral_fit_fn( + 'gaussian/HC_CT_CT_HC/output/HC_CT_CT_HC_multiplicity_1_copy_for_test.log'): [0, 5, 's'], + }, + zeroed_dihedral_atom_types=None, + qm_engine="gaussian", + VDWGeometricSigma=False, + atom_type_naming_style='general', + gomc_cpu_cores=1, + fit_min_validated_r_squared=0.99, + fit_validation_r_squared_rtol=1e-03 + ) + + def test_qm_log_files_and_entries_list_1_int_less_than_0(self): + with pytest.raises( + TypeError, + match=r"ERROR: The 'qm_log_files_and_entries_to_remove_dict' is not a dict " + r"with a string keys and list of int>=0 as the values. 
Example: " + r"\{'path/HC_CT_CT_HC_part_1.log'\): \[\], 'path/HC_CT_CT_HC_part_2.log'\): \[0, 5\]\}" + ): + + fit_dihedral_with_gomc( + ['HC', 'CT', 'CT', 'HC'], + get_mosdef_dihedral_fit_fn('gaussian/HC_CT_CT_HC/input/starting_coords/ethane_aa.mol2'), + get_mosdef_dihedral_fit_fn('oplsaa_ethane_HC_CT_CT_HC.xml'), + 298.15 * u.Kelvin, + gomc_binary_directory, + { + get_mosdef_dihedral_fit_fn('gaussian/HC_CT_CT_HC/output/HC_CT_CT_HC_multiplicity_1.log'): [-1], + get_mosdef_dihedral_fit_fn( + 'gaussian/HC_CT_CT_HC/output/HC_CT_CT_HC_multiplicity_1_copy_for_test.log'): [0, 5], + }, + zeroed_dihedral_atom_types=None, + qm_engine="gaussian", + VDWGeometricSigma=False, + atom_type_naming_style='general', + gomc_cpu_cores=1, + fit_min_validated_r_squared=0.99, + fit_validation_r_squared_rtol=1e-03 + ) + + def test_qm_log_files_and_entries_list_2_int_less_than_0(self): + with pytest.raises( + TypeError, + match=r"ERROR: The 'qm_log_files_and_entries_to_remove_dict' is not a dict " + r"with a string keys and list of int>=0 as the values. 
Example: "
+                  r"\{'path/HC_CT_CT_HC_part_1.log'\): \[\], 'path/HC_CT_CT_HC_part_2.log'\): \[0, 5\]\}"
+        ):
+
+            fit_dihedral_with_gomc(
+                ['HC', 'CT', 'CT', 'HC'],
+                get_mosdef_dihedral_fit_fn('gaussian/HC_CT_CT_HC/input/starting_coords/ethane_aa.mol2'),
+                get_mosdef_dihedral_fit_fn('oplsaa_ethane_HC_CT_CT_HC.xml'),
+                298.15 * u.Kelvin,
+                gomc_binary_directory,
+                {
+                    get_mosdef_dihedral_fit_fn('gaussian/HC_CT_CT_HC/output/HC_CT_CT_HC_multiplicity_1.log'): [],
+                    get_mosdef_dihedral_fit_fn(
+                        'gaussian/HC_CT_CT_HC/output/HC_CT_CT_HC_multiplicity_1_copy_for_test.log'): [0, -5],
+                },
+                zeroed_dihedral_atom_types=None,
+                qm_engine="gaussian",
+                VDWGeometricSigma=False,
+                atom_type_naming_style='general',
+                gomc_cpu_cores=1,
+                fit_min_validated_r_squared=0.99,
+                fit_validation_r_squared_rtol=1e-03
+            )
+
+    def test_gomc_binary_path_not_a_string(self):
+        with pytest.raises(
+            TypeError,
+            match=r"ERROR: Please enter the 'gomc_binary_path' file as a string."
+        ):
+            fit_dihedral_with_gomc(
+                ['HC', 'CT', 'CT', 'HC'],
+                get_mosdef_dihedral_fit_fn('gaussian/HC_CT_CT_HC/input/starting_coords/ethane_aa.mol2'),
+                get_mosdef_dihedral_fit_fn('oplsaa_ethane_HC_CT_CT_HC.xml'),
+                298.15 * u.Kelvin,
+                99999,
+                {
+                    get_mosdef_dihedral_fit_fn('gaussian/HC_CT_CT_HC/output/HC_CT_CT_HC_multiplicity_1.log'): [],
+                },
+                zeroed_dihedral_atom_types=None,
+                qm_engine="gaussian",
+                VDWGeometricSigma=False,
+                atom_type_naming_style='general',
+                gomc_cpu_cores=1,
+                fit_min_validated_r_squared=0.99,
+                fit_validation_r_squared_rtol=1e-03
+            )
+
+    def test_gomc_binary_path_containing_the_GOMC_CPU_NVT_file_does_not_exist(self):
+        with pytest.raises(
+            ValueError,
+            match=r"ERROR: The 'gomc_binary_path' file does not exist or contain the GOMC 'GOMC_CPU_NVT' file."
+        ):
+            fit_dihedral_with_gomc(
+                ['HC', 'CT', 'CT', 'HC'],
+                get_mosdef_dihedral_fit_fn('gaussian/HC_CT_CT_HC/input/starting_coords/ethane_aa.mol2'),
+                get_mosdef_dihedral_fit_fn('oplsaa_ethane_HC_CT_CT_HC.xml'),
+                298.15 * u.Kelvin,
+                "gomc_binary_directory",  # deliberately a literal, non-existent path (not the gomc_binary_directory variable)
+                {
+                    get_mosdef_dihedral_fit_fn('gaussian/HC_CT_CT_HC/output/HC_CT_CT_HC_multiplicity_1.log'): [],
+                },
+                zeroed_dihedral_atom_types=None,
+                qm_engine="gaussian",
+                VDWGeometricSigma=False,
+                atom_type_naming_style='general',
+                gomc_cpu_cores=1,
+                fit_min_validated_r_squared=0.99,
+                fit_validation_r_squared_rtol=1e-03
+            )
+
+    def test_zeroed_dihedral_atom_types_not_list(self):
+        with pytest.raises(
+            TypeError,
+            match=r"ERROR: The 'zeroed_dihedral_atom_types' is not None or a list containing "
+                  r"lists with 4 strings each. Example: "
+                  r"\[\['CT', 'CT, 'CT, 'HC'\], \['NT', 'CT, 'CT, 'HC'\]\]."
+        ):
+            fit_dihedral_with_gomc(
+                ['CT', 'CT', 'C', 'OH'],
+                get_mosdef_dihedral_fit_fn(
+                    'gaussian_style_output_files/CT_CT_C_OH/input/starting_coords/CT_CT_C_3_OH.mol2'),
+                get_mosdef_dihedral_fit_fn('oplsaa_CT_CT_C_OH_in_COOH.xml'),
+                298.15 * u.Kelvin,
+                gomc_binary_directory,
+                {
+                    get_mosdef_dihedral_fit_fn('gaussian_style_output_files/CT_CT_C_OH_split_part_2/output'): [],
+                    get_mosdef_dihedral_fit_fn('gaussian_style_output_files/CT_CT_C_OH_split_part_1/output'): [0],
+                },
+                manual_dihedral_atom_numbers_list=[3, 2, 1, 4],
+                zeroed_dihedral_atom_types='str',
+                qm_engine="gaussian_style_final_files",
+                VDWGeometricSigma=True,
+                atom_type_naming_style='general',
+                gomc_cpu_cores=1,
+                fit_min_validated_r_squared=0.99,
+                fit_validation_r_squared_rtol=0.02
+            )
+
+    def test_zeroed_dihedral_atom_types_list_1_str(self):
+        with pytest.raises(
+            TypeError,
+            match=r"ERROR: The 'zeroed_dihedral_atom_types' is not None or a list containing "
+                  r"lists with 4 strings each. Example: "
+                  r"\[\['CT', 'CT, 'CT, 'HC'\], \['NT', 'CT, 'CT, 'HC'\]\]."
+ ): + fit_dihedral_with_gomc( + ['CT', 'CT', 'C', 'OH'], + get_mosdef_dihedral_fit_fn( + 'gaussian_style_output_files/CT_CT_C_OH/input/starting_coords/CT_CT_C_3_OH.mol2'), + get_mosdef_dihedral_fit_fn('oplsaa_CT_CT_C_OH_in_COOH.xml'), + 298.15 * u.Kelvin, + gomc_binary_directory, + { + get_mosdef_dihedral_fit_fn('gaussian_style_output_files/CT_CT_C_OH_split_part_2/output'): [], + get_mosdef_dihedral_fit_fn('gaussian_style_output_files/CT_CT_C_OH_split_part_1/output'): [0], + }, + manual_dihedral_atom_numbers_list=[3, 2, 1, 4], + zeroed_dihedral_atom_types=['str', ['HC', 'CT', 'CT', 'C']], + qm_engine="gaussian_style_final_files", + VDWGeometricSigma=True, + atom_type_naming_style='general', + gomc_cpu_cores=1, + fit_min_validated_r_squared=0.99, + fit_validation_r_squared_rtol=0.02 + ) + + def test_zeroed_dihedral_atom_types_list_2_str(self): + with pytest.raises( + TypeError, + match=r"ERROR: The 'zeroed_dihedral_atom_types' is not None or a list containing " + r"lists with 4 strings each. Example: " + r"\[\['CT', 'CT, 'CT, 'HC'\], \['NT', 'CT, 'CT, 'HC'\]\]." 
+ ): + fit_dihedral_with_gomc( + ['CT', 'CT', 'C', 'OH'], + get_mosdef_dihedral_fit_fn( + 'gaussian_style_output_files/CT_CT_C_OH/input/starting_coords/CT_CT_C_3_OH.mol2'), + get_mosdef_dihedral_fit_fn('oplsaa_CT_CT_C_OH_in_COOH.xml'), + 298.15 * u.Kelvin, + gomc_binary_directory, + { + get_mosdef_dihedral_fit_fn('gaussian_style_output_files/CT_CT_C_OH_split_part_2/output'): [], + get_mosdef_dihedral_fit_fn('gaussian_style_output_files/CT_CT_C_OH_split_part_1/output'): [0], + }, + manual_dihedral_atom_numbers_list=[3, 2, 1, 4], + zeroed_dihedral_atom_types=[['CT', 'CT', 'C', 'O_3'], 'str'], + qm_engine="gaussian_style_final_files", + VDWGeometricSigma=True, + atom_type_naming_style='general', + gomc_cpu_cores=1, + fit_min_validated_r_squared=0.99, + fit_validation_r_squared_rtol=0.02 + ) + + def test_zeroed_dihedral_atom_types_list_1_not_4_strings(self): + with pytest.raises( + TypeError, + match=r"ERROR: The 'zeroed_dihedral_atom_types' is not None or a list containing " + r"lists with 4 strings each. Example: " + r"\[\['CT', 'CT, 'CT, 'HC'\], \['NT', 'CT, 'CT, 'HC'\]\]." 
+ ): + fit_dihedral_with_gomc( + ['CT', 'CT', 'C', 'OH'], + get_mosdef_dihedral_fit_fn( + 'gaussian_style_output_files/CT_CT_C_OH/input/starting_coords/CT_CT_C_3_OH.mol2'), + get_mosdef_dihedral_fit_fn('oplsaa_CT_CT_C_OH_in_COOH.xml'), + 298.15 * u.Kelvin, + gomc_binary_directory, + { + get_mosdef_dihedral_fit_fn('gaussian_style_output_files/CT_CT_C_OH_split_part_2/output'): [], + get_mosdef_dihedral_fit_fn('gaussian_style_output_files/CT_CT_C_OH_split_part_1/output'): [0], + }, + manual_dihedral_atom_numbers_list=[3, 2, 1, 4], + zeroed_dihedral_atom_types=[['CT', 1, 'C', 'O_3'], ['HC', 'CT', 'CT', 'C']], + qm_engine="gaussian_style_final_files", + VDWGeometricSigma=True, + atom_type_naming_style='general', + gomc_cpu_cores=1, + fit_min_validated_r_squared=0.99, + fit_validation_r_squared_rtol=0.02 + ) + + def test_zeroed_dihedral_atom_types_list_2_not_4_strings(self): + with pytest.raises( + TypeError, + match=r"ERROR: The 'zeroed_dihedral_atom_types' is not None or a list containing " + r"lists with 4 strings each. Example: " + r"\[\['CT', 'CT, 'CT, 'HC'\], \['NT', 'CT, 'CT, 'HC'\]\]." 
+ ): + fit_dihedral_with_gomc( + ['CT', 'CT', 'C', 'OH'], + get_mosdef_dihedral_fit_fn( + 'gaussian_style_output_files/CT_CT_C_OH/input/starting_coords/CT_CT_C_3_OH.mol2'), + get_mosdef_dihedral_fit_fn('oplsaa_CT_CT_C_OH_in_COOH.xml'), + 298.15 * u.Kelvin, + gomc_binary_directory, + { + get_mosdef_dihedral_fit_fn('gaussian_style_output_files/CT_CT_C_OH_split_part_2/output'): [], + get_mosdef_dihedral_fit_fn('gaussian_style_output_files/CT_CT_C_OH_split_part_1/output'): [0], + }, + manual_dihedral_atom_numbers_list=[3, 2, 1, 4], + zeroed_dihedral_atom_types=[['CT', 'CT', 'C', 'O_3'], ['HC', 'CT', 2, 'C']], + qm_engine="gaussian_style_final_files", + VDWGeometricSigma=True, + atom_type_naming_style='general', + gomc_cpu_cores=1, + fit_min_validated_r_squared=0.99, + fit_validation_r_squared_rtol=0.02 + ) + + def test_zeroed_dihedral_atom_types_list_1_not_lenght_4(self): + with pytest.raises( + TypeError, + match=r"ERROR: The 'zeroed_dihedral_atom_types' is not None or a list containing " + r"lists with 4 strings each. Example: " + r"\[\['CT', 'CT, 'CT, 'HC'\], \['NT', 'CT, 'CT, 'HC'\]\]." 
+ ): + fit_dihedral_with_gomc( + ['CT', 'CT', 'C', 'OH'], + get_mosdef_dihedral_fit_fn( + 'gaussian_style_output_files/CT_CT_C_OH/input/starting_coords/CT_CT_C_3_OH.mol2'), + get_mosdef_dihedral_fit_fn('oplsaa_CT_CT_C_OH_in_COOH.xml'), + 298.15 * u.Kelvin, + gomc_binary_directory, + { + get_mosdef_dihedral_fit_fn('gaussian_style_output_files/CT_CT_C_OH_split_part_2/output'): [], + get_mosdef_dihedral_fit_fn('gaussian_style_output_files/CT_CT_C_OH_split_part_1/output'): [0], + }, + manual_dihedral_atom_numbers_list=[3, 2, 1, 4], + zeroed_dihedral_atom_types=[['CT', 'C', 'O_3'], ['HC', 'CT', 'CT', 'C']], + qm_engine="gaussian_style_final_files", + VDWGeometricSigma=True, + atom_type_naming_style='general', + gomc_cpu_cores=1, + fit_min_validated_r_squared=0.99, + fit_validation_r_squared_rtol=0.02 + ) + + def test_zeroed_dihedral_atom_types_list_2_not_lenght_4(self): + with pytest.raises( + TypeError, + match=r"ERROR: The 'zeroed_dihedral_atom_types' is not None or a list containing " + r"lists with 4 strings each. Example: " + r"\[\['CT', 'CT, 'CT, 'HC'\], \['NT', 'CT, 'CT, 'HC'\]\]." 
+ ): + fit_dihedral_with_gomc( + ['CT', 'CT', 'C', 'OH'], + get_mosdef_dihedral_fit_fn( + 'gaussian_style_output_files/CT_CT_C_OH/input/starting_coords/CT_CT_C_3_OH.mol2'), + get_mosdef_dihedral_fit_fn('oplsaa_CT_CT_C_OH_in_COOH.xml'), + 298.15 * u.Kelvin, + gomc_binary_directory, + { + get_mosdef_dihedral_fit_fn('gaussian_style_output_files/CT_CT_C_OH_split_part_2/output'): [], + get_mosdef_dihedral_fit_fn('gaussian_style_output_files/CT_CT_C_OH_split_part_1/output'): [0], + }, + manual_dihedral_atom_numbers_list=[3, 2, 1, 4], + zeroed_dihedral_atom_types=[['CT', 'CT', 'C', 'O_3'], ['HC', 'CT', 'CT']], + qm_engine="gaussian_style_final_files", + VDWGeometricSigma=True, + atom_type_naming_style='general', + gomc_cpu_cores=1, + fit_min_validated_r_squared=0.99, + fit_validation_r_squared_rtol=0.02 + ) + + def test_qm_engine_not_correct_value(self): + with pytest.raises( + ValueError, + match=f"ERROR: The 'qm_engine' = {'x'}, which is not " + f"any of the available options. " + f"The options are 'gaussian' or 'gaussian_style_final_files'." 
+ ): + fit_dihedral_with_gomc( + ['CT', 'CT', 'C', 'OH'], + get_mosdef_dihedral_fit_fn( + 'gaussian_style_output_files/CT_CT_C_OH/input/starting_coords/CT_CT_C_3_OH.mol2'), + get_mosdef_dihedral_fit_fn('oplsaa_CT_CT_C_OH_in_COOH.xml'), + 298.15 * u.Kelvin, + gomc_binary_directory, + { + get_mosdef_dihedral_fit_fn('gaussian_style_output_files/CT_CT_C_OH_split_part_2/output'): [], + get_mosdef_dihedral_fit_fn('gaussian_style_output_files/CT_CT_C_OH_split_part_1/output'): [0], + }, + manual_dihedral_atom_numbers_list=[3, 2, 1, 4], + zeroed_dihedral_atom_types=[['CT', 'CT', 'C', 'O_3'], ['HC', 'CT', 'CT', 'C']], + qm_engine="x", + VDWGeometricSigma=True, + atom_type_naming_style='general', + gomc_cpu_cores=1, + fit_min_validated_r_squared=0.99, + fit_validation_r_squared_rtol=0.02 + ) + + def test_qm_engine_not_a_string(self): + with pytest.raises( + TypeError, + match=f"ERROR: The 'qm_engine' is a {type(['x'])}, but it needs to be a str." + ): + fit_dihedral_with_gomc( + ['CT', 'CT', 'C', 'OH'], + get_mosdef_dihedral_fit_fn( + 'gaussian_style_output_files/CT_CT_C_OH/input/starting_coords/CT_CT_C_3_OH.mol2'), + get_mosdef_dihedral_fit_fn('oplsaa_CT_CT_C_OH_in_COOH.xml'), + 298.15 * u.Kelvin, + gomc_binary_directory, + { + get_mosdef_dihedral_fit_fn('gaussian_style_output_files/CT_CT_C_OH_split_part_2/output'): [], + get_mosdef_dihedral_fit_fn('gaussian_style_output_files/CT_CT_C_OH_split_part_1/output'): [0], + }, + manual_dihedral_atom_numbers_list=[3, 2, 1, 4], + zeroed_dihedral_atom_types=[['CT', 'CT', 'C', 'O_3'], ['HC', 'CT', 'CT', 'C']], + qm_engine=["x"], + VDWGeometricSigma=True, + atom_type_naming_style='general', + gomc_cpu_cores=1, + fit_min_validated_r_squared=0.99, + fit_validation_r_squared_rtol=0.02 + ) + + def test_atom_type_naming_style_not_correct_value(self): + with pytest.raises( + ValueError, + match=f"ERROR: The 'atom_type_naming_style' = {'x'}, which is not " + f"any of the available options. " + f"The options are 'general' or 'all_unique'." 
+ ): + fit_dihedral_with_gomc( + ['CT', 'CT', 'C', 'OH'], + get_mosdef_dihedral_fit_fn( + 'gaussian_style_output_files/CT_CT_C_OH/input/starting_coords/CT_CT_C_3_OH.mol2'), + get_mosdef_dihedral_fit_fn('oplsaa_CT_CT_C_OH_in_COOH.xml'), + 298.15 * u.Kelvin, + gomc_binary_directory, + { + get_mosdef_dihedral_fit_fn('gaussian_style_output_files/CT_CT_C_OH_split_part_2/output'): [], + get_mosdef_dihedral_fit_fn('gaussian_style_output_files/CT_CT_C_OH_split_part_1/output'): [0], + }, + manual_dihedral_atom_numbers_list=[3, 2, 1, 4], + zeroed_dihedral_atom_types=[['CT', 'CT', 'C', 'O_3'], ['HC', 'CT', 'CT', 'C']], + qm_engine="gaussian_style_final_files", + VDWGeometricSigma=True, + atom_type_naming_style='x', + gomc_cpu_cores=1, + fit_min_validated_r_squared=0.99, + fit_validation_r_squared_rtol=0.02 + ) + + def test_atom_type_naming_style_not_a_string(self): + with pytest.raises( + TypeError, + match=f"ERROR: The 'atom_type_naming_style' is a {type(['x'])}, but it needs to be a str." + ): + fit_dihedral_with_gomc( + ['CT', 'CT', 'C', 'OH'], + get_mosdef_dihedral_fit_fn( + 'gaussian_style_output_files/CT_CT_C_OH/input/starting_coords/CT_CT_C_3_OH.mol2'), + get_mosdef_dihedral_fit_fn('oplsaa_CT_CT_C_OH_in_COOH.xml'), + 298.15 * u.Kelvin, + gomc_binary_directory, + { + get_mosdef_dihedral_fit_fn('gaussian_style_output_files/CT_CT_C_OH_split_part_2/output'): [], + get_mosdef_dihedral_fit_fn('gaussian_style_output_files/CT_CT_C_OH_split_part_1/output'): [0], + }, + manual_dihedral_atom_numbers_list=[3, 2, 1, 4], + zeroed_dihedral_atom_types=[['CT', 'CT', 'C', 'O_3'], ['HC', 'CT', 'CT', 'C']], + qm_engine="gaussian_style_final_files", + VDWGeometricSigma=True, + atom_type_naming_style=["x"], + gomc_cpu_cores=1, + fit_min_validated_r_squared=0.99, + fit_validation_r_squared_rtol=0.02 + ) + + def test_gomc_cpu_cores_not_correct_value(self): + with pytest.raises( + ValueError, + match=f"ERROR: The 'gomc_cpu_cores' = {0}, and it must be an int > 0." 
+ ): + fit_dihedral_with_gomc( + ['CT', 'CT', 'C', 'OH'], + get_mosdef_dihedral_fit_fn( + 'gaussian_style_output_files/CT_CT_C_OH/input/starting_coords/CT_CT_C_3_OH.mol2'), + get_mosdef_dihedral_fit_fn('oplsaa_CT_CT_C_OH_in_COOH.xml'), + 298.15 * u.Kelvin, + gomc_binary_directory, + { + get_mosdef_dihedral_fit_fn('gaussian_style_output_files/CT_CT_C_OH_split_part_2/output'): [], + get_mosdef_dihedral_fit_fn('gaussian_style_output_files/CT_CT_C_OH_split_part_1/output'): [0], + }, + manual_dihedral_atom_numbers_list=[3, 2, 1, 4], + zeroed_dihedral_atom_types=[['CT', 'CT', 'C', 'O_3'], ['HC', 'CT', 'CT', 'C']], + qm_engine="gaussian_style_final_files", + VDWGeometricSigma=True, + atom_type_naming_style='general', + gomc_cpu_cores=0, + fit_min_validated_r_squared=0.99, + fit_validation_r_squared_rtol=0.02 + ) + + def test_gomc_cpu_cores_not_a_int(self): + with pytest.raises( + TypeError, + match=f"ERROR: The 'gomc_cpu_cores' is a {type(1.000)}, but it needs to be a int." + ): + fit_dihedral_with_gomc( + ['CT', 'CT', 'C', 'OH'], + get_mosdef_dihedral_fit_fn( + 'gaussian_style_output_files/CT_CT_C_OH/input/starting_coords/CT_CT_C_3_OH.mol2'), + get_mosdef_dihedral_fit_fn('oplsaa_CT_CT_C_OH_in_COOH.xml'), + 298.15 * u.Kelvin, + gomc_binary_directory, + { + get_mosdef_dihedral_fit_fn('gaussian_style_output_files/CT_CT_C_OH_split_part_2/output'): [], + get_mosdef_dihedral_fit_fn('gaussian_style_output_files/CT_CT_C_OH_split_part_1/output'): [0], + }, + manual_dihedral_atom_numbers_list=[3, 2, 1, 4], + zeroed_dihedral_atom_types=[['CT', 'CT', 'C', 'O_3'], ['HC', 'CT', 'CT', 'C']], + qm_engine="gaussian_style_final_files", + VDWGeometricSigma=True, + atom_type_naming_style='general', + gomc_cpu_cores=1.000, + fit_min_validated_r_squared=0.99, + fit_validation_r_squared_rtol=0.02 + ) + + def test_fit_min_validated_r_squared_not_correct_value_is_0(self): + with pytest.raises( + ValueError, + match=f"ERROR: The 'fit_min_validated_r_squared'= {0.00}, " + f"but it must be a 00" + 
): + fit_dihedral_with_gomc( + ['CT', 'CT', 'C', 'OH'], + get_mosdef_dihedral_fit_fn( + 'gaussian_style_output_files/CT_CT_C_OH/input/starting_coords/CT_CT_C_3_OH.mol2'), + get_mosdef_dihedral_fit_fn('oplsaa_CT_CT_C_OH_in_COOH.xml'), + 298.15 * u.Kelvin, + gomc_binary_directory, + { + get_mosdef_dihedral_fit_fn('gaussian_style_output_files/CT_CT_C_OH_split_part_2/output'): [], + get_mosdef_dihedral_fit_fn('gaussian_style_output_files/CT_CT_C_OH_split_part_1/output'): [0], + }, + manual_dihedral_atom_numbers_list=[3, 2, 1, 4], + zeroed_dihedral_atom_types=[['CT', 'CT', 'C', 'O_3'], ['HC', 'CT', 'CT', 'C']], + qm_engine="gaussian_style_final_files", + VDWGeometricSigma=True, + atom_type_naming_style='general', + gomc_cpu_cores=1, + fit_min_validated_r_squared=0.99, + fit_validation_r_squared_rtol=0.00 + ) + + def test_fit_validation_r_squared_rtol_not_correct_value_is_1(self): + with pytest.raises( + ValueError, + match=f"ERROR: The 'fit_validation_r_squared_rtol' = {-1.00}, " + f"but it must be a float>0." 
+ ): + fit_dihedral_with_gomc( + ['CT', 'CT', 'C', 'OH'], + get_mosdef_dihedral_fit_fn( + 'gaussian_style_output_files/CT_CT_C_OH/input/starting_coords/CT_CT_C_3_OH.mol2'), + get_mosdef_dihedral_fit_fn('oplsaa_CT_CT_C_OH_in_COOH.xml'), + 298.15 * u.Kelvin, + gomc_binary_directory, + { + get_mosdef_dihedral_fit_fn('gaussian_style_output_files/CT_CT_C_OH_split_part_2/output'): [], + get_mosdef_dihedral_fit_fn('gaussian_style_output_files/CT_CT_C_OH_split_part_1/output'): [0], + }, + manual_dihedral_atom_numbers_list=[3, 2, 1, 4], + zeroed_dihedral_atom_types=[['CT', 'CT', 'C', 'O_3'], ['HC', 'CT', 'CT', 'C']], + qm_engine="gaussian_style_final_files", + VDWGeometricSigma=True, + atom_type_naming_style='general', + gomc_cpu_cores=1, + fit_min_validated_r_squared=0.99, + fit_validation_r_squared_rtol=-1.00 + ) + + def test_fit_validation_r_squared_rtol_not_a_float(self): + with pytest.raises( + TypeError, + match=f"ERROR: The 'fit_validation_r_squared_rtol' is a {type(2)}, " + f"but it must be a float." 
+ ): + fit_dihedral_with_gomc( + ['CT', 'CT', 'C', 'OH'], + get_mosdef_dihedral_fit_fn( + 'gaussian_style_output_files/CT_CT_C_OH/input/starting_coords/CT_CT_C_3_OH.mol2'), + get_mosdef_dihedral_fit_fn('oplsaa_CT_CT_C_OH_in_COOH.xml'), + 298.15 * u.Kelvin, + gomc_binary_directory, + { + get_mosdef_dihedral_fit_fn('gaussian_style_output_files/CT_CT_C_OH_split_part_2/output'): [], + get_mosdef_dihedral_fit_fn('gaussian_style_output_files/CT_CT_C_OH_split_part_1/output'): [0], + }, + manual_dihedral_atom_numbers_list=[3, 2, 1, 4], + zeroed_dihedral_atom_types=[['CT', 'CT', 'C', 'O_3'], ['HC', 'CT', 'CT', 'C']], + qm_engine="gaussian_style_final_files", + VDWGeometricSigma=True, + atom_type_naming_style='general', + gomc_cpu_cores=1, + fit_min_validated_r_squared=0.99, + fit_validation_r_squared_rtol=2 + ) + + def test_warning_fit_min_validated_r_squared_and_fit_validation_r_squared_rtol_need_adjusted(self): + with pytest.raises( + ValueError, + match=f"ERROR: The calculated R-squared energy values from the fit type " + f"{'1_2_3'} " + f"does not match the validated case for 'fit_min_validated_r_squared' >= " \ + f"{'0.99'}, " + f"within the relative tolerance or 'fit_validation_r_squared_rtol' = " + f"{'2e-07'}. \n" + f"- Fit via the individual or multi-dihedral fit, when " + f"Gaussian minus GOMC with the selected dihedral set to zero \n" + f"--> R-squared = " + f"{'0.99792638'} \n" + f"- Fit via the validation test case, when " + f"Gaussian minus GOMC with the selected individual dihedral added in GOMC \n" + f"-- >R-squared = " + f"{'0.98698695'} \n" + f"The 'fit_min_validated_r_squared' and 'fit_validation_r_squared_rtol' " + f"variables may need to be adjusted, \n" + f"there is likely something wrong with the fitting procedure, the " + f"software parameters need tuned, or there is a bug in the software. 
\n\n " + f"NOTE: Since the R-squared values are calculated via different parameters, \n" + f"the compared R-squared values could be very different if they are not nearly \n" + r"a perfect fit \(R-squared --> ~0.98 to 0.99999999\)." + ): + fit_dihedral_with_gomc( + ['CT', 'CT', 'C', 'OH'], + get_mosdef_dihedral_fit_fn( + 'gaussian_style_output_files/CT_CT_C_OH/input/starting_coords/CT_CT_C_3_OH.mol2'), + get_mosdef_dihedral_fit_fn('oplsaa_CT_CT_C_OH_in_COOH.xml'), + 298.15 * u.Kelvin, + gomc_binary_directory, + { + get_mosdef_dihedral_fit_fn('gaussian_style_output_files/CT_CT_C_OH_split_part_2/output'): [], + get_mosdef_dihedral_fit_fn('gaussian_style_output_files/CT_CT_C_OH_split_part_1/output'): [0], + }, + manual_dihedral_atom_numbers_list=[3, 2, 1, 4], + zeroed_dihedral_atom_types=[['CT', 'CT', 'C', 'O_3']], + qm_engine="gaussian_style_final_files", + VDWGeometricSigma=True, + atom_type_naming_style='general', + gomc_cpu_cores=1, + fit_min_validated_r_squared=0.99, + fit_validation_r_squared_rtol=0.0000002 ) \ No newline at end of file diff --git a/mosdef_dihedral_fit/utils/file_read_and_write.py b/mosdef_dihedral_fit/utils/file_read_and_write.py index 7d021e5..7022fa2 100755 --- a/mosdef_dihedral_fit/utils/file_read_and_write.py +++ b/mosdef_dihedral_fit/utils/file_read_and_write.py @@ -1706,8 +1706,9 @@ def change_gomc_ff_file_dihedral_values( or not isinstance(fit_dihedral_atom_types[2], str) \ or not isinstance(fit_dihedral_atom_types[3], str): raise TypeError( - f"ERROR: The 'fit_dihedral_atom_types' variable need to be a list of 4 strings, " - f"where the strings are the atom types/classes.") + f"ERROR: The input 'fit_dihedral_atom_types' variable = {fit_dihedral_atom_types}, " + f"but it needs to be a list of 4 strings, " + f"where the strings are the atom types/classes. 
Example: ['HC', 'CT', 'CT', 'HC'].") # check if the other dihedral which need zeroed are input correctly zeroed_dihedral_atom_types_error = ( diff --git a/mosdef_dihedral_fit/utils/io.py b/mosdef_dihedral_fit/utils/io.py index 0b62faa..6d99492 100644 --- a/mosdef_dihedral_fit/utils/io.py +++ b/mosdef_dihedral_fit/utils/io.py @@ -5,22 +5,19 @@ def get_mosdef_dihedral_fit_fn(filename): - """Get the full path to one of the reference testing files provided with utils. - In this source distribution, these files are in ``mosdef_dihedral_fit/utils/files``, - but on installation, they're moved to somewhere in the user's python - site-packages directory. + """Get the whole path name for the file in the mosdef_dihedral_fit/utils/files directory. Parameters ---------- filename : str - Name of the file to load (with respect to the files/folder). + The name of the file in the selected directory (mosdef_dihedral_fit/utils/files). Returns ------- - fn : str + full_path_and_filename : str Full path to filename """ - fn = resource_filename( + full_path_and_filename = resource_filename( "mosdef_dihedral_fit", os.path.join("utils", "files", filename) ) - if not os.path.exists(fn): - raise IOError("Sorry! {} does not exists.".format(fn)) - return fn + if not os.path.exists(full_path_and_filename): + raise ValueError(f"ERROR: The {full_path_and_filename} does not exists.") + return full_path_and_filename