From 20f0d3a82472d0975701c1fc167af362c14e0531 Mon Sep 17 00:00:00 2001 From: CalCraven Date: Sun, 22 Oct 2023 19:26:38 -0500 Subject: [PATCH 1/2] fix gauss vs guass types --- .../dihedral_fit/fit_dihedral_with_gomc.py | 116 +++++++++--------- 1 file changed, 58 insertions(+), 58 deletions(-) diff --git a/mosdef_dihedral_fit/dihedral_fit/fit_dihedral_with_gomc.py b/mosdef_dihedral_fit/dihedral_fit/fit_dihedral_with_gomc.py index f84347a..717200f 100755 --- a/mosdef_dihedral_fit/dihedral_fit/fit_dihedral_with_gomc.py +++ b/mosdef_dihedral_fit/dihedral_fit/fit_dihedral_with_gomc.py @@ -62,7 +62,7 @@ def fit_dihedral_with_gomc( used for the fit, and only uses the valid power in the fitting process. - NOTE: The 'extracted_guassian_data' and 'GOMC_simulations' + NOTE: The 'extracted_gaussian_data' and 'GOMC_simulations' folder are deleted at the beginning of this function, and recreated while running this function to ensure only the lasted data is in these folders. @@ -112,12 +112,12 @@ def fit_dihedral_with_gomc( These removed parameters allow users to remove any bad or repeated data points for the QM log file when needed. - Example 1: {'path/guassian_log_file.log': []} + Example 1: {'path/gaussian_log_file.log': []} - Uses all the optimized data points from the 'path/guassian_log_file.log' file. + Uses all the optimized data points from the 'path/gaussian_log_file.log' file. - Example 2: {'path/guassian_log_file.log': [0, 23]} - Uses all data points from the 'path/guassian_log_file.log' file, except points + Example 2: {'path/gaussian_log_file.log': [0, 23]} + Uses all data points from the 'path/gaussian_log_file.log' file, except points 0 and 23. NOTE: Python counting starts at 0. * qm_engine="gaussian_style_final_files" @@ -160,10 +160,10 @@ def fit_dihedral_with_gomc( | 10 No Show 10 H -3.224767 1.255506 -0.130085 Example 1: {'path_to_gaussian_style_final_files': []} - Uses all the optimized data points from the 'path/guassian_log_file.log' file. 
+ Uses all the optimized data points from the 'path/gaussian_log_file.log' file. Example 2: {'path_to_gaussian_style_final_files': [0, 23]} - Uses all data points from the 'path/guassian_log_file.log' file, except points + Uses all data points from the 'path/gaussian_log_file.log' file, except points 0 and 23. NOTE: Python counting starts at 0. manual_dihedral_atom_numbers_list: list of 4 integers, default=None @@ -184,7 +184,7 @@ def fit_dihedral_with_gomc( 'GOMC_simulations' folder. These files can also be checked to confirm it is zeroing the correct dihedrals. - qm_engine: str (currently only 'guassian'), default='guassian' + qm_engine: str (currently only 'gaussian'), default='gaussian' The Quantum Mechanics (QM) simulation engine utilized to produce the files listed in the 'qm_log_files_and_entries_to_remove_dict' variable(s). VDWGeometricSigma: boolean, default = None @@ -362,11 +362,11 @@ def fit_dihedral_with_gomc( every additional phi dihedral angles selected from the QM simulations. The variable YYY is the cos power equation combinations used for the 'fit_dihedral_atom_types' to fit the k-values. - extracted_guassian_data/dihedral.txt + extracted_gaussian_data/dihedral.txt The QM data in a Gaussian-style output file which holds the scanned dihedral angles, in degrees, and the optimized energy value, in Hartree units, for the molecule/fragment. - extracted_guassian_data/dihedral_coords_position_XXX.txt + extracted_gaussian_data/dihedral_coords_position_XXX.txt The optimized QM dihedral coordinates in a Gaussian-style output file. The XXX is the integer number of the dihedrals starting at 1, adding 1 for every additional phi dihedral angles selected from the QM simulations. 
@@ -830,7 +830,7 @@ def fit_dihedral_with_gomc( shutil.rmtree(xyz_xsc_coor_files_directory) os.mkdir(xyz_xsc_coor_files_directory) - # write all the xyz coordinate from the Guassian optimized coordinate file in the 'xyz_files' folder + # write all the xyz coordinate from the gaussian optimized coordinate file in the 'xyz_files' folder [ atom_pdb_names_list, elementpdb_names_list, @@ -838,26 +838,26 @@ def fit_dihedral_with_gomc( f"{gomc_runs_folder_name}/{output_gomc_pdb_psf_ff_file_name_str}.pdb" ) - qm_energy_file_dir_and_name = "extracted_guassian_data/dihedral.txt" + qm_energy_file_dir_and_name = "extracted_gaussian_data/dihedral.txt" qm_parital_coordinate_file_starting_dir_and_name = ( - "extracted_guassian_data/dihedral_coords_position_" + "extracted_gaussian_data/dihedral_coords_position_" ) qm_coordinate_file_extension = "txt" # check the gaussian file is correct - mdf_frw.check_guassian_angle_energy_file_correct( + mdf_frw.check_gaussian_angle_energy_file_correct( qm_energy_file_dir_and_name ) # Read the gaussian data and extract angles and number of scans (number of angles and degress analyzed) - Guassian_raw_degrees_list = ( + gaussian_raw_degrees_list = ( pd.DataFrame( pd.read_csv(qm_energy_file_dir_and_name, sep="\s+", header=3) ) .iloc[:, 0] .tolist() ) - total_qm_scans = len(Guassian_raw_degrees_list) + total_qm_scans = len(gaussian_raw_degrees_list) mdf_frw.write_xyz_file_from_gaussian_coordinates( elementpdb_names_list, @@ -885,7 +885,7 @@ def fit_dihedral_with_gomc( # ************************************************************** # write the GOMC control files - for scan_iter in range(1, len(Guassian_raw_degrees_list) + 1): + for scan_iter in range(1, len(gaussian_raw_degrees_list) + 1): read_gomc_restart_file_coor_dir_and_name = f"../{xyz_xsc_coor_files_directory}/dihedral_coords_position_{scan_iter}.coor" read_gomc_restart_file_xsc_dir_and_name = ( f"../{xyz_xsc_coor_files_directory}/starting_point.xsc" @@ -1052,7 +1052,7 @@ def 
fit_dihedral_with_gomc( log_file_splitline_iter = log_file_line_iter.split() # scan_iter starts at 1 - dihedral_angle_degrees = Guassian_raw_degrees_list[scan_iter - 1] + dihedral_angle_degrees = gaussian_raw_degrees_list[scan_iter - 1] # only open the gomc raw energy file and write header for 1st iteration (1) if len(log_file_splitline_iter) >= 2: @@ -1152,35 +1152,35 @@ def fit_dihedral_with_gomc( # ********************************* # extract the raw data - Guassian_data_df = pd.DataFrame( + gaussian_data_df = pd.DataFrame( pd.read_csv(qm_energy_file_dir_and_name, sep="\s+", header=3) ) - Guassian_data_dihedral_degrees_list = Guassian_data_df.iloc[:, 0].tolist() - Guassian_data_total_energy_Hartree_list = Guassian_data_df.iloc[ + gaussian_data_dihedral_degrees_list = gaussian_data_df.iloc[:, 0].tolist() + gaussian_data_total_energy_Hartree_list = gaussian_data_df.iloc[ :, 1 ].tolist() # convert from Hartree to kcal/mol energy units - Guassian_data_total_energy_kcal_per_mol_list = [ + gaussian_data_total_energy_kcal_per_mol_list = [ i * conversion_hartree_to_kcal_per_mol - for i in Guassian_data_total_energy_Hartree_list + for i in gaussian_data_total_energy_Hartree_list ] # normalize so the min value is 0 - Guassian_data_total_energy_kcal_per_mol_normalize_list = [ - i - min(Guassian_data_total_energy_kcal_per_mol_list) - for i in Guassian_data_total_energy_kcal_per_mol_list + gaussian_data_total_energy_kcal_per_mol_normalize_list = [ + i - min(gaussian_data_total_energy_kcal_per_mol_list) + for i in gaussian_data_total_energy_kcal_per_mol_list ] print( - f"Guassian_data_dihedral_degrees_list = {Guassian_data_dihedral_degrees_list}" + f"gaussian_data_dihedral_degrees_list = {gaussian_data_dihedral_degrees_list}" ) print( - f"Guassian_data_total_energy_kcal_per_mol_list = {Guassian_data_total_energy_kcal_per_mol_list}" + f"gaussian_data_total_energy_kcal_per_mol_list = {gaussian_data_total_energy_kcal_per_mol_list}" ) print( - 
f"Guassian_data_total_energy_kcal_per_mol_normalize_list = " - f"{Guassian_data_total_energy_kcal_per_mol_normalize_list}" + f"gaussian_data_total_energy_kcal_per_mol_normalize_list = " + f"{gaussian_data_total_energy_kcal_per_mol_normalize_list}" ) # get the Gaussian minus GOMC total energy and then it normalized @@ -1188,10 +1188,10 @@ def fit_dihedral_with_gomc( GOMC_data_dihedral_degrees_list ) Gaussian_minus_GOMC_data_total_energy_kcal_per_mol_list = [ - Guassian_data_total_energy_kcal_per_mol_normalize_list[i] + gaussian_data_total_energy_kcal_per_mol_normalize_list[i] - GOMC_data_total_energy_kcal_per_mol_normalize_list[i] for i in range( - 0, len(Guassian_data_total_energy_kcal_per_mol_normalize_list) + 0, len(gaussian_data_total_energy_kcal_per_mol_normalize_list) ) ] @@ -1287,9 +1287,9 @@ def fit_dihedral_with_gomc( not len(GOMC_data_dihedral_degrees_list) == len(GOMC_data_total_energy_kcal_per_mol_list) == len(GOMC_data_total_energy_kcal_per_mol_normalize_list) - == len(Guassian_data_dihedral_degrees_list) - == len(Guassian_data_total_energy_kcal_per_mol_list) - == len(Guassian_data_total_energy_kcal_per_mol_normalize_list) + == len(gaussian_data_dihedral_degrees_list) + == len(gaussian_data_total_energy_kcal_per_mol_list) + == len(gaussian_data_total_energy_kcal_per_mol_normalize_list) == len(Gaussian_minus_GOMC_data_dihedral_degrees_list) == len(Gaussian_minus_GOMC_data_total_energy_kcal_per_mol_list) == len( @@ -1298,7 +1298,7 @@ def fit_dihedral_with_gomc( == len(all_sum_opls_const_1_plus_or_minus_cos_n_list) ): raise ValueError( - "ERROR: The GOMC and Guassian outputs do not match in length. " + "ERROR: The GOMC and gaussian outputs do not match in length. " "This could mean something is changed and wrong in the code, " "or GOMC is outputting multiple Initial eneries in the log file " ", in this case use a new version of GOMC." 
@@ -1307,19 +1307,19 @@ def fit_dihedral_with_gomc( # Check if all the angles match between sorted GOMC and Gaussian data for j_angle in range(0, len(GOMC_data_dihedral_degrees_list)): if not len(GOMC_data_dihedral_degrees_list) == len( - Guassian_data_dihedral_degrees_list + gaussian_data_dihedral_degrees_list ): raise ValueError( - "ERROR: The GOMC and Guassian output angles are not in the same angles in order." + "ERROR: The GOMC and gaussian output angles are not in the same angles in order." ) # Check if all the angles match between sorted GOMC and Gaussian data for k_angle in range(0, len(GOMC_data_dihedral_degrees_list)): if not len(GOMC_data_dihedral_degrees_list) == len( - Guassian_data_dihedral_degrees_list + gaussian_data_dihedral_degrees_list ): raise ValueError( - "ERROR: The GOMC and Guassian output angles are not in the same angles in order." + "ERROR: The GOMC and gaussian output angles are not in the same angles in order." ) if k_angle == 0: # write out the GOMC and Gaussian data in a file @@ -1342,7 +1342,7 @@ def fit_dihedral_with_gomc( gomc_gaussian_kcal_per_mol_energy_data_txt_file.write( f"{Gaussian_minus_GOMC_data_dihedral_degrees_list[k_angle]: <30} " f"{GOMC_data_total_energy_kcal_per_mol_normalize_list[k_angle]: <30} " - f"{Guassian_data_total_energy_kcal_per_mol_normalize_list[k_angle]: <30} " + f"{gaussian_data_total_energy_kcal_per_mol_normalize_list[k_angle]: <30} " f"{Gaussian_minus_GOMC_data_total_energy_kcal_per_mol_normalized_list[k_angle]: <40} " f"{const_1_minus_Cos_0_phi_data_lists[k_angle]: <30} " f"{const_1_plus_Cos_1_phi_data_lists[k_angle]: <30} " @@ -1383,7 +1383,7 @@ def fit_dihedral_with_gomc( ( sorted_Gaussian_minus_GOMC_data_dihedral_degrees_list, sorted_GOMC_data_total_energy_kcal_per_mol_normalize_list, - sorted_Guassian_data_total_energy_kcal_per_mol_normalize_list, + sorted_gaussian_data_total_energy_kcal_per_mol_normalize_list, sorted_Gaussian_minus_GOMC_data_total_energy_kcal_per_mol_normalized_list, 
sorted_all_sum_opls_const_1_plus_or_minus_cos_n_list, sorted_const_1_minus_Cos_0_phi_data_lists, @@ -1396,7 +1396,7 @@ def fit_dihedral_with_gomc( zip( Gaussian_minus_GOMC_data_dihedral_degrees_list, GOMC_data_total_energy_kcal_per_mol_normalize_list, - Guassian_data_total_energy_kcal_per_mol_normalize_list, + gaussian_data_total_energy_kcal_per_mol_normalize_list, Gaussian_minus_GOMC_data_total_energy_kcal_per_mol_normalized_list, all_sum_opls_const_1_plus_or_minus_cos_n_list, const_1_minus_Cos_0_phi_data_lists, @@ -1412,7 +1412,7 @@ def fit_dihedral_with_gomc( f"sorted_GOMC_data_total_energy_kcal_per_mol_normalize_list = {sorted_GOMC_data_total_energy_kcal_per_mol_normalize_list}" ) print( - f"sorted_Guassian_data_total_energy_kcal_per_mol_normalize_list = {sorted_Guassian_data_total_energy_kcal_per_mol_normalize_list}" + f"sorted_gaussian_data_total_energy_kcal_per_mol_normalize_list = {sorted_gaussian_data_total_energy_kcal_per_mol_normalize_list}" ) print( f"sorted_Gaussian_minus_GOMC_data_total_energy_kcal_per_mol_normalized_list = {sorted_Gaussian_minus_GOMC_data_total_energy_kcal_per_mol_normalized_list}" @@ -2430,7 +2430,7 @@ def fit_dihedral_with_gomc( f"all_normalized_energies_OPLS_fit_{opls_fit_q}_in_kcal_per_mol.txt" ) # write the GOMC control files - for scan_iter_q in range(1, len(Guassian_raw_degrees_list) + 1): + for scan_iter_q in range(1, len(gaussian_raw_degrees_list) + 1): read_gomc_fitted_restart_file_coor_dir_and_name = f"../{xyz_xsc_coor_files_directory}/dihedral_coords_position_{scan_iter_q}.coor" read_gomc_fitted_restart_file_xsc_dir_and_name = ( f"../{xyz_xsc_coor_files_directory}/starting_point.xsc" @@ -2593,7 +2593,7 @@ def fit_dihedral_with_gomc( log_file_splitline_iter = log_file_line_iter.split() # scan_iter starts at 1 - dihedral_angle_degrees = Guassian_raw_degrees_list[ + dihedral_angle_degrees = gaussian_raw_degrees_list[ scan_iter_q - 1 ] @@ -2680,26 +2680,26 @@ def fit_dihedral_with_gomc( # get Gaussian data (START) # 
********************************* # extract the raw data - Guassian_data_df = pd.DataFrame( + gaussian_data_df = pd.DataFrame( pd.read_csv(qm_energy_file_dir_and_name, sep="\s+", header=3) ) - Guassian_data_fitted_dihedral_degrees_list = Guassian_data_df.iloc[ + gaussian_data_fitted_dihedral_degrees_list = gaussian_data_df.iloc[ :, 0 ].tolist() - Guassian_data_total_energy_Hartree_list = Guassian_data_df.iloc[ + gaussian_data_total_energy_Hartree_list = gaussian_data_df.iloc[ :, 1 ].tolist() # convert from Hartree to kcal/mol energy units - Guassian_data_total_energy_kcal_per_mol_list = [ + gaussian_data_total_energy_kcal_per_mol_list = [ i * conversion_hartree_to_kcal_per_mol - for i in Guassian_data_total_energy_Hartree_list + for i in gaussian_data_total_energy_Hartree_list ] # normalize so the min value is 0 - Guassian_data_total_energy_kcal_per_mol_normalize_list = [ - i - min(Guassian_data_total_energy_kcal_per_mol_list) - for i in Guassian_data_total_energy_kcal_per_mol_list + gaussian_data_total_energy_kcal_per_mol_normalize_list = [ + i - min(gaussian_data_total_energy_kcal_per_mol_list) + for i in gaussian_data_total_energy_kcal_per_mol_list ] # get the Gaussian minus GOMC total energy and then it normalized @@ -2707,10 +2707,10 @@ def fit_dihedral_with_gomc( GOMC_data_fitted_dihedral_degrees_list ) Gaussian_minus_GOMC_data_fitted_total_energy_kcal_per_mol_list = [ - Guassian_data_total_energy_kcal_per_mol_normalize_list[i] + gaussian_data_total_energy_kcal_per_mol_normalize_list[i] - GOMC_data_fitted_total_energy_kcal_per_mol_normalize_list[i] for i in range( - 0, len(Guassian_data_total_energy_kcal_per_mol_normalize_list) + 0, len(gaussian_data_total_energy_kcal_per_mol_normalize_list) ) ] @@ -2730,7 +2730,7 @@ def fit_dihedral_with_gomc( # get R**2 for the fit, running through GOMC to get the new energy of the # individual fit. 
opls_r_squared_fitted_data_via_gomc_iter = mdf_math.get_r_squared( - Guassian_data_total_energy_kcal_per_mol_normalize_list, + gaussian_data_total_energy_kcal_per_mol_normalize_list, GOMC_data_fitted_total_energy_kcal_per_mol_normalize_list, ) opls_r_squared_fitted_data_via_gomc_list.append( @@ -2766,7 +2766,7 @@ def fit_dihedral_with_gomc( gomc_fitted_gaussian_kcal_per_mol_energy_data_txt_file.write( f"\n{Gaussian_minus_GOMC_data_fitted_dihedral_degrees_list[q_angle]: <30} " f"{GOMC_data_fitted_total_energy_kcal_per_mol_normalize_list[q_angle]: <30} " - f"{Guassian_data_total_energy_kcal_per_mol_normalize_list[q_angle]: <30} " + f"{gaussian_data_total_energy_kcal_per_mol_normalize_list[q_angle]: <30} " f"{Gaussian_minus_GOMC_data_fitted_total_energy_kcal_per_mol_normalized_list[q_angle]: <40} " f"{str(opls_k_constant_fitted_q_list_kcal_per_mol[0]): <30} " f"{str(opls_k_constant_fitted_q_list_kcal_per_mol[1]): <30} " From 8eb9211d8ecfd19068a559123a08ef890a1ee4b5 Mon Sep 17 00:00:00 2001 From: CalCraven Date: Sun, 22 Oct 2023 19:41:17 -0500 Subject: [PATCH 2/2] fixes to doc strings --- .../dihedral_fit/fit_dihedral_with_gomc.py | 42 ++----- .../utils/file_read_and_write.py | 112 +++++++++--------- 2 files changed, 69 insertions(+), 85 deletions(-) diff --git a/mosdef_dihedral_fit/dihedral_fit/fit_dihedral_with_gomc.py b/mosdef_dihedral_fit/dihedral_fit/fit_dihedral_with_gomc.py index 717200f..6a9bffd 100755 --- a/mosdef_dihedral_fit/dihedral_fit/fit_dihedral_with_gomc.py +++ b/mosdef_dihedral_fit/dihedral_fit/fit_dihedral_with_gomc.py @@ -92,7 +92,6 @@ def fit_dihedral_with_gomc( the system. * Example str for FF file: 'path_to file/trappe-ua.xml' - temperature_unyt_units: unyt.unyt_quantity The temperature of the system that was performed for the Quantum Mechanics (QM) simulation. @@ -102,7 +101,6 @@ def fit_dihedral_with_gomc( This does not include the "GOMC_CPU_NVT" in this variable. 
Example: '/home/brad/Programs/GOMC/GOMC_2_76/bin' - qm_log_files_and_entries_to_remove_dict: dict, {str: [int>=0, ..., int>=0]} * qm_engine="gaussian" This is a dictionary comprised of a key (string) of the QM log file path and name @@ -183,7 +181,6 @@ def fit_dihedral_with_gomc( 'GOMC_pdb_psf_ff_files_dihedrals_zeroed.inp' files in the 'GOMC_simulations' folder. These files can also be checked to confirm it is zeroing the correct dihedrals. - qm_engine: str (currently only 'gaussian'), default='gaussian' The Quantum Mechanics (QM) simulation engine utilized to produce the files listed in the 'qm_log_files_and_entries_to_remove_dict' variable(s). @@ -259,7 +256,6 @@ def fit_dihedral_with_gomc( --- If the general CHARMM style atom type in any residue/molecule's gomc_fix_bonds_angles, gomc_fix_bonds, or gomc_fix_angles are IN any other residue/molecule. - gomc_cpu_cores: int>0, default=1 The number of CPU-cores that are used to perform the GOMC simulations, required for the Molecular Mechanics (MM) energy calulations. @@ -285,7 +281,6 @@ def fit_dihedral_with_gomc( dihedral energies are set to zero. NOTE: This value may need adjusted to get the dihedral fit to solve correctly. - fit_validation_r_squared_rtol: float (0 < fit_min_validated_r_squared < 1), default=2.5e-02 Where the QM data is defined as the actual data; this is the difference of the dihedral's calculated R-squared values between: @@ -293,17 +288,9 @@ def fit_dihedral_with_gomc( * The MM calculations where the fit k-value are entered in the MM data and compared to the QM data. - fit_dihedral_atom_types, - mol2_selection, - forcefield_selection, - temperature_unyt_units, - gomc_binary_path, - qm_log_files_and_entries_to_remove_dict, - zeroed_dihedral_atom_types=None, - NOTE: This value may need adjusted to get the dihedral fit to solve correctly. 
- Returns + Outputs ------- Files containing the following information in the following relative locations: @@ -1305,7 +1292,7 @@ def fit_dihedral_with_gomc( ) # Check if all the angles match between sorted GOMC and Gaussian data - for j_angle in range(0, len(GOMC_data_dihedral_degrees_list)): + for _ in range(0, len(GOMC_data_dihedral_degrees_list)): if not len(GOMC_data_dihedral_degrees_list) == len( gaussian_data_dihedral_degrees_list ): @@ -1627,10 +1614,10 @@ def fit_dihedral_with_gomc( # ********************************** # Run the fitting for only the allowed power types - for k_iter_i, k_type_i in enumerate(fit_k_list_allowed): + for k_type_i in fit_k_list_allowed: # make the list of k_type_i for fitting in the data k_type_list_i = [] - for v in range( + for _ in range( 0, len(sorted_Gaussian_minus_GOMC_data_dihedral_degrees_list) ): k_type_list_i.append(k_type_i) @@ -1682,7 +1669,7 @@ def fit_dihedral_with_gomc( ) elif k_type_i == "2": - parameters, covariance = curve_fit( + parameters, _ = curve_fit( mdf_math.opls_dihedral, ( np.asarray(k_type_list_i), @@ -1727,7 +1714,7 @@ def fit_dihedral_with_gomc( ) elif k_type_i == "3": - parameters, covariance = curve_fit( + parameters, _ = curve_fit( mdf_math.opls_dihedral, ( np.asarray(k_type_list_i), @@ -1772,7 +1759,7 @@ def fit_dihedral_with_gomc( ) elif k_type_i == "4": - parameters, covariance = curve_fit( + parameters, _ = curve_fit( mdf_math.opls_dihedral, ( np.asarray(k_type_list_i), @@ -1817,7 +1804,7 @@ def fit_dihedral_with_gomc( ) elif k_type_i == "1_3": - parameters, covariance = curve_fit( + parameters, _ = curve_fit( mdf_math.opls_dihedral, ( np.asarray(k_type_list_i), @@ -1861,7 +1848,7 @@ def fit_dihedral_with_gomc( ) elif k_type_i == "2_4": - parameters, covariance = curve_fit( + parameters, _ = curve_fit( mdf_math.opls_dihedral, ( np.asarray(k_type_list_i), @@ -1905,7 +1892,7 @@ def fit_dihedral_with_gomc( ) elif k_type_i == "1_2": - parameters, covariance = curve_fit( + parameters, _ = 
curve_fit( mdf_math.opls_dihedral, ( np.asarray(k_type_list_i), @@ -1949,7 +1936,7 @@ def fit_dihedral_with_gomc( ) elif k_type_i == "3_4": - parameters, covariance = curve_fit( + parameters, _ = curve_fit( mdf_math.opls_dihedral, ( np.asarray(k_type_list_i), @@ -1993,7 +1980,7 @@ def fit_dihedral_with_gomc( ) elif k_type_i == "1_2_3": - parameters, covariance = curve_fit( + parameters, _ = curve_fit( mdf_math.opls_dihedral, ( np.asarray(k_type_list_i), @@ -2036,7 +2023,7 @@ def fit_dihedral_with_gomc( ) elif k_type_i == "1_2_3_4": - parameters, covariance = curve_fit( + parameters, _ = curve_fit( mdf_math.opls_dihedral, ( np.asarray(k_type_list_i), @@ -2683,9 +2670,6 @@ def fit_dihedral_with_gomc( gaussian_data_df = pd.DataFrame( pd.read_csv(qm_energy_file_dir_and_name, sep="\s+", header=3) ) - gaussian_data_fitted_dihedral_degrees_list = gaussian_data_df.iloc[ - :, 0 - ].tolist() gaussian_data_total_energy_Hartree_list = gaussian_data_df.iloc[ :, 1 ].tolist() diff --git a/mosdef_dihedral_fit/utils/file_read_and_write.py b/mosdef_dihedral_fit/utils/file_read_and_write.py index 1f06e4e..c0ddb47 100755 --- a/mosdef_dihedral_fit/utils/file_read_and_write.py +++ b/mosdef_dihedral_fit/utils/file_read_and_write.py @@ -157,7 +157,7 @@ def write_xyz_file_from_gaussian_coordinates( total_qm_scans: int The number of QM scans in the QM coordinate file. - Returns + Outputs ------- Writes the xyz file in the file in the selected 'xyz_files_directory' directory, with the file name 'dihedral_coords_position_n.xyz'. 
@@ -199,7 +199,7 @@ def write_xyz_file_from_gaussian_coordinates( output_file_xyz_file.write(f"{comment_1_space_in_front}\n") # The Gaussian file need to have these columns "Row Highlight Display Tag Symbol X Y Z" - check_guassian_optimized_coordinate_file_correct( + check_gaussian_optimized_coordinate_file_correct( read_gausian_file_dir_name ) with open(read_gausian_file_dir_name, "r") as fp: @@ -240,7 +240,7 @@ def write_restart_coor_from_xyz_file(coor_files_directory, total_qm_scans): total_qm_scans: int The number of QM scans in the QM coordinate file. - Returns + Outputs ------- Writes and the restart coor file in the file in the selected 'coor_files_directory' directory, with the file name @@ -293,10 +293,10 @@ def write_restart_coor_from_xyz_file(coor_files_directory, total_qm_scans): # ******************************************* -def check_guassian_angle_energy_file_correct(guassian_energy_file_dir_and_name): - """Check that the Guassian/Gausview file containing the angle and energy is formatted correctly. +def check_gaussian_angle_energy_file_correct(gaussian_energy_file_dir_and_name): + """Check that the gaussian/Gausview file containing the angle and energy is formatted correctly. - This checks the Guassian/Gausview containing the angle and energy is formatted correctly. + This checks the gaussian/Gausview containing the angle and energy is formatted correctly. The proper header format for the GausView/Gaussian output is as follows: @@ -307,7 +307,7 @@ def check_guassian_angle_energy_file_correct(guassian_energy_file_dir_and_name): Parameters ---------- - guassian_energy_file_dir_and_name: str + gaussian_energy_file_dir_and_name: str The directory and filename of the Gaussian/Gausview angle and energy file. 
Returns @@ -316,7 +316,7 @@ def check_guassian_angle_energy_file_correct(guassian_energy_file_dir_and_name): True; if the file is formatted correctly TypeError; if the file is not formatted correctly """ - with open(guassian_energy_file_dir_and_name, "r") as fp: + with open(gaussian_energy_file_dir_and_name, "r") as fp: gaussian_dihedral_header_line_correct_bool_list = [ False, False, @@ -386,12 +386,12 @@ def check_guassian_angle_energy_file_correct(guassian_energy_file_dir_and_name): return gaussian_angle_energy_file_correct -def check_guassian_optimized_coordinate_file_correct( +def check_gaussian_optimized_coordinate_file_correct( gaussian_optimized_coordinate_path_and_name, ): - """Check that the Guassian/Gausview file containing the optimized coordinates is formatted correctly. + """Check that the gaussian/Gausview file containing the optimized coordinates is formatted correctly. - This checks the Guassian/Gausview containing the optimized coordinates is formatted correctly. + This checks the gaussian/Gausview containing the optimized coordinates is formatted correctly. The proper header format for the GausView/Gaussian output is as follows: @@ -442,9 +442,9 @@ def check_guassian_optimized_coordinate_file_correct( def get_final_gaussian_output_file_data( qm_log_files_and_entries_to_remove_dict, manual_dihedral_atom_numbers_list ): - """Get the Guassian/Gausview file data from the existing Guassian/Gausview files. + """Get the gaussian/Gausview file data from the existing gaussian/Gausview files. - This gets the Guassian/Gausview log file data for all the optimized configurations, + This gets the gaussian/Gausview log file data for all the optimized configurations, moving it to the folder that will be analyzed. 
The proper header format for the GausView/Gaussian output is as follows: @@ -454,18 +454,18 @@ def get_final_gaussian_output_file_data( Parameters ---------- qm_log_files_and_entries_to_remove_dict: dict, {str: [int, ..., int]} - This is a dictionary comprised of a key (string) of the QM Guassian/Gausview file + This is a dictionary comprised of a key (string) of the QM gaussian/Gausview file data path and name, and a list of integers, which are the QM optimization parameters to remove from the written data, in order of reading from each file. These can be seen in the order of the dictionary file name (strings). These removed parameters allow users to remove any bad or repeated data points for the QM log file when needed. - Example 1: {'path/guassian_log_file.log': []} - Uses all the optimized data points from the 'path/guassian_log_file_data_path' file. + Example 1: {'path/gaussian_log_file.log': []} + Uses all the optimized data points from the 'path/gaussian_log_file_data_path' file. - Example 2: {'path/guassian_log_file.log': [0, 23]} - Uses all data points from the 'path/guassian_log_file_data_path' file, except points + Example 2: {'path/gaussian_log_file.log': [0, 23]} + Uses all data points from the 'path/gaussian_log_file_data_path' file, except points 0 and 23. NOTE: Python counting starts at 0. 
manual_dihedral_atom_numbers_list: list, list of four (4) int (example: [3,2,1,5]) This is a list of the dihedral atom numbers in order that were used for the dihedral @@ -573,7 +573,7 @@ def get_final_gaussian_output_file_data( direct_gaussian_angles_energy_formated_file_name_iter = ( f"{direct_gaussian_folder_iter}/dihedral.txt" ) - check_guassian_angle_energy_file_correct( + check_gaussian_angle_energy_file_correct( direct_gaussian_angles_energy_formated_file_name_iter ) @@ -583,7 +583,7 @@ def get_final_gaussian_output_file_data( first_enerery_dihedral_file_data_header_lines = 4 first_coord_file_data_header_lines = 1 - # get the 1st QM Guassian/Gausview file angles and dihedrals + # get the 1st QM gaussian/Gausview file angles and dihedrals direct_gaussian_angles_energy_iter = fp1.readlines() for m, line_m in enumerate(direct_gaussian_angles_energy_iter): m_less_spacers = int( @@ -608,7 +608,7 @@ def get_final_gaussian_output_file_data( elif m_less_spacers >= 0 and len(split_line_m) != 2: raise ValueError( f"ERROR: The directly input file {direct_gaussian_angles_energy_formated_file_name_iter} " - f"is not in the correct Guassian sytle format." + f"is not in the correct gaussian sytle format." 
) total_used_and_unused_dihedrals_per_file = [ @@ -626,14 +626,14 @@ def get_final_gaussian_output_file_data( if int(dih_per_file_i - 1) not in entries_to_remove_list_iter: dihedral_counter += 1 direct_gaussian_coord_formated_file_name_iter = f"{direct_gaussian_folder_iter}/dihedral_coords_position_{dih_per_file_i}.txt" - check_guassian_optimized_coordinate_file_correct( + check_gaussian_optimized_coordinate_file_correct( direct_gaussian_coord_formated_file_name_iter ) with open( direct_gaussian_coord_formated_file_name_iter, "r" ) as fp2: - # get the 1st QM Guassian/Gausview file angles and dihedrals + # get the 1st QM gaussian/Gausview file angles and dihedrals direct_gaussian_coord_iter = fp2.readlines() for n, line_n in enumerate(direct_gaussian_coord_iter): split_line_n = line_n.split() @@ -661,7 +661,7 @@ def get_final_gaussian_output_file_data( elif n_less_spacers >= 0 and len(split_line_n) != 8: raise ValueError( f"ERROR: The directly input file {direct_gaussian_coord_formated_file_name_iter} " - f"is not in the correct Guassian sytle format" + f"is not in the correct gaussian sytle format" ) # check the values against the past ones @@ -693,7 +693,7 @@ def get_final_gaussian_output_file_data( all_number_of_atoms_list, direct_number_of_atoms_list_iter, "number of atoms", - "Direct Guassian output file", + "Direct gaussian output file", direct_gaussian_coord_formated_file_name_iter, ) @@ -702,7 +702,7 @@ def get_final_gaussian_output_file_data( all_element_names_list, direct_element_names_list_iter, "element names", - "Direct Guassian output file", + "Direct gaussian output file", direct_gaussian_coord_formated_file_name_iter, ) @@ -711,7 +711,7 @@ def get_final_gaussian_output_file_data( all_dihedral_atom_numbers_list, direct_dihedral_atom_numbers_list_iter, "dihedral atom numbers", - "Direct Guassian output file", + "Direct gaussian output file", direct_gaussian_coord_formated_file_name_iter, ) @@ -731,9 +731,9 @@ def get_final_gaussian_output_file_data( 
def get_gaussian_log_file_data( qm_log_files_and_entries_to_remove_dict, ): - """Get the Guassian/Gausview file data from the log filefor all the optimized configurations. + """Get the Gaussian/GaussView file data from the log file for all the optimized configurations. - This gets the Guassian/Gausview log file data for all the optimized configurations, + This gets the Gaussian/GaussView log file data for all the optimized configurations, allowing the data to be analyzed further. The proper header format for the GausView/Gaussian output is as follows: @@ -749,11 +749,11 @@ def get_gaussian_log_file_data( order of the dictionary file name (strings). These removed parameters allow users to remove any bad or repeated data points for the QM log file when needed. - Example 1: {'path/guassian_log_file.log': []} - Uses all the optimized data points from the 'path/guassian_log_file.log' file. + Example 1: {'path/gaussian_log_file.log': []} + Uses all the optimized data points from the 'path/gaussian_log_file.log' file. - Example 2: {'path/guassian_log_file.log': [0, 23]} - Uses all data points from the 'path/guassian_log_file.log' file, except points + Example 2: {'path/gaussian_log_file.log': [0, 23]} + Uses all data points from the 'path/gaussian_log_file.log' file, except points 0 and 23. NOTE: Python counting starts at 0. Returns @@ -868,7 +868,7 @@ def get_gaussian_log_file_data( ): dihedral_scan_line = m - # get the Guassian molecule charge and multiplicity + # get the Gaussian molecule charge and multiplicity if ( len(split_line_m) >= 4 and split_line_m[0] == "Charge" @@ -1181,12 +1181,12 @@ def write_qm_data_files( order of the dictionary file name (strings). These removed parameters allow users to remove any bad or repeated data points for the QM log file when needed. - Example 1: {'path/guassian_log_file.log': []} + Example 1: {'path/gaussian_log_file.log': []} - Uses all the optimized data points from the 'path/guassian_log_file.log' file. 
+ Uses all the optimized data points from the 'path/gaussian_log_file.log' file. - Example 2: {'path/guassian_log_file.log': [0, 23]} - Uses all data points from the 'path/guassian_log_file.log' file, except points + Example 2: {'path/gaussian_log_file.log': [0, 23]} + Uses all data points from the 'path/gaussian_log_file.log' file, except points 0 and 23. NOTE: Python counting starts at 0. * qm_engine="gaussian_style_final_files" @@ -1229,10 +1229,10 @@ def write_qm_data_files( | 10 No Show 10 H -3.224767 1.255506 -0.130085 Example 1: {'path_to_gaussian_style_final_files': []} - Uses all the optimized data points from the 'path/guassian_log_file.log' file. + Uses all the optimized data points from the 'path/gaussian_log_file.log' file. Example 2: {'path_to_gaussian_style_final_files': [0, 23]} - Uses all data points from the 'path/guassian_log_file.log' file, except points + Uses all data points from the 'path/gaussian_log_file.log' file, except points 0 and 23. NOTE: Python counting starts at 0. manual_dihedral_atom_numbers_list: list of 4 integers, default=None @@ -1248,7 +1248,7 @@ def write_qm_data_files( Returns ------- - files are written to the created 'extracted_guassian_data' folder: + files are written to the created 'extracted_gaussian_data' folder: - 'dihedral.txt' file is in the standard Gaussian/Gausview format and contains the optimized scanned/rotated dihedral angle and energy in Hartree energy units. @@ -1259,13 +1259,13 @@ def write_qm_data_files( in Angstrom units. 
""" # delete any existing directories and make a new one - guassian_directory_name = "extracted_guassian_data" - if os.path.isdir(guassian_directory_name): - shutil.rmtree(guassian_directory_name) - os.mkdir(guassian_directory_name) + gaussian_directory_name = "extracted_gaussian_data" + if os.path.isdir(gaussian_directory_name): + shutil.rmtree(gaussian_directory_name) + os.mkdir(gaussian_directory_name) if qm_engine == "gaussian": - # extract the required data to write the Guassian style formatted output files + # extract the required data to write the gaussian style formatted output files [ all_dihedral_angle_degrees_list, all_energy_hartree_list, @@ -1301,7 +1301,7 @@ def write_qm_data_files( ) # write the gaussian style formatted angle (degrees) and energy output files output_file_dihedral_energy = open( - f"extracted_guassian_data/dihedral.txt", "w" + f"extracted_gaussian_data/dihedral.txt", "w" ) output_file_dihedral_energy.write( @@ -1319,10 +1319,10 @@ def write_qm_data_files( output_file_dihedral_energy.close() - # write the guassian style formatted coordinates file + # write the gaussian style formatted coordinates file for qm_j in range(0, len(all_dihedral_angle_degrees_list)): output_file_dihedral_coordinates = open( - f"extracted_guassian_data/dihedral_coords_position_{int(qm_j+1)}.txt", + f"extracted_gaussian_data/dihedral_coords_position_{int(qm_j+1)}.txt", "w", ) @@ -1405,12 +1405,12 @@ def get_matching_dihedral_info_and_opls_fitting_data( order of the dictionary file name (strings). These removed parameters allow users to remove any bad or repeated data points for the QM log file when needed. - Example 1: {'path/guassian_log_file.log': []} + Example 1: {'path/gaussian_log_file.log': []} - Uses all the optimized data points from the 'path/guassian_log_file.log' file. + Uses all the optimized data points from the 'path/gaussian_log_file.log' file. 
- Example 2: {'path/guassian_log_file.log': [0, 23]} - Uses all data points from the 'path/guassian_log_file.log' file, except points + Example 2: {'path/gaussian_log_file.log': [0, 23]} + Uses all data points from the 'path/gaussian_log_file.log' file, except points 0 and 23. NOTE: Python counting starts at 0. * qm_engine="gaussian_style_final_files" @@ -1707,11 +1707,11 @@ def get_matching_dihedral_info_and_opls_fitting_data( matching_dihedral_types_by_atom_type_list.append(dih_atom_type_p) # check if the QM and PSF file match in atom numbers - # NOTE: Guassian and the PSF file start atom counting at 1. + # NOTE: Gaussian and the PSF file start atom counting at 1. # extract the data from the QM log file if qm_engine == "gaussian": - # NOTE: Guassian start atom counting at 1. + # NOTE: Gaussian starts atom counting at 1. [ dihedral_angle_degrees_list, energy_hartree_list, @@ -1884,7 +1884,7 @@ def change_gomc_ff_file_dihedral_values( Example: [['CT', 'CT, 'CT, 'HC'], ['NT', 'CT, 'CT, 'HC']] - Returns + Outputs ------- Write a modified GOMC/CHARMM style force field file Force files are written copied from the existing force field file