From 3ef4c1ddb9cc6578133990ae0dbe75d883fff68e Mon Sep 17 00:00:00 2001 From: Gaowei Chen02 Date: Wed, 7 Feb 2024 10:55:46 +0800 Subject: [PATCH] Add some error messages that are more helpful for trouble shooting. 1. add check.py in utils: parse the input cv files with PLUMED before running the workflow. 2. add assertions in submit.py and run_select.py to ensure the trust_lvl_1 and std_threshold to be suitable. --- rid/entrypoint/submit.py | 4 ++- rid/op/label_stats.py | 2 ++ rid/op/run_select.py | 4 +++ rid/utils/__init__.py | 3 ++- rid/utils/check.py | 58 ++++++++++++++++++++++++++++++++++++++++ 5 files changed, 69 insertions(+), 2 deletions(-) mode change 100644 => 100755 rid/entrypoint/submit.py create mode 100644 rid/utils/check.py diff --git a/rid/entrypoint/submit.py b/rid/entrypoint/submit.py old mode 100644 new mode 100755 index ab1b0c82..cbbfb311 --- a/rid/entrypoint/submit.py +++ b/rid/entrypoint/submit.py @@ -2,7 +2,7 @@ import json from pathlib import Path from typing import List, Union, Optional -from rid.utils import load_json +from rid.utils import load_json, check_cv_file import os from dflow import ( @@ -224,6 +224,8 @@ def submit_rid( if len(cvfile_list) == 0: cv_file_artifact = None else: + Rsl, Rmsg = check_cv_file(cvfile_list) + assert Rsl, f"An error occurred while parsing cv_files:\n\n{Rmsg}" cv_file_artifact = upload_artifact([Path(p) for p in cvfile_list], archive=None) if len(dpfile_list) == 0: diff --git a/rid/op/label_stats.py b/rid/op/label_stats.py index 53b7e3a7..8d09d23a 100644 --- a/rid/op/label_stats.py +++ b/rid/op/label_stats.py @@ -91,6 +91,8 @@ def execute( higher_index.add(i) higher_index_list = list(higher_index) print("higher index list", list(cv_forces_list[higher_index_list])) + assert len(higher_index_list) < len(mf_all_std_list), \ + f"All the std are higher than the std_threshold ({op_in['std_threshold']}), please raise the std_threshold."
mf_all_std_list_modified = np.delete(mf_all_std_list, higher_index_list, axis=0) cv_forces_list_modified = np.delete(cv_forces_list, higher_index_list, axis=0) assert len(mf_all_std_list_modified) == len(cv_forces_list_modified) diff --git a/rid/op/run_select.py b/rid/op/run_select.py index 24ab1e91..033663fc 100644 --- a/rid/op/run_select.py +++ b/rid/op/run_select.py @@ -110,6 +110,10 @@ def execute( else: stds = make_std(cls_sel_data, models=op_in["models"]) save_txt("cls_"+model_devi_name, stds, fmt=model_devi_precision) + assert max(stds) > op_in["trust_lvl_1"], f""" + The maximum deviation of the models ({max(stds)}) is smaller than trust_lvl_1 + ({op_in['trust_lvl_1']}), causing the selected indices to be empty. Please lower trust_lvl_1. + """ _selected_idx = select_from_devi(stds, op_in["trust_lvl_1"]) sel_idx = cls_sel_idx[_selected_idx] np.save(sel_ndx_name, sel_idx) diff --git a/rid/utils/__init__.py b/rid/utils/__init__.py index f3b6009b..8ef83e58 100644 --- a/rid/utils/__init__.py +++ b/rid/utils/__init__.py @@ -12,4 +12,5 @@ from rid.utils.format import list_to_string from rid.utils.command import run_command from rid.utils.path import set_directory -from rid.utils.set_config import init_executor, normalize_resources \ No newline at end of file +from rid.utils.set_config import init_executor, normalize_resources +from rid.utils.check import check_cv_file \ No newline at end of file diff --git a/rid/utils/check.py b/rid/utils/check.py new file mode 100644 index 00000000..441d2ff5 --- /dev/null +++ b/rid/utils/check.py @@ -0,0 +1,58 @@ +# Trouble shooting for common errors during the workflow + +import os +import subprocess +import numpy as np +from pathlib import Path + +def check_cv_file(file_list : list): + """Parse the cv files with plumed to check if the files are valid. + + Parameters + ---------- + file_list : list + List of file absolute paths. Only one .pdb file is allowed. + + Returns + ------- + Rsl : bool + True if the file is valid, False otherwise.
+    Rmsg : str
+        The PLUMED output of the last parsed file: the error report if
+        parsing failed, or an empty string if no cv file was parsed.
+    """
+    # Run PLUMED inside the input directory through ``cwd`` instead of
+    # ``os.chdir`` so the caller's working directory is left untouched.
+    input_dir = Path(file_list[0]).parent
+
+    cv_file_list = [file for file in file_list if file[-4:] != ".pdb"]
+    strut_pdb = [file for file in file_list if file[-4:] == ".pdb"]
+    assert len(strut_pdb) == 1, \
+        "There should be only one .pdb file in the cv files."
+
+    # Fetch the number of atoms from the last ATOM record. The whole file is
+    # scanned: seeking a fixed block back from the end fails on short files.
+    natoms = None
+    with open(strut_pdb[0], "rb") as f:
+        for line in reversed(f.readlines()):
+            if line.startswith(b'ATOM'):
+                natoms = int(line[6:11])
+                break
+    assert natoms is not None, \
+        f"No ATOM record found in {strut_pdb[0]}."
+
+    # Parse the file with plumed
+    Rsl = True
+    Rmsg = ''
+    for file in cv_file_list:
+        cmd = f"plumed driver --plumed {file} \
+            --parse-only --natoms {natoms}"
+        proc = subprocess.run(
+            cmd, shell=True, cwd=str(input_dir), universal_newlines=True,
+            stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
+        Rmsg = proc.stdout
+        if proc.returncode != 0:
+            Rsl = False
+            break
+
+    return Rsl, Rmsg