Skip to content

Commit

Permalink
validate and repair for ligand (#41)
Browse files Browse the repository at this point in the history
* fix: scan bug

* version 0.1.5

* fix: add validate ligand tool

* fix: add validate ligand tool

* feat: add ligand prepare

* fix: add forcefield check

* feat: add a new tool unigbsa-validate

* fix: add protprep for lauching

* version 0.1.6
  • Loading branch information
Aunity authored Aug 18, 2023
1 parent 9e91b64 commit 27e6f14
Show file tree
Hide file tree
Showing 6 changed files with 219 additions and 12 deletions.
26 changes: 23 additions & 3 deletions launching/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -179,6 +179,22 @@ class GBSAModel(
...


def prep_protein(pdbfile):
outfile = pdbfile[:-4] + '_prep.pdb'
args = [
'-i', pdbfile,
'-o', outfile,
'-mode', 'gromacs',
]
print(args)
argstr = ' '.join(args)
cmd = '/bin/bash -c "source ~/.bashrc; conda activate protprep; unisp-prot %s"'%argstr
RC = os.system(cmd)
if RC != 0:
return -2
return outfile


def gbsa_runner(opts: GBSAModel) -> int:
status = 0
try:
Expand Down Expand Up @@ -217,6 +233,9 @@ def gbsa_runner(opts: GBSAModel) -> int:
}
}
input_protein = opts.input_protein.get_path()
preped_protein = prep_protein(input_protein)
if preped_protein == -2:
raise Exception('ERROR prep protein file.')
input_dir = os.path.dirname(input_protein)
output_dir = str(opts.output_dir)
if not os.path.exists(output_dir):
Expand All @@ -228,9 +247,10 @@ def gbsa_runner(opts: GBSAModel) -> int:

cmd = sh.Command('unigbsa-pipeline')
args = [
'-i', opts.input_protein.get_path(),
'-l', " ".join(opts.input_ligands),
'-c', configfile,
'-i', preped_protein,
'-l', *opts.input_ligands,
'-validate',
'-c', str(configfile),
'-o', csvoutfile
]

Expand Down
3 changes: 2 additions & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,8 @@ def get_readme(rel_path):
'unigbsa-buildsys = unigbsa.CLI:simulation_builder',
'unigbsa-md = unigbsa.CLI:simulation_run',
'unigbsa-plots = unigbsa.CLI:mmpbsa_plot',
'unigbsa-scan = unigbsa.scanparas.scan:main'
'unigbsa-scan = unigbsa.scanparas.scan:main',
'unigbsa-validate = unigbsa.CLI:ligand_check'
]},
include_package_data=True
)
20 changes: 20 additions & 0 deletions unigbsa/CLI.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
from .utils import generate_index_file, process_pbc

from .simulation import topology, mdrun
from .simulation.utils import ligand_validate
from .settings import logging
from .version import __version__

Expand Down Expand Up @@ -292,3 +293,22 @@ def mmpbsa_plot():
func = funcdic[fname]
logging.info('Analysis file: %s'%fname)
func(datfile, outdir=oup)


def ligand_check():
parser = argparse.ArgumentParser(description='Analysis and plot results for MMPBSA.')
parser.add_argument('-i', help='Ligand file to validate.', required=True)
parser.add_argument('-o', help='Ligand file after validate. default: None', default=None)
parser.add_argument('-v', '--version', action='version', version="{prog}s ({version})".format(prog="%(prog)", version=__version__))

args = parser.parse_args()
ligandfile, outfile = args.i, args.o
if not outfile:
outfile = ligand_validate(ligandfile, outfile)
if outfile == ligandfile:
print(f'{ligandfile} is OK as the input for unigbsa.')
else:
print(f'{ligandfile} is not OK as the input for unigbsa. But unigbsa-validate can repair it.')
else:
outfile = ligand_validate(ligandfile, outfile)
print(f'{outfile} is OK as the input for unigbsa.')
17 changes: 11 additions & 6 deletions unigbsa/pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
from unigbsa.utils import generate_index_file, load_configue_file
from unigbsa.simulation.mdrun import GMXEngine
from unigbsa.simulation.topology import build_topol, build_protein
from unigbsa.simulation.utils import ligand_validate
from unigbsa.settings import logging, DEFAULT_CONFIGURE_FILE, GMXEXE, set_OMP_NUM_THREADS

from tqdm import tqdm
Expand Down Expand Up @@ -52,7 +53,7 @@ def traj_pipeline(complexfile, trajfile, topolfile, indexfile, pbsaParas=None, m
print('%6d %4s %18.4f '%(irow['Frames'], irow['mode'], irow['TOTAL']))
return detal_G

def base_pipeline(receptorfile, ligandfiles, paras, nt=1, mmpbsafile=None, outfile='BindingEnergy.csv', verbose=False):
def base_pipeline(receptorfile, ligandfiles, paras, nt=1, mmpbsafile=None, outfile='BindingEnergy.csv', validate=False, verbose=False):
"""
This function takes a receptorfile and ligandfile, and build a complex.pdb and complex.top file
Expand Down Expand Up @@ -81,7 +82,8 @@ def base_pipeline(receptorfile, ligandfiles, paras, nt=1, mmpbsafile=None, outfi
if not os.path.exists(ligandName):
os.mkdir(ligandName)
os.chdir(ligandName)

if validate:
ligandfile = ligand_validate(ligandfile, ligandName+'.mol')
grofile = 'complex.pdb'
topfile = 'complex.top'
logging.info('Build ligand topology: %s'%ligandName)
Expand Down Expand Up @@ -118,7 +120,7 @@ def base_pipeline(receptorfile, ligandfiles, paras, nt=1, mmpbsafile=None, outfi


def single(arg):
receptor, ligandfile, simParas, ligandfiles, mmpbsafile, nt, pbsaParas, verbose, receptorfile = arg
receptor, ligandfile, simParas, ligandfiles, mmpbsafile, nt, pbsaParas, validate, verbose, receptorfile = arg
d1 = pd.DataFrame({'Frames': 1, 'mode':pbsaParas['modes'], 'complex':0.0,'receptor':0.0,'ligand':0.0,'Internal':0.0,'Van der Waals':0.0,'Electrostatic':0,'Polar Solvation':0.0,'Non-Polar Solvation':0.0,'Gas':0.0,'Solvation':0.0,'TOTAL':0.0}, index=[1])
cwd = os.getcwd()
statu = 'S'
Expand All @@ -127,6 +129,8 @@ def single(arg):
if not os.path.exists(ligandName):
os.mkdir(ligandName)
os.chdir(ligandName)
if validate:
ligandfile = ligand_validate(ligandfile, ligandName+'.mol')
grofile = 'complex.pdb'
topfile = 'complex.top'
if len(ligandfiles) == 1:
Expand Down Expand Up @@ -179,7 +183,7 @@ def single(arg):
d1['status'] = statu
return d1

def minim_pipeline(receptorfile, ligandfiles, paras, mmpbsafile=None, nt=1, outfile='BindingEnergy.csv', verbose=False):
def minim_pipeline(receptorfile, ligandfiles, paras, mmpbsafile=None, nt=1, outfile='BindingEnergy.csv', validate=False, verbose=False):
"""
It runs the simulation pipeline for each ligand.
Expand All @@ -199,7 +203,7 @@ def minim_pipeline(receptorfile, ligandfiles, paras, mmpbsafile=None, nt=1, outf

args = [(receptor, ligandfile, simParas,
ligandfiles, mmpbsafile, 1,
pbsaParas, verbose, receptorfile) for ligandfile in sorted(ligandfiles)]
pbsaParas, validate, verbose, receptorfile) for ligandfile in sorted(ligandfiles)]
if len(args) == 1:
df = single(args[0])
else:
Expand Down Expand Up @@ -285,6 +289,7 @@ def main(args=None):
parser.add_argument('-d', dest='ligdir', help='Floder contains many ligand files. file format: .mol or .sdf', default=None)
parser.add_argument('-f', dest='pbsafile', help='gmx_MMPBSA input file. default=None', default=None)
parser.add_argument('-o', dest='outfile', help='Output file.', default='BindingEnergy.csv')
parser.add_argument('-validate', help='Validate the ligand file. default: False', action='store_true', default=False)
parser.add_argument('-nt', dest='thread', help='Set number of thread to run this program.', type=int, default=multiprocessing.cpu_count())
parser.add_argument('--decomp', help='Decompose the free energy. default:False', action='store_true', default=False)
parser.add_argument('--verbose', help='Keep all the files.', action='store_true', default=False)
Expand Down Expand Up @@ -323,7 +328,7 @@ def main(args=None):
paras['GBSA']['modes'] = gbtype

if paras['simulation']['mode'] == 'em':
minim_pipeline(receptorfile=receptor, ligandfiles=ligands, paras=paras, outfile=outfile, mmpbsafile=mmpbsafile, verbose=verbose, nt=nt)
minim_pipeline(receptorfile=receptor, ligandfiles=ligands, paras=paras, outfile=outfile, mmpbsafile=mmpbsafile, validate=args.validate, verbose=verbose, nt=nt)
elif paras['simulation']['mode'] == 'md':
md_pipeline(receptorfile=receptor, ligandfiles=ligands, paras=paras, outfile=outfile, mmpbsafile=mmpbsafile, verbose=verbose, nt=nt)
elif paras['simulation']['mode'] == 'input':
Expand Down
163 changes: 162 additions & 1 deletion unigbsa/simulation/utils.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,10 @@
import os
from unigbsa.settings import GMXEXE
import uuid
import shutil
from pathlib import Path
from unigbsa.settings import GMXEXE, logging


def convert_format(inputfile, filetype, outfile=None, outtype='mol'):
"""
Convert a file of type filetype to mol2 format
Expand Down Expand Up @@ -335,3 +340,159 @@ def obtain_net_charge(sdfile):
# Calculate the net charge of the molecule
net_charge = mol.GetTotalCharge()
return net_charge


def check_forcefield(sdfile):
sqm_key = "grms_tol=0.005,qm_theory='AM1',scfconv=1.d-5,ndiis_attempts=700,maxcyc=0"
tmpdir = Path('/tmp') / str(uuid.uuid4())
sdfile = Path(sdfile).absolute()
net_charge = obtain_net_charge(str(sdfile))
tmpdir.mkdir(exist_ok=True)
cwd = os.getcwd()
os.chdir(tmpdir)
cmd = f'export OMP_NUM_THREADS=1;acpype -i {sdfile} -b MOL -a gaff -c bcc -n {net_charge} -k "{sqm_key}" >acpype.log 2>&1 '
rc = os.system(cmd)
os.chdir(cwd)
shutil.rmtree(tmpdir)
if rc != 0:
return False
else:
return True


def check_element(sdfile):
from openbabel import openbabel
ext = Path(sdfile).suffix[1:]
obConversion = openbabel.OBConversion()
obConversion.SetInAndOutFormats(ext, ext)
mol = openbabel.OBMol()
if not obConversion.ReadFile(mol, str(sdfile)):
return -1

acceptable_elements = {6, 7, 8, 16, 15, 1, 9, 17, 35, 53}
for atom in openbabel.OBMolAtomIter(mol):
element = atom.GetAtomicNum()
if element not in acceptable_elements:
return 1
return 0


def get_total_valence_electrons(sdfile):
from openbabel import openbabel
extin = Path(sdfile).suffix[1:]
obConversion = openbabel.OBConversion()
obConversion.SetInAndOutFormats(extin, extin)
mol = openbabel.OBMol()
obConversion.ReadFile(mol, str(sdfile))
total_valence_electrons = 0
for atom in openbabel.OBMolAtomIter(mol):
total_valence_electrons += atom.GetAtomicNum() - atom.GetFormalCharge()
return total_valence_electrons


def get_electronegativity(atomic_number):
electronegativity_table = {
1: 2.20, # Hydrogen (H)
6: 2.55, # Carbon (C)
7: 3.04, # Nitrogen (N)
8: 3.44, # Oxygen (O)
15: 2.19, # Phosphorus (P)
16: 2.58, # Sulfur (S)
9: 3.98, # Fluorine (F)
17: 3.16, # Chlorine (Cl)
35: 2.96, # Bromine (Br)
53: 2.66, # Iodine (I)
}
return electronegativity_table.get(atomic_number, 0)


def adjust_charge_based_on_electronegativity(ob_mol):
from openbabel import openbabel
max_electronegativity = 0
target_atom = None

for atom in openbabel.OBMolAtomIter(ob_mol):
if atom.GetFormalCharge() != 0:
electronegativity = get_electronegativity(atom.GetAtomicNum())
if electronegativity > max_electronegativity:
max_electronegativity = electronegativity
target_atom = atom

if target_atom is not None:
if target_atom.GetFormalCharge() > 0:
target_atom.SetFormalCharge(target_atom.GetFormalCharge() - 1)
elif target_atom.GetFormalCharge() < 0:
target_atom.SetFormalCharge(target_atom.GetFormalCharge() + 1)


def add_hydrogen(sdfile, outfile=None):
from openbabel import openbabel
extin = extout = Path(sdfile).suffix[1:]
obConversion = openbabel.OBConversion()
obConversion.SetInAndOutFormats(extin, extout)
mol = openbabel.OBMol()
obConversion.ReadFile(mol, str(sdfile))
mol.AddHydrogens()
mol_string = obConversion.WriteString(mol)
if outfile:
with open(outfile, 'w') as fw:
fw.write(mol_string)
else:
return mol


def repair_ligand(sdfile, outfile=None):
from openbabel import openbabel
extin = Path(sdfile).suffix[1:]
if outfile is None:
fname = str(uuid.uuid4()) + '.' + extin
outfile = Path('/tmp') / fname
extout = Path(outfile).suffix[1:]
obConversion = openbabel.OBConversion()
obConversion.SetInAndOutFormats(extin, extout)
mol = openbabel.OBMol()
obConversion.ReadFile(mol, str(sdfile))
mol.AddHydrogens()
mol.DeleteHydrogens()
# Correct valence information
pH = 7.4
charge_model = openbabel.OBChargeModel.FindType("mmff94")
charge_model.ComputeCharges(mol, str(pH))
mol.CorrectForPH()
mol.PerceiveBondOrders()
total_valence_electrons = sum([atom.GetAtomicNum() - atom.GetFormalCharge() for atom in openbabel.OBMolAtomIter(mol)])
if total_valence_electrons % 2 == 1:
adjust_charge_based_on_electronegativity(mol)
mol_string = obConversion.WriteString(mol)
new_string = []
for i, line in enumerate(mol_string.split('\n')):
if i >= 4 and len(line) >= 30 and line.startswith(' '):
line = line[:42] + ' 0 0 0 0 0 0 0 0 0'
new_string.append(line)
mol_string = '\n'.join(new_string)
with open(outfile, 'w') as fw:
fw.write(mol_string)
add_hydrogen(outfile, outfile)
return outfile


def ligand_validate(sdfile, outfile=None):
rc = check_element(sdfile)
if rc == -1:
logging.error(f'Failed to load {sdfile}, please check your input {sdfile}.')
exit(256)
elif rc == 1:
logging.error(f'Ligand file only accept C N O S P H F Cl Br I, please check your input {sdfile}.')
exit(257)
total_valence_electrons = get_total_valence_electrons(sdfile)
if total_valence_electrons % 2 == 1 or not check_forcefield(sdfile):
logging.warning(f'The total valence electrons of your ligand is odd({total_valence_electrons}) or forcefield check error, try to repair input ligand.')
outfile = repair_ligand(sdfile, outfile=outfile)
total_valence_electrons = get_total_valence_electrons(outfile)
if total_valence_electrons % 2 == 1 or not check_forcefield(outfile):
logging.error(f'The total valence electrons of your ligand is stil odd({total_valence_electrons}) or forcefield check error after repair ligand, please check your input {sdfile}.')
exit(257)
else:
return outfile
else:
return sdfile
2 changes: 1 addition & 1 deletion unigbsa/version.py
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
__version__="0.1.5"
__version__="0.1.6"

0 comments on commit 27e6f14

Please sign in to comment.